update guessit and subliminal libs. Fixes #678

This commit is contained in:
clinton-hall 2015-01-19 14:22:30 +10:30
parent ff50e5144c
commit f716323b76
72 changed files with 9350 additions and 3032 deletions

View file

@ -1,249 +0,0 @@
Afghanistan|AF|AFG|004|ISO 3166-2:AF
Åland Islands|AX|ALA|248|ISO 3166-2:AX
Albania|AL|ALB|008|ISO 3166-2:AL
Algeria|DZ|DZA|012|ISO 3166-2:DZ
American Samoa|AS|ASM|016|ISO 3166-2:AS
Andorra|AD|AND|020|ISO 3166-2:AD
Angola|AO|AGO|024|ISO 3166-2:AO
Anguilla|AI|AIA|660|ISO 3166-2:AI
Antarctica|AQ|ATA|010|ISO 3166-2:AQ
Antigua and Barbuda|AG|ATG|028|ISO 3166-2:AG
Argentina|AR|ARG|032|ISO 3166-2:AR
Armenia|AM|ARM|051|ISO 3166-2:AM
Aruba|AW|ABW|533|ISO 3166-2:AW
Australia|AU|AUS|036|ISO 3166-2:AU
Austria|AT|AUT|040|ISO 3166-2:AT
Azerbaijan|AZ|AZE|031|ISO 3166-2:AZ
Bahamas|BS|BHS|044|ISO 3166-2:BS
Bahrain|BH|BHR|048|ISO 3166-2:BH
Bangladesh|BD|BGD|050|ISO 3166-2:BD
Barbados|BB|BRB|052|ISO 3166-2:BB
Belarus|BY|BLR|112|ISO 3166-2:BY
Belgium|BE|BEL|056|ISO 3166-2:BE
Belize|BZ|BLZ|084|ISO 3166-2:BZ
Benin|BJ|BEN|204|ISO 3166-2:BJ
Bermuda|BM|BMU|060|ISO 3166-2:BM
Bhutan|BT|BTN|064|ISO 3166-2:BT
Bolivia, Plurinational State of|BO|BOL|068|ISO 3166-2:BO
Bonaire, Sint Eustatius and Saba|BQ|BES|535|ISO 3166-2:BQ
Bosnia and Herzegovina|BA|BIH|070|ISO 3166-2:BA
Botswana|BW|BWA|072|ISO 3166-2:BW
Bouvet Island|BV|BVT|074|ISO 3166-2:BV
Brazil|BR|BRA|076|ISO 3166-2:BR
British Indian Ocean Territory|IO|IOT|086|ISO 3166-2:IO
Brunei Darussalam|BN|BRN|096|ISO 3166-2:BN
Bulgaria|BG|BGR|100|ISO 3166-2:BG
Burkina Faso|BF|BFA|854|ISO 3166-2:BF
Burundi|BI|BDI|108|ISO 3166-2:BI
Cambodia|KH|KHM|116|ISO 3166-2:KH
Cameroon|CM|CMR|120|ISO 3166-2:CM
Canada|CA|CAN|124|ISO 3166-2:CA
Cape Verde|CV|CPV|132|ISO 3166-2:CV
Cayman Islands|KY|CYM|136|ISO 3166-2:KY
Central African Republic|CF|CAF|140|ISO 3166-2:CF
Chad|TD|TCD|148|ISO 3166-2:TD
Chile|CL|CHL|152|ISO 3166-2:CL
China|CN|CHN|156|ISO 3166-2:CN
Christmas Island|CX|CXR|162|ISO 3166-2:CX
Cocos (Keeling) Islands|CC|CCK|166|ISO 3166-2:CC
Colombia|CO|COL|170|ISO 3166-2:CO
Comoros|KM|COM|174|ISO 3166-2:KM
Congo|CG|COG|178|ISO 3166-2:CG
Congo, the Democratic Republic of the|CD|COD|180|ISO 3166-2:CD
Cook Islands|CK|COK|184|ISO 3166-2:CK
Costa Rica|CR|CRI|188|ISO 3166-2:CR
Côte d'Ivoire|CI|CIV|384|ISO 3166-2:CI
Croatia|HR|HRV|191|ISO 3166-2:HR
Cuba|CU|CUB|192|ISO 3166-2:CU
Curaçao|CW|CUW|531|ISO 3166-2:CW
Cyprus|CY|CYP|196|ISO 3166-2:CY
Czech Republic|CZ|CZE|203|ISO 3166-2:CZ
Denmark|DK|DNK|208|ISO 3166-2:DK
Djibouti|DJ|DJI|262|ISO 3166-2:DJ
Dominica|DM|DMA|212|ISO 3166-2:DM
Dominican Republic|DO|DOM|214|ISO 3166-2:DO
Ecuador|EC|ECU|218|ISO 3166-2:EC
Egypt|EG|EGY|818|ISO 3166-2:EG
El Salvador|SV|SLV|222|ISO 3166-2:SV
Equatorial Guinea|GQ|GNQ|226|ISO 3166-2:GQ
Eritrea|ER|ERI|232|ISO 3166-2:ER
Estonia|EE|EST|233|ISO 3166-2:EE
Ethiopia|ET|ETH|231|ISO 3166-2:ET
Falkland Islands (Malvinas)|FK|FLK|238|ISO 3166-2:FK
Faroe Islands|FO|FRO|234|ISO 3166-2:FO
Fiji|FJ|FJI|242|ISO 3166-2:FJ
Finland|FI|FIN|246|ISO 3166-2:FI
France|FR|FRA|250|ISO 3166-2:FR
French Guiana|GF|GUF|254|ISO 3166-2:GF
French Polynesia|PF|PYF|258|ISO 3166-2:PF
French Southern Territories|TF|ATF|260|ISO 3166-2:TF
Gabon|GA|GAB|266|ISO 3166-2:GA
Gambia|GM|GMB|270|ISO 3166-2:GM
Georgia|GE|GEO|268|ISO 3166-2:GE
Germany|DE|DEU|276|ISO 3166-2:DE
Ghana|GH|GHA|288|ISO 3166-2:GH
Gibraltar|GI|GIB|292|ISO 3166-2:GI
Greece|GR|GRC|300|ISO 3166-2:GR
Greenland|GL|GRL|304|ISO 3166-2:GL
Grenada|GD|GRD|308|ISO 3166-2:GD
Guadeloupe|GP|GLP|312|ISO 3166-2:GP
Guam|GU|GUM|316|ISO 3166-2:GU
Guatemala|GT|GTM|320|ISO 3166-2:GT
Guernsey|GG|GGY|831|ISO 3166-2:GG
Guinea|GN|GIN|324|ISO 3166-2:GN
Guinea-Bissau|GW|GNB|624|ISO 3166-2:GW
Guyana|GY|GUY|328|ISO 3166-2:GY
Haiti|HT|HTI|332|ISO 3166-2:HT
Heard Island and McDonald Islands|HM|HMD|334|ISO 3166-2:HM
Holy See (Vatican City State)|VA|VAT|336|ISO 3166-2:VA
Honduras|HN|HND|340|ISO 3166-2:HN
Hong Kong|HK|HKG|344|ISO 3166-2:HK
Hungary|HU|HUN|348|ISO 3166-2:HU
Iceland|IS|ISL|352|ISO 3166-2:IS
India|IN|IND|356|ISO 3166-2:IN
Indonesia|ID|IDN|360|ISO 3166-2:ID
Iran, Islamic Republic of|IR|IRN|364|ISO 3166-2:IR
Iraq|IQ|IRQ|368|ISO 3166-2:IQ
Ireland|IE|IRL|372|ISO 3166-2:IE
Isle of Man|IM|IMN|833|ISO 3166-2:IM
Israel|IL|ISR|376|ISO 3166-2:IL
Italy|IT|ITA|380|ISO 3166-2:IT
Jamaica|JM|JAM|388|ISO 3166-2:JM
Japan|JP|JPN|392|ISO 3166-2:JP
Jersey|JE|JEY|832|ISO 3166-2:JE
Jordan|JO|JOR|400|ISO 3166-2:JO
Kazakhstan|KZ|KAZ|398|ISO 3166-2:KZ
Kenya|KE|KEN|404|ISO 3166-2:KE
Kiribati|KI|KIR|296|ISO 3166-2:KI
Korea, Democratic People's Republic of|KP|PRK|408|ISO 3166-2:KP
Korea, Republic of|KR|KOR|410|ISO 3166-2:KR
Kuwait|KW|KWT|414|ISO 3166-2:KW
Kyrgyzstan|KG|KGZ|417|ISO 3166-2:KG
Lao People's Democratic Republic|LA|LAO|418|ISO 3166-2:LA
Latvia|LV|LVA|428|ISO 3166-2:LV
Lebanon|LB|LBN|422|ISO 3166-2:LB
Lesotho|LS|LSO|426|ISO 3166-2:LS
Liberia|LR|LBR|430|ISO 3166-2:LR
Libya|LY|LBY|434|ISO 3166-2:LY
Liechtenstein|LI|LIE|438|ISO 3166-2:LI
Lithuania|LT|LTU|440|ISO 3166-2:LT
Luxembourg|LU|LUX|442|ISO 3166-2:LU
Macao|MO|MAC|446|ISO 3166-2:MO
Macedonia, the former Yugoslav Republic of|MK|MKD|807|ISO 3166-2:MK
Madagascar|MG|MDG|450|ISO 3166-2:MG
Malawi|MW|MWI|454|ISO 3166-2:MW
Malaysia|MY|MYS|458|ISO 3166-2:MY
Maldives|MV|MDV|462|ISO 3166-2:MV
Mali|ML|MLI|466|ISO 3166-2:ML
Malta|MT|MLT|470|ISO 3166-2:MT
Marshall Islands|MH|MHL|584|ISO 3166-2:MH
Martinique|MQ|MTQ|474|ISO 3166-2:MQ
Mauritania|MR|MRT|478|ISO 3166-2:MR
Mauritius|MU|MUS|480|ISO 3166-2:MU
Mayotte|YT|MYT|175|ISO 3166-2:YT
Mexico|MX|MEX|484|ISO 3166-2:MX
Micronesia, Federated States of|FM|FSM|583|ISO 3166-2:FM
Moldova, Republic of|MD|MDA|498|ISO 3166-2:MD
Monaco|MC|MCO|492|ISO 3166-2:MC
Mongolia|MN|MNG|496|ISO 3166-2:MN
Montenegro|ME|MNE|499|ISO 3166-2:ME
Montserrat|MS|MSR|500|ISO 3166-2:MS
Morocco|MA|MAR|504|ISO 3166-2:MA
Mozambique|MZ|MOZ|508|ISO 3166-2:MZ
Myanmar|MM|MMR|104|ISO 3166-2:MM
Namibia|NA|NAM|516|ISO 3166-2:NA
Nauru|NR|NRU|520|ISO 3166-2:NR
Nepal|NP|NPL|524|ISO 3166-2:NP
Netherlands|NL|NLD|528|ISO 3166-2:NL
New Caledonia|NC|NCL|540|ISO 3166-2:NC
New Zealand|NZ|NZL|554|ISO 3166-2:NZ
Nicaragua|NI|NIC|558|ISO 3166-2:NI
Niger|NE|NER|562|ISO 3166-2:NE
Nigeria|NG|NGA|566|ISO 3166-2:NG
Niue|NU|NIU|570|ISO 3166-2:NU
Norfolk Island|NF|NFK|574|ISO 3166-2:NF
Northern Mariana Islands|MP|MNP|580|ISO 3166-2:MP
Norway|NO|NOR|578|ISO 3166-2:NO
Oman|OM|OMN|512|ISO 3166-2:OM
Pakistan|PK|PAK|586|ISO 3166-2:PK
Palau|PW|PLW|585|ISO 3166-2:PW
Palestinian Territory, Occupied|PS|PSE|275|ISO 3166-2:PS
Panama|PA|PAN|591|ISO 3166-2:PA
Papua New Guinea|PG|PNG|598|ISO 3166-2:PG
Paraguay|PY|PRY|600|ISO 3166-2:PY
Peru|PE|PER|604|ISO 3166-2:PE
Philippines|PH|PHL|608|ISO 3166-2:PH
Pitcairn|PN|PCN|612|ISO 3166-2:PN
Poland|PL|POL|616|ISO 3166-2:PL
Portugal|PT|PRT|620|ISO 3166-2:PT
Puerto Rico|PR|PRI|630|ISO 3166-2:PR
Qatar|QA|QAT|634|ISO 3166-2:QA
Réunion|RE|REU|638|ISO 3166-2:RE
Romania|RO|ROU|642|ISO 3166-2:RO
Russian Federation|RU|RUS|643|ISO 3166-2:RU
Rwanda|RW|RWA|646|ISO 3166-2:RW
Saint Barthélemy|BL|BLM|652|ISO 3166-2:BL
Saint Helena, Ascension and Tristan da Cunha|SH|SHN|654|ISO 3166-2:SH
Saint Kitts and Nevis|KN|KNA|659|ISO 3166-2:KN
Saint Lucia|LC|LCA|662|ISO 3166-2:LC
Saint Martin (French part)|MF|MAF|663|ISO 3166-2:MF
Saint Pierre and Miquelon|PM|SPM|666|ISO 3166-2:PM
Saint Vincent and the Grenadines|VC|VCT|670|ISO 3166-2:VC
Samoa|WS|WSM|882|ISO 3166-2:WS
San Marino|SM|SMR|674|ISO 3166-2:SM
Sao Tome and Principe|ST|STP|678|ISO 3166-2:ST
Saudi Arabia|SA|SAU|682|ISO 3166-2:SA
Senegal|SN|SEN|686|ISO 3166-2:SN
Serbia|RS|SRB|688|ISO 3166-2:RS
Seychelles|SC|SYC|690|ISO 3166-2:SC
Sierra Leone|SL|SLE|694|ISO 3166-2:SL
Singapore|SG|SGP|702|ISO 3166-2:SG
Sint Maarten (Dutch part)|SX|SXM|534|ISO 3166-2:SX
Slovakia|SK|SVK|703|ISO 3166-2:SK
Slovenia|SI|SVN|705|ISO 3166-2:SI
Solomon Islands|SB|SLB|090|ISO 3166-2:SB
Somalia|SO|SOM|706|ISO 3166-2:SO
South Africa|ZA|ZAF|710|ISO 3166-2:ZA
South Georgia and the South Sandwich Islands|GS|SGS|239|ISO 3166-2:GS
South Sudan|SS|SSD|728|ISO 3166-2:SS
Spain|ES|ESP|724|ISO 3166-2:ES
Sri Lanka|LK|LKA|144|ISO 3166-2:LK
Sudan|SD|SDN|729|ISO 3166-2:SD
Suriname|SR|SUR|740|ISO 3166-2:SR
Svalbard and Jan Mayen|SJ|SJM|744|ISO 3166-2:SJ
Swaziland|SZ|SWZ|748|ISO 3166-2:SZ
Sweden|SE|SWE|752|ISO 3166-2:SE
Switzerland|CH|CHE|756|ISO 3166-2:CH
Syrian Arab Republic|SY|SYR|760|ISO 3166-2:SY
Taiwan, Province of China|TW|TWN|158|ISO 3166-2:TW
Tajikistan|TJ|TJK|762|ISO 3166-2:TJ
Tanzania, United Republic of|TZ|TZA|834|ISO 3166-2:TZ
Thailand|TH|THA|764|ISO 3166-2:TH
Timor-Leste|TL|TLS|626|ISO 3166-2:TL
Togo|TG|TGO|768|ISO 3166-2:TG
Tokelau|TK|TKL|772|ISO 3166-2:TK
Tonga|TO|TON|776|ISO 3166-2:TO
Trinidad and Tobago|TT|TTO|780|ISO 3166-2:TT
Tunisia|TN|TUN|788|ISO 3166-2:TN
Turkey|TR|TUR|792|ISO 3166-2:TR
Turkmenistan|TM|TKM|795|ISO 3166-2:TM
Turks and Caicos Islands|TC|TCA|796|ISO 3166-2:TC
Tuvalu|TV|TUV|798|ISO 3166-2:TV
Uganda|UG|UGA|800|ISO 3166-2:UG
Ukraine|UA|UKR|804|ISO 3166-2:UA
United Arab Emirates|AE|ARE|784|ISO 3166-2:AE
United Kingdom|GB|GBR|826|ISO 3166-2:GB
United States|US|USA|840|ISO 3166-2:US
United States Minor Outlying Islands|UM|UMI|581|ISO 3166-2:UM
Uruguay|UY|URY|858|ISO 3166-2:UY
Uzbekistan|UZ|UZB|860|ISO 3166-2:UZ
Vanuatu|VU|VUT|548|ISO 3166-2:VU
Venezuela, Bolivarian Republic of|VE|VEN|862|ISO 3166-2:VE
Viet Nam|VN|VNM|704|ISO 3166-2:VN
Virgin Islands, British|VG|VGB|092|ISO 3166-2:VG
Virgin Islands, U.S.|VI|VIR|850|ISO 3166-2:VI
Wallis and Futuna|WF|WLF|876|ISO 3166-2:WF
Western Sahara|EH|ESH|732|ISO 3166-2:EH
Yemen|YE|YEM|887|ISO 3166-2:YE
Zambia|ZM|ZMB|894|ISO 3166-2:ZM
Zimbabwe|ZW|ZWE|716|ISO 3166-2:ZW

View file

@ -1,485 +0,0 @@
aar||aa|Afar|afar
abk||ab|Abkhazian|abkhaze
ace|||Achinese|aceh
ach|||Acoli|acoli
ada|||Adangme|adangme
ady|||Adyghe; Adygei|adyghé
afa|||Afro-Asiatic languages|afro-asiatiques, langues
afh|||Afrihili|afrihili
afr||af|Afrikaans|afrikaans
ain|||Ainu|aïnou
aka||ak|Akan|akan
akk|||Akkadian|akkadien
alb|sqi|sq|Albanian|albanais
ale|||Aleut|aléoute
alg|||Algonquian languages|algonquines, langues
alt|||Southern Altai|altai du Sud
amh||am|Amharic|amharique
ang|||English, Old (ca.450-1100)|anglo-saxon (ca.450-1100)
anp|||Angika|angika
apa|||Apache languages|apaches, langues
ara||ar|Arabic|arabe
arc|||Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)|araméen d'empire (700-300 BCE)
arg||an|Aragonese|aragonais
arm|hye|hy|Armenian|arménien
arn|||Mapudungun; Mapuche|mapudungun; mapuche; mapuce
arp|||Arapaho|arapaho
art|||Artificial languages|artificielles, langues
arw|||Arawak|arawak
asm||as|Assamese|assamais
ast|||Asturian; Bable; Leonese; Asturleonese|asturien; bable; léonais; asturoléonais
ath|||Athapascan languages|athapascanes, langues
aus|||Australian languages|australiennes, langues
ava||av|Avaric|avar
ave||ae|Avestan|avestique
awa|||Awadhi|awadhi
aym||ay|Aymara|aymara
aze||az|Azerbaijani|azéri
bad|||Banda languages|banda, langues
bai|||Bamileke languages|bamiléké, langues
bak||ba|Bashkir|bachkir
bal|||Baluchi|baloutchi
bam||bm|Bambara|bambara
ban|||Balinese|balinais
baq|eus|eu|Basque|basque
bas|||Basa|basa
bat|||Baltic languages|baltes, langues
bej|||Beja; Bedawiyet|bedja
bel||be|Belarusian|biélorusse
bem|||Bemba|bemba
ben||bn|Bengali|bengali
ber|||Berber languages|berbères, langues
bho|||Bhojpuri|bhojpuri
bih||bh|Bihari languages|langues biharis
bik|||Bikol|bikol
bin|||Bini; Edo|bini; edo
bis||bi|Bislama|bichlamar
bla|||Siksika|blackfoot
bnt|||Bantu (Other)|bantoues, autres langues
bos||bs|Bosnian|bosniaque
bra|||Braj|braj
bre||br|Breton|breton
btk|||Batak languages|batak, langues
bua|||Buriat|bouriate
bug|||Buginese|bugi
bul||bg|Bulgarian|bulgare
bur|mya|my|Burmese|birman
byn|||Blin; Bilin|blin; bilen
cad|||Caddo|caddo
cai|||Central American Indian languages|amérindiennes de L'Amérique centrale, langues
car|||Galibi Carib|karib; galibi; carib
cat||ca|Catalan; Valencian|catalan; valencien
cau|||Caucasian languages|caucasiennes, langues
ceb|||Cebuano|cebuano
cel|||Celtic languages|celtiques, langues; celtes, langues
cha||ch|Chamorro|chamorro
chb|||Chibcha|chibcha
che||ce|Chechen|tchétchène
chg|||Chagatai|djaghataï
chi|zho|zh|Chinese|chinois
chk|||Chuukese|chuuk
chm|||Mari|mari
chn|||Chinook jargon|chinook, jargon
cho|||Choctaw|choctaw
chp|||Chipewyan; Dene Suline|chipewyan
chr|||Cherokee|cherokee
chu||cu|Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic|slavon d'église; vieux slave; slavon liturgique; vieux bulgare
chv||cv|Chuvash|tchouvache
chy|||Cheyenne|cheyenne
cmc|||Chamic languages|chames, langues
cop|||Coptic|copte
cor||kw|Cornish|cornique
cos||co|Corsican|corse
cpe|||Creoles and pidgins, English based|créoles et pidgins basés sur l'anglais
cpf|||Creoles and pidgins, French-based |créoles et pidgins basés sur le français
cpp|||Creoles and pidgins, Portuguese-based |créoles et pidgins basés sur le portugais
cre||cr|Cree|cree
crh|||Crimean Tatar; Crimean Turkish|tatar de Crimé
crp|||Creoles and pidgins |créoles et pidgins
csb|||Kashubian|kachoube
cus|||Cushitic languages|couchitiques, langues
cze|ces|cs|Czech|tchèque
dak|||Dakota|dakota
dan||da|Danish|danois
dar|||Dargwa|dargwa
day|||Land Dayak languages|dayak, langues
del|||Delaware|delaware
den|||Slave (Athapascan)|esclave (athapascan)
dgr|||Dogrib|dogrib
din|||Dinka|dinka
div||dv|Divehi; Dhivehi; Maldivian|maldivien
doi|||Dogri|dogri
dra|||Dravidian languages|dravidiennes, langues
dsb|||Lower Sorbian|bas-sorabe
dua|||Duala|douala
dum|||Dutch, Middle (ca.1050-1350)|néerlandais moyen (ca. 1050-1350)
dut|nld|nl|Dutch; Flemish|néerlandais; flamand
dyu|||Dyula|dioula
dzo||dz|Dzongkha|dzongkha
efi|||Efik|efik
egy|||Egyptian (Ancient)|égyptien
eka|||Ekajuk|ekajuk
elx|||Elamite|élamite
eng||en|English|anglais
enm|||English, Middle (1100-1500)|anglais moyen (1100-1500)
epo||eo|Esperanto|espéranto
est||et|Estonian|estonien
ewe||ee|Ewe|éwé
ewo|||Ewondo|éwondo
fan|||Fang|fang
fao||fo|Faroese|féroïen
fat|||Fanti|fanti
fij||fj|Fijian|fidjien
fil|||Filipino; Pilipino|filipino; pilipino
fin||fi|Finnish|finnois
fiu|||Finno-Ugrian languages|finno-ougriennes, langues
fon|||Fon|fon
fre|fra|fr|French|français
frm|||French, Middle (ca.1400-1600)|français moyen (1400-1600)
fro|||French, Old (842-ca.1400)|français ancien (842-ca.1400)
frr|||Northern Frisian|frison septentrional
frs|||Eastern Frisian|frison oriental
fry||fy|Western Frisian|frison occidental
ful||ff|Fulah|peul
fur|||Friulian|frioulan
gaa|||Ga|ga
gay|||Gayo|gayo
gba|||Gbaya|gbaya
gem|||Germanic languages|germaniques, langues
geo|kat|ka|Georgian|géorgien
ger|deu|de|German|allemand
gez|||Geez|guèze
gil|||Gilbertese|kiribati
gla||gd|Gaelic; Scottish Gaelic|gaélique; gaélique écossais
gle||ga|Irish|irlandais
glg||gl|Galician|galicien
glv||gv|Manx|manx; mannois
gmh|||German, Middle High (ca.1050-1500)|allemand, moyen haut (ca. 1050-1500)
goh|||German, Old High (ca.750-1050)|allemand, vieux haut (ca. 750-1050)
gon|||Gondi|gond
gor|||Gorontalo|gorontalo
got|||Gothic|gothique
grb|||Grebo|grebo
grc|||Greek, Ancient (to 1453)|grec ancien (jusqu'à 1453)
gre|ell|el|Greek, Modern (1453-)|grec moderne (après 1453)
grn||gn|Guarani|guarani
gsw|||Swiss German; Alemannic; Alsatian|suisse alémanique; alémanique; alsacien
guj||gu|Gujarati|goudjrati
gwi|||Gwich'in|gwich'in
hai|||Haida|haida
hat||ht|Haitian; Haitian Creole|haïtien; créole haïtien
hau||ha|Hausa|haoussa
haw|||Hawaiian|hawaïen
heb||he|Hebrew|hébreu
her||hz|Herero|herero
hil|||Hiligaynon|hiligaynon
him|||Himachali languages; Western Pahari languages|langues himachalis; langues paharis occidentales
hin||hi|Hindi|hindi
hit|||Hittite|hittite
hmn|||Hmong; Mong|hmong
hmo||ho|Hiri Motu|hiri motu
hrv||hr|Croatian|croate
hsb|||Upper Sorbian|haut-sorabe
hun||hu|Hungarian|hongrois
hup|||Hupa|hupa
iba|||Iban|iban
ibo||ig|Igbo|igbo
ice|isl|is|Icelandic|islandais
ido||io|Ido|ido
iii||ii|Sichuan Yi; Nuosu|yi de Sichuan
ijo|||Ijo languages|ijo, langues
iku||iu|Inuktitut|inuktitut
ile||ie|Interlingue; Occidental|interlingue
ilo|||Iloko|ilocano
ina||ia|Interlingua (International Auxiliary Language Association)|interlingua (langue auxiliaire internationale)
inc|||Indic languages|indo-aryennes, langues
ind||id|Indonesian|indonésien
ine|||Indo-European languages|indo-européennes, langues
inh|||Ingush|ingouche
ipk||ik|Inupiaq|inupiaq
ira|||Iranian languages|iraniennes, langues
iro|||Iroquoian languages|iroquoises, langues
ita||it|Italian|italien
jav||jv|Javanese|javanais
jbo|||Lojban|lojban
jpn||ja|Japanese|japonais
jpr|||Judeo-Persian|judéo-persan
jrb|||Judeo-Arabic|judéo-arabe
kaa|||Kara-Kalpak|karakalpak
kab|||Kabyle|kabyle
kac|||Kachin; Jingpho|kachin; jingpho
kal||kl|Kalaallisut; Greenlandic|groenlandais
kam|||Kamba|kamba
kan||kn|Kannada|kannada
kar|||Karen languages|karen, langues
kas||ks|Kashmiri|kashmiri
kau||kr|Kanuri|kanouri
kaw|||Kawi|kawi
kaz||kk|Kazakh|kazakh
kbd|||Kabardian|kabardien
kha|||Khasi|khasi
khi|||Khoisan languages|khoïsan, langues
khm||km|Central Khmer|khmer central
kho|||Khotanese; Sakan|khotanais; sakan
kik||ki|Kikuyu; Gikuyu|kikuyu
kin||rw|Kinyarwanda|rwanda
kir||ky|Kirghiz; Kyrgyz|kirghiz
kmb|||Kimbundu|kimbundu
kok|||Konkani|konkani
kom||kv|Komi|kom
kon||kg|Kongo|kongo
kor||ko|Korean|coréen
kos|||Kosraean|kosrae
kpe|||Kpelle|kpellé
krc|||Karachay-Balkar|karatchai balkar
krl|||Karelian|carélien
kro|||Kru languages|krou, langues
kru|||Kurukh|kurukh
kua||kj|Kuanyama; Kwanyama|kuanyama; kwanyama
kum|||Kumyk|koumyk
kur||ku|Kurdish|kurde
kut|||Kutenai|kutenai
lad|||Ladino|judéo-espagnol
lah|||Lahnda|lahnda
lam|||Lamba|lamba
lao||lo|Lao|lao
lat||la|Latin|latin
lav||lv|Latvian|letton
lez|||Lezghian|lezghien
lim||li|Limburgan; Limburger; Limburgish|limbourgeois
lin||ln|Lingala|lingala
lit||lt|Lithuanian|lituanien
lol|||Mongo|mongo
loz|||Lozi|lozi
ltz||lb|Luxembourgish; Letzeburgesch|luxembourgeois
lua|||Luba-Lulua|luba-lulua
lub||lu|Luba-Katanga|luba-katanga
lug||lg|Ganda|ganda
lui|||Luiseno|luiseno
lun|||Lunda|lunda
luo|||Luo (Kenya and Tanzania)|luo (Kenya et Tanzanie)
lus|||Lushai|lushai
mac|mkd|mk|Macedonian|macédonien
mad|||Madurese|madourais
mag|||Magahi|magahi
mah||mh|Marshallese|marshall
mai|||Maithili|maithili
mak|||Makasar|makassar
mal||ml|Malayalam|malayalam
man|||Mandingo|mandingue
mao|mri|mi|Maori|maori
map|||Austronesian languages|austronésiennes, langues
mar||mr|Marathi|marathe
mas|||Masai|massaï
may|msa|ms|Malay|malais
mdf|||Moksha|moksa
mdr|||Mandar|mandar
men|||Mende|mendé
mga|||Irish, Middle (900-1200)|irlandais moyen (900-1200)
mic|||Mi'kmaq; Micmac|mi'kmaq; micmac
min|||Minangkabau|minangkabau
mis|||Uncoded languages|langues non codées
mkh|||Mon-Khmer languages|môn-khmer, langues
mlg||mg|Malagasy|malgache
mlt||mt|Maltese|maltais
mnc|||Manchu|mandchou
mni|||Manipuri|manipuri
mno|||Manobo languages|manobo, langues
moh|||Mohawk|mohawk
mon||mn|Mongolian|mongol
mos|||Mossi|moré
mul|||Multiple languages|multilingue
mun|||Munda languages|mounda, langues
mus|||Creek|muskogee
mwl|||Mirandese|mirandais
mwr|||Marwari|marvari
myn|||Mayan languages|maya, langues
myv|||Erzya|erza
nah|||Nahuatl languages|nahuatl, langues
nai|||North American Indian languages|nord-amérindiennes, langues
nap|||Neapolitan|napolitain
nau||na|Nauru|nauruan
nav||nv|Navajo; Navaho|navaho
nbl||nr|Ndebele, South; South Ndebele|ndébélé du Sud
nde||nd|Ndebele, North; North Ndebele|ndébélé du Nord
ndo||ng|Ndonga|ndonga
nds|||Low German; Low Saxon; German, Low; Saxon, Low|bas allemand; bas saxon; allemand, bas; saxon, bas
nep||ne|Nepali|népalais
new|||Nepal Bhasa; Newari|nepal bhasa; newari
nia|||Nias|nias
nic|||Niger-Kordofanian languages|nigéro-kordofaniennes, langues
niu|||Niuean|niué
nno||nn|Norwegian Nynorsk; Nynorsk, Norwegian|norvégien nynorsk; nynorsk, norvégien
nob||nb|Bokmål, Norwegian; Norwegian Bokmål|norvégien bokmål
nog|||Nogai|nogaï; nogay
non|||Norse, Old|norrois, vieux
nor||no|Norwegian|norvégien
nqo|||N'Ko|n'ko
nso|||Pedi; Sepedi; Northern Sotho|pedi; sepedi; sotho du Nord
nub|||Nubian languages|nubiennes, langues
nwc|||Classical Newari; Old Newari; Classical Nepal Bhasa|newari classique
nya||ny|Chichewa; Chewa; Nyanja|chichewa; chewa; nyanja
nym|||Nyamwezi|nyamwezi
nyn|||Nyankole|nyankolé
nyo|||Nyoro|nyoro
nzi|||Nzima|nzema
oci||oc|Occitan (post 1500); Provençal|occitan (après 1500); provençal
oji||oj|Ojibwa|ojibwa
ori||or|Oriya|oriya
orm||om|Oromo|galla
osa|||Osage|osage
oss||os|Ossetian; Ossetic|ossète
ota|||Turkish, Ottoman (1500-1928)|turc ottoman (1500-1928)
oto|||Otomian languages|otomi, langues
paa|||Papuan languages|papoues, langues
pag|||Pangasinan|pangasinan
pal|||Pahlavi|pahlavi
pam|||Pampanga; Kapampangan|pampangan
pan||pa|Panjabi; Punjabi|pendjabi
pap|||Papiamento|papiamento
pau|||Palauan|palau
peo|||Persian, Old (ca.600-400 B.C.)|perse, vieux (ca. 600-400 av. J.-C.)
per|fas|fa|Persian|persan
phi|||Philippine languages|philippines, langues
phn|||Phoenician|phénicien
pli||pi|Pali|pali
pol||pl|Polish|polonais
pon|||Pohnpeian|pohnpei
por||pt|Portuguese|portugais
pra|||Prakrit languages|prâkrit, langues
pro|||Provençal, Old (to 1500)|provençal ancien (jusqu'à 1500)
pus||ps|Pushto; Pashto|pachto
qaa-qtz|||Reserved for local use|réservée à l'usage local
que||qu|Quechua|quechua
raj|||Rajasthani|rajasthani
rap|||Rapanui|rapanui
rar|||Rarotongan; Cook Islands Maori|rarotonga; maori des îles Cook
roa|||Romance languages|romanes, langues
roh||rm|Romansh|romanche
rom|||Romany|tsigane
rum|ron|ro|Romanian; Moldavian; Moldovan|roumain; moldave
run||rn|Rundi|rundi
rup|||Aromanian; Arumanian; Macedo-Romanian|aroumain; macédo-roumain
rus||ru|Russian|russe
sad|||Sandawe|sandawe
sag||sg|Sango|sango
sah|||Yakut|iakoute
sai|||South American Indian (Other)|indiennes d'Amérique du Sud, autres langues
sal|||Salishan languages|salishennes, langues
sam|||Samaritan Aramaic|samaritain
san||sa|Sanskrit|sanskrit
sas|||Sasak|sasak
sat|||Santali|santal
scn|||Sicilian|sicilien
sco|||Scots|écossais
sel|||Selkup|selkoupe
sem|||Semitic languages|sémitiques, langues
sga|||Irish, Old (to 900)|irlandais ancien (jusqu'à 900)
sgn|||Sign Languages|langues des signes
shn|||Shan|chan
sid|||Sidamo|sidamo
sin||si|Sinhala; Sinhalese|singhalais
sio|||Siouan languages|sioux, langues
sit|||Sino-Tibetan languages|sino-tibétaines, langues
sla|||Slavic languages|slaves, langues
slo|slk|sk|Slovak|slovaque
slv||sl|Slovenian|slovène
sma|||Southern Sami|sami du Sud
sme||se|Northern Sami|sami du Nord
smi|||Sami languages|sames, langues
smj|||Lule Sami|sami de Lule
smn|||Inari Sami|sami d'Inari
smo||sm|Samoan|samoan
sms|||Skolt Sami|sami skolt
sna||sn|Shona|shona
snd||sd|Sindhi|sindhi
snk|||Soninke|soninké
sog|||Sogdian|sogdien
som||so|Somali|somali
son|||Songhai languages|songhai, langues
sot||st|Sotho, Southern|sotho du Sud
spa||es|Spanish; Castilian|espagnol; castillan
srd||sc|Sardinian|sarde
srn|||Sranan Tongo|sranan tongo
srp||sr|Serbian|serbe
srr|||Serer|sérère
ssa|||Nilo-Saharan languages|nilo-sahariennes, langues
ssw||ss|Swati|swati
suk|||Sukuma|sukuma
sun||su|Sundanese|soundanais
sus|||Susu|soussou
sux|||Sumerian|sumérien
swa||sw|Swahili|swahili
swe||sv|Swedish|suédois
syc|||Classical Syriac|syriaque classique
syr|||Syriac|syriaque
tah||ty|Tahitian|tahitien
tai|||Tai languages|tai, langues
tam||ta|Tamil|tamoul
tat||tt|Tatar|tatar
tel||te|Telugu|télougou
tem|||Timne|temne
ter|||Tereno|tereno
tet|||Tetum|tetum
tgk||tg|Tajik|tadjik
tgl||tl|Tagalog|tagalog
tha||th|Thai|thaï
tib|bod|bo|Tibetan|tibétain
tig|||Tigre|tigré
tir||ti|Tigrinya|tigrigna
tiv|||Tiv|tiv
tkl|||Tokelau|tokelau
tlh|||Klingon; tlhIngan-Hol|klingon
tli|||Tlingit|tlingit
tmh|||Tamashek|tamacheq
tog|||Tonga (Nyasa)|tonga (Nyasa)
ton||to|Tonga (Tonga Islands)|tongan (Îles Tonga)
tpi|||Tok Pisin|tok pisin
tsi|||Tsimshian|tsimshian
tsn||tn|Tswana|tswana
tso||ts|Tsonga|tsonga
tuk||tk|Turkmen|turkmène
tum|||Tumbuka|tumbuka
tup|||Tupi languages|tupi, langues
tur||tr|Turkish|turc
tut|||Altaic languages|altaïques, langues
tvl|||Tuvalu|tuvalu
twi||tw|Twi|twi
tyv|||Tuvinian|touva
udm|||Udmurt|oudmourte
uga|||Ugaritic|ougaritique
uig||ug|Uighur; Uyghur|ouïgour
ukr||uk|Ukrainian|ukrainien
umb|||Umbundu|umbundu
und|||Undetermined|indéterminée
urd||ur|Urdu|ourdou
uzb||uz|Uzbek|ouszbek
vai|||Vai|vaï
ven||ve|Venda|venda
vie||vi|Vietnamese|vietnamien
vol||vo|Volapük|volapük
vot|||Votic|vote
wak|||Wakashan languages|wakashanes, langues
wal|||Walamo|walamo
war|||Waray|waray
was|||Washo|washo
wel|cym|cy|Welsh|gallois
wen|||Sorbian languages|sorabes, langues
wln||wa|Walloon|wallon
wol||wo|Wolof|wolof
xal|||Kalmyk; Oirat|kalmouk; oïrat
xho||xh|Xhosa|xhosa
yao|||Yao|yao
yap|||Yapese|yapois
yid||yi|Yiddish|yiddish
yor||yo|Yoruba|yoruba
ypk|||Yupik languages|yupik, langues
zap|||Zapotec|zapotèque
zbl|||Blissymbols; Blissymbolics; Bliss|symboles Bliss; Bliss
zen|||Zenaga|zenaga
zha||za|Zhuang; Chuang|zhuang; chuang
znd|||Zande languages|zandé, langues
zul||zu|Zulu|zoulou
zun|||Zuni|zuni
zxx|||No linguistic content; Not applicable|pas de contenu linguistique; non applicable
zza|||Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki|zaza; dimili; dimli; kirdki; kirmanjki; zazaki

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,70 +18,86 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
import pkg_resources
from .__version__ import __version__
__version__ = '0.6.2'
__all__ = ['Guess', 'Language',
'guess_file_info', 'guess_video_info',
'guess_movie_info', 'guess_episode_info']
'guess_movie_info', 'guess_episode_info',
'default_options']
# Do python3 detection before importing any other module, to be sure that
# it will then always be available
# with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/
import sys
if sys.version_info[0] >= 3:
PY3 = True
if sys.version_info[0] >= 3: # pragma: no cover
PY2, PY3 = False, True
unicode_text_type = str
native_text_type = str
base_text_type = str
def u(x):
return str(x)
def s(x):
return x
class UnicodeMixin(object):
__str__ = lambda x: x.__unicode__()
import binascii
def to_hex(x):
return binascii.hexlify(x).decode('utf-8')
else:
PY3 = False
__all__ = [ str(s) for s in __all__ ] # fix imports for python2
else: # pragma: no cover
PY2, PY3 = True, False
__all__ = [str(s) for s in __all__] # fix imports for python2
unicode_text_type = unicode
native_text_type = str
base_text_type = basestring
def u(x):
if isinstance(x, str):
return x.decode('utf-8')
if isinstance(x, list):
return [u(s) for s in x]
return unicode(x)
def s(x):
if isinstance(x, unicode):
return x.encode('utf-8')
if isinstance(x, list):
return [ s(y) for y in x ]
return [s(y) for y in x]
if isinstance(x, tuple):
return tuple(s(y) for y in x)
if isinstance(x, dict):
return dict((s(key), s(value)) for key, value in x.items())
return x
class UnicodeMixin(object):
__str__ = lambda x: unicode(x).encode('utf-8')
def to_hex(x):
return x.encode('hex')
range = xrange
from guessit.guess import Guess, merge_all
from guessit.guess import Guess, smart_merge
from guessit.language import Language
from guessit.matcher import IterativeMatcher
from guessit.textutils import clean_string
from guessit.textutils import clean_default, is_camel, from_camel
import babelfish
import os.path
import logging
import json
from copy import deepcopy
log = logging.getLogger(__name__)
class NullHandler(logging.Handler):
    """Logging handler that silently discards every record it receives."""

    def emit(self, record):
        """Ignore ``record``; nothing is written anywhere."""
        return None
@ -91,137 +107,193 @@ h = NullHandler()
log.addHandler(h)
def _guess_filename(filename, filetype):
def find_nodes(tree, props):
    """Generate each node of ``tree`` whose guess holds at least one wanted property.

    ``props`` is either a single property name or an iterable of names; a
    lone string is wrapped into a one-element list before searching.
    """
    wanted = [props] if isinstance(props, base_text_type) else props
    for candidate in tree.nodes():
        for name in wanted:
            if name in candidate.guess:
                # One hit is enough: emit the node once and move on.
                yield candidate
                break
def _guess_filename(filename, options=None, **kwargs):
    """Match ``filename`` and return the result of ``matched()`` on its tree.

    The match tree is built by ``_build_filename_mtree``. When the
    ``split_camel`` option is set, ``_add_camel_properties`` is run on the
    tree before the matched result is read.

    :param filename: the name to analyse.
    :param options: optional mapping of matcher options; may be ``None``.
    :param kwargs: extra keyword arguments forwarded to the tree builder.
    :return: whatever ``mtree.matched()`` returns for the final tree.
    """
    mtree = _build_filename_mtree(filename, options=options, **kwargs)
    # ``options`` defaults to None, so guard before calling ``.get`` on it;
    # the original value is still what gets forwarded to the helpers.
    if options and options.get('split_camel'):
        _add_camel_properties(mtree, options=options)
    return mtree.matched()
def warning(title):
log.warning('%s, guesses: %s - %s' % (title, m.nice_string(), m2.nice_string()))
return m
mtree = IterativeMatcher(filename, filetype=filetype)
def _build_filename_mtree(filename, options=None, **kwargs):
    """Build the match tree for ``filename``, re-running the matcher if asked.

    A first ``IterativeMatcher`` pass is made with ``options``. If that pass
    exposes a non-empty ``second_pass_options``, those entries are layered on
    top of a copy of the caller's options and the filename is matched again;
    the tree from that second pass is returned instead of the first.

    :param filename: the name to analyse, handed to ``IterativeMatcher``.
    :param options: optional mapping of matcher options; may be ``None``.
    :param kwargs: extra keyword arguments forwarded to ``IterativeMatcher``.
    :return: the ``IterativeMatcher`` instance from the last pass that ran.
    """
    mtree = IterativeMatcher(filename, options=options, **kwargs)
    second_pass_options = mtree.second_pass_options
    if second_pass_options:
        log.debug("Running 2nd pass")
        # ``options`` defaults to None and dict(None) raises TypeError, so
        # fall back to an empty mapping. Copying also leaves the caller's
        # dict unmodified by the update below.
        merged_options = dict(options or {})
        merged_options.update(second_pass_options)
        mtree = IterativeMatcher(filename, options=merged_options, **kwargs)
    return mtree
m = mtree.matched()
second_pass_opts = []
second_pass_transfo_opts = {}
def _add_camel_properties(mtree, options=None, **kwargs):
prop = 'title' if mtree.matched().get('type') != 'episode' else 'series'
value = mtree.matched().get(prop)
_guess_camel_string(mtree, value, options=options, skip_title=False, **kwargs)
# if there are multiple possible years found, we assume the first one is
# part of the title, reparse the tree taking this into account
years = set(n.value for n in find_nodes(mtree.match_tree, 'year'))
if len(years) >= 2:
second_pass_opts.append('skip_first_year')
for leaf in mtree.match_tree.unidentified_leaves():
value = leaf.value
_guess_camel_string(mtree, value, options=options, skip_title=True, **kwargs)
to_skip_language_nodes = []
title_nodes = set(n for n in find_nodes(mtree.match_tree, ['title', 'series']))
title_spans = {}
for title_node in title_nodes:
title_spans[title_node.span[0]] = title_node
title_spans[title_node.span[1]] = title_node
def _guess_camel_string(mtree, string, options=None, skip_title=False, **kwargs):
if string and is_camel(string):
log.debug('"%s" is camel cased. Try to detect more properties.' % (string,))
uncameled_value = from_camel(string)
merged_options = dict(options)
if 'type' in mtree.match_tree.info:
current_type = mtree.match_tree.info.get('type')
if current_type and current_type != 'unknown':
merged_options['type'] = current_type
camel_tree = _build_filename_mtree(uncameled_value, options=merged_options, name_only=True, skip_title=skip_title, **kwargs)
if len(camel_tree.matched()) > 0:
mtree.matched().update(camel_tree.matched())
return True
return False
for lang_key in ('language', 'subtitleLanguage'):
langs = {}
lang_nodes = set(n for n in find_nodes(mtree.match_tree, lang_key))
for lang_node in lang_nodes:
lang = lang_node.guess.get(lang_key, None)
if len(lang_node.value) > 3 and (lang_node.span[0] in title_spans.keys() or lang_node.span[1] in title_spans.keys()):
# Language is next or before title, and is not a language code. Add to skip for 2nd pass.
def guess_video_metadata(filename):
"""Gets the video metadata properties out of a given file. The file needs to
exist on the filesystem to be able to be analyzed. An empty guess is
returned otherwise.
# if filetype is subtitle and the language appears last, just before
# the extension, then it is likely a subtitle language
parts = clean_string(lang_node.root.value).split()
if m['type'] in ['moviesubtitle', 'episodesubtitle'] and (parts.index(lang_node.value) == len(parts) - 2):
continue
You need to have the Enzyme python package installed for this to work."""
result = Guess()
to_skip_language_nodes.append(lang_node)
elif not lang in langs:
langs[lang] = lang_node
def found(prop, value):
result[prop] = value
log.debug('Found with enzyme %s: %s' % (prop, value))
# first get the size of the file, in bytes
try:
size = os.stat(filename).st_size
found('fileSize', size)
except Exception as e:
log.error('Cannot get video file size: %s' % e)
# file probably does not exist, we might as well return now
return result
# then get additional metadata from the file using enzyme, if available
try:
import enzyme
with open(filename) as f:
mkv = enzyme.MKV(f)
found('duration', mkv.info.duration.total_seconds())
if mkv.video_tracks:
video_track = mkv.video_tracks[0]
# resolution
if video_track.height in (480, 720, 1080):
if video_track.interlaced:
found('screenSize', '%di' % video_track.height)
else:
# The same language was found. Keep the more confident one, and add others to skip for 2nd pass.
existing_lang_node = langs[lang]
to_skip = None
if existing_lang_node.guess.confidence('language') >= lang_node.guess.confidence('language'):
# lang_node is to remove
to_skip = lang_node
found('screenSize', '%dp' % video_track.height)
else:
# existing_lang_node is to remove
langs[lang] = lang_node
to_skip = existing_lang_node
to_skip_language_nodes.append(to_skip)
# TODO: do we want this?
#found('screenSize', '%dx%d' % (video_track.width, video_track.height))
pass
# video codec
if video_track.codec_id == 'V_MPEG4/ISO/AVC':
found('videoCodec', 'h264')
elif video_track.codec_id == 'V_MPEG4/ISO/SP':
found('videoCodec', 'DivX')
elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
found('videoCodec', 'XviD')
else:
log.warning('MKV has no video track')
if mkv.audio_tracks:
audio_track = mkv.audio_tracks[0]
# audio codec
if audio_track.codec_id == 'A_AC3':
found('audioCodec', 'AC3')
elif audio_track.codec_id == 'A_DTS':
found('audioCodec', 'DTS')
elif audio_track.codec_id == 'A_AAC':
found('audioCodec', 'AAC')
else:
log.warning('MKV has no audio track')
if mkv.subtitle_tracks:
embedded_subtitle_languages = set()
for st in mkv.subtitle_tracks:
try:
if st.language:
lang = babelfish.Language.fromalpha3b(st.language)
elif st.name:
lang = babelfish.Language.fromname(st.name)
else:
lang = babelfish.Language('und')
except babelfish.Error:
lang = babelfish.Language('und')
embedded_subtitle_languages.add(lang)
found('subtitleLanguage', embedded_subtitle_languages)
else:
log.debug('MKV has no subtitle track')
return result
except ImportError:
log.error('Cannot get video file metadata, missing dependency: enzyme')
log.error('Please install it from PyPI, by doing eg: pip install enzyme')
return result
except IOError as e:
log.error('Could not open file: %s' % filename)
log.error('Make sure it exists and is available for reading on the filesystem')
log.error('Error: %s' % e)
return result
except enzyme.Error as e:
log.error('Cannot guess video file metadata')
log.error('enzyme.Error while reading file: %s' % filename)
log.error('Error: %s' % e)
return result
default_options = {}
if to_skip_language_nodes:
second_pass_transfo_opts['guess_language'] = (
((), { 'skip': [ { 'node_idx': node.parent.node_idx,
'span': node.span }
for node in to_skip_language_nodes ] }))
if second_pass_opts or second_pass_transfo_opts:
# 2nd pass is needed
log.info("Running 2nd pass with options: %s" % second_pass_opts)
log.info("Transfo options: %s" % second_pass_transfo_opts)
mtree = IterativeMatcher(filename, filetype=filetype,
opts=second_pass_opts,
transfo_opts=second_pass_transfo_opts)
m = mtree.matched()
if 'language' not in m and 'subtitleLanguage' not in m or 'title' not in m:
return m
# if we found some language, make sure we didn't cut a title or sth...
mtree2 = IterativeMatcher(filename, filetype=filetype,
opts=['nolanguage', 'nocountry'])
m2 = mtree2.matched()
if m.get('title') != m2.get('title'):
title = next(find_nodes(mtree.match_tree, 'title'))
title2 = next(find_nodes(mtree2.match_tree, 'title'))
# if a node is in an explicit group, then the correct title is probably
# the other one
if title.root.node_at(title.node_idx[:2]).is_explicit():
return m2
elif title2.root.node_at(title2.node_idx[:2]).is_explicit():
return m
return m
def guess_file_info(filename, filetype='autodetect', info=None):
def guess_file_info(filename, info=None, options=None, **kwargs):
"""info can contain the names of the various plugins, such as 'filename' to
detect filename info, or 'hash_md5' to get the md5 hash of the file.
>>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
{'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
>>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
>>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
>>> g['hash_md5'], g['hash_sha1']
('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
"""
info = info or 'filename'
options = options or {}
if default_options:
merged_options = deepcopy(default_options)
merged_options.update(options)
options = merged_options
result = []
hashers = []
# Force unicode as soon as possible
filename = u(filename)
if info is None:
info = ['filename']
if isinstance(info, base_text_type):
info = [info]
for infotype in info:
if infotype == 'filename':
result.append(_guess_filename(filename, filetype))
result.append(_guess_filename(filename, options, **kwargs))
elif infotype == 'hash_mpc':
from guessit.hash_mpc import hash_file
try:
result.append(Guess({'hash_mpc': hash_file(filename)},
result.append(Guess({infotype: hash_file(filename)},
confidence=1.0))
except Exception as e:
log.warning('Could not compute MPC-style hash because: %s' % e)
@ -229,7 +301,7 @@ def guess_file_info(filename, filetype='autodetect', info=None):
elif infotype == 'hash_ed2k':
from guessit.hash_ed2k import hash_file
try:
result.append(Guess({'hash_ed2k': hash_file(filename)},
result.append(Guess({infotype: hash_file(filename)},
confidence=1.0))
except Exception as e:
log.warning('Could not compute ed2k hash because: %s' % e)
@ -243,6 +315,11 @@ def guess_file_info(filename, filetype='autodetect', info=None):
except AttributeError:
log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)
elif infotype == 'video':
g = guess_video_metadata(filename)
if g:
result.append(g)
else:
log.warning('Invalid infotype: %s' % infotype)
@ -265,25 +342,18 @@ def guess_file_info(filename, filetype='autodetect', info=None):
except Exception as e:
log.warning('Could not compute hash because: %s' % e)
result = merge_all(result)
# last minute adjustments
# if country is in the guessed properties, make it part of the filename
if 'series' in result and 'country' in result:
result['series'] += ' (%s)' % result['country'].alpha2.upper()
result = smart_merge(result)
return result
def guess_video_info(filename, info=None):
return guess_file_info(filename, 'autodetect', info)
def guess_video_info(filename, info=None, options=None, **kwargs):
return guess_file_info(filename, info=info, options=options, type='video', **kwargs)
def guess_movie_info(filename, info=None):
return guess_file_info(filename, 'movie', info)
def guess_movie_info(filename, info=None, options=None, **kwargs):
return guess_file_info(filename, info=info, options=options, type='movie', **kwargs)
def guess_episode_info(filename, info=None):
return guess_file_info(filename, 'episode', info)
def guess_episode_info(filename, info=None, options=None, **kwargs):
return guess_file_info(filename, info=info, options=options, type='episode', **kwargs)

View file

@ -2,7 +2,8 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,29 +19,120 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import print_function
from guessit import u
from guessit import slogging, guess_file_info
from optparse import OptionParser
from __future__ import absolute_import, division, print_function, unicode_literals
from collections import defaultdict
import logging
import sys
import os
import locale
from guessit import PY2, u, guess_file_info, __version__
from guessit.options import get_opts
from guessit.__version__ import __version__
def detect_filename(filename, filetype, info=['filename'], advanced = False):
def guess_file(filename, info='filename', options=None, **kwargs):
options = options or {}
filename = u(filename)
if not options.get('yaml') and not options.get('show_property'):
print('For:', filename)
print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string(advanced))
guess = guess_file_info(filename, info, options, **kwargs)
if not options.get('unidentified'):
try:
del guess['unidentified']
except KeyError:
pass
if options.get('show_property'):
print(guess.get(options.get('show_property'), ''))
return
if options.get('yaml'):
import yaml
for k, v in guess.items():
if isinstance(v, list) and len(v) == 1:
guess[k] = v[0]
ystr = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False)
i = 0
for yline in ystr.splitlines():
if i == 0:
print("? " + yline[:-1])
elif i == 1:
print(":" + yline[1:])
else:
print(yline)
i += 1
return
print('GuessIt found:', guess.nice_string(options.get('advanced')))
def run_demo(episodes=True, movies=True, advanced=False):
def _supported_properties():
    """Collect the properties declared by every registered transformer.

    :return: a tuple ``(all_properties, transformers_properties)`` where
        ``all_properties`` is a ``defaultdict(list)`` mapping each property
        name to the accumulated possible values, and
        ``transformers_properties`` is a list of
        ``(transformer, supported_properties)`` pairs in iteration order.
    """
    from guessit.plugins import transformers

    values_by_property = defaultdict(list)
    per_transformer = []

    for plugin in transformers.all_transformers():
        declared = plugin.supported_properties()
        per_transformer.append((plugin, declared))

        if not isinstance(declared, dict):
            # Only names are declared: touching the key is enough to make
            # the defaultdict create an empty entry for it.
            for key in declared:
                values_by_property[key]
            continue

        for key, choices in declared.items():
            values_by_property[key].extend(choices)

    return values_by_property, per_transformer
def display_transformers():
    """Print one line per transformer, showing its name and priority."""
    print('GuessIt transformers:')
    per_transformer = _supported_properties()[1]
    for plugin, _ in per_transformer:
        summary = (plugin.name, plugin.priority)
        print('[@] %s (%s)' % summary)
def display_properties(options):
    """Print the supported properties, optionally grouped and with values.

    :param options: object exposing the ``values``, ``transformers`` and
        ``name_only`` attributes read below
    """
    show_values = options.values
    group_by_transformer = options.transformers
    name_only = options.name_only

    print('GuessIt properties:')

    known, per_transformer = _supported_properties()
    if name_only:
        # the 'container' property does not apply when using the --name-only
        # option
        del known['container']

    if not group_by_transformer:
        # Flat listing: every known property, alphabetically.
        for key in sorted(known.keys()):
            choices = known.get(key)
            print(' [+] %s' % (key,))
            if choices and show_values:
                _display_property_values(key, indent=4)
        return

    # Grouped listing: properties under the transformer declaring them.
    for plugin, declared in per_transformer:
        print('[@] %s (%s)' % (plugin.name, plugin.priority))
        for key in declared:
            choices = known.get(key)
            print(' [+] %s' % (key,))
            if choices and show_values:
                _display_property_values(key, indent=4)
def _display_property_values(property_name, indent=2):
    """Print every possible value of ``property_name``, one per line.

    :param property_name: name of the property whose values are listed
    :param indent: number of spaces printed before each value
    """
    known = _supported_properties()[0]
    for choice in known.get(property_name):
        line = '[!] %s' % (choice,)
        print(indent * ' ' + line)
def run_demo(episodes=True, movies=True, options=None):
# NOTE: tests should not be added here but rather in the tests/ folder
# this is just intended as a quick example
if episodes:
testeps = [ 'Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
testeps = ['Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
@ -48,22 +140,20 @@ def run_demo(episodes=True, movies=True, advanced=False):
'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi'
]
'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi']
for f in testeps:
print('-'*80)
detect_filename(f, filetype='episode', advanced=advanced)
print('-' * 80)
guess_file(f, options=options, type='episode')
if movies:
testmovies = [ 'Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
testmovies = ['Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', # FIXME: PROPER and R5 get overwritten
'[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', # FIXME: title gets overwritten
'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi',
'[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv',
'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
@ -79,48 +169,115 @@ def run_demo(episodes=True, movies=True, advanced=False):
]
for f in testmovies:
print('-'*80)
detect_filename(f, filetype = 'movie', advanced = advanced)
print('-' * 80)
guess_file(f, options=options, type='movie')
def main():
slogging.setupLogging()
def submit_bug(filename, options):
    """POST a bug report for ``filename`` and print the outcome.

    :param filename: the filename to report
    :param options: object whose truthy attributes (except ``submit_bug``)
        are sent along as a stringified dict
    """
    import requests  # only import when needed
    from requests.exceptions import RequestException

    try:
        active = {}
        for key, value in options.__dict__.items():
            if value and key != 'submit_bug':
                active[key] = value

        payload = {'filename': filename,
                   'version': __version__,
                   'options': str(active)}
        # NOTE(review): the target is a localhost URL - confirm this is the
        # intended endpoint.
        response = requests.post('http://localhost:5000/bugs', payload)

        if response.status_code == 200:
            print('Successfully submitted file: %s' % response.text)
        else:
            print('Could not submit bug at the moment, please try again later.')

    except RequestException:
        print('Could not submit bug at the moment, please try again later.')
def main(args=None, setup_logging=True):
if setup_logging:
from guessit import slogging
slogging.setup_logging()
if PY2: # pragma: no cover
import codecs
import locale
import sys
# see http://bugs.python.org/issue2128
if sys.version_info.major < 3 and os.name == 'nt':
if os.name == 'nt':
for i, a in enumerate(sys.argv):
sys.argv[i] = a.decode(locale.getpreferredencoding())
parser = OptionParser(usage = 'usage: %prog [options] file1 [file2...]')
parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
help = 'display debug output')
parser.add_option('-i', '--info', dest = 'info', default = 'filename',
help = 'the desired information type: filename, hash_mpc or a hash from python\'s '
'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
'them, comma-separated')
parser.add_option('-t', '--type', dest = 'filetype', default = 'autodetect',
help = 'the suggested file type: movie, episode or autodetect')
parser.add_option('-a', '--advanced', dest = 'advanced', action='store_true', default = False,
help = 'display advanced information for filename guesses, as json output')
parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False,
help = 'run a few builtin tests instead of analyzing a file')
# see https://github.com/wackou/guessit/issues/43
# and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
# Wrap sys.stdout into a StreamWriter to allow writing unicode.
sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
options, args = parser.parse_args()
from guessit.plugins import transformers
if args:
options = get_opts().parse_args(args)
else: # pragma: no cover
options = get_opts().parse_args()
if options.verbose:
logging.getLogger('guessit').setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
help_required = True
if options.properties or options.values:
display_properties(options)
help_required = False
elif options.transformers:
display_transformers()
help_required = False
if options.demo:
run_demo(episodes=True, movies=True, advanced=options.advanced)
else:
if args:
for filename in args:
detect_filename(filename,
filetype = options.filetype,
info = options.info.split(','),
advanced = options.advanced)
run_demo(episodes=True, movies=True, options=vars(options))
help_required = False
if options.version:
print('+-------------------------------------------------------+')
print('+ GuessIt ' + __version__ + (28-len(__version__)) * ' ' + '+')
print('+-------------------------------------------------------+')
print('| Please report any bug or feature request at |')
print('| https://github.com/wackou/guessit/issues. |')
print('+-------------------------------------------------------+')
help_required = False
if options.yaml:
try:
import yaml, babelfish
def default_representer(dumper, data):
return dumper.represent_str(str(data))
yaml.SafeDumper.add_representer(babelfish.Language, default_representer)
yaml.SafeDumper.add_representer(babelfish.Country, default_representer)
except ImportError: # pragma: no cover
print('PyYAML not found. Using default output.')
filenames = []
if options.filename:
filenames.extend(options.filename)
if options.input_file:
input_file = open(options.input_file, 'r')
try:
filenames.extend([line.strip() for line in input_file.readlines()])
finally:
input_file.close()
filenames = filter(lambda f: f, filenames)
if filenames:
help_required = False
if options.submit_bug:
for filename in filenames:
submit_bug(filename, options)
else:
parser.print_help()
for filename in filenames:
guess_file(filename,
info=options.info.split(','),
options=vars(options))
if help_required: # pragma: no cover
get_opts().print_help()
if __name__ == '__main__':
main()

View file

@ -0,0 +1,20 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
__version__ = '0.10.2.dev0'

771
libs/guessit/containers.py Normal file
View file

@ -0,0 +1,771 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from .patterns import compile_pattern, sep
from . import base_text_type
from .guess import Guess
import types
def _get_span(prop, match):
"""Retrieves span for a match"""
if not prop.global_span and match.re.groups:
start = None
end = None
for i in range(1, match.re.groups + 1):
span = match.span(i)
if start is None or span[0] < start:
start = span[0]
if end is None or span[1] > end:
end = span[1]
return start, end
else:
return match.span()
start = span[0]
end = span[1]
def _trim_span(span, value, blanks = sep):
    """Shrink ``span`` so it no longer covers blanks at the edges of ``value``.

    :param span: tuple ``(start, end)`` to adjust
    :param value: the text covered by ``span``
    :param blanks: characters treated as trimmable
    :return: the narrowed ``(start, end)``, or ``(-1, -1)`` when nothing is
        left after trimming
    """
    start, end = span

    leading = 0
    for char in value:
        if char not in blanks:
            break
        leading += 1

    trailing = 0
    for char in reversed(value):
        if char not in blanks:
            break
        trailing += 1

    start += leading
    end -= trailing
    if end <= start:
        return -1, -1
    return start, end
def _get_groups(compiled_re):
"""
Retrieves groups from re
:return: list of group names
"""
if compiled_re.groups:
indexgroup = {}
for k, i in compiled_re.groupindex.items():
indexgroup[i] = k
ret = []
for i in range(1, compiled_re.groups + 1):
ret.append(indexgroup.get(i, i))
return ret
else:
return [None]
class NoValidator(object):
    """Validator that accepts every match."""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return True regardless of the arguments."""
        return True
class LeftValidator(object):
    """Make sure our match is starting by separator, or by another entry"""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return True when the trimmed match starts at the beginning of
        ``string``, right after a separator, or where another entry ends."""
        raw_span = _get_span(prop, match)
        covered = string[raw_span[0]:raw_span[1]]
        left, _ = _trim_span(raw_span, covered)

        if left <= 0 or string[left - 1] in sep:
            return True
        return left in entry_end
class RightValidator(object):
    """Make sure our match is ended by separator, or by another entry"""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return True when the trimmed match ends at the end of ``string``,
        right before a separator, or where another entry starts."""
        raw_span = _get_span(prop, match)
        covered = string[raw_span[0]:raw_span[1]]
        _, right = _trim_span(raw_span, covered)

        if right >= len(string) or string[right] in sep:
            return True
        return right in entry_start
class ChainedValidator(object):
    """Validator that succeeds only when every wrapped validator succeeds."""

    def __init__(self, *validators):
        """:param validators: validators to consult, in order"""
        self._validators = validators

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Run the validators in order, stopping at the first rejection."""
        return all(
            checker.validate(prop, string, node, match, entry_start, entry_end)
            for checker in self._validators)
class SameKeyValidator(object):
    """Validator that consults a callback for each leaf sharing a key."""

    def __init__(self, validator_function):
        """:param validator_function: callable receiving
            ``(leaf, key, prop, string, node, match, entry_start, entry_end)``;
            a ``None`` result means "no opinion"
        """
        self.validator_function = validator_function

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return the first non-None verdict of the callback, else True."""
        for key in prop.keys:
            for leaf in node.root.leaves_containing(key):
                verdict = self.validator_function(
                    leaf, key, prop, string, node, match,
                    entry_start, entry_end)
                if verdict is None:
                    continue
                return verdict
        return True
class OnlyOneValidator(SameKeyValidator):
    """Reject the match as soon as any leaf already holds one of its keys."""

    def __init__(self):
        def reject(same_value_leaf, key, prop, string, node, match, entry_start, entry_end):
            # Any existing leaf with the same key invalidates the new match.
            return False

        super(OnlyOneValidator, self).__init__(reject)
class DefaultValidator(object):
    """Make sure our match is surrounded by separators, or by another entry"""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return True when both edges of the trimmed match are acceptable.

        An edge is acceptable when it touches the string boundary, a
        separator character, or the boundary of another entry.
        """
        raw_span = _get_span(prop, match)
        covered = string[raw_span[0]:raw_span[1]]
        left, right = _trim_span(raw_span, covered)

        left_ok = (left <= 0 or string[left - 1] in sep
                   or left in entry_end)
        if not left_ok:
            return False

        return (right >= len(string) or string[right] in sep
                or right in entry_start)
class FunctionValidator(object):
    """Validator that delegates the whole decision to a callable."""

    def __init__(self, function):
        """:param function: callable taking
            ``(prop, string, node, match, entry_start, entry_end)``
        """
        self.function = function

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return whatever the wrapped callable returns."""
        delegate = self.function
        return delegate(prop, string, node, match, entry_start, entry_end)
class FormatterValidator(object):
    """Validate a match through the value produced by the property formatter."""

    def __init__(self, group_name=None, formatted_validator=None):
        """
        :param group_name: regexp group to format; the whole match when falsy
        :param formatted_validator: optional callable applied to the
            formatted value
        """
        self.group_name = group_name
        self.formatted_validator = formatted_validator

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Format the matched text with ``prop.format`` and judge the result.

        Returns ``formatted_validator(formatted)`` when a validator was
        given, otherwise the formatted value itself.
        """
        name = self.group_name
        if name:
            formatted = prop.format(match.group(name), name)
        else:
            formatted = prop.format(match.group())

        check = self.formatted_validator
        if not check:
            return formatted
        return check(formatted)
def _get_positions(prop, string, node, match, entry_start, entry_end):
    """Tell whether only separators lie before and after the match.

    :return: tuple ``(at_start, at_end)`` of booleans
    """
    left, right = match.span()

    # Walk backwards from the character just before the match.
    at_start = True
    for index in range(left - 1, -1, -1):
        if string[index] not in sep:
            at_start = False
            break

    # NOTE(review): the forward scan begins at right + 1, so the character
    # at index ``right`` (the first one after the match) is never inspected.
    # Confirm whether that is intended.
    at_end = True
    for index in range(right + 1, len(string)):
        if string[index] not in sep:
            at_end = False
            break

    return at_start, at_end
class WeakValidator(DefaultValidator):
    """Make sure our match is surrounded by separators and is the first or last element in the string"""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Apply the DefaultValidator check, then require the match to be at
        the start or at the end as reported by ``_get_positions``."""
        surrounded = super(WeakValidator, self).validate(
            prop, string, node, match, entry_start, entry_end)
        if not surrounded:
            return False

        at_start, at_end = _get_positions(
            prop, string, node, match, entry_start, entry_end)
        return at_start or at_end
class NeighborValidator(DefaultValidator):
    """Make sure the node is next another one"""

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Return True when the match sits at one edge of ``string`` and the
        tree has a leaf on that side of ``node``."""
        at_start, at_end = _get_positions(
            prop, string, node, match, entry_start, entry_end)

        if at_start and node.root.previous_leaf(node) is not None:
            return True
        if at_end and node.root.next_leaf(node) is not None:
            return True
        return False
class LeavesValidator(DefaultValidator):
    """Make sure our match is surrounded by separators and validates defined lambdas"""

    def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True):
        """
        :param lambdas: rules applied to both previous and next leaves
        :param previous_lambdas: rules applied to the leaves before the node
        :param next_lambdas: rules applied to the leaves after the node
        :param both_side: when True, both sides must validate; otherwise one
            side is enough
        :param default_: when True, the DefaultValidator check runs first
        """
        # Copy the incoming sequences: they are extended below, and the
        # caller's lists must not be modified as a side effect.
        self.previous_lambdas = list(previous_lambdas) if previous_lambdas is not None else []
        self.next_lambdas = list(next_lambdas) if next_lambdas is not None else []
        if lambdas:
            self.previous_lambdas.extend(lambdas)
            self.next_lambdas.extend(lambdas)
        self.both_side = both_side
        self.default_ = default_

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Combine the optional default check with the per-side rules.

        When no rule is configured on either side, the result of the default
        check is returned unchanged.
        """
        if self.default_:
            super_ret = super(LeavesValidator, self).validate(prop, string, node, match, entry_start, entry_end)
        else:
            super_ret = True
        if not super_ret:
            return False

        previous_ = self._validate_previous(prop, string, node, match, entry_start, entry_end)
        next_ = self._validate_next(prop, string, node, match, entry_start, entry_end)

        if previous_ is None and next_ is None:
            return super_ret
        if self.both_side:
            return previous_ and next_
        else:
            return previous_ or next_

    def _validate_previous(self, prop, string, node, match, entry_start, entry_end):
        """Return the first non-None rule verdict over the previous leaves.

        Returns False when rules exist but none gives a verdict, and None
        when no rule is configured for this side.
        """
        if self.previous_lambdas:
            for leaf in node.root.previous_leaves(node):
                for lambda_ in self.previous_lambdas:
                    ret = self._check_rule(lambda_, leaf)
                    if ret is not None:
                        return ret
            return False

    def _validate_next(self, prop, string, node, match, entry_start, entry_end):
        """Return the first non-None rule verdict over the next leaves.

        Returns False when rules exist but none gives a verdict, and None
        when no rule is configured for this side.
        """
        if self.next_lambdas:
            for leaf in node.root.next_leaves(node):
                for lambda_ in self.next_lambdas:
                    ret = self._check_rule(lambda_, leaf)
                    if ret is not None:
                        return ret
            return False

    def _check_rule(self, lambda_, previous_leaf):
        """Apply a single rule to a leaf."""
        return lambda_(previous_leaf)
class _Property:
    """Represents a property configuration."""

    def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None, disabler=None, confidence_lambda=None):
        """
        :param keys: Keys of the property (format, screenSize, ...)
        :type keys: string or list of strings
        :param pattern: Regexp pattern; defaults to ``canonical_form``
        :type pattern: string
        :param canonical_form: Unique value of the property (DVD, 720p, ...)
        :type canonical_form: string
        :param canonical_from_pattern: use the pattern as canonical form when
            no canonical form is given
        :type canonical_from_pattern: boolean
        :param confidence: confidence
        :type confidence: float
        :param enhance: enhance the pattern
        :type enhance: boolean
        :param global_span: if True, the whole match span will used to create the Guess.
                            Else, the span from the capturing groups will be used.
        :type global_span: boolean
        :param validator: Validator to use
        :type validator: :class:`DefaultValidator`
        :param formatter: Formatter to use: a callable, an object with a
            ``format`` method, or a dict of those indexed by group name
        :type formatter: function
        :param disabler: optional callable taking the options and returning
            True when the property must be ignored
        :param confidence_lambda: stored as-is on the instance
        :raises ValueError: when no key is given and the pattern has no named group
        """
        if isinstance(keys, list):
            # Copy: named groups are appended below and the caller's list
            # must not grow as a side effect.
            self.keys = list(keys)
        elif isinstance(keys, base_text_type):
            self.keys = [keys]
        else:
            self.keys = []
        self.canonical_form = canonical_form
        if pattern is not None:
            self.pattern = pattern
        else:
            self.pattern = canonical_form
        if self.canonical_form is None and canonical_from_pattern:
            self.canonical_form = self.pattern
        self.compiled = compile_pattern(self.pattern, enhance=enhance)
        # Every named group of the pattern is also a key of the property.
        for group_name in _get_groups(self.compiled):
            if isinstance(group_name, base_text_type) and not group_name in self.keys:
                self.keys.append(group_name)
        if not self.keys:
            raise ValueError("No property key is defined")
        self.confidence = confidence
        self.confidence_lambda = confidence_lambda
        self.global_span = global_span
        self.validator = validator
        self.formatter = formatter
        self.disabler = disabler

    def disabled(self, options):
        """Return the disabler's verdict for ``options``, or False without one."""
        if self.disabler:
            return self.disabler(options)
        return False

    def format(self, value, group_name=None):
        """Retrieves the final value from re group match value"""
        if isinstance(self.formatter, dict):
            formatter = self.formatter.get(group_name)
            if formatter is None and group_name is not None:
                # No formatter for this group: use the default one.
                formatter = self.formatter.get(None)
        else:
            formatter = self.formatter

        if formatter is None:
            return value
        # Plain functions are called, as before. Other callables (builtins,
        # partials, bound methods, ...) are called too, unless they expose a
        # ``format`` method, in which case the previous path is kept.
        if isinstance(formatter, types.FunctionType) or (
                callable(formatter) and not hasattr(formatter, 'format')):
            return formatter(value)
        return formatter.format(value)

    def __repr__(self):
        return "%s: %s" % (self.keys, self.canonical_form if self.canonical_form else self.pattern)
class PropertiesContainer(object):
def __init__(self, **kwargs):
self._properties = []
self.default_property_kwargs = kwargs
def unregister_property(self, name, *canonical_forms):
    """Unregister a property canonical forms

    If canonical_forms are specified, only those values will be unregistered

    :param name: Property name to unregister
    :type name: string
    :param canonical_forms: Values to unregister
    :type canonical_forms: varargs of string
    """
    # Keep everything that is NOT targeted. Properties carry their names in
    # ``keys``. The list is updated in place so existing references to it
    # stay valid.
    self._properties[:] = [
        prop for prop in self._properties
        if not (name in prop.keys
                and (not canonical_forms
                     or prop.canonical_form in canonical_forms))]
def register_property(self, name, *patterns, **property_params):
    """Register property with defined canonical form and patterns.

    Each pattern is either a regexp string or a dict of extra keyword
    arguments for the property. Parameters are layered in this order:
    container defaults, ``property_params``, then the pattern dict.

    :param name: name of the property (format, screenSize, ...)
    :type name: string
    :param patterns: regular expression patterns to register for the property canonical_form
    :type patterns: varargs of string
    :return: the properties created by this call, in order
    """
    registered = []
    for pattern in patterns:
        params = dict(self.default_property_kwargs)
        params.update(property_params)

        if isinstance(pattern, dict):
            params.update(pattern)
            positional = (name,)
        else:
            positional = (name, pattern)

        prop = _Property(*positional, **params)
        self._properties.append(prop)
        registered.append(prop)
    return registered
def register_canonical_properties(self, name, *canonical_forms, **property_params):
    """Register properties from their canonical forms.

    Each canonical form is registered as its own pattern and is passed on
    explicitly as ``canonical_form``.

    :param name: name of the property (releaseGroup, ...)
    :type name: string
    :param canonical_forms: values of the property ('ESiR', 'WAF', 'SEPTiC', ...)
    :type canonical_forms: varargs of strings
    :return: the properties created by this call, in order
    """
    properties = []
    for canonical_form in canonical_forms:
        params = dict(property_params)
        params['canonical_form'] = canonical_form
        # Forward ``params`` (not the untouched ``property_params``) so the
        # canonical form set above actually reaches the property.
        properties.extend(self.register_property(name, canonical_form, **params))
    return properties
def unregister_all_properties(self):
    """Unregister all defined properties"""
    # ``list.clear()`` is not available on Python 2; slice deletion empties
    # the list in place on every version.
    del self._properties[:]
def find_properties(self, string, node, options, name=None, validate=True, re_match=False, sort=True, multiple=False):
"""Find all distinct properties for given string
If no capturing group is defined in the property, value will be grabbed from the entire match.
If one ore more unnamed capturing group is defined in the property, first capturing group will be used.
If named capturing group are defined in the property, they will be returned as property key.
If validate, found properties will be validated by their defined validator
If re_match, re.match will be used instead of re.search.
if sort, found properties will be sorted from longer match to shorter match.
If multiple is False and multiple values are found for the same property, the more confident one will be returned.
If multiple is False and multiple values are found for the same property and the same confidence, the longer will be returned.
:param string: input string
:type string: string
:param node: current node of the matching tree
:type node: :class:`guessit.matchtree.MatchTree`
:param name: name of property to find
:type name: string
:param re_match: use re.match instead of re.search
:type re_match: bool
:param multiple: Allows multiple property values to be returned
:type multiple: bool
:return: found properties
:rtype: list of tuples (:class:`_Property`, match, list of tuples (property_name, tuple(value_start, value_end)))
:see: `_Property`
:see: `register_property`
:see: `register_canonical_properties`
"""
entry_start = {}
entry_end = {}
entries = []
duplicate_matches = {}
ret = []
if not string.strip():
return ret
# search all properties
for prop in self.get_properties(name):
if not prop.disabled(options):
valid_match = None
if re_match:
match = prop.compiled.match(string)
if match:
entries.append((prop, match))
else:
matches = list(prop.compiled.finditer(string))
duplicate_matches[prop] = matches
for match in matches:
entries.append((prop, match))
for prop, match in entries:
# compute confidence
if prop.confidence_lambda:
computed_confidence = prop.confidence_lambda(match)
if computed_confidence is not None:
prop.confidence = computed_confidence
if validate:
# compute entries start and ends
for prop, match in entries:
start, end = _get_span(prop, match)
if start not in entry_start:
entry_start[start] = [prop]
else:
entry_start[start].append(prop)
if end not in entry_end:
entry_end[end] = [prop]
else:
entry_end[end].append(prop)
# remove invalid values
while True:
invalid_entries = []
for entry in entries:
prop, match = entry
if not prop.validator.validate(prop, string, node, match, entry_start, entry_end):
invalid_entries.append(entry)
if not invalid_entries:
break
for entry in invalid_entries:
prop, match = entry
entries.remove(entry)
prop_duplicate_matches = duplicate_matches.get(prop)
if prop_duplicate_matches:
prop_duplicate_matches.remove(match)
invalid_span = _get_span(prop, match)
start = invalid_span[0]
end = invalid_span[1]
entry_start[start].remove(prop)
if not entry_start.get(start):
del entry_start[start]
entry_end[end].remove(prop)
if not entry_end.get(end):
del entry_end[end]
for prop, prop_duplicate_matches in duplicate_matches.items():
# Keeping the last valid match.
# Needed for the.100.109.hdtv-lol.mp4
for duplicate_match in prop_duplicate_matches[:-1]:
entries.remove((prop, duplicate_match))
if multiple:
ret = entries
else:
# keep only best match if multiple values where found
entries_dict = {}
for entry in entries:
for key in prop.keys:
if key not in entries_dict:
entries_dict[key] = []
entries_dict[key].append(entry)
for key_entries in entries_dict.values():
if multiple:
for entry in key_entries:
ret.append(entry)
else:
best_ret = {}
best_prop, best_match = None, None
if len(key_entries) == 1:
best_prop, best_match = key_entries[0]
else:
for prop, match in key_entries:
start, end = _get_span(prop, match)
if not best_prop or \
best_prop.confidence < best_prop.confidence or \
best_prop.confidence == best_prop.confidence and \
best_match.span()[1] - best_match.span()[0] < match.span()[1] - match.span()[0]:
best_prop, best_match = prop, match
best_ret[best_prop] = best_match
for prop, match in best_ret.items():
ret.append((prop, match))
if sort:
def _sorting(x):
_, x_match = x
x_start, x_end = x_match.span()
return x_start - x_end
ret.sort(key=_sorting)
return ret
def as_guess(self, found_properties, input=None, filter_=None, sep_replacement=None, multiple=False, *args, **kwargs):
if filter_ is None:
filter_ = lambda property, *args, **kwargs: True
guesses = [] if multiple else None
for prop, match in found_properties:
first_key = None
for key in prop.keys:
# First property key will be used as base for effective name
if isinstance(key, base_text_type):
if first_key is None:
first_key = key
break
property_name = first_key if first_key else None
span = _get_span(prop, match)
guess = Guess(confidence=prop.confidence, input=input, span=span, prop=property_name)
groups = _get_groups(match.re)
for group_name in groups:
name = group_name if isinstance(group_name, base_text_type) else property_name if property_name not in groups else None
if name:
value = self._effective_prop_value(prop, group_name, input, match.span(group_name) if group_name else match.span(), sep_replacement)
if not value is None:
is_string = isinstance(value, base_text_type)
if not is_string or is_string and value: # Keep non empty strings and other defined objects
if isinstance(value, dict):
for k, v in value.items():
if k is None:
k = name
guess[k] = v
else:
if name in guess:
if not isinstance(guess[name], list):
guess[name] = [guess[name]]
guess[name].append(value)
else:
guess[name] = value
if group_name:
guess.metadata(prop).span = match.span(group_name)
if filter_(guess):
if multiple:
guesses.append(guess)
else:
return guess
return guesses
def _effective_prop_value(self, prop, group_name, input=None, span=None, sep_replacement=None):
if prop.canonical_form:
return prop.canonical_form
if input is None:
return None
value = input
if span is not None:
value = value[span[0]:span[1]]
value = input[span[0]:span[1]] if input else None
if sep_replacement:
for sep_char in sep:
value = value.replace(sep_char, sep_replacement)
if value:
value = prop.format(value, group_name)
return value
def get_properties(self, name=None, canonical_form=None):
"""Retrieve properties
:return: Properties
:rtype: generator
"""
for prop in self._properties:
if (name is None or name in prop.keys) and (canonical_form is None or prop.canonical_form == canonical_form):
yield prop
def get_supported_properties(self):
supported_properties = {}
for prop in self.get_properties():
for k in prop.keys:
values = supported_properties.get(k)
if not values:
values = set()
supported_properties[k] = values
if prop.canonical_form:
values.add(prop.canonical_form)
return supported_properties
class QualitiesContainer(object):
    """Registry of quality ratings, indexed by property name then value."""

    def __init__(self):
        # {property name: {canonical form: rating}}
        self._qualities = {}

    def register_quality(self, name, canonical_form, rating):
        """Register a quality rating.

        :param name: Name of the property
        :type name: string
        :param canonical_form: Value of the property
        :type canonical_form: string
        :param rating: Estimated quality rating for the property
        :type rating: int
        """
        self._qualities.setdefault(name, {})[canonical_form] = rating

    def unregister_quality(self, name, *canonical_forms):
        """Unregister quality ratings for given property name.

        If canonical_forms are specified, only those values will be unregistered.
        Unknown names and unknown canonical forms are ignored.

        :param name: Name of the property
        :type name: string
        :param canonical_forms: Value of the property
        :type canonical_forms: string
        """
        property_qualities = self._qualities.get(name)
        if property_qualities is None:
            # Nothing registered under this name: nothing to remove.
            return
        for canonical_form in canonical_forms:
            property_qualities.pop(canonical_form, None)
        # Drop the whole name when asked to, or when no rating is left for it.
        if not canonical_forms or not property_qualities:
            del self._qualities[name]

    def clear_qualities(self,):
        """Unregister all defined quality ratings.
        """
        self._qualities.clear()

    def rate_quality(self, guess, *props):
        """Rate the quality of guess.

        :param guess: Guess to rate
        :type guess: :class:`guessit.guess.Guess`
        :param props: Properties to include in the rating. if empty, rating will be performed for all guess properties.
        :type props: varargs of string

        :return: Quality of the guess. The higher, the better.
        :rtype: int
        """
        rate = 0
        if not props:
            props = guess.keys()
        for prop in props:
            prop_value = guess.get(prop)
            prop_qualities = self._qualities.get(prop)
            if prop_value is not None and prop_qualities is not None:
                # Values without a registered rating count for 0.
                rate += prop_qualities.get(prop_value, 0)
        return rate

    def best_quality_properties(self, props, *guesses):
        """Retrieve the best quality guess, based on given properties

        :param props: Properties to include in the rating
        :type props: list of strings
        :param guesses: Guesses to rate
        :type guesses: :class:`guessit.guess.Guess`

        :return: Best quality guess from all passed guesses; the first one on
            ties, None when no guess is passed
        :rtype: :class:`guessit.guess.Guess`
        """
        best_guess = None
        best_rate = None
        for guess in guesses:
            rate = self.rate_quality(guess, *props)
            if best_rate is None or best_rate < rate:
                best_rate = rate
                best_guess = guess
        return best_guess

    def best_quality(self, *guesses):
        """Retrieve the best quality guess.

        :param guesses: Guesses to rate
        :type guesses: :class:`guessit.guess.Guess`

        :return: Best quality guess from all passed guesses
        :rtype: :class:`guessit.guess.Guess`
        """
        # An empty property list makes rate_quality() rate every property.
        return self.best_quality_properties((), *guesses)

View file

@ -1,112 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import UnicodeMixin, base_text_type, u
from guessit.fileutils import load_file_in_same_dir
import logging
__all__ = [ 'Country' ]
log = logging.getLogger(__name__)
# parsed from http://en.wikipedia.org/wiki/ISO_3166-1
#
# Description of the fields:
# "An English name, an alpha-2 code (when given),
# an alpha-3 code (when given), a numeric code, and an ISO 3166-2 code
# are all separated by pipe (|) characters."
# One row per country: [english name, alpha-2, alpha-3, numeric, ISO 3166-2 code]
_iso3166_contents = load_file_in_same_dir(__file__, 'ISO-3166-1_utf8.txt')
country_matrix = [line.strip().split('|')
                  for line in _iso3166_contents.strip().split('\n')]

# Pseudo-countries that are not part of ISO 3166-1
country_matrix.extend([
    ['Unknown', 'un', 'unk', '', ''],
    ['Latin America', '', 'lat', '', ''],
])


def _column_to_alpha3(column):
    """Map the lower-cased values of one matrix column to lower-cased alpha-3 codes."""
    return dict((row[column].lower(), row[2].lower()) for row in country_matrix)


# Lookup by English name, then alpha-2, then alpha-3; later updates win on
# key collisions, so the order of these three steps matters.
country_to_alpha3 = _column_to_alpha3(0)
country_to_alpha3.update(_column_to_alpha3(1))
country_to_alpha3.update(_column_to_alpha3(2))

# add here exceptions / non ISO representations
# Note: remember to put those exceptions in lower-case, they won't work otherwise
country_to_alpha3.update({
    'latinoamérica': 'lat',
    'brazilian': 'bra',
    'españa': 'esp',
    'uk': 'gbr',
})

# Reverse lookups, keyed by lower-cased alpha-3 code
country_alpha3_to_en_name = dict((row[2].lower(), row[0]) for row in country_matrix)
country_alpha3_to_alpha2 = dict((row[2].lower(), row[1].lower()) for row in country_matrix)
class Country(UnicodeMixin):
    """This class represents a country.

    You can initialize it with pretty much anything, as it knows conversion
    from ISO-3166 2-letter and 3-letter codes, and an English name.

    Only the lower-cased alpha-3 code is stored; equality and hashing are
    based on it.
    """

    def __init__(self, country, strict=False):
        """Identify ``country`` and store its alpha-3 code.

        :param country: English name, alpha-2 code or alpha-3 code
            (case-insensitive, surrounding whitespace ignored)
        :param strict: raise instead of falling back to the 'unk' country
        :raises ValueError: if ``strict`` and the string is not recognised
        """
        country = u(country.strip().lower())
        self.alpha3 = country_to_alpha3.get(country)

        if self.alpha3 is None:
            if strict:
                msg = 'The given string "%s" could not be identified as a country'
                raise ValueError(msg % country)
            self.alpha3 = 'unk'

    @property
    def alpha2(self):
        """Lower-cased alpha-2 code looked up from the alpha-3 code."""
        return country_alpha3_to_alpha2[self.alpha3]

    @property
    def english_name(self):
        """English name looked up from the alpha-3 code."""
        return country_alpha3_to_en_name[self.alpha3]

    def __hash__(self):
        return hash(self.alpha3)

    def __eq__(self, other):
        """Compare with another Country, or with a string naming a country.

        A string that cannot be identified as a country is never equal.
        """
        if isinstance(other, Country):
            return self.alpha3 == other.alpha3

        if isinstance(other, base_text_type):
            try:
                # strict=True is what makes the ValueError branch reachable;
                # without it every unknown string becomes the 'unk' country.
                return self == Country(other, strict=True)
            except ValueError:
                return False

        return False

    def __ne__(self, other):
        return not self == other

    def __unicode__(self):
        return self.english_name

    def __repr__(self):
        return 'Country(%s)' % self.english_name

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,15 +18,38 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
import datetime
import re
def valid_year(year):
return 1920 < year < datetime.date.today().year + 5
from dateutil import parser
_dsep = r'[-/ \.]'
_dsep_bis = r'[-/ \.x]'
date_regexps = [
re.compile('[^\d](\d{8})[^\d]', re.IGNORECASE),
re.compile('[^\d](\d{6})[^\d]', re.IGNORECASE),
re.compile('[^\d](\d{2})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
re.compile('[^\d](\d{1,2})%s(\d{1,2})%s(\d{2})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
re.compile('[^\d](\d{4})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep_bis, _dsep), re.IGNORECASE),
re.compile('[^\d](\d{1,2})%s(\d{1,2})%s(\d{4})[^\d]' % (_dsep, _dsep_bis), re.IGNORECASE),
re.compile('[^\d](\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4})[^\d]' % (_dsep, _dsep), re.IGNORECASE)]
def valid_year(year, today=None):
    """Tell whether ``year`` lies strictly between 1920 and five years after ``today``.

    ``today`` defaults to the current date.
    """
    reference = today if today else datetime.date.today()
    upper_bound = reference.year + 5
    return 1920 < year < upper_bound
def search_year(string):
"""Looks for year patterns, and if found return the year and group span.
Assumes there are sentinels at the beginning and end of the string that
always allow matching a non-digit delimiting the date.
@ -34,10 +57,10 @@ def search_year(string):
and now + 5 years, so for instance 2000 would be returned as a valid
year but 1492 would not.
>>> search_year('in the year 2000...')
(2000, (12, 16))
>>> search_year(' in the year 2000... ')
(2000, (13, 17))
>>> search_year('they arrived in 1492.')
>>> search_year(' they arrived in 1492. ')
(None, None)
"""
match = re.search(r'[^0-9]([0-9]{4})[^0-9]', string)
@ -49,85 +72,58 @@ def search_year(string):
return (None, None)
def search_date(string):
def search_date(string, year_first=None, day_first=True):
"""Looks for date patterns, and if found return the date and group span.
Assumes there are sentinels at the beginning and end of the string that
always allow matching a non-digit delimiting the date.
>>> search_date('This happened on 2002-04-22.')
(datetime.date(2002, 4, 22), (17, 27))
Year can be defined on two digit only. It will return the nearest possible
date from today.
>>> search_date('And this on 17-06-1998.')
(datetime.date(1998, 6, 17), (12, 22))
>>> search_date(' This happened on 2002-04-22. ')
(datetime.date(2002, 4, 22), (18, 28))
>>> search_date('no date in here')
>>> search_date(' And this on 17-06-1998. ')
(datetime.date(1998, 6, 17), (13, 23))
>>> search_date(' no date in here ')
(None, None)
"""
dsep = r'[-/ \.]'
date_rexps = [
# 20010823
r'[^0-9]' +
r'(?P<year>[0-9]{4})' +
r'(?P<month>[0-9]{2})' +
r'(?P<day>[0-9]{2})' +
r'[^0-9]',
# 2001-08-23
r'[^0-9]' +
r'(?P<year>[0-9]{4})' + dsep +
r'(?P<month>[0-9]{2})' + dsep +
r'(?P<day>[0-9]{2})' +
r'[^0-9]',
# 23-08-2001
r'[^0-9]' +
r'(?P<day>[0-9]{2})' + dsep +
r'(?P<month>[0-9]{2})' + dsep +
r'(?P<year>[0-9]{4})' +
r'[^0-9]',
# 23-08-01
r'[^0-9]' +
r'(?P<day>[0-9]{2})' + dsep +
r'(?P<month>[0-9]{2})' + dsep +
r'(?P<year>[0-9]{2})' +
r'[^0-9]',
]
for drexp in date_rexps:
match = re.search(drexp, string)
if match:
d = match.groupdict()
year, month, day = int(d['year']), int(d['month']), int(d['day'])
# years specified as 2 digits should be adjusted here
if year < 100:
if year > (datetime.date.today().year % 100) + 5:
year = 1900 + year
start, end = None, None
match = None
for date_re in date_regexps:
s = date_re.search(string)
if s and (match is None or s.end() - s.start() > len(match)):
start, end = s.start(), s.end()
if date_re.groups:
match = '-'.join(s.groups())
else:
year = 2000 + year
match = s.group()
if match is None:
return None, None
today = datetime.date.today()
# If day_first/year_first is undefined, parse is made using both possible values.
yearfirst_opts = [False, True]
if year_first is not None:
yearfirst_opts = [year_first]
dayfirst_opts = [True, False]
if day_first is not None:
dayfirst_opts = [day_first]
kwargs_list = ({'dayfirst': d, 'yearfirst': y} for d in dayfirst_opts for y in yearfirst_opts)
for kwargs in kwargs_list:
try:
date = parser.parse(match, **kwargs)
except (ValueError, TypeError) as e: #see https://bugs.launchpad.net/dateutil/+bug/1247643
date = None
try:
date = datetime.date(year, month, day)
except ValueError:
try:
date = datetime.date(year, day, month)
except ValueError:
pass
if date is None:
continue
# check date plausibility
if not 1900 < date.year < datetime.date.today().year + 5:
continue
# looks like we have a valid date
# note: span is [+1,-1] because we don't want to include the
# non-digit char
start, end = match.span()
return (date, (start + 1, end - 1))
if date and valid_year(date.year, today=today):
return date.date(), (start+1, end-1) #compensate for sentinels
return None, None

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,7 +18,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import s, u
import os.path
import zipfile
@ -44,17 +45,13 @@ def split_path(path):
result = []
while True:
head, tail = os.path.split(path)
headlen = len(head)
# on Unix systems, the root folder is '/'
if head and head == '/'*headlen and tail == '':
return ['/'] + result
if not head and not tail:
return result
# on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\
if ((headlen == 3 and head[1:] == ':\\') or (headlen == 2 and head == '\\\\')) and tail == '':
return [head] + result
if head == '' and tail == '':
if not tail and head == path:
# Make sure we won't have an infinite loop.
result = [head] + result
return result
# we just split a directory ending with '/', so tail is empty
@ -70,8 +67,8 @@ def split_path(path):
def file_in_same_dir(ref_file, desired_file):
"""Return the path for a file in the same dir as a given reference file.
>>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings'))
'~/smewt/smewt.settings'
>>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings')) == os.path.normpath('~/smewt/smewt.settings')
True
"""
return os.path.join(*(split_path(ref_file)[:-1] + [desired_file]))
@ -85,6 +82,6 @@ def load_file_in_same_dir(ref_file, filename):
if p.endswith('.zip'):
zfilename = os.path.join(*path[:i + 1])
zfile = zipfile.ZipFile(zfilename)
return zfile.read('/'.join(path[i + 1:]))
return u(zfile.read('/'.join(path[i + 1:])))
return u(io.open(os.path.join(*path), encoding='utf-8').read())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,10 +18,10 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import UnicodeMixin, s, u, base_text_type
from guessit.language import Language
from guessit.country import Country
from babelfish import Language, Country
import json
import datetime
import logging
@ -29,6 +29,111 @@ import logging
log = logging.getLogger(__name__)
class GuessMetadata(object):
    """Confidence, input string, span and property definition of a guess.

    Any value left undefined (``None``) on this object is read from
    ``parent`` instead, so metadata defined on one property of a Guess
    overrides the metadata defined globally on that Guess.

    :param parent: metadata consulted for values undefined on this object
    :type parent: :class:`GuessMetadata`
    :param confidence: the confidence (from 0.0 to 1.0)
    :type confidence: number
    :param input: the input string
    :type input: string
    :param span: start and end of the value inside the input string
    :type span: tuple (int, int)
    :param prop: the found property definition
    :type prop: :class:`guessit.containers._Property`
    """

    def __init__(self, parent=None, confidence=None, input=None, span=None, prop=None, *args, **kwargs):
        self.parent = parent
        # Only a parentless object without explicit confidence defaults to 1.0;
        # with a parent, None means "inherit".
        self._confidence = 1.0 if (confidence is None and parent is None) else confidence
        self._input = input
        self._span = span
        self._prop = prop

    @property
    def confidence(self):
        """The confidence, own or inherited from the parent.

        :rtype: number
        :return: confidence value
        """
        if self._confidence is not None:
            return self._confidence
        if self.parent:
            return self.parent.confidence
        return None

    @confidence.setter
    def confidence(self, confidence):
        self._confidence = confidence

    @property
    def input(self):
        """The input, own or inherited from the parent.

        :rtype: string
        :return: String used to find this guess value
        """
        if self._input is not None:
            return self._input
        if self.parent:
            return self.parent.input
        return None

    @input.setter
    def input(self, input):
        self._input = input

    @property
    def span(self):
        """The span, own or inherited from the parent.

        :rtype: tuple (int, int)
        :return: span of input string used to find this guess value
        """
        if self._span is not None:
            return self._span
        if self.parent:
            return self.parent.span
        return None

    @span.setter
    def span(self, span):
        self._span = span

    @property
    def prop(self):
        """The property, own or inherited from the parent.

        :rtype: :class:`_Property`
        :return: The property
        """
        if self._prop is not None:
            return self._prop
        if self.parent:
            return self.parent.prop
        return None

    @property
    def raw(self):
        """Return the raw information (original match from the string,
        not the cleaned version), or None if input or span is missing."""
        if self.input and self.span:
            begin, finish = self.span[0], self.span[1]
            return self.input[begin:finish]
        return None

    def __repr__(self, *args, **kwargs):
        return object.__repr__(self, *args, **kwargs)
def _split_kwargs(**kwargs):
    """Separate keyword arguments named after GuessMetadata attributes from the rest.

    :return: tuple ``(metadata kwargs, remaining kwargs)``
    """
    metadata_args = {}
    for attr in dir(GuessMetadata):
        if attr in kwargs:
            metadata_args[attr] = kwargs.pop(attr)
    return metadata_args, kwargs
class Guess(UnicodeMixin, dict):
"""A Guess is a dictionary which has an associated confidence for each of
its values.
@ -37,39 +142,58 @@ class Guess(UnicodeMixin, dict):
simple dict."""
def __init__(self, *args, **kwargs):
try:
confidence = kwargs.pop('confidence')
except KeyError:
confidence = 0
try:
raw = kwargs.pop('raw')
except KeyError:
raw = None
metadata_kwargs, kwargs = _split_kwargs(**kwargs)
self._global_metadata = GuessMetadata(**metadata_kwargs)
dict.__init__(self, *args, **kwargs)
self._confidence = {}
self._raw = {}
self._metadata = {}
for prop in self:
self._confidence[prop] = confidence
self._raw[prop] = raw
self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
def rename(self, old_name, new_name):
if old_name in self._metadata:
metadata = self._metadata[old_name]
del self._metadata[old_name]
self._metadata[new_name] = metadata
if old_name in self:
value = self[old_name]
del self[old_name]
self[new_name] = value
return True
return False
def to_dict(self, advanced=False):
"""Return the guess as a dict containing only base types, ie:
where dates, languages, countries, etc. are converted to strings.
if advanced is True, return the data as a json string containing
also the raw information of the properties."""
data = dict(self)
for prop, value in data.items():
if isinstance(value, datetime.date):
data[prop] = value.isoformat()
elif isinstance(value, (Language, Country, base_text_type)):
elif isinstance(value, (UnicodeMixin, base_text_type)):
data[prop] = u(value)
elif isinstance(value, (Language, Country)):
data[prop] = value.guessit
elif isinstance(value, list):
data[prop] = [u(x) for x in value]
if advanced:
data[prop] = {"value": data[prop], "raw": self.raw(prop), "confidence": self.confidence(prop)}
metadata = self.metadata(prop)
prop_data = {'value': data[prop]}
if metadata.raw:
prop_data['raw'] = metadata.raw
if metadata.confidence:
prop_data['confidence'] = metadata.confidence
data[prop] = prop_data
return data
def nice_string(self, advanced=False):
"""Return a string with the property names and their values,
that also displays the associated confidence to each property.
FIXME: doc with param"""
if advanced:
data = self.to_dict(advanced)
return json.dumps(data, indent=4)
@ -89,39 +213,54 @@ class Guess(UnicodeMixin, dict):
def __unicode__(self):
return u(self.to_dict())
def confidence(self, prop):
return self._confidence.get(prop, -1)
def metadata(self, prop=None):
"""Return the metadata associated with the given property name
If no property name is given, get the global_metadata
"""
if prop is None:
return self._global_metadata
if prop not in self._metadata:
self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
return self._metadata[prop]
def confidence(self, prop=None):
return self.metadata(prop).confidence
def set_confidence(self, prop, confidence):
self.metadata(prop).confidence = confidence
def raw(self, prop):
return self._raw.get(prop, None)
return self.metadata(prop).raw
def set(self, prop, value, confidence=None, raw=None):
self[prop] = value
if confidence is not None:
self._confidence[prop] = confidence
if raw is not None:
self._raw[prop] = raw
def set(self, prop_name, value, *args, **kwargs):
if value is None:
try:
del self[prop_name]
except KeyError:
pass
try:
del self._metadata[prop_name]
except KeyError:
pass
else:
self[prop_name] = value
if 'metadata' in kwargs.keys():
self._metadata[prop_name] = kwargs['metadata']
else:
self._metadata[prop_name] = GuessMetadata(parent=self._global_metadata, *args, **kwargs)
def set_confidence(self, prop, value):
self._confidence[prop] = value
def set_raw(self, prop, value):
self._raw[prop] = value
def update(self, other, confidence=None, raw=None):
def update(self, other, confidence=None):
dict.update(self, other)
if isinstance(other, Guess):
for prop in other:
self._confidence[prop] = other.confidence(prop)
self._raw[prop] = other.raw(prop)
try:
self._metadata[prop] = other._metadata[prop]
except KeyError:
pass
if confidence is not None:
for prop in other:
self._confidence[prop] = confidence
if raw is not None:
for prop in other:
self._raw[prop] = raw
self.set_confidence(prop, confidence)
def update_highest_confidence(self, other):
"""Update this guess with the values from the given one. In case
@ -131,11 +270,10 @@ class Guess(UnicodeMixin, dict):
raise ValueError('Can only call this function on Guess instances')
for prop in other:
if prop in self and self.confidence(prop) >= other.confidence(prop):
if prop in self and self.metadata(prop).confidence >= other.metadata(prop).confidence:
continue
self[prop] = other[prop]
self._confidence[prop] = other.confidence(prop)
self._raw[prop] = other.raw(prop)
self._metadata[prop] = other.metadata(prop)
def choose_int(g1, g2):
@ -193,26 +331,26 @@ def choose_string(g1, g2):
combined_prob = 1 - (1 - c1) * (1 - c2)
if v1l == v2l:
return (v1, combined_prob)
return v1, combined_prob
# check for common patterns
elif v1l == 'the ' + v2l:
return (v1, combined_prob)
return v1, combined_prob
elif v2l == 'the ' + v1l:
return (v2, combined_prob)
return v2, combined_prob
# if one string is contained in the other, return the shortest one
elif v2l in v1l:
return (v2, combined_prob)
return v2, combined_prob
elif v1l in v2l:
return (v1, combined_prob)
return v1, combined_prob
# in case of conflict, return the one with highest confidence
else:
if c1 > c2:
return (v1, c1 - c2)
return v1, c1 - c2
else:
return (v2, c2 - c1)
return v2, c2 - c1
def _merge_similar_guesses_nocheck(guesses, prop, choose):
@ -226,17 +364,7 @@ def _merge_similar_guesses_nocheck(guesses, prop, choose):
g1, g2 = similar[0], similar[1]
other_props = set(g1) & set(g2) - set([prop])
if other_props:
log.debug('guess 1: %s' % g1)
log.debug('guess 2: %s' % g2)
for prop in other_props:
if g1[prop] != g2[prop]:
log.warning('both guesses to be merged have more than one '
'different property in common, bailing out...')
return
# merge all props of s2 into s1, updating the confidence for the
# merge only this prop of s2 into s1, updating the confidence for the
# considered property
v1, v2 = g1[prop], g2[prop]
c1, c2 = g1.confidence(prop), g2.confidence(prop)
@ -248,10 +376,11 @@ def _merge_similar_guesses_nocheck(guesses, prop, choose):
msg = "Updating non-matching property '%s' with confidence %.2f"
log.debug(msg % (prop, new_confidence))
g2[prop] = new_value
g2.set_confidence(prop, new_confidence)
g1.set(prop, new_value, confidence=new_confidence)
g2.pop(prop)
g1.update(g2)
# remove g2 if there are no properties left
if not g2.keys():
guesses.remove(g2)
@ -286,43 +415,53 @@ def merge_all(guesses, append=None):
instead of being merged.
>>> s(merge_all([ Guess({'season': 2}, confidence=0.6),
... Guess({'episodeNumber': 13}, confidence=0.8) ]))
{'season': 2, 'episodeNumber': 13}
... Guess({'episodeNumber': 13}, confidence=0.8) ])
... ) == {'season': 2, 'episodeNumber': 13}
True
>>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02),
... Guess({'season': 1}, confidence=0.2) ]))
{'season': 1}
... Guess({'season': 1}, confidence=0.2) ])
... ) == {'season': 1}
True
>>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8),
... Guess({'releaseGroup': '2HD'}, confidence=0.8) ],
... append=['other']))
{'releaseGroup': '2HD', 'other': ['PROPER']}
... append=['other'])
... ) == {'releaseGroup': '2HD', 'other': ['PROPER']}
True
"""
result = Guess()
if not guesses:
return Guess()
return result
result = guesses[0]
if append is None:
append = []
for g in guesses[1:]:
for g in guesses:
# first append our appendable properties
for prop in append:
if prop in g:
result.set(prop, result.get(prop, []) + [g[prop]],
if isinstance(g[prop], (list, set)):
new_values = result.get(prop, []) + list(g[prop])
else:
new_values = result.get(prop, []) + [g[prop]]
result.set(prop, new_values,
# TODO: what to do with confidence here? maybe an
# arithmetic mean...
confidence=g.confidence(prop),
raw=g.raw(prop))
confidence=g.metadata(prop).confidence,
input=g.metadata(prop).input,
span=g.metadata(prop).span,
prop=g.metadata(prop).prop)
del g[prop]
# then merge the remaining ones
dups = set(result) & set(g)
if dups:
log.warning('duplicate properties %s in merged result...' % [ (result[p], g[p]) for p in dups] )
log.debug('duplicate properties %s in merged result...' % [(result[p], g[p]) for p in dups])
result.update_highest_confidence(g)
@ -338,8 +477,38 @@ def merge_all(guesses, append=None):
if isinstance(value, list):
result[prop] = list(set(value))
else:
result[prop] = [ value ]
result[prop] = [value]
except KeyError:
pass
return result
def smart_merge(guesses):
    """Merge a list of guesses, reconciling well-known properties first.

    Similar values of well-known properties are merged with
    ``merge_similar_guesses`` (integer properties with ``choose_int``,
    string properties with ``choose_string``); whatever remains is then
    combined by a single ``merge_all`` call.

    Should be the function to call in most cases, unless one wants to have
    more control.

    Warning: this function is destructive, ie: it will merge the list
    in-place.
    """
    int_props = ('year', 'season', 'episodeNumber')
    string_props = ('title', 'series', 'container', 'format',
                    'releaseGroup', 'website', 'audioCodec',
                    'videoCodec', 'screenSize', 'episodeFormat',
                    'audioChannels', 'idNumber')

    # 1- try to merge similar information together and give it a higher
    #    confidence (integer properties first, then string properties)
    for props, chooser in ((int_props, choose_int),
                           (string_props, choose_string)):
        for prop in props:
            merge_similar_guesses(guesses, prop, chooser)

    # 2- merge the rest, potentially discarding information not properly
    #    merged before
    return merge_all(guesses,
                     append=['language', 'subtitleLanguage', 'other',
                             'episodeDetails', 'unidentified'])

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,17 +18,21 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import s, to_hex
import hashlib
import os.path
from functools import reduce
def hash_file(filename):
"""Returns the ed2k hash of a given file.
>>> s(hash_file('tests/dummy.srt'))
'ed2k://|file|dummy.srt|44|1CA0B9DED3473B926AA93A0A546138BB|/'
>>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
>>> s(hash_file(testfile))
'ed2k://|file|dummy.srt|59|41F58B913AB3973F593BEBA8B8DF6510|/'
"""
return 'ed2k://|file|%s|%d|%s|/' % (os.path.basename(filename),
os.path.getsize(filename),

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,7 +18,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
import struct
import os
@ -28,7 +29,7 @@ def hash_file(filename):
http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
and is licensed under the GPL."""
longlongformat = 'q' # long long
longlongformat = b'q' # long long
bytesize = struct.calcsize(longlongformat)
f = open(filename, "rb")
@ -39,18 +40,18 @@ def hash_file(filename):
if filesize < 65536 * 2:
raise Exception("SizeError: size is %d, should be > 132K..." % filesize)
for x in range(65536 / bytesize):
for x in range(int(65536 / bytesize)):
buf = f.read(bytesize)
(l_value,) = struct.unpack(longlongformat, buf)
hash_value += l_value
hash_value = hash_value & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number
hash_value &= 0xFFFFFFFFFFFFFFFF # to remain as 64bit number
f.seek(max(0, filesize - 65536), 0)
for x in range(65536 / bytesize):
for x in range(int(65536 / bytesize)):
buf = f.read(bytesize)
(l_value,) = struct.unpack(longlongformat, buf)
hash_value += l_value
hash_value = hash_value & 0xFFFFFFFFFFFFFFFF
hash_value &= 0xFFFFFFFFFFFFFFFF
f.close()

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,373 +18,284 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import UnicodeMixin, base_text_type, u, s
from guessit.fileutils import load_file_in_same_dir
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import UnicodeMixin, base_text_type, u
from guessit.textutils import find_words
from guessit.country import Country
from babelfish import Language, Country
import babelfish
import re
import logging
from guessit.guess import Guess
__all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language',
'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'UNDETERMINED',
'search_language', 'guess_language' ]
__all__ = ['Language', 'UNDETERMINED',
'search_language', 'guess_language']
log = logging.getLogger(__name__)
UNDETERMINED = babelfish.Language('und')
# downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
#
# Description of the fields:
# "An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given),
# an alpha-2 code (when given), an English name, and a French name of a language
# are all separated by pipe (|) characters."
_iso639_contents = load_file_in_same_dir(__file__, 'ISO-639-2_utf-8.txt')
# drop the BOM from the beginning of the file
_iso639_contents = _iso639_contents[1:]
language_matrix = [ l.strip().split('|')
for l in _iso639_contents.strip().split('\n') ]
# update information in the language matrix
language_matrix += [['mol', '', 'mo', 'Moldavian', 'moldave'],
['ass', '', '', 'Assyrian', 'assyrien']]
for lang in language_matrix:
# remove unused languages that shadow other common ones with a non-official form
if (lang[2] == 'se' or # Northern Sami shadows Swedish
lang[2] == 'br'): # Breton shadows Brazilian
lang[2] = ''
# add missing information
if lang[0] == 'und':
lang[2] = 'un'
if lang[0] == 'srp':
lang[1] = 'scc' # from OpenSubtitles
lng3 = frozenset(l[0] for l in language_matrix if l[0])
lng3term = frozenset(l[1] for l in language_matrix if l[1])
lng2 = frozenset(l[2] for l in language_matrix if l[2])
lng_en_name = frozenset(lng for l in language_matrix
for lng in l[3].lower().split('; ') if lng)
lng_fr_name = frozenset(lng for l in language_matrix
for lng in l[4].lower().split('; ') if lng)
lng_all_names = lng3 | lng3term | lng2 | lng_en_name | lng_fr_name
lng3_to_lng3term = dict((l[0], l[1]) for l in language_matrix if l[1])
lng3term_to_lng3 = dict((l[1], l[0]) for l in language_matrix if l[1])
lng3_to_lng2 = dict((l[0], l[2]) for l in language_matrix if l[2])
lng2_to_lng3 = dict((l[2], l[0]) for l in language_matrix if l[2])
# we only return the first given english name, hoping it is the most used one
lng3_to_lng_en_name = dict((l[0], l[3].split('; ')[0])
for l in language_matrix if l[3])
lng_en_name_to_lng3 = dict((en_name.lower(), l[0])
for l in language_matrix if l[3]
for en_name in l[3].split('; '))
# we only return the first given french name, hoping it is the most used one
lng3_to_lng_fr_name = dict((l[0], l[4].split('; ')[0])
for l in language_matrix if l[4])
lng_fr_name_to_lng3 = dict((fr_name.lower(), l[0])
for l in language_matrix if l[4]
for fr_name in l[4].split('; '))
# contains a list of exceptions: strings that should be parsed as a language
# but which are not in an ISO form
lng_exceptions = { 'unknown': ('und', None),
'inconnu': ('und', None),
'unk': ('und', None),
'un': ('und', None),
'gr': ('gre', None),
'greek': ('gre', None),
'esp': ('spa', None),
'español': ('spa', None),
'se': ('swe', None),
'po': ('pt', 'br'),
'pb': ('pt', 'br'),
'pob': ('pt', 'br'),
'br': ('pt', 'br'),
'brazilian': ('pt', 'br'),
'català': ('cat', None),
'cz': ('cze', None),
'ua': ('ukr', None),
'cn': ('chi', None),
'chs': ('chi', None),
'jp': ('jpn', None),
'scr': ('hrv', None)
SYN = {('und', None): ['unknown', 'inconnu', 'unk', 'un'],
('ell', None): ['gr', 'greek'],
('spa', None): ['esp', 'español'],
('fra', None): ['français', 'vf', 'vff', 'vfi'],
('swe', None): ['se'],
('por', 'BR'): ['po', 'pb', 'pob', 'br', 'brazilian'],
('cat', None): ['català'],
('ces', None): ['cz'],
('ukr', None): ['ua'],
('zho', None): ['cn'],
('jpn', None): ['jp'],
('hrv', None): ['scr'],
('mul', None): ['multi', 'dl'], # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
}
def is_iso_language(language):
return language.lower() in lng_all_names
def is_language(language):
return is_iso_language(language) or language in lng_exceptions
def lang_set(languages, strict=False):
"""Return a set of guessit.Language created from their given string
representation.
if strict is True, then this will raise an exception if any language
could not be identified.
"""
return set(Language(l, strict=strict) for l in languages)
class Language(UnicodeMixin):
"""This class represents a human language.
You can initialize it with pretty much anything, as it knows conversion
from ISO-639 2-letter and 3-letter codes, English and French names.
You can also distinguish languages for specific countries, such as
Portuguese and Brazilian Portuguese.
There are various properties on the language object that give you the
representation of the language for a specific usage, such as .alpha3
to get the ISO 3-letter code, or .opensubtitles to get the OpenSubtitles
language code.
>>> Language('fr')
Language(French)
>>> s(Language('eng').french_name)
'anglais'
>>> s(Language('pt(br)').country.english_name)
'Brazil'
>>> s(Language('Español (Latinoamérica)').country.english_name)
'Latin America'
>>> Language('Spanish (Latin America)') == Language('Español (Latinoamérica)')
True
>>> s(Language('zz', strict=False).english_name)
'Undetermined'
>>> s(Language('pt(br)').opensubtitles)
'pob'
"""
class GuessitConverter(babelfish.LanguageReverseConverter):
_with_country_regexp = re.compile('(.*)\((.*)\)')
_with_country_regexp2 = re.compile('(.*)-(.*)')
def __init__(self, language, country=None, strict=False, scheme=None):
language = u(language.strip().lower())
with_country = (Language._with_country_regexp.match(language) or
Language._with_country_regexp2.match(language))
def __init__(self):
self.guessit_exceptions = {}
for (alpha3, country), synlist in SYN.items():
for syn in synlist:
self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
@property
def codes(self):
return (babelfish.language_converters['alpha3b'].codes |
babelfish.language_converters['alpha2'].codes |
babelfish.language_converters['name'].codes |
babelfish.language_converters['opensubtitles'].codes |
babelfish.country_converters['name'].codes |
frozenset(self.guessit_exceptions.keys()))
def convert(self, alpha3, country=None, script=None):
return str(babelfish.Language(alpha3, country, script))
def reverse(self, name):
with_country = (GuessitConverter._with_country_regexp.match(name) or
GuessitConverter._with_country_regexp2.match(name))
name = u(name.lower())
if with_country:
self.lang = Language(with_country.group(1)).lang
self.country = Country(with_country.group(2))
return
lang = Language.fromguessit(with_country.group(1).strip())
lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
return (lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None)
self.lang = None
self.country = Country(country) if country else None
# first look for scheme specific languages
if scheme == 'opensubtitles':
if language == 'br':
self.lang = 'bre'
return
elif language == 'se':
self.lang = 'sme'
return
elif scheme is not None:
log.warning('Unrecognized scheme: "%s" - Proceeding with standard one' % scheme)
# look for ISO language codes
if len(language) == 2:
self.lang = lng2_to_lng3.get(language)
elif len(language) == 3:
self.lang = (language
if language in lng3
else lng3term_to_lng3.get(language))
else:
self.lang = (lng_en_name_to_lng3.get(language) or
lng_fr_name_to_lng3.get(language))
# general language exceptions
if self.lang is None and language in lng_exceptions:
lang, country = lng_exceptions[language]
self.lang = Language(lang).alpha3
self.country = Country(country) if country else None
msg = 'The given string "%s" could not be identified as a language' % language
if self.lang is None and strict:
raise ValueError(msg)
if self.lang is None:
log.debug(msg)
self.lang = 'und'
@property
def alpha2(self):
return lng3_to_lng2[self.lang]
@property
def alpha3(self):
return self.lang
@property
def alpha3term(self):
return lng3_to_lng3term[self.lang]
@property
def english_name(self):
return lng3_to_lng_en_name[self.lang]
@property
def french_name(self):
return lng3_to_lng_fr_name[self.lang]
@property
def opensubtitles(self):
if self.lang == 'por' and self.country and self.country.alpha2 == 'br':
return 'pob'
elif self.lang in ['gre', 'srp']:
return self.alpha3term
return self.alpha3
@property
def tmdb(self):
if self.country:
return '%s-%s' % (self.alpha2, self.country.alpha2.upper())
return self.alpha2
def __hash__(self):
return hash(self.lang)
def __eq__(self, other):
if isinstance(other, Language):
return self.lang == other.lang
if isinstance(other, base_text_type):
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
return self == Language(other)
return self.guessit_exceptions[name]
except KeyError:
pass
for conv in [babelfish.Language,
babelfish.Language.fromalpha3b,
babelfish.Language.fromalpha2,
babelfish.Language.fromname,
babelfish.Language.fromopensubtitles]:
try:
c = conv(name)
return c.alpha3, c.country, c.script
except (ValueError, babelfish.LanguageReverseError):
pass
raise babelfish.LanguageReverseError(name)
babelfish.language_converters['guessit'] = GuessitConverter()
COUNTRIES_SYN = {'ES': ['españa'],
'GB': ['UK'],
'BR': ['brazilian', 'bra'],
# FIXME: this one is a bit of a stretch, not sure how to do
# it properly, though...
'MX': ['Latinoamérica', 'latin america']
}
class GuessitCountryConverter(babelfish.CountryReverseConverter):
def __init__(self):
self.guessit_exceptions = {}
for alpha2, synlist in COUNTRIES_SYN.items():
for syn in synlist:
self.guessit_exceptions[syn.lower()] = alpha2
@property
def codes(self):
return (babelfish.country_converters['name'].codes |
frozenset(babelfish.COUNTRIES.values()) |
frozenset(self.guessit_exceptions.keys()))
def convert(self, alpha2):
if alpha2 == 'GB':
return 'UK'
return str(Country(alpha2))
def reverse(self, name):
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:
return self.guessit_exceptions[name.lower()]
except KeyError:
pass
try:
return babelfish.Country(name.upper()).alpha2
except ValueError:
return False
pass
return False
for conv in [babelfish.Country.fromname]:
try:
return conv(name).alpha2
except babelfish.CountryReverseError:
pass
def __ne__(self, other):
return not self == other
raise babelfish.CountryReverseError(name)
def __nonzero__(self):
return self.lang != 'und'
def __unicode__(self):
if self.country:
return '%s(%s)' % (self.english_name, self.country.alpha2)
babelfish.country_converters['guessit'] = GuessitCountryConverter()
# list of common words which could be interpreted as languages, but which
# are far too common to be able to say they represent a language in the
# middle of a string (where they most likely carry their common meaning)
LNG_COMMON_WORDS = frozenset([
# english words
'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb', 'bt',
'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice', 'ay',
# french words
'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me',
'ne', 'ma', 'va', 'au',
# japanese words,
'wa', 'ga', 'ao',
# spanish words
'la', 'el', 'del', 'por', 'mar',
# other
'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
'vi', 'ben', 'da', 'lt', 'ch',
# new from babelfish
'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
'fer', 'fun', 'two', 'big', 'psy', 'air',
# movie title
'brazil',
# release groups
'bs', # Bosnian
'kz',
# countries
'gt', 'lt',
# part/pt
'pt'
])
LNG_COMMON_WORDS_STRICT = frozenset(['brazil'])
subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub']
subtitle_suffixes = ['subforced', 'fansub', 'hardsub']
lang_prefixes = ['true']
def find_possible_languages(string, allowed_languages=None):
    """Find possible languages in the string

    Each word returned by ``find_words`` is lower-cased, stripped of any
    known subtitle prefix/suffix (which switches the reported property from
    ``'language'`` to ``'subtitleLanguage'``) and of any language prefix,
    and is then looked up with ``Language.fromguessit`` unless it is one of
    the common words to ignore.

    :param string: the text to scan
    :param allowed_languages: optional collection restricting the accepted
        languages; when given, only ``LNG_COMMON_WORDS_STRICT`` is used as
        the list of words to ignore, and a language is kept only if its
        name, alpha2 or alpha3 code (lower-cased) is in the collection
    :return: list of tuple (property, Language, lang_word, word)
    """
    common_words = LNG_COMMON_WORDS_STRICT if allowed_languages else LNG_COMMON_WORDS

    words = find_words(string)

    valid_words = []
    for word in words:
        lang_word = word.lower()
        key = 'language'
        for prefix in subtitle_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]
                key = 'subtitleLanguage'
        for suffix in subtitle_suffixes:
            if lang_word.endswith(suffix):
                # drop the trailing suffix; slicing with [:len(suffix)]
                # would instead keep the first len(suffix) characters
                lang_word = lang_word[:-len(suffix)]
                key = 'subtitleLanguage'
        for prefix in lang_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]
        if lang_word not in common_words:
            try:
                lang = Language.fromguessit(lang_word)
                if allowed_languages:
                    if (lang.name.lower() in allowed_languages or
                            lang.alpha2.lower() in allowed_languages or
                            lang.alpha3.lower() in allowed_languages):
                        valid_words.append((key, lang, lang_word, word))
                # Keep language with alpha2 equivalent. Others are probably
                # uncommon languages.
                elif lang == 'mul' or hasattr(lang, 'alpha2'):
                    valid_words.append((key, lang, lang_word, word))
            except babelfish.Error:
                # the word is not a recognizable language: ignore it
                pass
    return valid_words
UNDETERMINED = Language('und')
ALL_LANGUAGES = frozenset(Language(lng) for lng in lng_all_names) - frozenset([UNDETERMINED])
ALL_LANGUAGES_NAMES = lng_all_names
def search_language(string, lang_filter=None, skip=None):
def search_language(string, allowed_languages=None):
"""Looks for language patterns, and if found return the language object,
its group span and an associated confidence.
you can specify a list of allowed languages using the lang_filter argument,
as in lang_filter = [ 'fr', 'eng', 'spanish' ]
>>> search_language('movie [en].avi')
(Language(English), (7, 9), 0.8)
>>> search_language('movie [en].avi')['language']
<Language [en]>
>>> search_language('the zen fat cat and the gay mad men got a new fan', allowed_languages = ['en', 'fr', 'es'])
>>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es'])
(None, None, None)
"""
# list of common words which could be interpreted as languages, but which
# are far too common to be able to say they represent a language in the
# middle of a string (where they most likely carry their commmon meaning)
lng_common_words = frozenset([
# english words
'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
'fry', 'cop', 'zen', 'gay', 'fat', 'cherokee', 'got', 'an', 'as',
'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi',
# french words
'bas', 'de', 'le', 'son', 'vo', 'vf', 'ne', 'ca', 'ce', 'et', 'que',
'mal', 'est', 'vol', 'or', 'mon', 'se',
# spanish words
'la', 'el', 'del', 'por', 'mar',
# other
'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
'vi', 'ben', 'da', 'lt'
])
sep = r'[](){} \._-+'
if allowed_languages:
allowed_languages = set(Language.fromguessit(lang) for lang in allowed_languages)
if lang_filter:
lang_filter = lang_set(lang_filter)
slow = ' %s ' % string.lower()
confidence = 1.0 # for all of them
for lang in set(find_words(slow)) & lng_all_names:
if lang in lng_common_words:
continue
pos = slow.find(lang)
if pos != -1:
end = pos + len(lang)
# skip if span in in skip list
while skip and (pos - 1, end - 1) in skip:
pos = slow.find(lang, end)
if pos == -1:
continue
end = pos + len(lang)
if pos == -1:
continue
# make sure our word is always surrounded by separators
if slow[pos - 1] not in sep or slow[end] not in sep:
continue
language = Language(slow[pos:end])
if lang_filter and language not in lang_filter:
continue
for prop, language, lang, word in find_possible_languages(string, allowed_languages):
pos = string.find(word)
end = pos + len(word)
# only allow those languages that have a 2-letter code, those that
# don't are too esoteric and probably false matches
if language.lang not in lng3_to_lng2:
continue
# if language.lang not in lng3_to_lng2:
# continue
# confidence depends on lng2, lng3, english name, ...
# confidence depends on alpha2, alpha3, english name, ...
if len(lang) == 2:
confidence = 0.8
elif len(lang) == 3:
confidence = 0.9
elif prop == 'subtitleLanguage':
confidence = 0.6 # Subtitle prefix found with language
else:
# Note: we could either be really confident that we found a
# language or assume that full language names are too
# common words and lower their confidence accordingly
confidence = 0.3 # going with the low-confidence route here
return language, (pos - 1, end - 1), confidence
return Guess({prop: language}, confidence=confidence, input=string, span=(pos, end))
return None, None, None
return None
def guess_language(text):
def guess_language(text): # pragma: no cover
"""Guess the language in which a body of text is written.
This uses the external guess-language python module, and will fail and return
@ -392,7 +303,7 @@ def guess_language(text):
"""
try:
from guess_language import guessLanguage
return Language(guessLanguage(text))
return Language.fromguessit(guessLanguage(text))
except ImportError:
log.error('Cannot detect the language of the given text body, missing dependency: guess-language')

View file

@ -2,7 +2,8 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,29 +19,36 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import PY3, u, base_text_type
from guessit.matchtree import MatchTree
from guessit.textutils import normalize_unicode, clean_string
from __future__ import absolute_import, division, print_function, \
unicode_literals
import logging
from guessit import PY3, u
from guessit.transfo import TransformerException
from guessit.matchtree import MatchTree
from guessit.textutils import normalize_unicode, clean_default
from guessit.guess import Guess
import inspect
log = logging.getLogger(__name__)
class IterativeMatcher(object):
def __init__(self, filename, filetype='autodetect', opts=None, transfo_opts=None):
"""An iterative matcher tries to match different patterns that appear
in the filename.
The 'filetype' argument indicates which type of file you want to match.
If it is 'autodetect', the matcher will try to see whether it can guess
The ``filetype`` argument indicates which type of file you want to match.
If it is undefined, the matcher will try to see whether it can guess
that the file corresponds to an episode, or otherwise will assume it is
a movie.
The recognized 'filetype' values are:
[ autodetect, subtitle, info, movie, moviesubtitle, movieinfo, episode,
episodesubtitle, episodeinfo ]
The recognized ``filetype`` values are:
``['subtitle', 'info', 'movie', 'moviesubtitle', 'movieinfo', 'episode',
'episodesubtitle', 'episodeinfo']``
``options`` is a dict of options values to be passed to the transformations used
by the matcher.
The IterativeMatcher works mainly in 2 steps:
@ -48,7 +56,7 @@ class IterativeMatcher(object):
which have a semantic meaning, such as episode number, movie title,
etc...
The match_tree created looks like the following:
The match_tree created looks like the following::
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
@ -58,123 +66,241 @@ class IterativeMatcher(object):
[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
The first 3 lines indicates the group index in which a char in the
filename is located. So for instance, x264 is the group (0, 4, 1), and
it corresponds to a video codec, denoted by the letter'v' in the 4th line.
filename is located. So for instance, ``x264`` (in the middle) is the group (0, 4, 1), and
it corresponds to a video codec, denoted by the letter ``v`` in the 4th line.
(for more info, see guess.matchtree.to_string)
Second, it tries to merge all this information into a single object
containing all the found properties, and does some (basic) conflict
resolution when they arise.
When you create the Matcher, you can pass it:
- a list 'opts' of option names, that act as global flags
- a dict 'transfo_opts' of { transfo_name: (transfo_args, transfo_kwargs) }
with which to call the transfo.process() function.
"""
valid_filetypes = ('autodetect', 'subtitle', 'info', 'video',
'movie', 'moviesubtitle', 'movieinfo',
'episode', 'episodesubtitle', 'episodeinfo')
if filetype not in valid_filetypes:
raise ValueError("filetype needs to be one of %s" % valid_filetypes)
def __init__(self, filename, options=None, **kwargs):
options = dict(options or {})
for k, v in kwargs.items():
if k not in options or not options[k]:
options[k] = v # options dict has priority over keyword arguments
self._validate_options(options)
if not PY3 and not isinstance(filename, unicode):
log.warning('Given filename to matcher is not unicode...')
filename = filename.decode('utf-8')
filename = normalize_unicode(filename)
if options and options.get('clean_function'):
clean_function = options.get('clean_function')
if not hasattr(clean_function, '__call__'):
module, function = clean_function.rsplit('.')
if not module:
module = 'guessit.textutils'
clean_function = getattr(__import__(module), function)
if not clean_function:
log.error('Can\'t find clean function %s. Default will be used.' % options.get('clean_function'))
clean_function = clean_default
else:
clean_function = clean_default
if opts is None:
opts = []
if not isinstance(opts, list):
raise ValueError('opts must be a list of option names! Received: type=%s val=%s',
type(opts), opts)
if transfo_opts is None:
transfo_opts = {}
if not isinstance(transfo_opts, dict):
raise ValueError('transfo_opts must be a dict of { transfo_name: (args, kwargs) }. '+
'Received: type=%s val=%s', type(transfo_opts), transfo_opts)
self.match_tree = MatchTree(filename)
self.match_tree = MatchTree(filename, clean_function=clean_function)
self.options = options
self._transfo_calls = []
# sanity check: make sure we don't process a (mostly) empty string
if clean_string(filename) == '':
if clean_function(filename).strip() == '':
return
from guessit.plugins import transformers
try:
mtree = self.match_tree
mtree.guess.set('type', filetype, confidence=1.0)
if 'type' in self.options:
mtree.guess.set('type', self.options['type'], confidence=0.0)
def apply_transfo(transfo_name, *args, **kwargs):
transfo = __import__('guessit.transfo.' + transfo_name,
globals=globals(), locals=locals(),
fromlist=['process'], level=0)
default_args, default_kwargs = transfo_opts.get(transfo_name, ((), {}))
all_args = args or default_args
all_kwargs = dict(default_kwargs)
all_kwargs.update(kwargs) # keep all kwargs merged together
transfo.process(mtree, *all_args, **all_kwargs)
# Process
for transformer in transformers.all_transformers():
disabled = options.get('disabled_transformers')
if not disabled or transformer.name not in disabled:
self._process(transformer, False)
# 1- first split our path into dirs + basename + ext
apply_transfo('split_path_components')
# 2- guess the file type now (will be useful later)
apply_transfo('guess_filetype', filetype)
if mtree.guess['type'] == 'unknown':
return
# 3- split each of those into explicit groups (separated by parentheses
# or square brackets)
apply_transfo('split_explicit_groups')
# 4- try to match information for specific patterns
# NOTE: order needs to comply to the following:
# - website before language (eg: tvu.org.ru vs russian)
# - language before episodes_rexps
# - properties before language (eg: he-aac vs hebrew)
# - release_group before properties (eg: XviD-?? vs xvid)
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
'guess_properties', 'guess_language',
'guess_video_rexps',
'guess_episodes_rexps', 'guess_weak_episodes_rexps' ]
else:
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
'guess_properties', 'guess_language',
'guess_video_rexps' ]
if 'nolanguage' in opts:
strategy.remove('guess_language')
for name in strategy:
apply_transfo(name)
# more guessers for both movies and episodes
apply_transfo('guess_bonus_features')
apply_transfo('guess_year', skip_first_year=('skip_first_year' in opts))
if 'nocountry' not in opts:
apply_transfo('guess_country')
apply_transfo('guess_idnumber')
# split into '-' separated subgroups (with required separator chars
# around the dash)
apply_transfo('split_on_dash')
# 5- try to identify the remaining unknown groups by looking at their
# position relative to other known elements
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
apply_transfo('guess_episode_info_from_position')
else:
apply_transfo('guess_movie_title_from_position')
# 6- perform some post-processing steps
apply_transfo('post_process')
# Post-process
for transformer in transformers.all_transformers():
disabled = options.get('disabled_transformers')
if not disabled or transformer.name not in disabled:
self._process(transformer, True)
log.debug('Found match tree:\n%s' % u(mtree))
except TransformerException as e:
log.debug('An error has occurred in Transformer %s: %s' % (e.transformer, e))
def _process(self, transformer, post=False):
# Run a single transformer against this matcher's match tree.
# The transformer is skipped when it defines ``should_process`` and that
# call returns a falsy value for (match_tree, options).
# With ``post`` set, ``post_process`` is called instead of ``process``.
if not hasattr(transformer, 'should_process') or transformer.should_process(self.match_tree, self.options):
if post:
transformer.post_process(self.match_tree, self.options)
else:
transformer.process(self.match_tree, self.options)
# NOTE(review): indentation is lost in this listing; the append below
# presumably belongs to the non-post branch, so that each processed
# transformer is recorded once for ``second_pass_options`` - TODO confirm.
self._transfo_calls.append(transformer)
@property
def second_pass_options(self):
    """Collect the second-pass options offered by the recorded transformers.

    Every transformer in ``self._transfo_calls`` that has a
    ``second_pass_options`` attribute is called with the match tree and the
    current options; each truthy result is merged into one dict, later
    transformers overriding earlier ones on identical keys.
    """
    merged = {}
    for transfo in self._transfo_calls:
        if not hasattr(transfo, 'second_pass_options'):
            continue
        extra = transfo.second_pass_options(self.match_tree, self.options)
        if extra:
            merged.update(extra)
    return merged
def _validate_options(self, options):
valid_filetypes = ('subtitle', 'info', 'video',
'movie', 'moviesubtitle', 'movieinfo',
'episode', 'episodesubtitle', 'episodeinfo')
type_ = options.get('type')
if type_ and type_ not in valid_filetypes:
raise ValueError("filetype needs to be one of %s" % (valid_filetypes,))
def matched(self):
    """Return whatever ``self.match_tree.matched()`` returns."""
    tree = self.match_tree
    return tree.matched()
def build_guess(node, name, value=None, confidence=1.0):
    """Build a ``Guess`` holding a single property.

    With an explicit ``value``, that value is used both as the property
    value and as the recorded input, and no span is set.

    Without one, the property value is ``node.clean_value``, the recorded
    input is ``node.value``, and the span is computed as ``node.span``
    shifted by ``node.offset`` and narrowed by the leading/trailing
    characters of ``node.value`` that precede the first / follow the last
    character of the cleaned value.

    :param node: object providing ``value``, ``clean_value``, ``span`` and
        ``offset``
    :param name: name of the property to set
    :param value: explicit property value, or None to use the node's
    :param confidence: confidence given to the guess
    :return: the new Guess
    """
    if value is not None:
        guess = Guess({name: value}, confidence=confidence)
        guess.metadata().input = value
        return guess

    raw_value = node.value
    clean_value = node.clean_value
    guess = Guess({name: clean_value}, confidence=confidence)
    guess.metadata().input = raw_value

    left_offset = 0
    right_offset = 0
    # an empty cleaned value has no first/last character to look for;
    # keep both offsets at 0 instead of failing with an IndexError
    if clean_value:
        first_char = clean_value[0]
        last_char = clean_value[-1]
        for char in raw_value:
            if char == first_char:
                break
            left_offset += 1
        for char in reversed(raw_value):
            if char == last_char:
                break
            right_offset += 1

    guess.metadata().span = (node.span[0] - node.offset + left_offset,
                             node.span[1] - node.offset - right_offset)
    return guess
def found_property(node, name, value=None, confidence=1.0, update_guess=True, logger=None):
    """Build a guess for one property and attach it to ``node``.

    :param node: match-tree node the property was found in.
    :param name: property name.
    :param value: property value, or None to use the node's cleaned value.
    :param confidence: confidence of the guess.
    :param update_guess: forwarded to ``found_guess``.
    :param logger: logger to report to; when falsy, the ``log`` attribute of
        the caller's ``self`` is used.
    :return: the value returned by ``found_guess``.
    """
    if not logger:
        # automatically retrieve the log object from the caller frame; this
        # must stay in this function so that index 1 is the direct caller
        caller_locals = inspect.stack()[1][0].f_locals
        logger = caller_locals['self'].log

    new_guess = build_guess(node, name, value, confidence)
    return found_guess(node, new_guess, update_guess=update_guess, logger=logger)
def found_guess(node, guess, update_guess=True, logger=None):
    """Attach ``guess`` to ``node`` and log it.

    - If the node has no guess yet, ``guess`` becomes the node's guess.
    - Otherwise, with ``update_guess`` true, the node's existing guess is
      updated through ``update_highest_confidence``.
    - Otherwise a child node covering the guess span is added and receives
      the guess.

    :return: ``node.guess`` after the operation.
    """
    if not node.guess:
        node.guess = guess
    elif update_guess:
        node.guess.update_highest_confidence(guess)
    else:
        sub_node = node.add_child(guess.metadata().span)
        sub_node.guess = guess

    log_found_guess(guess, logger)
    return node.guess
def log_found_guess(guess, logger=None):
    """Emit one debug line per property contained in ``guess``.

    :param guess: object providing ``items()``, ``raw(key)`` and
        ``confidence(key)``.
    :param logger: logger to use; the module-level ``log`` when falsy.
    """
    emit = (logger or log).debug
    for key, found in guess.items():
        details = (key, found, guess.raw(key), guess.confidence(key))
        emit('Property found: %s=%s (%s) (confidence=%.2f)' % details)
def _get_split_spans(node, span):
    """Partition ``node`` around ``span`` and drop the part matching ``span``.

    The list returned by ``node.get_partition_spans(span)`` is modified in
    place: the first entry that starts where ``span`` starts and ends at
    ``span[1]`` or ``span[1] + 1`` is removed.

    :return: the remaining partition spans.
    """
    partition_spans = node.get_partition_spans(span)
    for position, candidate in enumerate(partition_spans):
        if candidate[0] == span[0] and candidate[1] in [span[1], span[1] + 1]:
            del partition_spans[position]
            break
    return partition_spans
class GuessFinder(object):
    """Apply a guessing function to match-tree nodes and store what it finds.

    ``guess_func`` is called with the node's text (padded with one space on
    each side) and the node, plus ``options`` when options are set. It may
    return either a ``Guess`` carrying its own span, or a ``(result, span)``
    pair; a falsy return means nothing was found.
    """

    def __init__(self, guess_func, confidence=None, logger=None, options=None):
        """Store the guessing function and its settings.

        :param guess_func: callable described in the class docstring.
        :param confidence: confidence used when wrapping a non-``Guess`` result.
        :param logger: logger for found guesses; module ``log`` when falsy.
        :param options: options forwarded to ``guess_func``; may contain
            ``skip_nodes``.
        """
        self.guess_func = guess_func
        self.confidence = confidence
        self.logger = logger or log
        self.options = options

    def process_nodes(self, nodes):
        """Run ``process_node`` on each node of ``nodes``."""
        for node in nodes:
            self.process_node(node)

    def process_node(self, node, iterative=True, partial_span=None):
        """Look for a guess in ``node`` and attach it to the tree.

        :param node: match-tree node to analyse.
        :param iterative: when true, the node is partitioned around the match
            and the remaining children are processed recursively; when false
            the guess is attached to ``node`` itself.
        :param partial_span: optional ``(start, end)`` slice of the node value
            to restrict the search to.
        """
        if partial_span:
            value = node.value[partial_span[0]:partial_span[1]]
        else:
            value = node.value
        string = ' %s ' % value  # add sentinels

        if not self.options:
            matcher_result = self.guess_func(string, node)
        else:
            matcher_result = self.guess_func(string, node, self.options)

        if matcher_result:
            # a Guess carries its span in its metadata; anything else is
            # expected to be a (result, span) pair
            if not isinstance(matcher_result, Guess):
                result, span = matcher_result
            else:
                result, span = matcher_result, matcher_result.metadata().span

            if result:
                # readjust span to compensate for sentinels
                span = (span[0] - 1, span[1] - 1)

                # readjust span to compensate for partial_span
                if partial_span:
                    span = (span[0] + partial_span[0], span[1] + partial_span[0])

                # stays None unless the match coincides with a node to skip
                partition_spans = None
                if self.options and 'skip_nodes' in self.options:
                    skip_nodes = self.options.get('skip_nodes')
                    for skip_node in skip_nodes:
                        # NOTE(review): ``and`` binds tighter than ``or``, so
                        # this reads ``(A and B) or C`` — confirm that
                        # ``A and (B or C)`` was not intended.
                        if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\
                                skip_node.span == span or\
                                skip_node.span == (span[0] + skip_node.offset, span[1] + skip_node.offset):
                            if partition_spans is None:
                                partition_spans = _get_split_spans(node, skip_node.span)
                            else:
                                new_partition_spans = []
                                for partition_span in partition_spans:
                                    tmp_node = MatchTree(value, span=partition_span, parent=node)
                                    tmp_partitions_spans = _get_split_spans(tmp_node, skip_node.span)
                                    new_partition_spans.extend(tmp_partitions_spans)
                                partition_spans.extend(new_partition_spans)

                if not partition_spans:
                    # restore sentinels compensation

                    if isinstance(result, Guess):
                        guess = result
                    else:
                        guess = Guess(result, confidence=self.confidence, input=string, span=span)

                    if not iterative:
                        found_guess(node, guess, logger=self.logger)
                    else:
                        # children spans are absolute, the match span is
                        # relative to the node
                        absolute_span = (span[0] + node.offset, span[1] + node.offset)
                        node.partition(span)
                        if node.is_leaf():
                            found_guess(node, guess, logger=self.logger)
                        else:
                            found_child = None
                            for child in node.children:
                                if child.span == absolute_span:
                                    found_guess(child, guess, logger=self.logger)
                                    found_child = child
                                    break
                            # keep searching in the parts around the match
                            for child in node.children:
                                if child is not found_child:
                                    self.process_node(child)
                else:
                    # the match was skipped: retry on each remaining region
                    for partition_span in partition_spans:
                        self.process_node(node, partial_span=partition_span)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,12 +18,15 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import UnicodeMixin, base_text_type, Guess
from guessit.textutils import clean_string, str_fill
from __future__ import absolute_import, division, print_function, unicode_literals
import guessit # @UnusedImport needed for doctests
from guessit import UnicodeMixin, base_text_type
from guessit.textutils import clean_default, str_fill
from guessit.patterns import group_delimiters
from guessit.guess import (merge_similar_guesses, merge_all,
choose_int, choose_string)
from guessit.guess import (merge_similar_guesses, smart_merge,
choose_int, choose_string, Guess)
from itertools import takewhile
import copy
import logging
@ -31,23 +34,71 @@ log = logging.getLogger(__name__)
class BaseMatchTree(UnicodeMixin):
"""A MatchTree represents the hierarchical split of a string into its
constituent semantic groups."""
"""A BaseMatchTree is a tree covering the filename, where each
node represents a substring in the filename and can have a ``Guess``
associated with it that contains the information that has been guessed
in this node. Nodes can be further split into subnodes until a proper
split has been found.
def __init__(self, string='', span=None, parent=None):
Each node has the following attributes:
- string = the original string of which this node represents a region
- span = a pair of (begin, end) indices delimiting the substring
- parent = parent node
- children = list of children nodes
- guess = Guess()
BaseMatchTrees are displayed in the following way:
>>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
>>> print(guessit.IterativeMatcher(path).match_tree)
000000 1111111111111111 2222222222222222222222222222222222222222222 333
000000 0000000000111111 0000000000111111222222222222222222222222222 000
011112 011112000011111222222222222222222 000
011112222222222222
0000011112222
01112 0111
Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
tttttttttt yyyy yyyy fffff ssss aaa vvvv rrr ccc
Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
The last line contains the filename, which you can use as a reference.
The previous line contains the type of property that has been found.
The line before that contains the filename, where all the found groups
have been blanked. Basically, what is left on this line are the leftover
groups which could not be identified.
The lines before that indicate the indices of the groups in the tree.
For instance, the part of the filename 'BDRip' is the leaf with index
``(2, 2, 1)`` (read from top to bottom), and its meaning is 'format'
(as shown by the ``f``'s on the last-but-one line).
"""
def __init__(self, string='', span=None, parent=None, clean_function=None):
self.string = string
self.span = span or (0, len(string))
self.parent = parent
self.children = []
self.guess = Guess()
self._clean_value = None
self._clean_function = clean_function or clean_default
@property
def value(self):
"""Return the substring that this node matches."""
return self.string[self.span[0]:self.span[1]]
@property
def clean_value(self):
return clean_string(self.value)
"""Return a cleaned value of the matched substring, with better
presentation formatting (punctuation marks removed, duplicate
spaces, ...)"""
if self._clean_value is None:
self._clean_value = self.clean_string(self.value)
return self._clean_value
def clean_string(self, string):
return self._clean_function(string)
@property
def offset(self):
@ -55,6 +106,8 @@ class BaseMatchTree(UnicodeMixin):
@property
def info(self):
"""Return a dict containing all the info guessed by this node,
subnodes included."""
result = dict(self.guess)
for c in self.children:
@ -64,6 +117,7 @@ class BaseMatchTree(UnicodeMixin):
@property
def root(self):
"""Return the root node of the tree."""
if not self.parent:
return self
@ -71,28 +125,43 @@ class BaseMatchTree(UnicodeMixin):
@property
def depth(self):
"""Return the depth of this node."""
if self.is_leaf():
return 0
return 1 + max(c.depth for c in self.children)
def is_leaf(self):
"""Return whether this node is a leaf or not."""
return self.children == []
def add_child(self, span):
child = MatchTree(self.string, span=span, parent=self)
"""Add a new child node to this node with the given span."""
child = MatchTree(self.string, span=span, parent=self, clean_function=self._clean_function)
self.children.append(child)
return child
def partition(self, indices):
def get_partition_spans(self, indices):
"""Return the list of absolute spans for the regions of the original
string defined by splitting this node at the given indices (relative
to this node)"""
indices = sorted(indices)
if indices[0] != 0:
indices.insert(0, 0)
if indices[-1] != len(self.value):
indices.append(len(self.value))
spans = []
for start, end in zip(indices[:-1], indices[1:]):
self.add_child(span=(self.offset + start,
spans.append((self.offset + start,
self.offset + end))
return spans
def partition(self, indices):
"""Partition this node by splitting it at the given indices,
relative to this node."""
for partition_span in self.get_partition_spans(indices):
self.add_child(span=partition_span)
def split_on_components(self, components):
offset = 0
@ -104,6 +173,7 @@ class BaseMatchTree(UnicodeMixin):
offset = end
def nodes_at_depth(self, depth):
"""Return all the nodes at a given depth in the tree"""
if depth == 0:
yield self
@ -113,38 +183,109 @@ class BaseMatchTree(UnicodeMixin):
@property
def node_idx(self):
"""Return this node's index in the tree, as a tuple.
If this node is the root of the tree, then return ()."""
if self.parent is None:
return ()
return self.parent.node_idx + (self.parent.children.index(self),)
return self.parent.node_idx + (self.node_last_idx,)
@property
def node_last_idx(self):
if self.parent is None:
return None
return self.parent.children.index(self)
def node_at(self, idx):
"""Return the node at the given index in the subtree rooted at
this node."""
if not idx:
return self
try:
return self.children[idx[0]].node_at(idx[1:])
except:
except IndexError:
raise ValueError('Non-existent node index: %s' % (idx,))
def nodes(self):
"""Return all the nodes and subnodes in this tree."""
yield self
for child in self.children:
for node in child.nodes():
yield node
def _leaves(self):
def leaves(self):
"""Return a generator over all the nodes that are leaves."""
if self.is_leaf():
yield self
else:
for child in self.children:
# pylint: disable=W0212
for leaf in child._leaves():
for leaf in child.leaves():
yield leaf
def leaves(self):
return list(self._leaves())
def group_node(self):
    """Return the node at index ``node_idx[:2]`` for this node, i.e. the
    result of ``_other_group_node`` with an offset of 0 (None if there is
    no such node)."""
    own_group = self._other_group_node(0)
    return own_group
def previous_group_node(self):
    """Return the group node located one position before this node's group
    (``_other_group_node`` with an offset of -1), or None."""
    preceding_group = self._other_group_node(-1)
    return preceding_group
def next_group_node(self):
    """Return the group node located one position after this node's group
    (``_other_group_node`` with an offset of +1), or None."""
    following_group = self._other_group_node(+1)
    return following_group
def _other_group_node(self, offset):
    """Return the depth-2 node ``offset`` positions away from this node's own.

    The first two components of ``node_idx`` identify the group; the second
    one is shifted by ``offset`` and the resulting index is looked up from
    the root.

    :return: the node found, or None when this node is shallower than depth
        2, the shifted index is negative, or no node exists at that index.
    """
    idx = self.node_idx
    if len(idx) <= 1:
        return None

    first, second = idx[:2]
    shifted = second + offset
    if shifted < 0:
        return None

    try:
        return self.root.node_at((first, shifted))
    except ValueError:
        # node_at signals a non-existent index with ValueError
        return None
def previous_leaf(self, leaf):
    """Return the leaf just before ``leaf`` among this node's leaves, as
    resolved by ``_other_leaf`` with an offset of -1."""
    preceding = self._other_leaf(leaf, -1)
    return preceding
def next_leaf(self, leaf):
    """Return the leaf just after ``leaf`` among this node's leaves, as
    resolved by ``_other_leaf`` with an offset of +1."""
    following = self._other_leaf(leaf, +1)
    return following
def _other_leaf(self, leaf, offset):
    """Return the leaf located ``offset`` positions away from ``leaf``.

    :param leaf: a leaf of the subtree rooted at this node; ``list.index``
        raises ``ValueError`` if it is not one of them.
    :param offset: signed distance, in leaf order, from ``leaf``.
    :return: the leaf at that position, or None when it falls outside the
        list of leaves.
    """
    leaves = list(self.leaves())
    index = leaves.index(leaf) + offset
    # index 0 is a valid target: it is the previous leaf of the second leaf
    if 0 <= index < len(leaves):
        return leaves[index]
    return None
def previous_leaves(self, leaf):
    """Return the leaves located before ``leaf``, nearest first.

    :param leaf: a leaf of the subtree rooted at this node.
    :return: a new list, empty when ``leaf`` is the first leaf.
    """
    ordered = list(self.leaves())
    position = ordered.index(leaf)
    # slicing up to position 0 naturally yields an empty list
    return list(reversed(ordered[:position]))
def next_leaves(self, leaf):
    """Return the leaves located after ``leaf``, in tree order.

    :param leaf: a leaf of the subtree rooted at this node; ``list.index``
        raises ``ValueError`` if it is not one of them.
    :return: a new list, empty when ``leaf`` is the last leaf.
    """
    leaves = list(self.leaves())
    # valid for the very first leaf too (index 0), which the previous
    # ``index > 0`` guard wrongly answered with an empty list
    return leaves[leaves.index(leaf) + 1:]
def to_string(self):
"""Return a readable string representation of this tree.
The result is a multi-line string, where the lines are:
- line 1 -> N-2: each line contains the nodes at the given depth in the tree
- line N-2: original string where all the found groups have been blanked
- line N-1: type of property that has been found
- line N: the original string, which you can use as a reference.
"""
empty_line = ' ' * len(self.string)
def to_hex(x):
@ -153,14 +294,17 @@ class BaseMatchTree(UnicodeMixin):
return x
def meaning(result):
mmap = { 'episodeNumber': 'E',
mmap = {'episodeNumber': 'E',
'season': 'S',
'extension': 'e',
'format': 'f',
'language': 'l',
'country': 'C',
'videoCodec': 'v',
'videoProfile': 'v',
'audioCodec': 'a',
'audioProfile': 'a',
'audioChannels': 'a',
'website': 'w',
'container': 'c',
'series': 'T',
@ -168,7 +312,8 @@ class BaseMatchTree(UnicodeMixin):
'date': 'd',
'year': 'y',
'releaseGroup': 'r',
'screenSize': 's'
'screenSize': 's',
'other': 'o'
}
if result is None:
@ -180,7 +325,7 @@ class BaseMatchTree(UnicodeMixin):
return 'x'
lines = [ empty_line ] * (self.depth + 2) # +2: remaining, meaning
lines = [empty_line] * (self.depth + 2) # +2: remaining, meaning
lines[-2] = self.string
for node in self.nodes():
@ -198,63 +343,61 @@ class BaseMatchTree(UnicodeMixin):
lines.append(self.string)
return '\n'.join(lines)
return '\n'.join(l.rstrip() for l in lines)
def __unicode__(self):
return self.to_string()
def __repr__(self):
return '<MatchTree: root=%s>' % self.value
class MatchTree(BaseMatchTree):
"""The MatchTree contains a few "utility" methods which are not necessary
for the BaseMatchTree, but add a lot of convenience for writing
higher-level rules."""
higher-level rules.
"""
def _unidentified_leaves(self,
valid=lambda leaf: len(leaf.clean_value) >= 2):
for leaf in self._leaves():
def unidentified_leaves(self,
valid=lambda leaf: len(leaf.clean_value) > 0):
"""Return a generator of leaves that are not empty."""
for leaf in self.leaves():
if not leaf.guess and valid(leaf):
yield leaf
def unidentified_leaves(self,
valid=lambda leaf: len(leaf.clean_value) >= 2):
return list(self._unidentified_leaves(valid))
def _leaves_containing(self, property_name):
def leaves_containing(self, property_name):
"""Return a generator of leaves that guessed the given property."""
if isinstance(property_name, base_text_type):
property_name = [ property_name ]
property_name = [property_name]
for leaf in self._leaves():
for leaf in self.leaves():
for prop in property_name:
if prop in leaf.guess:
yield leaf
break
def leaves_containing(self, property_name):
return list(self._leaves_containing(property_name))
def first_leaf_containing(self, property_name):
"""Return the first leaf containing the given property."""
try:
return next(self._leaves_containing(property_name))
return next(self.leaves_containing(property_name))
except StopIteration:
return None
def _previous_unidentified_leaves(self, node):
node_idx = node.node_idx
for leaf in self._unidentified_leaves():
if leaf.node_idx < node_idx:
yield leaf
def previous_unidentified_leaves(self, node):
return list(self._previous_unidentified_leaves(node))
def _previous_leaves_containing(self, node, property_name):
"""Return a generator of non-empty leaves that are before the given
node (in the string)."""
node_idx = node.node_idx
for leaf in self._leaves_containing(property_name):
for leaf in self.unidentified_leaves():
if leaf.node_idx < node_idx:
yield leaf
def previous_leaves_containing(self, node, property_name):
return list(self._previous_leaves_containing(node, property_name))
"""Return a generator of leaves containing the given property that are
before the given node (in the string)."""
node_idx = node.node_idx
for leaf in self.leaves_containing(property_name):
if leaf.node_idx < node_idx:
yield leaf
def is_explicit(self):
"""Return whether the group was explicitly enclosed by
@ -262,26 +405,22 @@ class MatchTree(BaseMatchTree):
return (self.value[0] + self.value[-1]) in group_delimiters
def matched(self):
"""Return a single guess that contains all the info found in the
nodes of this tree, trying to merge properties as well as possible.
"""
if not getattr(self, '_matched_result', None):
# we need to make a copy here, as the merge functions work in place and
# calling them on the match tree would modify it
parts = [node.guess for node in self.nodes() if node.guess]
parts = copy.deepcopy(parts)
parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]
# 1- try to merge similar information together and give it a higher
# confidence
for int_part in ('year', 'season', 'episodeNumber'):
merge_similar_guesses(parts, int_part, choose_int)
for string_part in ('title', 'series', 'container', 'format',
'releaseGroup', 'website', 'audioCodec',
'videoCodec', 'screenSize', 'episodeFormat',
'audioChannels', 'idNumber'):
merge_similar_guesses(parts, string_part, choose_string)
# 2- merge the rest, potentially discarding information not properly
# merged before
result = merge_all(parts,
append=['language', 'subtitleLanguage', 'other'])
result = smart_merge(parts)
log.debug('Final result: ' + result.nice_string())
return result
self._matched_result = result
for unidentified_leaves in self.unidentified_leaves():
if 'unidentified' not in self._matched_result:
self._matched_result['unidentified'] = []
self._matched_result['unidentified'].append(unidentified_leaves.clean_value)
return self._matched_result

69
libs/guessit/options.py Normal file
View file

@ -0,0 +1,69 @@
from argparse import ArgumentParser
def build_opts(transformers=None):
    """Build the command-line argument parser and its argument groups.

    :param transformers: optional iterable of objects exposing
        ``register_arguments(opts, naming_opts, output_opts,
        information_opts, webservice_opts, other_opts)``; each one gets the
        chance to add its own arguments.
    :return: tuple ``(opts, naming_opts, output_opts, information_opts,
        webservice_opts, other_opts)`` — the parser followed by its groups.
    """
    opts = ArgumentParser()
    opts.add_argument(dest='filename', help='Filename or release name to guess', nargs='*')

    naming_opts = opts.add_argument_group("Naming")
    naming_opts.add_argument('-t', '--type', dest='type', default=None,
                             help='The suggested file type: movie, episode. If undefined, type will be guessed.')
    naming_opts.add_argument('-n', '--name-only', dest='name_only', action='store_true', default=False,
                             help='Parse files as name only. Disable folder parsing, extension parsing, and file content analysis.')
    naming_opts.add_argument('-c', '--split-camel', dest='split_camel', action='store_true', default=False,
                             help='Split camel case part of filename.')
    naming_opts.add_argument('-X', '--disabled-transformer', action='append', dest='disabled_transformers',
                             help='Transformer to disable (can be used multiple time)')

    output_opts = opts.add_argument_group("Output")
    output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False,
                             help='Display debug output')
    output_opts.add_argument('-P', '--show-property', dest='show_property', default=None,
                             help='Display the value of a single property (title, series, videoCodec, year, type ...)')
    output_opts.add_argument('-u', '--unidentified', dest='unidentified', action='store_true', default=False,
                             help='Display the unidentified parts.')
    output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=False,
                             help='Display advanced information for filename guesses, as json output')
    output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=False,
                             help='Display information for filename guesses as yaml output (like unit-test)')
    output_opts.add_argument('-f', '--input-file', dest='input_file', default=False,
                             help='Read filenames from an input file.')
    output_opts.add_argument('-d', '--demo', action='store_true', dest='demo', default=False,
                             help='Run a few builtin tests instead of analyzing a file')

    information_opts = opts.add_argument_group("Information")
    information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=False,
                                  help='Display properties that can be guessed.')
    information_opts.add_argument('-V', '--values', dest='values', action='store_true', default=False,
                                  help='Display property values that can be guessed.')
    information_opts.add_argument('-s', '--transformers', dest='transformers', action='store_true', default=False,
                                  help='Display transformers that can be used.')
    information_opts.add_argument('--version', dest='version', action='store_true', default=False,
                                  help='Display the guessit version.')

    webservice_opts = opts.add_argument_group("guessit.io")
    webservice_opts.add_argument('-b', '--bug', action='store_true', dest='submit_bug', default=False,
                                 help='Submit a wrong detection to the guessit.io service')

    other_opts = opts.add_argument_group("Other features")
    other_opts.add_argument('-i', '--info', dest='info', default='filename',
                            help='The desired information type: filename, video, hash_mpc or a hash from python\'s '
                                 'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
                                 'them, comma-separated')

    # let every transformer contribute its own command-line arguments
    if transformers:
        for transformer in transformers:
            transformer.register_arguments(opts, naming_opts, output_opts, information_opts, webservice_opts, other_opts)

    return opts, naming_opts, output_opts, information_opts, webservice_opts, other_opts
# Parser and argument groups produced by the last call to reload(); all of
# them are None until reload() has been called.
_opts = _naming_opts = _output_opts = _information_opts = _webservice_opts = _other_opts = None


def reload(transformers=None):
    """Rebuild the module-level parser and groups through ``build_opts``.

    :param transformers: forwarded unchanged to ``build_opts``.
    """
    global _opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts
    built = build_opts(transformers)
    _opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts = built


def get_opts():
    """Return the parser stored by the last ``reload`` (None before that)."""
    return _opts

View file

@ -1,250 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
import re
subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ]
info_exts = [ 'nfo' ]
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv']
group_delimiters = [ '()', '[]', '{}' ]
# separator character regexp
sep = r'[][,)(}{+ /\._-]' # regexp art, hehe :D
# character used to represent a deleted char (when matching groups)
deleted = '_'
# format: [ (regexp, confidence, span_adjust) ]
episode_rexps = [ # ... Season 2 ...
(r'season (?P<season>[0-9]+)', 1.0, (0, 0)),
(r'saison (?P<season>[0-9]+)', 1.0, (0, 0)),
# ... s02e13 ...
(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<episodeNumber>(?:-?[eE-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
# ... s03-x02 ... # FIXME: redundant? remove it?
#(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<bonusNumber>(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
# ... 2x13 ...
(r'[^0-9](?P<season>[0-9]{1,2})[^0-9 .-]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),
# ... s02 ...
#(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)),
(r's(?P<season>[0-9]{1,2})[^0-9]', 0.6, (0, -1)),
# v2 or v3 for some mangas which have multiples rips
(r'(?P<episodeNumber>[0-9]{1,3})v[23]' + sep, 0.6, (0, 0)),
# ... ep 23 ...
('ep' + sep + r'(?P<episodeNumber>[0-9]{1,2})[^0-9]', 0.7, (0, -1)),
# ... e13 ... for a mini-series without a season number
(sep + r'e(?P<episodeNumber>[0-9]{1,2})' + sep, 0.6, (1, -1))
]
weak_episode_rexps = [ # ... 213 or 0106 ...
(sep + r'(?P<episodeNumber>[0-9]{2,4})' + sep, (1, -1))
]
non_episode_title = [ 'extras', 'rip' ]
video_rexps = [ # cd number
(r'cd ?(?P<cdNumber>[0-9])( ?of ?(?P<cdNumberTotal>[0-9]))?', 1.0, (0, 0)),
(r'(?P<cdNumberTotal>[1-9]) cds?', 0.9, (0, 0)),
# special editions
(r'edition' + sep + r'(?P<edition>collector)', 1.0, (0, 0)),
(r'(?P<edition>collector)' + sep + 'edition', 1.0, (0, 0)),
(r'(?P<edition>special)' + sep + 'edition', 1.0, (0, 0)),
(r'(?P<edition>criterion)' + sep + 'edition', 1.0, (0, 0)),
# director's cut
(r"(?P<edition>director'?s?" + sep + "cut)", 1.0, (0, 0)),
# video size
(r'(?P<width>[0-9]{3,4})x(?P<height>[0-9]{3,4})', 0.9, (0, 0)),
# website
(r'(?P<website>www(\.[a-zA-Z0-9]+){2,3})', 0.8, (0, 0)),
# bonusNumber: ... x01 ...
(r'x(?P<bonusNumber>[0-9]{1,2})', 1.0, (0, 0)),
# filmNumber: ... f01 ...
(r'f(?P<filmNumber>[0-9]{1,2})', 1.0, (0, 0))
]
websites = [ 'tvu.org.ru', 'emule-island.com', 'UsaBit.com', 'www.divx-overnet.com',
'sharethefiles.com' ]
unlikely_series = [ 'series' ]
# prop_multi is a dict of { property_name: { canonical_form: [ pattern ] } }
# pattern is a string considered as a regexp, with the addition that dashes are
# replaced with '([ \.-_])?' which matches more types of separators (or none)
# note: simpler patterns need to be at the end of the list to not shadow more
# complete ones, eg: 'AAC' needs to come after 'He-AAC'
# ie: from most specific to less specific
prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ],
'HD-DVD': [ 'HD-(?:DVD)?-Rip', 'HD-DVD' ],
'BluRay': [ 'Blu-ray', 'B[DR]Rip' ],
'HDTV': [ 'HD-TV' ],
'DVB': [ 'DVB-Rip', 'DVB', 'PD-TV' ],
'WEBRip': [ 'WEB-Rip' ],
'Screener': [ 'DVD-SCR', 'Screener' ],
'VHS': [ 'VHS' ],
'WEB-DL': [ 'WEB-DL' ] },
'is3D': { True: [ '3D' ] },
'screenSize': { '480p': [ '480[pi]?' ],
'720p': [ '720[pi]?' ],
'1080i': [ '1080i' ],
'1080p': [ '1080p', '1080[^i]' ] },
'videoCodec': { 'XviD': [ 'Xvid' ],
'DivX': [ 'DVDivX', 'DivX' ],
'h264': [ '[hx]-264' ],
'Rv10': [ 'Rv10' ],
'Mpeg2': [ 'Mpeg2' ] },
# has nothing to do here (or on filenames for that matter), but some
# releases use it and it helps to identify release groups, so we adapt
'videoApi': { 'DXVA': [ 'DXVA' ] },
'audioCodec': { 'AC3': [ 'AC3' ],
'DTS': [ 'DTS' ],
'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] },
'audioChannels': { '5.1': [ r'5\.1', 'DD5[._ ]1', '5ch' ] },
'episodeFormat': { 'Minisode': [ 'Minisodes?' ] }
}
# prop_single dict of { property_name: [ canonical_form ] }
prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'PUKKA',
'CHD', 'ViTE', 'TLF', 'FLAiTE',
'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS',
'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL',
'CtrlHD', 'POD', 'WiKi','IMMERSE', 'FQM',
'2HD', 'CTU', 'HALCYON', 'EbP', 'SiTV',
'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV',
'TLA', 'NTB', 'ASAP', 'MOMENTUM', 'FoV', 'D-Z0N3',
'TrollHD', 'ECI'
],
# potentially confusing release group names (they are words)
'weakReleaseGroup': [ 'DEiTY', 'FiNaLe', 'UnSeeN', 'KiNGS', 'CLUE', 'DIMENSION',
'SAiNTS', 'ARROW', 'EuReKA', 'SiNNERS', 'DiRTY', 'REWARD',
'REPTiLE',
],
'other': [ 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'Audiofixed', 'R5',
'complete', 'classic', # not so sure about these ones, could appear in a title
'ws' ] # widescreen
}
# literal dash used in property patterns, and the regexp fragment (an
# optional separator character) that each dash is expanded to
_dash = '-'
_psep = '[-. _]?'


def _to_rexp(prop):
    """Compile ``prop`` case-insensitively, every dash becoming an optional
    separator."""
    loosened = _psep.join(prop.split(_dash))
    return re.compile(loosened, re.IGNORECASE)
# properties_rexps dict of { property_name: { canonical_form: [ rexp ] } }
# containing the rexps compiled from both prop_multi and prop_single
properties_rexps = dict((type, dict((canonical_form,
[ _to_rexp(pattern) for pattern in patterns ])
for canonical_form, patterns in props.items()))
for type, props in prop_multi.items())
properties_rexps.update(dict((type, dict((canonical_form, [ _to_rexp(canonical_form) ])
for canonical_form in props))
for type, props in prop_single.items()))
def find_properties(string):
    """Search ``string`` for every known property pattern.

    Patterns of the ``weakReleaseGroup`` property are not tried. A match is
    kept only when it is delimited on both sides by a separator character or
    by the string boundary.

    :return: list of ``(property_name, canonical_form, start, end)`` tuples.
    """
    found = []
    for property_name, props in properties_rexps.items():
        # FIXME: this should be done in a more flexible way...
        if property_name in ['weakReleaseGroup']:
            continue

        for canonical, rexps in props.items():
            for value_rexp in rexps:
                hit = value_rexp.search(string)
                if not hit:
                    continue

                start, end = hit.span()
                # make sure our word is always surrounded by separators
                # note: sep is a regexp, but in this case using it as
                #       a char sequence achieves the same goal
                left_ok = start <= 0 or string[start - 1] in sep
                right_ok = end >= len(string) or string[end] in sep
                if left_ok and right_ok:
                    found.append((property_name, canonical, start, end))

    return found
# canonical edition name -> list of synonyms that should map to it
property_synonyms = {
    'Special Edition': ['Special'],
    'Collector Edition': ['Collector'],
    'Criterion Edition': ['Criterion'],
}


def revert_synonyms():
    """Return a dict mapping each lower-cased synonym to its canonical form."""
    return dict((synonym.lower(), canonical)
                for canonical, synonyms in property_synonyms.items()
                for synonym in synonyms)


reverse_synonyms = revert_synonyms()


def canonical_form(string):
    """Return the canonical form of ``string`` when it is a known synonym
    (compared case-insensitively), otherwise ``string`` unchanged."""
    key = string.lower()
    if key in reverse_synonyms:
        return reverse_synonyms[key]
    return string
def compute_canonical_form(property_name, value):
    """Return the canonical form of a property value, or None.

    :param property_name: key into ``properties_rexps``.
    :param value: candidate value; anything that is not a string yields None.
    :return: the first canonical form one of whose patterns matches the
        start of ``value``, or None when nothing matches.
    """
    if not isinstance(value, basestring):
        return None

    for canonical, rexps in properties_rexps[property_name].items():
        if any(rexp.match(value) for rexp in rexps):
            return canonical
    return None

View file

@ -0,0 +1,77 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import re
from guessit import base_text_type
# pairs of opening/closing characters that explicitly enclose a group
group_delimiters = ['()', '[]', '{}']

# separator character regexp
sep = r'[][,)(}:{+ /~/\._-]'  # regexp art, hehe :D

# literal dash used when writing patterns, and the regexp fragment each dash
# is expanded to (an optional non-word character or underscore); the
# fragment is a raw string so that \W is not parsed as a string escape
_dash = '-'
_psep = r'[\W_]?'
def build_or_pattern(patterns, escape=False):
    """Build a non-capturing alternation from a list of possible patterns.

    :param patterns: iterable of pattern strings.
    :param escape: when true, each pattern is passed through ``re.escape``
        and wrapped in its own non-capturing group; otherwise patterns are
        inserted verbatim.
    :return: a string of the form ``(?:p1|p2|...)``. An empty ``patterns``
        yields ``(?:)``, a valid regexp matching the empty string, instead
        of an unbalanced closing parenthesis.
    """
    alternatives = ['(?:%s)' % re.escape(pattern) if escape else pattern
                    for pattern in patterns]
    return '(?:%s)' % '|'.join(alternatives)
def compile_pattern(pattern, enhance=True):
    """Compile a pattern case-insensitively, optionally enhancing it first.

    :param pattern: Pattern to compile (regexp).
    :type pattern: string
    :param enhance: Whether to run the pattern through ``enhance_pattern``
        before compiling it.
    :type enhance: bool

    :return: The compiled pattern
    :rtype: regular expression object
    """
    if enhance:
        pattern = enhance_pattern(pattern)
    return re.compile(pattern, re.IGNORECASE)
def enhance_pattern(pattern):
    """Enhance pattern to match more equivalent values.

    Every dash of the pattern is replaced by the optional-separator fragment
    ``_psep``, so that the pattern accepts more kinds of separators (or none).

    :param pattern: Pattern to enhance (regexp).
    :type pattern: string

    :return: The enhanced pattern
    :rtype: string
    """
    pieces = pattern.split(_dash)
    return _psep.join(pieces)

View file

@ -0,0 +1,32 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
# Known file extensions, written without the leading dot and grouped by kind.
subtitle_exts = ['srt', 'idx', 'sub', 'ssa', 'ass']

info_exts = ['nfo']

# NOTE(review): a few entries below (e.g. 'mka', 'ogg', 'ra', 'wav', 'wma') are
# usually audio-only formats -- confirm they are meant to be treated as video.
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
              'iso']

View file

@ -0,0 +1,150 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import re
# One to four decimal digits. Raw string: ``\d`` is a regexp escape, not a
# string escape.
digital_numeral = r'\d{1,4}'

# Roman numeral. The leading lookahead demands at least one numeral letter,
# because every group that follows may match the empty string.
roman_numeral = "(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})"

# In the three lists below, the position of a word is the number it stands
# for (0 to 20).
english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]

french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]

# Alternative French spellings: no accent, feminine 'une', no hyphens.
french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]
def __build_word_numeral(*args, **kwargs):
    r"""Build a regexp alternation matching any word of the given word lists.

    The words are inserted verbatim (they are not escaped), in the order in
    which they appear in the lists.

    :param args: Word lists to merge into the alternation.
    :type args: lists of strings
    :param kwargs: Accepted for interface compatibility; not used.
    :return: A pattern of the form ``(?:(?=\w+)word1|word2|...)``
    :rtype: string
    :raises ValueError: if the lists contain no word at all
    """
    words = [word for word_list in args for word in word_list]
    if not words:
        # Previously this case failed with an accidental TypeError raised by
        # ``None += ')'``; fail with an explicit message instead.
        raise ValueError('At least one word is required to build a word numeral pattern')
    return r'(?:(?=\w+)' + '|'.join(words) + ')'
# Alternation over every known number word (English, French, alternative French).
word_numeral = __build_word_numeral(
    english_word_numeral_list,
    french_word_numeral_list,
    french_alt_word_numeral_list
)

# Any supported numeral: digits, then roman, then number words.
numeral = '(?:%s)' % '|'.join([digital_numeral, roman_numeral, word_numeral])
# Roman symbols (single letters and subtractive pairs) with their values,
# listed from the largest value down to the smallest. __parse_roman() walks
# this table in order, so the ordering matters.
__romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
)

# roman_numeral anchored at both ends; __parse_roman() uses it to validate its
# input. Compiled without flags, so it only accepts upper-case letters.
__romanNumeralPattern = re.compile('^' + roman_numeral + '$')
def __parse_roman(value):
    """Convert an upper-case Roman numeral to an integer.

    :param value: Roman numeral to convert
    :type value: string
    :return: The integer value of the numeral
    :rtype: int
    :raises ValueError: if value is not accepted by the Roman numeral pattern
    """
    if __romanNumeralPattern.search(value) is None:
        raise ValueError('Invalid Roman numeral: %s' % value)

    total = 0
    position = 0
    # The table goes from the largest symbol to the smallest: consume each
    # symbol as many times as it appears at the current position.
    for symbol, amount in __romanNumeralMap:
        width = len(symbol)
        while value.startswith(symbol, position):
            total += amount
            position += width
    return total
def __parse_word(value):
    """Convert a number word to an integer.

    The lower-cased value is looked up in the English, French and alternative
    French word lists, in that order; its position in the first list that
    contains it is the result.

    :param value: Number word to convert
    :type value: string
    :rtype: int
    :raises ValueError: if the word is in none of the lists
    """
    lowered = value.lower()
    for words in (english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list):
        if lowered in words:
            return words.index(lowered)
    raise ValueError
# First run of digits in a string, ignoring any non-digit characters around
# it. Raw string: ``\d`` is a regexp escape, not a string escape.
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """Parse a numeric value into integer.

    The input can be an integer written as a string, a roman numeral or a
    number word. The enabled parsers are tried in that order and the first one
    that succeeds wins.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string
    :param int_enabled: Try to parse the value as a decimal integer.
    :type int_enabled: boolean
    :param roman_enabled: Try to parse the value as a roman numeral.
    :type roman_enabled: boolean
    :param word_enabled: Try to parse the value as a number word.
    :type word_enabled: boolean
    :param clean: Be tolerant about what surrounds the numeral: the integer
        parser uses the first run of digits found in the value, and the roman
        and word parsers try each whitespace-separated token (roman tokens are
        upper-cased first) before trying the whole value.
    :type clean: boolean

    :return: Numeric value
    :rtype: int
    :raises ValueError: if none of the enabled parsers can parse the value
    """
    if int_enabled:
        try:
            if clean:
                match = _clean_re.match(value)
                if match:
                    return int(match.group(1))
            return int(value)
        except ValueError:
            pass
    if roman_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_roman(word.upper())
                    except ValueError:
                        # Not a roman numeral: try the next token.
                        pass
            return __parse_roman(value)
        except ValueError:
            pass
    if word_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_word(word)
                    except ValueError:
                        # Not a known number word: try the next token.
                        pass
            return __parse_word(value)
        except ValueError:
            pass
    raise ValueError('Invalid numeral: ' + value)

View file

@ -0,0 +1,21 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals

View file

@ -0,0 +1,219 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.options import reload as reload_options
from stevedore import ExtensionManager
from pkg_resources import EntryPoint
from stevedore.extension import Extension
from logging import getLogger
# Module-level logger, named after this module.
log = getLogger(__name__)
class Transformer(object):  # pragma: no cover
    """Base class for transformers.

    Every hook has a neutral default implementation, so a subclass only
    overrides the ones it needs.
    """

    def __init__(self, priority=0):
        """Store the priority and create a logger named after the class.

        :param priority: Ordering key; the extension managers of this module
            sort transformers by decreasing priority.
        """
        self.priority = priority
        self.log = getLogger(self.name)

    @property
    def name(self):
        """Name of the concrete class of this transformer."""
        return type(self).__name__

    def supported_properties(self):
        """Return the properties supported by this transformer (none by default)."""
        return dict()

    def second_pass_options(self, mtree, options=None):
        """Return options for a second pass; the default is None."""
        return None

    def should_process(self, mtree, options=None):
        """Tell whether process() should run; the default is always True."""
        return True

    def process(self, mtree, options=None):
        """Processing hook; does nothing by default."""

    def post_process(self, mtree, options=None):
        """Post-processing hook; does nothing by default."""

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        """Argument registration hook; does nothing by default."""

    def rate_quality(self, guess, *props):
        """Rate the quality of a guess; the default rate is 0."""
        return 0
class CustomTransformerExtensionManager(ExtensionManager):
    """stevedore extension manager that keeps transformers ordered by priority.

    It overrides the plugin loading hooks of ExtensionManager and adds helpers
    to fetch transformer objects and to register extra ones after loading.
    """

    # NOTE(review): invoke_kwds={} is a mutable default; this class only
    # forwards it to the base class -- confirm the base class never mutates it.
    def __init__(self, namespace='guessit.transformer', invoke_on_load=True,
                 invoke_args=(), invoke_kwds={}, propagate_map_exceptions=True, on_load_failure_callback=None,
                 verify_requirements=False):
        """Forward every argument, unchanged, to ExtensionManager.__init__."""
        super(CustomTransformerExtensionManager, self).__init__(namespace=namespace,
                                                                invoke_on_load=invoke_on_load,
                                                                invoke_args=invoke_args,
                                                                invoke_kwds=invoke_kwds,
                                                                propagate_map_exceptions=propagate_map_exceptions,
                                                                on_load_failure_callback=on_load_failure_callback,
                                                                verify_requirements=verify_requirements)

    def order_extensions(self, extensions):
        """Order the loaded transformers

        The list is sorted in place by decreasing ``ext.obj.priority`` and
        then returned. The priorities are expected to enforce those rules:
         - website before language (eg: tvu.org.ru vs russian)
         - language before episodes_rexps
         - properties before language (eg: he-aac vs hebrew)
         - release_group before properties (eg: XviD-?? vs xvid)
        """
        extensions.sort(key=lambda ext: -ext.obj.priority)
        return extensions

    def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds, verify_requirements=True):
        """Load the plugin behind an entry point and wrap it in an Extension.

        :param ep: Entry point to load
        :param invoke_on_load: If true, the loaded plugin is called with
            invoke_args / invoke_kwds and the result becomes the extension's
            object; otherwise the extension's object is None.
        :param verify_requirements: Accepted but not used by this override.
        :return: ``Extension(ep.name, ep, plugin, obj)``
        """
        if not ep.dist:
            # `require` argument of ep.load() is deprecated in newer versions of setuptools
            # Use whichever loading method this entry point object provides.
            if hasattr(ep, 'resolve'):
                plugin = ep.resolve()
            elif hasattr(ep, '_load'):
                plugin = ep._load()
            else:
                plugin = ep.load(require=False)
        else:
            plugin = ep.load()
        if invoke_on_load:
            obj = plugin(*invoke_args, **invoke_kwds)
        else:
            obj = None
        return Extension(ep.name, ep, plugin, obj)

    def _load_plugins(self, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
        """Load the plugins through the base class, then order them by priority."""
        return self.order_extensions(super(CustomTransformerExtensionManager, self)._load_plugins(invoke_on_load, invoke_args, invoke_kwds, verify_requirements))

    def objects(self):
        """Return the result of mapping _get_obj over the extensions (see ExtensionManager.map)."""
        return self.map(self._get_obj)

    def _get_obj(self, ext):
        """Return the object held by an extension."""
        return ext.obj

    def object(self, name):
        """Return the object of the extension called name, or None if the lookup raises KeyError."""
        try:
            return self[name].obj
        except KeyError:
            return None

    def register_module(self, name=None, module_name=None, attrs=(), entry_point=None):
        """Load one more transformer and add it to the managed extensions.

        :param name: Entry point name (used when entry_point is not given)
        :param module_name: Module of the transformer (used when entry_point is not given)
        :param attrs: Attribute path inside the module (used when entry_point is not given)
        :param entry_point: Entry point spec to parse; when given, it takes
            precedence over name / module_name / attrs.
        """
        if entry_point:
            ep = EntryPoint.parse(entry_point)
        else:
            ep = EntryPoint(name, module_name, attrs)
        loaded = self._load_one_plugin(ep, invoke_on_load=True, invoke_args=(), invoke_kwds={})
        if loaded:
            self.extensions.append(loaded)
            self.extensions = self.order_extensions(self.extensions)
            # presumably the base class' by-name lookup cache, reset here so
            # that it gets rebuilt -- verify against stevedore.
            self._extensions_by_name = None
class DefaultTransformerExtensionManager(CustomTransformerExtensionManager):
    """Extension manager that also provides guessit's built-in transformers."""

    @property
    def _internal_entry_points(self):
        """Entry point specs ('name = module:Class') of the built-in transformers."""
        return [
            'split_path_components = guessit.transfo.split_path_components:SplitPathComponents',
            'guess_filetype = guessit.transfo.guess_filetype:GuessFiletype',
            'split_explicit_groups = guessit.transfo.split_explicit_groups:SplitExplicitGroups',
            'guess_date = guessit.transfo.guess_date:GuessDate',
            'guess_website = guessit.transfo.guess_website:GuessWebsite',
            'guess_release_group = guessit.transfo.guess_release_group:GuessReleaseGroup',
            'guess_properties = guessit.transfo.guess_properties:GuessProperties',
            'guess_language = guessit.transfo.guess_language:GuessLanguage',
            'guess_video_rexps = guessit.transfo.guess_video_rexps:GuessVideoRexps',
            'guess_episodes_rexps = guessit.transfo.guess_episodes_rexps:GuessEpisodesRexps',
            'guess_weak_episodes_rexps = guessit.transfo.guess_weak_episodes_rexps:GuessWeakEpisodesRexps',
            'guess_bonus_features = guessit.transfo.guess_bonus_features:GuessBonusFeatures',
            'guess_year = guessit.transfo.guess_year:GuessYear',
            'guess_country = guessit.transfo.guess_country:GuessCountry',
            'guess_idnumber = guessit.transfo.guess_idnumber:GuessIdnumber',
            'split_on_dash = guessit.transfo.split_on_dash:SplitOnDash',
            'guess_episode_info_from_position = guessit.transfo.guess_episode_info_from_position:GuessEpisodeInfoFromPosition',
            'guess_movie_title_from_position = guessit.transfo.guess_movie_title_from_position:GuessMovieTitleFromPosition',
            'guess_episode_details = guessit.transfo.guess_episode_details:GuessEpisodeDetails',
            'expected_series = guessit.transfo.expected_series:ExpectedSeries',
            'expected_title = guessit.transfo.expected_title:ExpectedTitle',
        ]

    def _find_entry_points(self, namespace):
        """Return built-in plus packaged entry points for a namespace.

        Entry points are keyed by name; one found by the base class replaces a
        built-in one that has the same name. The built-in ones are only
        included when namespace is this manager's own namespace.
        """
        found = {}

        # Internal entry points
        if namespace == self.namespace:
            builtin = (EntryPoint.parse(spec) for spec in self._internal_entry_points)
            found.update((ep.name, ep) for ep in builtin)

        # Package entry points
        packaged = super(DefaultTransformerExtensionManager, self)._find_entry_points(namespace)
        found.update((ep.name, ep) for ep in packaged)

        return list(found.values())
# Active extension manager; assigned by reload().
_extensions = None
def all_transformers():
    """Return the transformer objects held by the active extension manager."""
    manager = _extensions
    return manager.objects()
def get_transformer(name):
    """Look up one transformer object by extension name in the active manager.

    :param name: the name of the transformer. ie: 'guess_year'
    """
    manager = _extensions
    return manager.object(name)
def add_transformer(entry_point=None, module_name=None, class_name=None, name=None):
    """
    Add a transformer

    This function used to be defined twice in a row, so the second definition
    silently replaced the first one and the three-argument form could no
    longer be called. Both forms are supported again:

    - ``add_transformer('guess_regexp_id = flexget.utils.parsers.transformers.guess_regexp_id:GuessRegexpId')``
    - ``add_transformer('guess_regexp_id', 'flexget.utils.parsers.transformers.guess_regexp_id', 'GuessRegexpId')``

    :param entry_point: entry point spec format when used alone; in the
        three-argument form, the name of the transformer. ie: 'guess_regexp_id'
    :param module_name: the module name. ie: 'flexget.utils.parsers.transformers.guess_regexp_id'
    :param class_name: the class name. ie: 'GuessRegexpId'
    :param name: keyword alternative for the transformer name in the
        three-argument form
    :raises TypeError: if the arguments match neither call form
    """
    if module_name is None and class_name is None and name is None:
        # Entry point spec form.
        if entry_point is None:
            raise TypeError('add_transformer() needs an entry point spec, '
                            'or a name, a module name and a class name')
        _extensions.register_module(entry_point=entry_point)
        return

    # (name, module_name, class_name) form; the name may come positionally
    # (first parameter) or through the ``name`` keyword.
    transformer_name = entry_point if name is None else name
    if transformer_name is None or module_name is None or class_name is None:
        raise TypeError('add_transformer() needs a name, a module name and a class name')
    _extensions.register_module(transformer_name, module_name, (class_name,))
def reload(custom=False):
    """
    Replace the active extension manager with a freshly built one, then
    reload the options with the transformers it holds.

    :param custom: if True, custom manager will be used, else default one.
    Default manager will load default extensions from guessit and setuptools packaging extensions
    Custom manager will not load default extensions from guessit, using only setuptools packaging extensions.
    :type custom: boolean
    """
    global _extensions
    manager_class = CustomTransformerExtensionManager if custom else DefaultTransformerExtensionManager
    _extensions = manager_class()
    reload_options(all_transformers())
# Build the default extension manager as soon as this module is executed.
reload()

65
libs/guessit/quality.py Normal file
View file

@ -0,0 +1,65 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import all_transformers
def best_quality_properties(props, *guesses):
    """Pick the guess with the best quality rate for the given properties.

    Every transformer rates every guess; the first guess that reaches the
    highest single rate is kept.

    :param props: Properties to include in the rating
    :type props: list of strings
    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses, or None when there is
        nothing to rate
    :rtype: :class:`guessit.guess.Guess`
    """
    winner = None
    top_rate = None
    for candidate in guesses:
        for transformer in all_transformers():
            score = transformer.rate_quality(candidate, *props)
            # Only a strictly better rate replaces the current winner.
            if top_rate is not None and not top_rate < score:
                continue
            top_rate, winner = score, candidate
    return winner
def best_quality(*guesses):
    """Retrieve the best quality guess.

    Same rating as best_quality_properties(), without restricting it to
    specific properties.

    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses
    :rtype: :class:`guessit.guess.Guess`
    """
    # With no properties, rate_quality(guess, *props) is rate_quality(guess),
    # so the rating loop does not need to be duplicated here.
    return best_quality_properties((), *guesses)

View file

@ -1,28 +1,28 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Smewt - A smart collection manager
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# Smewt is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Smewt is distributed in the hope that it will be useful,
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import sys
import os, os.path
import os
GREEN_FONT = "\x1B[0;32m"
YELLOW_FONT = "\x1B[0;33m"
@ -31,7 +31,7 @@ RED_FONT = "\x1B[0;31m"
RESET_FONT = "\x1B[0m"
def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):
def setup_logging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False): # pragma: no cover
"""Set up a nice colored logger as the main application logger."""
class SimpleFormatter(logging.Formatter):

BIN
libs/guessit/test/1MB Normal file

Binary file not shown.

View file

@ -0,0 +1,26 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
from guessit.slogging import setup_logging
# Configure logging for the test package with setup_logging()'s defaults...
setup_logging()
# ...then suppress every log record of severity INFO and below.
logging.disable(logging.INFO)

View file

@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test import (test_api, test_autodetect, test_autodetect_all, test_doctests,
test_episode, test_hashes, test_language, test_main,
test_matchtree, test_movie, test_quality, test_utils)
from unittest import TextTestRunner
import logging
def main():
    """Run the suite of every guessit test module, one after the other."""
    modules = (test_api, test_autodetect,
               test_autodetect_all, test_doctests,
               test_episode, test_hashes, test_language,
               test_main, test_matchtree, test_movie,
               test_quality, test_utils)
    # Collect all the suites up front, then give each one its own verbose runner.
    suites = [module.suite for module in modules]
    for suite in suites:
        runner = TextTestRunner(verbosity=2)
        runner.run(suite)


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,489 @@
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
: type: movie
title: Fear and Loathing in Las Vegas
year: 1998
screenSize: 720p
format: HD-DVD
audioCodec: DTS
videoCodec: h264
releaseGroup: ESiR
? Leopard.dmg
: type: unknown
extension: dmg
? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
: type: episode
series: Duckman
season: 1
episodeNumber: 1
title: I, Duckman
date: 2002-11-07
? Series/Neverwhere/Neverwhere.05.Down.Street.[tvu.org.ru].avi
: type: episode
series: Neverwhere
episodeNumber: 5
title: Down Street
website: tvu.org.ru
? Neverwhere.05.Down.Street.[tvu.org.ru].avi
: type: episode
series: Neverwhere
episodeNumber: 5
title: Down Street
website: tvu.org.ru
? Series/Breaking Bad/Minisodes/Breaking.Bad.(Minisodes).01.Good.Cop.Bad.Cop.WEBRip.XviD.avi
: type: episode
series: Breaking Bad
episodeFormat: Minisode
episodeNumber: 1
title: Good Cop Bad Cop
format: WEBRip
videoCodec: XviD
? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
: type: episode
series: Kaamelott
episodeNumber: 23
title: Le Forfait
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
: type: movie
title: The Doors
year: 1991
date: 2008-03-09
format: BluRay
screenSize: 720p
audioCodec: AC3
videoCodec: h264
releaseGroup: HiS@SiLUHD
language: english
website: sharethefiles.com
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
: type: movie
title: M.A.S.H.
year: 1970
videoCodec: DivX
format: DVD
? the.mentalist.501.hdtv-lol.mp4
: type: episode
series: The Mentalist
season: 5
episodeNumber: 1
format: HDTV
releaseGroup: LOL
? the.simpsons.2401.hdtv-lol.mp4
: type: episode
series: The Simpsons
season: 24
episodeNumber: 1
format: HDTV
releaseGroup: LOL
? Homeland.S02E01.HDTV.x264-EVOLVE.mp4
: type: episode
series: Homeland
season: 2
episodeNumber: 1
format: HDTV
videoCodec: h264
releaseGroup: EVOLVE
? /media/Band_of_Brothers-e01-Currahee.mkv
: type: episode
series: Band of Brothers
episodeNumber: 1
title: Currahee
? /media/Band_of_Brothers-x02-We_Stand_Alone_Together.mkv
: type: episode
series: Band of Brothers
bonusNumber: 2
bonusTitle: We Stand Alone Together
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
: type: movie
title: Casino Royale
filmSeries: James Bond
filmNumber: 21
bonusNumber: 2
bonusTitle: Stunts
? /TV Shows/new.girl.117.hdtv-lol.mp4
: type: episode
series: New Girl
season: 1
episodeNumber: 17
format: HDTV
releaseGroup: LOL
? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
: type: episode
series: The Office (US)
country: US
season: 1
episodeNumber: 3
title: Health Care
format: HDTV
videoCodec: XviD
releaseGroup: LOL
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
: type: movie
title: The Insider
year: 1999
bonusNumber: 2
bonusTitle: 60 Minutes Interview-1996
? OSS_117--Cairo,_Nest_of_Spies.mkv
: type: movie
title: OSS 117--Cairo, Nest of Spies
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
: type: movie
title: Rush Beyond The Lighted Stage
bonusNumber: 9
bonusTitle: Between Sun and Moon-2002 Hartford
? House.Hunters.International.S56E06.720p.hdtv.x264.mp4
: type: episode
series: House Hunters International
season: 56
episodeNumber: 6
screenSize: 720p
format: HDTV
videoCodec: h264
? White.House.Down.2013.1080p.BluRay.DTS-HD.MA.5.1.x264-PublicHD.mkv
: type: movie
title: White House Down
year: 2013
screenSize: 1080p
format: BluRay
audioCodec: DTS
audioProfile: HDMA
videoCodec: h264
releaseGroup: PublicHD
audioChannels: "5.1"
? White.House.Down.2013.1080p.BluRay.DTSHD.MA.5.1.x264-PublicHD.mkv
: type: movie
title: White House Down
year: 2013
screenSize: 1080p
format: BluRay
audioCodec: DTS
audioProfile: HDMA
videoCodec: h264
releaseGroup: PublicHD
audioChannels: "5.1"
? Hostages.S01E01.Pilot.for.Air.720p.WEB-DL.DD5.1.H.264-NTb.nfo
: type: episodeinfo
series: Hostages
title: Pilot for Air
season: 1
episodeNumber: 1
screenSize: 720p
format: WEB-DL
audioChannels: "5.1"
videoCodec: h264
audioCodec: DolbyDigital
releaseGroup: NTb
? Despicable.Me.2.2013.1080p.BluRay.x264-VeDeTT.nfo
: type: movieinfo
title: Despicable Me 2
year: 2013
screenSize: 1080p
format: BluRay
videoCodec: h264
releaseGroup: VeDeTT
? Le Cinquieme Commando 1971 SUBFORCED FRENCH DVDRiP XViD AC3 Bandix.mkv
: type: movie
audioCodec: AC3
format: DVD
releaseGroup: Bandix
subtitleLanguage: French
title: Le Cinquieme Commando
videoCodec: XviD
year: 1971
? Le Seigneur des Anneaux - La Communauté de l'Anneau - Version Longue - BDRip.mkv
: type: movie
format: BluRay
title: Le Seigneur des Anneaux
? La petite bande (Michel Deville - 1983) VF PAL MP4 x264 AAC.mkv
: type: movie
audioCodec: AAC
language: French
title: La petite bande
videoCodec: h264
year: 1983
? Retour de Flammes (Gregor Schnitzler 2003) FULL DVD.iso
: type: movie
format: DVD
title: Retour de Flammes
type: movie
year: 2003
? A.Common.Title.Special.2014.avi
: type: movie
year: 2014
title: A Common Title Special
? A.Common.Title.2014.Special.avi
: type: episode
year: 2014
series: A Common Title
title: Special
episodeDetails: Special
? A.Common.Title.2014.Special.Edition.avi
: type: movie
year: 2014
title: A Common Title
edition: Special Edition
? Downton.Abbey.2013.Christmas.Special.HDTV.x264-FoV.mp4
: type: episode
year: 2013
series: Downton Abbey
title: Christmas Special
videoCodec: h264
releaseGroup: FoV
format: HDTV
episodeDetails: Special
? Doctor_Who_2013_Christmas_Special.The_Time_of_The_Doctor.HD
: options: -n
type: episode
series: Doctor Who
other: HD
episodeDetails: Special
title: Christmas Special The Time of The Doctor
year: 2013
? Doctor Who 2005 50th Anniversary Special The Day of the Doctor 3.avi
: type: episode
series: Doctor Who
episodeDetails: Special
title: 50th Anniversary Special The Day of the Doctor 3
year: 2005
? Robot Chicken S06-Born Again Virgin Christmas Special HDTV x264.avi
: type: episode
series: Robot Chicken
format: HDTV
season: 6
title: Born Again Virgin Christmas Special
videoCodec: h264
episodeDetails: Special
? Wicked.Tuna.S03E00.Head.To.Tail.Special.HDTV.x264-YesTV
: options: -n
type: episode
series: Wicked Tuna
title: Head To Tail Special
releaseGroup: YesTV
season: 3
episodeNumber: 0
videoCodec: h264
format: HDTV
episodeDetails: Special
? The.Voice.UK.S03E12.HDTV.x264-C4TV
: options: -n
episodeNumber: 12
videoCodec: h264
format: HDTV
series: The Voice (UK)
releaseGroup: C4TV
season: 3
country: United Kingdom
type: episode
? /tmp/star.trek.9/star.trek.9.mkv
: type: movie
title: star trek 9
? star.trek.9.mkv
: type: movie
title: star trek 9
? FlexGet.S01E02.TheName.HDTV.xvid
: options: -n
episodeNumber: 2
format: HDTV
season: 1
series: FlexGet
title: TheName
type: episode
videoCodec: XviD
? FlexGet.S01E02.TheName.HDTV.xvid
: options: -n
episodeNumber: 2
format: HDTV
season: 1
series: FlexGet
title: TheName
type: episode
videoCodec: XviD
? some.series.S03E14.Title.Here.720p
: options: -n
episodeNumber: 14
screenSize: 720p
season: 3
series: some series
title: Title Here
type: episode
? '[the.group] Some.Series.S03E15.Title.Two.720p'
: options: -n
episodeNumber: 15
releaseGroup: the.group
screenSize: 720p
season: 3
series: Some Series
title: Title Two
type: episode
? 'HD 720p: Some series.S03E16.Title.Three'
: options: -n
episodeNumber: 16
other: HD
screenSize: 720p
season: 3
series: Some series
title: Title Three
type: episode
? Something.Season.2.1of4.Ep.Title.HDTV.torrent
: episodeCount: 4
episodeNumber: 1
format: HDTV
season: 2
series: Something
title: Title
type: episode
? Show-A (US) - Episode Title S02E09 hdtv
: options: -n
country: US
episodeNumber: 9
format: HDTV
season: 2
series: Show-A (US)
type: episode
? Jack's.Show.S03E01.blah.1080p
: options: -n
episodeNumber: 1
screenSize: 1080p
season: 3
series: Jack's Show
title: blah
type: episode
? FlexGet.epic
: options: -n
title: FlexGet epic
type: movie
? FlexGet.Apt.1
: options: -n
title: FlexGet Apt 1
type: movie
? FlexGet.aptitude
: options: -n
title: FlexGet aptitude
type: movie
? FlexGet.Step1
: options: -n
title: FlexGet Step1
type: movie
? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720 * 432].avi
: format: DVD
screenSize: 720x432
title: El Bosque Animado
videoCodec: XviD
year: 1987
type: movie
? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi
: format: DVD
screenSize: 720x432
title: El Bosque Animado
videoCodec: XviD
year: 1987
type: movie
? 2009.shoot.fruit.chan.multi.dvd9.pal
: options: -n
format: DVD
language: mul
other: PAL
title: shoot fruit chan
type: movie
year: 2009
? 2009.shoot.fruit.chan.multi.dvd5.pal
: options: -n
format: DVD
language: mul
other: PAL
title: shoot fruit chan
type: movie
year: 2009
? The.Flash.2014.S01E01.PREAIR.WEBRip.XviD-EVO.avi
: episodeNumber: 1
format: WEBRip
other: Preair
releaseGroup: EVO
season: 1
series: The Flash
type: episode
videoCodec: XviD
year: 2014
? Ice.Lake.Rebels.S01E06.Ice.Lake.Games.720p.HDTV.x264-DHD
: options: -n
episodeNumber: 6
format: HDTV
releaseGroup: DHD
screenSize: 720p
season: 1
series: Ice Lake Rebels
title: Ice Lake Games
type: episode
videoCodec: h264
? The League - S06E10 - Epi Sexy.mkv
: episodeNumber: 10
season: 6
series: The League
title: Epi Sexy
type: episode
? Stay (2005) [1080p]/Stay.2005.1080p.BluRay.x264.YIFY.mp4
: format: BluRay
releaseGroup: YIFY
screenSize: 1080p
title: Stay
type: movie
videoCodec: h264
year: 2005

View file

@ -0,0 +1 @@
Just a dummy srt file (used for unittests: do not remove!)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,187 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import base_text_type, u
from collections import defaultdict
from unittest import TestCase, TestLoader, TextTestRunner
import shlex
import babelfish
import yaml, logging, sys, os
from os.path import *
def currentPath():
    '''Return the directory of the file from which this function is called.'''
    working_dir = os.getcwd()
    # Frame 1 is the caller; its module globals hold that module's __file__.
    caller_file = sys._getframe(1).f_globals['__file__']
    # Joining with the working directory turns a relative __file__ into an
    # absolute path (an absolute __file__ is left as it is by join).
    return dirname(join(working_dir, caller_file))
def addImportPath(path):
    '''Function that adds the specified path to the import path. The path can be
    absolute or relative to the calling file.'''
    # The caller's directory is worked out here rather than through
    # currentPath(): called from inside this function, currentPath() would see
    # this function as its caller and resolve the path relative to this
    # module instead of the calling file.
    caller_file = sys._getframe(1).f_globals['__file__']
    caller_dir = dirname(join(os.getcwd(), caller_file))
    # join() keeps an absolute `path` as it is.
    importPath = abspath(join(caller_dir, path))
    sys.path = [importPath] + sys.path
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
from guessit.plugins import transformers
from guessit.options import get_opts
import guessit
from guessit import *
from guessit.matcher import *
from guessit.fileutils import *
def allTests(testClass):
    '''Return a test suite holding every test of the given TestCase class.'''
    loader = TestLoader()
    return loader.loadTestsFromTestCase(testClass)
class TestGuessit(TestCase):
def checkMinimumFieldsCorrect(self, filename, filetype=None, remove_type=True,
                              exclude_files=None):
    '''Check the guesses made for every entry of a YAML ground-truth file.

    The file is obtained with load_file_in_same_dir(__file__, filename) and
    parsed as YAML; the result is handed to checkFields() together with a
    guessing function bound to `filetype`.

    :param filename: name of the YAML ground-truth file
    :param filetype: passed as `type` to guess_file_info()
    :param remove_type: forwarded to checkFields()
    :param exclude_files: forwarded to checkFields()
    :return: whatever checkFields() returns
    '''
    # The Loader is given explicitly: yaml.load() without it is deprecated
    # since PyYAML 5.1 and raises TypeError since PyYAML 6.0. yaml.Loader is
    # the loader yaml.load() used by default when this code was written.
    # NOTE(review): yaml.Loader can build arbitrary Python objects -- only
    # acceptable because the ground-truth files ship with the test suite.
    groundTruth = yaml.load(load_file_in_same_dir(__file__, filename), Loader=yaml.Loader)

    def guess_func(string, options=None):
        return guess_file_info(string, options=options, type=filetype)

    return self.checkFields(groundTruth, guess_func, remove_type, exclude_files)
def checkFields(self, groundTruth, guess_func, remove_type=True,
exclude_files=None):
total = 0
exclude_files = exclude_files or []
fails = defaultdict(list)
additionals = defaultdict(list)
for filename, required_fields in groundTruth.items():
filename = u(filename)
if filename in exclude_files:
continue
log.debug('\n' + '-' * 120)
log.info('Guessing information for file: %s' % filename)
options = required_fields.pop('options') if 'options' in required_fields else None
if options:
args = shlex.split(options)
options = get_opts().parse_args(args)
options = vars(options)
try:
found = guess_func(filename, options)
except Exception as e:
fails[filename].append("An exception has occured in %s: %s" % (filename, e))
log.exception("An exception has occured in %s: %s" % (filename, e))
continue
total = total + 1
# no need for these in the unittests
if remove_type:
try:
del found['type']
except:
pass
for prop in ('container', 'mimetype', 'unidentified'):
if prop in found:
del found[prop]
# props which are list of just 1 elem should be opened for easier writing of the tests
for prop in ('language', 'subtitleLanguage', 'other', 'episodeDetails', 'unidentified'):
value = found.get(prop, None)
if isinstance(value, list) and len(value) == 1:
found[prop] = value[0]
# look for missing properties
for prop, value in required_fields.items():
if prop not in found:
log.debug("Prop '%s' not found in: %s" % (prop, filename))
fails[filename].append("'%s' not found in: %s" % (prop, filename))
continue
# if both properties are strings, do a case-insensitive comparison
if (isinstance(value, base_text_type) and
isinstance(found[prop], base_text_type)):
if value.lower() != found[prop].lower():
log.debug("Wrong prop value [str] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
elif isinstance(value, list) and isinstance(found[prop], list):
if found[prop] and isinstance(found[prop][0], babelfish.Language):
# list of languages
s1 = set(Language.fromguessit(s) for s in value)
s2 = set(found[prop])
else:
# by default we assume list of strings and do a case-insensitive
# comparison on their elements
s1 = set(u(s).lower() for s in value)
s2 = set(u(s).lower() for s in found[prop])
if s1 != s2:
log.debug("Wrong prop value [list] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
elif isinstance(found[prop], babelfish.Language):
try:
if babelfish.Language.fromguessit(value) != found[prop]:
raise ValueError
except:
log.debug("Wrong prop value [Language] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
elif isinstance(found[prop], babelfish.Country):
try:
if babelfish.Country.fromguessit(value) != found[prop]:
raise ValueError
except:
log.debug("Wrong prop value [Country] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
# otherwise, just compare their values directly
else:
if found[prop] != value:
log.debug("Wrong prop value for '%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))
fails[filename].append("'%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))
# look for additional properties
for prop, value in found.items():
if prop not in required_fields:
log.debug("Found additional info for prop = '%s': '%s'" % (prop, u(value)))
additionals[filename].append("'%s': '%s'" % (prop, u(value)))
correct = total - len(fails)
log.info('SUMMARY: Guessed correctly %d out of %d filenames' % (correct, total))
for failed_entry, failed_properties in fails.items():
log.error('---- ' + failed_entry + ' ----')
for failed_property in failed_properties:
log.error("FAILED: " + failed_property)
for additional_entry, additional_properties in additionals.items():
log.warning('---- ' + additional_entry + ' ----')
for additional_property in additional_properties:
log.warning("ADDITIONAL: " + additional_property)
self.assertTrue(correct == total,
msg='Correct: %d < Total: %d' % (correct, total))

View file

@ -0,0 +1,754 @@
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
: title: Fear and Loathing in Las Vegas
year: 1998
screenSize: 720p
format: HD-DVD
audioCodec: DTS
videoCodec: h264
releaseGroup: ESiR
? Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi
: title: El Dia de la Bestia
year: 1995
format: DVD
language: spanish
videoCodec: DivX
releaseGroup: Artik[SEDG]
? Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
: title: Dark City
year: 1998
format: BluRay
screenSize: 720p
audioCodec: DTS
videoCodec: h264
releaseGroup: CHD
? Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv
: title: Sin City
year: 2005
format: BluRay
screenSize: 720p
videoCodec: h264
audioCodec: AC3
releaseGroup: SEPTiC
? Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi
: title: Borat
year: 2006
other: PROPER
format: DVD
other: [ R5, Proper ]
videoCodec: XviD
releaseGroup: PUKKA
? "[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv"
: title: Le Prestige
format: DVD
videoCodec: h264
videoProfile: HP
audioCodec: AAC
audioProfile: HE
language: [ french, english ]
subtitleLanguage: [ french, english ]
releaseGroup: XCT
? Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi
: title: Battle Royale
year: 2000
edition: special edition
cdNumber: 1
cdNumberTotal: 2
format: DVD
videoCodec: XviD
releaseGroup: ZeaL
? Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.avi
: title: Brazil
edition: Criterion Edition
year: 1985
cdNumber: 2
? Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv
: title: Persepolis
year: 2007
videoCodec: h264
audioCodec: AAC
language: [ French, English ]
subtitleLanguage: [ French, English ]
releaseGroup: XCT
? Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv
: title: Toy Story
year: 1995
format: HDTV
screenSize: 720p
language: [ english, spanish ]
? Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi
: title: Office Space
year: 1999
format: DVD
language: [ english, spanish ]
videoCodec: XviD
audioCodec: AC3
? Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.avi
: title: Wild Zero
year: 2000
videoCodec: DivX
releaseGroup: EPiC
? movies/Baraka_Edition_Collector.avi
: title: Baraka
edition: collector edition
? Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director's.Cut).CD1.DVDRip.XviD.AC3-WAF.avi
: title: Blade Runner
year: 1982
edition: Director's Cut
cdNumber: 1
format: DVD
videoCodec: XviD
audioCodec: AC3
releaseGroup: WAF
? movies/American.The.Bill.Hicks.Story.2009.DVDRip.XviD-EPiSODE.[UsaBit.com]/UsaBit.com_esd-americanbh.avi
: title: American The Bill Hicks Story
year: 2009
format: DVD
videoCodec: XviD
releaseGroup: EPiSODE
website: UsaBit.com
? movies/Charlie.And.Boots.DVDRip.XviD-TheWretched/wthd-cab.avi
: title: Charlie And Boots
format: DVD
videoCodec: XviD
releaseGroup: TheWretched
? movies/Steig Larsson Millenium Trilogy (2009) BRrip 720 AAC x264/(1)The Girl With The Dragon Tattoo (2009) BRrip 720 AAC x264.mkv
: title: The Girl With The Dragon Tattoo
filmSeries: Steig Larsson Millenium Trilogy
filmNumber: 1
year: 2009
format: BluRay
audioCodec: AAC
videoCodec: h264
screenSize: 720p
? movies/Greenberg.REPACK.LiMiTED.DVDRip.XviD-ARROW/arw-repack-greenberg.dvdrip.xvid.avi
: title: Greenberg
format: DVD
videoCodec: XviD
releaseGroup: ARROW
other: ['Proper', 'Limited']
? Movies/Fr - Paris 2054, Renaissance (2005) - De Christian Volckman - (Film Divx Science Fiction Fantastique Thriller Policier N&B).avi
: title: Paris 2054, Renaissance
year: 2005
language: french
videoCodec: DivX
? Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
: title: Avida
year: 2006
language: french
format: DVD
videoCodec: XviD
releaseGroup: PROD
? Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
: title: Alice in Wonderland
format: DVD
videoCodec: XviD
releaseGroup: DiAMOND
? Movies/Ne.Le.Dis.A.Personne.Fr 2 cd/personnea_mp.avi
: title: Ne Le Dis A Personne
language: french
cdNumberTotal: 2
? Movies/Bunker Palace Hôtel (Enki Bilal) (1989)/Enki Bilal - Bunker Palace Hotel (Fr Vhs Rip).avi
: title: Bunker Palace Hôtel
year: 1989
language: french
format: VHS
? Movies/21 (2008)/21.(2008).DVDRip.x264.AC3-FtS.[sharethefiles.com].mkv
: title: "21"
year: 2008
format: DVD
videoCodec: h264
audioCodec: AC3
releaseGroup: FtS
website: sharethefiles.com
? Movies/9 (2009)/9.2009.Blu-ray.DTS.720p.x264.HDBRiSe.[sharethefiles.com].mkv
: title: "9"
year: 2009
format: BluRay
audioCodec: DTS
screenSize: 720p
videoCodec: h264
releaseGroup: HDBRiSe
website: sharethefiles.com
? Movies/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam.avi
: title: Mamma Mia
year: 2008
format: DVD
audioCodec: AC3
videoCodec: XviD
releaseGroup: CrazyTeam
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
: title: M.A.S.H.
year: 1970
videoCodec: DivX
format: DVD
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
: title: The Doors
year: 1991
date: 2008-03-09
format: BluRay
screenSize: 720p
audioCodec: AC3
videoCodec: h264
releaseGroup: HiS@SiLUHD
language: english
website: sharethefiles.com
? Movies/The Doors (1991)/08.03.09.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
: options: --date-year-first
title: The Doors
year: 1991
date: 2008-03-09
format: BluRay
screenSize: 720p
audioCodec: AC3
videoCodec: h264
releaseGroup: HiS@SiLUHD
language: english
website: sharethefiles.com
? Movies/Ratatouille/video_ts-ratatouille.srt
: title: Ratatouille
format: DVD
? Movies/001 __ A classer/Fantomas se déchaine - Louis de Funès.avi
: title: Fantomas se déchaine
? Movies/Comme une Image (2004)/Comme.Une.Image.FRENCH.DVDRiP.XViD-NTK.par-www.divx-overnet.com.avi
: title: Comme une Image
year: 2004
language: french
format: DVD
videoCodec: XviD
releaseGroup: NTK
website: www.divx-overnet.com
? Movies/Fantastic Mr Fox/Fantastic.Mr.Fox.2009.DVDRip.{x264+LC-AAC.5.1}{Fr-Eng}{Sub.Fr-Eng}-™.[sharethefiles.com].mkv
: title: Fantastic Mr Fox
year: 2009
format: DVD
videoCodec: h264
audioCodec: AAC
audioProfile: LC
audioChannels: "5.1"
language: [ french, english ]
subtitleLanguage: [ french, english ]
website: sharethefiles.com
? Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi
: title: Somewhere
year: 2010
format: DVD
videoCodec: XviD
releaseGroup: iLG
? Movies/Moon_(2009).mkv
: title: Moon
year: 2009
? Movies/Moon_(2009)-x01.mkv
: title: Moon
year: 2009
bonusNumber: 1
? Movies/Moon_(2009)-x02-Making_Of.mkv
: title: Moon
year: 2009
bonusNumber: 2
bonusTitle: Making Of
? movies/James_Bond-f17-Goldeneye.mkv
: title: Goldeneye
filmSeries: James Bond
filmNumber: 17
? /movies/James_Bond-f21-Casino_Royale.mkv
: title: Casino Royale
filmSeries: James Bond
filmNumber: 21
? /movies/James_Bond-f21-Casino_Royale-x01-Becoming_Bond.mkv
: title: Casino Royale
filmSeries: James Bond
filmNumber: 21
bonusNumber: 1
bonusTitle: Becoming Bond
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
: title: Casino Royale
filmSeries: James Bond
filmNumber: 21
bonusNumber: 2
bonusTitle: Stunts
? OSS_117--Cairo,_Nest_of_Spies.mkv
: title: OSS 117--Cairo, Nest of Spies
? The Godfather Part III.mkv
: title: The Godfather
part: 3
? Foobar Part VI.mkv
: title: Foobar
part: 6
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
: title: The Insider
year: 1999
bonusNumber: 2
bonusTitle: 60 Minutes Interview-1996
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
: title: Rush Beyond The Lighted Stage
bonusNumber: 9
bonusTitle: Between Sun and Moon-2002 Hartford
? /public/uTorrent/Downloads Finished/Movies/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX.mkv
: title: Indiana Jones and the Temple of Doom
year: 1984
format: HDTV
screenSize: 720p
videoCodec: h264
audioCodec: AC3
audioChannels: "5.1"
releaseGroup: REDµX
? The.Directors.Notebook.2006.Blu-Ray.x264.DXVA.720p.AC3-de[42].mkv
: title: The Directors Notebook
year: 2006
format: BluRay
videoCodec: h264
videoApi: DXVA
screenSize: 720p
audioCodec: AC3
releaseGroup: de[42]
? Movies/Cosmopolis.2012.LiMiTED.720p.BluRay.x264-AN0NYM0US[bb]/ano-cosmo.720p.mkv
: title: Cosmopolis
year: 2012
screenSize: 720p
videoCodec: h264
releaseGroup: AN0NYM0US[bb]
format: BluRay
other: LIMITED
? movies/La Science des Rêves (2006)/La.Science.Des.Reves.FRENCH.DVDRip.XviD-MP-AceBot.avi
: title: La Science des Rêves
year: 2006
format: DVD
videoCodec: XviD
videoProfile: MP
releaseGroup: AceBot
language: French
? The_Italian_Job.mkv
: title: The Italian Job
? The.Rum.Diary.2011.1080p.BluRay.DTS.x264.D-Z0N3.mkv
: title: The Rum Diary
year: 2011
screenSize: 1080p
format: BluRay
videoCodec: h264
audioCodec: DTS
releaseGroup: D-Z0N3
? Life.Of.Pi.2012.1080p.BluRay.DTS.x264.D-Z0N3.mkv
: title: Life Of Pi
year: 2012
screenSize: 1080p
format: BluRay
videoCodec: h264
audioCodec: DTS
releaseGroup: D-Z0N3
? The.Kings.Speech.2010.1080p.BluRay.DTS.x264.D Z0N3.mkv
: title: The Kings Speech
year: 2010
screenSize: 1080p
format: BluRay
audioCodec: DTS
videoCodec: h264
releaseGroup: D Z0N3
? Street.Kings.2008.BluRay.1080p.DTS.x264.dxva EuReKA.mkv
: title: Street Kings
year: 2008
format: BluRay
screenSize: 1080p
audioCodec: DTS
videoCodec: h264
videoApi: DXVA
releaseGroup: EuReKa
? 2001.A.Space.Odyssey.1968.HDDVD.1080p.DTS.x264.dxva EuReKA.mkv
: title: 2001 A Space Odyssey
year: 1968
format: HD-DVD
screenSize: 1080p
audioCodec: DTS
videoCodec: h264
videoApi: DXVA
releaseGroup: EuReKa
? 2012.2009.720p.BluRay.x264.DTS WiKi.mkv
: title: "2012"
year: 2009
screenSize: 720p
format: BluRay
videoCodec: h264
audioCodec: DTS
releaseGroup: WiKi
? /share/Download/movie/Dead Man Down (2013) BRRiP XViD DD5_1 Custom NLSubs =-_lt Q_o_Q gt-=_/XD607ebb-BRc59935-5155473f-1c5f49/XD607ebb-BRc59935-5155473f-1c5f49.avi
: title: Dead Man Down
year: 2013
format: BluRay
videoCodec: XviD
audioChannels: "5.1"
audioCodec: DolbyDigital
idNumber: XD607ebb-BRc59935-5155473f-1c5f49
? Pacific.Rim.3D.2013.COMPLETE.BLURAY-PCH.avi
: title: Pacific Rim
year: 2013
format: BluRay
other:
- complete
- 3D
releaseGroup: PCH
? Immersion.French.2011.STV.READNFO.QC.FRENCH.ENGLISH.NTSC.DVDR.nfo
: title: Immersion French
year: 2011
language:
- French
- English
format: DVD
? Immersion.French.2011.STV.READNFO.QC.FRENCH.NTSC.DVDR.nfo
: title: Immersion French
year: 2011
language: French
format: DVD
? Immersion.French.2011.STV.READNFO.QC.NTSC.DVDR.nfo
: title: Immersion French
year: 2011
format: DVD
? French.Immersion.2011.STV.READNFO.QC.ENGLISH.NTSC.DVDR.nfo
: title: French Immersion
year: 2011
language: ENGLISH
format: DVD
? Howl's_Moving_Castle_(2004)_[720p,HDTV,x264,DTS]-FlexGet.avi
: videoCodec: h264
format: HDTV
title: Howl's Moving Castle
screenSize: 720p
year: 2004
audioCodec: DTS
releaseGroup: FlexGet
? Pirates de langkasuka.2008.FRENCH.1920X1080.h264.AVC.AsiaRa.mkv
: screenSize: 1080p
year: 2008
language: French
videoCodec: h264
title: Pirates de langkasuka
releaseGroup: AsiaRa
? Masala (2013) Telugu Movie HD DVDScr XviD - Exclusive.avi
: year: 2013
videoCodec: XviD
title: Masala
format: HD-DVD
other: screener
language: Telugu
releaseGroup: Exclusive
? Django Unchained 2012 DVDSCR X264 AAC-P2P.nfo
: year: 2012
other: screener
videoCodec: h264
title: Django Unchained
audioCodec: AAC
format: DVD
releaseGroup: P2P
? Ejecutiva.En.Apuros(2009).BLURAY.SCR.Xvid.Spanish.LanzamientosD.nfo
: year: 2009
other: screener
format: BluRay
videoCodec: XviD
language: Spanish
title: Ejecutiva En Apuros
? Die.Schluempfe.2.German.DL.1080p.BluRay.x264-EXQUiSiTE.mkv
: title: Die Schluempfe 2
format: BluRay
language:
- Multiple languages
- German
videoCodec: h264
releaseGroup: EXQUiSiTE
screenSize: 1080p
? Rocky 1976 French SubForced BRRip x264 AC3-FUNKY.mkv
: title: Rocky
year: 1976
subtitleLanguage: French
format: BluRay
videoCodec: h264
audioCodec: AC3
releaseGroup: FUNKY
? REDLINE (BD 1080p H264 10bit FLAC) [3xR].mkv
: title: REDLINE
format: BluRay
videoCodec: h264
videoProfile: 10bit
audioCodec: Flac
screenSize: 1080p
? The.Lizzie.McGuire.Movie.(2003).HR.DVDRiP.avi
: title: The Lizzie McGuire Movie
year: 2003
format: DVD
other: HR
? Hua.Mulan.BRRIP.MP4.x264.720p-HR.avi
: title: Hua Mulan
videoCodec: h264
format: BluRay
screenSize: 720p
other: HR
? Dr.Seuss.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
: videoCodec: XviD
title: Dr Seuss The Lorax
format: DVD
other: LiNE
year: 2012
audioCodec: AC3
audioProfile: HQ
releaseGroup: Hive-CM8
? "Star Wars: Episode IV - A New Hope (2004) Special Edition.MKV"
: title: Star Wars Episode IV
year: 2004
edition: Special Edition
? Dr.LiNE.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
: videoCodec: XviD
title: Dr LiNE The Lorax
format: DVD
other: LiNE
year: 2012
audioCodec: AC3
audioProfile: HQ
releaseGroup: Hive-CM8
? Perfect Child-2007-TRUEFRENCH-TVRip.Xvid-h@mster.avi
: releaseGroup: h@mster
title: Perfect Child
videoCodec: XviD
language: French
format: TV
year: 2007
? entre.ciel.et.terre.(1994).dvdrip.h264.aac-psypeon.avi
: audioCodec: AAC
format: DVD
releaseGroup: psypeon
title: entre ciel et terre
videoCodec: h264
year: 1994
? Yves.Saint.Laurent.2013.FRENCH.DVDSCR.MD.XviD-ViVARiUM.avi
: format: DVD
language: French
other: Screener
releaseGroup: ViVARiUM
title: Yves Saint Laurent
videoCodec: XviD
year: 2013
? Echec et Mort - Hard to Kill - Steven Seagal Multi 1080p BluRay x264 CCATS.avi
: format: BluRay
language: Multiple languages
releaseGroup: CCATS
screenSize: 1080p
title: Echec et Mort
videoCodec: h264
? Paparazzi - Timsit/Lindon (MKV 1080p tvripHD)
: options: -n
title: Paparazzi
screenSize: 1080p
format: HDTV
? some.movie.720p.bluray.x264-mind
: options: -n
title: some movie
screenSize: 720p
videoCodec: h264
releaseGroup: mind
format: BluRay
? Dr LiNE The Lorax 720p h264 BluRay
: options: -n
title: Dr LiNE The Lorax
screenSize: 720p
videoCodec: h264
format: BluRay
? BeatdownFrenchDVDRip.mkv
: options: -c
title: Beatdown
language: French
format: DVD
? YvesSaintLaurent2013FrenchDVDScrXvid.avi
: options: -c
format: DVD
language: French
other: Screener
title: Yves saint laurent
videoCodec: XviD
year: 2013
? Elle.s.en.va.720p.mkv
: screenSize: 720p
title: Elle s en va
? FooBar.7.PDTV-FlexGet
: options: -n
format: DVB
releaseGroup: FlexGet
title: FooBar 7
? h265 - HEVC Riddick Unrated Director Cut French 1080p DTS.mkv
: audioCodec: DTS
edition: Director's cut
language: fr
screenSize: 1080p
title: Riddick Unrated
videoCodec: h265
? "[h265 - HEVC] Riddick Unrated Director Cut French [1080p DTS].mkv"
: audioCodec: DTS
edition: Director's cut
language: fr
screenSize: 1080p
title: Riddick Unrated
videoCodec: h265
? Barbecue-2014-French-mHD-1080p
: options: -n
language: fr
other: mHD
screenSize: 1080p
title: Barbecue
year: 2014
? Underworld Quadrilogie VO+VFF+VFQ 1080p HDlight.x264~Tonyk~Monde Infernal
: options: -n
language:
- fr
- vo
other: HDLight
screenSize: 1080p
title: Underworld Quadrilogie
videoCodec: h264
? A Bout Portant (The Killers).PAL.Multi.DVD-R-KZ
: options: -n
format: DVD
language: mul
releaseGroup: KZ
title: A Bout Portant
? "Mise à Sac (Alain Cavalier, 1967) [Vhs.Rip.Vff]"
: options: -n
format: VHS
language: fr
title: "Mise à Sac"
year: 1967
? A Bout Portant (The Killers).PAL.Multi.DVD-R-KZ
: options: -n
format: DVD
language: mul
releaseGroup: KZ
title: A Bout Portant
? Youth.In.Revolt.(Be.Bad).2009.MULTI.1080p.LAME3*92-MEDIOZZ
: options: -n
audioCodec: MP3
language: mul
releaseGroup: MEDIOZZ
screenSize: 1080p
title: Youth In Revolt
year: 2009
? La Defense Lincoln (The Lincoln Lawyer) 2011 [DVDRIP][Vostfr]
: options: -n
format: DVD
subtitleLanguage: fr
title: La Defense Lincoln
year: 2011
? '[h265 - HEVC] Fight Club French 1080p DTS.'
: options: -n
audioCodec: DTS
language: fr
screenSize: 1080p
title: Fight Club
videoCodec: h265
? Love Gourou (Mike Myers) - FR
: options: -n
language: fr
title: Love Gourou
? '[h265 - hevc] transformers 2 1080p french ac3 6ch.'
: options: -n
audioChannels: '5.1'
audioCodec: AC3
language: fr
screenSize: 1080p
title: transformers 2
videoCodec: h265

View file

@ -0,0 +1,473 @@
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
aar aa Afar, afar 0 0
abk ab Abkhazian 0 0
ace Achinese 0 0
ach Acoli 0 0
ada Adangme 0 0
ady adyghé 0 0
afa Afro-Asiatic (Other) 0 0
afh Afrihili 0 0
afr af Afrikaans 0 0
ain Ainu 0 0
aka ak Akan 0 0
akk Akkadian 0 0
alb sq Albanian 1 1
ale Aleut 0 0
alg Algonquian languages 0 0
alt Southern Altai 0 0
amh am Amharic 0 0
ang English, Old (ca.450-1100) 0 0
apa Apache languages 0 0
ara ar Arabic 1 1
arc Aramaic 0 0
arg an Aragonese 0 0
arm hy Armenian 1 0
arn Araucanian 0 0
arp Arapaho 0 0
art Artificial (Other) 0 0
arw Arawak 0 0
asm as Assamese 0 0
ast Asturian, Bable 0 0
ath Athapascan languages 0 0
aus Australian languages 0 0
ava av Avaric 0 0
ave ae Avestan 0 0
awa Awadhi 0 0
aym ay Aymara 0 0
aze az Azerbaijani 0 0
bad Banda 0 0
bai Bamileke languages 0 0
bak ba Bashkir 0 0
bal Baluchi 0 0
bam bm Bambara 0 0
ban Balinese 0 0
baq eu Basque 1 1
bas Basa 0 0
bat Baltic (Other) 0 0
bej Beja 0 0
bel be Belarusian 0 0
bem Bemba 0 0
ben bn Bengali 1 0
ber Berber (Other) 0 0
bho Bhojpuri 0 0
bih bh Bihari 0 0
bik Bikol 0 0
bin Bini 0 0
bis bi Bislama 0 0
bla Siksika 0 0
bnt Bantu (Other) 0 0
bos bs Bosnian 1 0
bra Braj 0 0
bre br Breton 1 0
btk Batak (Indonesia) 0 0
bua Buriat 0 0
bug Buginese 0 0
bul bg Bulgarian 1 1
bur my Burmese 0 0
byn Blin 0 0
cad Caddo 0 0
cai Central American Indian (Other) 0 0
car Carib 0 0
cat ca Catalan 1 1
cau Caucasian (Other) 0 0
ceb Cebuano 0 0
cel Celtic (Other) 0 0
cha ch Chamorro 0 0
chb Chibcha 0 0
che ce Chechen 0 0
chg Chagatai 0 0
chi zh Chinese 1 1
chk Chuukese 0 0
chm Mari 0 0
chn Chinook jargon 0 0
cho Choctaw 0 0
chp Chipewyan 0 0
chr Cherokee 0 0
chu cu Church Slavic 0 0
chv cv Chuvash 0 0
chy Cheyenne 0 0
cmc Chamic languages 0 0
cop Coptic 0 0
cor kw Cornish 0 0
cos co Corsican 0 0
cpe Creoles and pidgins, English based (Other) 0 0
cpf Creoles and pidgins, French-based (Other) 0 0
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
cre cr Cree 0 0
crh Crimean Tatar 0 0
crp Creoles and pidgins (Other) 0 0
csb Kashubian 0 0
cus Cushitic (Other)' couchitiques, autres langues 0 0
cze cs Czech 1 1
dak Dakota 0 0
dan da Danish 1 1
dar Dargwa 0 0
day Dayak 0 0
del Delaware 0 0
den Slave (Athapascan) 0 0
dgr Dogrib 0 0
din Dinka 0 0
div dv Divehi 0 0
doi Dogri 0 0
dra Dravidian (Other) 0 0
dua Duala 0 0
dum Dutch, Middle (ca.1050-1350) 0 0
dut nl Dutch 1 1
dyu Dyula 0 0
dzo dz Dzongkha 0 0
efi Efik 0 0
egy Egyptian (Ancient) 0 0
eka Ekajuk 0 0
elx Elamite 0 0
eng en English 1 1
enm English, Middle (1100-1500) 0 0
epo eo Esperanto 1 0
est et Estonian 1 1
ewe ee Ewe 0 0
ewo Ewondo 0 0
fan Fang 0 0
fao fo Faroese 0 0
fat Fanti 0 0
fij fj Fijian 0 0
fil Filipino 0 0
fin fi Finnish 1 1
fiu Finno-Ugrian (Other) 0 0
fon Fon 0 0
fre fr French 1 1
frm French, Middle (ca.1400-1600) 0 0
fro French, Old (842-ca.1400) 0 0
fry fy Frisian 0 0
ful ff Fulah 0 0
fur Friulian 0 0
gaa Ga 0 0
gay Gayo 0 0
gba Gbaya 0 0
gem Germanic (Other) 0 0
geo ka Georgian 1 1
ger de German 1 1
gez Geez 0 0
gil Gilbertese 0 0
gla gd Gaelic 0 0
gle ga Irish 0 0
glg gl Galician 1 1
glv gv Manx 0 0
gmh German, Middle High (ca.1050-1500) 0 0
goh German, Old High (ca.750-1050) 0 0
gon Gondi 0 0
gor Gorontalo 0 0
got Gothic 0 0
grb Grebo 0 0
grc Greek, Ancient (to 1453) 0 0
ell el Greek 1 1
grn gn Guarani 0 0
guj gu Gujarati 0 0
gwi Gwich´in 0 0
hai Haida 0 0
hat ht Haitian 0 0
hau ha Hausa 0 0
haw Hawaiian 0 0
heb he Hebrew 1 1
her hz Herero 0 0
hil Hiligaynon 0 0
him Himachali 0 0
hin hi Hindi 1 1
hit Hittite 0 0
hmn Hmong 0 0
hmo ho Hiri Motu 0 0
hrv hr Croatian 1 1
hun hu Hungarian 1 1
hup Hupa 0 0
iba Iban 0 0
ibo ig Igbo 0 0
ice is Icelandic 1 1
ido io Ido 0 0
iii ii Sichuan Yi 0 0
ijo Ijo 0 0
iku iu Inuktitut 0 0
ile ie Interlingue 0 0
ilo Iloko 0 0
ina ia Interlingua (International Auxiliary Language Asso 0 0
inc Indic (Other) 0 0
ind id Indonesian 1 1
ine Indo-European (Other) 0 0
inh Ingush 0 0
ipk ik Inupiaq 0 0
ira Iranian (Other) 0 0
iro Iroquoian languages 0 0
ita it Italian 1 1
jav jv Javanese 0 0
jpn ja Japanese 1 1
jpr Judeo-Persian 0 0
jrb Judeo-Arabic 0 0
kaa Kara-Kalpak 0 0
kab Kabyle 0 0
kac Kachin 0 0
kal kl Kalaallisut 0 0
kam Kamba 0 0
kan kn Kannada 0 0
kar Karen 0 0
kas ks Kashmiri 0 0
kau kr Kanuri 0 0
kaw Kawi 0 0
kaz kk Kazakh 1 0
kbd Kabardian 0 0
kha Khasi 0 0
khi Khoisan (Other) 0 0
khm km Khmer 1 1
kho Khotanese 0 0
kik ki Kikuyu 0 0
kin rw Kinyarwanda 0 0
kir ky Kirghiz 0 0
kmb Kimbundu 0 0
kok Konkani 0 0
kom kv Komi 0 0
kon kg Kongo 0 0
kor ko Korean 1 1
kos Kosraean 0 0
kpe Kpelle 0 0
krc Karachay-Balkar 0 0
kro Kru 0 0
kru Kurukh 0 0
kua kj Kuanyama 0 0
kum Kumyk 0 0
kur ku Kurdish 0 0
kut Kutenai 0 0
lad Ladino 0 0
lah Lahnda 0 0
lam Lamba 0 0
lao lo Lao 0 0
lat la Latin 0 0
lav lv Latvian 1 0
lez Lezghian 0 0
lim li Limburgan 0 0
lin ln Lingala 0 0
lit lt Lithuanian 1 0
lol Mongo 0 0
loz Lozi 0 0
ltz lb Luxembourgish 1 0
lua Luba-Lulua 0 0
lub lu Luba-Katanga 0 0
lug lg Ganda 0 0
lui Luiseno 0 0
lun Lunda 0 0
luo Luo (Kenya and Tanzania) 0 0
lus lushai 0 0
mac mk Macedonian 1 1
mad Madurese 0 0
mag Magahi 0 0
mah mh Marshallese 0 0
mai Maithili 0 0
mak Makasar 0 0
mal ml Malayalam 0 0
man Mandingo 0 0
mao mi Maori 0 0
map Austronesian (Other) 0 0
mar mr Marathi 0 0
mas Masai 0 0
may ms Malay 1 1
mdf Moksha 0 0
mdr Mandar 0 0
men Mende 0 0
mga Irish, Middle (900-1200) 0 0
mic Mi'kmaq 0 0
min Minangkabau 0 0
mis Miscellaneous languages 0 0
mkh Mon-Khmer (Other) 0 0
mlg mg Malagasy 0 0
mlt mt Maltese 0 0
mnc Manchu 0 0
mni Manipuri 0 0
mno Manobo languages 0 0
moh Mohawk 0 0
mol mo Moldavian 0 0
mon mn Mongolian 1 0
mos Mossi 0 0
mwl Mirandese 0 0
mul Multiple languages 0 0
mun Munda languages 0 0
mus Creek 0 0
mwr Marwari 0 0
myn Mayan languages 0 0
myv Erzya 0 0
nah Nahuatl 0 0
nai North American Indian 0 0
nap Neapolitan 0 0
nau na Nauru 0 0
nav nv Navajo 0 0
nbl nr Ndebele, South 0 0
nde nd Ndebele, North 0 0
ndo ng Ndonga 0 0
nds Low German 0 0
nep ne Nepali 0 0
new Nepal Bhasa 0 0
nia Nias 0 0
nic Niger-Kordofanian (Other) 0 0
niu Niuean 0 0
nno nn Norwegian Nynorsk 0 0
nob nb Norwegian Bokmal 0 0
nog Nogai 0 0
non Norse, Old 0 0
nor no Norwegian 1 1
nso Northern Sotho 0 0
nub Nubian languages 0 0
nwc Classical Newari 0 0
nya ny Chichewa 0 0
nym Nyamwezi 0 0
nyn Nyankole 0 0
nyo Nyoro 0 0
nzi Nzima 0 0
oci oc Occitan 1 1
oji oj Ojibwa 0 0
ori or Oriya 0 0
orm om Oromo 0 0
osa Osage 0 0
oss os Ossetian 0 0
ota Turkish, Ottoman (1500-1928) 0 0
oto Otomian languages 0 0
paa Papuan (Other) 0 0
pag Pangasinan 0 0
pal Pahlavi 0 0
pam Pampanga 0 0
pan pa Panjabi 0 0
pap Papiamento 0 0
pau Palauan 0 0
peo Persian, Old (ca.600-400 B.C.) 0 0
per fa Persian 1 1
phi Philippine (Other) 0 0
phn Phoenician 0 0
pli pi Pali 0 0
pol pl Polish 1 1
pon Pohnpeian 0 0
por pt Portuguese 1 1
pra Prakrit languages 0 0
pro Provençal, Old (to 1500) 0 0
pus ps Pushto 0 0
que qu Quechua 0 0
raj Rajasthani 0 0
rap Rapanui 0 0
rar Rarotongan 0 0
roa Romance (Other) 0 0
roh rm Raeto-Romance 0 0
rom Romany 0 0
run rn Rundi 0 0
rup Aromanian 0 0
rus ru Russian 1 1
sad Sandawe 0 0
sag sg Sango 0 0
sah Yakut 0 0
sai South American Indian (Other) 0 0
sal Salishan languages 0 0
sam Samaritan Aramaic 0 0
san sa Sanskrit 0 0
sas Sasak 0 0
sat Santali 0 0
scc sr Serbian 1 1
scn Sicilian 0 0
sco Scots 0 0
sel Selkup 0 0
sem Semitic (Other) 0 0
sga Irish, Old (to 900) 0 0
sgn Sign Languages 0 0
shn Shan 0 0
sid Sidamo 0 0
sin si Sinhalese 1 1
sio Siouan languages 0 0
sit Sino-Tibetan (Other) 0 0
sla Slavic (Other) 0 0
slo sk Slovak 1 1
slv sl Slovenian 1 1
sma Southern Sami 0 0
sme se Northern Sami 0 0
smi Sami languages (Other) 0 0
smj Lule Sami 0 0
smn Inari Sami 0 0
smo sm Samoan 0 0
sms Skolt Sami 0 0
sna sn Shona 0 0
snd sd Sindhi 0 0
snk Soninke 0 0
sog Sogdian 0 0
som so Somali 0 0
son Songhai 0 0
sot st Sotho, Southern 0 0
spa es Spanish 1 1
srd sc Sardinian 0 0
srr Serer 0 0
ssa Nilo-Saharan (Other) 0 0
ssw ss Swati 0 0
suk Sukuma 0 0
sun su Sundanese 0 0
sus Susu 0 0
sux Sumerian 0 0
swa sw Swahili 1 0
swe sv Swedish 1 1
syr Syriac 1 0
tah ty Tahitian 0 0
tai Tai (Other) 0 0
tam ta Tamil 0 0
tat tt Tatar 0 0
tel te Telugu 0 0
tem Timne 0 0
ter Tereno 0 0
tet Tetum 0 0
tgk tg Tajik 0 0
tgl tl Tagalog 1 1
tha th Thai 1 1
tib bo Tibetan 0 0
tig Tigre 0 0
tir ti Tigrinya 0 0
tiv Tiv 0 0
tkl Tokelau 0 0
tlh Klingon 0 0
tli Tlingit 0 0
tmh Tamashek 0 0
tog Tonga (Nyasa) 0 0
ton to Tonga (Tonga Islands) 0 0
tpi Tok Pisin 0 0
tsi Tsimshian 0 0
tsn tn Tswana 0 0
tso ts Tsonga 0 0
tuk tk Turkmen 0 0
tum Tumbuka 0 0
tup Tupi languages 0 0
tur tr Turkish 1 1
tut Altaic (Other) 0 0
tvl Tuvalu 0 0
twi tw Twi 0 0
tyv Tuvinian 0 0
udm Udmurt 0 0
uga Ugaritic 0 0
uig ug Uighur 0 0
ukr uk Ukrainian 1 1
umb Umbundu 0 0
und Undetermined 0 0
urd ur Urdu 1 0
uzb uz Uzbek 0 0
vai Vai 0 0
ven ve Venda 0 0
vie vi Vietnamese 1 1
vol vo Volapük 0 0
vot Votic 0 0
wak Wakashan languages 0 0
wal Walamo 0 0
war Waray 0 0
was Washo 0 0
wel cy Welsh 0 0
wen Sorbian languages 0 0
wln wa Walloon 0 0
wol wo Wolof 0 0
xal Kalmyk 0 0
xho xh Xhosa 0 0
yao Yao 0 0
yap Yapese 0 0
yid yi Yiddish 0 0
yor yo Yoruba 0 0
ypk Yupik languages 0 0
zap Zapotec 0 0
zen Zenaga 0 0
zha za Zhuang 0 0
znd Zande 0 0
zul zu Zulu 0 0
zun Zuni 0 0
rum ro Romanian 1 1
pob pb Brazilian 1 1

View file

@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
class TestApi(TestGuessit):
    '''Checks that the different entry points of the API agree.'''

    def test_api(self):
        '''The guess_<type>_info helpers must match guess_file_info.'''
        movie_path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'

        movie_info = guessit.guess_movie_info(movie_path)
        video_info = guessit.guess_video_info(movie_path)
        episode_info = guessit.guess_episode_info(movie_path)
        file_info = guessit.guess_file_info(movie_path)

        expected_by_type = (
            ('movie', movie_info),
            ('video', video_info),
            ('episode', episode_info),
        )

        # the type given as keyword argument ...
        for type_name, expected in expected_by_type:
            guessed = guessit.guess_file_info(movie_path, type=type_name)
            self.assertEqual(guessed, expected)

        # ... and the type given through the options dict
        for type_name, expected in expected_by_type:
            guessed = guessit.guess_file_info(movie_path, options={'type': type_name})
            self.assertEqual(guessed, expected)

        # both given with different values: the result has to be the
        # 'episode' guess, i.e. the type named in the options dict
        conflicting = guessit.guess_file_info(movie_path,
                                              options={'type': 'episode'},
                                              type='movie')
        self.assertEqual(conflicting, episode_info)

        # same name without the extension, guessed with name_only set
        movie_path_name_only = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD'
        name_only_options = {"name_only": True}
        file_info_name_only = guessit.guess_file_info(movie_path_name_only,
                                                      options=name_only_options)

        self.assertFalse('container' in file_info_name_only)
        self.assertTrue('container' in file_info)
# suite with all the tests of TestApi, built when the module is imported
suite = allTests(TestApi)

if __name__ == '__main__':
    # run verbosely when the module is executed as a script
    runner = TextTestRunner(verbosity=2)
    runner.run(suite)

View file

@ -0,0 +1,45 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
class TestAutoDetect(TestGuessit):
    """Auto-detection behaviour on degenerate names and on the autodetect fixture."""

    def testEmpty(self):
        # Names made only of separators must produce an empty guess.
        for blank_name in ('', '___-__'):
            self.assertEqual(guessit.guess_file_info(blank_name), {})

        # A bare extension is still reported, with an unknown type.
        extension_only = guessit.guess_file_info('__-.avc')
        self.assertEqual(extension_only, {'type': 'unknown', 'extension': 'avc'})

    def testAutoDetect(self):
        self.checkMinimumFieldsCorrect(
            filename='autodetect.yaml',
            remove_type=False,
        )


suite = allTests(TestAutoDetect)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -0,0 +1,46 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
# Fixture entries to leave out of the checks below (currently none).
IGNORE_EPISODES = []
IGNORE_MOVIES = []


class TestAutoDetectAll(TestGuessit):
    """Runs the minimum-fields check over all three YAML fixtures without a type hint."""

    def testAutoMatcher(self):
        self.checkMinimumFieldsCorrect(
            filename='autodetect.yaml',
            remove_type=False,
        )

    def testAutoMatcherMovies(self):
        self.checkMinimumFieldsCorrect(
            filename='movies.yaml',
            exclude_files=IGNORE_MOVIES,
        )

    def testAutoMatcherEpisodes(self):
        self.checkMinimumFieldsCorrect(
            filename='episodes.yaml',
            exclude_files=IGNORE_EPISODES,
        )


suite = allTests(TestAutoDetectAll)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -0,0 +1,45 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
import guessit
import guessit.hash_ed2k
import unittest
import doctest
def load_tests(loader, tests, ignore):
    """Add the doctests of the guessit modules to *tests* and return it.

    Has the ``load_tests(loader, tests, pattern)`` signature used by
    unittest; *loader* and *ignore* are not used here.
    """
    documented_modules = (
        guessit,
        guessit.date,
        guessit.fileutils,
        guessit.guess,
        guessit.hash_ed2k,
        guessit.language,
        guessit.matchtree,
        guessit.textutils,
    )
    for module in documented_modules:
        tests.addTests(doctest.DocTestSuite(module))
    return tests


# Build the module-level suite eagerly so it can be imported by other runners.
suite = unittest.TestSuite()
load_tests(None, suite, None)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
class TestEpisode(TestGuessit):
    """Checks the episode fixture with the file type forced to 'episode'."""

    def testEpisodes(self):
        self.checkMinimumFieldsCorrect(
            filetype='episode',
            filename='episodes.yaml',
        )


suite = allTests(TestEpisode)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -0,0 +1,46 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
class TestHashes(TestGuessit):
    """Compares computed file hashes with recorded values for the bundled fixtures."""

    def test_hashes(self):
        # Each entry: (hash type, fixture file name, expected hash value).
        hashes = (
            ('hash_mpc', '1MB', u'8542ad406c15c8bd'),  # TODO: Check if this value is valid
            ('hash_ed2k', '1MB', u'ed2k://|file|1MB|1048576|AA3CC5552A9931A76B61A41D306735F7|/'),  # TODO: Check if this value is valid
            ('hash_md5', '1MB', u'5d8dcbca8d8ac21766f28797d6c3954c'),
            ('hash_sha1', '1MB', u'51d2b8f3248d7ee495b7750c8da5aa3b3819de9d'),
            ('hash_md5', 'dummy.srt', u'64de6b5893cac24456c46a935ef9c359'),
            ('hash_sha1', 'dummy.srt', u'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
        )

        for hash_type, filename, expected_value in hashes:
            fixture_path = file_in_same_dir(__file__, filename)
            guess = guess_file_info(fixture_path, hash_type)
            computed_value = guess.get(hash_type)
            failure_message = "Invalid %s for %s: %s != %s" % (
                hash_type, filename, computed_value, expected_value)
            self.assertEqual(expected_value, computed_value, failure_message)


suite = allTests(TestHashes)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -0,0 +1,130 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
import io
class TestLanguage(TestGuessit):
    """Checks that language names/codes used by various subtitle sites resolve consistently."""

    def check_languages(self, languages):
        # Every key of *languages* must resolve to the same Language as its value.
        for lang1, lang2 in languages.items():
            self.assertEqual(Language.fromguessit(lang1),
                             Language.fromguessit(lang2))

    def test_addic7ed(self):
        languages = {'English': 'en',
                     'English (US)': 'en-US',
                     'English (UK)': 'en-UK',
                     'Italian': 'it',
                     'Portuguese': 'pt',
                     'Portuguese (Brazilian)': 'pt-BR',
                     'Romanian': 'ro',
                     'Español (Latinoamérica)': 'es-MX',
                     'Español (España)': 'es-ES',
                     'Spanish (Latin America)': 'es-MX',
                     'Español': 'es',
                     'Spanish': 'es',
                     'Spanish (Spain)': 'es-ES',
                     'French': 'fr',
                     'Greek': 'el',
                     'Arabic': 'ar',
                     'German': 'de',
                     'Croatian': 'hr',
                     'Indonesian': 'id',
                     'Hebrew': 'he',
                     'Russian': 'ru',
                     'Turkish': 'tr',
                     'Swedish': 'se',
                     'Czech': 'cs',
                     'Dutch': 'nl',
                     'Hungarian': 'hu',
                     'Norwegian': 'no',
                     'Polish': 'pl',
                     'Persian': 'fa'}

        self.check_languages(languages)

    def test_subswiki(self):
        languages = {'English (US)': 'en-US', 'English (UK)': 'en-UK', 'English': 'en',
                     'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
                     'Español (Latinoamérica)': 'es-MX', 'Español (España)': 'es-ES',
                     'Español': 'es', 'Italian': 'it', 'Català': 'ca'}

        self.check_languages(languages)

    def test_tvsubtitles(self):
        languages = {'English': 'en', 'Español': 'es', 'French': 'fr', 'German': 'de',
                     'Brazilian': 'br', 'Russian': 'ru', 'Ukrainian': 'ua', 'Italian': 'it',
                     'Greek': 'gr', 'Arabic': 'ar', 'Hungarian': 'hu', 'Polish': 'pl',
                     'Turkish': 'tr', 'Dutch': 'nl', 'Portuguese': 'pt', 'Swedish': 'sv',
                     'Danish': 'da', 'Finnish': 'fi', 'Korean': 'ko', 'Chinese': 'cn',
                     'Japanese': 'jp', 'Bulgarian': 'bg', 'Czech': 'cz', 'Romanian': 'ro'}

        self.check_languages(languages)

    def test_opensubtitles(self):
        opensubtitles_langfile = file_in_same_dir(__file__, 'opensubtitles_languages_2012_05_09.txt')

        # Read the whole file inside ``with`` so the handle is closed even when
        # an assertion below fails. The first line is skipped (presumably a
        # column header -- verify against the fixture).
        with io.open(opensubtitles_langfile, encoding='utf-8') as langfile:
            rows = [u(raw_line).strip() for raw_line in langfile][1:]

        for row in rows:
            idlang, alpha2, _, upload_enabled, web_enabled = row.split('\t')
            # do not test languages that are too esoteric / not widely available
            if int(upload_enabled) and int(web_enabled):
                # check that we recognize the opensubtitles language code correctly
                # and that we are able to output this code from a language
                self.assertEqual(idlang, Language.fromguessit(idlang).opensubtitles)
                if alpha2:
                    # check we recognize the opensubtitles 2-letter code correctly
                    self.check_languages({idlang: alpha2})

    def test_tmdb(self):
        # examples from http://api.themoviedb.org/2.1/language-tags
        for lang in ['en-US', 'en-CA', 'es-MX', 'fr-PF']:
            self.assertEqual(lang, str(Language.fromguessit(lang)))

    def test_subtitulos(self):
        languages = {'English (US)': 'en-US', 'English (UK)': 'en-UK', 'English': 'en',
                     'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
                     'Español (Latinoamérica)': 'es-MX', 'Español (España)': 'es-ES',
                     'Español': 'es', 'Italian': 'it', 'Català': 'ca'}

        self.check_languages(languages)

    def test_thesubdb(self):
        languages = {'af': 'af', 'cs': 'cs', 'da': 'da', 'de': 'de', 'en': 'en', 'es': 'es', 'fi': 'fi',
                     'fr': 'fr', 'hu': 'hu', 'id': 'id', 'it': 'it', 'la': 'la', 'nl': 'nl', 'no': 'no',
                     'oc': 'oc', 'pl': 'pl', 'pt': 'pt', 'ro': 'ro', 'ru': 'ru', 'sl': 'sl', 'sr': 'sr',
                     'sv': 'sv', 'tr': 'tr'}

        self.check_languages(languages)

    def test_exceptions(self):
        # Aliases that must resolve to the same Language object.
        self.assertEqual(Language.fromguessit('br'), Language.fromguessit('pt(br)'))

        self.assertEqual(Language.fromguessit('unknown'),
                         Language.fromguessit('und'))


suite = allTests(TestLanguage)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -0,0 +1,69 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
from guessit.fileutils import split_path, file_in_same_dir
from guessit.textutils import strip_brackets, str_replace, str_fill
from guessit import PY2
from guessit import __main__
if PY2:
from StringIO import StringIO
else:
from io import StringIO
class TestMain(TestGuessit):
    """Smoke tests for the command-line entry point; each call only has to run."""

    def setUp(self):
        # Swap stdout for an in-memory buffer while a test runs.
        self._stdout = sys.stdout
        sys.stdout = StringIO()

    def tearDown(self):
        sys.stdout = self._stdout

    def test_list_properties(self):
        for argv in (["-p"], ["-V"]):
            __main__.main(argv, False)

    def test_list_transformers(self):
        for argv in (["--transformers"], ["-V", "--transformers"]):
            __main__.main(argv, False)

    def test_demo(self):
        __main__.main(["-d"], False)

    def test_filename(self):
        command_lines = (
            ["A.Movie.2014.avi"],
            ["A.Movie.2014.avi", "A.2nd.Movie.2014.avi"],
            ["-y", "A.Movie.2014.avi"],
            ["-a", "A.Movie.2014.avi"],
            ["-v", "A.Movie.2014.avi"],
            ["-t", "movie", "A.Movie.2014.avi"],
            ["-t", "episode", "A.Serie.S02E06.avi"],
            ["-i", "hash_mpc", file_in_same_dir(__file__, "1MB")],
            ["-i", "hash_md5", file_in_same_dir(__file__, "1MB")],
        )
        for argv in command_lines:
            __main__.main(argv, False)


suite = allTests(TestMain)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -0,0 +1,93 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
from guessit.transfo.guess_release_group import GuessReleaseGroup
from guessit.transfo.guess_properties import GuessProperties
from guessit.matchtree import BaseMatchTree
keywords = yaml.load("""
? Xvid PROPER
: videoCodec: Xvid
other: PROPER
? PROPER-Xvid
: videoCodec: Xvid
other: PROPER
""")
def guess_info(string, options=None):
    """Build a MatchTree for *string*, run two transformers on it, return ``mtree.matched()``.

    The release-group transformer is applied first, then the properties
    transformer; both receive the same *options*.
    """
    mtree = MatchTree(string)
    for transformer_class in (GuessReleaseGroup, GuessProperties):
        transformer_class().process(mtree, options)
    return mtree.matched()
class TestMatchTree(TestGuessit):
    # Tests for BaseMatchTree partitioning/navigation and for keyword matching.

    def test_base_tree(self):
        t = BaseMatchTree('One Two Three(Three) Four')
        # Split the root at offsets 3, 7 and 20: four leaves are expected.
        t.partition((3, 7, 20))
        leaves = list(t.leaves())

        self.assertEqual(leaves[0].span, (0, 3))

        self.assertEqual('One', leaves[0].value)
        self.assertEqual(' Two', leaves[1].value)
        self.assertEqual(' Three(Three)', leaves[2].value)
        self.assertEqual(' Four', leaves[3].value)

        # Split the third leaf again; offsets are relative to that leaf's own text.
        leaves[2].partition((1, 6, 7, 12))
        three_leaves = list(leaves[2].leaves())

        self.assertEqual('Three', three_leaves[1].value)
        self.assertEqual('Three', three_leaves[3].value)

        # Re-list the leaves of the whole tree: 3 untouched + 5 from the sub-partition.
        leaves = list(t.leaves())

        self.assertEqual(len(leaves), 8)

        self.assertEqual(leaves[5], three_leaves[3])

        # Neighbour navigation from an inner leaf.
        self.assertEqual(t.previous_leaf(leaves[5]), leaves[4])
        self.assertEqual(t.next_leaf(leaves[5]), leaves[6])

        # previous_leaves is expected nearest-first (reverse document order).
        self.assertEqual(t.next_leaves(leaves[5]), [leaves[6], leaves[7]])
        self.assertEqual(t.previous_leaves(leaves[5]), [leaves[4], leaves[3], leaves[2], leaves[1], leaves[0]])

        # Navigation past either end yields None / an empty list.
        self.assertEqual(t.next_leaf(leaves[7]), None)
        self.assertEqual(t.previous_leaf(leaves[0]), None)

        self.assertEqual(t.next_leaves(leaves[7]), [])
        self.assertEqual(t.previous_leaves(leaves[0]), [])

    def test_match(self):
        # Feed the module-level ``keywords`` fixture through ``guess_info``.
        self.checkFields(keywords, guess_info)


suite = allTests(TestMatchTree)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
class TestMovie(TestGuessit):
    """Checks the movie fixture with the file type forced to 'movie'."""

    def testMovies(self):
        self.checkMinimumFieldsCorrect(
            filetype='movie',
            filename='movies.yaml',
        )


suite = allTests(TestMovie)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -0,0 +1,126 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.quality import best_quality, best_quality_properties
from guessit.containers import QualitiesContainer
from guessit.test.guessittest import *
class TestQuality(TestGuessit):
    # Tests for QualitiesContainer and the module-level best_quality helpers.

    def test_container(self):
        # Two properties ('color', 'context'), each with three rated values.
        container = QualitiesContainer()

        container.register_quality('color', 'red', 10)
        container.register_quality('color', 'orange', 20)
        container.register_quality('color', 'green', 30)

        container.register_quality('context', 'sun', 100)
        container.register_quality('context', 'sea', 200)
        container.register_quality('context', 'sex', 300)

        g1 = Guess()
        g1['color'] = 'red'

        g2 = Guess()
        g2['color'] = 'green'

        g3 = Guess()
        g3['color'] = 'orange'

        # A guess with a single rated property gets exactly that rating.
        q3 = container.rate_quality(g3)
        self.assertEqual(q3, 20, "ORANGE should be rated 20. Don't ask why!")

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!")

        # Adding 'context' flips the overall ordering of g1 and g2.
        g1['context'] = 'sex'
        g2['context'] = 'sun'

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q1 > q2, "SEX should be greater than SUN. Don't ask why!")

        self.assertEqual(container.best_quality(g1, g2), g1, "RED&SEX should be better than GREEN&SUN. Don't ask why!")

        # Restricting the comparison to a list of properties.
        self.assertEqual(container.best_quality_properties(['color'], g1, g2), g2, "GREEN should be better than RED. Don't ask why!")

        self.assertEqual(container.best_quality_properties(['context'], g1, g2), g1, "SEX should be better than SUN. Don't ask why!")

        # Rating a single named property.
        q1 = container.rate_quality(g1, 'color')
        q2 = container.rate_quality(g2, 'color')

        self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!")

        # After unregistering both context values, the colors decide the ordering again.
        container.unregister_quality('context', 'sex')
        container.unregister_quality('context', 'sun')

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q2 > q1, "GREEN&SUN should be greater than RED&SEX. Don't ask why!")

        # An unregistered value rates 0.
        g3['context'] = 'sea'
        container.unregister_quality('context', 'sea')

        q3 = container.rate_quality(g3, 'context')
        self.assertEqual(q3, 0, "Context should be unregistered.")

        # Unregistering with only the property name: 'orange' then rates 0 as well.
        container.unregister_quality('color')
        q3 = container.rate_quality(g3, 'color')

        self.assertEqual(q3, 0, "Color should be unregistered.")

        container.clear_qualities()

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q1 == q2 == 0, "Empty quality container should rate each guess to 0")

    def test_quality_transformers(self):
        # Same release name twice, differing in screen size and audio codec.
        guess_720p = guessit.guess_file_info("2012.2009.720p.BluRay.x264.DTS WiKi.mkv")
        guess_1080p = guessit.guess_file_info("2012.2009.1080p.BluRay.x264.MP3 WiKi.mkv")

        self.assertTrue('audioCodec' in guess_720p, "audioCodec should be present")
        self.assertTrue('audioCodec' in guess_1080p, "audioCodec should be present")
        self.assertTrue('screenSize' in guess_720p, "screenSize should be present")
        self.assertTrue('screenSize' in guess_1080p, "screenSize should be present")

        # Overall, the 1080p guess is expected to win.
        best_quality_guess = best_quality(guess_720p, guess_1080p)

        self.assertTrue(guess_1080p == best_quality_guess, "1080p+MP3 is not the best global quality")

        best_quality_guess = best_quality_properties(['screenSize'], guess_720p, guess_1080p)

        self.assertTrue(guess_1080p == best_quality_guess, "1080p is not the best screenSize")

        # On audio codec alone, the 720p guess (DTS) is expected to win.
        best_quality_guess = best_quality_properties(['audioCodec'], guess_720p, guess_1080p)

        self.assertTrue(guess_720p == best_quality_guess, "DTS is not the best audioCodec")


suite = allTests(TestQuality)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -0,0 +1,163 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test.guessittest import *
from guessit.fileutils import split_path
from guessit.textutils import strip_brackets, str_replace, str_fill, from_camel, is_camel,\
levenshtein, reorder_title
from guessit import PY2
from guessit.date import search_date, search_year
from datetime import datetime, date, timedelta
class TestUtils(TestGuessit):
    """Unit tests for the path, text and date helper functions."""

    def test_splitpath(self):
        # Expected splits, keyed by "running on Windows?".
        alltests = {False: {'/usr/bin/smewt': ['/', 'usr', 'bin', 'smewt'],
                            'relative_path/to/my_folder/': ['relative_path', 'to', 'my_folder'],
                            '//some/path': ['//', 'some', 'path'],
                            '//some//path': ['//', 'some', 'path'],
                            '///some////path': ['///', 'some', 'path']
                            },
                    True: {'C:\\Program Files\\Smewt\\smewt.exe': ['C:\\', 'Program Files', 'Smewt', 'smewt.exe'],
                           'Documents and Settings\\User\\config': ['Documents and Settings', 'User', 'config'],
                           'C:\\Documents and Settings\\User\\config': ['C:\\', 'Documents and Settings', 'User', 'config'],
                           # http://bugs.python.org/issue19945
                           '\\\\netdrive\\share': ['\\\\', 'netdrive', 'share'] if PY2 else ['\\\\netdrive\\share'],
                           '\\\\netdrive\\share\\folder': ['\\\\', 'netdrive', 'share', 'folder'] if PY2 else ['\\\\netdrive\\share\\', 'folder'],
                           }
                    }
        tests = alltests[sys.platform == 'win32']
        for path, split in tests.items():
            self.assertEqual(split, split_path(path))

    def test_strip_brackets(self):
        # (input, expected): only a matching pair of outer brackets is removed.
        allTests = (('', ''),
                    ('[test]', 'test'),
                    ('{test2}', 'test2'),
                    ('(test3)', 'test3'),
                    ('(test4]', '(test4]'),
                    )

        for i, e in allTests:
            self.assertEqual(e, strip_brackets(i))

    def test_levenshtein(self):
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmno"), 0)
        self.assertEqual(levenshtein("abcdef ghijk lmnop", "abcdef ghijk lmno"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmn"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnp"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnq"), 1)
        self.assertEqual(levenshtein("cbcdef ghijk lmno", "abcdef ghijk lmnq"), 2)
        self.assertEqual(levenshtein("cbcdef ghihk lmno", "abcdef ghijk lmnq"), 3)

    def test_reorder_title(self):
        self.assertEqual(reorder_title("Simpsons, The"), "The Simpsons")
        self.assertEqual(reorder_title("Simpsons,The"), "The Simpsons")
        self.assertEqual(reorder_title("Simpsons,Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")
        self.assertEqual(reorder_title("Simpsons, Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")

    def test_camel(self):
        self.assertEqual("", from_camel(""))

        self.assertEqual("Hello world", str_replace("Hello World", 6, 'w'))
        self.assertEqual("Hello *****", str_fill("Hello World", (6, 11), '*'))

        # Was assertTrue(expected, actual), which passes for any non-empty
        # first argument and therefore never checked the conversion.
        self.assertEqual("This is camel", from_camel("ThisIsCamel"))

        self.assertEqual('camel case', from_camel('camelCase'))
        self.assertEqual('A case', from_camel('ACase'))
        self.assertEqual('MiXedCaSe is not camel case', from_camel('MiXedCaSe is not camelCase'))

        self.assertEqual("This is camel cased title", from_camel("ThisIsCamelCasedTitle"))
        self.assertEqual("This is camel CASED title", from_camel("ThisIsCamelCASEDTitle"))

        self.assertEqual("These are camel CASED title", from_camel("TheseAreCamelCASEDTitle"))

        self.assertEqual("Give a camel case string", from_camel("GiveACamelCaseString"))

        self.assertEqual("Death TO camel case", from_camel("DeathTOCamelCase"))
        self.assertEqual("But i like java too:)", from_camel("ButILikeJavaToo:)"))

        self.assertEqual("Beatdown french DVD rip.mkv", from_camel("BeatdownFrenchDVDRip.mkv"))
        self.assertEqual("DO NOTHING ON UPPER CASE", from_camel("DO NOTHING ON UPPER CASE"))

        self.assertFalse(is_camel("this_is_not_camel"))
        self.assertTrue(is_camel("ThisIsCamel"))

        self.assertEqual("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv", from_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))
        self.assertFalse(is_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))

        self.assertEqual("A2LiNE", from_camel("A2LiNE"))

    def test_date(self):
        def two_digit_year(day):
            # Always two characters ('05', not '5'): the date strings below
            # and their expected (18, 26) spans rely on a fixed width.
            return '%02d' % (day.year % 100)

        self.assertEqual(search_year(' in the year 2000... '), (2000, (13, 17)))
        self.assertEqual(search_year(' they arrived in 1492. '), (None, None))

        # Reference dates are relative to the day the test runs.
        today = date.today()
        today_year_2 = two_digit_year(today)

        future = today + timedelta(days=1000)
        future_year_2 = two_digit_year(future)

        past = today - timedelta(days=10000)
        past_year_2 = two_digit_year(past)

        self.assertEqual(search_date(' Something before 2002-04-22 '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' 2002-04-22 Something after '), (date(2002, 4, 22), (1, 11)))

        self.assertEqual(search_date(' This happened on 2002-04-22. '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' This happened on 22-04-2002. '), (date(2002, 4, 22), (18, 28)))

        # Two-digit years, day first.
        self.assertEqual(search_date(' This happened on 13-04-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
        self.assertEqual(search_date(' This happened on 22-04-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
        self.assertEqual(search_date(' This happened on 20-04-%s. ' % (past_year_2,)), (date(past.year, 4, 20), (18, 26)))

        self.assertEqual(search_date(' This happened on 13-06-14. ', year_first=True), (date(2013, 6, 14), (18, 26)))
        self.assertEqual(search_date(' This happened on 13-05-14. ', year_first=False), (date(2014, 5, 13), (18, 26)))

        # Two-digit years, month first.
        self.assertEqual(search_date(' This happened on 04-13-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-22-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-20-%s. ' % (past_year_2,)), (date(past.year, 4, 20), (18, 26)))

        # Impossible day/month combinations must not match.
        self.assertEqual(search_date(' This happened on 35-12-%s. ' % (today_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on 37-18-%s. ' % (future_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on 44-42-%s. ' % (past_year_2,)), (None, None))

        # ISO-formatted dates (str(date) gives YYYY-MM-DD).
        self.assertEqual(search_date(' This happened on %s. ' % (today, )), (today, (18, 28)))
        self.assertEqual(search_date(' This happened on %s. ' % (future, )), (future, (18, 28)))
        self.assertEqual(search_date(' This happened on %s. ' % (past, )), (past, (18, 28)))

        self.assertEqual(search_date(' released date: 04-03-1901? '), (None, None))

        self.assertEqual(search_date(' There\'s no date in here. '), (None, None))

        # Ambiguous 01-02-03: the flags decide which field is the year / the day.
        self.assertEqual(search_date(' Something 01-02-03 '), (date(2003, 2, 1), (11, 19)))
        self.assertEqual(search_date(' Something 01-02-03 ', year_first=False, day_first=True), (date(2003, 2, 1), (11, 19)))
        self.assertEqual(search_date(' Something 01-02-03 ', year_first=True), (date(2001, 2, 3), (11, 19)))
        self.assertEqual(search_date(' Something 01-02-03 ', day_first=False), (date(2003, 1, 2), (11, 19)))


suite = allTests(TestUtils)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)

View file

@ -1,24 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Smewt - A smart collection manager
# Copyright (c) 2008-2012 Nicolas Wack <wackou@gmail.com>
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# Smewt is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Smewt is distributed in the hope that it will be useful,
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit import s
from guessit.patterns import sep
import functools
@ -27,6 +28,7 @@ import re
# string-related functions
def normalize_unicode(s):
    """Return *s* converted to Unicode normalization form C (NFC)."""
    composed = unicodedata.normalize('NFC', s)
    return composed
@ -43,41 +45,63 @@ def strip_brackets(s):
return s
def clean_string(st):
# A run of at least three single letters separated by dots ("S.H.I.E.L.D."),
# at the start of the string or right after a non-word character.
_dotted_rexp = re.compile(r'(?:\W|^)(([A-Za-z]\.){2,}[A-Za-z]\.?)')


def clean_default(st):
    """Return *st* with separator characters turned into single spaces.

    Every character of ``sep`` is replaced by a space, except ``-`` and
    ``,`` which are kept. Dots inside the first acronym-like run matched by
    ``_dotted_rexp`` are kept as well. Runs of whitespace are then collapsed
    and dashes at either end of the result are removed.
    """
    for c in sep:
        # do not remove certain chars
        if c in ['-', ',']:
            continue

        if c == '.':
            # we should not remove the dots for acronyms and such
            # (only the first match returned by search() is protected)
            dotted = _dotted_rexp.search(st)
            if dotted:
                exclude_begin, exclude_end = dotted.span(1)

                st = (st[:exclude_begin].replace(c, ' ') +
                      st[exclude_begin:exclude_end] +
                      st[exclude_end:].replace(c, ' '))
                continue

        st = st.replace(c, ' ')

    # split() without arguments never yields empty strings, so joining its
    # output is enough to collapse whitespace.
    result = ' '.join(st.split())

    # now also remove dashes on the outer part of the string
    return result.strip('-')
_words_rexp = re.compile('\w+', re.UNICODE)
def find_words(s):
return _words_rexp.findall(s.replace('_', ' '))
def reorder_title(title):
def iter_words(s):
return _words_rexp.finditer(s.replace('_', ' '))
def reorder_title(title, articles=('the',), separators=(',', ', ')):
ltitle = title.lower()
if ltitle[-4:] == ',the':
return title[-3:] + ' ' + title[:-4]
if ltitle[-5:] == ', the':
return title[-3:] + ' ' + title[:-5]
for article in articles:
for separator in separators:
suffix = separator + article
if ltitle[-len(suffix):] == suffix:
return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
return title
def str_replace(string, pos, c):
return string[:pos] + c + string[pos+1:]
return string[:pos] + c + string[pos + 1:]
def str_fill(string, region, c):
@ -85,7 +109,6 @@ def str_fill(string, region, c):
return string[:start] + c * (end - start) + string[end:]
def levenshtein(a, b):
if not a:
return len(b)
@ -95,25 +118,25 @@ def levenshtein(a, b):
m = len(a)
n = len(b)
d = []
for i in range(m+1):
d.append([0] * (n+1))
for i in range(m + 1):
d.append([0] * (n + 1))
for i in range(m+1):
for i in range(m + 1):
d[i][0] = i
for j in range(n+1):
for j in range(n + 1):
d[0][j] = j
for i in range(1, m+1):
for j in range(1, n+1):
if a[i-1] == b[j-1]:
for i in range(1, m + 1):
for j in range(1, n + 1):
if a[i - 1] == b[j - 1]:
cost = 0
else:
cost = 1
d[i][j] = min(d[i-1][j] + 1, # deletion
d[i][j-1] + 1, # insertion
d[i-1][j-1] + cost # substitution
d[i][j] = min(d[i - 1][j] + 1, # deletion
d[i][j - 1] + 1, # insertion
d[i - 1][j - 1] + cost # substitution
)
return d[m][n]
@ -151,7 +174,7 @@ def find_first_level_groups_span(string, enclosing):
end = i
if not depth:
# we emptied our stack, so we have a 1st level group
result.append((start, end+1))
result.append((start, end + 1))
except IndexError:
# we closed a group which was not opened before
pass
@ -172,7 +195,7 @@ def split_on_groups(string, groups):
"""
if not groups:
return [ string ]
return [string]
boundaries = sorted(set(functools.reduce(lambda l, x: l + list(x), groups, [])))
if boundaries[0] != 0:
@ -180,10 +203,10 @@ def split_on_groups(string, groups):
if boundaries[-1] != len(string):
boundaries.append(len(string))
groups = [ string[start:end] for start, end in zip(boundaries[:-1],
boundaries[1:]) ]
groups = [string[start:end] for start, end in zip(boundaries[:-1],
boundaries[1:])]
return [ g for g in groups if g ] # return only non-empty groups
return [g for g in groups if g] # return only non-empty groups
def find_first_level_groups(string, enclosing, blank_sep=None):
@ -219,6 +242,114 @@ def find_first_level_groups(string, enclosing, blank_sep=None):
if blank_sep:
for start, end in groups:
string = str_replace(string, start, blank_sep)
string = str_replace(string, end-1, blank_sep)
string = str_replace(string, end - 1, blank_sep)
return split_on_groups(string, groups)
# Short lowercase words that may legitimately sit next to a capital letter in
# a camelCased string; they stop a capital from being read as "MiXeD case".
_camel_word2_set = set(('is', 'to',))
_camel_word3_set = set(('the',))


def _camel_split_and_lower(string, i):
    """Classify the character at ``string[i]`` for camelCase splitting.

    Returns a tuple ``(need_split, need_lower)``:

    * ``need_split`` is True if this char is the first letter of a word in a
      camelCasedString.
    * ``need_lower`` is True if this char should also be lowercased.
    """
    def is_lower(c):
        # A missing neighbour (None) is neither lower nor upper.
        return bool(c) and c.isalpha() and not c.isupper()

    def is_upper(c):
        return bool(c) and c.isupper()

    length = len(string)
    prev2 = string[i - 2] if i > 1 else None
    prev = string[i - 1] if i > 0 else None
    char = string[i]
    nxt = string[i + 1] if i + 1 < length else None
    nxt2 = string[i + 2] if i + 2 < length else None

    char_upper = char.isupper()
    prev_lower = is_lower(prev)
    prev_upper = is_upper(prev)
    nxt_lower = is_lower(nxt)
    nxt_upper = is_upper(nxt)
    nxt2_upper = is_upper(nxt2)

    # Alternating-case neighbourhoods ("MiXedCaSe") are not camelCase.
    mixedcase_word = (
        (prev_upper and is_lower(char) and nxt_upper) or
        (prev_lower and char_upper and nxt_lower and nxt2_upper) or
        (is_upper(prev2) and prev_lower and char_upper)
    )

    if mixedcase_word:
        # ...unless the alternation is caused by a known short word, as in
        # "DeathToCamelCase" where "To" sits between two capitalized words.
        pair_here = (char + nxt).lower() if nxt else None
        triple_here = (char + nxt + nxt2).lower() if nxt and nxt2 else None
        pair_before = (prev2 + prev).lower() if prev2 and prev else None
        is_short_word = (pair_here in _camel_word2_set or
                         pair_before in _camel_word2_set or
                         triple_here in _camel_word3_set)
        mixedcase_word = not is_short_word

    if char_upper and prev_lower and not mixedcase_word:
        # lower -> Upper transition: a new word starts here.
        need_split = True
        uppercase_word = ((prev_upper and char_upper and nxt_upper) or
                          (char_upper and nxt_upper and nxt2_upper))
    else:
        # Otherwise only the last capital of an UPPERCASE run that is followed
        # by a lowercase letter starts a word ("EATDog" -> "EAT" + "Dog").
        need_split = char_upper and prev_upper and nxt_lower
        uppercase_word = prev_upper and not nxt_lower

    need_lower = need_split and not uppercase_word and not mixedcase_word

    return (need_split, need_lower)


def is_camel(string):
    """Tell whether ``string`` contains at least one camelCase word boundary.

    >>> is_camel('dogEATDog')
    True
    >>> is_camel('DeathToCamelCase')
    True
    >>> is_camel('death_to_camel_case')
    False
    >>> is_camel('TheBest')
    True
    >>> is_camel('The Best')
    False
    """
    return any(_camel_split_and_lower(string, pos)[0]
               for pos in range(len(string)))


def from_camel(string):
    """Insert spaces at camelCase boundaries, lowercasing the split letters.

    Falsy input (empty string, None) is returned unchanged.

    >>> from_camel('dogEATDog') == 'dog EAT dog'
    True
    >>> from_camel('DeathToCamelCase') == 'Death to camel case'
    True
    >>> from_camel('TheBest') == 'The best'
    True
    >>> from_camel('MiXedCaSe is not camelCase') == 'MiXedCaSe is not camel case'
    True
    """
    if not string:
        return string

    out = []
    for pos, char in enumerate(string):
        need_split, need_lower = _camel_split_and_lower(string, pos)
        separator = ' ' if need_split else ''
        out.append(separator + (char.lower() if need_lower else char))

    return ''.join(out)

View file

@ -0,0 +1,341 @@
# Version 2013112900, Last Updated Fri Nov 29 07:07:01 2013 UTC
AC
AD
AE
AERO
AF
AG
AI
AL
AM
AN
AO
AQ
AR
ARPA
AS
ASIA
AT
AU
AW
AX
AZ
BA
BB
BD
BE
BF
BG
BH
BI
BIKE
BIZ
BJ
BM
BN
BO
BR
BS
BT
BV
BW
BY
BZ
CA
CAMERA
CAT
CC
CD
CF
CG
CH
CI
CK
CL
CLOTHING
CM
CN
CO
COM
CONSTRUCTION
CONTRACTORS
COOP
CR
CU
CV
CW
CX
CY
CZ
DE
DIAMONDS
DIRECTORY
DJ
DK
DM
DO
DZ
EC
EDU
EE
EG
ENTERPRISES
EQUIPMENT
ER
ES
ESTATE
ET
EU
FI
FJ
FK
FM
FO
FR
GA
GALLERY
GB
GD
GE
GF
GG
GH
GI
GL
GM
GN
GOV
GP
GQ
GR
GRAPHICS
GS
GT
GU
GURU
GW
GY
HK
HM
HN
HOLDINGS
HR
HT
HU
ID
IE
IL
IM
IN
INFO
INT
IO
IQ
IR
IS
IT
JE
JM
JO
JOBS
JP
KE
KG
KH
KI
KITCHEN
KM
KN
KP
KR
KW
KY
KZ
LA
LAND
LB
LC
LI
LIGHTING
LK
LR
LS
LT
LU
LV
LY
MA
MC
MD
ME
MG
MH
MIL
MK
ML
MM
MN
MO
MOBI
MP
MQ
MR
MS
MT
MU
MUSEUM
MV
MW
MX
MY
MZ
NA
NAME
NC
NE
NET
NF
NG
NI
NL
NO
NP
NR
NU
NZ
OM
ORG
PA
PE
PF
PG
PH
PHOTOGRAPHY
PK
PL
PLUMBING
PM
PN
POST
PR
PRO
PS
PT
PW
PY
QA
RE
RO
RS
RU
RW
SA
SB
SC
SD
SE
SEXY
SG
SH
SI
SINGLES
SJ
SK
SL
SM
SN
SO
SR
ST
SU
SV
SX
SY
SZ
TATTOO
TC
TD
TECHNOLOGY
TEL
TF
TG
TH
TIPS
TJ
TK
TL
TM
TN
TO
TODAY
TP
TR
TRAVEL
TT
TV
TW
TZ
UA
UG
UK
US
UY
UZ
VA
VC
VE
VENTURES
VG
VI
VN
VOYAGE
VU
WF
WS
XN--3E0B707E
XN--45BRJ9C
XN--80AO21A
XN--80ASEHDB
XN--80ASWG
XN--90A3AC
XN--CLCHC0EA0B2G2A9GCD
XN--FIQS8S
XN--FIQZ9S
XN--FPCRJ9C3D
XN--FZC2C9E2C
XN--GECRJ9C
XN--H2BRJ9C
XN--J1AMH
XN--J6W193G
XN--KPRW13D
XN--KPRY57D
XN--L1ACC
XN--LGBBAT1AD8J
XN--MGB9AWBF
XN--MGBA3A4F16A
XN--MGBAAM7A8H
XN--MGBAYH7GPA
XN--MGBBH1A71E
XN--MGBC0A9AZCG
XN--MGBERP4A5D4AR
XN--MGBX4CD0AB
XN--NGBC5AZD
XN--O3CW4H
XN--OGBPF8FL
XN--P1AI
XN--PGBS0DH
XN--Q9JYB4C
XN--S9BRJ9C
XN--UNUP4Y
XN--WGBH1C
XN--WGBL6A
XN--XKC2AL3HYE2A
XN--XKC2DL3A5EE0H
XN--YFRO4I67O
XN--YGBI2AMMX
XXX
YE
YT
ZA
ZM
ZW

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,92 +18,13 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import base_text_type, Guess
from guessit.patterns import canonical_form
from guessit.textutils import clean_string
import logging
log = logging.getLogger(__name__)
from __future__ import absolute_import, division, print_function, unicode_literals
def found_property(node, name, confidence):
node.guess = Guess({name: node.clean_value}, confidence=confidence, raw=node.value)
log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
class TransformerException(Exception):
def __init__(self, transformer, message):
# Call the base class constructor with the parameters it needs
Exception.__init__(self, message)
def format_guess(guess):
"""Format all the found values to their natural type.
For instance, a year would be stored as an int value, etc...
Note that this modifies the dictionary given as input.
"""
for prop, value in guess.items():
if prop in ('season', 'episodeNumber', 'year', 'cdNumber',
'cdNumberTotal', 'bonusNumber', 'filmNumber'):
guess[prop] = int(guess[prop])
elif isinstance(value, base_text_type):
if prop in ('edition',):
value = clean_string(value)
guess[prop] = canonical_form(value).replace('\\', '')
return guess
def find_and_split_node(node, strategy, logger):
string = ' %s ' % node.value # add sentinels
for matcher, confidence, args, kwargs in strategy:
all_args = [string]
if getattr(matcher, 'use_node', False):
all_args.append(node)
if args:
all_args.append(args)
if kwargs:
result, span = matcher(*all_args, **kwargs)
else:
result, span = matcher(*all_args)
if result:
# readjust span to compensate for sentinels
span = (span[0] - 1, span[1] - 1)
if isinstance(result, Guess):
if confidence is None:
confidence = result.confidence(list(result.keys())[0])
else:
if confidence is None:
confidence = 1.0
guess = format_guess(Guess(result, confidence=confidence, raw=string[span[0] + 1:span[1] + 1]))
msg = 'Found with confidence %.2f: %s' % (confidence, guess)
(logger or log).debug(msg)
node.partition(span)
absolute_span = (span[0] + node.offset, span[1] + node.offset)
for child in node.children:
if child.span == absolute_span:
child.guess = guess
else:
find_and_split_node(child, strategy, logger)
return
class SingleNodeGuesser(object):
def __init__(self, guess_func, confidence, logger, *args, **kwargs):
self.guess_func = guess_func
self.confidence = confidence
self.logger = logger
self.args = args
self.kwargs = kwargs
def process(self, mtree):
# strategy is a list of pairs (guesser, confidence)
# - if the guesser returns a guessit.Guess and confidence is specified,
# it will override it, otherwise it will leave the guess confidence
# - if the guesser returns a simple dict as a guess and confidence is
# specified, it will use it, or 1.0 otherwise
strategy = [ (self.guess_func, self.confidence, self.args, self.kwargs) ]
for node in mtree.unidentified_leaves():
find_and_split_node(node, strategy, self.logger)
self.transformer = transformer

View file

@ -0,0 +1,60 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.containers import PropertiesContainer
from guessit.matcher import GuessFinder
from guessit.plugins.transformers import Transformer
import re
class ExpectedSeries(Transformer):
    """Transformer that looks for series names supplied by the caller.

    The names are read from the ``expected_series`` option, which the
    ``-S`` / ``--expected-series`` argument fills (it can be repeated).
    """

    def __init__(self):
        # 230 is handed to the Transformer base initializer
        # (presumably the plugin priority -- confirm against Transformer).
        Transformer.__init__(self, 230)

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        """Add the ``-S`` / ``--expected-series`` argument to ``naming_opts``."""
        naming_opts.add_argument(
            '-S', '--expected-series',
            action='append',
            dest='expected_series',
            help='Expected series to parse (can be used multiple times)')

    def should_process(self, mtree, options=None):
        """Return a truthy value only when ``expected_series`` was supplied."""
        return options and options.get('expected_series')

    def expected_series(self, string, node=None, options=None):
        """Search ``string`` for any of the expected series names.

        Entries starting with ``re:`` are treated as regular expressions: the
        prefix is removed, spaces are replaced by ``-`` and the pattern is
        registered with ``enhance=True``. Every other entry is escaped with
        ``re.escape`` and registered with ``enhance=False``.
        """
        container = PropertiesContainer(enhance=True, canonical_from_pattern=False)

        for entry in options.get('expected_series'):
            is_regex = entry.startswith('re:')
            if is_regex:
                pattern = entry[3:].replace(' ', '-')
            else:
                pattern = re.escape(entry)
            container.register_property('series', pattern, enhance=is_regex)

        matches = container.find_properties(string, node, options)
        return container.as_guess(matches, string)

    def supported_properties(self):
        """List the property names this transformer can produce."""
        return ['series']

    def process(self, mtree, options=None):
        """Run :meth:`expected_series` over the unidentified leaves of ``mtree``."""
        finder = GuessFinder(self.expected_series, None, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())

View file

@ -0,0 +1,61 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.containers import PropertiesContainer
from guessit.matcher import GuessFinder
from guessit.plugins.transformers import Transformer
import re
class ExpectedTitle(Transformer):
    """Transformer that looks for titles supplied by the caller.

    The titles are read from the ``expected_title`` option, which the
    ``-T`` / ``--expected-title`` argument fills (it can be repeated).
    """

    def __init__(self):
        # 225 is handed to the Transformer base initializer
        # (presumably the plugin priority -- confirm against Transformer).
        Transformer.__init__(self, 225)

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        """Add the ``-T`` / ``--expected-title`` argument to ``naming_opts``."""
        naming_opts.add_argument(
            '-T', '--expected-title',
            action='append',
            dest='expected_title',
            help='Expected title (can be used multiple times)')

    def should_process(self, mtree, options=None):
        """Return a truthy value only when ``expected_title`` was supplied."""
        return options and options.get('expected_title')

    def expected_titles(self, string, node=None, options=None):
        """Search ``string`` for any of the expected titles.

        Entries starting with ``re:`` are treated as regular expressions: the
        prefix is removed, spaces are replaced by ``-`` and the pattern is
        registered with ``enhance=True``. Every other entry is escaped with
        ``re.escape`` and registered with ``enhance=False``.
        """
        container = PropertiesContainer(enhance=True, canonical_from_pattern=False)

        for entry in options.get('expected_title'):
            is_regex = entry.startswith('re:')
            if is_regex:
                pattern = entry[3:].replace(' ', '-')
            else:
                pattern = re.escape(entry)
            container.register_property('title', pattern, enhance=is_regex)

        matches = container.find_properties(string, node, options)
        return container.as_guess(matches, string)

    def supported_properties(self):
        """List the property names this transformer can produce."""
        return ['title']

    def process(self, mtree, options=None):
        """Run :meth:`expected_titles` over the unidentified leaves of ``mtree``."""
        finder = GuessFinder(self.expected_titles, None, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,16 +18,22 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.transfo import found_property
import logging
from __future__ import absolute_import, division, print_function, unicode_literals
log = logging.getLogger(__name__)
from guessit.plugins.transformers import Transformer
from guessit.matcher import found_property
def process(mtree):
class GuessBonusFeatures(Transformer):
def __init__(self):
Transformer.__init__(self, -150)
def supported_properties(self):
return ['bonusNumber', 'bonusTitle', 'filmNumber', 'filmSeries', 'title', 'series']
def process(self, mtree, options=None):
def previous_group(g):
for leaf in mtree.unidentified_leaves()[::-1]:
for leaf in reversed(list(mtree.unidentified_leaves())):
if leaf.node_idx < g.node_idx:
return leaf
@ -39,23 +45,23 @@ def process(mtree):
def same_group(g1, g2):
return g1.node_idx[:2] == g2.node_idx[:2]
bonus = [ node for node in mtree.leaves() if 'bonusNumber' in node.guess ]
bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
if bonus:
bonusTitle = next_group(bonus[0])
if same_group(bonusTitle, bonus[0]):
found_property(bonusTitle, 'bonusTitle', 0.8)
bonus_title = next_group(bonus[0])
if bonus_title and same_group(bonus_title, bonus[0]):
found_property(bonus_title, 'bonusTitle', confidence=0.8)
filmNumber = [ node for node in mtree.leaves()
if 'filmNumber' in node.guess ]
if filmNumber:
filmSeries = previous_group(filmNumber[0])
found_property(filmSeries, 'filmSeries', 0.9)
film_number = [node for node in mtree.leaves()
if 'filmNumber' in node.guess]
if film_number:
film_series = previous_group(film_number[0])
found_property(film_series, 'filmSeries', confidence=0.9)
title = next_group(filmNumber[0])
found_property(title, 'title', 0.9)
title = next_group(film_number[0])
found_property(title, 'title', confidence=0.9)
season = [ node for node in mtree.leaves() if 'season' in node.guess ]
season = [node for node in mtree.leaves() if 'season' in node.guess]
if season and 'bonusNumber' in mtree.info:
series = previous_group(season[0])
if same_group(series, season[0]):
found_property(series, 'series', 0.9)
found_property(series, 'series', confidence=0.9)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,31 +18,107 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.country import Country
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from babelfish import Country
from guessit import Guess
from guessit.textutils import iter_words
from guessit.matcher import GuessFinder, found_guess
from guessit.language import LNG_COMMON_WORDS
import babelfish
import logging
log = logging.getLogger(__name__)
# list of common words which could be interpreted as countries, but which
# are far too common to be able to say they represent a country
country_common_words = frozenset([ 'bt', 'bb' ])
def process(mtree):
for node in mtree.unidentified_leaves():
if len(node.node_idx) == 2:
c = node.value[1:-1].lower()
if c in country_common_words:
continue
class GuessCountry(Transformer):
def __init__(self):
Transformer.__init__(self, -170)
self.replace_language = frozenset(['uk'])
# only keep explicit groups (enclosed in parentheses/brackets)
if node.value[0] + node.value[-1] not in ['()', '[]', '{}']:
continue
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
naming_opts.add_argument('-C', '--allowed-country', action='append', dest='allowed_countries',
help='Allowed country (can be used multiple times)')
def supported_properties(self):
return ['country']
def should_process(self, mtree, options=None):
options = options or {}
return options.get('country', True)
def _scan_country(self, country, strict=False):
"""
Find a country if it is at the start or end of country string
"""
words_match = list(iter_words(country.lower()))
s = ""
start = None
for word_match in words_match:
if not start:
start = word_match.start(0)
s += word_match.group(0)
try:
country = Country(c, strict=True)
except ValueError:
return Country.fromguessit(s), (start, word_match.end(0))
except babelfish.Error:
continue
node.guess = Guess(country=country, confidence=1.0, raw=c)
words_match.reverse()
s = ""
end = None
for word_match in words_match:
if not end:
end = word_match.end(0)
s = word_match.group(0) + s
try:
return Country.fromguessit(s), (word_match.start(0), end)
except babelfish.Error:
continue
return Country.fromguessit(country), (start, end)
def is_valid_country(self, country, options=None):
    """Tell whether ``country`` may be reported as a guess.

    When the ``allowed_countries`` option is set, the country is valid only
    if its name or its alpha2 code is listed there. The comparison is
    case-insensitive on both sides, so ``US``, ``us`` and ``United States``
    all select the same country.

    Without that option, the country is valid unless its name or alpha2
    code is one of ``LNG_COMMON_WORDS``.

    :param country: object exposing ``name`` and ``alpha2`` string attributes
    :param options: optional mapping of options
    :return: True when the country is acceptable, False otherwise
    """
    allowed_countries = options.get('allowed_countries') if options else None
    if allowed_countries:
        # The country side is lowercased below, so the user-supplied side has
        # to be lowercased as well or upper/mixed-case entries never match.
        if hasattr(allowed_countries, 'lower'):
            # A bare string: keep the historical substring semantics.
            allowed = allowed_countries.lower()
        else:
            allowed = [entry.lower() for entry in allowed_countries]
        return country.name.lower() in allowed or country.alpha2.lower() in allowed

    return (country.name.lower() not in LNG_COMMON_WORDS and
            country.alpha2.lower() not in LNG_COMMON_WORDS)
def guess_country(self, string, node=None, options=None):
    """Try to read a country out of ``string``.

    Returns a ``Guess`` holding the country (confidence 1.0) when one is
    found and accepted by :meth:`is_valid_country`; otherwise returns the
    pair ``(None, None)``. Strings that are themselves one of
    ``LNG_COMMON_WORDS`` are never scanned.
    """
    candidate = string.strip().lower()
    if candidate in LNG_COMMON_WORDS:
        return None, None

    try:
        country, country_span = self._scan_country(candidate, True)
        if self.is_valid_country(country, options):
            # The span is shifted right by one character (presumably to account
            # for a leading sentinel removed by strip() -- confirm with caller).
            shifted_span = (country_span[0] + 1, country_span[1] + 1)
            return Guess(country=country, confidence=1.0, input=node.value, span=shifted_span)
    except babelfish.Error:
        # No country could be built from this text: fall through to "no guess".
        pass

    return None, None
def process(self, mtree, options=None):
    """Guess countries on unidentified leaves, then fix mis-read languages.

    After the regular country search, every leaf carrying a ``language``
    guess whose cleaned text is listed in ``self.replace_language`` has that
    language reset to None and, when the text is a valid country, receives a
    country guess (confidence 0.9) instead.
    """
    finder = GuessFinder(self.guess_country, None, self.log, options)
    finder.process_nodes(mtree.unidentified_leaves())

    for leaf in mtree.leaves_containing('language'):
        text = leaf.clean_value.lower()
        if text not in self.replace_language:
            continue

        leaf.guess.set('language', None)
        try:
            country = Country.fromguessit(text)
            if self.is_valid_country(country, options):
                country_guess = Guess(country=country, confidence=0.9, input=leaf.value, span=leaf.span)
                found_guess(leaf, country_guess, logger=log)
        except babelfish.Error:
            # Not convertible to a country: the language stays cleared.
            pass
def post_process(self, mtree, options=None, *args, **kwargs):
    """Append the guessed country to every guessed series name.

    If both a ``series`` and a ``country`` were guessed, each series value
    gets `` (<country>)`` appended, using the ``guessit`` attribute of the
    first country found. Nothing happens when either is missing.
    """
    series_leaves = list(mtree.leaves_containing('series'))
    country_leaves = list(mtree.leaves_containing('country'))

    if not (series_leaves and country_leaves):
        return

    # Only the first country leaf is used, whatever the number found.
    first_country = country_leaves[0]
    for leaf in series_leaves:
        leaf.guess['series'] += ' (%s)' % str(first_country.guess['country'].guessit)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,21 +18,32 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.date import search_date
import logging
log = logging.getLogger(__name__)
def guess_date(string):
date, span = search_date(string)
class GuessDate(Transformer):
def __init__(self):
Transformer.__init__(self, 50)
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
naming_opts.add_argument('-Y', '--date-year-first', action='store_true', dest='date_year_first', default=None,
help='If short date is found, consider the first digits as the year.')
naming_opts.add_argument('-D', '--date-day-first', action='store_true', dest='date_day_first', default=None,
help='If short date is found, consider the second digits as the day.')
def supported_properties(self):
return ['date']
def guess_date(self, string, node=None, options=None):
date, span = search_date(string, options.get('date_year_first') if options else False, options.get('date_day_first') if options else False)
if date:
return { 'date': date }, span
return {'date': date}, span
else:
return None, None
def process(mtree):
SingleNodeGuesser(guess_date, 1.0, log).process(mtree)
def process(self, mtree, options=None):
GuessFinder(self.guess_date, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())

View file

@ -0,0 +1,64 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.matcher import found_guess
from guessit.containers import PropertiesContainer
import itertools
class GuessEpisodeDetails(Transformer):
    """Transformer that tags episode details such as "Special" or "Pilot".

    The recognised values are registered under the ``episodeDetails``
    property of ``self.container``.
    """

    def __init__(self):
        # -205 is handed to the Transformer base initializer
        # (presumably the plugin priority -- confirm against Transformer).
        Transformer.__init__(self, -205)

        container = PropertiesContainer()
        container.register_property('episodeDetails', 'Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired')
        # "Extra" and "Extras" are both reported as "Extras".
        container.register_property('episodeDetails', 'Extras?', canonical_form='Extras')
        self.container = container

    def guess_details(self, string, node=None, options=None):
        """Return the ``episodeDetails`` guesses found in ``string``."""
        found = self.container.find_properties(string, node, options, 'episodeDetails', multiple=True)
        return self.container.as_guess(found, multiple=True)

    def second_pass_options(self, mtree, options=None):
        """Request an ``episode`` second pass when details show up elsewhere.

        Returns ``{'type': 'episode'}`` if the current guess type does not
        start with ``episode`` and some unidentified leaf yields an
        ``episodeDetails`` guess; returns None otherwise.
        """
        if mtree.guess.get('type', '').startswith('episode'):
            return None

        for leaf in mtree.unidentified_leaves():
            found = self.container.find_properties(leaf.value, leaf, options, 'episodeDetails')
            if self.container.as_guess(found):
                return {'type': 'episode'}

        return None

    def supported_properties(self):
        """List the property names registered in the container."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Attach episode details to title and unidentified leaves.

        Only runs for episode-type guesses that either have no
        ``episodeNumber`` or have ``season`` equal to 0. Always returns None.
        """
        if not mtree.guess.get('type', '').startswith('episode'):
            return None

        if mtree.info.get('episodeNumber') and not (mtree.info.get('season') == 0):
            # A regular numbered episode: nothing to add.
            return None

        candidates = itertools.chain(mtree.leaves_containing('title'),
                                     mtree.unidentified_leaves())
        for leaf in candidates:
            for guess in self.guess_details(leaf.value, leaf, options):
                found_guess(leaf, guess, update_guess=False)

        return None

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,38 +18,53 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.transfo import found_property
from guessit.patterns import non_episode_title, unlikely_series
import logging
from __future__ import absolute_import, division, print_function, unicode_literals
log = logging.getLogger(__name__)
from guessit.plugins.transformers import Transformer, get_transformer
from guessit.textutils import reorder_title
from guessit.matcher import found_property
def match_from_epnum_position(mtree, node):
class GuessEpisodeInfoFromPosition(Transformer):
def __init__(self):
Transformer.__init__(self, -200)
def supported_properties(self):
return ['title', 'series']
def match_from_epnum_position(self, mtree, node, options):
epnum_idx = node.node_idx
# a few helper functions to be able to filter using high-level semantics
def before_epnum_in_same_pathgroup():
return [ leaf for leaf in mtree.unidentified_leaves()
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
if (leaf.node_idx[0] == epnum_idx[0] and
leaf.node_idx[1:] < epnum_idx[1:]) ]
leaf.node_idx[1:] < epnum_idx[1:])]
def after_epnum_in_same_pathgroup():
return [ leaf for leaf in mtree.unidentified_leaves()
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
if (leaf.node_idx[0] == epnum_idx[0] and
leaf.node_idx[1:] > epnum_idx[1:]) ]
leaf.node_idx[1:] > epnum_idx[1:])]
def after_epnum_in_same_explicitgroup():
return [ leaf for leaf in mtree.unidentified_leaves()
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
if (leaf.node_idx[:2] == epnum_idx[:2] and
leaf.node_idx[2:] > epnum_idx[2:]) ]
leaf.node_idx[2:] > epnum_idx[2:])]
# epnumber is the first group and there are only 2 after it in same
# path group
# -> series title - episode title
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
if n.clean_value.lower() not in non_episode_title ]
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
if ('title' not in mtree.info and # no title
'series' in mtree.info and # series present
before_epnum_in_same_pathgroup() == [] and # no groups before
len(title_candidates) == 1): # only 1 group after
found_property(title_candidates[0], 'title', confidence=0.4)
return
if ('title' not in mtree.info and # no title
before_epnum_in_same_pathgroup() == [] and # no groups before
len(title_candidates) == 2): # only 2 groups after
@ -65,18 +80,14 @@ def match_from_epnum_position(mtree, node):
found_property(series_candidates[0], 'series', confidence=0.7)
# only 1 group after (in the same path group) and it's probably the
# episode title
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
if n.clean_value.lower() not in non_episode_title ]
# episode title.
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
if len(title_candidates) == 1:
found_property(title_candidates[0], 'title', confidence=0.5)
return
else:
# try in the same explicit group, with lower confidence
title_candidates = [ n for n in after_epnum_in_same_explicitgroup()
if n.clean_value.lower() not in non_episode_title
]
title_candidates = self._filter_candidates(after_epnum_in_same_explicitgroup(), options)
if len(title_candidates) == 1:
found_property(title_candidates[0], 'title', confidence=0.4)
return
@ -85,8 +96,7 @@ def match_from_epnum_position(mtree, node):
return
# get the one with the longest value
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
if n.clean_value.lower() not in non_episode_title ]
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
if title_candidates:
maxidx = -1
maxv = -1
@ -96,51 +106,76 @@ def match_from_epnum_position(mtree, node):
maxv = len(c.clean_value)
found_property(title_candidates[maxidx], 'title', confidence=0.3)
def should_process(self, mtree, options=None):
    """Tell whether this transformer applies to *mtree*.

    :param mtree: match tree whose ``guess`` mapping may hold a ``'type'``
    :param options: optional mapping; a truthy ``'skip_title'`` disables
        the transformer
    :return: ``True`` only when titles are not skipped and the guessed
        type starts with ``'episode'``
    :rtype: bool
    """
    if options and options.get('skip_title'):
        return False
    file_type = mtree.guess.get('type', '')
    return file_type.startswith('episode')
def process(mtree):
def _filter_candidates(self, candidates, options):
    """Discard candidates that look like episode details.

    A candidate is dropped when the container of the
    ``guess_episode_details`` transformer finds properties in its value.
    When that transformer is not available, *candidates* is returned
    untouched (same object, not a copy).

    :param candidates: iterable of nodes exposing a ``value`` attribute
    :param options: options forwarded to ``find_properties``
    :return: list of the remaining nodes, or *candidates* itself
    """
    details = get_transformer('guess_episode_details')
    if not details:
        return candidates
    kept = []
    for candidate in candidates:
        hits = details.container.find_properties(candidate.value, candidate, options, re_match=True)
        if not hits:
            kept.append(candidate)
    return kept
def process(self, mtree, options=None):
"""
try to identify the remaining unknown groups by looking at their
position relative to other known elements
"""
eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
if not eps:
eps = [node for node in mtree.leaves() if 'date' in node.guess]
if eps:
match_from_epnum_position(mtree, eps[0])
self.match_from_epnum_position(mtree, eps[0], options)
else:
# if we don't have the episode number, but at least 2 groups in the
# basename, then it's probably series - eptitle
basename = mtree.node_at((-2,))
title_candidates = [ n for n in basename.unidentified_leaves()
if n.clean_value.lower() not in non_episode_title
]
if len(title_candidates) >= 2:
found_property(title_candidates[0], 'series', 0.4)
found_property(title_candidates[1], 'title', 0.4)
title_candidates = self._filter_candidates(basename.unidentified_leaves(), options)
if len(title_candidates) >= 2 and 'series' not in mtree.info:
found_property(title_candidates[0], 'series', confidence=0.4)
found_property(title_candidates[1], 'title', confidence=0.4)
elif len(title_candidates) == 1:
# but if there's only one candidate, it's probably the series name
found_property(title_candidates[0], 'series', 0.4)
found_property(title_candidates[0], 'series' if 'series' not in mtree.info else 'title', confidence=0.4)
# if we only have 1 remaining valid group in the folder containing the
# file, then it's likely that it is the series name
try:
series_candidates = mtree.node_at((-3,)).unidentified_leaves()
series_candidates = list(mtree.node_at((-3,)).unidentified_leaves())
except ValueError:
series_candidates = []
if len(series_candidates) == 1:
found_property(series_candidates[0], 'series', 0.3)
found_property(series_candidates[0], 'series', confidence=0.3)
# if there's a path group that only contains the season info, then the
# previous one is most likely the series title (ie: ../series/season X/..)
eps = [ node for node in mtree.nodes()
if 'season' in node.guess and 'episodeNumber' not in node.guess ]
eps = [node for node in mtree.nodes()
if 'season' in node.guess and 'episodeNumber' not in node.guess]
if eps:
previous = [ node for node in mtree.unidentified_leaves()
if node.node_idx[0] == eps[0].node_idx[0] - 1 ]
previous = [node for node in mtree.unidentified_leaves()
if node.node_idx[0] == eps[0].node_idx[0] - 1]
if len(previous) == 1:
found_property(previous[0], 'series', 0.5)
found_property(previous[0], 'series', confidence=0.5)
# reduce the confidence of unlikely series
# If we found a title but no series name, use that title as the series name.
if 'series' not in mtree.info and 'title' in mtree.info:
title_leaf = mtree.first_leaf_containing('title')
metadata = title_leaf.guess.metadata('title')
value = title_leaf.guess['title']
del title_leaf.guess['title']
title_leaf.guess.set('series', value, metadata=metadata)
def post_process(self, mtree, options=None):
for node in mtree.nodes():
if 'series' in node.guess:
if node.guess['series'].lower() in unlikely_series:
new_confidence = node.guess.confidence('series') * 0.5
node.guess.set_confidence('series', new_confidence)
if 'series' not in node.guess:
continue
node.guess['series'] = reorder_title(node.guess['series'])

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,49 +18,176 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import episode_rexps
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.patterns import sep, build_or_pattern
from guessit.containers import PropertiesContainer, WeakValidator, NoValidator, ChainedValidator, DefaultValidator, \
FormatterValidator
from guessit.patterns.numeral import numeral, digital_numeral, parse_numeral
import re
import logging
log = logging.getLogger(__name__)
def number_list(s):
    """Return the integers embedded in *s* as a list.

    Every maximal run of ASCII digits is converted to an ``int``. When
    exactly two numbers are found they are treated as the bounds of an
    episode interval and every number in between (both bounds included)
    is returned.

    A real ``list`` is always returned: on Python 3 a bare ``range``
    object would otherwise leak to callers that store the value.

    :param s: text containing zero or more numbers
    :return: list of ints (empty when *s* holds no digits)
    """
    numbers = [int(n) for n in re.findall('[0-9]+', s)]
    if len(numbers) == 2:
        # it is an episode interval, return all numbers in between
        return list(range(numbers[0], numbers[1] + 1))
    return numbers
def guess_episodes_rexps(string):
    """Try each entry of ``episode_rexps`` against *string*.

    The first expression that matches (case-insensitively) wins. Its
    named groups become a ``Guess``; an ``episodeNumber`` or
    ``bonusNumber`` group is converted with ``number_list`` and an
    ``episodeList`` is added when several episode numbers were found.

    :param string: text to search
    :return: ``(guess, span)`` for the first match, where *span* is the
        match span shifted by the entry's span adjustment, or
        ``(None, None)`` when nothing matches
    """
    for rexp, confidence, span_adjust in episode_rexps:
        match = re.search(rexp, string, re.IGNORECASE)
        if not match:
            continue

        start = match.start() + span_adjust[0]
        end = match.end() + span_adjust[1]
        span = (start, end)
        raw = string[start:end]
        guess = Guess(match.groupdict(), confidence=confidence, raw=raw)

        # a single episode number, or a whole list of them?
        if guess.get('episodeNumber'):
            episodes = number_list(guess['episodeNumber'])
            guess.set('episodeNumber', episodes[0], confidence=confidence, raw=raw)
            if len(episodes) > 1:
                guess.set('episodeList', episodes, confidence=confidence, raw=raw)

        if guess.get('bonusNumber'):
            bonuses = number_list(guess['bonusNumber'])
            guess.set('bonusNumber', bonuses[0], confidence=confidence, raw=raw)

        return guess, span

    return None, None
def process(mtree):
    """Run ``guess_episodes_rexps`` over *mtree* through a ``SingleNodeGuesser``."""
    guesser = SingleNodeGuesser(guess_episodes_rexps, None, log)
    guesser.process(mtree)
class GuessEpisodesRexps(Transformer):
    """Transformer detecting season / episode numbering with regular expressions.

    All patterns are registered on ``self.container`` in ``__init__``; the
    registration order below is the original one and is kept unchanged.
    """

    def __init__(self):
        # NOTE(review): 20 is handed straight to Transformer.__init__;
        # presumably the transformer priority -- confirm against the base class.
        Transformer.__init__(self, 20)

        range_separators = ['-', 'to', 'a']
        discrete_separators = ['&', 'and', 'et']
        of_separators = ['of', 'sur', '/', '\\']

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        # The generic separator pattern, minus anything that is a range
        # separator, also counts as a discrete separator.
        discrete_sep = sep
        for range_separator in range_separators:
            discrete_sep = discrete_sep.replace(range_separator, '')
        discrete_separators.append(discrete_sep)
        all_separators = list(range_separators)
        all_separators.extend(discrete_separators)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
        discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
        all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

        def list_parser(value, property_list_name, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
            """Parse *value* into one number or a list of numbers.

            *value* is first cut on the discrete separators, then every
            piece is expanded on the range separators (bounds included).

            :param value: matched text, e.g. several numbers with separators
            :param property_list_name: key under which the full list is
                returned when more than one number is found
            :param discrete_separators_re: compiled pattern splitting
                independent values
            :param range_separators_re: compiled pattern splitting range bounds
            :param allow_discrete: when false, a number smaller than the
                last kept one is dropped so the result never decreases
            :param fill_gaps: when true, the result is replaced by every
                integer between its minimum and maximum
            :return: ``None`` when nothing was parsed, the single number
                when only one was found, otherwise
                ``{None: first, property_list_name: numbers}``
            """
            discrete_elements = [x.strip() for x in discrete_separators_re.split(value) if x != '']

            # Re-attach range separators that the discrete split isolated
            # from their operands.
            proper_discrete_elements = []
            i = 0
            while i < len(discrete_elements):
                if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i + 1]):
                    proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1] + discrete_elements[i + 2])
                    i += 3
                else:
                    match = range_separators_re.search(discrete_elements[i])
                    if match and match.start() == 0:
                        # Glue onto the element appended last. The rebuilt
                        # list can be shorter than discrete_elements (a merge
                        # above consumes three entries), so ``i - 1`` is not
                        # a valid index into it; ``-1`` always is.
                        proper_discrete_elements[-1] = proper_discrete_elements[-1] + discrete_elements[i]
                    elif match and match.end() == len(discrete_elements[i]):
                        proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1])
                    else:
                        proper_discrete_elements.append(discrete_elements[i])
                    i += 1

            discrete_elements = proper_discrete_elements

            ret = []

            for discrete_element in discrete_elements:
                range_values = [x.strip() for x in range_separators_re.split(discrete_element) if x != '']
                if len(range_values) > 1:
                    # expand every consecutive pair of bounds
                    for x in range(0, len(range_values) - 1):
                        start_range_ep = parse_numeral(range_values[x])
                        end_range_ep = parse_numeral(range_values[x + 1])
                        for range_ep in range(start_range_ep, end_range_ep + 1):
                            if range_ep not in ret:
                                ret.append(range_ep)
                else:
                    discrete_value = parse_numeral(discrete_element)
                    if discrete_value not in ret:
                        ret.append(discrete_value)

            if len(ret) > 1:
                if not allow_discrete:
                    # replace discrete elements by ranges: keep only the
                    # numbers that do not go below the last kept one
                    valid_ret = [ret[0]]
                    for candidate in ret[1:]:
                        if candidate >= valid_ret[-1]:
                            valid_ret.append(candidate)
                    ret = valid_ret
                if fill_gaps:
                    ret = list(range(min(ret), max(ret) + 1))
                if len(ret) > 1:
                    return {None: ret[0], property_list_name: ret}
            if len(ret) > 0:
                return ret[0]
            return None

        def episode_parser_x(value):
            """Parse an episode list whose items are separated by ``x``."""
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE))

        def episode_parser_e(value):
            """Parse an episode list whose items are separated by ``e``, filling gaps."""
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile('e', re.IGNORECASE), fill_gaps=True)

        def episode_parser(value):
            """Parse an episode list using the default separators."""
            return list_parser(value, 'episodeList')

        def season_parser(value):
            """Parse a season list using the default separators."""
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Accept a match only when its second group is shorter than 3 characters."""

            def validate(self, prop, string, node, match, entry_start, entry_end):
                return len(match.group(2)) < 3  # limit

        self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral)
        self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep, confidence=1.0, formatter={None: parse_numeral, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))

        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser}, validator=NoValidator())
        # self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(None, sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(None, r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'season': season_parser}, validator=NoValidator())

        # The ``\d`` fragments below are raw strings so the backslash is
        # not treated as a (deprecated) invalid string escape; the
        # resulting pattern text is unchanged.
        self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.6, formatter=parse_numeral)
        self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.7, formatter=parse_numeral)

        self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
        # NOTE(review): identical to the registration just above; kept as-is
        # because its effect on the container is not visible here -- confirm
        # whether it is a copy/paste duplicate.
        self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
        self.container.register_property('episodeNumber', r'^ ?0(\d{1,2})' + sep, confidence=0.4, formatter=parse_numeral)
        self.container.register_property('episodeNumber', sep + r'(\d{2}) ?$', confidence=0.4, formatter=parse_numeral)
        self.container.register_property('episodeNumber', sep + r'0(\d{1,2}) ?$', confidence=0.4, formatter=parse_numeral)

        self.container.register_property(None, r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))', confidence=0.7, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

        self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))', confidence=1.0, formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        """Add the ``-E`` / ``--episode-prefer-number`` flag to *naming_opts*."""
        naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number', default=False,
                                 help='Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
                                      'it will be guessed as season 2, episodeNumber 13')

    def supported_properties(self):
        """Return the names of the properties this transformer can set."""
        return ['episodeNumber', 'season', 'episodeList', 'seasonList', 'episodeCount', 'seasonCount', 'version', 'other']

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Search *string* with the registered patterns and return the container's guess."""
        found = self.container.find_properties(string, node, options)
        return self.container.as_guess(found, string)

    def should_process(self, mtree, options=None):
        """Return whether the guessed type of *mtree* starts with ``'episode'``."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Apply ``guess_episodes_rexps`` to the unidentified leaves of *mtree*."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,173 +18,196 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import Guess
from guessit.patterns import (subtitle_exts, info_exts, video_exts, episode_rexps,
find_properties, compute_canonical_form)
from guessit.date import valid_year
from guessit.textutils import clean_string
from __future__ import absolute_import, division, print_function, unicode_literals
import mimetypes
import os.path
import re
import mimetypes
import logging
log = logging.getLogger(__name__)
from guessit.guess import Guess
from guessit.patterns.extension import subtitle_exts, info_exts, video_exts
from guessit.transfo import TransformerException
from guessit.plugins.transformers import Transformer, get_transformer
from guessit.matcher import log_found_guess, found_guess, found_property
# List of well known movies and series, hardcoded because they cannot be
# guessed appropriately otherwise
MOVIES = [ 'OSS 117' ]
SERIES = [ 'Band of Brothers' ]
MOVIES = [ m.lower() for m in MOVIES ]
SERIES = [ s.lower() for s in SERIES ]
class GuessFiletype(Transformer):
def __init__(self):
Transformer.__init__(self, 200)
# List of well known movies and series, hardcoded because they cannot be
# guessed appropriately otherwise
MOVIES = ['OSS 117']
SERIES = ['Band of Brothers']
MOVIES = [m.lower() for m in MOVIES]
SERIES = [s.lower() for s in SERIES]
def guess_filetype(self, mtree, options=None):
options = options or {}
def guess_filetype(mtree, filetype):
# put the filetype inside a dummy container to be able to have the
# following functions work correctly as closures
# this is a workaround for python 2 which doesn't have the
# 'nonlocal' keyword (python 3 does have it)
filetype_container = [filetype]
# 'nonlocal' keyword which we could use here in the upgrade_* functions
# (python 3 does have it)
filetype_container = [mtree.guess.get('type')]
other = {}
filename = mtree.string
def upgrade_episode():
if filetype_container[0] == 'video':
filetype_container[0] = 'episode'
elif filetype_container[0] == 'subtitle':
if filetype_container[0] == 'subtitle':
filetype_container[0] = 'episodesubtitle'
elif filetype_container[0] == 'info':
filetype_container[0] = 'episodeinfo'
elif (not filetype_container[0] or
filetype_container[0] == 'video'):
filetype_container[0] = 'episode'
def upgrade_movie():
if filetype_container[0] == 'video':
filetype_container[0] = 'movie'
elif filetype_container[0] == 'subtitle':
if filetype_container[0] == 'subtitle':
filetype_container[0] = 'moviesubtitle'
elif filetype_container[0] == 'info':
filetype_container[0] = 'movieinfo'
elif (not filetype_container[0] or
filetype_container[0] == 'video'):
filetype_container[0] = 'movie'
def upgrade_subtitle():
if 'movie' in filetype_container[0]:
if filetype_container[0] == 'movie':
filetype_container[0] = 'moviesubtitle'
elif 'episode' in filetype_container[0]:
elif filetype_container[0] == 'episode':
filetype_container[0] = 'episodesubtitle'
else:
elif not filetype_container[0]:
filetype_container[0] = 'subtitle'
def upgrade_info():
if 'movie' in filetype_container[0]:
if filetype_container[0] == 'movie':
filetype_container[0] = 'movieinfo'
elif 'episode' in filetype_container[0]:
elif filetype_container[0] == 'episode':
filetype_container[0] = 'episodeinfo'
else:
elif not filetype_container[0]:
filetype_container[0] = 'info'
def upgrade(type='unknown'):
if filetype_container[0] == 'autodetect':
filetype_container[0] = type
# look at the extension first
fileext = os.path.splitext(filename)[1][1:].lower()
if fileext in subtitle_exts:
upgrade_subtitle()
other = { 'container': fileext }
other = {'container': fileext}
elif fileext in info_exts:
upgrade_info()
other = { 'container': fileext }
other = {'container': fileext}
elif fileext in video_exts:
upgrade(type='video')
other = { 'container': fileext }
other = {'container': fileext}
else:
upgrade(type='unknown')
other = { 'extension': fileext }
if fileext and not options.get('name_only'):
other = {'extension': fileext}
list(mtree.unidentified_leaves())[-1].guess = Guess(other)
# check whether we are in a 'Movies', 'Tv Shows', ... folder
folder_rexps = [ (r'Movies?', upgrade_movie),
folder_rexps = [(r'Movies?', upgrade_movie),
(r'Films?', upgrade_movie),
(r'Tv[ _-]?Shows?', upgrade_episode),
(r'Series', upgrade_episode)
]
(r'Series?', upgrade_episode),
(r'Episodes?', upgrade_episode)]
for frexp, upgrade_func in folder_rexps:
frexp = re.compile(frexp, re.IGNORECASE)
for pathgroup in mtree.children:
if frexp.match(pathgroup.value):
upgrade_func()
return filetype_container[0], other
# check for a few specific cases which will unintentionally make the
# following heuristics confused (eg: OSS 117 will look like an episode,
# season 1, epnum 17, when it is in fact a movie)
fname = clean_string(filename).lower()
for m in MOVIES:
fname = mtree.clean_string(filename).lower()
for m in self.MOVIES:
if m in fname:
log.debug('Found in exception list of movies -> type = movie')
self.log.debug('Found in exception list of movies -> type = movie')
upgrade_movie()
for s in SERIES:
return filetype_container[0], other
for s in self.SERIES:
if s in fname:
log.debug('Found in exception list of series -> type = episode')
self.log.debug('Found in exception list of series -> type = episode')
upgrade_episode()
return filetype_container[0], other
# now look whether there are some specific hints for episode vs movie
if filetype_container[0] in ('video', 'subtitle', 'info'):
# if we have an episode_rexp (eg: s02e13), it is an episode
for rexp, _, _ in episode_rexps:
match = re.search(rexp, filename, re.IGNORECASE)
if match:
log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode', rexp, match.group())
upgrade_episode()
break
# if we have a 3-4 digit number that's not a year, maybe an episode
match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
if match:
fullnumber = int(match.group()[1:-1])
#season = fullnumber // 100
epnumber = fullnumber % 100
possible = True
# check for validity
if epnumber > 40:
possible = False
if valid_year(fullnumber):
possible = False
if possible:
log.debug('Found possible episode number: %s (from string "%s") -> type = episode', epnumber, match.group())
episode_transformer = get_transformer('guess_episodes_rexps')
if episode_transformer:
filename_parts = list(x.value for x in mtree.unidentified_leaves());
filename_parts.append(filename)
for filename_part in filename_parts:
guess = episode_transformer.guess_episodes_rexps(filename_part)
if guess:
self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess)
upgrade_episode()
return filetype_container[0], other
properties_transformer = get_transformer('guess_properties')
if properties_transformer:
# if we have certain properties characteristic of episodes, it is an ep
for prop, value, _, _ in find_properties(filename):
log.debug('prop: %s = %s' % (prop, value))
if prop == 'episodeFormat':
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
found = properties_transformer.container.find_properties(filename, mtree, options, 'episodeFormat')
guess = properties_transformer.container.as_guess(found, filename)
if guess:
self.log.debug('Found characteristic property of episodes: %s"', guess)
upgrade_episode()
break
return filetype_container[0], other
elif compute_canonical_form('format', value) == 'DVB':
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
if weak_episode_transformer:
found = properties_transformer.container.find_properties(filename, mtree, options, 'crc32')
guess = properties_transformer.container.as_guess(found, filename)
if guess:
found = weak_episode_transformer.container.find_properties(filename, mtree, options)
guess = weak_episode_transformer.container.as_guess(found, filename)
if guess:
self.log.debug('Found characteristic property of episodes: %s"', guess)
upgrade_episode()
break
return filetype_container[0], other
found = properties_transformer.container.find_properties(filename, mtree, options, 'format')
guess = properties_transformer.container.as_guess(found, filename)
if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'):
# Use weak episodes only if TV or WEB source
weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
if weak_episode_transformer:
guess = weak_episode_transformer.guess_weak_episodes_rexps(filename)
if guess:
self.log.debug('Found guess_weak_episodes_rexps: %s -> type = episode', guess)
upgrade_episode()
return filetype_container[0], other
website_transformer = get_transformer('guess_website')
if website_transformer:
found = website_transformer.container.find_properties(filename, mtree, options, 'website')
guess = website_transformer.container.as_guess(found, filename)
if guess:
for namepart in ('tv', 'serie', 'episode'):
if namepart in guess['website']:
# origin-specific type
if 'tvu.org.ru' in filename:
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
self.log.debug('Found characteristic property of episodes: %s', guess)
upgrade_episode()
return filetype_container[0], other
if filetype_container[0] in ('subtitle', 'info') or (not filetype_container[0] and fileext in video_exts):
# if no episode info found, assume it's a movie
log.debug('Nothing characteristic found, assuming type = movie')
self.log.debug('Nothing characteristic found, assuming type = movie')
upgrade_movie()
filetype = filetype_container[0]
return filetype, other
if not filetype_container[0]:
self.log.debug('Nothing characteristic found, assuming type = unknown')
filetype_container[0] = 'unknown'
return filetype_container[0], other
def process(mtree, filetype='autodetect'):
filetype, other = guess_filetype(mtree, filetype)
def process(self, mtree, options=None):
"""guess the file type now (will be useful later)
"""
filetype, other = self.guess_filetype(mtree, options)
mtree.guess.set('type', filetype, confidence=1.0)
log.debug('Found with confidence %.2f: %s' % (1.0, mtree.guess))
log_found_guess(mtree.guess)
filetype_info = Guess(other, confidence=1.0)
# guess the mimetype of the filename
@ -195,5 +218,20 @@ def process(mtree, filetype='autodetect'):
filetype_info.update({'mimetype': mime}, confidence=1.0)
node_ext = mtree.node_at((-1,))
node_ext.guess = filetype_info
log.debug('Found with confidence %.2f: %s' % (1.0, node_ext.guess))
found_guess(node_ext, filetype_info)
if mtree.guess.get('type') in [None, 'unknown']:
if options.get('name_only'):
mtree.guess.set('type', 'movie', confidence=0.6)
else:
raise TransformerException(__name__, 'Unknown file type')
def post_process(self, mtree, options=None):
    """Re-type the tree as an episode when it has a date but no year.

    When ``mtree.info`` contains ``'date'``, lacks ``'year'`` and is not
    already typed ``'episode'``, the tree guess and every leaf holding a
    ``'type'`` are set to ``'episode'``, and every leaf holding a
    ``'title'`` has that property renamed to ``'series'``.

    :param mtree: match tree to adjust in place
    :param options: unused
    """
    info = mtree.info
    # A date without a year is taken as the sign of a TV show; anything
    # else is left alone.
    if 'date' not in info or 'year' in info or info.get('type') == 'episode':
        return

    mtree.guess['type'] = 'episode'
    for typed_leaf in mtree.leaves_containing('type'):
        typed_leaf.guess['type'] = 'episode'
    for titled_leaf in mtree.leaves_containing('title'):
        titled_leaf.guess.rename('title', 'series')

View file

@ -18,40 +18,47 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import find_properties
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
import re
import logging
log = logging.getLogger(__name__)
_DIGIT = 0
_LETTER = 1
_OTHER = 2
def guess_properties(string):
try:
prop, value, pos, end = find_properties(string)[0]
return { prop: value }, (pos, end)
except IndexError:
return None, None
class GuessIdnumber(Transformer):
def __init__(self):
Transformer.__init__(self, 220)
_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{10,})') # 1.0, (0, 0))
def supported_properties(self):
return ['idNumber']
def guess_idnumber(string):
match = _idnum.search(string)
_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{20,})') # 1.0, (0, 0))
def guess_idnumber(self, string, node=None, options=None):
match = self._idnum.search(string)
if match is not None:
result = match.groupdict()
switch_count = 0
DIGIT = 0
LETTER = 1
OTHER = 2
last = LETTER
switch_letter_count = 0;
letter_count = 0;
last_letter = None
last = _LETTER
for c in result['idNumber']:
if c in '0123456789':
ci = DIGIT
ci = _DIGIT
elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
ci = LETTER
ci = _LETTER
if c != last_letter:
switch_letter_count += 1
last_letter = c
letter_count += 1
else:
ci = OTHER
ci = _OTHER
if ci != last:
switch_count += 1
@ -59,13 +66,14 @@ def guess_idnumber(string):
last = ci
switch_ratio = float(switch_count) / len(result['idNumber'])
letters_ratio = (float(switch_letter_count) / letter_count) if letter_count > 0 else 1
# only return the result as probable if we alternate often between
# char type (more likely for hash values than for common words)
if switch_ratio > 0.4:
if switch_ratio > 0.4 and letters_ratio > 0.4:
return result, match.span()
return None, None
def process(mtree):
SingleNodeGuesser(guess_idnumber, 0.4, log).process(mtree)
def process(self, mtree, options=None):
GuessFinder(self.guess_idnumber, 0.4, self.log, options).process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,38 +18,169 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.language import search_language
import logging
from __future__ import absolute_import, division, print_function, unicode_literals
log = logging.getLogger(__name__)
from guessit.language import search_language, subtitle_prefixes, subtitle_suffixes
from guessit.patterns.extension import subtitle_exts
from guessit.textutils import find_words
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
def guess_language(string, node, skip=None):
if skip:
relative_skip = []
for entry in skip:
node_idx = entry['node_idx']
span = entry['span']
if node_idx == node.node_idx[:len(node_idx)]:
relative_span = (span[0] - node.offset + 1, span[1] - node.offset + 1)
relative_skip.append(relative_span)
skip = relative_skip
class GuessLanguage(Transformer):
def __init__(self):
    """Initialise the language transformer through the ``Transformer`` base."""
    # 30 is the only extra argument given to Transformer.__init__
    # (NOTE(review): presumably the transformer priority -- confirm).
    Transformer.__init__(self, 30)
language, span, confidence = search_language(string, skip=skip)
if language:
return (Guess({'language': language},
confidence=confidence,
raw= string[span[0]:span[1]]),
span)
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
    """Add the ``-L`` / ``--allowed-languages`` option to *naming_opts*.

    Only *naming_opts* is used; the other option groups are left untouched.
    The option is repeatable and its values are collected in
    ``allowed_languages``.
    """
    flags = ('-L', '--allowed-languages')
    settings = dict(action='append',
                    dest='allowed_languages',
                    help='Allowed language (can be used multiple times)')
    naming_opts.add_argument(*flags, **settings)
return None, None
def supported_properties(self):
    """Return the property names this transformer can produce."""
    names = ('language', 'subtitleLanguage')
    return list(names)
guess_language.use_node = True
def guess_language(self, string, node=None, options=None):
    """Search *string* for a language and return what ``search_language`` finds.

    :param string: text to scan
    :param node: unused here
    :param options: optional mapping; its ``allowed_languages`` entry, when
        present, is passed to ``search_language``
    :return: the result of ``search_language``, unchanged
    """
    if options and 'allowed_languages' in options:
        allowed = options.get('allowed_languages')
    else:
        allowed = None
    return search_language(string, allowed)
def _skip_language_on_second_pass(self, mtree, node):
    """Check if found node is a valid language node, or if it's a false positive.

    The node is reported as a false positive in two situations:

    * it starts exactly where a title/series leaf ends, and it is followed
      by an unidentified leaf (starting at its end) or by a year leaf
      (starting one character after its end);
    * it ends exactly where a title/series leaf starts, and it either opens
      its group, or starts where an unidentified leaf or a year leaf ends.

    :param mtree: Tree detected on first pass.
    :type mtree: :class:`guessit.matchtree.MatchTree`
    :param node: Node that contains a language Guess
    :type node: :class:`guessit.matchtree.MatchTree`

    :return: True if a second pass skipping this node is required
    :rtype: bool
    """
    # Only the boundary offsets matter, so collect them in sets.
    unidentified_starts, unidentified_ends = set(), set()
    for leaf in mtree.unidentified_leaves():
        unidentified_starts.add(leaf.span[0])
        unidentified_ends.add(leaf.span[1])

    year_starts, year_ends = set(), set()
    for leaf in mtree.leaves_containing('year'):
        year_starts.add(leaf.span[0])
        year_ends.add(leaf.span[1])

    title_starts, title_ends = set(), set()
    for leaf in mtree.leaves_containing(['title', 'series']):
        title_starts.add(leaf.span[0])
        title_ends.add(leaf.span[1])

    start = node.span[0]
    end = node.span[1]

    # Language glued to the end of a title and followed by more unknown/year text.
    if start in title_ends and (end in unidentified_starts or end + 1 in year_starts):
        return True

    # Language glued to the start of a title; group_node() is only consulted
    # in this case, exactly as in the original short-circuit expression.
    if end in title_starts:
        return (start == node.group_node().span[0] or
                start in unidentified_ends or
                start in year_ends)

    return False
def second_pass_options(self, mtree, options=None):
    """Work out which language nodes should be ignored on a second pass.

    A node is scheduled for skipping when ``_skip_language_on_second_pass``
    flags it as a false positive, or when the same language value was found
    more than once (only the most confident node is kept). Any remaining
    language node whose raw value equals that of a skipped node is skipped
    as well.

    :param mtree: match tree produced by the first pass
    :param options: unused here
    :return: ``{'skip_nodes': [...]}`` when at least one node must be
        skipped, otherwise ``None``
    """
    m = mtree.matched()
    to_skip_language_nodes = []
    for lang_key in ('language', 'subtitleLanguage'):
        # best node seen so far for each language value of this property
        langs = {}
        lang_nodes = set(mtree.leaves_containing(lang_key))
        for lang_node in lang_nodes:
            lang = lang_node.guess.get(lang_key, None)
            if self._skip_language_on_second_pass(mtree, lang_node):
                # Language probably split the title. Add to skip for 2nd pass.
                # if filetype is subtitle and the language appears last, just before
                # the extension, then it is likely a subtitle language
                parts = mtree.clean_string(lang_node.root.value).split()
                if m.get('type') in ['moviesubtitle', 'episodesubtitle']:
                    if lang_node.value in parts and \
                            (parts.index(lang_node.value) == len(parts) - 2):
                        # keep this node: it is the word right before the extension
                        continue
                to_skip_language_nodes.append(lang_node)
            elif lang not in langs:
                langs[lang] = lang_node
            else:
                # The same language was found. Keep the more confident one,
                # and add others to skip for 2nd pass.
                # NOTE(review): the confidence is always read for 'language',
                # even while iterating 'subtitleLanguage' -- confirm intended.
                existing_lang_node = langs[lang]
                to_skip = None
                if (existing_lang_node.guess.confidence('language') >=
                        lang_node.guess.confidence('language')):
                    # lang_node is to remove
                    to_skip = lang_node
                else:
                    # existing_lang_node is to remove
                    langs[lang] = lang_node
                    to_skip = existing_lang_node
                to_skip_language_nodes.append(to_skip)
    if to_skip_language_nodes:
        # Also skip same value nodes
        skipped_values = [skip_node.value for skip_node in to_skip_language_nodes]
        for lang_key in ('language', 'subtitleLanguage'):
            lang_nodes = set(mtree.leaves_containing(lang_key))
            for lang_node in lang_nodes:
                if lang_node not in to_skip_language_nodes and lang_node.value in skipped_values:
                    to_skip_language_nodes.append(lang_node)
        return {'skip_nodes': to_skip_language_nodes}
    return None
def should_process(self, mtree, options=None):
    """Return the ``language`` option, defaulting to True when it is absent.

    :param mtree: unused here
    :param options: optional mapping of options; may be None or empty
    """
    if not options:
        return True
    return options.get('language', True)
def process(self, mtree, options=None):
    """Run ``self.guess_language`` over every unidentified leaf of *mtree*.

    :param mtree: match tree whose unidentified leaves are scanned
    :param options: forwarded unchanged to ``GuessFinder``
    """
    # None is GuessFinder's second positional argument
    # (NOTE(review): presumably "no fixed confidence" -- confirm).
    finder = GuessFinder(self.guess_language, None, self.log, options)
    candidates = mtree.unidentified_leaves()
    finder.process_nodes(candidates)
def promote_subtitle(self, node):
    """Turn the ``language`` guess of *node* into a ``subtitleLanguage`` guess.

    The value and its confidence are copied to ``subtitleLanguage`` and the
    ``language`` entry is removed. Nodes without a ``language`` guess are
    left untouched.
    """
    guess = node.guess
    if 'language' not in guess:
        return
    language = guess['language']
    confidence = guess.confidence('language')
    guess.set('subtitleLanguage', language, confidence=confidence)
    del guess['language']
def post_process(self, mtree, options=None):
    """Promote ``language`` guesses to ``subtitleLanguage`` where it makes sense.

    Every node holding a ``language`` guess is checked against three
    heuristics (subtitle file extension, subtitle prefix/suffix words in the
    same explicit group, preceding ``st`` marker); each match calls
    ``promote_subtitle``, which is a no-op once the node has been promoted.

    :param mtree: match tree to update in place
    :param options: unused here
    """
    # 1- try to promote language to subtitle language where it makes sense
    for node in mtree.nodes():
        if 'language' not in node.guess:
            continue
        # - if we matched a language in a file with a sub extension and that
        # the group is the last group of the filename, it is probably the
        # language of the subtitle
        # (eg: 'xxx.english.srt')
        if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
                node == list(mtree.leaves())[-2]):
            self.promote_subtitle(node)
        # - if we find in the same explicit group
        # a subtitle prefix before the language,
        # or a subtitle suffix after the language,
        # then upgrade the language
        explicit_group = mtree.node_at(node.node_idx[:2])
        group_str = explicit_group.value.lower()
        for sub_prefix in subtitle_prefixes:
            # the prefix must be a whole word and start before the language
            if (sub_prefix in find_words(group_str) and
                    0 <= group_str.find(sub_prefix) < (node.span[0] - explicit_group.span[0])):
                self.promote_subtitle(node)
        for sub_suffix in subtitle_suffixes:
            # the suffix must be a whole word and start after the language start
            if (sub_suffix in find_words(group_str) and
                    (node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)):
                self.promote_subtitle(node)
        # - if a language is in an explicit group just preceded by "st",
        # it is a subtitle language (eg: '...st[fr-eng]...')
        try:
            idx = node.node_idx
            previous = list(mtree.node_at((idx[0], idx[1] - 1)).leaves())[-1]
            if previous.value.lower()[-2:] == 'st':
                self.promote_subtitle(node)
        except IndexError:
            # no usable previous group / leaf: nothing to promote
            pass

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,45 +18,51 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import Guess
import unicodedata
import logging
from __future__ import absolute_import, division, print_function, unicode_literals
log = logging.getLogger(__name__)
from guessit.plugins.transformers import Transformer
from guessit.matcher import found_property
from guessit import u
def process(mtree):
def found_property(node, name, value, confidence):
node.guess = Guess({ name: value },
confidence=confidence,
raw=value)
log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
class GuessMovieTitleFromPosition(Transformer):
def __init__(self):
    """Initialise the movie-title transformer through the ``Transformer`` base."""
    # -200 is the only extra argument given to Transformer.__init__
    # (NOTE(review): presumably the transformer priority -- confirm).
    Transformer.__init__(self, -200)
def found_title(node, confidence):
found_property(node, 'title', node.clean_value, confidence)
def supported_properties(self):
    """Return the property names this transformer can produce."""
    names = ('title',)
    return list(names)
def should_process(self, mtree, options=None):
    """Tell whether the title should be guessed from position.

    Returns False when the ``skip_title`` option is set or when the tree's
    guessed ``type`` starts with ``episode``; True otherwise.

    :param mtree: match tree; its ``guess`` mapping is read for ``type``
    :param options: optional mapping of options; may be None or empty
    """
    if options and options.get('skip_title'):
        return False
    kind = mtree.guess.get('type', '')
    return not kind.startswith('episode')
def process(self, mtree, options=None):
"""
try to identify the remaining unknown groups by looking at their
position relative to other known elements
"""
if 'title' in mtree.info:
return
basename = mtree.node_at((-2,))
all_valid = lambda leaf: len(leaf.clean_value) > 0
basename_leftover = basename.unidentified_leaves(valid=all_valid)
basename_leftover = list(basename.unidentified_leaves(valid=all_valid))
try:
folder = mtree.node_at((-3,))
folder_leftover = folder.unidentified_leaves()
folder_leftover = list(folder.unidentified_leaves())
except ValueError:
folder = None
folder_leftover = []
log.debug('folder: %s' % folder_leftover)
log.debug('basename: %s' % basename_leftover)
self.log.debug('folder: %s' % u(folder_leftover))
self.log.debug('basename: %s' % u(basename_leftover))
# specific cases:
# if we find the same group both in the folder name and the filename,
# it's a good candidate for title
if (folder_leftover and basename_leftover and
folder_leftover[0].clean_value == basename_leftover[0].clean_value):
found_title(folder_leftover[0], confidence=0.8)
if folder_leftover and basename_leftover and folder_leftover[0].clean_value == basename_leftover[0].clean_value:
found_property(folder_leftover[0], 'title', confidence=0.8)
return
# specific cases:
@ -64,61 +70,52 @@ def process(mtree):
# group, and the folder only contains 1 unidentified one, then we have
# a series
# ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
try:
if len(folder_leftover) > 0 and len(basename_leftover) > 1:
series = folder_leftover[0]
filmNumber = basename_leftover[0]
film_number = basename_leftover[0]
title = basename_leftover[1]
basename_leaves = basename.leaves()
basename_leaves = list(basename.leaves())
num = int(filmNumber.clean_value)
num = None
try:
num = int(film_number.clean_value)
except ValueError:
pass
log.debug('series: %s' % series.clean_value)
log.debug('title: %s' % title.clean_value)
if num:
self.log.debug('series: %s' % series.clean_value)
self.log.debug('title: %s' % title.clean_value)
if (series.clean_value != title.clean_value and
series.clean_value != filmNumber.clean_value and
basename_leaves.index(filmNumber) == 0 and
series.clean_value != film_number.clean_value and
basename_leaves.index(film_number) == 0 and
basename_leaves.index(title) == 1):
found_title(title, confidence=0.6)
found_property(series, 'filmSeries',
series.clean_value, confidence=0.6)
found_property(filmNumber, 'filmNumber',
num, confidence=0.6)
found_property(title, 'title', confidence=0.6)
found_property(series, 'filmSeries', confidence=0.6)
found_property(film_number, 'filmNumber', num, confidence=0.6)
return
except Exception:
pass
# specific cases:
# - movies/tttttt (yyyy)/tttttt.ccc
try:
if mtree.node_at((-4, 0)).value.lower() == 'movies':
folder = mtree.node_at((-3,))
# Note:too generic, might solve all the unittests as they all
# contain 'movies' in their path
#
#if containing_folder.is_leaf() and not containing_folder.guess:
# containing_folder.guess =
# Guess({ 'title': clean_string(containing_folder.value) },
# confidence=0.7)
if folder:
year_group = folder.first_leaf_containing('year')
if year_group:
groups_before = folder.previous_unidentified_leaves(year_group)
found_title(groups_before[0], confidence=0.8)
if groups_before:
try:
node = next(groups_before)
found_property(node, 'title', confidence=0.8)
return
except Exception:
except StopIteration:
pass
# if we have either format or videoCodec in the folder containing the file
# or one of its parents, then we should probably look for the title in
# there rather than in the basename
# if we have either format or videoCodec in the folder containing the
# file or one of its parents, then we should probably look for the title
# in there rather than in the basename
try:
props = mtree.previous_leaves_containing(mtree.children[-2],
[ 'videoCodec', 'format',
'language' ])
props = list(mtree.previous_leaves_containing(mtree.children[-2],
['videoCodec',
'format',
'language']))
except IndexError:
props = []
@ -127,48 +124,50 @@ def process(mtree):
if all(g.node_idx[0] == group_idx for g in props):
# if they're all in the same group, take leftover info from there
leftover = mtree.node_at((group_idx,)).unidentified_leaves()
if leftover:
found_title(leftover[0], confidence=0.7)
try:
found_property(next(leftover), 'title', confidence=0.7)
return
except StopIteration:
pass
# look for title in basename if there are some remaining undidentified
# look for title in basename if there are some remaining unidentified
# groups there
if basename_leftover:
title_candidate = basename_leftover[0]
# if basename is only one word and the containing folder has at least
# 3 words in it, we should take the title from the folder name
# ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
# ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
if (title_candidate.clean_value.count(' ') == 0 and
folder_leftover and
folder_leftover[0].clean_value.count(' ') >= 2):
if (basename_leftover[0].clean_value.count(' ') == 0 and
folder_leftover and folder_leftover[0].clean_value.count(' ') >= 2):
found_title(folder_leftover[0], confidence=0.7)
found_property(folder_leftover[0], 'title', confidence=0.7)
return
# if there are only 2 unidentified groups, the first of which is inside
# brackets or parentheses, we take the second one for the title:
# if the first unidentified group is inside brackets or parentheses,
# take the first unidentified group that is not.
# ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
if len(basename_leftover) == 2 and basename_leftover[0].is_explicit():
found_title(basename_leftover[1], confidence=0.8)
if basename_leftover[0].is_explicit():
for basename_leftover_elt in basename_leftover:
if not basename_leftover_elt.is_explicit():
found_property(basename_leftover_elt, 'title', confidence=0.8)
return
# if all else fails, take the first remaining unidentified group in the
# basename as title
found_title(title_candidate, confidence=0.6)
found_property(basename_leftover[0], 'title', confidence=0.6)
return
# if there are no leftover groups in the basename, look in the folder name
if folder_leftover:
found_title(folder_leftover[0], confidence=0.5)
found_property(folder_leftover[0], 'title', confidence=0.5)
return
# if nothing worked, look if we have a very small group at the beginning
# of the basename
basename = mtree.node_at((-2,))
basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
if basename_leftover:
found_title(basename_leftover[0], confidence=0.4)
try:
found_property(next(basename_leftover), 'title', confidence=0.4)
return
except StopIteration:
pass

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,21 +18,271 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import find_properties
import logging
from __future__ import absolute_import, division, print_function, unicode_literals
log = logging.getLogger(__name__)
from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer, NoValidator, \
ChainedValidator, DefaultValidator, OnlyOneValidator, LeftValidator, NeighborValidator
from guessit.patterns import sep, build_or_pattern
from guessit.patterns.extension import subtitle_exts, video_exts, info_exts
from guessit.patterns.numeral import numeral, parse_numeral
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder, found_property
import re
def guess_properties(string):
try:
prop, value, pos, end = find_properties(string)[0]
return { prop: value }, (pos, end)
except IndexError:
return None, None
class GuessProperties(Transformer):
def __init__(self):
Transformer.__init__(self, 35)
self.container = PropertiesContainer()
self.qualities = QualitiesContainer()
def process(mtree):
SingleNodeGuesser(guess_properties, 1.0, log).process(mtree)
def register_property(propname, props, **kwargs):
    """Register every entry of *props* on ``self.container``.

    *props* is a dict of ``{canonical_form: patterns}``. An entry may also be
    a ``(patterns, extra_kwargs)`` tuple, in which case *extra_kwargs*
    override *kwargs* for that entry only. ``canonical_form`` is always
    passed along as a keyword argument.
    """
    for canonical_form, spec in props.items():
        if isinstance(spec, tuple):
            entry_patterns, entry_kwargs = spec
        else:
            entry_patterns, entry_kwargs = spec, {}
        merged = dict(kwargs)
        merged.update(entry_kwargs)
        merged['canonical_form'] = canonical_form
        self.container.register_property(propname, *entry_patterns, **merged)
def register_quality(propname, quality_dict):
    """Register each ``{canonical_form: quality}`` pair on ``self.qualities``."""
    for canonical_form in quality_dict:
        rating = quality_dict[canonical_form]
        self.qualities.register_quality(propname, canonical_form, rating)
register_property('container', {'mp4': ['MP4']})
# http://en.wikipedia.org/wiki/Pirated_movie_release_types
register_property('format', {'VHS': ['VHS', 'VHS-Rip'],
'Cam': ['CAM', 'CAMRip', 'HD-CAM'],
#'Telesync': ['TELESYNC', 'PDVD'],
'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}),
'Workprint': ['WORKPRINT', 'WP'],
'Telecine': ['TELECINE', 'TC'],
'PPV': ['PPV', 'PPV-Rip'], # Pay Per View
'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
'VOD': ['VOD', 'VOD-Rip'],
'WEBRip': ['WEB-Rip'],
'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
})
register_quality('format', {'VHS': -100,
'Cam': -90,
'Telesync': -80,
'Workprint': -70,
'Telecine': -60,
'PPV': -50,
'TV': -30,
'DVB': -20,
'DVD': 0,
'HDTV': 20,
'VOD': 40,
'WEBRip': 50,
'WEB-DL': 60,
'HD-DVD': 80,
'BluRay': 100
})
register_property('screenSize', {'360p': ['(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)'],
'368p': ['(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)'],
'480p': ['(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)'],
#'480p': (['hr'], {'confidence': 0.2}), # duplicate dict key
'576p': ['(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)'],
'720p': ['(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)'],
'900p': ['(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)'],
'1080i': ['(?:\d{3,}(?:\\|\/|x|\*))?1080i'],
'1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080p?x?'],
'4K': ['(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)']
},
validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))
class ResolutionValidator(object):
    """Validator placeholder that currently accepts every match.

    It was written to make sure a match is surrounded by separators or by
    another entry, but that check is disabled: ``validate`` returns True
    unconditionally.
    """

    def validate(self, prop, string, node, match, entry_start, entry_end):
        """Accept the match unconditionally.

        All parameters are ignored.

        :return: always True
        :rtype: bool
        """
        # The disabled check required the trimmed match span to be bounded on
        # each side by a separator or by the edge of another entry.
        return True
# Matches each run of digits; a raw string avoids the invalid "\d" escape.
_digits_re = re.compile(r'\d+')


def resolution_formatter(value):
    """Join every run of digits found in *value* with ``x``.

    For example ``'1920*1080'`` and ``'1920-x-1080'`` both give
    ``'1920x1080'``. A value without digits gives an empty string.
    """
    return 'x'.join(_digits_re.findall(value))
self.container.register_property('screenSize', '\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter, validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))
register_quality('screenSize', {'360p': -300,
'368p': -200,
'480p': -100,
'576p': 0,
'720p': 100,
'900p': 130,
'1080i': 180,
'1080p': 200,
'4K': 400
})
_videoCodecProperty = {'Real': ['Rv\d{2}'], # http://en.wikipedia.org/wiki/RealVideo
'Mpeg2': ['Mpeg2'],
'DivX': ['DVDivX', 'DivX'],
'XviD': ['XviD'],
'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
}
register_property('videoCodec', _videoCodecProperty)
register_quality('videoCodec', {'Real': -50,
'Mpeg2': -30,
'DivX': -10,
'XviD': 0,
'h264': 100,
'h265': 150
})
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
# http://fr.wikipedia.org/wiki/H.264
self.container.register_property('videoProfile', 'BP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit')
self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit')
self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
register_quality('videoProfile', {'BP': -20,
'XP': -10,
'MP': 0,
'HP': 10,
'10bit': 15,
'Hi422P': 25,
'Hi444PP': 35
})
# has nothing to do here (or on filenames for that matter), but some
# releases use it and it helps to identify release groups, so we adapt
register_property('videoApi', {'DXVA': ['DXVA']})
register_property('audioCodec', {'MP3': ['MP3', 'LAME', 'LAME(?:\d)+-(?:\d)+'],
'DolbyDigital': ['DD'],
'AAC': ['AAC'],
'AC3': ['AC3'],
'Flac': ['FLAC'],
'DTS': (['DTS'], {'validator': LeftValidator()}),
'TrueHD': ['True-HD']
})
register_quality('audioCodec', {'MP3': 10,
'DolbyDigital': 30,
'AAC': 35,
'AC3': 40,
'Flac': 45,
'DTS': 60,
'TrueHD': 70
})
self.container.register_property('audioProfile', 'HD', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
self.container.register_property('audioProfile', 'HE', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
self.container.register_property('audioProfile', 'LC', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
self.container.register_property('audioProfile', 'HQ', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AC3']))
register_quality('audioProfile', {'HD': 20,
'HDMA': 50,
'LC': 0,
'HQ': 0,
'HE': 20
})
register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch', '8ch'],
'5.1': ['5[\W_]1', '5ch', '6ch'],
'2.0': ['2[\W_]0', '2ch', 'stereo'],
'1.0': ['1[\W_]0', '1ch', 'mono']
})
register_quality('audioChannels', {'7.1': 200,
'5.1': 100,
'2.0': 0,
'1.0': -100
})
self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')
self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False)
weak_episode_words = ['pt', 'part']
self.container.register_property(None, '(' + build_or_pattern(weak_episode_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)
register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
'DualAudio': ['Dual-Audio'],
'WideScreen': ['ws', 'wide-screen'],
'Netflix': ['Netflix', 'NF']
})
self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=NeighborValidator())
self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
self.container.register_property('other', 'Fansub', canonical_form='Fansub')
self.container.register_property('other', 'Fastsub', canonical_form='Fastsub')
self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete', canonical_form='Complete')
self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')
self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
'DDC',
'HR', 'PAL', 'SECAM', 'NTSC')
self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())
for prop in self.container.get_properties('format'):
self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')
for exts in (subtitle_exts, info_exts, video_exts):
for container in exts:
self.container.register_property('container', container, confidence=0.3)
def guess_properties(self, string, node=None, options=None):
    """Look up registered properties in *string* and return them as a guess.

    The matches returned by ``self.container.find_properties`` are handed,
    together with *string*, to ``self.container.as_guess``.
    """
    container = self.container
    matches = container.find_properties(string, node, options)
    return container.as_guess(matches, string)
def supported_properties(self):
    """Return whatever ``self.container`` reports as supported properties."""
    container = self.container
    return container.get_supported_properties()
def process(self, mtree, options=None):
    """Guess properties on unidentified leaves, then record ``properCount``.

    After the leaves have been processed, the leaves whose ``other`` info
    contains ``'Proper'`` are counted; a non-zero count is stored on *mtree*
    through ``found_property``.

    :param mtree: match tree to update in place
    :param options: forwarded unchanged to ``GuessFinder``
    """
    finder = GuessFinder(self.guess_properties, 1.0, self.log, options)
    finder.process_nodes(mtree.unidentified_leaves())

    proper_count = sum(
        1
        for leaf in mtree.leaves_containing('other')
        if 'other' in leaf.info and 'Proper' in leaf.info['other']
    )
    if proper_count:
        found_property(mtree, 'properCount', proper_count)
def rate_quality(self, guess, *props):
    """Delegate to ``self.qualities.rate_quality`` and return its result."""
    rater = self.qualities
    return rater.rate_quality(guess, *props)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,69 +18,187 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import prop_multi, compute_canonical_form, _dash, _psep
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder, build_guess
from guessit.containers import PropertiesContainer
from guessit.patterns import sep
from guessit.guess import Guess
from guessit.textutils import strip_brackets
import re
import logging
log = logging.getLogger(__name__)
def get_patterns(property_name):
return [ p.replace(_dash, _psep) for patterns in prop_multi[property_name].values() for p in patterns ]
CODECS = get_patterns('videoCodec')
FORMATS = get_patterns('format')
VAPIS = get_patterns('videoApi')
# RG names following a codec or format, with a potential space or dash inside the name
GROUP_NAMES = [ r'(?P<videoCodec>' + codec + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
for codec in CODECS ]
GROUP_NAMES += [ r'(?P<format>' + fmt + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
for fmt in FORMATS ]
GROUP_NAMES += [ r'(?P<videoApi>' + api + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
for api in VAPIS ]
GROUP_NAMES2 = [ r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
for codec in CODECS ]
GROUP_NAMES2 += [ r'\.(?P<format>' + fmt + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
for fmt in FORMATS ]
GROUP_NAMES2 += [ r'\.(?P<videoApi>' + vapi + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
for vapi in VAPIS ]
GROUP_NAMES = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES ]
GROUP_NAMES2 = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES2 ]
def adjust_metadata(md):
return dict((property_name, compute_canonical_form(property_name, value) or value)
for property_name, value in md.items())
def guess_release_group(string):
# first try to see whether we have both a known codec and a known release group
for rexp in GROUP_NAMES:
match = rexp.search(string)
while match:
metadata = match.groupdict()
# make sure this is an actual release group we caught
release_group = (compute_canonical_form('releaseGroup', metadata['releaseGroup']) or
compute_canonical_form('weakReleaseGroup', metadata['releaseGroup']))
if release_group:
return adjust_metadata(metadata), (match.start(1), match.end(2))
class GuessReleaseGroup(Transformer):
def __init__(self):
Transformer.__init__(self, -190)
# we didn't find anything conclusive, keep searching
match = rexp.search(string, match.span()[0]+1)
self.container = PropertiesContainer(canonical_from_pattern=False)
self._allowed_groupname_pattern = '[\w@#€£$&!\?]'
self._forbidden_groupname_lambda = [lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
lambda elt: self._is_number(elt)]
# If the previous property is in this list, the match is considered safe
# and the group name may contain a separator.
self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels', 'other']
self.previous_safe_values = {'other': ['Complete']}
self.next_safe_properties = ['extension', 'website']
self.next_safe_values = {'format': ['Telesync']}
self.container.sep_replace_char = '-'
self.container.canonical_from_pattern = False
self.container.enhance = True
self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+')
self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+')
self.re_sep = re.compile('(' + sep + ')')
# pick anything as releaseGroup as long as we have a codec in front
# this doesn't include a potential dash ('-') ending the release group
# eg: [...].X264-HiS@SiLUHD-English.[...]
for rexp in GROUP_NAMES2:
match = rexp.search(string)
if match:
return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
    """Add the ``-G`` / ``--expected-group`` option to *naming_opts*.

    Only *naming_opts* is used; the other option groups are left untouched.
    The option is repeatable and its values are collected in
    ``expected_group``.
    """
    flags = ('-G', '--expected-group')
    settings = dict(action='append',
                    dest='expected_group',
                    help='Expected release group (can be used multiple times)')
    naming_opts.add_argument(*flags, **settings)
return None, None
def supported_properties(self):
    """Return whatever ``self.container`` reports as supported properties."""
    container = self.container
    return container.get_supported_properties()
def _is_number(self, s):
    """Return True when ``int(s)`` succeeds, False when it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True
def process(mtree):
SingleNodeGuesser(guess_release_group, 0.8, log).process(mtree)
def validate_group_name(self, guess):
    """Clean up ``guess['releaseGroup']`` and tell whether it is acceptable.

    The name is split on separators. Every element rejected by one of
    ``self._forbidden_groupname_lambda`` is dropped, together with the
    character kept just before it and the separator that follows it. At most
    one trailing and one leading separator are then trimmed, and the cleaned
    name is written back to *guess*.

    :param guess: mapping holding the candidate name under ``'releaseGroup'``;
        updated in place when a non-empty cleaned name remains
    :return: True if the cleaned name is non-empty and not itself forbidden
    :rtype: bool
    """
    val = guess['releaseGroup']
    if len(val) <= 1:
        # NOTE(review): names of at most one character are rejected
        # outright -- confirm this matches the intended nesting.
        return False

    kept = ""
    skip_next = False
    for elt in self.re_sep.split(val):  # separators are in the list because of capturing group
        if skip_next:
            # Previous element was forbidden: don't add the separator after it
            skip_next = False
            continue
        if any(is_forbidden(elt.lower()) for is_forbidden in self._forbidden_groupname_lambda):
            # Remove the separator kept just before the forbidden element
            kept = kept[:-1]
            skip_next = True
            continue
        kept += elt

    val = kept
    # Guard each index: a value reduced to a lone separator used to raise
    # IndexError on val[0] once the trailing trim had emptied it.
    if val and self.re_sep.match(val[-1]):
        val = val[:-1]
    if val and self.re_sep.match(val[0]):
        val = val[1:]
    if not val:
        # Nothing but forbidden words / separators: not a release group
        return False

    guess['releaseGroup'] = val
    return not any(is_forbidden(val.lower()) for is_forbidden in self._forbidden_groupname_lambda)
def is_leaf_previous(self, leaf, node):
    """Tell whether *leaf* ends before *node* starts with only separators between.

    :return: False if *leaf* ends after the start of *node* or if any
        character of the root value between the two is not in ``sep``;
        True otherwise (including when the two spans touch)
    :rtype: bool
    """
    gap_start = leaf.span[1]
    gap_end = node.span[0]
    if gap_start > gap_end:
        return False
    return all(leaf.root.value[pos] in sep for pos in range(gap_start, gap_end))
def validate_next_leaves(self, node):
    """Tell whether taking *node* as release group leaves room for a title.

    :return: True when the root already has a ``series`` or ``title`` in its
        info, or when the root has more than one unidentified leaf
    :rtype: bool
    """
    root_info = node.root.info
    if 'series' in root_info or 'title' in root_info:
        # --expected-series or --expected-title is used.
        return True

    # Make sure to avoid collision with 'series' or 'title' guessed later. Should be more precise.
    remaining = sum(1 for _ in node.root.unidentified_leaves())
    return remaining > 1
def validate_node(self, leaf, node, safe=False):
    """Tell whether *leaf* is a valid anchor placed just before *node*.

    *leaf* must pass ``is_leaf_previous`` and *node* must pass
    ``validate_next_leaves``. With *safe* set, every guessed property of
    *leaf* that is listed in ``self.previous_safe_values`` must also hold one
    of the values allowed there.

    :rtype: bool
    """
    if not (self.is_leaf_previous(leaf, node) and self.validate_next_leaves(node)):
        return False
    if not safe:
        return True
    return not any(
        prop in self.previous_safe_values and value not in self.previous_safe_values[prop]
        for prop, value in leaf.guess.items()
    )
# Guess the release group for `string`: user-supplied expected groups are tried first,
# then the patterns registered on self.container, then positional fallbacks.
# NOTE(review): indentation was lost in this rendering, so the nesting implied by the
# comments below is inferred from statement order — confirm against the real file.
# NOTE(review): `node` defaults to None, yet node.group_node() / node.previous_group_node()
# are called below without a None check — presumably callers always pass a node; confirm.
def guess_release_group(self, string, node=None, options=None):
# 1) Explicit 'expected_group' option: match against a throwaway container built from it.
if options and options.get('expected_group'):
expected_container = PropertiesContainer(enhance=True, canonical_from_pattern=False)
for expected_group in options.get('expected_group'):
# Entries prefixed with 're:' are registered as patterns (prefix removed, spaces turned into '-').
if expected_group.startswith('re:'):
expected_group = expected_group[3:]
expected_group = expected_group.replace(' ', '-')
expected_container.register_property('releaseGroup', expected_group, enhance=True)
else:
# Any other entry is escaped with re.escape before registration.
expected_group = re.escape(expected_group)
expected_container.register_property('releaseGroup', expected_group, enhance=False)
found = expected_container.find_properties(string, node, options, 'releaseGroup')
guess = expected_container.as_guess(found, string, self.validate_group_name)
if guess:
return guess
# 2) Registered release-group patterns, filtered through validate_group_name.
found = self.container.find_properties(string, node, options, 'releaseGroup')
guess = self.container.as_guess(found, string, self.validate_group_name)
validated_guess = None
if guess:
group_node = node.group_node()
if group_node:
# The guess is only kept when a leaf holding one of previous_safe_properties validates in safe mode.
for leaf in group_node.leaves_containing(self.previous_safe_properties):
if self.validate_node(leaf, node, True):
# Confidence is 1 when the character right after that leaf is '-', 0.7 otherwise.
if leaf.root.value[leaf.span[1]] == '-':
guess.metadata().confidence = 1
else:
guess.metadata().confidence = 0.7
validated_guess = guess
if not validated_guess:
# If previous group last leaf is identified as a safe property,
# consider the raw value as a releaseGroup
previous_group_node = node.previous_group_node()
if previous_group_node:
for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
if self.validate_node(leaf, node, False):
# The whole node value becomes the candidate, with confidence 1.
guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
if self.validate_group_name(guess):
node.guess = guess
validated_guess = guess
if validated_guess:
# If following group nodes have only one unidentified leaf, it belongs to the release group
next_group_node = node
# Walk forward until there is no next group node or one does not have exactly one unguessed leaf.
while True:
next_group_node = next_group_node.next_group_node()
if next_group_node:
leaves = list(next_group_node.leaves())
if len(leaves) == 1 and not leaves[0].guess:
# Append that leaf's text to the release group and mark the leaf with the same guess.
validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
leaves[0].guess = validated_guess
else:
break
else:
break
# 3) Last resort: an explicit node with node_last_idx == 0 is taken minus its first and last
# characters, with confidence 0.4.
if not validated_guess and node.is_explicit() and node.node_last_idx == 0: # first node from group
validated_guess = build_guess(node, 'releaseGroup', value=node.value[1:len(node.value)-1])
validated_guess.metadata().confidence = 0.4
validated_guess.metadata().span = 1, len(node.value)
node.guess = validated_guess
if validated_guess:
# Strip brackets
validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])
# Returns the validated guess, or None when nothing was accepted.
return validated_guess
def process(self, mtree, options=None):
    """Apply ``guess_release_group`` to the unidentified leaves of ``mtree``."""
    finder = GuessFinder(self.guess_release_group, None, self.log, options)
    finder.process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,33 +18,41 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import video_rexps, sep
import re
import logging
from __future__ import absolute_import, division, print_function, \
unicode_literals
log = logging.getLogger(__name__)
from guessit.patterns import _psep
from guessit.containers import PropertiesContainer
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.patterns.numeral import parse_numeral
# NOTE(review): this span comes from a rendered diff whose +/- markers and indentation were
# lost. Lines of the removed module-level guess_video_rexps(string)/process(mtree) appear to be
# interleaved with the added GuessVideoRexps class — confirm against the real files.
def guess_video_rexps(string):
# The string is padded with '-' on both sides before each pattern is searched between separators.
string = '-' + string + '-'
for rexp, confidence, span_adjust in video_rexps:
match = re.search(sep + rexp + sep, string, re.IGNORECASE)
if match:
metadata = match.groupdict()
# is this the better place to put it? (maybe, as it is at least
# the soonest that we can catch it)
# Drop the optional cdNumberTotal group when it did not take part in the match.
if metadata.get('cdNumberTotal', -1) is None:
del metadata['cdNumberTotal']
span = (match.start() + span_adjust[0],
match.end() + span_adjust[1] - 2)
return (Guess(metadata, confidence=confidence, raw=string[span[0]:span[1]]),
span)
class GuessVideoRexps(Transformer):
def __init__(self):
# 25 is passed to Transformer.__init__ (presumably the transformer priority — confirm).
Transformer.__init__(self, 25)
return None, None
self.container = PropertiesContainer(canonical_from_pattern=False)
# 'cd<N>' with an optional 'of <M>' part; both numbers are single digits.
self.container.register_property(None, 'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep + '(?P<cdNumberTotal>[0-9]))?', confidence=1.0, enhance=False, global_span=True, formatter=parse_numeral)
# '<N> cd(s)' form, N in 1-9, giving only the total.
self.container.register_property('cdNumberTotal', '([1-9])' + _psep + 'cds?', confidence=0.9, enhance=False, formatter=parse_numeral)
def process(mtree):
SingleNodeGuesser(guess_video_rexps, None, log).process(mtree)
# 'x<NN>' -> bonusNumber and 'f<NN>' -> filmNumber (one or two digits).
self.container.register_property('bonusNumber', 'x([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)
self.container.register_property('filmNumber', 'f([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)
# Edition spellings, each mapped to one canonical form.
self.container.register_property('edition', 'collector', 'collector-edition', 'edition-collector', canonical_form='Collector Edition')
self.container.register_property('edition', 'special-edition', 'edition-special', canonical_form='Special Edition')
self.container.register_property('edition', 'criterion', 'criterion-edition', 'edition-criterion', canonical_form='Criterion Edition')
# NOTE(review): 'cdeluxe-edition' looks like a typo for 'deluxe-edition' (compare the sibling
# registrations above) — confirm before changing the pattern.
self.container.register_property('edition', 'deluxe', 'cdeluxe-edition', 'edition-deluxe', canonical_form='Deluxe Edition')
self.container.register_property('edition', 'director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut', canonical_form='Director\'s cut')
# Returns the supported properties as reported by the container.
def supported_properties(self):
return self.container.get_supported_properties()
# Finds all registered properties in `string` and converts them into a guess.
def guess_video_rexps(self, string, node=None, options=None):
found = self.container.find_properties(string, node, options)
return self.container.as_guess(found, string)
# Applies guess_video_rexps to the unidentified leaves of the match tree.
def process(self, mtree, options=None):
GuessFinder(self.guess_video_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,45 +18,64 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import Guess
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import weak_episode_rexps
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.patterns import sep, build_or_pattern
from guessit.containers import PropertiesContainer, LeavesValidator, NoValidator, WeakValidator
from guessit.patterns.numeral import numeral, parse_numeral
from guessit.date import valid_year
import re
import logging
log = logging.getLogger(__name__)
# NOTE(review): this span comes from a rendered diff whose +/- markers and indentation were
# lost. Lines of the removed module-level guess_weak_episodes_rexps(string, node) appear to be
# interleaved with the added GuessWeakEpisodesRexps class — confirm against the real files.
def guess_weak_episodes_rexps(string, node):
# Weak patterns are skipped when an episode number is already recorded on the tree root.
if 'episodeNumber' in node.root.info:
return None, None
class GuessWeakEpisodesRexps(Transformer):
def __init__(self):
# 15 is passed to Transformer.__init__ (presumably the transformer priority — confirm).
Transformer.__init__(self, 15)
for rexp, span_adjust in weak_episode_rexps:
match = re.search(rexp, string, re.IGNORECASE)
if match:
metadata = match.groupdict()
span = (match.start() + span_adjust[0],
match.end() + span_adjust[1])
# Words/characters accepted between an episode number and an episode count (used further down).
of_separators = ['of', 'sur', '/', '\\']
of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)
epnum = int(metadata['episodeNumber'])
self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
episode_words = ['episodes?']
# Formatter for bare numbers. The name is spelled '_formater' and is referenced under that
# spelling in the registrations below.
def _formater(episode_number):
epnum = parse_numeral(episode_number)
# A number that is not a valid year and exceeds 100 is split: hundreds become the season,
# the remainder the episode (e.g. 312 -> season 3, episode 12).
if not valid_year(epnum):
if epnum > 100:
season, epnum = epnum // 100, epnum % 100
# episodes which have a season > 25 are most likely errors
# (Simpsons is at 23!)
if season > 25:
continue
return Guess({ 'season': season,
'episodeNumber': epnum },
confidence=0.6, raw=string[span[0]:span[1]]), span
# episodes which have a season > 50 are most likely errors
# (Simpson is at 25!)
if season > 50:
return None
return {'season': season, 'episodeNumber': epnum}
else:
return Guess(metadata, confidence=0.3, raw=string[span[0]:span[1]]), span
return epnum
return None, None
# 2-4 digit numbers go through _formater; this registration is disabled when the
# 'episode_prefer_number' option is set, the 4-digit one below is always active.
self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater, disabler=lambda options: options.get('episode_prefer_number') if options else False)
self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}', confidence=0.6, formatter=_formater)
# The registrations using `not options.get('episode_prefer_number')` are disabled unless that option is set.
# NOTE(review): '\d' appears in non-raw string literals here and below; it works, but newer Python
# versions warn about invalid escape sequences — a raw string would be safer.
self.container.register_property('episodeNumber', '[^0-9](\d{1,3})', confidence=0.6, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
# 'episode(s) <numeral>' followed by a non-digit.
self.container.register_property(None, '(' + build_or_pattern(episode_words) + sep + '?(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.4, formatter=parse_numeral)
# '<numeral> of|sur|/|\ <numeral>' yields episodeNumber and episodeCount.
self.container.register_property(None, r'(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral +')', confidence=0.6, formatter=parse_numeral)
# A 1-3 digit number at the very start or very end of the string.
self.container.register_property('episodeNumber', r'^' + sep + '?(\d{1,3})' + sep, confidence=0.4, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
self.container.register_property('episodeNumber', sep + r'(\d{1,3})' + sep + '?$', confidence=0.4, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
# Returns the supported properties as reported by the container.
def supported_properties(self):
return self.container.get_supported_properties()
guess_weak_episodes_rexps.use_node = True
# Returns None when the tree root already has an episodeNumber; otherwise the guess built from
# the container's matches.
def guess_weak_episodes_rexps(self, string, node=None, options=None):
if node and 'episodeNumber' in node.root.info:
return None
properties = self.container.find_properties(string, node, options)
guess = self.container.as_guess(properties, string)
def process(mtree):
SingleNodeGuesser(guess_weak_episodes_rexps, 0.6, log).process(mtree)
return guess
# Only runs when the tree's guessed 'type' starts with 'episode'.
def should_process(self, mtree, options=None):
return mtree.guess.get('type', '').startswith('episode')
# Applies guess_weak_episodes_rexps to the unidentified leaves of the match tree.
def process(self, mtree, options=None):
GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options).process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,22 +18,39 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.patterns import websites
import logging
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.patterns import build_or_pattern
from guessit.containers import PropertiesContainer
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from pkg_resources import resource_stream # @UnresolvedImport
log = logging.getLogger(__name__)
# Known top-level domains, read from the 'tlds-alpha-by-domain.txt' resource of the
# guessit package. Lines containing b'--' are left out and the first remaining entry is
# dropped by the trailing [1:] (presumably the file's header line — confirm).
_tlds_stream = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
try:
    TLDS = [l.strip().decode('utf-8')
            for l in _tlds_stream.readlines()
            if b'--' not in l][1:]
finally:
    # resource_stream returns an open file-like object; close it explicitly
    # instead of relying on garbage collection.
    _tlds_stream.close()
# NOTE(review): this span comes from a rendered diff whose +/- markers and indentation were
# lost. Lines of the removed module-level guess_website(string)/process(mtree) appear to be
# interleaved with the added GuessWebsite class — confirm against the real files.
def guess_website(string):
# Case-insensitive substring search for each known site name.
low = string.lower()
for site in websites:
pos = low.find(site.lower())
if pos != -1:
return {'website': site}, (pos, pos + len(site))
return None, None
class GuessWebsite(Transformer):
def __init__(self):
# 45 is passed to Transformer.__init__ (presumably the transformer priority — confirm).
Transformer.__init__(self, 45)
self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
def process(mtree):
SingleNodeGuesser(guess_website, 1.0, log).process(mtree)
tlds_pattern = build_or_pattern(TLDS) # All registered domain extension
safe_tlds_pattern = build_or_pattern(['com', 'org', 'net']) # For sure a website extension
safe_subdomains_pattern = build_or_pattern(['www']) # For sure a website subdomain
safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net']) # Those words before a tlds are sure
# Three shapes are registered as 'website':
#   1. 'www.' + one or more '<label>.' + any known TLD
#   2. optional 'www.' + '<label>.' + com/org/net
#   3. optional 'www.' + '<label>.' + co/com/org/net + '.' + any known TLD
# Labels are matched with [a-z-]+, so digits inside a label are not matched by these patterns.
# NOTE(review): '\.' appears in non-raw string literals below; it works, but newer Python
# versions warn about invalid escape sequences — a raw string would be safer.
self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)+' + r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')
# Returns the supported properties as reported by the container.
def supported_properties(self):
return self.container.get_supported_properties()
# Finds 'website' properties in `string` and converts them into a guess.
def guess_website(self, string, node=None, options=None):
found = self.container.find_properties(string, node, options, 'website')
return self.container.as_guess(found, string)
# Applies guess_website to the unidentified leaves of the match tree.
def process(self, mtree, options=None):
GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,33 +18,40 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.transfo import SingleNodeGuesser
from guessit.date import search_year
import logging
from __future__ import absolute_import, division, print_function, unicode_literals
log = logging.getLogger(__name__)
from guessit.plugins.transformers import Transformer
from guessit.matcher import GuessFinder
from guessit.date import search_year, valid_year
# NOTE(review): this span comes from a rendered diff whose +/- markers and indentation were
# lost. Lines of the removed module-level guess_year/guess_year_skip_first/process functions
# appear to be interleaved with the added GuessYear class — confirm against the real files.
def guess_year(string):
class GuessYear(Transformer):
def __init__(self):
# -160 is passed to Transformer.__init__ (presumably the transformer priority — confirm).
Transformer.__init__(self, -160)
# This transformer only ever produces the 'year' property.
def supported_properties(self):
return ['year']
# Returns ({'year': year}, span) when search_year finds a year, (None, None) otherwise.
# The two consecutive `return {...}, span` lines are presumably the old and new formatting
# of the same statement.
def guess_year(self, string, node=None, options=None):
year, span = search_year(string)
if year:
return { 'year': year }, span
return {'year': year}, span
else:
return None, None
# Searches for a second year after the first one and shifts its span by the end offset of the first.
def guess_year_skip_first(string):
year, span = search_year(string)
if year:
year2, span2 = guess_year(string[span[1]:])
if year2:
return year2, (span2[0]+span[1], span2[1]+span[1])
# When more than one leaf contains a 'year', returns a 'skip_nodes' option listing every such
# leaf except the last one; returns None otherwise.
def second_pass_options(self, mtree, options=None):
year_nodes = list(mtree.leaves_containing('year'))
if len(year_nodes) > 1:
return {'skip_nodes': year_nodes[:len(year_nodes) - 1]}
return None
return None, None
# Applies guess_year to the unidentified leaves of the match tree.
def process(self, mtree, options=None):
GuessFinder(self.guess_year, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
def process(mtree, skip_first_year=False):
if skip_first_year:
SingleNodeGuesser(guess_year_skip_first, 1.0, log).process(mtree)
else:
SingleNodeGuesser(guess_year, 1.0, log).process(mtree)
# if we found a season number that is a valid year, it is usually safe to assume
# we can also set the year property to that value
for n in mtree.leaves_containing('season'):
g = n.guess
season = g['season']
if valid_year(season):
g['year'] = season

View file

@ -1,73 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.patterns import subtitle_exts
from guessit.textutils import reorder_title, find_words
import logging
log = logging.getLogger(__name__)
def process(mtree):
    """Post-process the guesses stored on the nodes of ``mtree``.

    1. A guessed ``language`` is turned into ``subtitleLanguage`` when the
       surrounding context suggests that it describes subtitles.
    2. Each guessed ``series`` title is passed through ``reorder_title``.

    :param mtree: match tree whose node guesses are updated in place
    """
    # 1- try to promote language to subtitle language where it makes sense
    for node in mtree.nodes():
        if 'language' not in node.guess:
            continue

        def promote_subtitle():
            # pylint: disable=W0631
            # Several of the rules below can match the same node. Once the
            # language has been moved there is nothing left to promote, and
            # reading the deleted key again would raise KeyError.
            if 'language' not in node.guess:
                return
            node.guess.set('subtitleLanguage', node.guess['language'],
                           confidence=node.guess.confidence('language'))
            del node.guess['language']

        # - if we matched a language in a file with a sub extension and that
        #   the group is the last group of the filename, it is probably the
        #   language of the subtitle
        #   (eg: 'xxx.english.srt')
        if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
                node == mtree.leaves()[-2]):
            promote_subtitle()

        # - if we find the word 'sub' before the language, and in the same explicit
        #   group, then upgrade the language
        explicit_group = mtree.node_at(node.node_idx[:2])
        group_str = explicit_group.value.lower()

        if ('sub' in find_words(group_str) and
                0 <= group_str.find('sub') < (node.span[0] - explicit_group.span[0])):
            promote_subtitle()

        # - if a language is in an explicit group just preceded by "st",
        #   it is a subtitle language (eg: '...st[fr-eng]...')
        try:
            idx = node.node_idx
            previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
            if previous.value.lower()[-2:] == 'st':
                promote_subtitle()
        except IndexError:
            # no previous group / no leaf to inspect: this rule does not apply
            pass

    # 2- ", the" at the end of a series title should be prepended to it
    for node in mtree.nodes():
        if 'series' not in node.guess:
            continue

        node.guess['series'] = reorder_title(node.guess['series'])

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,27 +18,32 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.textutils import find_first_level_groups
from guessit.patterns import group_delimiters
import functools
import logging
log = logging.getLogger(__name__)
from functools import reduce
# NOTE(review): this span comes from a rendered diff whose +/- markers and indentation were
# lost. The removed module-level process(mtree) and the added SplitExplicitGroups class
# (constructed with 250 passed to Transformer.__init__) appear to be interleaved, including two
# overlapping docstring openings — confirm against the real files.
# For each child of the tree, the value is first split with group_delimiters[0], then every
# resulting piece is split again for each entry of group_delimiters, and the child is split on
# the final pieces. The loop visits group_delimiters[0] a second time; presumably harmless — confirm.
# The paired `groups = functools.reduce(...)` / `groups = reduce(...)` lines are presumably the
# old and new spelling of the same statement.
def process(mtree):
"""return the string split into explicit groups, that is, those either
class SplitExplicitGroups(Transformer):
def __init__(self):
Transformer.__init__(self, 250)
def process(self, mtree, options=None):
"""split each of those into explicit groups (separated by parentheses or square brackets)
:return: return the string split into explicit groups, that is, those either
between parenthese, square brackets or curly braces, and those separated
by a dash."""
for c in mtree.children:
groups = find_first_level_groups(c.value, group_delimiters[0])
for delimiters in group_delimiters:
flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
groups = functools.reduce(flatten, groups, [])
groups = reduce(flatten, groups, [])
# do not do this at this moment, it is not strong enough and can break other
# patterns, such as dates, etc...
#groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
# groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
c.split_on_components(groups)

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,24 +18,29 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from guessit.patterns import sep
import re
import logging
log = logging.getLogger(__name__)
def process(mtree):
class SplitOnDash(Transformer):
def __init__(self):
Transformer.__init__(self, 245)
def process(self, mtree, options=None):
"""split into '-' separated subgroups (with required separator chars
around the dash)
"""
for node in mtree.unidentified_leaves():
indices = []
didx = 0
pattern = re.compile(sep + '-' + sep)
match = pattern.search(node.value)
while match:
span = match.span()
indices.extend([ span[0], span[1] ])
indices.extend([span[0], span[1]])
match = pattern.search(node.value, span[1])
if indices:

View file

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
@ -18,19 +18,28 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.plugins.transformers import Transformer
from guessit import fileutils
import os.path
import logging
log = logging.getLogger(__name__)
from os.path import splitext
# NOTE(review): this span comes from a rendered diff whose +/- markers and indentation were
# lost. The removed module-level process(mtree) and the added SplitPathComponents class appear
# to be interleaved — confirm against the real files.
def process(mtree):
"""Returns the filename split into [ dir*, basename, ext ]."""
class SplitPathComponents(Transformer):
def __init__(self):
# 255 is passed to Transformer.__init__ (presumably the transformer priority — confirm).
Transformer.__init__(self, 255)
# NOTE(review): `options` defaults to None, but options.get('name_only') is called below
# without a guard — presumably callers always pass a dict; confirm.
def process(self, mtree, options=None):
"""first split our path into dirs + basename + ext
:return: the filename split into [ dir*, basename, ext ]
"""
# Unless 'name_only' is set, the value is treated as a path: directories, then basename and extension.
if not options.get('name_only'):
components = fileutils.split_path(mtree.value)
basename = components.pop(-1)
# The next two lines are presumably the old and new spelling of the same statement.
components += list(os.path.splitext(basename))
components += list(splitext(basename))
components[-1] = components[-1][1:] # remove the '.' from the extension
mtree.split_on_components(components)
else:
# With 'name_only', the whole value is one component followed by an empty one.
mtree.split_on_components([mtree.value, ''])

View file

@ -99,13 +99,15 @@ class OpenSubtitlesProvider(Provider):
def no_operation(self):
    """Call the server's ``NoOperation`` with the session token and check the response."""
    response = self.server.NoOperation(self.token)
    checked(response)
def query(self, languages, hash=None, size=None, imdb_id=None, query=None): # @ReservedAssignment
def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None): # @ReservedAssignment
searches = []
if hash and size:
searches.append({'moviehash': hash, 'moviebytesize': str(size)})
if imdb_id:
searches.append({'imdbid': imdb_id})
if query:
if query and season and episode:
searches.append({'query': query, 'season': season, 'episode': episode})
elif query:
searches.append({'query': query})
if not searches:
raise ValueError('One or more parameter missing')
@ -126,10 +128,16 @@ class OpenSubtitlesProvider(Provider):
# Build the search parameters for `video` and delegate to self.query().
# NOTE(review): this span comes from a rendered diff whose +/- markers and indentation were
# lost; the last two lines are presumably the old and new form of the same call continuation.
def list_subtitles(self, video, languages):
query = None
season = None
episode = None
# A text query is only prepared when neither (opensubtitles hash + size) nor an IMDb id is available.
if ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
# last path component of the video name
query = video.name.split(os.sep)[-1]
# For episodes the query becomes the series name, with season and episode passed along.
# NOTE(review): nesting is not visible here — unclear whether this applies only inside the
# fallback branch above or to every Episode; confirm against the real file.
if isinstance(video, Episode):
query = video.series
season = video.season
episode = video.episode
return self.query(languages, hash=video.hashes.get('opensubtitles'), size=video.size, imdb_id=video.imdb_id,
query=query)
query=query, season=season, episode=episode)
def download_subtitle(self, subtitle):
response = checked(self.server.DownloadSubtitles(self.token, [subtitle.id]))

View file

@ -51,7 +51,14 @@ class Subtitle(object):
encodings.append('windows-1255')
elif self.language.alpha3 == 'tur':
encodings.extend(['iso-8859-9', 'windows-1254'])
elif self.language.alpha3 == 'pol':
# Eastern European Group 1
encodings.extend(['windows-1250'])
elif self.language.alpha3 == 'bul':
# Eastern European Group 2
encodings.extend(['windows-1251'])
else:
# Western European (windows-1252)
encodings.append('latin-1')
# try to decode