Mirror of https://github.com/clinton-hall/nzbToMedia.git (synced 2025-08-19 12:59:36 -07:00)
update guessit and subliminal libs. Fixes #678
parent ff50e5144c
commit f716323b76
72 changed files with 9350 additions and 3032 deletions
@@ -1,249 +0,0 @@
Afghanistan|AF|AFG|004|ISO 3166-2:AF
Åland Islands|AX|ALA|248|ISO 3166-2:AX
Albania|AL|ALB|008|ISO 3166-2:AL
… (249 deleted ISO 3166-1 entries in the form name|alpha-2|alpha-3|numeric|ISO 3166-2 reference, Afghanistan through Zimbabwe) …
Zimbabwe|ZW|ZWE|716|ISO 3166-2:ZW
@@ -1,485 +0,0 @@
aar||aa|Afar|afar
abk||ab|Abkhazian|abkhaze
ace|||Achinese|aceh
… (485 deleted ISO 639-2 entries in the form bibliographic code|terminologic code|alpha-2|English name|French name, aar through zza) …
zza|||Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki|zaza; dimili; dimli; kirdki; kirmanjki; zazaki
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,70 +18,86 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
+import pkg_resources
+from .__version__ import __version__
 
-__version__ = '0.6.2'
 __all__ = ['Guess', 'Language',
            'guess_file_info', 'guess_video_info',
-           'guess_movie_info', 'guess_episode_info']
+           'guess_movie_info', 'guess_episode_info',
+           'default_options']
 
 
 # Do python3 detection before importing any other module, to be sure that
 # it will then always be available
 # with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/
 import sys
-if sys.version_info[0] >= 3:
-    PY3 = True
+if sys.version_info[0] >= 3:  # pragma: no cover
+    PY2, PY3 = False, True
     unicode_text_type = str
     native_text_type = str
     base_text_type = str
 
     def u(x):
         return str(x)
 
     def s(x):
         return x
 
     class UnicodeMixin(object):
         __str__ = lambda x: x.__unicode__()
     import binascii
 
     def to_hex(x):
         return binascii.hexlify(x).decode('utf-8')
 
-else:
-    PY3 = False
-    __all__ = [ str(s) for s in __all__ ]  # fix imports for python2
+else:  # pragma: no cover
+    PY2, PY3 = True, False
+    __all__ = [str(s) for s in __all__]  # fix imports for python2
     unicode_text_type = unicode
     native_text_type = str
     base_text_type = basestring
 
     def u(x):
         if isinstance(x, str):
             return x.decode('utf-8')
+        if isinstance(x, list):
+            return [u(s) for s in x]
         return unicode(x)
 
     def s(x):
         if isinstance(x, unicode):
             return x.encode('utf-8')
         if isinstance(x, list):
-            return [ s(y) for y in x ]
+            return [s(y) for y in x]
         if isinstance(x, tuple):
             return tuple(s(y) for y in x)
         if isinstance(x, dict):
             return dict((s(key), s(value)) for key, value in x.items())
         return x
 
     class UnicodeMixin(object):
         __str__ = lambda x: unicode(x).encode('utf-8')
 
     def to_hex(x):
         return x.encode('hex')
 
+    range = xrange
 
-from guessit.guess import Guess, merge_all
+from guessit.guess import Guess, smart_merge
 from guessit.language import Language
 from guessit.matcher import IterativeMatcher
-from guessit.textutils import clean_string
+from guessit.textutils import clean_default, is_camel, from_camel
+import babelfish
+import os.path
 import logging
-import json
+from copy import deepcopy
 
 log = logging.getLogger(__name__)
 
 
 class NullHandler(logging.Handler):
     def emit(self, record):
         pass
 
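Not part of the commit, just an illustrative sketch of the text helpers kept in this compatibility block: on Python 2 the updated u() now also maps over lists, and s() converts unicode structures back to byte strings. The filename values below are made up.

from guessit import u, s

names = [b'Dexter.5x02.HDTV.avi', b'Homeland.S01E01.720p.mkv']  # example values
as_text = u(names)     # new behaviour in this version: lists are decoded element-wise on Python 2
as_bytes = s(as_text)  # recurses into lists/tuples/dicts and re-encodes to UTF-8 on Python 2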
@@ -91,137 +107,193 @@ h = NullHandler()
 log.addHandler(h)
 
 
-def _guess_filename(filename, filetype):
-    def find_nodes(tree, props):
-        """Yields all nodes containing any of the given props."""
-        if isinstance(props, base_text_type):
-            props = [props]
-        for node in tree.nodes():
-            if any(prop in node.guess for prop in props):
-                yield node
-
-    def warning(title):
-        log.warning('%s, guesses: %s - %s' % (title, m.nice_string(), m2.nice_string()))
-        return m
-
-    mtree = IterativeMatcher(filename, filetype=filetype)
-
-    m = mtree.matched()
-
-    second_pass_opts = []
-    second_pass_transfo_opts = {}
-
-    # if there are multiple possible years found, we assume the first one is
-    # part of the title, reparse the tree taking this into account
-    years = set(n.value for n in find_nodes(mtree.match_tree, 'year'))
-    if len(years) >= 2:
-        second_pass_opts.append('skip_first_year')
-
-    to_skip_language_nodes = []
-
-    title_nodes = set(n for n in find_nodes(mtree.match_tree, ['title', 'series']))
-    title_spans = {}
-    for title_node in title_nodes:
-        title_spans[title_node.span[0]] = title_node
-        title_spans[title_node.span[1]] = title_node
-
-    for lang_key in ('language', 'subtitleLanguage'):
-        langs = {}
-        lang_nodes = set(n for n in find_nodes(mtree.match_tree, lang_key))
-
-        for lang_node in lang_nodes:
-            lang = lang_node.guess.get(lang_key, None)
-            if len(lang_node.value) > 3 and (lang_node.span[0] in title_spans.keys() or lang_node.span[1] in title_spans.keys()):
-                # Language is next or before title, and is not a language code. Add to skip for 2nd pass.
-
-                # if filetype is subtitle and the language appears last, just before
-                # the extension, then it is likely a subtitle language
-                parts = clean_string(lang_node.root.value).split()
-                if m['type'] in ['moviesubtitle', 'episodesubtitle'] and (parts.index(lang_node.value) == len(parts) - 2):
-                    continue
-
-                to_skip_language_nodes.append(lang_node)
-            elif not lang in langs:
-                langs[lang] = lang_node
-            else:
-                # The same language was found. Keep the more confident one, and add others to skip for 2nd pass.
-                existing_lang_node = langs[lang]
-                to_skip = None
-                if existing_lang_node.guess.confidence('language') >= lang_node.guess.confidence('language'):
-                    # lang_node is to remove
-                    to_skip = lang_node
-                else:
-                    # existing_lang_node is to remove
-                    langs[lang] = lang_node
-                    to_skip = existing_lang_node
-                to_skip_language_nodes.append(to_skip)
-
-    if to_skip_language_nodes:
-        second_pass_transfo_opts['guess_language'] = (
-            ((), { 'skip': [ { 'node_idx': node.parent.node_idx,
-                               'span': node.span }
-                             for node in to_skip_language_nodes ] }))
-
-    if second_pass_opts or second_pass_transfo_opts:
-        # 2nd pass is needed
-        log.info("Running 2nd pass with options: %s" % second_pass_opts)
-        log.info("Transfo options: %s" % second_pass_transfo_opts)
-        mtree = IterativeMatcher(filename, filetype=filetype,
-                                 opts=second_pass_opts,
-                                 transfo_opts=second_pass_transfo_opts)
-
-        m = mtree.matched()
-
-    if 'language' not in m and 'subtitleLanguage' not in m or 'title' not in m:
-        return m
-
-    # if we found some language, make sure we didn't cut a title or sth...
-    mtree2 = IterativeMatcher(filename, filetype=filetype,
-                              opts=['nolanguage', 'nocountry'])
-    m2 = mtree2.matched()
-
-    if m.get('title') != m2.get('title'):
-        title = next(find_nodes(mtree.match_tree, 'title'))
-        title2 = next(find_nodes(mtree2.match_tree, 'title'))
-
-        # if a node is in an explicit group, then the correct title is probably
-        # the other one
-        if title.root.node_at(title.node_idx[:2]).is_explicit():
-            return m2
-        elif title2.root.node_at(title2.node_idx[:2]).is_explicit():
-            return m
-
-    return m
-
-
-def guess_file_info(filename, filetype='autodetect', info=None):
+def _guess_filename(filename, options=None, **kwargs):
+    mtree = _build_filename_mtree(filename, options=options, **kwargs)
+    if options.get('split_camel'):
+        _add_camel_properties(mtree, options=options)
+    return mtree.matched()
+
+
+def _build_filename_mtree(filename, options=None, **kwargs):
+    mtree = IterativeMatcher(filename, options=options, **kwargs)
+    second_pass_options = mtree.second_pass_options
+    if second_pass_options:
+        log.debug("Running 2nd pass")
+        merged_options = dict(options)
+        merged_options.update(second_pass_options)
+        mtree = IterativeMatcher(filename, options=merged_options, **kwargs)
+    return mtree
+
+
+def _add_camel_properties(mtree, options=None, **kwargs):
+    prop = 'title' if mtree.matched().get('type') != 'episode' else 'series'
+    value = mtree.matched().get(prop)
+    _guess_camel_string(mtree, value, options=options, skip_title=False, **kwargs)
+
+    for leaf in mtree.match_tree.unidentified_leaves():
+        value = leaf.value
+        _guess_camel_string(mtree, value, options=options, skip_title=True, **kwargs)
+
+
+def _guess_camel_string(mtree, string, options=None, skip_title=False, **kwargs):
+    if string and is_camel(string):
+        log.debug('"%s" is camel cased. Try to detect more properties.' % (string,))
+        uncameled_value = from_camel(string)
+        merged_options = dict(options)
+        if 'type' in mtree.match_tree.info:
+            current_type = mtree.match_tree.info.get('type')
+            if current_type and current_type != 'unknown':
+                merged_options['type'] = current_type
+        camel_tree = _build_filename_mtree(uncameled_value, options=merged_options, name_only=True, skip_title=skip_title, **kwargs)
+        if len(camel_tree.matched()) > 0:
+            mtree.matched().update(camel_tree.matched())
+            return True
+    return False
+
+
+def guess_video_metadata(filename):
+    """Gets the video metadata properties out of a given file. The file needs to
+    exist on the filesystem to be able to be analyzed. An empty guess is
+    returned otherwise.
+
+    You need to have the Enzyme python package installed for this to work."""
+    result = Guess()
+
+    def found(prop, value):
+        result[prop] = value
+        log.debug('Found with enzyme %s: %s' % (prop, value))
+
+    # first get the size of the file, in bytes
+    try:
+        size = os.stat(filename).st_size
+        found('fileSize', size)
+
+    except Exception as e:
+        log.error('Cannot get video file size: %s' % e)
+        # file probably does not exist, we might as well return now
+        return result
+
+    # then get additional metadata from the file using enzyme, if available
+    try:
+        import enzyme
+
+        with open(filename) as f:
+            mkv = enzyme.MKV(f)
+
+            found('duration', mkv.info.duration.total_seconds())
+
+            if mkv.video_tracks:
+                video_track = mkv.video_tracks[0]
+
+                # resolution
+                if video_track.height in (480, 720, 1080):
+                    if video_track.interlaced:
+                        found('screenSize', '%di' % video_track.height)
+                    else:
+                        found('screenSize', '%dp' % video_track.height)
+                else:
+                    # TODO: do we want this?
+                    #found('screenSize', '%dx%d' % (video_track.width, video_track.height))
+                    pass
+
+                # video codec
+                if video_track.codec_id == 'V_MPEG4/ISO/AVC':
+                    found('videoCodec', 'h264')
+                elif video_track.codec_id == 'V_MPEG4/ISO/SP':
+                    found('videoCodec', 'DivX')
+                elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
+                    found('videoCodec', 'XviD')
+
+            else:
+                log.warning('MKV has no video track')
+
+            if mkv.audio_tracks:
+                audio_track = mkv.audio_tracks[0]
+                # audio codec
+                if audio_track.codec_id == 'A_AC3':
+                    found('audioCodec', 'AC3')
+                elif audio_track.codec_id == 'A_DTS':
+                    found('audioCodec', 'DTS')
+                elif audio_track.codec_id == 'A_AAC':
+                    found('audioCodec', 'AAC')
+            else:
+                log.warning('MKV has no audio track')
+
+            if mkv.subtitle_tracks:
+                embedded_subtitle_languages = set()
+                for st in mkv.subtitle_tracks:
+                    try:
+                        if st.language:
+                            lang = babelfish.Language.fromalpha3b(st.language)
+                        elif st.name:
+                            lang = babelfish.Language.fromname(st.name)
+                        else:
+                            lang = babelfish.Language('und')
+
+                    except babelfish.Error:
+                        lang = babelfish.Language('und')
+
+                    embedded_subtitle_languages.add(lang)
+
+                found('subtitleLanguage', embedded_subtitle_languages)
+            else:
+                log.debug('MKV has no subtitle track')
+
+        return result
+
+    except ImportError:
+        log.error('Cannot get video file metadata, missing dependency: enzyme')
+        log.error('Please install it from PyPI, by doing eg: pip install enzyme')
+        return result
+
+    except IOError as e:
+        log.error('Could not open file: %s' % filename)
+        log.error('Make sure it exists and is available for reading on the filesystem')
+        log.error('Error: %s' % e)
+        return result
+
+    except enzyme.Error as e:
+        log.error('Cannot guess video file metadata')
+        log.error('enzyme.Error while reading file: %s' % filename)
+        log.error('Error: %s' % e)
+        return result
+
+
+default_options = {}
+
+
+def guess_file_info(filename, info=None, options=None, **kwargs):
     """info can contain the names of the various plugins, such as 'filename' to
     detect filename info, or 'hash_md5' to get the md5 hash of the file.
 
-    >>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
-    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
+    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
+    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
+    >>> g['hash_md5'], g['hash_sha1']
+    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
     """
+    info = info or 'filename'
+    options = options or {}
+    if default_options:
+        merged_options = deepcopy(default_options)
+        merged_options.update(options)
+        options = merged_options
 
     result = []
     hashers = []
 
     # Force unicode as soon as possible
     filename = u(filename)
 
-    if info is None:
-        info = ['filename']
-
     if isinstance(info, base_text_type):
         info = [info]
 
     for infotype in info:
         if infotype == 'filename':
-            result.append(_guess_filename(filename, filetype))
+            result.append(_guess_filename(filename, options, **kwargs))
 
         elif infotype == 'hash_mpc':
             from guessit.hash_mpc import hash_file
             try:
-                result.append(Guess({'hash_mpc': hash_file(filename)},
+                result.append(Guess({infotype: hash_file(filename)},
                                     confidence=1.0))
             except Exception as e:
                 log.warning('Could not compute MPC-style hash because: %s' % e)
 
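As a quick usage sketch (not part of the diff): the rewritten _guess_filename takes an options dict instead of the old positional filetype argument, and the new guess_video_metadata probes the container with enzyme. The file path below is hypothetical, and enzyme must be installed for the second call to return anything.

from guessit import guess_file_info, guess_video_metadata

# Filename-based guessing; 'options' replaces the old 'filetype' positional argument.
guess = guess_file_info('Dexter.5x02.Hello.Bandit.HDTV.XviD.avi',
                        options={'split_camel': True})
print(guess.get('series'), guess.get('season'), guess.get('episodeNumber'))

# Container-based probe: reads the MKV headers via enzyme (hypothetical path).
meta = guess_video_metadata('/path/to/episode.mkv')
print(meta.get('videoCodec'), meta.get('audioCodec'), meta.get('subtitleLanguage'))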
@@ -229,7 +301,7 @@ def guess_file_info(filename, filetype='autodetect', info=None):
         elif infotype == 'hash_ed2k':
             from guessit.hash_ed2k import hash_file
             try:
-                result.append(Guess({'hash_ed2k': hash_file(filename)},
+                result.append(Guess({infotype: hash_file(filename)},
                                     confidence=1.0))
             except Exception as e:
                 log.warning('Could not compute ed2k hash because: %s' % e)
 
@@ -243,6 +315,11 @@ def guess_file_info(filename, filetype='autodetect', info=None):
             except AttributeError:
                 log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)
 
+        elif infotype == 'video':
+            g = guess_video_metadata(filename)
+            if g:
+                result.append(g)
+
         else:
             log.warning('Invalid infotype: %s' % infotype)
 
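A small combined-call sketch (assumption: the file exists on disk and enzyme is installed); the path is hypothetical. The new 'video' infotype can be requested alongside the existing ones in a single info list:

from guessit import guess_file_info

# 'filename' parses the name, 'video' adds the enzyme container metadata from above,
# and 'hash_md5' goes through the generic hashlib branch of guess_file_info.
info = guess_file_info('/path/to/Show.S01E01.720p.mkv',  # hypothetical path
                       info=['filename', 'video', 'hash_md5'])
print(info.nice_string())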
@@ -265,25 +342,18 @@ def guess_file_info(filename, filetype='autodetect', info=None):
         except Exception as e:
             log.warning('Could not compute hash because: %s' % e)
 
-    result = merge_all(result)
-
-    # last minute adjustments
-
-    # if country is in the guessed properties, make it part of the filename
-    if 'series' in result and 'country' in result:
-        result['series'] += ' (%s)' % result['country'].alpha2.upper()
-
+    result = smart_merge(result)
 
     return result
 
 
-def guess_video_info(filename, info=None):
-    return guess_file_info(filename, 'autodetect', info)
+def guess_video_info(filename, info=None, options=None, **kwargs):
+    return guess_file_info(filename, info=info, options=options, type='video', **kwargs)
 
 
-def guess_movie_info(filename, info=None):
-    return guess_file_info(filename, 'movie', info)
+def guess_movie_info(filename, info=None, options=None, **kwargs):
+    return guess_file_info(filename, info=info, options=options, type='movie', **kwargs)
 
 
-def guess_episode_info(filename, info=None):
-    return guess_file_info(filename, 'episode', info)
+def guess_episode_info(filename, info=None, options=None, **kwargs):
+    return guess_file_info(filename, info=info, options=options, type='episode', **kwargs)
 
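The wrappers above now forward a type keyword instead of the old positional filetype. The snippet below is an illustrative sketch of that equivalence (not part of the commit; the filename is borrowed from the demo list further down):

from guessit import guess_episode_info, guess_file_info

filename = 'Californication.2x05.Vaginatown.HDTV.XviD-0TV.avi'

# guess_episode_info(...) is now just guess_file_info(..., type='episode').
g1 = guess_episode_info(filename)
g2 = guess_file_info(filename, type='episode')
assert g1.get('series') == g2.get('series')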
@@ -2,7 +2,8 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,29 +19,120 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from __future__ import print_function
-from guessit import u
-from guessit import slogging, guess_file_info
-from optparse import OptionParser
+from __future__ import absolute_import, division, print_function, unicode_literals
+from collections import defaultdict
 import logging
-import sys
 import os
-import locale
+
+from guessit import PY2, u, guess_file_info, __version__
+from guessit.options import get_opts
+from guessit.__version__ import __version__
 
 
-def detect_filename(filename, filetype, info=['filename'], advanced = False):
+def guess_file(filename, info='filename', options=None, **kwargs):
+    options = options or {}
     filename = u(filename)
 
-    print('For:', filename)
-    print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string(advanced))
+    if not options.get('yaml') and not options.get('show_property'):
+        print('For:', filename)
+    guess = guess_file_info(filename, info, options, **kwargs)
+
+    if not options.get('unidentified'):
+        try:
+            del guess['unidentified']
+        except KeyError:
+            pass
+
+    if options.get('show_property'):
+        print(guess.get(options.get('show_property'), ''))
+        return
+
+    if options.get('yaml'):
+        import yaml
+        for k, v in guess.items():
+            if isinstance(v, list) and len(v) == 1:
+                guess[k] = v[0]
+        ystr = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False)
+        i = 0
+        for yline in ystr.splitlines():
+            if i == 0:
+                print("? " + yline[:-1])
+            elif i == 1:
+                print(":" + yline[1:])
+            else:
+                print(yline)
+            i += 1
+        return
+    print('GuessIt found:', guess.nice_string(options.get('advanced')))
 
 
-def run_demo(episodes=True, movies=True, advanced=False):
+def _supported_properties():
+    all_properties = defaultdict(list)
+    transformers_properties = []
+
+    from guessit.plugins import transformers
+    for transformer in transformers.all_transformers():
+        supported_properties = transformer.supported_properties()
+        transformers_properties.append((transformer, supported_properties))
+
+        if isinstance(supported_properties, dict):
+            for property_name, possible_values in supported_properties.items():
+                all_properties[property_name].extend(possible_values)
+        else:
+            for property_name in supported_properties:
+                all_properties[property_name]  # just make sure it exists
+
+    return all_properties, transformers_properties
+
+
+def display_transformers():
+    print('GuessIt transformers:')
+    _, transformers_properties = _supported_properties()
+    for transformer, _ in transformers_properties:
+        print('[@] %s (%s)' % (transformer.name, transformer.priority))
+
+
+def display_properties(options):
+    values = options.values
+    transformers = options.transformers
+    name_only = options.name_only
+
+    print('GuessIt properties:')
+    all_properties, transformers_properties = _supported_properties()
+    if name_only:
+        # the 'container' property does not apply when using the --name-only
+        # option
+        del all_properties['container']
+
+    if transformers:
+        for transformer, properties_list in transformers_properties:
+            print('[@] %s (%s)' % (transformer.name, transformer.priority))
+            for property_name in properties_list:
+                property_values = all_properties.get(property_name)
+                print('  [+] %s' % (property_name,))
+                if property_values and values:
+                    _display_property_values(property_name, indent=4)
+    else:
+        properties_list = sorted(all_properties.keys())
+        for property_name in properties_list:
+            property_values = all_properties.get(property_name)
+            print('  [+] %s' % (property_name,))
+            if property_values and values:
+                _display_property_values(property_name, indent=4)
+
+
+def _display_property_values(property_name, indent=2):
+    all_properties, _ = _supported_properties()
+    property_values = all_properties.get(property_name)
+    for property_value in property_values:
+        print(indent * ' ' + '[!] %s' % (property_value,))
+
+
+def run_demo(episodes=True, movies=True, options=None):
     # NOTE: tests should not be added here but rather in the tests/ folder
     # this is just intended as a quick example
     if episodes:
-        testeps = [ 'Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
+        testeps = ['Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
                    'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
|
'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
|
||||||
'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
|
'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
|
||||||
'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
|
'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
|
||||||
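For reference, a hedged sketch of driving the new guess_file helper directly with a plain options dict (assuming this command-line module is guessit/__main__.py; the option keys simply mirror the attributes read above):

    from guessit.__main__ import guess_file

    # Print a single property instead of the whole guess.
    guess_file('Dexter.5x02.Hello,.Bandit.HDTV.XviD.avi',
               options={'show_property': 'series'}, type='episode')

    # Or dump the full guess as YAML instead (needs PyYAML installed).
    guess_file('Dexter.5x02.Hello,.Bandit.HDTV.XviD.avi',
               options={'yaml': True}, type='episode')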
@@ -48,22 +140,20 @@ def run_demo(episodes=True, movies=True, advanced=False):
                    'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
                    'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
                    'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
-                   'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi'
-                   ]
+                   'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi']
 
         for f in testeps:
-            print('-'*80)
-            detect_filename(f, filetype='episode', advanced=advanced)
+            print('-' * 80)
+            guess_file(f, options=options, type='episode')
 
 
     if movies:
-        testmovies = [ 'Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
+        testmovies = ['Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
                       'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
                       'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
                       'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
                       'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
-                      'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', # FIXME: PROPER and R5 get overwritten
-                      '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', # FIXME: title gets overwritten
+                      'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi',
+                      '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv',
                       'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
                       'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
                       'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
@@ -79,48 +169,115 @@ def run_demo(episodes=True, movies=True, advanced=False):
                      ]
 
         for f in testmovies:
-            print('-'*80)
-            detect_filename(f, filetype = 'movie', advanced = advanced)
-
-
-def main():
-    slogging.setupLogging()
-
-    # see http://bugs.python.org/issue2128
-    if sys.version_info.major < 3 and os.name == 'nt':
-        for i, a in enumerate(sys.argv):
-            sys.argv[i] = a.decode(locale.getpreferredencoding())
-
-    parser = OptionParser(usage = 'usage: %prog [options] file1 [file2...]')
-    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
-                      help = 'display debug output')
-    parser.add_option('-i', '--info', dest = 'info', default = 'filename',
-                      help = 'the desired information type: filename, hash_mpc or a hash from python\'s '
-                             'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
-                             'them, comma-separated')
-    parser.add_option('-t', '--type', dest = 'filetype', default = 'autodetect',
-                      help = 'the suggested file type: movie, episode or autodetect')
-    parser.add_option('-a', '--advanced', dest = 'advanced', action='store_true', default = False,
-                      help = 'display advanced information for filename guesses, as json output')
-    parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False,
-                      help = 'run a few builtin tests instead of analyzing a file')
-
-    options, args = parser.parse_args()
-    if options.verbose:
-        logging.getLogger('guessit').setLevel(logging.DEBUG)
-
-    if options.demo:
-        run_demo(episodes=True, movies=True, advanced=options.advanced)
-    else:
-        if args:
-            for filename in args:
-                detect_filename(filename,
-                                filetype = options.filetype,
-                                info = options.info.split(','),
-                                advanced = options.advanced)
-        else:
-            parser.print_help()
+            print('-' * 80)
+            guess_file(f, options=options, type='movie')
+
+
+def submit_bug(filename, options):
+    import requests  # only import when needed
+    from requests.exceptions import RequestException
+
+    try:
+        opts = dict((k, v) for k, v in options.__dict__.items()
+                    if v and k != 'submit_bug')
+
+        r = requests.post('http://localhost:5000/bugs', {'filename': filename,
+                                                          'version': __version__,
+                                                          'options': str(opts)})
+        if r.status_code == 200:
+            print('Successfully submitted file: %s' % r.text)
+        else:
+            print('Could not submit bug at the moment, please try again later.')
+
+    except RequestException as e:
+        print('Could not submit bug at the moment, please try again later.')
+
+
+def main(args=None, setup_logging=True):
+    if setup_logging:
+        from guessit import slogging
+        slogging.setup_logging()
+
+    if PY2:  # pragma: no cover
+        import codecs
+        import locale
+        import sys
+
+        # see http://bugs.python.org/issue2128
+        if os.name == 'nt':
+            for i, a in enumerate(sys.argv):
+                sys.argv[i] = a.decode(locale.getpreferredencoding())
+
+        # see https://github.com/wackou/guessit/issues/43
+        # and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
+        # Wrap sys.stdout into a StreamWriter to allow writing unicode.
+        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
+
+    from guessit.plugins import transformers
+
+    if args:
+        options = get_opts().parse_args(args)
+    else:  # pragma: no cover
+        options = get_opts().parse_args()
+    if options.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    help_required = True
+    if options.properties or options.values:
+        display_properties(options)
+        help_required = False
+    elif options.transformers:
+        display_transformers()
+        help_required = False
+
+    if options.demo:
+        run_demo(episodes=True, movies=True, options=vars(options))
+        help_required = False
+
+    if options.version:
+        print('+-------------------------------------------------------+')
+        print('+ GuessIt ' + __version__ + (28-len(__version__)) * ' ' + '+')
+        print('+-------------------------------------------------------+')
+        print('| Please report any bug or feature request at |')
+        print('| https://github.com/wackou/guessit/issues. |')
+        print('+-------------------------------------------------------+')
+        help_required = False
+
+    if options.yaml:
+        try:
+            import yaml, babelfish
+            def default_representer(dumper, data):
+                return dumper.represent_str(str(data))
+            yaml.SafeDumper.add_representer(babelfish.Language, default_representer)
+            yaml.SafeDumper.add_representer(babelfish.Country, default_representer)
+        except ImportError:  # pragma: no cover
+            print('PyYAML not found. Using default output.')
+
+    filenames = []
+    if options.filename:
+        filenames.extend(options.filename)
+    if options.input_file:
+        input_file = open(options.input_file, 'r')
+        try:
+            filenames.extend([line.strip() for line in input_file.readlines()])
+        finally:
+            input_file.close()
+
+    filenames = filter(lambda f: f, filenames)
+
+    if filenames:
+        help_required = False
+        if options.submit_bug:
+            for filename in filenames:
+                submit_bug(filename, options)
+        else:
+            for filename in filenames:
+                guess_file(filename,
+                           info=options.info.split(','),
+                           options=vars(options))
+
+    if help_required:  # pragma: no cover
+        get_opts().print_help()
 
 
 if __name__ == '__main__':
     main()
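A hedged sketch of the reworked entry point, which now takes an explicit argument list and an opt-out for logging setup (useful for callers such as nzbToMedia that configure logging themselves; module path guessit.__main__ is an assumption):

    from guessit.__main__ import main

    # Parse an argv-style list instead of sys.argv; filenames are positional,
    # and other CLI flags can be appended as needed.
    main(['Series/Dexter.5x02.Hello,.Bandit.HDTV.XviD.avi'], setup_logging=False)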
20  libs/guessit/__version__.py  Normal file

@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# GuessIt - A library for guessing information from filenames
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+#
+# GuessIt is free software; you can redistribute it and/or modify it under
+# the terms of the Lesser GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GuessIt is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Lesser GNU General Public License for more details.
+#
+# You should have received a copy of the Lesser GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+__version__ = '0.10.2.dev0'
||||||
771
libs/guessit/containers.py
Normal file
771
libs/guessit/containers.py
Normal file
|
|
@ -0,0 +1,771 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# GuessIt - A library for guessing information from filenames
|
||||||
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
|
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||||
|
#
|
||||||
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# GuessIt is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# Lesser GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the Lesser GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
from .patterns import compile_pattern, sep
|
||||||
|
from . import base_text_type
|
||||||
|
from .guess import Guess
|
||||||
|
import types
|
||||||
|
|
||||||
|
|
||||||
|
def _get_span(prop, match):
|
||||||
|
"""Retrieves span for a match"""
|
||||||
|
if not prop.global_span and match.re.groups:
|
||||||
|
start = None
|
||||||
|
end = None
|
||||||
|
for i in range(1, match.re.groups + 1):
|
||||||
|
span = match.span(i)
|
||||||
|
if start is None or span[0] < start:
|
||||||
|
start = span[0]
|
||||||
|
if end is None or span[1] > end:
|
||||||
|
end = span[1]
|
||||||
|
return start, end
|
||||||
|
else:
|
||||||
|
return match.span()
|
||||||
|
start = span[0]
|
||||||
|
end = span[1]
|
||||||
|
|
||||||
|
|
||||||
|
def _trim_span(span, value, blanks = sep):
|
||||||
|
start, end = span
|
||||||
|
|
||||||
|
for i in range(0, len(value)):
|
||||||
|
if value[i] in blanks:
|
||||||
|
start += 1
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
for i in reversed(range(0, len(value))):
|
||||||
|
if value[i] in blanks:
|
||||||
|
end -= 1
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
if end <= start:
|
||||||
|
return -1, -1
|
||||||
|
return start, end
|
||||||
|
|
||||||
|
|
||||||
|
def _get_groups(compiled_re):
|
||||||
|
"""
|
||||||
|
Retrieves groups from re
|
||||||
|
|
||||||
|
:return: list of group names
|
||||||
|
"""
|
||||||
|
if compiled_re.groups:
|
||||||
|
indexgroup = {}
|
||||||
|
for k, i in compiled_re.groupindex.items():
|
||||||
|
indexgroup[i] = k
|
||||||
|
ret = []
|
||||||
|
for i in range(1, compiled_re.groups + 1):
|
||||||
|
ret.append(indexgroup.get(i, i))
|
||||||
|
return ret
|
||||||
|
else:
|
||||||
|
return [None]
|
||||||
|
|
||||||
|
|
||||||
|
class NoValidator(object):
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class LeftValidator(object):
|
||||||
|
"""Make sure our match is starting by separator, or by another entry"""
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
span = _get_span(prop, match)
|
||||||
|
span = _trim_span(span, string[span[0]:span[1]])
|
||||||
|
start, end = span
|
||||||
|
|
||||||
|
sep_start = start <= 0 or string[start - 1] in sep
|
||||||
|
start_by_other = start in entry_end
|
||||||
|
if not sep_start and not start_by_other:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class RightValidator(object):
|
||||||
|
"""Make sure our match is ended by separator, or by another entry"""
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
span = _get_span(prop, match)
|
||||||
|
span = _trim_span(span, string[span[0]:span[1]])
|
||||||
|
start, end = span
|
||||||
|
|
||||||
|
sep_end = end >= len(string) or string[end] in sep
|
||||||
|
end_by_other = end in entry_start
|
||||||
|
if not sep_end and not end_by_other:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class ChainedValidator(object):
|
||||||
|
def __init__(self, *validators):
|
||||||
|
self._validators = validators
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
for validator in self._validators:
|
||||||
|
if not validator.validate(prop, string, node, match, entry_start, entry_end):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class SameKeyValidator(object):
|
||||||
|
def __init__(self, validator_function):
|
||||||
|
self.validator_function = validator_function
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
for key in prop.keys:
|
||||||
|
for same_value_leaf in node.root.leaves_containing(key):
|
||||||
|
ret = self.validator_function(same_value_leaf, key, prop, string, node, match, entry_start, entry_end)
|
||||||
|
if ret is not None:
|
||||||
|
return ret
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class OnlyOneValidator(SameKeyValidator):
|
||||||
|
def __init__(self):
|
||||||
|
super(OnlyOneValidator, self).__init__(lambda same_value_leaf, key, prop, string, node, match, entry_start, entry_end: False)
|
||||||
|
|
||||||
|
|
||||||
|
class DefaultValidator(object):
|
||||||
|
"""Make sure our match is surrounded by separators, or by another entry"""
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
span = _get_span(prop, match)
|
||||||
|
span = _trim_span(span, string[span[0]:span[1]])
|
||||||
|
start, end = span
|
||||||
|
|
||||||
|
sep_start = start <= 0 or string[start - 1] in sep
|
||||||
|
sep_end = end >= len(string) or string[end] in sep
|
||||||
|
start_by_other = start in entry_end
|
||||||
|
end_by_other = end in entry_start
|
||||||
|
if (sep_start or start_by_other) and (sep_end or end_by_other):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionValidator(object):
|
||||||
|
def __init__(self, function):
|
||||||
|
self.function = function
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
return self.function(prop, string, node, match, entry_start, entry_end)
|
||||||
|
|
||||||
|
|
||||||
|
class FormatterValidator(object):
|
||||||
|
def __init__(self, group_name=None, formatted_validator=None):
|
||||||
|
self.group_name = group_name
|
||||||
|
self.formatted_validator = formatted_validator
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
if self.group_name:
|
||||||
|
formatted = prop.format(match.group(self.group_name), self.group_name)
|
||||||
|
else:
|
||||||
|
formatted = prop.format(match.group())
|
||||||
|
if self.formatted_validator:
|
||||||
|
return self.formatted_validator(formatted)
|
||||||
|
else:
|
||||||
|
return formatted
|
||||||
|
|
||||||
|
|
||||||
|
def _get_positions(prop, string, node, match, entry_start, entry_end):
|
||||||
|
span = match.span()
|
||||||
|
start = span[0]
|
||||||
|
end = span[1]
|
||||||
|
|
||||||
|
at_start = True
|
||||||
|
at_end = True
|
||||||
|
|
||||||
|
while start > 0:
|
||||||
|
start -= 1
|
||||||
|
if string[start] not in sep:
|
||||||
|
at_start = False
|
||||||
|
break
|
||||||
|
while end < len(string) - 1:
|
||||||
|
end += 1
|
||||||
|
if string[end] not in sep:
|
||||||
|
at_end = False
|
||||||
|
break
|
||||||
|
return at_start, at_end
|
||||||
|
|
||||||
|
|
||||||
|
class WeakValidator(DefaultValidator):
|
||||||
|
"""Make sure our match is surrounded by separators and is the first or last element in the string"""
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
if super(WeakValidator, self).validate(prop, string, node, match, entry_start, entry_end):
|
||||||
|
at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end)
|
||||||
|
return at_start or at_end
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class NeighborValidator(DefaultValidator):
|
||||||
|
"""Make sure the node is next another one"""
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end)
|
||||||
|
|
||||||
|
if at_start:
|
||||||
|
previous_leaf = node.root.previous_leaf(node)
|
||||||
|
if previous_leaf is not None:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if at_end:
|
||||||
|
next_leaf = node.root.next_leaf(node)
|
||||||
|
if next_leaf is not None:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class LeavesValidator(DefaultValidator):
|
||||||
|
def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True):
|
||||||
|
self.previous_lambdas = previous_lambdas if previous_lambdas is not None else []
|
||||||
|
self.next_lambdas = next_lambdas if next_lambdas is not None else []
|
||||||
|
if lambdas:
|
||||||
|
self.previous_lambdas.extend(lambdas)
|
||||||
|
self.next_lambdas.extend(lambdas)
|
||||||
|
self.both_side = both_side
|
||||||
|
self.default_ = default_
|
||||||
|
|
||||||
|
"""Make sure our match is surrounded by separators and validates defined lambdas"""
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
if self.default_:
|
||||||
|
super_ret = super(LeavesValidator, self).validate(prop, string, node, match, entry_start, entry_end)
|
||||||
|
else:
|
||||||
|
super_ret = True
|
||||||
|
if not super_ret:
|
||||||
|
return False
|
||||||
|
|
||||||
|
previous_ = self._validate_previous(prop, string, node, match, entry_start, entry_end)
|
||||||
|
next_ = self._validate_next(prop, string, node, match, entry_start, entry_end)
|
||||||
|
|
||||||
|
if previous_ is None and next_ is None:
|
||||||
|
return super_ret
|
||||||
|
if self.both_side:
|
||||||
|
return previous_ and next_
|
||||||
|
else:
|
||||||
|
return previous_ or next_
|
||||||
|
|
||||||
|
def _validate_previous(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
if self.previous_lambdas:
|
||||||
|
for leaf in node.root.previous_leaves(node):
|
||||||
|
for lambda_ in self.previous_lambdas:
|
||||||
|
ret = self._check_rule(lambda_, leaf)
|
||||||
|
if ret is not None:
|
||||||
|
return ret
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _validate_next(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
if self.next_lambdas:
|
||||||
|
for leaf in node.root.next_leaves(node):
|
||||||
|
for lambda_ in self.next_lambdas:
|
||||||
|
ret = self._check_rule(lambda_, leaf)
|
||||||
|
if ret is not None:
|
||||||
|
return ret
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _check_rule(self, lambda_, previous_leaf):
|
||||||
|
return lambda_(previous_leaf)
|
||||||
|
|
||||||
|
|
||||||
|
class _Property:
|
||||||
|
"""Represents a property configuration."""
|
||||||
|
def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None, disabler=None, confidence_lambda=None):
|
||||||
|
"""
|
||||||
|
:param keys: Keys of the property (format, screenSize, ...)
|
||||||
|
:type keys: string
|
||||||
|
:param canonical_form: Unique value of the property (DVD, 720p, ...)
|
||||||
|
:type canonical_form: string
|
||||||
|
:param pattern: Regexp pattern
|
||||||
|
:type pattern: string
|
||||||
|
:param confidence: confidence
|
||||||
|
:type confidence: float
|
||||||
|
:param enhance: enhance the pattern
|
||||||
|
:type enhance: boolean
|
||||||
|
:param global_span: if True, the whole match span will used to create the Guess.
|
||||||
|
Else, the span from the capturing groups will be used.
|
||||||
|
:type global_span: boolean
|
||||||
|
:param validator: Validator to use
|
||||||
|
:type validator: :class:`DefaultValidator`
|
||||||
|
:param formatter: Formater to use
|
||||||
|
:type formatter: function
|
||||||
|
"""
|
||||||
|
if isinstance(keys, list):
|
||||||
|
self.keys = keys
|
||||||
|
elif isinstance(keys, base_text_type):
|
||||||
|
self.keys = [keys]
|
||||||
|
else:
|
||||||
|
self.keys = []
|
||||||
|
self.canonical_form = canonical_form
|
||||||
|
if pattern is not None:
|
||||||
|
self.pattern = pattern
|
||||||
|
else:
|
||||||
|
self.pattern = canonical_form
|
||||||
|
if self.canonical_form is None and canonical_from_pattern:
|
||||||
|
self.canonical_form = self.pattern
|
||||||
|
self.compiled = compile_pattern(self.pattern, enhance=enhance)
|
||||||
|
for group_name in _get_groups(self.compiled):
|
||||||
|
if isinstance(group_name, base_text_type) and not group_name in self.keys:
|
||||||
|
self.keys.append(group_name)
|
||||||
|
if not self.keys:
|
||||||
|
raise ValueError("No property key is defined")
|
||||||
|
self.confidence = confidence
|
||||||
|
self.confidence_lambda = confidence_lambda
|
||||||
|
self.global_span = global_span
|
||||||
|
self.validator = validator
|
||||||
|
self.formatter = formatter
|
||||||
|
self.disabler = disabler
|
||||||
|
|
||||||
|
def disabled(self, options):
|
||||||
|
if self.disabler:
|
||||||
|
return self.disabler(options)
|
||||||
|
return False
|
||||||
|
|
||||||
|
def format(self, value, group_name=None):
|
||||||
|
"""Retrieves the final value from re group match value"""
|
||||||
|
formatter = None
|
||||||
|
if isinstance(self.formatter, dict):
|
||||||
|
formatter = self.formatter.get(group_name)
|
||||||
|
if formatter is None and group_name is not None:
|
||||||
|
formatter = self.formatter.get(None)
|
||||||
|
else:
|
||||||
|
formatter = self.formatter
|
||||||
|
if isinstance(formatter, types.FunctionType):
|
||||||
|
return formatter(value)
|
||||||
|
elif formatter is not None:
|
||||||
|
return formatter.format(value)
|
||||||
|
return value
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "%s: %s" % (self.keys, self.canonical_form if self.canonical_form else self.pattern)
|
||||||
|
|
||||||
|
|
||||||
|
class PropertiesContainer(object):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
self._properties = []
|
||||||
|
self.default_property_kwargs = kwargs
|
||||||
|
|
||||||
|
def unregister_property(self, name, *canonical_forms):
|
||||||
|
"""Unregister a property canonical forms
|
||||||
|
|
||||||
|
If canonical_forms are specified, only those values will be unregistered
|
||||||
|
|
||||||
|
:param name: Property name to unregister
|
||||||
|
:type name: string
|
||||||
|
:param canonical_forms: Values to unregister
|
||||||
|
:type canonical_forms: varargs of string
|
||||||
|
"""
|
||||||
|
_properties = [prop for prop in self._properties if prop.name == name and (not canonical_forms or prop.canonical_form in canonical_forms)]
|
||||||
|
|
||||||
|
def register_property(self, name, *patterns, **property_params):
|
||||||
|
"""Register property with defined canonical form and patterns.
|
||||||
|
|
||||||
|
:param name: name of the property (format, screenSize, ...)
|
||||||
|
:type name: string
|
||||||
|
:param patterns: regular expression patterns to register for the property canonical_form
|
||||||
|
:type patterns: varargs of string
|
||||||
|
"""
|
||||||
|
properties = []
|
||||||
|
for pattern in patterns:
|
||||||
|
params = dict(self.default_property_kwargs)
|
||||||
|
params.update(property_params)
|
||||||
|
if isinstance(pattern, dict):
|
||||||
|
params.update(pattern)
|
||||||
|
prop = _Property(name, **params)
|
||||||
|
else:
|
||||||
|
prop = _Property(name, pattern, **params)
|
||||||
|
self._properties.append(prop)
|
||||||
|
properties.append(prop)
|
||||||
|
return properties
|
||||||
|
|
||||||
|
def register_canonical_properties(self, name, *canonical_forms, **property_params):
|
||||||
|
"""Register properties from their canonical forms.
|
||||||
|
|
||||||
|
:param name: name of the property (releaseGroup, ...)
|
||||||
|
:type name: string
|
||||||
|
:param canonical_forms: values of the property ('ESiR', 'WAF', 'SEPTiC', ...)
|
||||||
|
:type canonical_forms: varargs of strings
|
||||||
|
"""
|
||||||
|
properties = []
|
||||||
|
for canonical_form in canonical_forms:
|
||||||
|
params = dict(property_params)
|
||||||
|
params['canonical_form'] = canonical_form
|
||||||
|
properties.extend(self.register_property(name, canonical_form, **property_params))
|
||||||
|
return properties
|
||||||
|
|
||||||
|
def unregister_all_properties(self):
|
||||||
|
"""Unregister all defined properties"""
|
||||||
|
self._properties.clear()
|
||||||
|
|
||||||
|
def find_properties(self, string, node, options, name=None, validate=True, re_match=False, sort=True, multiple=False):
|
||||||
|
"""Find all distinct properties for given string
|
||||||
|
|
||||||
|
If no capturing group is defined in the property, value will be grabbed from the entire match.
|
||||||
|
|
||||||
|
If one ore more unnamed capturing group is defined in the property, first capturing group will be used.
|
||||||
|
|
||||||
|
If named capturing group are defined in the property, they will be returned as property key.
|
||||||
|
|
||||||
|
If validate, found properties will be validated by their defined validator
|
||||||
|
|
||||||
|
If re_match, re.match will be used instead of re.search.
|
||||||
|
|
||||||
|
if sort, found properties will be sorted from longer match to shorter match.
|
||||||
|
|
||||||
|
If multiple is False and multiple values are found for the same property, the more confident one will be returned.
|
||||||
|
|
||||||
|
If multiple is False and multiple values are found for the same property and the same confidence, the longer will be returned.
|
||||||
|
|
||||||
|
:param string: input string
|
||||||
|
:type string: string
|
||||||
|
|
||||||
|
:param node: current node of the matching tree
|
||||||
|
:type node: :class:`guessit.matchtree.MatchTree`
|
||||||
|
|
||||||
|
:param name: name of property to find
|
||||||
|
:type name: string
|
||||||
|
|
||||||
|
:param re_match: use re.match instead of re.search
|
||||||
|
:type re_match: bool
|
||||||
|
|
||||||
|
:param multiple: Allows multiple property values to be returned
|
||||||
|
:type multiple: bool
|
||||||
|
|
||||||
|
:return: found properties
|
||||||
|
:rtype: list of tuples (:class:`_Property`, match, list of tuples (property_name, tuple(value_start, value_end)))
|
||||||
|
|
||||||
|
:see: `_Property`
|
||||||
|
:see: `register_property`
|
||||||
|
:see: `register_canonical_properties`
|
||||||
|
"""
|
||||||
|
entry_start = {}
|
||||||
|
entry_end = {}
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
duplicate_matches = {}
|
||||||
|
|
||||||
|
ret = []
|
||||||
|
|
||||||
|
if not string.strip():
|
||||||
|
return ret
|
||||||
|
|
||||||
|
# search all properties
|
||||||
|
for prop in self.get_properties(name):
|
||||||
|
if not prop.disabled(options):
|
||||||
|
valid_match = None
|
||||||
|
if re_match:
|
||||||
|
match = prop.compiled.match(string)
|
||||||
|
if match:
|
||||||
|
entries.append((prop, match))
|
||||||
|
else:
|
||||||
|
matches = list(prop.compiled.finditer(string))
|
||||||
|
duplicate_matches[prop] = matches
|
||||||
|
for match in matches:
|
||||||
|
entries.append((prop, match))
|
||||||
|
|
||||||
|
for prop, match in entries:
|
||||||
|
# compute confidence
|
||||||
|
if prop.confidence_lambda:
|
||||||
|
computed_confidence = prop.confidence_lambda(match)
|
||||||
|
if computed_confidence is not None:
|
||||||
|
prop.confidence = computed_confidence
|
||||||
|
|
||||||
|
if validate:
|
||||||
|
# compute entries start and ends
|
||||||
|
for prop, match in entries:
|
||||||
|
start, end = _get_span(prop, match)
|
||||||
|
|
||||||
|
if start not in entry_start:
|
||||||
|
entry_start[start] = [prop]
|
||||||
|
else:
|
||||||
|
entry_start[start].append(prop)
|
||||||
|
|
||||||
|
if end not in entry_end:
|
||||||
|
entry_end[end] = [prop]
|
||||||
|
else:
|
||||||
|
entry_end[end].append(prop)
|
||||||
|
|
||||||
|
# remove invalid values
|
||||||
|
while True:
|
||||||
|
invalid_entries = []
|
||||||
|
for entry in entries:
|
||||||
|
prop, match = entry
|
||||||
|
if not prop.validator.validate(prop, string, node, match, entry_start, entry_end):
|
||||||
|
invalid_entries.append(entry)
|
||||||
|
if not invalid_entries:
|
||||||
|
break
|
||||||
|
for entry in invalid_entries:
|
||||||
|
prop, match = entry
|
||||||
|
entries.remove(entry)
|
||||||
|
prop_duplicate_matches = duplicate_matches.get(prop)
|
||||||
|
if prop_duplicate_matches:
|
||||||
|
prop_duplicate_matches.remove(match)
|
||||||
|
invalid_span = _get_span(prop, match)
|
||||||
|
start = invalid_span[0]
|
||||||
|
end = invalid_span[1]
|
||||||
|
entry_start[start].remove(prop)
|
||||||
|
if not entry_start.get(start):
|
||||||
|
del entry_start[start]
|
||||||
|
entry_end[end].remove(prop)
|
||||||
|
if not entry_end.get(end):
|
||||||
|
del entry_end[end]
|
||||||
|
|
||||||
|
for prop, prop_duplicate_matches in duplicate_matches.items():
|
||||||
|
# Keeping the last valid match.
|
||||||
|
# Needed for the.100.109.hdtv-lol.mp4
|
||||||
|
for duplicate_match in prop_duplicate_matches[:-1]:
|
||||||
|
entries.remove((prop, duplicate_match))
|
||||||
|
|
||||||
|
if multiple:
|
||||||
|
ret = entries
|
||||||
|
else:
|
||||||
|
# keep only best match if multiple values where found
|
||||||
|
entries_dict = {}
|
||||||
|
for entry in entries:
|
||||||
|
for key in prop.keys:
|
||||||
|
if key not in entries_dict:
|
||||||
|
entries_dict[key] = []
|
||||||
|
entries_dict[key].append(entry)
|
||||||
|
|
||||||
|
for key_entries in entries_dict.values():
|
||||||
|
if multiple:
|
||||||
|
for entry in key_entries:
|
||||||
|
ret.append(entry)
|
||||||
|
else:
|
||||||
|
best_ret = {}
|
||||||
|
|
||||||
|
best_prop, best_match = None, None
|
||||||
|
if len(key_entries) == 1:
|
||||||
|
best_prop, best_match = key_entries[0]
|
||||||
|
else:
|
||||||
|
for prop, match in key_entries:
|
||||||
|
start, end = _get_span(prop, match)
|
||||||
|
if not best_prop or \
|
||||||
|
best_prop.confidence < best_prop.confidence or \
|
||||||
|
best_prop.confidence == best_prop.confidence and \
|
||||||
|
best_match.span()[1] - best_match.span()[0] < match.span()[1] - match.span()[0]:
|
||||||
|
best_prop, best_match = prop, match
|
||||||
|
|
||||||
|
best_ret[best_prop] = best_match
|
||||||
|
|
||||||
|
for prop, match in best_ret.items():
|
||||||
|
ret.append((prop, match))
|
||||||
|
|
||||||
|
if sort:
|
||||||
|
def _sorting(x):
|
||||||
|
_, x_match = x
|
||||||
|
x_start, x_end = x_match.span()
|
||||||
|
return x_start - x_end
|
||||||
|
|
||||||
|
ret.sort(key=_sorting)
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def as_guess(self, found_properties, input=None, filter_=None, sep_replacement=None, multiple=False, *args, **kwargs):
|
||||||
|
if filter_ is None:
|
||||||
|
filter_ = lambda property, *args, **kwargs: True
|
||||||
|
guesses = [] if multiple else None
|
||||||
|
for prop, match in found_properties:
|
||||||
|
first_key = None
|
||||||
|
for key in prop.keys:
|
||||||
|
# First property key will be used as base for effective name
|
||||||
|
if isinstance(key, base_text_type):
|
||||||
|
if first_key is None:
|
||||||
|
first_key = key
|
||||||
|
break
|
||||||
|
property_name = first_key if first_key else None
|
||||||
|
span = _get_span(prop, match)
|
||||||
|
guess = Guess(confidence=prop.confidence, input=input, span=span, prop=property_name)
|
||||||
|
groups = _get_groups(match.re)
|
||||||
|
for group_name in groups:
|
||||||
|
name = group_name if isinstance(group_name, base_text_type) else property_name if property_name not in groups else None
|
||||||
|
if name:
|
||||||
|
value = self._effective_prop_value(prop, group_name, input, match.span(group_name) if group_name else match.span(), sep_replacement)
|
||||||
|
if not value is None:
|
||||||
|
is_string = isinstance(value, base_text_type)
|
||||||
|
if not is_string or is_string and value: # Keep non empty strings and other defined objects
|
||||||
|
if isinstance(value, dict):
|
||||||
|
for k, v in value.items():
|
||||||
|
if k is None:
|
||||||
|
k = name
|
||||||
|
guess[k] = v
|
||||||
|
else:
|
||||||
|
if name in guess:
|
||||||
|
if not isinstance(guess[name], list):
|
||||||
|
guess[name] = [guess[name]]
|
||||||
|
guess[name].append(value)
|
||||||
|
else:
|
||||||
|
guess[name] = value
|
||||||
|
if group_name:
|
||||||
|
guess.metadata(prop).span = match.span(group_name)
|
||||||
|
if filter_(guess):
|
||||||
|
if multiple:
|
||||||
|
guesses.append(guess)
|
||||||
|
else:
|
||||||
|
return guess
|
||||||
|
return guesses
|
||||||
|
|
||||||
|
def _effective_prop_value(self, prop, group_name, input=None, span=None, sep_replacement=None):
|
||||||
|
if prop.canonical_form:
|
||||||
|
return prop.canonical_form
|
||||||
|
if input is None:
|
||||||
|
return None
|
||||||
|
value = input
|
||||||
|
if span is not None:
|
||||||
|
value = value[span[0]:span[1]]
|
||||||
|
value = input[span[0]:span[1]] if input else None
|
||||||
|
if sep_replacement:
|
||||||
|
for sep_char in sep:
|
||||||
|
value = value.replace(sep_char, sep_replacement)
|
||||||
|
if value:
|
||||||
|
value = prop.format(value, group_name)
|
||||||
|
return value
|
||||||
|
|
||||||
|
def get_properties(self, name=None, canonical_form=None):
|
||||||
|
"""Retrieve properties
|
||||||
|
|
||||||
|
:return: Properties
|
||||||
|
:rtype: generator
|
||||||
|
"""
|
||||||
|
for prop in self._properties:
|
||||||
|
if (name is None or name in prop.keys) and (canonical_form is None or prop.canonical_form == canonical_form):
|
||||||
|
yield prop
|
||||||
|
|
||||||
|
def get_supported_properties(self):
|
||||||
|
supported_properties = {}
|
||||||
|
for prop in self.get_properties():
|
||||||
|
for k in prop.keys:
|
||||||
|
values = supported_properties.get(k)
|
||||||
|
if not values:
|
||||||
|
values = set()
|
||||||
|
supported_properties[k] = values
|
||||||
|
if prop.canonical_form:
|
||||||
|
values.add(prop.canonical_form)
|
||||||
|
return supported_properties
|
||||||
|
|
||||||
|
|
||||||
|
class QualitiesContainer():
|
||||||
|
def __init__(self):
|
||||||
|
self._qualities = {}
|
||||||
|
|
||||||
|
def register_quality(self, name, canonical_form, rating):
|
||||||
|
"""Register a quality rating.
|
||||||
|
|
||||||
|
:param name: Name of the property
|
||||||
|
:type name: string
|
||||||
|
:param canonical_form: Value of the property
|
||||||
|
:type canonical_form: string
|
||||||
|
:param rating: Estimated quality rating for the property
|
||||||
|
:type rating: int
|
||||||
|
"""
|
||||||
|
property_qualities = self._qualities.get(name)
|
||||||
|
|
||||||
|
if property_qualities is None:
|
||||||
|
property_qualities = {}
|
||||||
|
self._qualities[name] = property_qualities
|
||||||
|
|
||||||
|
property_qualities[canonical_form] = rating
|
||||||
|
|
||||||
|
def unregister_quality(self, name, *canonical_forms):
|
||||||
|
"""Unregister quality ratings for given property name.
|
||||||
|
|
||||||
|
If canonical_forms are specified, only those values will be unregistered
|
||||||
|
|
||||||
|
:param name: Name of the property
|
||||||
|
:type name: string
|
||||||
|
:param canonical_forms: Value of the property
|
||||||
|
:type canonical_forms: string
|
||||||
|
"""
|
||||||
|
if not canonical_forms:
|
||||||
|
if name in self._qualities:
|
||||||
|
del self._qualities[name]
|
||||||
|
else:
|
||||||
|
property_qualities = self._qualities.get(name)
|
||||||
|
if property_qualities is not None:
|
||||||
|
for property_canonical_form in canonical_forms:
|
||||||
|
if property_canonical_form in property_qualities:
|
||||||
|
del property_qualities[property_canonical_form]
|
||||||
|
if not property_qualities:
|
||||||
|
del self._qualities[name]
|
||||||
|
|
||||||
|
def clear_qualities(self,):
|
||||||
|
"""Unregister all defined quality ratings.
|
||||||
|
"""
|
||||||
|
self._qualities.clear()
|
||||||
|
|
||||||
|
def rate_quality(self, guess, *props):
|
||||||
|
"""Rate the quality of guess.
|
||||||
|
|
||||||
|
:param guess: Guess to rate
|
||||||
|
:type guess: :class:`guessit.guess.Guess`
|
||||||
|
:param props: Properties to include in the rating. if empty, rating will be performed for all guess properties.
|
||||||
|
:type props: varargs of string
|
||||||
|
|
||||||
|
:return: Quality of the guess. The higher, the better.
|
||||||
|
:rtype: int
|
||||||
|
"""
|
||||||
|
rate = 0
|
||||||
|
if not props:
|
||||||
|
props = guess.keys()
|
||||||
|
for prop in props:
|
||||||
|
prop_value = guess.get(prop)
|
||||||
|
prop_qualities = self._qualities.get(prop)
|
||||||
|
if prop_value is not None and prop_qualities is not None:
|
||||||
|
rate += prop_qualities.get(prop_value, 0)
|
||||||
|
return rate
|
||||||
|
|
||||||
|
def best_quality_properties(self, props, *guesses):
|
||||||
|
"""Retrieve the best quality guess, based on given properties
|
||||||
|
|
||||||
|
:param props: Properties to include in the rating
|
||||||
|
:type props: list of strings
|
||||||
|
:param guesses: Guesses to rate
|
||||||
|
:type guesses: :class:`guessit.guess.Guess`
|
||||||
|
|
||||||
|
:return: Best quality guess from all passed guesses
|
||||||
|
:rtype: :class:`guessit.guess.Guess`
|
||||||
|
"""
|
||||||
|
best_guess = None
|
||||||
|
best_rate = None
|
||||||
|
for guess in guesses:
|
||||||
|
rate = self.rate_quality(guess, *props)
|
||||||
|
if best_rate is None or best_rate < rate:
|
||||||
|
best_rate = rate
|
||||||
|
best_guess = guess
|
||||||
|
return best_guess
|
||||||
|
|
||||||
|
def best_quality(self, *guesses):
|
||||||
|
"""Retrieve the best quality guess.
|
||||||
|
|
||||||
|
:param guesses: Guesses to rate
|
||||||
|
:type guesses: :class:`guessit.guess.Guess`
|
||||||
|
|
||||||
|
:return: Best quality guess from all passed guesses
|
||||||
|
:rtype: :class:`guessit.guess.Guess`
|
||||||
|
"""
|
||||||
|
best_guess = None
|
||||||
|
best_rate = None
|
||||||
|
for guess in guesses:
|
||||||
|
rate = self.rate_quality(guess)
|
||||||
|
if best_rate is None or best_rate < rate:
|
||||||
|
best_rate = rate
|
||||||
|
best_guess = guess
|
||||||
|
return best_guess
|
||||||
|
|
||||||
|
|
@ -1,112 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# GuessIt - A library for guessing information from filenames
|
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
|
||||||
#
|
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
|
||||||
# the Free Software Foundation; either version 3 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# GuessIt is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# Lesser GNU General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the Lesser GNU General Public License
|
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
#
|
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from guessit import UnicodeMixin, base_text_type, u
|
|
||||||
from guessit.fileutils import load_file_in_same_dir
|
|
||||||
import logging
|
|
||||||
|
|
||||||
__all__ = [ 'Country' ]
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
# parsed from http://en.wikipedia.org/wiki/ISO_3166-1
|
|
||||||
#
|
|
||||||
# Description of the fields:
|
|
||||||
# "An English name, an alpha-2 code (when given),
|
|
||||||
# an alpha-3 code (when given), a numeric code, and an ISO 31666-2 code
|
|
||||||
# are all separated by pipe (|) characters."
|
|
||||||
_iso3166_contents = load_file_in_same_dir(__file__, 'ISO-3166-1_utf8.txt')
|
|
||||||
|
|
||||||
country_matrix = [ l.strip().split('|')
|
|
||||||
for l in _iso3166_contents.strip().split('\n') ]
|
|
||||||
|
|
||||||
country_matrix += [ [ 'Unknown', 'un', 'unk', '', '' ],
|
|
||||||
[ 'Latin America', '', 'lat', '', '' ]
|
|
||||||
]
|
|
||||||
|
|
||||||
country_to_alpha3 = dict((c[0].lower(), c[2].lower()) for c in country_matrix)
|
|
||||||
country_to_alpha3.update(dict((c[1].lower(), c[2].lower()) for c in country_matrix))
|
|
||||||
country_to_alpha3.update(dict((c[2].lower(), c[2].lower()) for c in country_matrix))
|
|
||||||
|
|
||||||
# add here exceptions / non ISO representations
|
|
||||||
# Note: remember to put those exceptions in lower-case, they won't work otherwise
|
|
||||||
country_to_alpha3.update({ 'latinoamérica': 'lat',
|
|
||||||
'brazilian': 'bra',
|
|
||||||
'españa': 'esp',
|
|
||||||
'uk': 'gbr'
|
|
||||||
})
|
|
||||||
|
|
||||||
country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix)
|
|
||||||
country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Country(UnicodeMixin):
|
|
||||||
"""This class represents a country.
|
|
||||||
|
|
||||||
You can initialize it with pretty much anything, as it knows conversion
|
|
||||||
from ISO-3166 2-letter and 3-letter codes, and an English name.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, country, strict=False):
|
|
||||||
country = u(country.strip().lower())
|
|
||||||
self.alpha3 = country_to_alpha3.get(country)
|
|
||||||
|
|
||||||
if self.alpha3 is None and strict:
|
|
||||||
msg = 'The given string "%s" could not be identified as a country'
|
|
||||||
raise ValueError(msg % country)
|
|
||||||
|
|
||||||
if self.alpha3 is None:
|
|
||||||
self.alpha3 = 'unk'
|
|
||||||
|
|
||||||
|
|
||||||
@property
|
|
||||||
def alpha2(self):
|
|
||||||
return country_alpha3_to_alpha2[self.alpha3]
|
|
||||||
|
|
||||||
@property
|
|
||||||
def english_name(self):
|
|
||||||
return country_alpha3_to_en_name[self.alpha3]
|
|
||||||
|
|
||||||
def __hash__(self):
|
|
||||||
return hash(self.alpha3)
|
|
||||||
|
|
||||||
def __eq__(self, other):
|
|
||||||
if isinstance(other, Country):
|
|
||||||
return self.alpha3 == other.alpha3
|
|
||||||
|
|
||||||
if isinstance(other, base_text_type):
|
|
||||||
try:
|
|
||||||
return self == Country(other)
|
|
||||||
except ValueError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def __ne__(self, other):
|
|
||||||
return not self == other
|
|
||||||
|
|
||||||
def __unicode__(self):
|
|
||||||
return self.english_name
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return 'Country(%s)' % self.english_name
|
|
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,15 +18,38 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import datetime
 
 import re
 
-def valid_year(year):
-    return 1920 < year < datetime.date.today().year + 5
+from dateutil import parser
+
+
+_dsep = r'[-/ \.]'
+_dsep_bis = r'[-/ \.x]'
+
+date_regexps = [
+    re.compile('[^\d](\d{8})[^\d]', re.IGNORECASE),
+    re.compile('[^\d](\d{6})[^\d]', re.IGNORECASE),
+    re.compile('[^\d](\d{2})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
+    re.compile('[^\d](\d{1,2})%s(\d{1,2})%s(\d{2})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
+    re.compile('[^\d](\d{4})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep_bis, _dsep), re.IGNORECASE),
+    re.compile('[^\d](\d{1,2})%s(\d{1,2})%s(\d{4})[^\d]' % (_dsep, _dsep_bis), re.IGNORECASE),
+    re.compile('[^\d](\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4})[^\d]' % (_dsep, _dsep), re.IGNORECASE)]
+
+
+def valid_year(year, today=None):
+    """Check if number is a valid year"""
+    if not today:
+        today = datetime.date.today()
+    return 1920 < year < today.year + 5
 
 
 def search_year(string):
     """Looks for year patterns, and if found return the year and group span.
 
     Assumes there are sentinels at the beginning and end of the string that
     always allow matching a non-digit delimiting the date.
@@ -34,10 +57,10 @@ def search_year(string):
     and now + 5 years, so for instance 2000 would be returned as a valid
     year but 1492 would not.
 
-    >>> search_year('in the year 2000...')
-    (2000, (12, 16))
+    >>> search_year(' in the year 2000... ')
+    (2000, (13, 17))
 
-    >>> search_year('they arrived in 1492.')
+    >>> search_year(' they arrived in 1492. ')
     (None, None)
     """
     match = re.search(r'[^0-9]([0-9]{4})[^0-9]', string)
@@ -49,85 +72,58 @@ def search_year(string):
     return (None, None)
 
 
-def search_date(string):
+def search_date(string, year_first=None, day_first=True):
     """Looks for date patterns, and if found return the date and group span.
 
     Assumes there are sentinels at the beginning and end of the string that
     always allow matching a non-digit delimiting the date.
 
-    >>> search_date('This happened on 2002-04-22.')
-    (datetime.date(2002, 4, 22), (17, 27))
+    Year can be defined on two digit only. It will return the nearest possible
+    date from today.
 
-    >>> search_date('And this on 17-06-1998.')
-    (datetime.date(1998, 6, 17), (12, 22))
+    >>> search_date(' This happened on 2002-04-22. ')
+    (datetime.date(2002, 4, 22), (18, 28))
 
-    >>> search_date('no date in here')
+    >>> search_date(' And this on 17-06-1998. ')
+    (datetime.date(1998, 6, 17), (13, 23))
+
+    >>> search_date(' no date in here ')
     (None, None)
     """
-    dsep = r'[-/ \.]'
-
-    date_rexps = [
-        # 20010823
-        r'[^0-9]' +
-        r'(?P<year>[0-9]{4})' +
-        r'(?P<month>[0-9]{2})' +
-        r'(?P<day>[0-9]{2})' +
-        r'[^0-9]',
-
-        # 2001-08-23
-        r'[^0-9]' +
-        r'(?P<year>[0-9]{4})' + dsep +
-        r'(?P<month>[0-9]{2})' + dsep +
-        r'(?P<day>[0-9]{2})' +
-        r'[^0-9]',
-
-        # 23-08-2001
-        r'[^0-9]' +
-        r'(?P<day>[0-9]{2})' + dsep +
-        r'(?P<month>[0-9]{2})' + dsep +
-        r'(?P<year>[0-9]{4})' +
-        r'[^0-9]',
-
-        # 23-08-01
-        r'[^0-9]' +
-        r'(?P<day>[0-9]{2})' + dsep +
-        r'(?P<month>[0-9]{2})' + dsep +
-        r'(?P<year>[0-9]{2})' +
-        r'[^0-9]',
-    ]
-
-    for drexp in date_rexps:
-        match = re.search(drexp, string)
-        if match:
-            d = match.groupdict()
-            year, month, day = int(d['year']), int(d['month']), int(d['day'])
-            # years specified as 2 digits should be adjusted here
-            if year < 100:
-                if year > (datetime.date.today().year % 100) + 5:
-                    year = 1900 + year
-                else:
-                    year = 2000 + year
-
-            date = None
-            try:
-                date = datetime.date(year, month, day)
-            except ValueError:
-                try:
-                    date = datetime.date(year, day, month)
-                except ValueError:
-                    pass
-
-            if date is None:
-                continue
-
-            # check date plausibility
-            if not 1900 < date.year < datetime.date.today().year + 5:
-                continue
-
-            # looks like we have a valid date
-            # note: span is [+1,-1] because we don't want to include the
-            # non-digit char
-            start, end = match.span()
-            return (date, (start + 1, end - 1))
-
+    start, end = None, None
+    match = None
+    for date_re in date_regexps:
+        s = date_re.search(string)
+        if s and (match is None or s.end() - s.start() > len(match)):
+            start, end = s.start(), s.end()
+            if date_re.groups:
+                match = '-'.join(s.groups())
+            else:
+                match = s.group()
+
+    if match is None:
+        return None, None
+
+    today = datetime.date.today()
+
+    # If day_first/year_first is undefined, parse is made using both possible values.
+    yearfirst_opts = [False, True]
+    if year_first is not None:
+        yearfirst_opts = [year_first]
+
+    dayfirst_opts = [True, False]
+    if day_first is not None:
+        dayfirst_opts = [day_first]
+
+    kwargs_list = ({'dayfirst': d, 'yearfirst': y} for d in dayfirst_opts for y in yearfirst_opts)
+    for kwargs in kwargs_list:
+        try:
+            date = parser.parse(match, **kwargs)
+        except (ValueError, TypeError) as e:  # see https://bugs.launchpad.net/dateutil/+bug/1247643
+            date = None
+            pass
+
+        # check date plausibility
+        if date and valid_year(date.year, today=today):
+            return date.date(), (start+1, end-1)  # compensate for sentinels
+
     return None, None
|
|
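A minimal sketch of the dayfirst/yearfirst fallback used by the new search_date above, assuming only the dateutil dependency; the date_regexps table and the valid_year helper referenced in the hunk are not reproduced here, and parse_ambiguous is a hypothetical name used only for illustration.

import datetime
from dateutil import parser

def parse_ambiguous(text, year_first=None, day_first=True):
    # Try dateutil with the requested dayfirst/yearfirst flags; when a flag is
    # left undefined, fall back to the other combination, as the hunk does.
    yearfirst_opts = [year_first] if year_first is not None else [False, True]
    dayfirst_opts = [day_first] if day_first is not None else [True, False]
    for dayfirst in dayfirst_opts:
        for yearfirst in yearfirst_opts:
            try:
                return parser.parse(text, dayfirst=dayfirst, yearfirst=yearfirst).date()
            except (ValueError, TypeError):
                continue
    return None

print(parse_ambiguous('17-06-1998'))                  # 1998-06-17
print(parse_ambiguous('06-17-1998', day_first=None))  # tries both orders until one parses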
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,7 +18,8 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 from guessit import s, u
 import os.path
 import zipfile
@@ -44,17 +45,13 @@ def split_path(path):
     result = []
     while True:
         head, tail = os.path.split(path)
-        headlen = len(head)
 
-        # on Unix systems, the root folder is '/'
-        if head and head == '/'*headlen and tail == '':
-            return ['/'] + result
+        if not head and not tail:
+            return result
 
-        # on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\
-        if ((headlen == 3 and head[1:] == ':\\') or (headlen == 2 and head == '\\\\')) and tail == '':
-            return [head] + result
-
-        if head == '' and tail == '':
-            return result
+        if not tail and head == path:
+            # Make sure we won't have an infinite loop.
+            result = [head] + result
+            return result
 
         # we just split a directory ending with '/', so tail is empty
@@ -70,8 +67,8 @@ def split_path(path):
 def file_in_same_dir(ref_file, desired_file):
     """Return the path for a file in the same dir as a given reference file.
 
-    >>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings'))
-    '~/smewt/smewt.settings'
+    >>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings')) == os.path.normpath('~/smewt/smewt.settings')
+    True
 
     """
     return os.path.join(*(split_path(ref_file)[:-1] + [desired_file]))
@@ -85,6 +82,6 @@ def load_file_in_same_dir(ref_file, filename):
     if p.endswith('.zip'):
         zfilename = os.path.join(*path[:i + 1])
         zfile = zipfile.ZipFile(zfilename)
-        return zfile.read('/'.join(path[i + 1:]))
+        return u(zfile.read('/'.join(path[i + 1:])))
 
     return u(io.open(os.path.join(*path), encoding='utf-8').read())
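A standalone sketch of the simplified split_path loop from the hunk above, assuming POSIX os.path semantics; the tail of the loop (pushing the last component and re-splitting the head) is not visible in the hunk and is filled in here as an assumption.

import os.path

def split_path(path):
    result = []
    while True:
        head, tail = os.path.split(path)
        if not head and not tail:
            return result
        if not tail and head == path:
            # head stopped shrinking (filesystem root): stop to avoid an infinite loop
            return [head] + result
        if tail:
            result = [tail] + result
        path = head

print(split_path('/usr/local/lib'))  # ['/', 'usr', 'local', 'lib']
print(split_path('a/b/c'))           # ['a', 'b', 'c']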
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,10 +18,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 from guessit import UnicodeMixin, s, u, base_text_type
-from guessit.language import Language
-from guessit.country import Country
+from babelfish import Language, Country
 import json
 import datetime
 import logging
@@ -29,6 +29,111 @@ import logging
 log = logging.getLogger(__name__)
 
 
+class GuessMetadata(object):
+    """GuessMetadata contains confidence, an input string, span and related property.
+
+    If defined on a property of Guess object, it overrides the object defined as global.
+
+    :param parent: The parent metadata, used for undefined properties in self object
+    :type parent: :class: `GuessMedata`
+    :param confidence: The confidence (from 0.0 to 1.0)
+    :type confidence: number
+    :param input: The input string
+    :type input: string
+    :param span: The input string
+    :type span: tuple (int, int)
+    :param prop: The found property definition
+    :type prop: :class `guessit.containers._Property`
+    """
+    def __init__(self, parent=None, confidence=None, input=None, span=None, prop=None, *args, **kwargs):
+        self.parent = parent
+        if confidence is None and self.parent is None:
+            self._confidence = 1.0
+        else:
+            self._confidence = confidence
+        self._input = input
+        self._span = span
+        self._prop = prop
+
+    @property
+    def confidence(self):
+        """The confidence
+
+        :rtype: int
+        :return: confidence value
+        """
+        return self._confidence if self._confidence is not None else self.parent.confidence if self.parent else None
+
+    @confidence.setter
+    def confidence(self, confidence):
+        self._confidence = confidence
+
+    @property
+    def input(self):
+        """The input
+
+        :rtype: string
+        :return: String used to find this guess value
+        """
+        return self._input if self._input is not None else self.parent.input if self.parent else None
+
+    @input.setter
+    def input(self, input):
+        """The input
+
+        :rtype: string
+        """
+        self._input = input
+
+    @property
+    def span(self):
+        """The span
+
+        :rtype: tuple (int, int)
+        :return: span of input string used to find this guess value
+        """
+        return self._span if self._span is not None else self.parent.span if self.parent else None
+
+    @span.setter
+    def span(self, span):
+        """The span
+
+        :rtype: tuple (int, int)
+        :return: span of input string used to find this guess value
+        """
+        self._span = span
+
+    @property
+    def prop(self):
+        """The property
+
+        :rtype: :class:`_Property`
+        :return: The property
+        """
+        return self._prop if self._prop is not None else self.parent.prop if self.parent else None
+
+    @property
+    def raw(self):
+        """Return the raw information (original match from the string,
+        not the cleaned version) associated with the given property name."""
+        if self.input and self.span:
+            return self.input[self.span[0]:self.span[1]]
+        return None
+
+    def __repr__(self, *args, **kwargs):
+        return object.__repr__(self, *args, **kwargs)
+
+
+def _split_kwargs(**kwargs):
+    metadata_args = {}
+    for prop in dir(GuessMetadata):
+        try:
+            metadata_args[prop] = kwargs.pop(prop)
+        except KeyError:
+            pass
+    return metadata_args, kwargs
+
+
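A toy illustration of the parent-fallback pattern GuessMetadata uses (this is not the library class itself): per-property metadata only stores what was explicitly set and defers everything else to the guess-wide metadata object.

class Meta(object):
    def __init__(self, parent=None, confidence=None):
        self.parent = parent
        self._confidence = confidence

    @property
    def confidence(self):
        # fall back to the parent when this object has no value of its own
        if self._confidence is not None:
            return self._confidence
        return self.parent.confidence if self.parent else None

global_meta = Meta(confidence=0.6)                     # guess-wide default
title_meta = Meta(parent=global_meta)                  # nothing set: inherits 0.6
year_meta = Meta(parent=global_meta, confidence=0.9)   # overrides the default

print(title_meta.confidence, year_meta.confidence)     # 0.6 0.9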
 class Guess(UnicodeMixin, dict):
     """A Guess is a dictionary which has an associated confidence for each of
     its values.
 
@@ -37,39 +142,58 @@ class Guess(UnicodeMixin, dict):
     simple dict."""
 
     def __init__(self, *args, **kwargs):
-        try:
-            confidence = kwargs.pop('confidence')
-        except KeyError:
-            confidence = 0
-
-        try:
-            raw = kwargs.pop('raw')
-        except KeyError:
-            raw = None
+        metadata_kwargs, kwargs = _split_kwargs(**kwargs)
+        self._global_metadata = GuessMetadata(**metadata_kwargs)
 
         dict.__init__(self, *args, **kwargs)
 
-        self._confidence = {}
-        self._raw = {}
+        self._metadata = {}
         for prop in self:
-            self._confidence[prop] = confidence
-            self._raw[prop] = raw
+            self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
+
+    def rename(self, old_name, new_name):
+        if old_name in self._metadata:
+            metadata = self._metadata[old_name]
+            del self._metadata[old_name]
+            self._metadata[new_name] = metadata
+        if old_name in self:
+            value = self[old_name]
+            del self[old_name]
+            self[new_name] = value
+            return True
+        return False
 
     def to_dict(self, advanced=False):
+        """Return the guess as a dict containing only base types, ie:
+        where dates, languages, countries, etc. are converted to strings.
+
+        if advanced is True, return the data as a json string containing
+        also the raw information of the properties."""
         data = dict(self)
         for prop, value in data.items():
             if isinstance(value, datetime.date):
                 data[prop] = value.isoformat()
-            elif isinstance(value, (Language, Country, base_text_type)):
+            elif isinstance(value, (UnicodeMixin, base_text_type)):
                 data[prop] = u(value)
+            elif isinstance(value, (Language, Country)):
+                data[prop] = value.guessit
             elif isinstance(value, list):
                 data[prop] = [u(x) for x in value]
             if advanced:
-                data[prop] = {"value": data[prop], "raw": self.raw(prop), "confidence": self.confidence(prop)}
+                metadata = self.metadata(prop)
+                prop_data = {'value': data[prop]}
+                if metadata.raw:
+                    prop_data['raw'] = metadata.raw
+                if metadata.confidence:
+                    prop_data['confidence'] = metadata.confidence
+                data[prop] = prop_data
 
         return data
 
     def nice_string(self, advanced=False):
+        """Return a string with the property names and their values,
+        that also displays the associated confidence to each property.
+
+        FIXME: doc with param"""
         if advanced:
             data = self.to_dict(advanced)
             return json.dumps(data, indent=4)
 
@@ -89,39 +213,54 @@ class Guess(UnicodeMixin, dict):
     def __unicode__(self):
         return u(self.to_dict())
 
-    def confidence(self, prop):
-        return self._confidence.get(prop, -1)
+    def metadata(self, prop=None):
+        """Return the metadata associated with the given property name
+
+        If no property name is given, get the global_metadata
+        """
+        if prop is None:
+            return self._global_metadata
+        if prop not in self._metadata:
+            self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
+        return self._metadata[prop]
+
+    def confidence(self, prop=None):
+        return self.metadata(prop).confidence
+
+    def set_confidence(self, prop, confidence):
+        self.metadata(prop).confidence = confidence
 
     def raw(self, prop):
-        return self._raw.get(prop, None)
+        return self.metadata(prop).raw
 
-    def set(self, prop, value, confidence=None, raw=None):
-        self[prop] = value
-        if confidence is not None:
-            self._confidence[prop] = confidence
-        if raw is not None:
-            self._raw[prop] = raw
-
-    def set_confidence(self, prop, value):
-        self._confidence[prop] = value
-
-    def set_raw(self, prop, value):
-        self._raw[prop] = value
-
-    def update(self, other, confidence=None, raw=None):
+    def set(self, prop_name, value, *args, **kwargs):
+        if value is None:
+            try:
+                del self[prop_name]
+            except KeyError:
+                pass
+            try:
+                del self._metadata[prop_name]
+            except KeyError:
+                pass
+        else:
+            self[prop_name] = value
+            if 'metadata' in kwargs.keys():
+                self._metadata[prop_name] = kwargs['metadata']
+            else:
+                self._metadata[prop_name] = GuessMetadata(parent=self._global_metadata, *args, **kwargs)
+
+    def update(self, other, confidence=None):
         dict.update(self, other)
         if isinstance(other, Guess):
             for prop in other:
-                self._confidence[prop] = other.confidence(prop)
-                self._raw[prop] = other.raw(prop)
+                try:
+                    self._metadata[prop] = other._metadata[prop]
+                except KeyError:
+                    pass
         if confidence is not None:
             for prop in other:
-                self._confidence[prop] = confidence
-
-        if raw is not None:
-            for prop in other:
-                self._raw[prop] = raw
+                self.set_confidence(prop, confidence)
 
     def update_highest_confidence(self, other):
         """Update this guess with the values from the given one. In case
@@ -131,11 +270,10 @@ class Guess(UnicodeMixin, dict):
             raise ValueError('Can only call this function on Guess instances')
 
         for prop in other:
-            if prop in self and self.confidence(prop) >= other.confidence(prop):
+            if prop in self and self.metadata(prop).confidence >= other.metadata(prop).confidence:
                 continue
             self[prop] = other[prop]
-            self._confidence[prop] = other.confidence(prop)
-            self._raw[prop] = other.raw(prop)
+            self._metadata[prop] = other.metadata(prop)
 
 def choose_int(g1, g2):
 
@@ -193,26 +331,26 @@ def choose_string(g1, g2):
     combined_prob = 1 - (1 - c1) * (1 - c2)
 
     if v1l == v2l:
-        return (v1, combined_prob)
+        return v1, combined_prob
 
     # check for common patterns
     elif v1l == 'the ' + v2l:
-        return (v1, combined_prob)
+        return v1, combined_prob
     elif v2l == 'the ' + v1l:
-        return (v2, combined_prob)
+        return v2, combined_prob
 
     # if one string is contained in the other, return the shortest one
     elif v2l in v1l:
-        return (v2, combined_prob)
+        return v2, combined_prob
     elif v1l in v2l:
-        return (v1, combined_prob)
+        return v1, combined_prob
 
     # in case of conflict, return the one with highest confidence
     else:
         if c1 > c2:
-            return (v1, c1 - c2)
+            return v1, c1 - c2
         else:
-            return (v2, c2 - c1)
+            return v2, c2 - c1
 
 
 def _merge_similar_guesses_nocheck(guesses, prop, choose):
 
@@ -226,17 +364,7 @@ def _merge_similar_guesses_nocheck(guesses, prop, choose):
     g1, g2 = similar[0], similar[1]
 
-    other_props = set(g1) & set(g2) - set([prop])
-    if other_props:
-        log.debug('guess 1: %s' % g1)
-        log.debug('guess 2: %s' % g2)
-        for prop in other_props:
-            if g1[prop] != g2[prop]:
-                log.warning('both guesses to be merged have more than one '
-                            'different property in common, bailing out...')
-                return
-
-    # merge all props of s2 into s1, updating the confidence for the
+    # merge only this prop of s2 into s1, updating the confidence for the
     # considered property
     v1, v2 = g1[prop], g2[prop]
     c1, c2 = g1.confidence(prop), g2.confidence(prop)
 
@@ -248,10 +376,11 @@ def _merge_similar_guesses_nocheck(guesses, prop, choose):
     msg = "Updating non-matching property '%s' with confidence %.2f"
     log.debug(msg % (prop, new_confidence))
 
-    g2[prop] = new_value
-    g2.set_confidence(prop, new_confidence)
+    g1.set(prop, new_value, confidence=new_confidence)
+    g2.pop(prop)
 
-    g1.update(g2)
-    guesses.remove(g2)
+    # remove g2 if there are no properties left
+    if not g2.keys():
+        guesses.remove(g2)
 
 
@@ -286,43 +415,53 @@ def merge_all(guesses, append=None):
     instead of being merged.
 
     >>> s(merge_all([ Guess({'season': 2}, confidence=0.6),
-    ...               Guess({'episodeNumber': 13}, confidence=0.8) ]))
-    {'season': 2, 'episodeNumber': 13}
+    ...               Guess({'episodeNumber': 13}, confidence=0.8) ])
+    ...     ) == {'season': 2, 'episodeNumber': 13}
+    True
 
     >>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02),
-    ...               Guess({'season': 1}, confidence=0.2) ]))
-    {'season': 1}
+    ...               Guess({'season': 1}, confidence=0.2) ])
+    ...     ) == {'season': 1}
+    True
 
     >>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8),
     ...               Guess({'releaseGroup': '2HD'}, confidence=0.8) ],
-    ...             append=['other']))
-    {'releaseGroup': '2HD', 'other': ['PROPER']}
+    ...             append=['other'])
+    ...     ) == {'releaseGroup': '2HD', 'other': ['PROPER']}
+    True
 
     """
+    result = Guess()
     if not guesses:
-        return Guess()
+        return result
 
-    result = guesses[0]
     if append is None:
         append = []
 
-    for g in guesses[1:]:
+    for g in guesses:
         # first append our appendable properties
         for prop in append:
             if prop in g:
-                result.set(prop, result.get(prop, []) + [g[prop]],
+                if isinstance(g[prop], (list, set)):
+                    new_values = result.get(prop, []) + list(g[prop])
+                else:
+                    new_values = result.get(prop, []) + [g[prop]]
+
+                result.set(prop, new_values,
                            # TODO: what to do with confidence here? maybe an
                            # arithmetic mean...
-                           confidence=g.confidence(prop),
-                           raw=g.raw(prop))
+                           confidence=g.metadata(prop).confidence,
+                           input=g.metadata(prop).input,
+                           span=g.metadata(prop).span,
+                           prop=g.metadata(prop).prop)
 
                 del g[prop]
 
         # then merge the remaining ones
         dups = set(result) & set(g)
         if dups:
-            log.warning('duplicate properties %s in merged result...' % [ (result[p], g[p]) for p in dups] )
+            log.debug('duplicate properties %s in merged result...' % [(result[p], g[p]) for p in dups])
 
         result.update_highest_confidence(g)
 
@@ -338,8 +477,38 @@ def merge_all(guesses, append=None):
             if isinstance(value, list):
                 result[prop] = list(set(value))
             else:
-                result[prop] = [ value ]
+                result[prop] = [value]
         except KeyError:
             pass
 
     return result
+
+
+def smart_merge(guesses):
+    """First tries to merge well-known similar properties, and then merges
+    the rest with a merge_all call.
+
+    Should be the function to call in most cases, unless one wants to have more
+    control.
+
+    Warning: this function is destructive, ie: it will merge the list in-place.
+    """
+
+    # 1- try to merge similar information together and give it a higher
+    #    confidence
+    for int_part in ('year', 'season', 'episodeNumber'):
+        merge_similar_guesses(guesses, int_part, choose_int)
+
+    for string_part in ('title', 'series', 'container', 'format',
+                        'releaseGroup', 'website', 'audioCodec',
+                        'videoCodec', 'screenSize', 'episodeFormat',
+                        'audioChannels', 'idNumber'):
+        merge_similar_guesses(guesses, string_part, choose_string)
+
+    # 2- merge the rest, potentially discarding information not properly
+    #    merged before
+    result = merge_all(guesses,
+                       append=['language', 'subtitleLanguage', 'other',
+                               'episodeDetails', 'unidentified'])
+
+    return result
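The agreement boost used in the choose_string hunk above combines two confidences as 1 - (1 - c1) * (1 - c2), i.e. the probability that at least one of two independent guesses is right; a quick check of that arithmetic:

def combined_confidence(c1, c2):
    # same formula as combined_prob in choose_string
    return 1 - (1 - c1) * (1 - c2)

print(combined_confidence(0.6, 0.8))  # ~0.92
print(combined_confidence(0.5, 0.5))  # 0.75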
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,17 +18,21 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 from guessit import s, to_hex
 import hashlib
 import os.path
 
+from functools import reduce
+
 
 def hash_file(filename):
     """Returns the ed2k hash of a given file.
 
-    >>> s(hash_file('tests/dummy.srt'))
-    'ed2k://|file|dummy.srt|44|1CA0B9DED3473B926AA93A0A546138BB|/'
+    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
+    >>> s(hash_file(testfile))
+    'ed2k://|file|dummy.srt|59|41F58B913AB3973F593BEBA8B8DF6510|/'
     """
     return 'ed2k://|file|%s|%d|%s|/' % (os.path.basename(filename),
                                         os.path.getsize(filename),
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,7 +18,8 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import struct
 import os
 
@@ -28,7 +29,7 @@ def hash_file(filename):
     http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
     and is licensed under the GPL."""
 
-    longlongformat = 'q'  # long long
+    longlongformat = b'q'  # long long
     bytesize = struct.calcsize(longlongformat)
 
     f = open(filename, "rb")
 
@@ -39,18 +40,18 @@ def hash_file(filename):
     if filesize < 65536 * 2:
         raise Exception("SizeError: size is %d, should be > 132K..." % filesize)
 
-    for x in range(65536 / bytesize):
+    for x in range(int(65536 / bytesize)):
         buf = f.read(bytesize)
         (l_value,) = struct.unpack(longlongformat, buf)
         hash_value += l_value
-        hash_value = hash_value & 0xFFFFFFFFFFFFFFFF  # to remain as 64bit number
+        hash_value &= 0xFFFFFFFFFFFFFFFF  # to remain as 64bit number
 
     f.seek(max(0, filesize - 65536), 0)
-    for x in range(65536 / bytesize):
+    for x in range(int(65536 / bytesize)):
         buf = f.read(bytesize)
         (l_value,) = struct.unpack(longlongformat, buf)
         hash_value += l_value
-        hash_value = hash_value & 0xFFFFFFFFFFFFFFFF
+        hash_value &= 0xFFFFFFFFFFFFFFFF
 
     f.close()
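A self-contained sketch of the 64-bit checksum arithmetic behind the OpenSubtitles hash changes above; the int() around the division and the explicit 64-bit mask are the Python 3 fixes the hunk makes. It runs on an in-memory buffer instead of the first and last 64 KiB of a real file, and chunk_sum64 is a hypothetical helper name used only here.

import struct

def chunk_sum64(data):
    fmt = b'q'                                 # native signed 64-bit integer
    size = struct.calcsize(fmt)
    value = 0
    for i in range(int(len(data) / size)):     # int(...) keeps range() happy on Python 3
        (part,) = struct.unpack(fmt, data[i * size:(i + 1) * size])
        value += part
        value &= 0xFFFFFFFFFFFFFFFF            # stay within 64 bits
    return value

print('%016x' % chunk_sum64(b'0123456789abcdef' * 4))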
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,373 +18,284 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import unicode_literals
-from guessit import UnicodeMixin, base_text_type, u, s
-from guessit.fileutils import load_file_in_same_dir
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit import UnicodeMixin, base_text_type, u
 from guessit.textutils import find_words
-from guessit.country import Country
+from babelfish import Language, Country
+import babelfish
 import re
 import logging
+from guessit.guess import Guess
 
-__all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language',
-            'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'UNDETERMINED',
-            'search_language', 'guess_language' ]
+__all__ = ['Language', 'UNDETERMINED',
+           'search_language', 'guess_language']
 
 
 log = logging.getLogger(__name__)
 
+UNDETERMINED = babelfish.Language('und')
 
-# downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
-#
-# Description of the fields:
-# "An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given),
-# an alpha-2 code (when given), an English name, and a French name of a language
-# are all separated by pipe (|) characters."
-_iso639_contents = load_file_in_same_dir(__file__, 'ISO-639-2_utf-8.txt')
-
-# drop the BOM from the beginning of the file
-_iso639_contents = _iso639_contents[1:]
-
-language_matrix = [ l.strip().split('|')
-                    for l in _iso639_contents.strip().split('\n') ]
-
-
-# update information in the language matrix
-language_matrix += [['mol', '', 'mo', 'Moldavian', 'moldave'],
-                    ['ass', '', '', 'Assyrian', 'assyrien']]
-
-for lang in language_matrix:
-    # remove unused languages that shadow other common ones with a non-official form
-    if (lang[2] == 'se' or  # Northern Sami shadows Swedish
-        lang[2] == 'br'):   # Breton shadows Brazilian
-        lang[2] = ''
-    # add missing information
-    if lang[0] == 'und':
-        lang[2] = 'un'
-    if lang[0] == 'srp':
-        lang[1] = 'scc'  # from OpenSubtitles
-
-
-lng3 = frozenset(l[0] for l in language_matrix if l[0])
-lng3term = frozenset(l[1] for l in language_matrix if l[1])
-lng2 = frozenset(l[2] for l in language_matrix if l[2])
-lng_en_name = frozenset(lng for l in language_matrix
-                        for lng in l[3].lower().split('; ') if lng)
-lng_fr_name = frozenset(lng for l in language_matrix
-                        for lng in l[4].lower().split('; ') if lng)
-lng_all_names = lng3 | lng3term | lng2 | lng_en_name | lng_fr_name
-
-lng3_to_lng3term = dict((l[0], l[1]) for l in language_matrix if l[1])
-lng3term_to_lng3 = dict((l[1], l[0]) for l in language_matrix if l[1])
-
-lng3_to_lng2 = dict((l[0], l[2]) for l in language_matrix if l[2])
-lng2_to_lng3 = dict((l[2], l[0]) for l in language_matrix if l[2])
-
-# we only return the first given english name, hoping it is the most used one
-lng3_to_lng_en_name = dict((l[0], l[3].split('; ')[0])
-                           for l in language_matrix if l[3])
-lng_en_name_to_lng3 = dict((en_name.lower(), l[0])
-                           for l in language_matrix if l[3]
-                           for en_name in l[3].split('; '))
-
-# we only return the first given french name, hoping it is the most used one
-lng3_to_lng_fr_name = dict((l[0], l[4].split('; ')[0])
-                           for l in language_matrix if l[4])
-lng_fr_name_to_lng3 = dict((fr_name.lower(), l[0])
-                           for l in language_matrix if l[4]
-                           for fr_name in l[4].split('; '))
-
-# contains a list of exceptions: strings that should be parsed as a language
-# but which are not in an ISO form
-lng_exceptions = { 'unknown': ('und', None),
-                   'inconnu': ('und', None),
-                   'unk': ('und', None),
-                   'un': ('und', None),
-                   'gr': ('gre', None),
-                   'greek': ('gre', None),
-                   'esp': ('spa', None),
-                   'español': ('spa', None),
-                   'se': ('swe', None),
-                   'po': ('pt', 'br'),
-                   'pb': ('pt', 'br'),
-                   'pob': ('pt', 'br'),
-                   'br': ('pt', 'br'),
-                   'brazilian': ('pt', 'br'),
-                   'català': ('cat', None),
-                   'cz': ('cze', None),
-                   'ua': ('ukr', None),
-                   'cn': ('chi', None),
-                   'chs': ('chi', None),
-                   'jp': ('jpn', None),
-                   'scr': ('hrv', None)
-                   }
+SYN = {('und', None): ['unknown', 'inconnu', 'unk', 'un'],
+       ('ell', None): ['gr', 'greek'],
+       ('spa', None): ['esp', 'español'],
+       ('fra', None): ['français', 'vf', 'vff', 'vfi'],
+       ('swe', None): ['se'],
+       ('por', 'BR'): ['po', 'pb', 'pob', 'br', 'brazilian'],
+       ('cat', None): ['català'],
+       ('ces', None): ['cz'],
+       ('ukr', None): ['ua'],
+       ('zho', None): ['cn'],
+       ('jpn', None): ['jp'],
+       ('hrv', None): ['scr'],
+       ('mul', None): ['multi', 'dl'],  # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
+       }
-def is_iso_language(language):
-    return language.lower() in lng_all_names
-
-
-def is_language(language):
-    return is_iso_language(language) or language in lng_exceptions
-
-
-def lang_set(languages, strict=False):
-    """Return a set of guessit.Language created from their given string
-    representation.
-
-    if strict is True, then this will raise an exception if any language
-    could not be identified.
-    """
-    return set(Language(l, strict=strict) for l in languages)
-
-
-class Language(UnicodeMixin):
-    """This class represents a human language.
-
-    You can initialize it with pretty much anything, as it knows conversion
-    from ISO-639 2-letter and 3-letter codes, English and French names.
-
-    You can also distinguish languages for specific countries, such as
-    Portuguese and Brazilian Portuguese.
-
-    There are various properties on the language object that give you the
-    representation of the language for a specific usage, such as .alpha3
-    to get the ISO 3-letter code, or .opensubtitles to get the OpenSubtitles
-    language code.
-
-    >>> Language('fr')
-    Language(French)
-
-    >>> s(Language('eng').french_name)
-    'anglais'
-
-    >>> s(Language('pt(br)').country.english_name)
-    'Brazil'
-
-    >>> s(Language('Español (Latinoamérica)').country.english_name)
-    'Latin America'
-
-    >>> Language('Spanish (Latin America)') == Language('Español (Latinoamérica)')
-    True
-
-    >>> s(Language('zz', strict=False).english_name)
-    'Undetermined'
-
-    >>> s(Language('pt(br)').opensubtitles)
-    'pob'
-    """
+class GuessitConverter(babelfish.LanguageReverseConverter):
 
     _with_country_regexp = re.compile('(.*)\((.*)\)')
     _with_country_regexp2 = re.compile('(.*)-(.*)')
 
-    def __init__(self, language, country=None, strict=False, scheme=None):
-        language = u(language.strip().lower())
-        with_country = (Language._with_country_regexp.match(language) or
-                        Language._with_country_regexp2.match(language))
-        if with_country:
-            self.lang = Language(with_country.group(1)).lang
-            self.country = Country(with_country.group(2))
-            return
-
-        self.lang = None
-        self.country = Country(country) if country else None
-
-        # first look for scheme specific languages
-        if scheme == 'opensubtitles':
-            if language == 'br':
-                self.lang = 'bre'
-                return
-            elif language == 'se':
-                self.lang = 'sme'
-                return
-        elif scheme is not None:
-            log.warning('Unrecognized scheme: "%s" - Proceeding with standard one' % scheme)
-
-        # look for ISO language codes
-        if len(language) == 2:
-            self.lang = lng2_to_lng3.get(language)
-        elif len(language) == 3:
-            self.lang = (language
-                         if language in lng3
-                         else lng3term_to_lng3.get(language))
-        else:
-            self.lang = (lng_en_name_to_lng3.get(language) or
-                         lng_fr_name_to_lng3.get(language))
-
-        # general language exceptions
-        if self.lang is None and language in lng_exceptions:
-            lang, country = lng_exceptions[language]
-            self.lang = Language(lang).alpha3
-            self.country = Country(country) if country else None
-
-        msg = 'The given string "%s" could not be identified as a language' % language
-
-        if self.lang is None and strict:
-            raise ValueError(msg)
-
-        if self.lang is None:
-            log.debug(msg)
-            self.lang = 'und'
-
-    @property
-    def alpha2(self):
-        return lng3_to_lng2[self.lang]
-
-    @property
-    def alpha3(self):
-        return self.lang
-
-    @property
-    def alpha3term(self):
-        return lng3_to_lng3term[self.lang]
-
-    @property
-    def english_name(self):
-        return lng3_to_lng_en_name[self.lang]
-
-    @property
-    def french_name(self):
-        return lng3_to_lng_fr_name[self.lang]
-
-    @property
-    def opensubtitles(self):
-        if self.lang == 'por' and self.country and self.country.alpha2 == 'br':
-            return 'pob'
-        elif self.lang in ['gre', 'srp']:
-            return self.alpha3term
-        return self.alpha3
-
-    @property
-    def tmdb(self):
-        if self.country:
-            return '%s-%s' % (self.alpha2, self.country.alpha2.upper())
-        return self.alpha2
-
-    def __hash__(self):
-        return hash(self.lang)
-
-    def __eq__(self, other):
-        if isinstance(other, Language):
-            return self.lang == other.lang
-
-        if isinstance(other, base_text_type):
-            try:
-                return self == Language(other)
-            except ValueError:
-                return False
-
-        return False
-
-    def __ne__(self, other):
-        return not self == other
-
-    def __nonzero__(self):
-        return self.lang != 'und'
-
-    def __unicode__(self):
-        if self.country:
-            return '%s(%s)' % (self.english_name, self.country.alpha2)
-        else:
-            return self.english_name
-
-    def __repr__(self):
-        if self.country:
-            return 'Language(%s, country=%s)' % (self.english_name, self.country)
-        else:
-            return 'Language(%s)' % self.english_name
+    def __init__(self):
+        self.guessit_exceptions = {}
+        for (alpha3, country), synlist in SYN.items():
+            for syn in synlist:
+                self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
+
+    @property
+    def codes(self):
+        return (babelfish.language_converters['alpha3b'].codes |
+                babelfish.language_converters['alpha2'].codes |
+                babelfish.language_converters['name'].codes |
+                babelfish.language_converters['opensubtitles'].codes |
+                babelfish.country_converters['name'].codes |
+                frozenset(self.guessit_exceptions.keys()))
+
+    def convert(self, alpha3, country=None, script=None):
+        return str(babelfish.Language(alpha3, country, script))
+
+    def reverse(self, name):
+        with_country = (GuessitConverter._with_country_regexp.match(name) or
+                        GuessitConverter._with_country_regexp2.match(name))
+
+        name = u(name.lower())
+        if with_country:
+            lang = Language.fromguessit(with_country.group(1).strip())
+            lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
+            return (lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None)
+
+        # exceptions come first, as they need to override a potential match
+        # with any of the other guessers
+        try:
+            return self.guessit_exceptions[name]
+        except KeyError:
+            pass
+
+        for conv in [babelfish.Language,
+                     babelfish.Language.fromalpha3b,
+                     babelfish.Language.fromalpha2,
+                     babelfish.Language.fromname,
+                     babelfish.Language.fromopensubtitles]:
+            try:
+                c = conv(name)
+                return c.alpha3, c.country, c.script
+            except (ValueError, babelfish.LanguageReverseError):
+                pass
+
+        raise babelfish.LanguageReverseError(name)
+
+
+babelfish.language_converters['guessit'] = GuessitConverter()
+
+
+COUNTRIES_SYN = {'ES': ['españa'],
+                 'GB': ['UK'],
+                 'BR': ['brazilian', 'bra'],
+                 # FIXME: this one is a bit of a stretch, not sure how to do
+                 # it properly, though...
+                 'MX': ['Latinoamérica', 'latin america']
+                 }
+
+
+class GuessitCountryConverter(babelfish.CountryReverseConverter):
+    def __init__(self):
+        self.guessit_exceptions = {}
+
+        for alpha2, synlist in COUNTRIES_SYN.items():
+            for syn in synlist:
+                self.guessit_exceptions[syn.lower()] = alpha2
+
+    @property
+    def codes(self):
+        return (babelfish.country_converters['name'].codes |
+                frozenset(babelfish.COUNTRIES.values()) |
+                frozenset(self.guessit_exceptions.keys()))
+
+    def convert(self, alpha2):
+        if alpha2 == 'GB':
+            return 'UK'
+        return str(Country(alpha2))
+
+    def reverse(self, name):
+        # exceptions come first, as they need to override a potential match
+        # with any of the other guessers
+        try:
+            return self.guessit_exceptions[name.lower()]
+        except KeyError:
+            pass
+
+        try:
+            return babelfish.Country(name.upper()).alpha2
+        except ValueError:
+            pass
+
+        for conv in [babelfish.Country.fromname]:
+            try:
+                return conv(name).alpha2
+            except babelfish.CountryReverseError:
+                pass
+
+        raise babelfish.CountryReverseError(name)
+
+
+babelfish.country_converters['guessit'] = GuessitCountryConverter()
+
+
+# list of common words which could be interpreted as languages, but which
+# are far too common to be able to say they represent a language in the
+# middle of a string (where they most likely carry their commmon meaning)
+LNG_COMMON_WORDS = frozenset([
+    # english words
+    'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
+    'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
+    'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
+    'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb', 'bt',
+    'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice', 'ay',
+    # french words
+    'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
+    'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me',
+    'ne', 'ma', 'va', 'au',
+    # japanese words,
+    'wa', 'ga', 'ao',
+    # spanish words
+    'la', 'el', 'del', 'por', 'mar',
+    # other
+    'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
+    'vi', 'ben', 'da', 'lt', 'ch',
+    # new from babelfish
+    'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
+    'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
+    'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
+    'fer', 'fun', 'two', 'big', 'psy', 'air',
+    # movie title
+    'brazil',
+    # release groups
+    'bs',  # Bosnian
+    'kz',
+    # countries
+    'gt', 'lt',
+    # part/pt
+    'pt'
+    ])
+
+LNG_COMMON_WORDS_STRICT = frozenset(['brazil'])
+
+
+subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub']
+subtitle_suffixes = ['subforced', 'fansub', 'hardsub']
+lang_prefixes = ['true']
+
+
+def find_possible_languages(string, allowed_languages=None):
+    """Find possible languages in the string
+
+    :return: list of tuple (property, Language, lang_word, word)
+    """
+    common_words = None
+    if allowed_languages:
+        common_words = LNG_COMMON_WORDS_STRICT
+    else:
+        common_words = LNG_COMMON_WORDS
+
+    words = find_words(string)
+
+    valid_words = []
+    for word in words:
+        lang_word = word.lower()
+        key = 'language'
+        for prefix in subtitle_prefixes:
+            if lang_word.startswith(prefix):
+                lang_word = lang_word[len(prefix):]
+                key = 'subtitleLanguage'
+        for suffix in subtitle_suffixes:
+            if lang_word.endswith(suffix):
+                lang_word = lang_word[:len(suffix)]
+                key = 'subtitleLanguage'
+        for prefix in lang_prefixes:
+            if lang_word.startswith(prefix):
+                lang_word = lang_word[len(prefix):]
+        if lang_word not in common_words:
+            try:
+                lang = Language.fromguessit(lang_word)
+                if allowed_languages:
+                    if lang.name.lower() in allowed_languages or lang.alpha2.lower() in allowed_languages or lang.alpha3.lower() in allowed_languages:
+                        valid_words.append((key, lang, lang_word, word))
+                # Keep language with alpha2 equivalent. Others are probably
+                # uncommon languages.
+                elif lang == 'mul' or hasattr(lang, 'alpha2'):
+                    valid_words.append((key, lang, lang_word, word))
+            except babelfish.Error:
+                pass
+    return valid_words
 
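A hedged usage sketch for the converter registered above: importing the module is what installs it in babelfish's converter registry, after which the synonyms from the SYN table resolve through Language.fromguessit(). The import path assumes the vendored library is importable as guessit.

import guessit.language  # noqa: F401  (registering the 'guessit' converter is an import-time side effect)
from babelfish import Language

print(Language.fromguessit('br'))       # Portuguese with country BR, per the ('por', 'BR') synonyms
print(Language.fromguessit('español'))  # Spanish
print(Language.fromguessit('gr'))       # Greek ('ell')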
-UNDETERMINED = Language('und')
-ALL_LANGUAGES = frozenset(Language(lng) for lng in lng_all_names) - frozenset([UNDETERMINED])
-ALL_LANGUAGES_NAMES = lng_all_names
-
-
-def search_language(string, lang_filter=None, skip=None):
+def search_language(string, allowed_languages=None):
     """Looks for language patterns, and if found return the language object,
     its group span and an associated confidence.
 
     you can specify a list of allowed languages using the lang_filter argument,
     as in lang_filter = [ 'fr', 'eng', 'spanish' ]
 
-    >>> search_language('movie [en].avi')
-    (Language(English), (7, 9), 0.8)
+    >>> search_language('movie [en].avi')['language']
+    <Language [en]>
 
-    >>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es'])
-    (None, None, None)
+    >>> search_language('the zen fat cat and the gay mad men got a new fan', allowed_languages = ['en', 'fr', 'es'])
+
     """
-
-    # list of common words which could be interpreted as languages, but which
-    # are far too common to be able to say they represent a language in the
-    # middle of a string (where they most likely carry their commmon meaning)
-    lng_common_words = frozenset([
-        # english words
-        'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
-        'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
-        'fry', 'cop', 'zen', 'gay', 'fat', 'cherokee', 'got', 'an', 'as',
-        'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi',
-        # french words
-        'bas', 'de', 'le', 'son', 'vo', 'vf', 'ne', 'ca', 'ce', 'et', 'que',
-        'mal', 'est', 'vol', 'or', 'mon', 'se',
-        # spanish words
-        'la', 'el', 'del', 'por', 'mar',
-        # other
-        'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
-        'vi', 'ben', 'da', 'lt'
-        ])
-    sep = r'[](){} \._-+'
-
-    if lang_filter:
-        lang_filter = lang_set(lang_filter)
-
-    slow = ' %s ' % string.lower()
+    if allowed_languages:
+        allowed_languages = set(Language.fromguessit(lang) for lang in allowed_languages)
 
     confidence = 1.0  # for all of them
 
-    for lang in set(find_words(slow)) & lng_all_names:
-
-        if lang in lng_common_words:
-            continue
-
-        pos = slow.find(lang)
-
-        if pos != -1:
-            end = pos + len(lang)
-
-            # skip if span in in skip list
-            while skip and (pos - 1, end - 1) in skip:
-                pos = slow.find(lang, end)
-                if pos == -1:
-                    continue
-                end = pos + len(lang)
-            if pos == -1:
-                continue
-
-            # make sure our word is always surrounded by separators
-            if slow[pos - 1] not in sep or slow[end] not in sep:
-                continue
-
-            language = Language(slow[pos:end])
-            if lang_filter and language not in lang_filter:
-                continue
+    for prop, language, lang, word in find_possible_languages(string, allowed_languages):
+        pos = string.find(word)
+        end = pos + len(word)
 
         # only allow those languages that have a 2-letter code, those that
         # don't are too esoteric and probably false matches
-        if language.lang not in lng3_to_lng2:
-            continue
+        # if language.lang not in lng3_to_lng2:
+        #     continue
 
-        # confidence depends on lng2, lng3, english name, ...
+        # confidence depends on alpha2, alpha3, english name, ...
         if len(lang) == 2:
             confidence = 0.8
         elif len(lang) == 3:
             confidence = 0.9
+        elif prop == 'subtitleLanguage':
+            confidence = 0.6  # Subtitle prefix found with language
         else:
             # Note: we could either be really confident that we found a
             # language or assume that full language names are too
             # common words and lower their confidence accordingly
             confidence = 0.3  # going with the low-confidence route here
 
-        return language, (pos - 1, end - 1), confidence
+        return Guess({prop: language}, confidence=confidence, input=string, span=(pos, end))
 
-    return None, None, None
+    return None
 
 
-def guess_language(text):
+def guess_language(text):  # pragma: no cover
     """Guess the language in which a body of text is written.
 
     This uses the external guess-language python module, and will fail and return
 
@@ -392,7 +303,7 @@ def guess_language(text):
     """
     try:
         from guess_language import guessLanguage
-        return Language(guessLanguage(text))
+        return Language.fromguessit(guessLanguage(text))
 
     except ImportError:
         log.error('Cannot detect the language of the given text body, missing dependency: guess-language')
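A small usage sketch of the new return shape: search_language now returns a Guess (or None) rather than a (language, span, confidence) tuple, with the span and confidence carried in the guess metadata. The values in the comments follow the code in the hunk for the doctest input.

from guessit.language import search_language

guess = search_language('movie [en].avi')
if guess is not None:
    print(guess['language'])                # <Language [en]>
    print(guess.metadata('language').span)  # (7, 9), where 'en' sits in the input
    print(guess.confidence('language'))     # 0.8 for a two-letter code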
@ -2,7 +2,8 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
|
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,29 +19,36 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, \
|
||||||
from guessit import PY3, u, base_text_type
|
unicode_literals
|
||||||
from guessit.matchtree import MatchTree
|
|
||||||
from guessit.textutils import normalize_unicode, clean_string
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from guessit import PY3, u
|
||||||
|
from guessit.transfo import TransformerException
|
||||||
|
from guessit.matchtree import MatchTree
|
||||||
|
from guessit.textutils import normalize_unicode, clean_default
|
||||||
|
from guessit.guess import Guess
|
||||||
|
import inspect
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class IterativeMatcher(object):
|
class IterativeMatcher(object):
|
||||||
def __init__(self, filename, filetype='autodetect', opts=None, transfo_opts=None):
|
|
||||||
"""An iterative matcher tries to match different patterns that appear
|
"""An iterative matcher tries to match different patterns that appear
|
||||||
in the filename.
|
in the filename.
|
||||||
|
|
||||||
The 'filetype' argument indicates which type of file you want to match.
|
The ``filetype`` argument indicates which type of file you want to match.
|
||||||
If it is 'autodetect', the matcher will try to see whether it can guess
|
If it is undefined, the matcher will try to see whether it can guess
|
||||||
that the file corresponds to an episode, or otherwise will assume it is
|
that the file corresponds to an episode, or otherwise will assume it is
|
||||||
a movie.
|
a movie.
|
||||||
|
|
||||||
The recognized 'filetype' values are:
|
The recognized ``filetype`` values are:
|
||||||
[ autodetect, subtitle, info, movie, moviesubtitle, movieinfo, episode,
|
``['subtitle', 'info', 'movie', 'moviesubtitle', 'movieinfo', 'episode',
|
||||||
episodesubtitle, episodeinfo ]
|
'episodesubtitle', 'episodeinfo']``
|
||||||
|
|
||||||
|
``options`` is a dict of options values to be passed to the transformations used
|
||||||
|
by the matcher.
|
||||||
|
|
||||||
The IterativeMatcher works mainly in 2 steps:
|
The IterativeMatcher works mainly in 2 steps:
|
||||||
|
|
||||||
|
|
@ -48,7 +56,7 @@ class IterativeMatcher(object):
|
||||||
which have a semantic meaning, such as episode number, movie title,
|
which have a semantic meaning, such as episode number, movie title,
|
||||||
etc...
|
etc...
|
||||||
|
|
||||||
The match_tree created looks like the following:
|
The match_tree created looks like the following::
|
||||||
|
|
||||||
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
|
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
|
||||||
0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
|
0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
|
||||||
|
|
@ -58,123 +66,241 @@ class IterativeMatcher(object):
|
||||||
[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
|
[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
|
||||||
|
|
||||||
The first 3 lines indicate the group index in which a char in the
|
The first 3 lines indicate the group index in which a char in the
|
||||||
filename is located. So for instance, x264 is the group (0, 4, 1), and
|
filename is located. So for instance, ``x264`` (in the middle) is the group (0, 4, 1), and
|
||||||
it corresponds to a video codec, denoted by the letter'v' in the 4th line.
|
it corresponds to a video codec, denoted by the letter ``v`` in the 4th line.
|
||||||
(for more info, see guess.matchtree.to_string)
|
(for more info, see guess.matchtree.to_string)
|
||||||
|
|
||||||
Second, it tries to merge all this information into a single object
|
Second, it tries to merge all this information into a single object
|
||||||
containing all the found properties, and does some (basic) conflict
|
containing all the found properties, and does some (basic) conflict
|
||||||
resolution when they arise.
|
resolution when they arise.
|
||||||
|
|
||||||
|
|
||||||
When you create the Matcher, you can pass it:
|
|
||||||
- a list 'opts' of option names, that act as global flags
|
|
||||||
- a dict 'transfo_opts' of { transfo_name: (transfo_args, transfo_kwargs) }
|
|
||||||
with which to call the transfo.process() function.
|
|
||||||
"""
|
"""
|
||||||
|
def __init__(self, filename, options=None, **kwargs):
|
||||||
valid_filetypes = ('autodetect', 'subtitle', 'info', 'video',
|
options = dict(options or {})
|
||||||
'movie', 'moviesubtitle', 'movieinfo',
|
for k, v in kwargs.items():
|
||||||
'episode', 'episodesubtitle', 'episodeinfo')
|
if k not in options or not options[k]:
|
||||||
if filetype not in valid_filetypes:
|
options[k] = v # options dict has priority over keyword arguments
|
||||||
raise ValueError("filetype needs to be one of %s" % valid_filetypes)
|
self._validate_options(options)
|
||||||
if not PY3 and not isinstance(filename, unicode):
|
if not PY3 and not isinstance(filename, unicode):
|
||||||
log.warning('Given filename to matcher is not unicode...')
|
log.warning('Given filename to matcher is not unicode...')
|
||||||
filename = filename.decode('utf-8')
|
filename = filename.decode('utf-8')
|
||||||
|
|
||||||
filename = normalize_unicode(filename)
|
filename = normalize_unicode(filename)
|
||||||
|
if options and options.get('clean_function'):
|
||||||
|
clean_function = options.get('clean_function')
|
||||||
|
if not hasattr(clean_function, '__call__'):
|
||||||
|
module, function = clean_function.rsplit('.')
|
||||||
|
if not module:
|
||||||
|
module = 'guessit.textutils'
|
||||||
|
clean_function = getattr(__import__(module), function)
|
||||||
|
if not clean_function:
|
||||||
|
log.error('Can\'t find clean function %s. Default will be used.' % options.get('clean_function'))
|
||||||
|
clean_function = clean_default
|
||||||
|
else:
|
||||||
|
clean_function = clean_default
|
||||||
|
|
||||||
if opts is None:
|
self.match_tree = MatchTree(filename, clean_function=clean_function)
|
||||||
opts = []
|
self.options = options
|
||||||
if not isinstance(opts, list):
|
self._transfo_calls = []
|
||||||
raise ValueError('opts must be a list of option names! Received: type=%s val=%s',
|
|
||||||
type(opts), opts)
|
|
||||||
|
|
||||||
if transfo_opts is None:
|
|
||||||
transfo_opts = {}
|
|
||||||
if not isinstance(transfo_opts, dict):
|
|
||||||
raise ValueError('transfo_opts must be a dict of { transfo_name: (args, kwargs) }. '+
|
|
||||||
'Received: type=%s val=%s', type(transfo_opts), transfo_opts)
|
|
||||||
|
|
||||||
self.match_tree = MatchTree(filename)
|
|
||||||
|
|
||||||
# sanity check: make sure we don't process a (mostly) empty string
|
# sanity check: make sure we don't process a (mostly) empty string
|
||||||
if clean_string(filename) == '':
|
if clean_function(filename).strip() == '':
|
||||||
return
|
return
|
||||||
|
|
||||||
|
from guessit.plugins import transformers
|
||||||
|
|
||||||
|
try:
|
||||||
mtree = self.match_tree
|
mtree = self.match_tree
|
||||||
mtree.guess.set('type', filetype, confidence=1.0)
|
if 'type' in self.options:
|
||||||
|
mtree.guess.set('type', self.options['type'], confidence=0.0)
|
||||||
|
|
||||||
def apply_transfo(transfo_name, *args, **kwargs):
|
# Process
|
||||||
transfo = __import__('guessit.transfo.' + transfo_name,
|
for transformer in transformers.all_transformers():
|
||||||
globals=globals(), locals=locals(),
|
disabled = options.get('disabled_transformers')
|
||||||
fromlist=['process'], level=0)
|
if not disabled or transformer.name not in disabled:
|
||||||
default_args, default_kwargs = transfo_opts.get(transfo_name, ((), {}))
|
self._process(transformer, False)
|
||||||
all_args = args or default_args
|
|
||||||
all_kwargs = dict(default_kwargs)
|
|
||||||
all_kwargs.update(kwargs) # keep all kwargs merged together
|
|
||||||
transfo.process(mtree, *all_args, **all_kwargs)
|
|
||||||
|
|
||||||
# 1- first split our path into dirs + basename + ext
|
# Post-process
|
||||||
apply_transfo('split_path_components')
|
for transformer in transformers.all_transformers():
|
||||||
|
disabled = options.get('disabled_transformers')
|
||||||
# 2- guess the file type now (will be useful later)
|
if not disabled or transformer.name not in disabled:
|
||||||
apply_transfo('guess_filetype', filetype)
|
self._process(transformer, True)
|
||||||
if mtree.guess['type'] == 'unknown':
|
|
||||||
return
|
|
||||||
|
|
||||||
# 3- split each of those into explicit groups (separated by parentheses
|
|
||||||
# or square brackets)
|
|
||||||
apply_transfo('split_explicit_groups')
|
|
||||||
|
|
||||||
# 4- try to match information for specific patterns
|
|
||||||
# NOTE: order needs to comply to the following:
|
|
||||||
# - website before language (eg: tvu.org.ru vs russian)
|
|
||||||
# - language before episodes_rexps
|
|
||||||
# - properties before language (eg: he-aac vs hebrew)
|
|
||||||
# - release_group before properties (eg: XviD-?? vs xvid)
|
|
||||||
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
|
|
||||||
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
|
|
||||||
'guess_properties', 'guess_language',
|
|
||||||
'guess_video_rexps',
|
|
||||||
'guess_episodes_rexps', 'guess_weak_episodes_rexps' ]
|
|
||||||
else:
|
|
||||||
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
|
|
||||||
'guess_properties', 'guess_language',
|
|
||||||
'guess_video_rexps' ]
|
|
||||||
|
|
||||||
if 'nolanguage' in opts:
|
|
||||||
strategy.remove('guess_language')
|
|
||||||
|
|
||||||
|
|
||||||
for name in strategy:
|
|
||||||
apply_transfo(name)
|
|
||||||
|
|
||||||
# more guessers for both movies and episodes
|
|
||||||
apply_transfo('guess_bonus_features')
|
|
||||||
apply_transfo('guess_year', skip_first_year=('skip_first_year' in opts))
|
|
||||||
|
|
||||||
if 'nocountry' not in opts:
|
|
||||||
apply_transfo('guess_country')
|
|
||||||
|
|
||||||
apply_transfo('guess_idnumber')
|
|
||||||
|
|
||||||
|
|
||||||
# split into '-' separated subgroups (with required separator chars
|
|
||||||
# around the dash)
|
|
||||||
apply_transfo('split_on_dash')
|
|
||||||
|
|
||||||
# 5- try to identify the remaining unknown groups by looking at their
|
|
||||||
# position relative to other known elements
|
|
||||||
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
|
|
||||||
apply_transfo('guess_episode_info_from_position')
|
|
||||||
else:
|
|
||||||
apply_transfo('guess_movie_title_from_position')
|
|
||||||
|
|
||||||
# 6- perform some post-processing steps
|
|
||||||
apply_transfo('post_process')
|
|
||||||
|
|
||||||
log.debug('Found match tree:\n%s' % u(mtree))
|
log.debug('Found match tree:\n%s' % u(mtree))
|
||||||
|
except TransformerException as e:
|
||||||
|
log.debug('An error has occurred in Transformer %s: %s' % (e.transformer, e))
|
||||||
|
|
||||||
|
def _process(self, transformer, post=False):
|
||||||
|
|
||||||
|
if not hasattr(transformer, 'should_process') or transformer.should_process(self.match_tree, self.options):
|
||||||
|
if post:
|
||||||
|
transformer.post_process(self.match_tree, self.options)
|
||||||
|
else:
|
||||||
|
transformer.process(self.match_tree, self.options)
|
||||||
|
self._transfo_calls.append(transformer)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def second_pass_options(self):
|
||||||
|
second_pass_options = {}
|
||||||
|
for transformer in self._transfo_calls:
|
||||||
|
if hasattr(transformer, 'second_pass_options'):
|
||||||
|
transformer_second_pass_options = transformer.second_pass_options(self.match_tree, self.options)
|
||||||
|
if transformer_second_pass_options:
|
||||||
|
second_pass_options.update(transformer_second_pass_options)
|
||||||
|
|
||||||
|
return second_pass_options
|
||||||
|
|
||||||
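For illustration, the shape of a transformer object as _process and second_pass_options drive it above. This is a hedged sketch, not code from the diff: only the attribute and method names follow what the matcher actually calls; the class name and bodies are assumptions.

class ExampleTransformer(object):
    name = 'example_transformer'       # checked against 'disabled_transformers'

    def should_process(self, mtree, options=None):
        return True                    # skip expensive work when not relevant

    def process(self, mtree, options=None):
        pass                           # first pass: attach guesses to nodes

    def post_process(self, mtree, options=None):
        pass                           # second pass, after all transformers ran

    def second_pass_options(self, mtree, options=None):
        return {}                      # extra options for a re-run, if any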
|
def _validate_options(self, options):
|
||||||
|
valid_filetypes = ('subtitle', 'info', 'video',
|
||||||
|
'movie', 'moviesubtitle', 'movieinfo',
|
||||||
|
'episode', 'episodesubtitle', 'episodeinfo')
|
||||||
|
|
||||||
|
type_ = options.get('type')
|
||||||
|
if type_ and type_ not in valid_filetypes:
|
||||||
|
raise ValueError("filetype needs to be one of %s" % (valid_filetypes,))
|
||||||
|
|
||||||
def matched(self):
|
def matched(self):
|
||||||
return self.match_tree.matched()
|
return self.match_tree.matched()
|
||||||
|
|
||||||
|
|
||||||
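A short usage sketch of the reworked matcher API (not from the diff; the filename and the disabled transformer name are made up, the option keys are the ones handled by __init__ and _validate_options above).

from guessit.matcher import IterativeMatcher

matcher = IterativeMatcher('Dark.City.1998.DC.BDRip.720p.DTS.x264-CHD.mkv',
                           options={'type': 'movie',
                                    'disabled_transformers': ['guess_country']})
print(matcher.match_tree)               # visual match tree, as in the docstring
print(matcher.matched().nice_string())  # merged Guess with all found properties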
|
def build_guess(node, name, value=None, confidence=1.0):
|
||||||
|
guess = Guess({name: node.clean_value if value is None else value}, confidence=confidence)
|
||||||
|
guess.metadata().input = node.value if value is None else value
|
||||||
|
if value is None:
|
||||||
|
left_offset = 0
|
||||||
|
right_offset = 0
|
||||||
|
|
||||||
|
clean_value = node.clean_value
|
||||||
|
|
||||||
|
for i in range(0, len(node.value)):
|
||||||
|
if clean_value[0] == node.value[i]:
|
||||||
|
break
|
||||||
|
left_offset += 1
|
||||||
|
|
||||||
|
for i in reversed(range(0, len(node.value))):
|
||||||
|
if clean_value[-1] == node.value[i]:
|
||||||
|
break
|
||||||
|
right_offset += 1
|
||||||
|
|
||||||
|
guess.metadata().span = (node.span[0] - node.offset + left_offset, node.span[1] - node.offset - right_offset)
|
||||||
|
return guess
|
||||||
|
|
||||||
|
|
||||||
|
def found_property(node, name, value=None, confidence=1.0, update_guess=True, logger=None):
|
||||||
|
# automatically retrieve the log object from the caller frame
|
||||||
|
if not logger:
|
||||||
|
caller_frame = inspect.stack()[1][0]
|
||||||
|
logger = caller_frame.f_locals['self'].log
|
||||||
|
guess = build_guess(node, name, value, confidence)
|
||||||
|
return found_guess(node, guess, update_guess=update_guess, logger=logger)
|
||||||
|
|
||||||
|
|
||||||
|
def found_guess(node, guess, update_guess=True, logger=None):
|
||||||
|
if node.guess:
|
||||||
|
if update_guess:
|
||||||
|
node.guess.update_highest_confidence(guess)
|
||||||
|
else:
|
||||||
|
child = node.add_child(guess.metadata().span)
|
||||||
|
child.guess = guess
|
||||||
|
else:
|
||||||
|
node.guess = guess
|
||||||
|
log_found_guess(guess, logger)
|
||||||
|
return node.guess
|
||||||
|
|
||||||
|
|
||||||
|
def log_found_guess(guess, logger=None):
|
||||||
|
for k, v in guess.items():
|
||||||
|
(logger or log).debug('Property found: %s=%s (%s) (confidence=%.2f)' %
|
||||||
|
(k, v, guess.raw(k), guess.confidence(k)))
|
||||||
|
|
||||||
|
|
||||||
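An illustrative sketch of how a transformer typically records a property through the helpers above (not from the diff; the tagging rule and the tag_proper name are assumptions, found_property and unidentified_leaves are the functions defined in this file and in matchtree.py).

import logging
from guessit.matcher import found_property

log = logging.getLogger(__name__)

def tag_proper(mtree):
    # Mark leftover 'PROPER' tokens on otherwise unidentified leaves.
    for node in mtree.unidentified_leaves():
        if node.clean_value.lower() == 'proper':
            found_property(node, 'other', value='PROPER', confidence=1.0, logger=log)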
|
def _get_split_spans(node, span):
|
||||||
|
partition_spans = node.get_partition_spans(span)
|
||||||
|
for to_remove_span in partition_spans:
|
||||||
|
if to_remove_span[0] == span[0] and to_remove_span[1] in [span[1], span[1] + 1]:
|
||||||
|
partition_spans.remove(to_remove_span)
|
||||||
|
break
|
||||||
|
return partition_spans
|
||||||
|
|
||||||
|
|
||||||
|
class GuessFinder(object):
|
||||||
|
def __init__(self, guess_func, confidence=None, logger=None, options=None):
|
||||||
|
self.guess_func = guess_func
|
||||||
|
self.confidence = confidence
|
||||||
|
self.logger = logger or log
|
||||||
|
self.options = options
|
||||||
|
|
||||||
|
def process_nodes(self, nodes):
|
||||||
|
for node in nodes:
|
||||||
|
self.process_node(node)
|
||||||
|
|
||||||
|
def process_node(self, node, iterative=True, partial_span=None):
|
||||||
|
if partial_span:
|
||||||
|
value = node.value[partial_span[0]:partial_span[1]]
|
||||||
|
else:
|
||||||
|
value = node.value
|
||||||
|
string = ' %s ' % value # add sentinels
|
||||||
|
|
||||||
|
if not self.options:
|
||||||
|
matcher_result = self.guess_func(string, node)
|
||||||
|
else:
|
||||||
|
matcher_result = self.guess_func(string, node, self.options)
|
||||||
|
|
||||||
|
if matcher_result:
|
||||||
|
if not isinstance(matcher_result, Guess):
|
||||||
|
result, span = matcher_result
|
||||||
|
else:
|
||||||
|
result, span = matcher_result, matcher_result.metadata().span
|
||||||
|
|
||||||
|
if result:
|
||||||
|
# readjust span to compensate for sentinels
|
||||||
|
span = (span[0] - 1, span[1] - 1)
|
||||||
|
|
||||||
|
# readjust span to compensate for partial_span
|
||||||
|
if partial_span:
|
||||||
|
span = (span[0] + partial_span[0], span[1] + partial_span[0])
|
||||||
|
|
||||||
|
partition_spans = None
|
||||||
|
if self.options and 'skip_nodes' in self.options:
|
||||||
|
skip_nodes = self.options.get('skip_nodes')
|
||||||
|
for skip_node in skip_nodes:
|
||||||
|
if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\
|
||||||
|
skip_node.span == span or\
|
||||||
|
skip_node.span == (span[0] + skip_node.offset, span[1] + skip_node.offset):
|
||||||
|
if partition_spans is None:
|
||||||
|
partition_spans = _get_split_spans(node, skip_node.span)
|
||||||
|
else:
|
||||||
|
new_partition_spans = []
|
||||||
|
for partition_span in partition_spans:
|
||||||
|
tmp_node = MatchTree(value, span=partition_span, parent=node)
|
||||||
|
tmp_partitions_spans = _get_split_spans(tmp_node, skip_node.span)
|
||||||
|
new_partition_spans.extend(tmp_partitions_spans)
|
||||||
|
partition_spans.extend(new_partition_spans)
|
||||||
|
|
||||||
|
if not partition_spans:
|
||||||
|
# restore sentinels compensation
|
||||||
|
|
||||||
|
if isinstance(result, Guess):
|
||||||
|
guess = result
|
||||||
|
else:
|
||||||
|
guess = Guess(result, confidence=self.confidence, input=string, span=span)
|
||||||
|
|
||||||
|
if not iterative:
|
||||||
|
found_guess(node, guess, logger=self.logger)
|
||||||
|
else:
|
||||||
|
absolute_span = (span[0] + node.offset, span[1] + node.offset)
|
||||||
|
node.partition(span)
|
||||||
|
if node.is_leaf():
|
||||||
|
found_guess(node, guess, logger=self.logger)
|
||||||
|
else:
|
||||||
|
found_child = None
|
||||||
|
for child in node.children:
|
||||||
|
if child.span == absolute_span:
|
||||||
|
found_guess(child, guess, logger=self.logger)
|
||||||
|
found_child = child
|
||||||
|
break
|
||||||
|
for child in node.children:
|
||||||
|
if child is not found_child:
|
||||||
|
self.process_node(child)
|
||||||
|
else:
|
||||||
|
for partition_span in partition_spans:
|
||||||
|
self.process_node(node, partial_span=partition_span)
|
||||||
|
|
|
||||||
|
|
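A hedged sketch of how GuessFinder is meant to wrap a guess function (not from the diff; guess_year and its regular expression are assumptions for the example, and the (result, span) return shape is the one process_node unpacks above).

import re
from guessit.matcher import GuessFinder

def guess_year(string, node, options=None):
    # Look for a plausible 4-digit year anywhere in the given substring.
    match = re.search(r'(?:19|20)\d{2}', string)
    if match:
        return {'year': int(match.group())}, match.span()
    return None, None

# Typically called from a transformer's process():
#   GuessFinder(guess_year, confidence=1.0, options=options).process_nodes(
#       mtree.unidentified_leaves())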
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,12 +18,15 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit import UnicodeMixin, base_text_type, Guess
|
|
||||||
from guessit.textutils import clean_string, str_fill
|
import guessit # @UnusedImport needed for doctests
|
||||||
|
from guessit import UnicodeMixin, base_text_type
|
||||||
|
from guessit.textutils import clean_default, str_fill
|
||||||
from guessit.patterns import group_delimiters
|
from guessit.patterns import group_delimiters
|
||||||
from guessit.guess import (merge_similar_guesses, merge_all,
|
from guessit.guess import (merge_similar_guesses, smart_merge,
|
||||||
choose_int, choose_string)
|
choose_int, choose_string, Guess)
|
||||||
|
from itertools import takewhile
|
||||||
import copy
|
import copy
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
@ -31,23 +34,71 @@ log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class BaseMatchTree(UnicodeMixin):
|
class BaseMatchTree(UnicodeMixin):
|
||||||
"""A MatchTree represents the hierarchical split of a string into its
|
"""A BaseMatchTree is a tree covering the filename, where each
|
||||||
constituent semantic groups."""
|
node represents a substring in the filename and can have a ``Guess``
|
||||||
|
associated with it that contains the information that has been guessed
|
||||||
|
in this node. Nodes can be further split into subnodes until a proper
|
||||||
|
split has been found.
|
||||||
|
|
||||||
def __init__(self, string='', span=None, parent=None):
|
Each node has the following attributes:
|
||||||
|
- string = the original string of which this node represents a region
|
||||||
|
- span = a pair of (begin, end) indices delimiting the substring
|
||||||
|
- parent = parent node
|
||||||
|
- children = list of children nodes
|
||||||
|
- guess = Guess()
|
||||||
|
|
||||||
|
BaseMatchTrees are displayed in the following way:
|
||||||
|
|
||||||
|
>>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
|
||||||
|
>>> print(guessit.IterativeMatcher(path).match_tree)
|
||||||
|
000000 1111111111111111 2222222222222222222222222222222222222222222 333
|
||||||
|
000000 0000000000111111 0000000000111111222222222222222222222222222 000
|
||||||
|
011112 011112000011111222222222222222222 000
|
||||||
|
011112222222222222
|
||||||
|
0000011112222
|
||||||
|
01112 0111
|
||||||
|
Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
|
||||||
|
tttttttttt yyyy yyyy fffff ssss aaa vvvv rrr ccc
|
||||||
|
Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
|
||||||
|
|
||||||
|
The last line contains the filename, which you can use as a reference.
|
||||||
|
The previous line contains the type of property that has been found.
|
||||||
|
The line before that contains the filename, where all the found groups
|
||||||
|
have been blanked. Basically, what is left on this line are the leftover
|
||||||
|
groups which could not be identified.
|
||||||
|
|
||||||
|
The lines before that indicate the indices of the groups in the tree.
|
||||||
|
|
||||||
|
For instance, the part of the filename 'BDRip' is the leaf with index
|
||||||
|
``(2, 2, 1)`` (read from top to bottom), and its meaning is 'format'
|
||||||
|
(as shown by the ``f``'s on the last-but-one line).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, string='', span=None, parent=None, clean_function=None):
|
||||||
self.string = string
|
self.string = string
|
||||||
self.span = span or (0, len(string))
|
self.span = span or (0, len(string))
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.children = []
|
self.children = []
|
||||||
self.guess = Guess()
|
self.guess = Guess()
|
||||||
|
self._clean_value = None
|
||||||
|
self._clean_function = clean_function or clean_default
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def value(self):
|
def value(self):
|
||||||
|
"""Return the substring that this node matches."""
|
||||||
return self.string[self.span[0]:self.span[1]]
|
return self.string[self.span[0]:self.span[1]]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def clean_value(self):
|
def clean_value(self):
|
||||||
return clean_string(self.value)
|
"""Return a cleaned value of the matched substring, with better
|
||||||
|
presentation formatting (punctuation marks removed, duplicate
|
||||||
|
spaces, ...)"""
|
||||||
|
if self._clean_value is None:
|
||||||
|
self._clean_value = self.clean_string(self.value)
|
||||||
|
return self._clean_value
|
||||||
|
|
||||||
|
def clean_string(self, string):
|
||||||
|
return self._clean_function(string)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def offset(self):
|
def offset(self):
|
||||||
|
|
@ -55,6 +106,8 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def info(self):
|
def info(self):
|
||||||
|
"""Return a dict containing all the info guessed by this node,
|
||||||
|
subnodes included."""
|
||||||
result = dict(self.guess)
|
result = dict(self.guess)
|
||||||
|
|
||||||
for c in self.children:
|
for c in self.children:
|
||||||
|
|
@ -64,6 +117,7 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def root(self):
|
def root(self):
|
||||||
|
"""Return the root node of the tree."""
|
||||||
if not self.parent:
|
if not self.parent:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
@ -71,28 +125,43 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def depth(self):
|
def depth(self):
|
||||||
|
"""Return the depth of this node."""
|
||||||
if self.is_leaf():
|
if self.is_leaf():
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
return 1 + max(c.depth for c in self.children)
|
return 1 + max(c.depth for c in self.children)
|
||||||
|
|
||||||
def is_leaf(self):
|
def is_leaf(self):
|
||||||
|
"""Return whether this node is a leaf or not."""
|
||||||
return self.children == []
|
return self.children == []
|
||||||
|
|
||||||
def add_child(self, span):
|
def add_child(self, span):
|
||||||
child = MatchTree(self.string, span=span, parent=self)
|
"""Add a new child node to this node with the given span."""
|
||||||
|
child = MatchTree(self.string, span=span, parent=self, clean_function=self._clean_function)
|
||||||
self.children.append(child)
|
self.children.append(child)
|
||||||
|
return child
|
||||||
|
|
||||||
def partition(self, indices):
|
def get_partition_spans(self, indices):
|
||||||
|
"""Return the list of absolute spans for the regions of the original
|
||||||
|
string defined by splitting this node at the given indices (relative
|
||||||
|
to this node)"""
|
||||||
indices = sorted(indices)
|
indices = sorted(indices)
|
||||||
if indices[0] != 0:
|
if indices[0] != 0:
|
||||||
indices.insert(0, 0)
|
indices.insert(0, 0)
|
||||||
if indices[-1] != len(self.value):
|
if indices[-1] != len(self.value):
|
||||||
indices.append(len(self.value))
|
indices.append(len(self.value))
|
||||||
|
|
||||||
|
spans = []
|
||||||
for start, end in zip(indices[:-1], indices[1:]):
|
for start, end in zip(indices[:-1], indices[1:]):
|
||||||
self.add_child(span=(self.offset + start,
|
spans.append((self.offset + start,
|
||||||
self.offset + end))
|
self.offset + end))
|
||||||
|
return spans
|
||||||
|
|
||||||
|
def partition(self, indices):
|
||||||
|
"""Partition this node by splitting it at the given indices,
|
||||||
|
relative to this node."""
|
||||||
|
for partition_span in self.get_partition_spans(indices):
|
||||||
|
self.add_child(span=partition_span)
|
||||||
|
|
||||||
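A small worked example of the two methods above (not from the diff; the string and indices are made up, and the resulting spans are worked out by hand from the code).

node = BaseMatchTree('abcdef')       # span (0, 6), offset 0
node.get_partition_spans([2, 4])     # -> [(0, 2), (2, 4), (4, 6)]
node.partition([2, 4])               # adds three children with those spans
[child.value for child in node.children]   # -> ['ab', 'cd', 'ef']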
def split_on_components(self, components):
|
def split_on_components(self, components):
|
||||||
offset = 0
|
offset = 0
|
||||||
|
|
@ -104,6 +173,7 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
offset = end
|
offset = end
|
||||||
|
|
||||||
def nodes_at_depth(self, depth):
|
def nodes_at_depth(self, depth):
|
||||||
|
"""Return all the nodes at a given depth in the tree"""
|
||||||
if depth == 0:
|
if depth == 0:
|
||||||
yield self
|
yield self
|
||||||
|
|
||||||
|
|
@ -113,38 +183,109 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def node_idx(self):
|
def node_idx(self):
|
||||||
|
"""Return this node's index in the tree, as a tuple.
|
||||||
|
If this node is the root of the tree, then return ()."""
|
||||||
if self.parent is None:
|
if self.parent is None:
|
||||||
return ()
|
return ()
|
||||||
return self.parent.node_idx + (self.parent.children.index(self),)
|
return self.parent.node_idx + (self.node_last_idx,)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def node_last_idx(self):
|
||||||
|
if self.parent is None:
|
||||||
|
return None
|
||||||
|
return self.parent.children.index(self)
|
||||||
|
|
||||||
def node_at(self, idx):
|
def node_at(self, idx):
|
||||||
|
"""Return the node at the given index in the subtree rooted at
|
||||||
|
this node."""
|
||||||
if not idx:
|
if not idx:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return self.children[idx[0]].node_at(idx[1:])
|
return self.children[idx[0]].node_at(idx[1:])
|
||||||
except:
|
except IndexError:
|
||||||
raise ValueError('Non-existent node index: %s' % (idx,))
|
raise ValueError('Non-existent node index: %s' % (idx,))
|
||||||
|
|
||||||
def nodes(self):
|
def nodes(self):
|
||||||
|
"""Return all the nodes and subnodes in this tree."""
|
||||||
yield self
|
yield self
|
||||||
for child in self.children:
|
for child in self.children:
|
||||||
for node in child.nodes():
|
for node in child.nodes():
|
||||||
yield node
|
yield node
|
||||||
|
|
||||||
def _leaves(self):
|
def leaves(self):
|
||||||
|
"""Return a generator over all the nodes that are leaves."""
|
||||||
if self.is_leaf():
|
if self.is_leaf():
|
||||||
yield self
|
yield self
|
||||||
else:
|
else:
|
||||||
for child in self.children:
|
for child in self.children:
|
||||||
# pylint: disable=W0212
|
# pylint: disable=W0212
|
||||||
for leaf in child._leaves():
|
for leaf in child.leaves():
|
||||||
yield leaf
|
yield leaf
|
||||||
|
|
||||||
def leaves(self):
|
def group_node(self):
|
||||||
return list(self._leaves())
|
return self._other_group_node(0)
|
||||||
|
|
||||||
|
def previous_group_node(self):
|
||||||
|
return self._other_group_node(-1)
|
||||||
|
|
||||||
|
def next_group_node(self):
|
||||||
|
return self._other_group_node(+1)
|
||||||
|
|
||||||
|
def _other_group_node(self, offset):
|
||||||
|
if len(self.node_idx) > 1:
|
||||||
|
group_idx = self.node_idx[:2]
|
||||||
|
if group_idx[1] + offset >= 0:
|
||||||
|
other_group_idx = (group_idx[0], group_idx[1] + offset)
|
||||||
|
try:
|
||||||
|
other_group_node = self.root.node_at(other_group_idx)
|
||||||
|
return other_group_node
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
def previous_leaf(self, leaf):
|
||||||
|
"""Return previous leaf for this node"""
|
||||||
|
return self._other_leaf(leaf, -1)
|
||||||
|
|
||||||
|
def next_leaf(self, leaf):
|
||||||
|
"""Return next leaf for this node"""
|
||||||
|
return self._other_leaf(leaf, +1)
|
||||||
|
|
||||||
|
def _other_leaf(self, leaf, offset):
|
||||||
|
leaves = list(self.leaves())
|
||||||
|
index = leaves.index(leaf) + offset
|
||||||
|
if index > 0 and index < len(leaves):
|
||||||
|
return leaves[index]
|
||||||
|
return None
|
||||||
|
|
||||||
|
def previous_leaves(self, leaf):
|
||||||
|
"""Return previous leaves for this node"""
|
||||||
|
leaves = list(self.leaves())
|
||||||
|
index = leaves.index(leaf)
|
||||||
|
if index > 0 and index < len(leaves):
|
||||||
|
previous_leaves = leaves[:index]
|
||||||
|
previous_leaves.reverse()
|
||||||
|
return previous_leaves
|
||||||
|
return []
|
||||||
|
|
||||||
|
def next_leaves(self, leaf):
|
||||||
|
"""Return next leaves for this node"""
|
||||||
|
leaves = list(self.leaves())
|
||||||
|
index = leaves.index(leaf)
|
||||||
|
if index > 0 and index < len(leaves):
|
||||||
|
return leaves[index + 1:len(leaves)]
|
||||||
|
return []
|
||||||
|
|
||||||
def to_string(self):
|
def to_string(self):
|
||||||
|
"""Return a readable string representation of this tree.
|
||||||
|
|
||||||
|
The result is a multi-line string, where the lines are:
|
||||||
|
- line 1 -> N-2: each line contains the nodes at the given depth in the tree
|
||||||
|
- line N-2: original string where all the found groups have been blanked
|
||||||
|
- line N-1: type of property that has been found
|
||||||
|
- line N: the original string, which you can use as a reference.
|
||||||
|
"""
|
||||||
empty_line = ' ' * len(self.string)
|
empty_line = ' ' * len(self.string)
|
||||||
|
|
||||||
def to_hex(x):
|
def to_hex(x):
|
||||||
|
|
@ -153,14 +294,17 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def meaning(result):
|
def meaning(result):
|
||||||
mmap = { 'episodeNumber': 'E',
|
mmap = {'episodeNumber': 'E',
|
||||||
'season': 'S',
|
'season': 'S',
|
||||||
'extension': 'e',
|
'extension': 'e',
|
||||||
'format': 'f',
|
'format': 'f',
|
||||||
'language': 'l',
|
'language': 'l',
|
||||||
'country': 'C',
|
'country': 'C',
|
||||||
'videoCodec': 'v',
|
'videoCodec': 'v',
|
||||||
|
'videoProfile': 'v',
|
||||||
'audioCodec': 'a',
|
'audioCodec': 'a',
|
||||||
|
'audioProfile': 'a',
|
||||||
|
'audioChannels': 'a',
|
||||||
'website': 'w',
|
'website': 'w',
|
||||||
'container': 'c',
|
'container': 'c',
|
||||||
'series': 'T',
|
'series': 'T',
|
||||||
|
|
@ -168,7 +312,8 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
'date': 'd',
|
'date': 'd',
|
||||||
'year': 'y',
|
'year': 'y',
|
||||||
'releaseGroup': 'r',
|
'releaseGroup': 'r',
|
||||||
'screenSize': 's'
|
'screenSize': 's',
|
||||||
|
'other': 'o'
|
||||||
}
|
}
|
||||||
|
|
||||||
if result is None:
|
if result is None:
|
||||||
|
|
@ -180,7 +325,7 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
return 'x'
|
return 'x'
|
||||||
|
|
||||||
lines = [ empty_line ] * (self.depth + 2) # +2: remaining, meaning
|
lines = [empty_line] * (self.depth + 2) # +2: remaining, meaning
|
||||||
lines[-2] = self.string
|
lines[-2] = self.string
|
||||||
|
|
||||||
for node in self.nodes():
|
for node in self.nodes():
|
||||||
|
|
@ -198,63 +343,61 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
lines.append(self.string)
|
lines.append(self.string)
|
||||||
|
|
||||||
return '\n'.join(lines)
|
return '\n'.join(l.rstrip() for l in lines)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return self.to_string()
|
return self.to_string()
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '<MatchTree: root=%s>' % self.value
|
||||||
|
|
||||||
|
|
||||||
class MatchTree(BaseMatchTree):
|
class MatchTree(BaseMatchTree):
|
||||||
"""The MatchTree contains a few "utility" methods which are not necessary
|
"""The MatchTree contains a few "utility" methods which are not necessary
|
||||||
for the BaseMatchTree, but add a lot of convenience for writing
|
for the BaseMatchTree, but add a lot of convenience for writing
|
||||||
higher-level rules."""
|
higher-level rules.
|
||||||
|
"""
|
||||||
|
|
||||||
def _unidentified_leaves(self,
|
def unidentified_leaves(self,
|
||||||
valid=lambda leaf: len(leaf.clean_value) >= 2):
|
valid=lambda leaf: len(leaf.clean_value) > 0):
|
||||||
for leaf in self._leaves():
|
"""Return a generator of leaves that are not empty."""
|
||||||
|
for leaf in self.leaves():
|
||||||
if not leaf.guess and valid(leaf):
|
if not leaf.guess and valid(leaf):
|
||||||
yield leaf
|
yield leaf
|
||||||
|
|
||||||
def unidentified_leaves(self,
|
def leaves_containing(self, property_name):
|
||||||
valid=lambda leaf: len(leaf.clean_value) >= 2):
|
"""Return a generator of leaves that guessed the given property."""
|
||||||
return list(self._unidentified_leaves(valid))
|
|
||||||
|
|
||||||
def _leaves_containing(self, property_name):
|
|
||||||
if isinstance(property_name, base_text_type):
|
if isinstance(property_name, base_text_type):
|
||||||
property_name = [ property_name ]
|
property_name = [property_name]
|
||||||
|
|
||||||
for leaf in self._leaves():
|
for leaf in self.leaves():
|
||||||
for prop in property_name:
|
for prop in property_name:
|
||||||
if prop in leaf.guess:
|
if prop in leaf.guess:
|
||||||
yield leaf
|
yield leaf
|
||||||
break
|
break
|
||||||
|
|
||||||
def leaves_containing(self, property_name):
|
|
||||||
return list(self._leaves_containing(property_name))
|
|
||||||
|
|
||||||
def first_leaf_containing(self, property_name):
|
def first_leaf_containing(self, property_name):
|
||||||
|
"""Return the first leaf containing the given property."""
|
||||||
try:
|
try:
|
||||||
return next(self._leaves_containing(property_name))
|
return next(self.leaves_containing(property_name))
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _previous_unidentified_leaves(self, node):
|
|
||||||
node_idx = node.node_idx
|
|
||||||
for leaf in self._unidentified_leaves():
|
|
||||||
if leaf.node_idx < node_idx:
|
|
||||||
yield leaf
|
|
||||||
|
|
||||||
def previous_unidentified_leaves(self, node):
|
def previous_unidentified_leaves(self, node):
|
||||||
return list(self._previous_unidentified_leaves(node))
|
"""Return a generator of non-empty leaves that are before the given
|
||||||
|
node (in the string)."""
|
||||||
def _previous_leaves_containing(self, node, property_name):
|
|
||||||
node_idx = node.node_idx
|
node_idx = node.node_idx
|
||||||
for leaf in self._leaves_containing(property_name):
|
for leaf in self.unidentified_leaves():
|
||||||
if leaf.node_idx < node_idx:
|
if leaf.node_idx < node_idx:
|
||||||
yield leaf
|
yield leaf
|
||||||
|
|
||||||
def previous_leaves_containing(self, node, property_name):
|
def previous_leaves_containing(self, node, property_name):
|
||||||
return list(self._previous_leaves_containing(node, property_name))
|
"""Return a generator of leaves containing the given property that are
|
||||||
|
before the given node (in the string)."""
|
||||||
|
node_idx = node.node_idx
|
||||||
|
for leaf in self.leaves_containing(property_name):
|
||||||
|
if leaf.node_idx < node_idx:
|
||||||
|
yield leaf
|
||||||
|
|
||||||
def is_explicit(self):
|
def is_explicit(self):
|
||||||
"""Return whether the group was explicitly enclosed by
|
"""Return whether the group was explicitly enclosed by
|
||||||
|
|
@ -262,26 +405,22 @@ class MatchTree(BaseMatchTree):
|
||||||
return (self.value[0] + self.value[-1]) in group_delimiters
|
return (self.value[0] + self.value[-1]) in group_delimiters
|
||||||
|
|
||||||
def matched(self):
|
def matched(self):
|
||||||
|
"""Return a single guess that contains all the info found in the
|
||||||
|
nodes of this tree, trying to merge properties as good as possible.
|
||||||
|
"""
|
||||||
|
if not getattr(self, '_matched_result', None):
|
||||||
# we need to make a copy here, as the merge functions work in place and
|
# we need to make a copy here, as the merge functions work in place and
|
||||||
# calling them on the match tree would modify it
|
# calling them on the match tree would modify it
|
||||||
parts = [node.guess for node in self.nodes() if node.guess]
|
parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]
|
||||||
parts = copy.deepcopy(parts)
|
|
||||||
|
|
||||||
# 1- try to merge similar information together and give it a higher
|
result = smart_merge(parts)
|
||||||
# confidence
|
|
||||||
for int_part in ('year', 'season', 'episodeNumber'):
|
|
||||||
merge_similar_guesses(parts, int_part, choose_int)
|
|
||||||
|
|
||||||
for string_part in ('title', 'series', 'container', 'format',
|
|
||||||
'releaseGroup', 'website', 'audioCodec',
|
|
||||||
'videoCodec', 'screenSize', 'episodeFormat',
|
|
||||||
'audioChannels', 'idNumber'):
|
|
||||||
merge_similar_guesses(parts, string_part, choose_string)
|
|
||||||
|
|
||||||
# 2- merge the rest, potentially discarding information not properly
|
|
||||||
# merged before
|
|
||||||
result = merge_all(parts,
|
|
||||||
append=['language', 'subtitleLanguage', 'other'])
|
|
||||||
|
|
||||||
log.debug('Final result: ' + result.nice_string())
|
log.debug('Final result: ' + result.nice_string())
|
||||||
return result
|
self._matched_result = result
|
||||||
|
|
||||||
|
for unidentified_leaves in self.unidentified_leaves():
|
||||||
|
if 'unidentified' not in self._matched_result:
|
||||||
|
self._matched_result['unidentified'] = []
|
||||||
|
self._matched_result['unidentified'].append(unidentified_leaves.clean_value)
|
||||||
|
|
||||||
|
return self._matched_result
|
||||||
|
|
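For illustration, the kind of merged result matched() produces for a filename like the one in the BaseMatchTree docstring. This is a sketch, not output from the diff; the exact keys and values are assumptions.

tree = IterativeMatcher('Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv').match_tree
info = tree.matched()
# info is a Guess (a dict subclass), roughly:
# {'type': 'movie', 'title': 'Dark City', 'year': 1998, 'format': 'BluRay',
#  'screenSize': '720p', 'audioCodec': 'DTS', 'videoCodec': 'h264',
#  'releaseGroup': 'CHD', 'container': 'mkv', 'unidentified': ['DC']}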
|
||||||
libs/guessit/options.py (new file, 69 lines)
|
|
@ -0,0 +1,69 @@
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
|
|
||||||
|
def build_opts(transformers=None):
|
||||||
|
opts = ArgumentParser()
|
||||||
|
opts.add_argument(dest='filename', help='Filename or release name to guess', nargs='*')
|
||||||
|
|
||||||
|
naming_opts = opts.add_argument_group("Naming")
|
||||||
|
naming_opts.add_argument('-t', '--type', dest='type', default=None,
|
||||||
|
help='The suggested file type: movie, episode. If undefined, type will be guessed.')
|
||||||
|
naming_opts.add_argument('-n', '--name-only', dest='name_only', action='store_true', default=False,
|
||||||
|
help='Parse files as name only. Disable folder parsing, extension parsing, and file content analysis.')
|
||||||
|
naming_opts.add_argument('-c', '--split-camel', dest='split_camel', action='store_true', default=False,
|
||||||
|
help='Split camel case part of filename.')
|
||||||
|
|
||||||
|
naming_opts.add_argument('-X', '--disabled-transformer', action='append', dest='disabled_transformers',
|
||||||
|
help='Transformer to disable (can be used multiple time)')
|
||||||
|
|
||||||
|
output_opts = opts.add_argument_group("Output")
|
||||||
|
output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False,
|
||||||
|
help='Display debug output')
|
||||||
|
output_opts.add_argument('-P', '--show-property', dest='show_property', default=None,
|
||||||
|
help='Display the value of a single property (title, series, videoCodec, year, type ...)'),
|
||||||
|
output_opts.add_argument('-u', '--unidentified', dest='unidentified', action='store_true', default=False,
|
||||||
|
help='Display the unidentified parts.'),
|
||||||
|
output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=False,
|
||||||
|
help='Display advanced information for filename guesses, as json output')
|
||||||
|
output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=False,
|
||||||
|
help='Display information for filename guesses as yaml output (like unit-test)')
|
||||||
|
output_opts.add_argument('-f', '--input-file', dest='input_file', default=False,
|
||||||
|
help='Read filenames from an input file.')
|
||||||
|
output_opts.add_argument('-d', '--demo', action='store_true', dest='demo', default=False,
|
||||||
|
help='Run a few builtin tests instead of analyzing a file')
|
||||||
|
|
||||||
|
information_opts = opts.add_argument_group("Information")
|
||||||
|
information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=False,
|
||||||
|
help='Display properties that can be guessed.')
|
||||||
|
information_opts.add_argument('-V', '--values', dest='values', action='store_true', default=False,
|
||||||
|
help='Display property values that can be guessed.')
|
||||||
|
information_opts.add_argument('-s', '--transformers', dest='transformers', action='store_true', default=False,
|
||||||
|
help='Display transformers that can be used.')
|
||||||
|
information_opts.add_argument('--version', dest='version', action='store_true', default=False,
|
||||||
|
help='Display the guessit version.')
|
||||||
|
|
||||||
|
webservice_opts = opts.add_argument_group("guessit.io")
|
||||||
|
webservice_opts.add_argument('-b', '--bug', action='store_true', dest='submit_bug', default=False,
|
||||||
|
help='Submit a wrong detection to the guessit.io service')
|
||||||
|
|
||||||
|
other_opts = opts.add_argument_group("Other features")
|
||||||
|
other_opts.add_argument('-i', '--info', dest='info', default='filename',
|
||||||
|
help='The desired information type: filename, video, hash_mpc or a hash from python\'s '
|
||||||
|
'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
|
||||||
|
'them, comma-separated')
|
||||||
|
|
||||||
|
if transformers:
|
||||||
|
for transformer in transformers:
|
||||||
|
transformer.register_arguments(opts, naming_opts, output_opts, information_opts, webservice_opts, other_opts)
|
||||||
|
|
||||||
|
return opts, naming_opts, output_opts, information_opts, webservice_opts, other_opts
|
||||||
|
_opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts = None, None, None, None, None, None
|
||||||
|
|
||||||
|
|
||||||
|
def reload(transformers=None):
|
||||||
|
global _opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts
|
||||||
|
_opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts = build_opts(transformers)
|
||||||
|
|
||||||
|
|
||||||
|
def get_opts():
|
||||||
|
return _opts
|
||||||
|
|
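A usage sketch for the option parser defined above (not from the diff; the import path and the argv values are assumptions).

from guessit import options

options.reload()                      # build the default parser
parser = options.get_opts()
args = parser.parse_args(['-t', 'episode', '-v', 'Show.Name.S02E13.720p.mkv'])
print(args.type, args.verbose, args.filename)
# -> episode True ['Show.Name.S02E13.720p.mkv']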
@ -1,250 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# GuessIt - A library for guessing information from filenames
|
|
||||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
|
||||||
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
|
|
||||||
#
|
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
|
||||||
# the Free Software Foundation; either version 3 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# GuessIt is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# Lesser GNU General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the Lesser GNU General Public License
|
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
#
|
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ]
|
|
||||||
|
|
||||||
info_exts = [ 'nfo' ]
|
|
||||||
|
|
||||||
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
|
|
||||||
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
|
|
||||||
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv']
|
|
||||||
|
|
||||||
group_delimiters = [ '()', '[]', '{}' ]
|
|
||||||
|
|
||||||
# separator character regexp
|
|
||||||
sep = r'[][,)(}{+ /\._-]' # regexp art, hehe :D
|
|
||||||
|
|
||||||
# character used to represent a deleted char (when matching groups)
|
|
||||||
deleted = '_'
|
|
||||||
|
|
||||||
# format: [ (regexp, confidence, span_adjust) ]
|
|
||||||
episode_rexps = [ # ... Season 2 ...
|
|
||||||
(r'season (?P<season>[0-9]+)', 1.0, (0, 0)),
|
|
||||||
(r'saison (?P<season>[0-9]+)', 1.0, (0, 0)),
|
|
||||||
|
|
||||||
# ... s02e13 ...
|
|
||||||
(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<episodeNumber>(?:-?[eE-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
|
|
||||||
|
|
||||||
# ... s03-x02 ... # FIXME: redundant? remove it?
|
|
||||||
#(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<bonusNumber>(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
|
|
||||||
|
|
||||||
# ... 2x13 ...
|
|
||||||
(r'[^0-9](?P<season>[0-9]{1,2})[^0-9 .-]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),
|
|
||||||
|
|
||||||
# ... s02 ...
|
|
||||||
#(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)),
|
|
||||||
(r's(?P<season>[0-9]{1,2})[^0-9]', 0.6, (0, -1)),
|
|
||||||
|
|
||||||
# v2 or v3 for some mangas which have multiples rips
|
|
||||||
(r'(?P<episodeNumber>[0-9]{1,3})v[23]' + sep, 0.6, (0, 0)),
|
|
||||||
|
|
||||||
# ... ep 23 ...
|
|
||||||
('ep' + sep + r'(?P<episodeNumber>[0-9]{1,2})[^0-9]', 0.7, (0, -1)),
|
|
||||||
|
|
||||||
# ... e13 ... for a mini-series without a season number
|
|
||||||
(sep + r'e(?P<episodeNumber>[0-9]{1,2})' + sep, 0.6, (1, -1))
|
|
||||||
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
weak_episode_rexps = [ # ... 213 or 0106 ...
|
|
||||||
(sep + r'(?P<episodeNumber>[0-9]{2,4})' + sep, (1, -1))
|
|
||||||
]
|
|
||||||
|
|
||||||
non_episode_title = [ 'extras', 'rip' ]
|
|
||||||
|
|
||||||
|
|
||||||
video_rexps = [ # cd number
|
|
||||||
(r'cd ?(?P<cdNumber>[0-9])( ?of ?(?P<cdNumberTotal>[0-9]))?', 1.0, (0, 0)),
|
|
||||||
(r'(?P<cdNumberTotal>[1-9]) cds?', 0.9, (0, 0)),
|
|
||||||
|
|
||||||
# special editions
|
|
||||||
(r'edition' + sep + r'(?P<edition>collector)', 1.0, (0, 0)),
|
|
||||||
(r'(?P<edition>collector)' + sep + 'edition', 1.0, (0, 0)),
|
|
||||||
(r'(?P<edition>special)' + sep + 'edition', 1.0, (0, 0)),
|
|
||||||
(r'(?P<edition>criterion)' + sep + 'edition', 1.0, (0, 0)),
|
|
||||||
|
|
||||||
# director's cut
|
|
||||||
(r"(?P<edition>director'?s?" + sep + "cut)", 1.0, (0, 0)),
|
|
||||||
|
|
||||||
# video size
|
|
||||||
(r'(?P<width>[0-9]{3,4})x(?P<height>[0-9]{3,4})', 0.9, (0, 0)),
|
|
||||||
|
|
||||||
# website
|
|
||||||
(r'(?P<website>www(\.[a-zA-Z0-9]+){2,3})', 0.8, (0, 0)),
|
|
||||||
|
|
||||||
# bonusNumber: ... x01 ...
|
|
||||||
(r'x(?P<bonusNumber>[0-9]{1,2})', 1.0, (0, 0)),
|
|
||||||
|
|
||||||
# filmNumber: ... f01 ...
|
|
||||||
(r'f(?P<filmNumber>[0-9]{1,2})', 1.0, (0, 0))
|
|
||||||
]
|
|
||||||
|
|
||||||
websites = [ 'tvu.org.ru', 'emule-island.com', 'UsaBit.com', 'www.divx-overnet.com',
|
|
||||||
'sharethefiles.com' ]
|
|
||||||
|
|
||||||
unlikely_series = [ 'series' ]
|
|
||||||
|
|
||||||
|
|
||||||
# prop_multi is a dict of { property_name: { canonical_form: [ pattern ] } }
|
|
||||||
# pattern is a string considered as a regexp, with the addition that dashes are
|
|
||||||
# replaced with '([ \.-_])?' which matches more types of separators (or none)
|
|
||||||
# note: simpler patterns need to be at the end of the list to not shadow more
|
|
||||||
# complete ones, eg: 'AAC' needs to come after 'He-AAC'
|
|
||||||
# ie: from most specific to less specific
|
|
||||||
prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ],
|
|
||||||
'HD-DVD': [ 'HD-(?:DVD)?-Rip', 'HD-DVD' ],
|
|
||||||
'BluRay': [ 'Blu-ray', 'B[DR]Rip' ],
|
|
||||||
'HDTV': [ 'HD-TV' ],
|
|
||||||
'DVB': [ 'DVB-Rip', 'DVB', 'PD-TV' ],
|
|
||||||
'WEBRip': [ 'WEB-Rip' ],
|
|
||||||
'Screener': [ 'DVD-SCR', 'Screener' ],
|
|
||||||
'VHS': [ 'VHS' ],
|
|
||||||
'WEB-DL': [ 'WEB-DL' ] },
|
|
||||||
|
|
||||||
'is3D': { True: [ '3D' ] },
|
|
||||||
|
|
||||||
'screenSize': { '480p': [ '480[pi]?' ],
|
|
||||||
'720p': [ '720[pi]?' ],
|
|
||||||
'1080i': [ '1080i' ],
|
|
||||||
'1080p': [ '1080p', '1080[^i]' ] },
|
|
||||||
|
|
||||||
'videoCodec': { 'XviD': [ 'Xvid' ],
|
|
||||||
'DivX': [ 'DVDivX', 'DivX' ],
|
|
||||||
'h264': [ '[hx]-264' ],
|
|
||||||
'Rv10': [ 'Rv10' ],
|
|
||||||
'Mpeg2': [ 'Mpeg2' ] },
|
|
||||||
|
|
||||||
# has nothing to do here (or on filenames for that matter), but some
|
|
||||||
# releases use it and it helps to identify release groups, so we adapt
|
|
||||||
'videoApi': { 'DXVA': [ 'DXVA' ] },
|
|
||||||
|
|
||||||
'audioCodec': { 'AC3': [ 'AC3' ],
|
|
||||||
'DTS': [ 'DTS' ],
|
|
||||||
'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] },
|
|
||||||
|
|
||||||
'audioChannels': { '5.1': [ r'5\.1', 'DD5[._ ]1', '5ch' ] },
|
|
||||||
|
|
||||||
'episodeFormat': { 'Minisode': [ 'Minisodes?' ] }
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
# prop_single dict of { property_name: [ canonical_form ] }
|
|
||||||
prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'PUKKA',
|
|
||||||
'CHD', 'ViTE', 'TLF', 'FLAiTE',
|
|
||||||
'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS',
|
|
||||||
'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL',
|
|
||||||
'CtrlHD', 'POD', 'WiKi','IMMERSE', 'FQM',
|
|
||||||
'2HD', 'CTU', 'HALCYON', 'EbP', 'SiTV',
|
|
||||||
'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV',
|
|
||||||
'TLA', 'NTB', 'ASAP', 'MOMENTUM', 'FoV', 'D-Z0N3',
|
|
||||||
'TrollHD', 'ECI'
|
|
||||||
],
|
|
||||||
|
|
||||||
# potentially confusing release group names (they are words)
|
|
||||||
'weakReleaseGroup': [ 'DEiTY', 'FiNaLe', 'UnSeeN', 'KiNGS', 'CLUE', 'DIMENSION',
|
|
||||||
'SAiNTS', 'ARROW', 'EuReKA', 'SiNNERS', 'DiRTY', 'REWARD',
|
|
||||||
'REPTiLE',
|
|
||||||
],
|
|
||||||
|
|
||||||
'other': [ 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'Audiofixed', 'R5',
|
|
||||||
'complete', 'classic', # not so sure about these ones, could appear in a title
|
|
||||||
'ws' ] # widescreen
|
|
||||||
}
|
|
||||||
|
|
||||||
_dash = '-'
|
|
||||||
_psep = '[-. _]?'
|
|
||||||
|
|
||||||
def _to_rexp(prop):
|
|
||||||
return re.compile(prop.replace(_dash, _psep), re.IGNORECASE)
|
|
||||||
|
|
||||||
# properties_rexps dict of { property_name: { canonical_form: [ rexp ] } }
|
|
||||||
# containing the rexps compiled from both prop_multi and prop_single
|
|
||||||
properties_rexps = dict((type, dict((canonical_form,
|
|
||||||
[ _to_rexp(pattern) for pattern in patterns ])
|
|
||||||
for canonical_form, patterns in props.items()))
|
|
||||||
for type, props in prop_multi.items())
|
|
||||||
|
|
||||||
properties_rexps.update(dict((type, dict((canonical_form, [ _to_rexp(canonical_form) ])
|
|
||||||
for canonical_form in props))
|
|
||||||
for type, props in prop_single.items()))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def find_properties(string):
|
|
||||||
result = []
|
|
||||||
for property_name, props in properties_rexps.items():
|
|
||||||
# FIXME: this should be done in a more flexible way...
|
|
||||||
if property_name in ['weakReleaseGroup']:
|
|
||||||
continue
|
|
||||||
|
|
||||||
for canonical_form, rexps in props.items():
|
|
||||||
for value_rexp in rexps:
|
|
||||||
match = value_rexp.search(string)
|
|
||||||
if match:
|
|
||||||
start, end = match.span()
|
|
||||||
# make sure our word is always surrounded by separators
|
|
||||||
# note: sep is a regexp, but in this case using it as
|
|
||||||
# a char sequence achieves the same goal
|
|
||||||
if ((start > 0 and string[start-1] not in sep) or
|
|
||||||
(end < len(string) and string[end] not in sep)):
|
|
||||||
continue
|
|
||||||
|
|
||||||
result.append((property_name, canonical_form, start, end))
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
property_synonyms = { 'Special Edition': [ 'Special' ],
|
|
||||||
'Collector Edition': [ 'Collector' ],
|
|
||||||
'Criterion Edition': [ 'Criterion' ]
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def revert_synonyms():
|
|
||||||
reverse = {}
|
|
||||||
|
|
||||||
for canonical, synonyms in property_synonyms.items():
|
|
||||||
for synonym in synonyms:
|
|
||||||
reverse[synonym.lower()] = canonical
|
|
||||||
|
|
||||||
return reverse
|
|
||||||
|
|
||||||
|
|
||||||
reverse_synonyms = revert_synonyms()
|
|
||||||
|
|
||||||
|
|
||||||
def canonical_form(string):
|
|
||||||
return reverse_synonyms.get(string.lower(), string)
|
|
||||||
|
|
||||||
|
|
||||||
def compute_canonical_form(property_name, value):
|
|
||||||
"""Return the canonical form of a property given its type if it is a valid
|
|
||||||
one, None otherwise."""
|
|
||||||
if isinstance(value, basestring):
|
|
||||||
for canonical_form, rexps in properties_rexps[property_name].items():
|
|
||||||
for rexp in rexps:
|
|
||||||
if rexp.match(value):
|
|
||||||
return canonical_form
|
|
||||||
return None
|
|
||||||
libs/guessit/patterns/__init__.py (new executable file, 77 lines)
|
|
@ -0,0 +1,77 @@
|
||||||
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

import re

from guessit import base_text_type

group_delimiters = ['()', '[]', '{}']

# separator character regexp
sep = r'[][,)(}:{+ /~/\._-]'  # regexp art, hehe :D

_dash = '-'
_psep = '[\W_]?'


def build_or_pattern(patterns, escape=False):
    """Build a or pattern string from a list of possible patterns
    """
    or_pattern = []
    for pattern in patterns:
        if not or_pattern:
            or_pattern.append('(?:')
        else:
            or_pattern.append('|')
        or_pattern.append('(?:%s)' % re.escape(pattern) if escape else pattern)
    or_pattern.append(')')
    return ''.join(or_pattern)


def compile_pattern(pattern, enhance=True):
    """Compile and enhance a pattern

    :param pattern: Pattern to compile (regexp).
    :type pattern: string

    :param pattern: Enhance pattern before compiling.
    :type pattern: string

    :return: The compiled pattern
    :rtype: regular expression object
    """
    return re.compile(enhance_pattern(pattern) if enhance else pattern, re.IGNORECASE)


def enhance_pattern(pattern):
    """Enhance pattern to match more equivalent values.

    '-' are replaced by '[\W_]?', which matches more types of separators (or none)

    :param pattern: Pattern to enhance (regexp).
    :type pattern: string

    :return: The enhanced pattern
    :rtype: string
    """
    return pattern.replace(_dash, _psep)
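A hedged usage sketch (not part of the commit) of the helpers defined in libs/guessit/patterns/__init__.py above, assuming the bundled libs/ directory is importable:

# Hypothetical usage sketch only; nothing below is part of the committed diff.
from guessit.patterns import build_or_pattern, compile_pattern, enhance_pattern

assert build_or_pattern(['HDTV', 'WEB-DL']) == '(?:HDTV|WEB-DL)'
# enhance_pattern() widens every '-' into an optional separator class
assert enhance_pattern('WEB-DL') == 'WEB[\W_]?DL'
# so the compiled pattern also matches 'WEB DL', 'web_dl', 'webdl', ...
assert compile_pattern('WEB-DL').match('web dl') is not None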
32  libs/guessit/patterns/extension.py  Normal file
@@ -0,0 +1,32 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

subtitle_exts = ['srt', 'idx', 'sub', 'ssa', 'ass']

info_exts = ['nfo']

video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
              'iso']
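A small hedged sketch (not in the commit) showing how the extension lists above can be used; is_video() is a made-up helper name for illustration:

# Hypothetical usage sketch only; is_video() is not part of guessit.
import os.path
from guessit.patterns.extension import video_exts

def is_video(filename):
    # compare the extension (without the leading dot) against the known list
    return os.path.splitext(filename)[1][1:].lower() in video_exts

assert is_video('Dark.City.1998.720p.mkv')
assert not is_video('Dark.City.1998.720p.srt')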
150  libs/guessit/patterns/numeral.py  Normal file
@@ -0,0 +1,150 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

import re

digital_numeral = '\d{1,4}'

roman_numeral = "(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})"

english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]

french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]

french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]


def __build_word_numeral(*args, **kwargs):
    re_ = None
    for word_list in args:
        for word in word_list:
            if not re_:
                re_ = '(?:(?=\w+)'
            else:
                re_ += '|'
            re_ += word
    re_ += ')'
    return re_


word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)

numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'

__romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
)

__romanNumeralPattern = re.compile('^' + roman_numeral + '$')


def __parse_roman(value):
    """convert Roman numeral to integer"""
    if not __romanNumeralPattern.search(value):
        raise ValueError('Invalid Roman numeral: %s' % value)

    result = 0
    index = 0
    for num, integer in __romanNumeralMap:
        while value[index:index + len(num)] == num:
            result += integer
            index += len(num)
    return result


def __parse_word(value):
    """Convert Word numeral to integer"""
    for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
        try:
            return word_list.index(value.lower())
        except ValueError:
            pass
    raise ValueError


_clean_re = re.compile('[^\d]*(\d+)[^\d]*')


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """Parse a numeric value into integer.

    input can be an integer as a string, a roman numeral or a word

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string

    :return: Numeric value, or None if value can't be parsed
    :rtype: int
    """
    if int_enabled:
        try:
            if clean:
                match = _clean_re.match(value)
                if match:
                    clean_value = match.group(1)
                    return int(clean_value)
            return int(value)
        except ValueError:
            pass
    if roman_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_roman(word.upper())
                    except ValueError:
                        pass
            return __parse_roman(value)
        except ValueError:
            pass
    if word_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_word(word)
                    except ValueError:
                        pass
            return __parse_word(value)
        except ValueError:
            pass
    raise ValueError('Invalid numeral: ' + value)
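A hedged sketch (not part of the commit) of what parse_numeral() above accepts:

# Hypothetical usage sketch only; nothing below is part of the committed diff.
from guessit.patterns.numeral import parse_numeral

assert parse_numeral('3') == 3          # plain digits
assert parse_numeral('XIV') == 14       # roman numerals
assert parse_numeral('twelve') == 12    # english words
assert parse_numeral('treize') == 13    # french words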
21  libs/guessit/plugins/__init__.py  Normal file
@@ -0,0 +1,21 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
219  libs/guessit/plugins/transformers.py  Normal file
@@ -0,0 +1,219 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.options import reload as reload_options

from stevedore import ExtensionManager
from pkg_resources import EntryPoint

from stevedore.extension import Extension
from logging import getLogger

log = getLogger(__name__)


class Transformer(object):  # pragma: no cover
    def __init__(self, priority=0):
        self.priority = priority
        self.log = getLogger(self.name)

    @property
    def name(self):
        return self.__class__.__name__

    def supported_properties(self):
        return {}

    def second_pass_options(self, mtree, options=None):
        return None

    def should_process(self, mtree, options=None):
        return True

    def process(self, mtree, options=None):
        pass

    def post_process(self, mtree, options=None):
        pass

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        pass

    def rate_quality(self, guess, *props):
        return 0


class CustomTransformerExtensionManager(ExtensionManager):
    def __init__(self, namespace='guessit.transformer', invoke_on_load=True,
                 invoke_args=(), invoke_kwds={}, propagate_map_exceptions=True, on_load_failure_callback=None,
                 verify_requirements=False):
        super(CustomTransformerExtensionManager, self).__init__(namespace=namespace,
                                                                invoke_on_load=invoke_on_load,
                                                                invoke_args=invoke_args,
                                                                invoke_kwds=invoke_kwds,
                                                                propagate_map_exceptions=propagate_map_exceptions,
                                                                on_load_failure_callback=on_load_failure_callback,
                                                                verify_requirements=verify_requirements)

    def order_extensions(self, extensions):
        """Order the loaded transformers

        It should follow those rules
           - website before language (eg: tvu.org.ru vs russian)
           - language before episodes_rexps
           - properties before language (eg: he-aac vs hebrew)
           - release_group before properties (eg: XviD-?? vs xvid)
        """
        extensions.sort(key=lambda ext: -ext.obj.priority)
        return extensions

    def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds, verify_requirements=True):
        if not ep.dist:
            # `require` argument of ep.load() is deprecated in newer versions of setuptools
            if hasattr(ep, 'resolve'):
                plugin = ep.resolve()
            elif hasattr(ep, '_load'):
                plugin = ep._load()
            else:
                plugin = ep.load(require=False)
        else:
            plugin = ep.load()
        if invoke_on_load:
            obj = plugin(*invoke_args, **invoke_kwds)
        else:
            obj = None
        return Extension(ep.name, ep, plugin, obj)

    def _load_plugins(self, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
        return self.order_extensions(super(CustomTransformerExtensionManager, self)._load_plugins(invoke_on_load, invoke_args, invoke_kwds, verify_requirements))

    def objects(self):
        return self.map(self._get_obj)

    def _get_obj(self, ext):
        return ext.obj

    def object(self, name):
        try:
            return self[name].obj
        except KeyError:
            return None

    def register_module(self, name=None, module_name=None, attrs=(), entry_point=None):
        if entry_point:
            ep = EntryPoint.parse(entry_point)
        else:
            ep = EntryPoint(name, module_name, attrs)
        loaded = self._load_one_plugin(ep, invoke_on_load=True, invoke_args=(), invoke_kwds={})
        if loaded:
            self.extensions.append(loaded)
            self.extensions = self.order_extensions(self.extensions)
            self._extensions_by_name = None


class DefaultTransformerExtensionManager(CustomTransformerExtensionManager):
    @property
    def _internal_entry_points(self):
        return ['split_path_components = guessit.transfo.split_path_components:SplitPathComponents',
                'guess_filetype = guessit.transfo.guess_filetype:GuessFiletype',
                'split_explicit_groups = guessit.transfo.split_explicit_groups:SplitExplicitGroups',
                'guess_date = guessit.transfo.guess_date:GuessDate',
                'guess_website = guessit.transfo.guess_website:GuessWebsite',
                'guess_release_group = guessit.transfo.guess_release_group:GuessReleaseGroup',
                'guess_properties = guessit.transfo.guess_properties:GuessProperties',
                'guess_language = guessit.transfo.guess_language:GuessLanguage',
                'guess_video_rexps = guessit.transfo.guess_video_rexps:GuessVideoRexps',
                'guess_episodes_rexps = guessit.transfo.guess_episodes_rexps:GuessEpisodesRexps',
                'guess_weak_episodes_rexps = guessit.transfo.guess_weak_episodes_rexps:GuessWeakEpisodesRexps',
                'guess_bonus_features = guessit.transfo.guess_bonus_features:GuessBonusFeatures',
                'guess_year = guessit.transfo.guess_year:GuessYear',
                'guess_country = guessit.transfo.guess_country:GuessCountry',
                'guess_idnumber = guessit.transfo.guess_idnumber:GuessIdnumber',
                'split_on_dash = guessit.transfo.split_on_dash:SplitOnDash',
                'guess_episode_info_from_position = guessit.transfo.guess_episode_info_from_position:GuessEpisodeInfoFromPosition',
                'guess_movie_title_from_position = guessit.transfo.guess_movie_title_from_position:GuessMovieTitleFromPosition',
                'guess_episode_details = guessit.transfo.guess_episode_details:GuessEpisodeDetails',
                'expected_series = guessit.transfo.expected_series:ExpectedSeries',
                'expected_title = guessit.transfo.expected_title:ExpectedTitle',]

    def _find_entry_points(self, namespace):
        entry_points = {}
        # Internal entry points
        if namespace == self.namespace:
            for internal_entry_point_str in self._internal_entry_points:
                internal_entry_point = EntryPoint.parse(internal_entry_point_str)
                entry_points[internal_entry_point.name] = internal_entry_point

        # Package entry points
        setuptools_entrypoints = super(DefaultTransformerExtensionManager, self)._find_entry_points(namespace)
        for setuptools_entrypoint in setuptools_entrypoints:
            entry_points[setuptools_entrypoint.name] = setuptools_entrypoint

        return list(entry_points.values())

_extensions = None


def all_transformers():
    return _extensions.objects()


def get_transformer(name):
    return _extensions.object(name)


def add_transformer(name, module_name, class_name):
    """
    Add a transformer

    :param name: the name of the transformer. ie: 'guess_regexp_id'
    :param name: the module name. ie: 'flexget.utils.parsers.transformers.guess_regexp_id'
    :param class_name: the class name. ie: 'GuessRegexpId'
    """
    _extensions.register_module(name, module_name, (class_name,))


def add_transformer(entry_point):
    """
    Add a transformer

    :param entry_point: entry point spec format. ie: 'guess_regexp_id = flexget.utils.parsers.transformers.guess_regexp_id:GuessRegexpId'
    """
    _extensions.register_module(entry_point = entry_point)


def reload(custom=False):
    """
    Reload extension manager with default or custom one.
    :param custom: if True, custom manager will be used, else default one.
    Default manager will load default extensions from guessit and setuptools packaging extensions
    Custom manager will not load default extensions from guessit, using only setuptools packaging extensions.
    :type custom: boolean
    """
    global _extensions
    if custom:
        _extensions = CustomTransformerExtensionManager()
    else:
        _extensions = DefaultTransformerExtensionManager()
    reload_options(all_transformers())

reload()
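A hedged sketch (not part of the commit) of how a third-party transformer could plug into the extension manager above; MyTransformer and its entry-point path are invented names for illustration:

# Hypothetical usage sketch only; MyTransformer and 'mypackage...' are made-up names.
from guessit.plugins.transformers import Transformer, add_transformer

class MyTransformer(Transformer):
    def __init__(self):
        Transformer.__init__(self, priority=10)  # higher priority sorts earlier

    def process(self, mtree, options=None):
        pass  # inspect or annotate the match tree here

# the entry-point form of add_transformer() (the later definition above) would register it:
# add_transformer('my_transformer = mypackage.transformers:MyTransformer')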
65  libs/guessit/quality.py  Normal file
@@ -0,0 +1,65 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.plugins.transformers import all_transformers


def best_quality_properties(props, *guesses):
    """Retrieve the best quality guess, based on given properties

    :param props: Properties to include in the rating
    :type props: list of strings
    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses
    :rtype: :class:`guessit.guess.Guess`
    """
    best_guess = None
    best_rate = None
    for guess in guesses:
        for transformer in all_transformers():
            rate = transformer.rate_quality(guess, *props)
            if best_rate is None or best_rate < rate:
                best_rate = rate
                best_guess = guess
    return best_guess


def best_quality(*guesses):
    """Retrieve the best quality guess.

    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses
    :rtype: :class:`guessit.guess.Guess`
    """
    best_guess = None
    best_rate = None
    for guess in guesses:
        for transformer in all_transformers():
            rate = transformer.rate_quality(guess)
            if best_rate is None or best_rate < rate:
                best_rate = rate
                best_guess = guess
    return best_guess
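A hedged sketch (not part of the commit) of the quality helpers above in use:

# Hypothetical usage sketch only; nothing below is part of the committed diff.
from guessit import guess_file_info
from guessit.quality import best_quality

g1 = guess_file_info('Dark.City.1998.DVDRip.XviD.avi')
g2 = guess_file_info('Dark.City.1998.720p.BluRay.x264.mkv')
# every loaded transformer rates each guess; the guess that earns the single
# highest rate_quality() score is returned
best = best_quality(g1, g2)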
@@ -1,28 +1,28 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
-# Smewt - A smart collection manager
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# GuessIt - A library for guessing information from filenames
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
-# Smewt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
+# GuessIt is free software; you can redistribute it and/or modify it under
+# the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
-# Smewt is distributed in the hope that it will be useful,
+# GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
+# Lesser GNU General Public License for more details.
 #
-# You should have received a copy of the GNU General Public License
+# You should have received a copy of the Lesser GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import logging
 import sys
-import os, os.path
+import os
 
 
 GREEN_FONT = "\x1B[0;32m"
 YELLOW_FONT = "\x1B[0;33m"
@@ -31,7 +31,7 @@ RED_FONT = "\x1B[0;31m"
 RESET_FONT = "\x1B[0m"
 
 
-def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):
+def setup_logging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):  # pragma: no cover
     """Set up a nice colored logger as the main application logger."""
 
     class SimpleFormatter(logging.Formatter):
BIN  libs/guessit/test/1MB  Normal file
Binary file not shown.
26  libs/guessit/test/__init__.py  Normal file
@@ -0,0 +1,26 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

import logging
from guessit.slogging import setup_logging
setup_logging()
logging.disable(logging.INFO)
40  libs/guessit/test/__main__.py  Normal file
@@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test import (test_api, test_autodetect, test_autodetect_all, test_doctests,
                          test_episode, test_hashes, test_language, test_main,
                          test_matchtree, test_movie, test_quality, test_utils)
from unittest import TextTestRunner


import logging

def main():
    for suite in [test_api.suite, test_autodetect.suite,
                  test_autodetect_all.suite, test_doctests.suite,
                  test_episode.suite, test_hashes.suite, test_language.suite,
                  test_main.suite, test_matchtree.suite, test_movie.suite,
                  test_quality.suite, test_utils.suite]:
        TextTestRunner(verbosity=2).run(suite)


if __name__ == '__main__':
    main()
489  libs/guessit/test/autodetect.yaml  Normal file
@@ -0,0 +1,489 @@
|
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
|
||||||
|
: type: movie
|
||||||
|
title: Fear and Loathing in Las Vegas
|
||||||
|
year: 1998
|
||||||
|
screenSize: 720p
|
||||||
|
format: HD-DVD
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: ESiR
|
||||||
|
|
||||||
|
? Leopard.dmg
|
||||||
|
: type: unknown
|
||||||
|
extension: dmg
|
||||||
|
|
||||||
|
? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
|
||||||
|
: type: episode
|
||||||
|
series: Duckman
|
||||||
|
season: 1
|
||||||
|
episodeNumber: 1
|
||||||
|
title: I, Duckman
|
||||||
|
date: 2002-11-07
|
||||||
|
|
||||||
|
? Series/Neverwhere/Neverwhere.05.Down.Street.[tvu.org.ru].avi
|
||||||
|
: type: episode
|
||||||
|
series: Neverwhere
|
||||||
|
episodeNumber: 5
|
||||||
|
title: Down Street
|
||||||
|
website: tvu.org.ru
|
||||||
|
|
||||||
|
? Neverwhere.05.Down.Street.[tvu.org.ru].avi
|
||||||
|
: type: episode
|
||||||
|
series: Neverwhere
|
||||||
|
episodeNumber: 5
|
||||||
|
title: Down Street
|
||||||
|
website: tvu.org.ru
|
||||||
|
|
||||||
|
? Series/Breaking Bad/Minisodes/Breaking.Bad.(Minisodes).01.Good.Cop.Bad.Cop.WEBRip.XviD.avi
|
||||||
|
: type: episode
|
||||||
|
series: Breaking Bad
|
||||||
|
episodeFormat: Minisode
|
||||||
|
episodeNumber: 1
|
||||||
|
title: Good Cop Bad Cop
|
||||||
|
format: WEBRip
|
||||||
|
videoCodec: XviD
|
||||||
|
|
||||||
|
? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
|
||||||
|
: type: episode
|
||||||
|
series: Kaamelott
|
||||||
|
episodeNumber: 23
|
||||||
|
title: Le Forfait
|
||||||
|
|
||||||
|
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
|
||||||
|
: type: movie
|
||||||
|
title: The Doors
|
||||||
|
year: 1991
|
||||||
|
date: 2008-03-09
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
audioCodec: AC3
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: HiS@SiLUHD
|
||||||
|
language: english
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
|
||||||
|
: type: movie
|
||||||
|
title: M.A.S.H.
|
||||||
|
year: 1970
|
||||||
|
videoCodec: DivX
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? the.mentalist.501.hdtv-lol.mp4
|
||||||
|
: type: episode
|
||||||
|
series: The Mentalist
|
||||||
|
season: 5
|
||||||
|
episodeNumber: 1
|
||||||
|
format: HDTV
|
||||||
|
releaseGroup: LOL
|
||||||
|
|
||||||
|
? the.simpsons.2401.hdtv-lol.mp4
|
||||||
|
: type: episode
|
||||||
|
series: The Simpsons
|
||||||
|
season: 24
|
||||||
|
episodeNumber: 1
|
||||||
|
format: HDTV
|
||||||
|
releaseGroup: LOL
|
||||||
|
|
||||||
|
? Homeland.S02E01.HDTV.x264-EVOLVE.mp4
|
||||||
|
: type: episode
|
||||||
|
series: Homeland
|
||||||
|
season: 2
|
||||||
|
episodeNumber: 1
|
||||||
|
format: HDTV
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: EVOLVE
|
||||||
|
|
||||||
|
? /media/Band_of_Brothers-e01-Currahee.mkv
|
||||||
|
: type: episode
|
||||||
|
series: Band of Brothers
|
||||||
|
episodeNumber: 1
|
||||||
|
title: Currahee
|
||||||
|
|
||||||
|
? /media/Band_of_Brothers-x02-We_Stand_Alone_Together.mkv
|
||||||
|
: type: episode
|
||||||
|
series: Band of Brothers
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: We Stand Alone Together
|
||||||
|
|
||||||
|
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
|
||||||
|
: type: movie
|
||||||
|
title: Casino Royale
|
||||||
|
filmSeries: James Bond
|
||||||
|
filmNumber: 21
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: Stunts
|
||||||
|
|
||||||
|
? /TV Shows/new.girl.117.hdtv-lol.mp4
|
||||||
|
: type: episode
|
||||||
|
series: New Girl
|
||||||
|
season: 1
|
||||||
|
episodeNumber: 17
|
||||||
|
format: HDTV
|
||||||
|
releaseGroup: LOL
|
||||||
|
|
||||||
|
? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
|
||||||
|
: type: episode
|
||||||
|
series: The Office (US)
|
||||||
|
country: US
|
||||||
|
season: 1
|
||||||
|
episodeNumber: 3
|
||||||
|
title: Health Care
|
||||||
|
format: HDTV
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: LOL
|
||||||
|
|
||||||
|
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
|
||||||
|
: type: movie
|
||||||
|
title: The Insider
|
||||||
|
year: 1999
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: 60 Minutes Interview-1996
|
||||||
|
|
||||||
|
? OSS_117--Cairo,_Nest_of_Spies.mkv
|
||||||
|
: type: movie
|
||||||
|
title: OSS 117--Cairo, Nest of Spies
|
||||||
|
|
||||||
|
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
|
||||||
|
: type: movie
|
||||||
|
title: Rush Beyond The Lighted Stage
|
||||||
|
bonusNumber: 9
|
||||||
|
bonusTitle: Between Sun and Moon-2002 Hartford
|
||||||
|
|
||||||
|
? House.Hunters.International.S56E06.720p.hdtv.x264.mp4
|
||||||
|
: type: episode
|
||||||
|
series: House Hunters International
|
||||||
|
season: 56
|
||||||
|
episodeNumber: 6
|
||||||
|
screenSize: 720p
|
||||||
|
format: HDTV
|
||||||
|
videoCodec: h264
|
||||||
|
|
||||||
|
? White.House.Down.2013.1080p.BluRay.DTS-HD.MA.5.1.x264-PublicHD.mkv
|
||||||
|
: type: movie
|
||||||
|
title: White House Down
|
||||||
|
year: 2013
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
audioCodec: DTS
|
||||||
|
audioProfile: HDMA
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: PublicHD
|
||||||
|
audioChannels: "5.1"
|
||||||
|
|
||||||
|
? White.House.Down.2013.1080p.BluRay.DTSHD.MA.5.1.x264-PublicHD.mkv
|
||||||
|
: type: movie
|
||||||
|
title: White House Down
|
||||||
|
year: 2013
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
audioCodec: DTS
|
||||||
|
audioProfile: HDMA
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: PublicHD
|
||||||
|
audioChannels: "5.1"
|
||||||
|
|
||||||
|
? Hostages.S01E01.Pilot.for.Air.720p.WEB-DL.DD5.1.H.264-NTb.nfo
|
||||||
|
: type: episodeinfo
|
||||||
|
series: Hostages
|
||||||
|
title: Pilot for Air
|
||||||
|
season: 1
|
||||||
|
episodeNumber: 1
|
||||||
|
screenSize: 720p
|
||||||
|
format: WEB-DL
|
||||||
|
audioChannels: "5.1"
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: DolbyDigital
|
||||||
|
releaseGroup: NTb
|
||||||
|
|
||||||
|
? Despicable.Me.2.2013.1080p.BluRay.x264-VeDeTT.nfo
|
||||||
|
: type: movieinfo
|
||||||
|
title: Despicable Me 2
|
||||||
|
year: 2013
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: VeDeTT
|
||||||
|
|
||||||
|
? Le Cinquieme Commando 1971 SUBFORCED FRENCH DVDRiP XViD AC3 Bandix.mkv
|
||||||
|
: type: movie
|
||||||
|
audioCodec: AC3
|
||||||
|
format: DVD
|
||||||
|
releaseGroup: Bandix
|
||||||
|
subtitleLanguage: French
|
||||||
|
title: Le Cinquieme Commando
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 1971
|
||||||
|
|
||||||
|
? Le Seigneur des Anneaux - La Communauté de l'Anneau - Version Longue - BDRip.mkv
|
||||||
|
: type: movie
|
||||||
|
format: BluRay
|
||||||
|
title: Le Seigneur des Anneaux
|
||||||
|
|
||||||
|
? La petite bande (Michel Deville - 1983) VF PAL MP4 x264 AAC.mkv
|
||||||
|
: type: movie
|
||||||
|
audioCodec: AAC
|
||||||
|
language: French
|
||||||
|
title: La petite bande
|
||||||
|
videoCodec: h264
|
||||||
|
year: 1983
|
||||||
|
|
||||||
|
? Retour de Flammes (Gregor Schnitzler 2003) FULL DVD.iso
|
||||||
|
: type: movie
|
||||||
|
format: DVD
|
||||||
|
title: Retour de Flammes
|
||||||
|
type: movie
|
||||||
|
year: 2003
|
||||||
|
|
||||||
|
? A.Common.Title.Special.2014.avi
|
||||||
|
: type: movie
|
||||||
|
year: 2014
|
||||||
|
title: A Common Title Special
|
||||||
|
|
||||||
|
? A.Common.Title.2014.Special.avi
|
||||||
|
: type: episode
|
||||||
|
year: 2014
|
||||||
|
series: A Common Title
|
||||||
|
title: Special
|
||||||
|
episodeDetails: Special
|
||||||
|
|
||||||
|
? A.Common.Title.2014.Special.Edition.avi
|
||||||
|
: type: movie
|
||||||
|
year: 2014
|
||||||
|
title: A Common Title
|
||||||
|
edition: Special Edition
|
||||||
|
|
||||||
|
? Downton.Abbey.2013.Christmas.Special.HDTV.x264-FoV.mp4
|
||||||
|
: type: episode
|
||||||
|
year: 2013
|
||||||
|
series: Downton Abbey
|
||||||
|
title: Christmas Special
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: FoV
|
||||||
|
format: HDTV
|
||||||
|
episodeDetails: Special
|
||||||
|
|
||||||
|
? Doctor_Who_2013_Christmas_Special.The_Time_of_The_Doctor.HD
|
||||||
|
: options: -n
|
||||||
|
type: episode
|
||||||
|
series: Doctor Who
|
||||||
|
other: HD
|
||||||
|
episodeDetails: Special
|
||||||
|
title: Christmas Special The Time of The Doctor
|
||||||
|
year: 2013
|
||||||
|
|
||||||
|
? Doctor Who 2005 50th Anniversary Special The Day of the Doctor 3.avi
|
||||||
|
: type: episode
|
||||||
|
series: Doctor Who
|
||||||
|
episodeDetails: Special
|
||||||
|
title: 50th Anniversary Special The Day of the Doctor 3
|
||||||
|
year: 2005
|
||||||
|
|
||||||
|
? Robot Chicken S06-Born Again Virgin Christmas Special HDTV x264.avi
|
||||||
|
: type: episode
|
||||||
|
series: Robot Chicken
|
||||||
|
format: HDTV
|
||||||
|
season: 6
|
||||||
|
title: Born Again Virgin Christmas Special
|
||||||
|
videoCodec: h264
|
||||||
|
episodeDetails: Special
|
||||||
|
|
||||||
|
? Wicked.Tuna.S03E00.Head.To.Tail.Special.HDTV.x264-YesTV
|
||||||
|
: options: -n
|
||||||
|
type: episode
|
||||||
|
series: Wicked Tuna
|
||||||
|
title: Head To Tail Special
|
||||||
|
releaseGroup: YesTV
|
||||||
|
season: 3
|
||||||
|
episodeNumber: 0
|
||||||
|
videoCodec: h264
|
||||||
|
format: HDTV
|
||||||
|
episodeDetails: Special
|
||||||
|
|
||||||
|
? The.Voice.UK.S03E12.HDTV.x264-C4TV
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 12
|
||||||
|
videoCodec: h264
|
||||||
|
format: HDTV
|
||||||
|
series: The Voice (UK)
|
||||||
|
releaseGroup: C4TV
|
||||||
|
season: 3
|
||||||
|
country: United Kingdom
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? /tmp/star.trek.9/star.trek.9.mkv
|
||||||
|
: type: movie
|
||||||
|
title: star trek 9
|
||||||
|
|
||||||
|
? star.trek.9.mkv
|
||||||
|
: type: movie
|
||||||
|
title: star trek 9
|
||||||
|
|
||||||
|
? FlexGet.S01E02.TheName.HDTV.xvid
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 2
|
||||||
|
format: HDTV
|
||||||
|
season: 1
|
||||||
|
series: FlexGet
|
||||||
|
title: TheName
|
||||||
|
type: episode
|
||||||
|
videoCodec: XviD
|
||||||
|
|
||||||
|
? FlexGet.S01E02.TheName.HDTV.xvid
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 2
|
||||||
|
format: HDTV
|
||||||
|
season: 1
|
||||||
|
series: FlexGet
|
||||||
|
title: TheName
|
||||||
|
type: episode
|
||||||
|
videoCodec: XviD
|
||||||
|
|
||||||
|
? some.series.S03E14.Title.Here.720p
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 14
|
||||||
|
screenSize: 720p
|
||||||
|
season: 3
|
||||||
|
series: some series
|
||||||
|
title: Title Here
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? '[the.group] Some.Series.S03E15.Title.Two.720p'
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 15
|
||||||
|
releaseGroup: the.group
|
||||||
|
screenSize: 720p
|
||||||
|
season: 3
|
||||||
|
series: Some Series
|
||||||
|
title: Title Two
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? 'HD 720p: Some series.S03E16.Title.Three'
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 16
|
||||||
|
other: HD
|
||||||
|
screenSize: 720p
|
||||||
|
season: 3
|
||||||
|
series: Some series
|
||||||
|
title: Title Three
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? Something.Season.2.1of4.Ep.Title.HDTV.torrent
|
||||||
|
: episodeCount: 4
|
||||||
|
episodeNumber: 1
|
||||||
|
format: HDTV
|
||||||
|
season: 2
|
||||||
|
series: Something
|
||||||
|
title: Title
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? Show-A (US) - Episode Title S02E09 hdtv
|
||||||
|
: options: -n
|
||||||
|
country: US
|
||||||
|
episodeNumber: 9
|
||||||
|
format: HDTV
|
||||||
|
season: 2
|
||||||
|
series: Show-A (US)
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? Jack's.Show.S03E01.blah.1080p
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 1
|
||||||
|
screenSize: 1080p
|
||||||
|
season: 3
|
||||||
|
series: Jack's Show
|
||||||
|
title: blah
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? FlexGet.epic
|
||||||
|
: options: -n
|
||||||
|
title: FlexGet epic
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? FlexGet.Apt.1
|
||||||
|
: options: -n
|
||||||
|
title: FlexGet Apt 1
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? FlexGet.aptitude
|
||||||
|
: options: -n
|
||||||
|
title: FlexGet aptitude
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? FlexGet.Step1
|
||||||
|
: options: -n
|
||||||
|
title: FlexGet Step1
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720 * 432].avi
|
||||||
|
: format: DVD
|
||||||
|
screenSize: 720x432
|
||||||
|
title: El Bosque Animado
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 1987
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi
|
||||||
|
: format: DVD
|
||||||
|
screenSize: 720x432
|
||||||
|
title: El Bosque Animado
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 1987
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? 2009.shoot.fruit.chan.multi.dvd9.pal
|
||||||
|
: options: -n
|
||||||
|
format: DVD
|
||||||
|
language: mul
|
||||||
|
other: PAL
|
||||||
|
title: shoot fruit chan
|
||||||
|
type: movie
|
||||||
|
year: 2009
|
||||||
|
|
||||||
|
? 2009.shoot.fruit.chan.multi.dvd5.pal
|
||||||
|
: options: -n
|
||||||
|
format: DVD
|
||||||
|
language: mul
|
||||||
|
other: PAL
|
||||||
|
title: shoot fruit chan
|
||||||
|
type: movie
|
||||||
|
year: 2009
|
||||||
|
|
||||||
|
? The.Flash.2014.S01E01.PREAIR.WEBRip.XviD-EVO.avi
|
||||||
|
: episodeNumber: 1
|
||||||
|
format: WEBRip
|
||||||
|
other: Preair
|
||||||
|
releaseGroup: EVO
|
||||||
|
season: 1
|
||||||
|
series: The Flash
|
||||||
|
type: episode
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 2014
|
||||||
|
|
||||||
|
? Ice.Lake.Rebels.S01E06.Ice.Lake.Games.720p.HDTV.x264-DHD
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 6
|
||||||
|
format: HDTV
|
||||||
|
releaseGroup: DHD
|
||||||
|
screenSize: 720p
|
||||||
|
season: 1
|
||||||
|
series: Ice Lake Rebels
|
||||||
|
title: Ice Lake Games
|
||||||
|
type: episode
|
||||||
|
videoCodec: h264
|
||||||
|
|
||||||
|
? The League - S06E10 - Epi Sexy.mkv
|
||||||
|
: episodeNumber: 10
|
||||||
|
season: 6
|
||||||
|
series: The League
|
||||||
|
title: Epi Sexy
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? Stay (2005) [1080p]/Stay.2005.1080p.BluRay.x264.YIFY.mp4
|
||||||
|
: format: BluRay
|
||||||
|
releaseGroup: YIFY
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Stay
|
||||||
|
type: movie
|
||||||
|
videoCodec: h264
|
||||||
|
year: 2005
|
||||||
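A hedged sketch (not part of the commit) of how one autodetect.yaml entry above can be checked by hand; the bundled guessittest.py harness performs the full comparison:

# Hypothetical usage sketch only; nothing below is part of the committed diff.
import yaml
from guessit import guess_file_info

fixtures = yaml.load(open('libs/guessit/test/autodetect.yaml'))
filename = 'Homeland.S02E01.HDTV.x264-EVOLVE.mp4'
expected = fixtures[filename]      # {'type': 'episode', 'series': 'Homeland', ...}
guess = guess_file_info(filename)
assert guess['series'] == expected['series']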
1  libs/guessit/test/dummy.srt  Normal file
@@ -0,0 +1 @@
Just a dummy srt file (used for unittests: do not remove!)
1174  libs/guessit/test/episodes.yaml  Normal file
File diff suppressed because it is too large
187  libs/guessit/test/guessittest.py  Normal file
@@ -0,0 +1,187 @@
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# GuessIt - A library for guessing information from filenames
|
||||||
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
|
#
|
||||||
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# GuessIt is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# Lesser GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the Lesser GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
from guessit import base_text_type, u
|
||||||
|
from collections import defaultdict
|
||||||
|
from unittest import TestCase, TestLoader, TextTestRunner
|
||||||
|
import shlex
|
||||||
|
import babelfish
|
||||||
|
import yaml, logging, sys, os
|
||||||
|
from os.path import *
|
||||||
|
|
||||||
|
|
||||||
|
def currentPath():
|
||||||
|
'''Returns the path in which the calling file is located.'''
|
||||||
|
return dirname(join(os.getcwd(), sys._getframe(1).f_globals['__file__']))
|
||||||
|
|
||||||
|
|
||||||
|
def addImportPath(path):
|
||||||
|
'''Function that adds the specified path to the import path. The path can be
|
||||||
|
absolute or relative to the calling file.'''
|
||||||
|
importPath = abspath(join(currentPath(), path))
|
||||||
|
sys.path = [importPath] + sys.path
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
from guessit.plugins import transformers
|
||||||
|
from guessit.options import get_opts
|
||||||
|
import guessit
|
||||||
|
from guessit import *
|
||||||
|
from guessit.matcher import *
|
||||||
|
from guessit.fileutils import *
|
||||||
|
|
||||||
|
|
||||||
|
def allTests(testClass):
|
||||||
|
return TestLoader().loadTestsFromTestCase(testClass)
|
||||||
|
|
||||||
|
|
||||||
|
class TestGuessit(TestCase):
|
||||||
|
|
||||||
|
def checkMinimumFieldsCorrect(self, filename, filetype=None, remove_type=True,
|
||||||
|
exclude_files=None):
|
||||||
|
groundTruth = yaml.load(load_file_in_same_dir(__file__, filename))
|
||||||
|
|
||||||
|
def guess_func(string, options=None):
|
||||||
|
return guess_file_info(string, options=options, type=filetype)
|
||||||
|
|
||||||
|
return self.checkFields(groundTruth, guess_func, remove_type, exclude_files)
|
||||||
|
|
||||||
|
def checkFields(self, groundTruth, guess_func, remove_type=True,
|
||||||
|
exclude_files=None):
|
||||||
|
total = 0
|
||||||
|
exclude_files = exclude_files or []
|
||||||
|
|
||||||
|
fails = defaultdict(list)
|
||||||
|
additionals = defaultdict(list)
|
||||||
|
|
||||||
|
for filename, required_fields in groundTruth.items():
|
||||||
|
filename = u(filename)
|
||||||
|
if filename in exclude_files:
|
||||||
|
continue
|
||||||
|
|
||||||
|
log.debug('\n' + '-' * 120)
|
||||||
|
log.info('Guessing information for file: %s' % filename)
|
||||||
|
|
||||||
|
options = required_fields.pop('options') if 'options' in required_fields else None
|
||||||
|
|
||||||
|
if options:
|
||||||
|
args = shlex.split(options)
|
||||||
|
options = get_opts().parse_args(args)
|
||||||
|
options = vars(options)
|
||||||
|
try:
|
||||||
|
found = guess_func(filename, options)
|
||||||
|
except Exception as e:
|
||||||
|
fails[filename].append("An exception has occured in %s: %s" % (filename, e))
|
||||||
|
log.exception("An exception has occured in %s: %s" % (filename, e))
|
||||||
|
continue
|
||||||
|
|
||||||
|
total = total + 1
|
||||||
|
|
||||||
|
# no need for these in the unittests
|
||||||
|
if remove_type:
|
||||||
|
try:
|
||||||
|
del found['type']
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
for prop in ('container', 'mimetype', 'unidentified'):
|
||||||
|
if prop in found:
|
||||||
|
del found[prop]
|
||||||
|
|
||||||
|
# props which are list of just 1 elem should be opened for easier writing of the tests
|
||||||
|
for prop in ('language', 'subtitleLanguage', 'other', 'episodeDetails', 'unidentified'):
|
||||||
|
value = found.get(prop, None)
|
||||||
|
if isinstance(value, list) and len(value) == 1:
|
||||||
|
found[prop] = value[0]
|
||||||
|
|
||||||
|
# look for missing properties
|
||||||
|
for prop, value in required_fields.items():
|
||||||
|
if prop not in found:
|
||||||
|
log.debug("Prop '%s' not found in: %s" % (prop, filename))
|
||||||
|
fails[filename].append("'%s' not found in: %s" % (prop, filename))
|
||||||
|
continue
|
||||||
|
|
||||||
|
# if both properties are strings, do a case-insensitive comparison
|
||||||
|
if (isinstance(value, base_text_type) and
|
||||||
|
isinstance(found[prop], base_text_type)):
|
||||||
|
if value.lower() != found[prop].lower():
|
||||||
|
log.debug("Wrong prop value [str] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
|
||||||
|
elif isinstance(value, list) and isinstance(found[prop], list):
|
||||||
|
if found[prop] and isinstance(found[prop][0], babelfish.Language):
|
||||||
|
# list of languages
|
||||||
|
s1 = set(Language.fromguessit(s) for s in value)
|
||||||
|
s2 = set(found[prop])
|
||||||
|
else:
|
||||||
|
# by default we assume list of strings and do a case-insensitive
|
||||||
|
# comparison on their elements
|
||||||
|
s1 = set(u(s).lower() for s in value)
|
||||||
|
s2 = set(u(s).lower() for s in found[prop])
|
||||||
|
|
||||||
|
if s1 != s2:
|
||||||
|
log.debug("Wrong prop value [list] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
|
||||||
|
elif isinstance(found[prop], babelfish.Language):
|
||||||
|
try:
|
||||||
|
if babelfish.Language.fromguessit(value) != found[prop]:
|
||||||
|
raise ValueError
|
||||||
|
except:
|
||||||
|
log.debug("Wrong prop value [Language] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
|
||||||
|
elif isinstance(found[prop], babelfish.Country):
|
||||||
|
try:
|
||||||
|
if babelfish.Country.fromguessit(value) != found[prop]:
|
||||||
|
raise ValueError
|
||||||
|
except:
|
||||||
|
log.debug("Wrong prop value [Country] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
|
||||||
|
|
||||||
|
# otherwise, just compare their values directly
|
||||||
|
else:
|
||||||
|
if found[prop] != value:
|
||||||
|
log.debug("Wrong prop value for '%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))
|
||||||
|
fails[filename].append("'%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))
|
||||||
|
|
||||||
|
# look for additional properties
|
||||||
|
for prop, value in found.items():
|
||||||
|
if prop not in required_fields:
|
||||||
|
log.debug("Found additional info for prop = '%s': '%s'" % (prop, u(value)))
|
||||||
|
additionals[filename].append("'%s': '%s'" % (prop, u(value)))
|
||||||
|
|
||||||
|
correct = total - len(fails)
|
||||||
|
log.info('SUMMARY: Guessed correctly %d out of %d filenames' % (correct, total))
|
||||||
|
|
||||||
|
for failed_entry, failed_properties in fails.items():
|
||||||
|
log.error('---- ' + failed_entry + ' ----')
|
||||||
|
for failed_property in failed_properties:
|
||||||
|
log.error("FAILED: " + failed_property)
|
||||||
|
|
||||||
|
for additional_entry, additional_properties in additionals.items():
|
||||||
|
log.warning('---- ' + additional_entry + ' ----')
|
||||||
|
for additional_property in additional_properties:
|
||||||
|
log.warning("ADDITIONAL: " + additional_property)
|
||||||
|
|
||||||
|
self.assertTrue(correct == total,
|
||||||
|
msg='Correct: %d < Total: %d' % (correct, total))
|
||||||
754
libs/guessit/test/movies.yaml
Normal file
754
libs/guessit/test/movies.yaml
Normal file
|
|
@ -0,0 +1,754 @@
|
||||||
|
|
||||||
|
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
|
||||||
|
: title: Fear and Loathing in Las Vegas
|
||||||
|
year: 1998
|
||||||
|
screenSize: 720p
|
||||||
|
format: HD-DVD
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: ESiR
|
||||||
|
|
||||||
|
? Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi
|
||||||
|
: title: El Dia de la Bestia
|
||||||
|
year: 1995
|
||||||
|
format: DVD
|
||||||
|
language: spanish
|
||||||
|
videoCodec: DivX
|
||||||
|
releaseGroup: Artik[SEDG]
|
||||||
|
|
||||||
|
? Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
|
||||||
|
: title: Dark City
|
||||||
|
year: 1998
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: CHD
|
||||||
|
|
||||||
|
? Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv
|
||||||
|
: title: Sin City
|
||||||
|
year: 2005
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AC3
|
||||||
|
releaseGroup: SEPTiC
|
||||||
|
|
||||||
|
|
||||||
|
? Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi
|
||||||
|
: title: Borat
|
||||||
|
year: 2006
|
||||||
|
other: PROPER
|
||||||
|
format: DVD
|
||||||
|
other: [ R5, Proper ]
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: PUKKA
|
||||||
|
|
||||||
|
|
||||||
|
? "[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv"
|
||||||
|
: title: Le Prestige
|
||||||
|
format: DVD
|
||||||
|
videoCodec: h264
|
||||||
|
videoProfile: HP
|
||||||
|
audioCodec: AAC
|
||||||
|
audioProfile: HE
|
||||||
|
language: [ french, english ]
|
||||||
|
subtitleLanguage: [ french, english ]
|
||||||
|
releaseGroup: XCT
|
||||||
|
|
||||||
|
? Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi
|
||||||
|
: title: Battle Royale
|
||||||
|
year: 2000
|
||||||
|
edition: special edition
|
||||||
|
cdNumber: 1
|
||||||
|
cdNumberTotal: 2
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: ZeaL
|
||||||
|
|
||||||
|
? Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.avi
|
||||||
|
: title: Brazil
|
||||||
|
edition: Criterion Edition
|
||||||
|
year: 1985
|
||||||
|
cdNumber: 2
|
||||||
|
|
||||||
|
? Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv
|
||||||
|
: title: Persepolis
|
||||||
|
year: 2007
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AAC
|
||||||
|
language: [ French, English ]
|
||||||
|
subtitleLanguage: [ French, English ]
|
||||||
|
releaseGroup: XCT
|
||||||
|
|
||||||
|
? Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv
|
||||||
|
: title: Toy Story
|
||||||
|
year: 1995
|
||||||
|
format: HDTV
|
||||||
|
screenSize: 720p
|
||||||
|
language: [ english, spanish ]
|
||||||
|
|
||||||
|
? Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi
|
||||||
|
: title: Office Space
|
||||||
|
year: 1999
|
||||||
|
format: DVD
|
||||||
|
language: [ english, spanish ]
|
||||||
|
videoCodec: XviD
|
||||||
|
audioCodec: AC3
|
||||||
|
|
||||||
|
? Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.avi
|
||||||
|
: title: Wild Zero
|
||||||
|
year: 2000
|
||||||
|
videoCodec: DivX
|
||||||
|
releaseGroup: EPiC
|
||||||
|
|
||||||
|
? movies/Baraka_Edition_Collector.avi
|
||||||
|
: title: Baraka
|
||||||
|
edition: collector edition
|
||||||
|
|
||||||
|
? Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director's.Cut).CD1.DVDRip.XviD.AC3-WAF.avi
|
||||||
|
: title: Blade Runner
|
||||||
|
year: 1982
|
||||||
|
edition: Director's Cut
|
||||||
|
cdNumber: 1
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
audioCodec: AC3
|
||||||
|
releaseGroup: WAF
|
||||||
|
|
||||||
|
? movies/American.The.Bill.Hicks.Story.2009.DVDRip.XviD-EPiSODE.[UsaBit.com]/UsaBit.com_esd-americanbh.avi
|
||||||
|
: title: American The Bill Hicks Story
|
||||||
|
year: 2009
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: EPiSODE
|
||||||
|
website: UsaBit.com
|
||||||
|
|
||||||
|
? movies/Charlie.And.Boots.DVDRip.XviD-TheWretched/wthd-cab.avi
|
||||||
|
: title: Charlie And Boots
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: TheWretched
|
||||||
|
|
||||||
|
? movies/Steig Larsson Millenium Trilogy (2009) BRrip 720 AAC x264/(1)The Girl With The Dragon Tattoo (2009) BRrip 720 AAC x264.mkv
|
||||||
|
: title: The Girl With The Dragon Tattoo
|
||||||
|
filmSeries: Steig Larsson Millenium Trilogy
|
||||||
|
filmNumber: 1
|
||||||
|
year: 2009
|
||||||
|
format: BluRay
|
||||||
|
audioCodec: AAC
|
||||||
|
videoCodec: h264
|
||||||
|
screenSize: 720p
|
||||||
|
|
||||||
|
? movies/Greenberg.REPACK.LiMiTED.DVDRip.XviD-ARROW/arw-repack-greenberg.dvdrip.xvid.avi
|
||||||
|
: title: Greenberg
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: ARROW
|
||||||
|
other: ['Proper', 'Limited']
|
||||||
|
|
||||||
|
? Movies/Fr - Paris 2054, Renaissance (2005) - De Christian Volckman - (Film Divx Science Fiction Fantastique Thriller Policier N&B).avi
|
||||||
|
: title: Paris 2054, Renaissance
|
||||||
|
year: 2005
|
||||||
|
language: french
|
||||||
|
videoCodec: DivX
|
||||||
|
|
||||||
|
? Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
|
||||||
|
: title: Avida
|
||||||
|
year: 2006
|
||||||
|
language: french
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: PROD
|
||||||
|
|
||||||
|
? Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
|
||||||
|
: title: Alice in Wonderland
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: DiAMOND
|
||||||
|
|
||||||
|
? Movies/Ne.Le.Dis.A.Personne.Fr 2 cd/personnea_mp.avi
|
||||||
|
: title: Ne Le Dis A Personne
|
||||||
|
language: french
|
||||||
|
cdNumberTotal: 2
|
||||||
|
|
||||||
|
? Movies/Bunker Palace Hôtel (Enki Bilal) (1989)/Enki Bilal - Bunker Palace Hotel (Fr Vhs Rip).avi
|
||||||
|
: title: Bunker Palace Hôtel
|
||||||
|
year: 1989
|
||||||
|
language: french
|
||||||
|
format: VHS
|
||||||
|
|
||||||
|
? Movies/21 (2008)/21.(2008).DVDRip.x264.AC3-FtS.[sharethefiles.com].mkv
|
||||||
|
: title: "21"
|
||||||
|
year: 2008
|
||||||
|
format: DVD
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AC3
|
||||||
|
releaseGroup: FtS
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/9 (2009)/9.2009.Blu-ray.DTS.720p.x264.HDBRiSe.[sharethefiles.com].mkv
|
||||||
|
: title: "9"
|
||||||
|
year: 2009
|
||||||
|
format: BluRay
|
||||||
|
audioCodec: DTS
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: HDBRiSe
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam.avi
|
||||||
|
: title: Mamma Mia
|
||||||
|
year: 2008
|
||||||
|
format: DVD
|
||||||
|
audioCodec: AC3
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: CrazyTeam
|
||||||
|
|
||||||
|
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
|
||||||
|
: title: M.A.S.H.
|
||||||
|
year: 1970
|
||||||
|
videoCodec: DivX
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
|
||||||
|
: title: The Doors
|
||||||
|
year: 1991
|
||||||
|
date: 2008-03-09
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
audioCodec: AC3
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: HiS@SiLUHD
|
||||||
|
language: english
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/The Doors (1991)/08.03.09.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
|
||||||
|
: options: --date-year-first
|
||||||
|
title: The Doors
|
||||||
|
year: 1991
|
||||||
|
date: 2008-03-09
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
audioCodec: AC3
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: HiS@SiLUHD
|
||||||
|
language: english
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/Ratatouille/video_ts-ratatouille.srt
|
||||||
|
: title: Ratatouille
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? Movies/001 __ A classer/Fantomas se déchaine - Louis de Funès.avi
|
||||||
|
: title: Fantomas se déchaine
|
||||||
|
|
||||||
|
? Movies/Comme une Image (2004)/Comme.Une.Image.FRENCH.DVDRiP.XViD-NTK.par-www.divx-overnet.com.avi
|
||||||
|
: title: Comme une Image
|
||||||
|
year: 2004
|
||||||
|
language: french
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: NTK
|
||||||
|
website: www.divx-overnet.com
|
||||||
|
|
||||||
|
? Movies/Fantastic Mr Fox/Fantastic.Mr.Fox.2009.DVDRip.{x264+LC-AAC.5.1}{Fr-Eng}{Sub.Fr-Eng}-™.[sharethefiles.com].mkv
|
||||||
|
: title: Fantastic Mr Fox
|
||||||
|
year: 2009
|
||||||
|
format: DVD
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AAC
|
||||||
|
audioProfile: LC
|
||||||
|
audioChannels: "5.1"
|
||||||
|
language: [ french, english ]
|
||||||
|
subtitleLanguage: [ french, english ]
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi
|
||||||
|
: title: Somewhere
|
||||||
|
year: 2010
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: iLG
|
||||||
|
|
||||||
|
? Movies/Moon_(2009).mkv
|
||||||
|
: title: Moon
|
||||||
|
year: 2009
|
||||||
|
|
||||||
|
? Movies/Moon_(2009)-x01.mkv
|
||||||
|
: title: Moon
|
||||||
|
year: 2009
|
||||||
|
bonusNumber: 1
|
||||||
|
|
||||||
|
? Movies/Moon_(2009)-x02-Making_Of.mkv
|
||||||
|
: title: Moon
|
||||||
|
year: 2009
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: Making Of
|
||||||
|
|
||||||
|
? movies/James_Bond-f17-Goldeneye.mkv
|
||||||
|
: title: Goldeneye
|
||||||
|
filmSeries: James Bond
|
||||||
|
filmNumber: 17
|
||||||
|
|
||||||
|
? /movies/James_Bond-f21-Casino_Royale.mkv
|
||||||
|
: title: Casino Royale
|
||||||
|
filmSeries: James Bond
|
||||||
|
filmNumber: 21
|
||||||
|
|
||||||
|
? /movies/James_Bond-f21-Casino_Royale-x01-Becoming_Bond.mkv
|
||||||
|
: title: Casino Royale
|
||||||
|
filmSeries: James Bond
|
||||||
|
filmNumber: 21
|
||||||
|
bonusNumber: 1
|
||||||
|
bonusTitle: Becoming Bond
|
||||||
|
|
||||||
|
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
|
||||||
|
: title: Casino Royale
|
||||||
|
filmSeries: James Bond
|
||||||
|
filmNumber: 21
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: Stunts
|
||||||
|
|
||||||
|
? OSS_117--Cairo,_Nest_of_Spies.mkv
|
||||||
|
: title: OSS 117--Cairo, Nest of Spies
|
||||||
|
|
||||||
|
? The Godfather Part III.mkv
|
||||||
|
: title: The Godfather
|
||||||
|
part: 3
|
||||||
|
|
||||||
|
? Foobar Part VI.mkv
|
||||||
|
: title: Foobar
|
||||||
|
part: 6
|
||||||
|
|
||||||
|
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
|
||||||
|
: title: The Insider
|
||||||
|
year: 1999
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: 60 Minutes Interview-1996
|
||||||
|
|
||||||
|
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
|
||||||
|
: title: Rush Beyond The Lighted Stage
|
||||||
|
bonusNumber: 9
|
||||||
|
bonusTitle: Between Sun and Moon-2002 Hartford
|
||||||
|
|
||||||
|
? /public/uTorrent/Downloads Finished/Movies/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX.mkv
|
||||||
|
: title: Indiana Jones and the Temple of Doom
|
||||||
|
year: 1984
|
||||||
|
format: HDTV
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AC3
|
||||||
|
audioChannels: "5.1"
|
||||||
|
releaseGroup: REDµX
|
||||||
|
|
||||||
|
? The.Director’s.Notebook.2006.Blu-Ray.x264.DXVA.720p.AC3-de[42].mkv
|
||||||
|
: title: The Director’s Notebook
|
||||||
|
year: 2006
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
videoApi: DXVA
|
||||||
|
screenSize: 720p
|
||||||
|
audioCodec: AC3
|
||||||
|
releaseGroup: de[42]
|
||||||
|
|
||||||
|
? Movies/Cosmopolis.2012.LiMiTED.720p.BluRay.x264-AN0NYM0US[bb]/ano-cosmo.720p.mkv
|
||||||
|
: title: Cosmopolis
|
||||||
|
year: 2012
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: AN0NYM0US[bb]
|
||||||
|
format: BluRay
|
||||||
|
other: LIMITED
|
||||||
|
|
||||||
|
? movies/La Science des Rêves (2006)/La.Science.Des.Reves.FRENCH.DVDRip.XviD-MP-AceBot.avi
|
||||||
|
: title: La Science des Rêves
|
||||||
|
year: 2006
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
videoProfile: MP
|
||||||
|
releaseGroup: AceBot
|
||||||
|
language: French
|
||||||
|
|
||||||
|
? The_Italian_Job.mkv
|
||||||
|
: title: The Italian Job
|
||||||
|
|
||||||
|
? The.Rum.Diary.2011.1080p.BluRay.DTS.x264.D-Z0N3.mkv
|
||||||
|
: title: The Rum Diary
|
||||||
|
year: 2011
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: DTS
|
||||||
|
releaseGroup: D-Z0N3
|
||||||
|
|
||||||
|
? Life.Of.Pi.2012.1080p.BluRay.DTS.x264.D-Z0N3.mkv
|
||||||
|
: title: Life Of Pi
|
||||||
|
year: 2012
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: DTS
|
||||||
|
releaseGroup: D-Z0N3
|
||||||
|
|
||||||
|
? The.Kings.Speech.2010.1080p.BluRay.DTS.x264.D Z0N3.mkv
|
||||||
|
: title: The Kings Speech
|
||||||
|
year: 2010
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: D Z0N3
|
||||||
|
|
||||||
|
? Street.Kings.2008.BluRay.1080p.DTS.x264.dxva EuReKA.mkv
|
||||||
|
: title: Street Kings
|
||||||
|
year: 2008
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 1080p
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
videoApi: DXVA
|
||||||
|
releaseGroup: EuReKa
|
||||||
|
|
||||||
|
? 2001.A.Space.Odyssey.1968.HDDVD.1080p.DTS.x264.dxva EuReKA.mkv
|
||||||
|
: title: 2001 A Space Odyssey
|
||||||
|
year: 1968
|
||||||
|
format: HD-DVD
|
||||||
|
screenSize: 1080p
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
videoApi: DXVA
|
||||||
|
releaseGroup: EuReKa
|
||||||
|
|
||||||
|
? 2012.2009.720p.BluRay.x264.DTS WiKi.mkv
|
||||||
|
: title: "2012"
|
||||||
|
year: 2009
|
||||||
|
screenSize: 720p
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: DTS
|
||||||
|
releaseGroup: WiKi
|
||||||
|
|
||||||
|
? /share/Download/movie/Dead Man Down (2013) BRRiP XViD DD5_1 Custom NLSubs =-_lt Q_o_Q gt-=_/XD607ebb-BRc59935-5155473f-1c5f49/XD607ebb-BRc59935-5155473f-1c5f49.avi
|
||||||
|
: title: Dead Man Down
|
||||||
|
year: 2013
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: XviD
|
||||||
|
audioChannels: "5.1"
|
||||||
|
audioCodec: DolbyDigital
|
||||||
|
idNumber: XD607ebb-BRc59935-5155473f-1c5f49
|
||||||
|
|
||||||
|
? Pacific.Rim.3D.2013.COMPLETE.BLURAY-PCH.avi
|
||||||
|
: title: Pacific Rim
|
||||||
|
year: 2013
|
||||||
|
format: BluRay
|
||||||
|
other:
|
||||||
|
- complete
|
||||||
|
- 3D
|
||||||
|
releaseGroup: PCH
|
||||||
|
|
||||||
|
? Immersion.French.2011.STV.READNFO.QC.FRENCH.ENGLISH.NTSC.DVDR.nfo
|
||||||
|
: title: Immersion French
|
||||||
|
year: 2011
|
||||||
|
language:
|
||||||
|
- French
|
||||||
|
- English
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? Immersion.French.2011.STV.READNFO.QC.FRENCH.NTSC.DVDR.nfo
|
||||||
|
: title: Immersion French
|
||||||
|
year: 2011
|
||||||
|
language: French
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? Immersion.French.2011.STV.READNFO.QC.NTSC.DVDR.nfo
|
||||||
|
: title: Immersion French
|
||||||
|
year: 2011
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? French.Immersion.2011.STV.READNFO.QC.ENGLISH.NTSC.DVDR.nfo
|
||||||
|
: title: French Immersion
|
||||||
|
year: 2011
|
||||||
|
language: ENGLISH
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? Howl's_Moving_Castle_(2004)_[720p,HDTV,x264,DTS]-FlexGet.avi
|
||||||
|
: videoCodec: h264
|
||||||
|
format: HDTV
|
||||||
|
title: Howl's Moving Castle
|
||||||
|
screenSize: 720p
|
||||||
|
year: 2004
|
||||||
|
audioCodec: DTS
|
||||||
|
releaseGroup: FlexGet
|
||||||
|
|
||||||
|
? Pirates de langkasuka.2008.FRENCH.1920X1080.h264.AVC.AsiaRa.mkv
|
||||||
|
: screenSize: 1080p
|
||||||
|
year: 2008
|
||||||
|
language: French
|
||||||
|
videoCodec: h264
|
||||||
|
title: Pirates de langkasuka
|
||||||
|
releaseGroup: AsiaRa
|
||||||
|
|
||||||
|
? Masala (2013) Telugu Movie HD DVDScr XviD - Exclusive.avi
|
||||||
|
: year: 2013
|
||||||
|
videoCodec: XviD
|
||||||
|
title: Masala
|
||||||
|
format: HD-DVD
|
||||||
|
other: screener
|
||||||
|
language: Telugu
|
||||||
|
releaseGroup: Exclusive
|
||||||
|
|
||||||
|
? Django Unchained 2012 DVDSCR X264 AAC-P2P.nfo
|
||||||
|
: year: 2012
|
||||||
|
other: screener
|
||||||
|
videoCodec: h264
|
||||||
|
title: Django Unchained
|
||||||
|
audioCodec: AAC
|
||||||
|
format: DVD
|
||||||
|
releaseGroup: P2P
|
||||||
|
|
||||||
|
? Ejecutiva.En.Apuros(2009).BLURAY.SCR.Xvid.Spanish.LanzamientosD.nfo
|
||||||
|
: year: 2009
|
||||||
|
other: screener
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: XviD
|
||||||
|
language: Spanish
|
||||||
|
title: Ejecutiva En Apuros
|
||||||
|
|
||||||
|
? Die.Schluempfe.2.German.DL.1080p.BluRay.x264-EXQUiSiTE.mkv
|
||||||
|
: title: Die Schluempfe 2
|
||||||
|
format: BluRay
|
||||||
|
language:
|
||||||
|
- Multiple languages
|
||||||
|
- German
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: EXQUiSiTE
|
||||||
|
screenSize: 1080p
|
||||||
|
|
||||||
|
? Rocky 1976 French SubForced BRRip x264 AC3-FUNKY.mkv
|
||||||
|
: title: Rocky
|
||||||
|
year: 1976
|
||||||
|
subtitleLanguage: French
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AC3
|
||||||
|
releaseGroup: FUNKY
|
||||||
|
|
||||||
|
? REDLINE (BD 1080p H264 10bit FLAC) [3xR].mkv
|
||||||
|
: title: REDLINE
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
videoProfile: 10bit
|
||||||
|
audioCodec: Flac
|
||||||
|
screenSize: 1080p
|
||||||
|
|
||||||
|
? The.Lizzie.McGuire.Movie.(2003).HR.DVDRiP.avi
|
||||||
|
: title: The Lizzie McGuire Movie
|
||||||
|
year: 2003
|
||||||
|
format: DVD
|
||||||
|
other: HR
|
||||||
|
|
||||||
|
? Hua.Mulan.BRRIP.MP4.x264.720p-HR.avi
|
||||||
|
: title: Hua Mulan
|
||||||
|
videoCodec: h264
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
other: HR
|
||||||
|
|
||||||
|
? Dr.Seuss.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
|
||||||
|
: videoCodec: XviD
|
||||||
|
title: Dr Seuss The Lorax
|
||||||
|
format: DVD
|
||||||
|
other: LiNE
|
||||||
|
year: 2012
|
||||||
|
audioCodec: AC3
|
||||||
|
audioProfile: HQ
|
||||||
|
releaseGroup: Hive-CM8
|
||||||
|
|
||||||
|
|
||||||
|
? "Star Wars: Episode IV - A New Hope (2004) Special Edition.MKV"
|
||||||
|
: title: Star Wars Episode IV
|
||||||
|
year: 2004
|
||||||
|
edition: Special Edition
|
||||||
|
|
||||||
|
? Dr.LiNE.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
|
||||||
|
: videoCodec: XviD
|
||||||
|
title: Dr LiNE The Lorax
|
||||||
|
format: DVD
|
||||||
|
other: LiNE
|
||||||
|
year: 2012
|
||||||
|
audioCodec: AC3
|
||||||
|
audioProfile: HQ
|
||||||
|
releaseGroup: Hive-CM8
|
||||||
|
|
||||||
|
? Perfect Child-2007-TRUEFRENCH-TVRip.Xvid-h@mster.avi
|
||||||
|
: releaseGroup: h@mster
|
||||||
|
title: Perfect Child
|
||||||
|
videoCodec: XviD
|
||||||
|
language: French
|
||||||
|
format: TV
|
||||||
|
year: 2007
|
||||||
|
|
||||||
|
? entre.ciel.et.terre.(1994).dvdrip.h264.aac-psypeon.avi
|
||||||
|
: audioCodec: AAC
|
||||||
|
format: DVD
|
||||||
|
releaseGroup: psypeon
|
||||||
|
title: entre ciel et terre
|
||||||
|
videoCodec: h264
|
||||||
|
year: 1994
|
||||||
|
|
||||||
|
? Yves.Saint.Laurent.2013.FRENCH.DVDSCR.MD.XviD-ViVARiUM.avi
|
||||||
|
: format: DVD
|
||||||
|
language: French
|
||||||
|
other: Screener
|
||||||
|
releaseGroup: ViVARiUM
|
||||||
|
title: Yves Saint Laurent
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 2013
|
||||||
|
|
||||||
|
? Echec et Mort - Hard to Kill - Steven Seagal Multi 1080p BluRay x264 CCATS.avi
|
||||||
|
: format: BluRay
|
||||||
|
language: Multiple languages
|
||||||
|
releaseGroup: CCATS
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Echec et Mort
|
||||||
|
videoCodec: h264
|
||||||
|
|
||||||
|
? Paparazzi - Timsit/Lindon (MKV 1080p tvripHD)
|
||||||
|
: options: -n
|
||||||
|
title: Paparazzi
|
||||||
|
screenSize: 1080p
|
||||||
|
format: HDTV
|
||||||
|
|
||||||
|
? some.movie.720p.bluray.x264-mind
|
||||||
|
: options: -n
|
||||||
|
title: some movie
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: mind
|
||||||
|
format: BluRay
|
||||||
|
|
||||||
|
? Dr LiNE The Lorax 720p h264 BluRay
|
||||||
|
: options: -n
|
||||||
|
title: Dr LiNE The Lorax
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
format: BluRay
|
||||||
|
|
||||||
|
? BeatdownFrenchDVDRip.mkv
|
||||||
|
: options: -c
|
||||||
|
title: Beatdown
|
||||||
|
language: French
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? YvesSaintLaurent2013FrenchDVDScrXvid.avi
|
||||||
|
: options: -c
|
||||||
|
format: DVD
|
||||||
|
language: French
|
||||||
|
other: Screener
|
||||||
|
title: Yves saint laurent
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 2013
|
||||||
|
|
||||||
|
? Elle.s.en.va.720p.mkv
|
||||||
|
: screenSize: 720p
|
||||||
|
title: Elle s en va
|
||||||
|
|
||||||
|
? FooBar.7.PDTV-FlexGet
|
||||||
|
: options: -n
|
||||||
|
format: DVB
|
||||||
|
releaseGroup: FlexGet
|
||||||
|
title: FooBar 7
|
||||||
|
|
||||||
|
? h265 - HEVC Riddick Unrated Director Cut French 1080p DTS.mkv
|
||||||
|
: audioCodec: DTS
|
||||||
|
edition: Director's cut
|
||||||
|
language: fr
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Riddick Unrated
|
||||||
|
videoCodec: h265
|
||||||
|
|
||||||
|
? "[h265 - HEVC] Riddick Unrated Director Cut French [1080p DTS].mkv"
|
||||||
|
: audioCodec: DTS
|
||||||
|
edition: Director's cut
|
||||||
|
language: fr
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Riddick Unrated
|
||||||
|
videoCodec: h265
|
||||||
|
|
||||||
|
? Barbecue-2014-French-mHD-1080p
|
||||||
|
: options: -n
|
||||||
|
language: fr
|
||||||
|
other: mHD
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Barbecue
|
||||||
|
year: 2014
|
||||||
|
|
||||||
|
? Underworld Quadrilogie VO+VFF+VFQ 1080p HDlight.x264~Tonyk~Monde Infernal
|
||||||
|
: options: -n
|
||||||
|
language:
|
||||||
|
- fr
|
||||||
|
- vo
|
||||||
|
other: HDLight
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Underworld Quadrilogie
|
||||||
|
videoCodec: h264
|
||||||
|
|
||||||
|
? A Bout Portant (The Killers).PAL.Multi.DVD-R-KZ
|
||||||
|
: options: -n
|
||||||
|
format: DVD
|
||||||
|
language: mul
|
||||||
|
releaseGroup: KZ
|
||||||
|
title: A Bout Portant
|
||||||
|
|
||||||
|
? "Mise à Sac (Alain Cavalier, 1967) [Vhs.Rip.Vff]"
|
||||||
|
: options: -n
|
||||||
|
format: VHS
|
||||||
|
language: fr
|
||||||
|
title: "Mise à Sac"
|
||||||
|
year: 1967
|
||||||
|
|
||||||
|
? A Bout Portant (The Killers).PAL.Multi.DVD-R-KZ
|
||||||
|
: options: -n
|
||||||
|
format: DVD
|
||||||
|
language: mul
|
||||||
|
releaseGroup: KZ
|
||||||
|
title: A Bout Portant
|
||||||
|
|
||||||
|
? Youth.In.Revolt.(Be.Bad).2009.MULTI.1080p.LAME3*92-MEDIOZZ
|
||||||
|
: options: -n
|
||||||
|
audioCodec: MP3
|
||||||
|
language: mul
|
||||||
|
releaseGroup: MEDIOZZ
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Youth In Revolt
|
||||||
|
year: 2009
|
||||||
|
|
||||||
|
? La Defense Lincoln (The Lincoln Lawyer) 2011 [DVDRIP][Vostfr]
|
||||||
|
: options: -n
|
||||||
|
format: DVD
|
||||||
|
subtitleLanguage: fr
|
||||||
|
title: La Defense Lincoln
|
||||||
|
year: 2011
|
||||||
|
|
||||||
|
? '[h265 - HEVC] Fight Club French 1080p DTS.'
|
||||||
|
: options: -n
|
||||||
|
audioCodec: DTS
|
||||||
|
language: fr
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Fight Club
|
||||||
|
videoCodec: h265
|
||||||
|
|
||||||
|
? Love Gourou (Mike Myers) - FR
|
||||||
|
: options: -n
|
||||||
|
language: fr
|
||||||
|
title: Love Gourou
|
||||||
|
|
||||||
|
? '[h265 - hevc] transformers 2 1080p french ac3 6ch.'
|
||||||
|
: options: -n
|
||||||
|
audioChannels: '5.1'
|
||||||
|
audioCodec: AC3
|
||||||
|
language: fr
|
||||||
|
screenSize: 1080p
|
||||||
|
title: transformers 2
|
||||||
|
videoCodec: h265
|
||||||
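Each fixture above pairs a raw release path (the "?" line) with the fields guessit is expected to extract (the ":" block). Below is a minimal, hedged sketch of checking one such entry by hand against the vendored library; the filename and expected values are copied from the Dark City fixture earlier in this file, and guess_movie_info is the same entry point used by test_api.py later in this changeset. It is an illustration, not part of the diff.

# Sketch only: assumes the vendored guessit package is importable from the checkout.
import guessit

filename = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
expected = {'title': 'Dark City', 'year': 1998, 'format': 'BluRay',
            'screenSize': '720p', 'audioCodec': 'DTS',
            'videoCodec': 'h264', 'releaseGroup': 'CHD'}

guess = guessit.guess_movie_info(filename)
for key, value in expected.items():
    # every expected field from the YAML entry should appear in the guess
    assert guess.get(key) == value, (key, guess.get(key), value)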
473 libs/guessit/test/opensubtitles_languages_2012_05_09.txt (new file)
@@ -0,0 +1,473 @@
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
|
||||||
|
aar aa Afar, afar 0 0
|
||||||
|
abk ab Abkhazian 0 0
|
||||||
|
ace Achinese 0 0
|
||||||
|
ach Acoli 0 0
|
||||||
|
ada Adangme 0 0
|
||||||
|
ady adyghé 0 0
|
||||||
|
afa Afro-Asiatic (Other) 0 0
|
||||||
|
afh Afrihili 0 0
|
||||||
|
afr af Afrikaans 0 0
|
||||||
|
ain Ainu 0 0
|
||||||
|
aka ak Akan 0 0
|
||||||
|
akk Akkadian 0 0
|
||||||
|
alb sq Albanian 1 1
|
||||||
|
ale Aleut 0 0
|
||||||
|
alg Algonquian languages 0 0
|
||||||
|
alt Southern Altai 0 0
|
||||||
|
amh am Amharic 0 0
|
||||||
|
ang English, Old (ca.450-1100) 0 0
|
||||||
|
apa Apache languages 0 0
|
||||||
|
ara ar Arabic 1 1
|
||||||
|
arc Aramaic 0 0
|
||||||
|
arg an Aragonese 0 0
|
||||||
|
arm hy Armenian 1 0
|
||||||
|
arn Araucanian 0 0
|
||||||
|
arp Arapaho 0 0
|
||||||
|
art Artificial (Other) 0 0
|
||||||
|
arw Arawak 0 0
|
||||||
|
asm as Assamese 0 0
|
||||||
|
ast Asturian, Bable 0 0
|
||||||
|
ath Athapascan languages 0 0
|
||||||
|
aus Australian languages 0 0
|
||||||
|
ava av Avaric 0 0
|
||||||
|
ave ae Avestan 0 0
|
||||||
|
awa Awadhi 0 0
|
||||||
|
aym ay Aymara 0 0
|
||||||
|
aze az Azerbaijani 0 0
|
||||||
|
bad Banda 0 0
|
||||||
|
bai Bamileke languages 0 0
|
||||||
|
bak ba Bashkir 0 0
|
||||||
|
bal Baluchi 0 0
|
||||||
|
bam bm Bambara 0 0
|
||||||
|
ban Balinese 0 0
|
||||||
|
baq eu Basque 1 1
|
||||||
|
bas Basa 0 0
|
||||||
|
bat Baltic (Other) 0 0
|
||||||
|
bej Beja 0 0
|
||||||
|
bel be Belarusian 0 0
|
||||||
|
bem Bemba 0 0
|
||||||
|
ben bn Bengali 1 0
|
||||||
|
ber Berber (Other) 0 0
|
||||||
|
bho Bhojpuri 0 0
|
||||||
|
bih bh Bihari 0 0
|
||||||
|
bik Bikol 0 0
|
||||||
|
bin Bini 0 0
|
||||||
|
bis bi Bislama 0 0
|
||||||
|
bla Siksika 0 0
|
||||||
|
bnt Bantu (Other) 0 0
|
||||||
|
bos bs Bosnian 1 0
|
||||||
|
bra Braj 0 0
|
||||||
|
bre br Breton 1 0
|
||||||
|
btk Batak (Indonesia) 0 0
|
||||||
|
bua Buriat 0 0
|
||||||
|
bug Buginese 0 0
|
||||||
|
bul bg Bulgarian 1 1
|
||||||
|
bur my Burmese 0 0
|
||||||
|
byn Blin 0 0
|
||||||
|
cad Caddo 0 0
|
||||||
|
cai Central American Indian (Other) 0 0
|
||||||
|
car Carib 0 0
|
||||||
|
cat ca Catalan 1 1
|
||||||
|
cau Caucasian (Other) 0 0
|
||||||
|
ceb Cebuano 0 0
|
||||||
|
cel Celtic (Other) 0 0
|
||||||
|
cha ch Chamorro 0 0
|
||||||
|
chb Chibcha 0 0
|
||||||
|
che ce Chechen 0 0
|
||||||
|
chg Chagatai 0 0
|
||||||
|
chi zh Chinese 1 1
|
||||||
|
chk Chuukese 0 0
|
||||||
|
chm Mari 0 0
|
||||||
|
chn Chinook jargon 0 0
|
||||||
|
cho Choctaw 0 0
|
||||||
|
chp Chipewyan 0 0
|
||||||
|
chr Cherokee 0 0
|
||||||
|
chu cu Church Slavic 0 0
|
||||||
|
chv cv Chuvash 0 0
|
||||||
|
chy Cheyenne 0 0
|
||||||
|
cmc Chamic languages 0 0
|
||||||
|
cop Coptic 0 0
|
||||||
|
cor kw Cornish 0 0
|
||||||
|
cos co Corsican 0 0
|
||||||
|
cpe Creoles and pidgins, English based (Other) 0 0
|
||||||
|
cpf Creoles and pidgins, French-based (Other) 0 0
|
||||||
|
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
|
||||||
|
cre cr Cree 0 0
|
||||||
|
crh Crimean Tatar 0 0
|
||||||
|
crp Creoles and pidgins (Other) 0 0
|
||||||
|
csb Kashubian 0 0
|
||||||
|
cus Cushitic (Other)' couchitiques, autres langues 0 0
|
||||||
|
cze cs Czech 1 1
|
||||||
|
dak Dakota 0 0
|
||||||
|
dan da Danish 1 1
|
||||||
|
dar Dargwa 0 0
|
||||||
|
day Dayak 0 0
|
||||||
|
del Delaware 0 0
|
||||||
|
den Slave (Athapascan) 0 0
|
||||||
|
dgr Dogrib 0 0
|
||||||
|
din Dinka 0 0
|
||||||
|
div dv Divehi 0 0
|
||||||
|
doi Dogri 0 0
|
||||||
|
dra Dravidian (Other) 0 0
|
||||||
|
dua Duala 0 0
|
||||||
|
dum Dutch, Middle (ca.1050-1350) 0 0
|
||||||
|
dut nl Dutch 1 1
|
||||||
|
dyu Dyula 0 0
|
||||||
|
dzo dz Dzongkha 0 0
|
||||||
|
efi Efik 0 0
|
||||||
|
egy Egyptian (Ancient) 0 0
|
||||||
|
eka Ekajuk 0 0
|
||||||
|
elx Elamite 0 0
|
||||||
|
eng en English 1 1
|
||||||
|
enm English, Middle (1100-1500) 0 0
|
||||||
|
epo eo Esperanto 1 0
|
||||||
|
est et Estonian 1 1
|
||||||
|
ewe ee Ewe 0 0
|
||||||
|
ewo Ewondo 0 0
|
||||||
|
fan Fang 0 0
|
||||||
|
fao fo Faroese 0 0
|
||||||
|
fat Fanti 0 0
|
||||||
|
fij fj Fijian 0 0
|
||||||
|
fil Filipino 0 0
|
||||||
|
fin fi Finnish 1 1
|
||||||
|
fiu Finno-Ugrian (Other) 0 0
|
||||||
|
fon Fon 0 0
|
||||||
|
fre fr French 1 1
|
||||||
|
frm French, Middle (ca.1400-1600) 0 0
|
||||||
|
fro French, Old (842-ca.1400) 0 0
|
||||||
|
fry fy Frisian 0 0
|
||||||
|
ful ff Fulah 0 0
|
||||||
|
fur Friulian 0 0
|
||||||
|
gaa Ga 0 0
|
||||||
|
gay Gayo 0 0
|
||||||
|
gba Gbaya 0 0
|
||||||
|
gem Germanic (Other) 0 0
|
||||||
|
geo ka Georgian 1 1
|
||||||
|
ger de German 1 1
|
||||||
|
gez Geez 0 0
|
||||||
|
gil Gilbertese 0 0
|
||||||
|
gla gd Gaelic 0 0
|
||||||
|
gle ga Irish 0 0
|
||||||
|
glg gl Galician 1 1
|
||||||
|
glv gv Manx 0 0
|
||||||
|
gmh German, Middle High (ca.1050-1500) 0 0
|
||||||
|
goh German, Old High (ca.750-1050) 0 0
|
||||||
|
gon Gondi 0 0
|
||||||
|
gor Gorontalo 0 0
|
||||||
|
got Gothic 0 0
|
||||||
|
grb Grebo 0 0
|
||||||
|
grc Greek, Ancient (to 1453) 0 0
|
||||||
|
ell el Greek 1 1
|
||||||
|
grn gn Guarani 0 0
|
||||||
|
guj gu Gujarati 0 0
|
||||||
|
gwi Gwich´in 0 0
|
||||||
|
hai Haida 0 0
|
||||||
|
hat ht Haitian 0 0
|
||||||
|
hau ha Hausa 0 0
|
||||||
|
haw Hawaiian 0 0
|
||||||
|
heb he Hebrew 1 1
|
||||||
|
her hz Herero 0 0
|
||||||
|
hil Hiligaynon 0 0
|
||||||
|
him Himachali 0 0
|
||||||
|
hin hi Hindi 1 1
|
||||||
|
hit Hittite 0 0
|
||||||
|
hmn Hmong 0 0
|
||||||
|
hmo ho Hiri Motu 0 0
|
||||||
|
hrv hr Croatian 1 1
|
||||||
|
hun hu Hungarian 1 1
|
||||||
|
hup Hupa 0 0
|
||||||
|
iba Iban 0 0
|
||||||
|
ibo ig Igbo 0 0
|
||||||
|
ice is Icelandic 1 1
|
||||||
|
ido io Ido 0 0
|
||||||
|
iii ii Sichuan Yi 0 0
|
||||||
|
ijo Ijo 0 0
|
||||||
|
iku iu Inuktitut 0 0
|
||||||
|
ile ie Interlingue 0 0
|
||||||
|
ilo Iloko 0 0
|
||||||
|
ina ia Interlingua (International Auxiliary Language Asso 0 0
|
||||||
|
inc Indic (Other) 0 0
|
||||||
|
ind id Indonesian 1 1
|
||||||
|
ine Indo-European (Other) 0 0
|
||||||
|
inh Ingush 0 0
|
||||||
|
ipk ik Inupiaq 0 0
|
||||||
|
ira Iranian (Other) 0 0
|
||||||
|
iro Iroquoian languages 0 0
|
||||||
|
ita it Italian 1 1
|
||||||
|
jav jv Javanese 0 0
|
||||||
|
jpn ja Japanese 1 1
|
||||||
|
jpr Judeo-Persian 0 0
|
||||||
|
jrb Judeo-Arabic 0 0
|
||||||
|
kaa Kara-Kalpak 0 0
|
||||||
|
kab Kabyle 0 0
|
||||||
|
kac Kachin 0 0
|
||||||
|
kal kl Kalaallisut 0 0
|
||||||
|
kam Kamba 0 0
|
||||||
|
kan kn Kannada 0 0
|
||||||
|
kar Karen 0 0
|
||||||
|
kas ks Kashmiri 0 0
|
||||||
|
kau kr Kanuri 0 0
|
||||||
|
kaw Kawi 0 0
|
||||||
|
kaz kk Kazakh 1 0
|
||||||
|
kbd Kabardian 0 0
|
||||||
|
kha Khasi 0 0
|
||||||
|
khi Khoisan (Other) 0 0
|
||||||
|
khm km Khmer 1 1
|
||||||
|
kho Khotanese 0 0
|
||||||
|
kik ki Kikuyu 0 0
|
||||||
|
kin rw Kinyarwanda 0 0
|
||||||
|
kir ky Kirghiz 0 0
|
||||||
|
kmb Kimbundu 0 0
|
||||||
|
kok Konkani 0 0
|
||||||
|
kom kv Komi 0 0
|
||||||
|
kon kg Kongo 0 0
|
||||||
|
kor ko Korean 1 1
|
||||||
|
kos Kosraean 0 0
|
||||||
|
kpe Kpelle 0 0
|
||||||
|
krc Karachay-Balkar 0 0
|
||||||
|
kro Kru 0 0
|
||||||
|
kru Kurukh 0 0
|
||||||
|
kua kj Kuanyama 0 0
|
||||||
|
kum Kumyk 0 0
|
||||||
|
kur ku Kurdish 0 0
|
||||||
|
kut Kutenai 0 0
|
||||||
|
lad Ladino 0 0
|
||||||
|
lah Lahnda 0 0
|
||||||
|
lam Lamba 0 0
|
||||||
|
lao lo Lao 0 0
|
||||||
|
lat la Latin 0 0
|
||||||
|
lav lv Latvian 1 0
|
||||||
|
lez Lezghian 0 0
|
||||||
|
lim li Limburgan 0 0
|
||||||
|
lin ln Lingala 0 0
|
||||||
|
lit lt Lithuanian 1 0
|
||||||
|
lol Mongo 0 0
|
||||||
|
loz Lozi 0 0
|
||||||
|
ltz lb Luxembourgish 1 0
|
||||||
|
lua Luba-Lulua 0 0
|
||||||
|
lub lu Luba-Katanga 0 0
|
||||||
|
lug lg Ganda 0 0
|
||||||
|
lui Luiseno 0 0
|
||||||
|
lun Lunda 0 0
|
||||||
|
luo Luo (Kenya and Tanzania) 0 0
|
||||||
|
lus lushai 0 0
|
||||||
|
mac mk Macedonian 1 1
|
||||||
|
mad Madurese 0 0
|
||||||
|
mag Magahi 0 0
|
||||||
|
mah mh Marshallese 0 0
|
||||||
|
mai Maithili 0 0
|
||||||
|
mak Makasar 0 0
|
||||||
|
mal ml Malayalam 0 0
|
||||||
|
man Mandingo 0 0
|
||||||
|
mao mi Maori 0 0
|
||||||
|
map Austronesian (Other) 0 0
|
||||||
|
mar mr Marathi 0 0
|
||||||
|
mas Masai 0 0
|
||||||
|
may ms Malay 1 1
|
||||||
|
mdf Moksha 0 0
|
||||||
|
mdr Mandar 0 0
|
||||||
|
men Mende 0 0
|
||||||
|
mga Irish, Middle (900-1200) 0 0
|
||||||
|
mic Mi'kmaq 0 0
|
||||||
|
min Minangkabau 0 0
|
||||||
|
mis Miscellaneous languages 0 0
|
||||||
|
mkh Mon-Khmer (Other) 0 0
|
||||||
|
mlg mg Malagasy 0 0
|
||||||
|
mlt mt Maltese 0 0
|
||||||
|
mnc Manchu 0 0
|
||||||
|
mni Manipuri 0 0
|
||||||
|
mno Manobo languages 0 0
|
||||||
|
moh Mohawk 0 0
|
||||||
|
mol mo Moldavian 0 0
|
||||||
|
mon mn Mongolian 1 0
|
||||||
|
mos Mossi 0 0
|
||||||
|
mwl Mirandese 0 0
|
||||||
|
mul Multiple languages 0 0
|
||||||
|
mun Munda languages 0 0
|
||||||
|
mus Creek 0 0
|
||||||
|
mwr Marwari 0 0
|
||||||
|
myn Mayan languages 0 0
|
||||||
|
myv Erzya 0 0
|
||||||
|
nah Nahuatl 0 0
|
||||||
|
nai North American Indian 0 0
|
||||||
|
nap Neapolitan 0 0
|
||||||
|
nau na Nauru 0 0
|
||||||
|
nav nv Navajo 0 0
|
||||||
|
nbl nr Ndebele, South 0 0
|
||||||
|
nde nd Ndebele, North 0 0
|
||||||
|
ndo ng Ndonga 0 0
|
||||||
|
nds Low German 0 0
|
||||||
|
nep ne Nepali 0 0
|
||||||
|
new Nepal Bhasa 0 0
|
||||||
|
nia Nias 0 0
|
||||||
|
nic Niger-Kordofanian (Other) 0 0
|
||||||
|
niu Niuean 0 0
|
||||||
|
nno nn Norwegian Nynorsk 0 0
|
||||||
|
nob nb Norwegian Bokmal 0 0
|
||||||
|
nog Nogai 0 0
|
||||||
|
non Norse, Old 0 0
|
||||||
|
nor no Norwegian 1 1
|
||||||
|
nso Northern Sotho 0 0
|
||||||
|
nub Nubian languages 0 0
|
||||||
|
nwc Classical Newari 0 0
|
||||||
|
nya ny Chichewa 0 0
|
||||||
|
nym Nyamwezi 0 0
|
||||||
|
nyn Nyankole 0 0
|
||||||
|
nyo Nyoro 0 0
|
||||||
|
nzi Nzima 0 0
|
||||||
|
oci oc Occitan 1 1
|
||||||
|
oji oj Ojibwa 0 0
|
||||||
|
ori or Oriya 0 0
|
||||||
|
orm om Oromo 0 0
|
||||||
|
osa Osage 0 0
|
||||||
|
oss os Ossetian 0 0
|
||||||
|
ota Turkish, Ottoman (1500-1928) 0 0
|
||||||
|
oto Otomian languages 0 0
|
||||||
|
paa Papuan (Other) 0 0
|
||||||
|
pag Pangasinan 0 0
|
||||||
|
pal Pahlavi 0 0
|
||||||
|
pam Pampanga 0 0
|
||||||
|
pan pa Panjabi 0 0
|
||||||
|
pap Papiamento 0 0
|
||||||
|
pau Palauan 0 0
|
||||||
|
peo Persian, Old (ca.600-400 B.C.) 0 0
|
||||||
|
per fa Persian 1 1
|
||||||
|
phi Philippine (Other) 0 0
|
||||||
|
phn Phoenician 0 0
|
||||||
|
pli pi Pali 0 0
|
||||||
|
pol pl Polish 1 1
|
||||||
|
pon Pohnpeian 0 0
|
||||||
|
por pt Portuguese 1 1
|
||||||
|
pra Prakrit languages 0 0
|
||||||
|
pro Provençal, Old (to 1500) 0 0
|
||||||
|
pus ps Pushto 0 0
|
||||||
|
que qu Quechua 0 0
|
||||||
|
raj Rajasthani 0 0
|
||||||
|
rap Rapanui 0 0
|
||||||
|
rar Rarotongan 0 0
|
||||||
|
roa Romance (Other) 0 0
|
||||||
|
roh rm Raeto-Romance 0 0
|
||||||
|
rom Romany 0 0
|
||||||
|
run rn Rundi 0 0
|
||||||
|
rup Aromanian 0 0
|
||||||
|
rus ru Russian 1 1
|
||||||
|
sad Sandawe 0 0
|
||||||
|
sag sg Sango 0 0
|
||||||
|
sah Yakut 0 0
|
||||||
|
sai South American Indian (Other) 0 0
|
||||||
|
sal Salishan languages 0 0
|
||||||
|
sam Samaritan Aramaic 0 0
|
||||||
|
san sa Sanskrit 0 0
|
||||||
|
sas Sasak 0 0
|
||||||
|
sat Santali 0 0
|
||||||
|
scc sr Serbian 1 1
|
||||||
|
scn Sicilian 0 0
|
||||||
|
sco Scots 0 0
|
||||||
|
sel Selkup 0 0
|
||||||
|
sem Semitic (Other) 0 0
|
||||||
|
sga Irish, Old (to 900) 0 0
|
||||||
|
sgn Sign Languages 0 0
|
||||||
|
shn Shan 0 0
|
||||||
|
sid Sidamo 0 0
|
||||||
|
sin si Sinhalese 1 1
|
||||||
|
sio Siouan languages 0 0
|
||||||
|
sit Sino-Tibetan (Other) 0 0
|
||||||
|
sla Slavic (Other) 0 0
|
||||||
|
slo sk Slovak 1 1
|
||||||
|
slv sl Slovenian 1 1
|
||||||
|
sma Southern Sami 0 0
|
||||||
|
sme se Northern Sami 0 0
|
||||||
|
smi Sami languages (Other) 0 0
|
||||||
|
smj Lule Sami 0 0
|
||||||
|
smn Inari Sami 0 0
|
||||||
|
smo sm Samoan 0 0
|
||||||
|
sms Skolt Sami 0 0
|
||||||
|
sna sn Shona 0 0
|
||||||
|
snd sd Sindhi 0 0
|
||||||
|
snk Soninke 0 0
|
||||||
|
sog Sogdian 0 0
|
||||||
|
som so Somali 0 0
|
||||||
|
son Songhai 0 0
|
||||||
|
sot st Sotho, Southern 0 0
|
||||||
|
spa es Spanish 1 1
|
||||||
|
srd sc Sardinian 0 0
|
||||||
|
srr Serer 0 0
|
||||||
|
ssa Nilo-Saharan (Other) 0 0
|
||||||
|
ssw ss Swati 0 0
|
||||||
|
suk Sukuma 0 0
|
||||||
|
sun su Sundanese 0 0
|
||||||
|
sus Susu 0 0
|
||||||
|
sux Sumerian 0 0
|
||||||
|
swa sw Swahili 1 0
|
||||||
|
swe sv Swedish 1 1
|
||||||
|
syr Syriac 1 0
|
||||||
|
tah ty Tahitian 0 0
|
||||||
|
tai Tai (Other) 0 0
|
||||||
|
tam ta Tamil 0 0
|
||||||
|
tat tt Tatar 0 0
|
||||||
|
tel te Telugu 0 0
|
||||||
|
tem Timne 0 0
|
||||||
|
ter Tereno 0 0
|
||||||
|
tet Tetum 0 0
|
||||||
|
tgk tg Tajik 0 0
|
||||||
|
tgl tl Tagalog 1 1
|
||||||
|
tha th Thai 1 1
|
||||||
|
tib bo Tibetan 0 0
|
||||||
|
tig Tigre 0 0
|
||||||
|
tir ti Tigrinya 0 0
|
||||||
|
tiv Tiv 0 0
|
||||||
|
tkl Tokelau 0 0
|
||||||
|
tlh Klingon 0 0
|
||||||
|
tli Tlingit 0 0
|
||||||
|
tmh Tamashek 0 0
|
||||||
|
tog Tonga (Nyasa) 0 0
|
||||||
|
ton to Tonga (Tonga Islands) 0 0
|
||||||
|
tpi Tok Pisin 0 0
|
||||||
|
tsi Tsimshian 0 0
|
||||||
|
tsn tn Tswana 0 0
|
||||||
|
tso ts Tsonga 0 0
|
||||||
|
tuk tk Turkmen 0 0
|
||||||
|
tum Tumbuka 0 0
|
||||||
|
tup Tupi languages 0 0
|
||||||
|
tur tr Turkish 1 1
|
||||||
|
tut Altaic (Other) 0 0
|
||||||
|
tvl Tuvalu 0 0
|
||||||
|
twi tw Twi 0 0
|
||||||
|
tyv Tuvinian 0 0
|
||||||
|
udm Udmurt 0 0
|
||||||
|
uga Ugaritic 0 0
|
||||||
|
uig ug Uighur 0 0
|
||||||
|
ukr uk Ukrainian 1 1
|
||||||
|
umb Umbundu 0 0
|
||||||
|
und Undetermined 0 0
|
||||||
|
urd ur Urdu 1 0
|
||||||
|
uzb uz Uzbek 0 0
|
||||||
|
vai Vai 0 0
|
||||||
|
ven ve Venda 0 0
|
||||||
|
vie vi Vietnamese 1 1
|
||||||
|
vol vo Volapük 0 0
|
||||||
|
vot Votic 0 0
|
||||||
|
wak Wakashan languages 0 0
|
||||||
|
wal Walamo 0 0
|
||||||
|
war Waray 0 0
|
||||||
|
was Washo 0 0
|
||||||
|
wel cy Welsh 0 0
|
||||||
|
wen Sorbian languages 0 0
|
||||||
|
wln wa Walloon 0 0
|
||||||
|
wol wo Wolof 0 0
|
||||||
|
xal Kalmyk 0 0
|
||||||
|
xho xh Xhosa 0 0
|
||||||
|
yao Yao 0 0
|
||||||
|
yap Yapese 0 0
|
||||||
|
yid yi Yiddish 0 0
|
||||||
|
yor yo Yoruba 0 0
|
||||||
|
ypk Yupik languages 0 0
|
||||||
|
zap Zapotec 0 0
|
||||||
|
zen Zenaga 0 0
|
||||||
|
zha za Zhuang 0 0
|
||||||
|
znd Zande 0 0
|
||||||
|
zul zu Zulu 0 0
|
||||||
|
zun Zuni 0 0
|
||||||
|
rum ro Romanian 1 1
|
||||||
|
pob pb Brazilian 1 1
|
||||||
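The table above is the raw opensubtitles language dump that test_language.py (further down in this changeset) feeds to Language.fromguessit; in the actual file the five columns are tab-separated and the first row is a header. A small sketch of reading it follows; load_languages is a hypothetical helper written for illustration, not part of the library.

# Sketch only: assumes the file is UTF-8 and tab-separated, matching how the
# test_opensubtitles test below splits each row on '\t'.
import io

def load_languages(path='opensubtitles_languages_2012_05_09.txt'):
    rows = []
    with io.open(path, encoding='utf-8') as f:
        next(f)  # skip the "IdSubLanguage ISO639 ..." header row
        for line in f:
            idlang, alpha2, name, upload, web = line.rstrip('\n').split('\t')
            rows.append({'id': idlang, 'alpha2': alpha2, 'name': name,
                         'upload_enabled': upload == '1', 'web_enabled': web == '1'})
    return rows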
54 libs/guessit/test/test_api.py (new file)
@@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestApi(TestGuessit):
    def test_api(self):
        movie_path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'

        movie_info = guessit.guess_movie_info(movie_path)
        video_info = guessit.guess_video_info(movie_path)
        episode_info = guessit.guess_episode_info(movie_path)
        file_info = guessit.guess_file_info(movie_path)

        self.assertEqual(guessit.guess_file_info(movie_path, type='movie'), movie_info)
        self.assertEqual(guessit.guess_file_info(movie_path, type='video'), video_info)
        self.assertEqual(guessit.guess_file_info(movie_path, type='episode'), episode_info)

        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'movie'}), movie_info)
        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'video'}), video_info)
        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'episode'}), episode_info)

        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'episode'}, type='movie'), episode_info)  # kwargs priority other options

        movie_path_name_only = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD'
        file_info_name_only = guessit.guess_file_info(movie_path_name_only, options={"name_only": True})

        self.assertFalse('container' in file_info_name_only)
        self.assertTrue('container' in file_info)

suite = allTests(TestApi)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
45 libs/guessit/test/test_autodetect.py (new file)
@@ -0,0 +1,45 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestAutoDetect(TestGuessit):
    def testEmpty(self):
        result = guessit.guess_file_info('')
        self.assertEqual(result, {})

        result = guessit.guess_file_info('___-__')
        self.assertEqual(result, {})

        result = guessit.guess_file_info('__-.avc')
        self.assertEqual(result, {'type': 'unknown', 'extension': 'avc'})

    def testAutoDetect(self):
        self.checkMinimumFieldsCorrect(filename='autodetect.yaml',
                                       remove_type=False)


suite = allTests(TestAutoDetect)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
46 libs/guessit/test/test_autodetect_all.py (new file)
@@ -0,0 +1,46 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *

IGNORE_EPISODES = []
IGNORE_MOVIES = []


class TestAutoDetectAll(TestGuessit):
    def testAutoMatcher(self):
        self.checkMinimumFieldsCorrect(filename='autodetect.yaml',
                                       remove_type=False)

    def testAutoMatcherMovies(self):
        self.checkMinimumFieldsCorrect(filename='movies.yaml',
                                       exclude_files=IGNORE_MOVIES)

    def testAutoMatcherEpisodes(self):
        self.checkMinimumFieldsCorrect(filename='episodes.yaml',
                                       exclude_files=IGNORE_EPISODES)


suite = allTests(TestAutoDetectAll)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
45 libs/guessit/test/test_doctests.py (new file)
@@ -0,0 +1,45 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *
import guessit
import guessit.hash_ed2k
import unittest
import doctest


def load_tests(loader, tests, ignore):
    tests.addTests(doctest.DocTestSuite(guessit))
    tests.addTests(doctest.DocTestSuite(guessit.date))
    tests.addTests(doctest.DocTestSuite(guessit.fileutils))
    tests.addTests(doctest.DocTestSuite(guessit.guess))
    tests.addTests(doctest.DocTestSuite(guessit.hash_ed2k))
    tests.addTests(doctest.DocTestSuite(guessit.language))
    tests.addTests(doctest.DocTestSuite(guessit.matchtree))
    tests.addTests(doctest.DocTestSuite(guessit.textutils))
    return tests

suite = unittest.TestSuite()
load_tests(None, suite, None)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
35 libs/guessit/test/test_episode.py (new file)
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestEpisode(TestGuessit):
    def testEpisodes(self):
        self.checkMinimumFieldsCorrect(filetype='episode',
                                       filename='episodes.yaml')


suite = allTests(TestEpisode)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
46 libs/guessit/test/test_hashes.py (new file)
@@ -0,0 +1,46 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestHashes(TestGuessit):
    def test_hashes(self):
        hashes = (
            ('hash_mpc', '1MB', u'8542ad406c15c8bd'),  # TODO: Check if this value is valid
            ('hash_ed2k', '1MB', u'ed2k://|file|1MB|1048576|AA3CC5552A9931A76B61A41D306735F7|/'),  # TODO: Check if this value is valid
            ('hash_md5', '1MB', u'5d8dcbca8d8ac21766f28797d6c3954c'),
            ('hash_sha1', '1MB', u'51d2b8f3248d7ee495b7750c8da5aa3b3819de9d'),
            ('hash_md5', 'dummy.srt', u'64de6b5893cac24456c46a935ef9c359'),
            ('hash_sha1', 'dummy.srt', u'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
        )

        for hash_type, filename, expected_value in hashes:
            guess = guess_file_info(file_in_same_dir(__file__, filename), hash_type)
            computed_value = guess.get(hash_type)
            self.assertEqual(expected_value, guess.get(hash_type), "Invalid %s for %s: %s != %s" % (hash_type, filename, computed_value, expected_value))


suite = allTests(TestHashes)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
130 libs/guessit/test/test_language.py (new file)
@@ -0,0 +1,130 @@
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# GuessIt - A library for guessing information from filenames
|
||||||
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
|
#
|
||||||
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# GuessIt is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# Lesser GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the Lesser GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
from guessit.test.guessittest import *
|
||||||
|
|
||||||
|
import io
|
||||||
|
|
||||||
|
|
||||||
|
class TestLanguage(TestGuessit):
|
||||||
|
|
||||||
|
def check_languages(self, languages):
|
||||||
|
for lang1, lang2 in languages.items():
|
||||||
|
self.assertEqual(Language.fromguessit(lang1),
|
||||||
|
Language.fromguessit(lang2))
|
||||||
|
|
||||||
|
def test_addic7ed(self):
|
||||||
|
languages = {'English': 'en',
|
||||||
|
'English (US)': 'en-US',
|
||||||
|
'English (UK)': 'en-UK',
|
||||||
|
'Italian': 'it',
|
||||||
|
'Portuguese': 'pt',
|
||||||
|
'Portuguese (Brazilian)': 'pt-BR',
|
||||||
|
'Romanian': 'ro',
|
||||||
|
'Español (Latinoamérica)': 'es-MX',
|
||||||
|
'Español (España)': 'es-ES',
|
||||||
|
'Spanish (Latin America)': 'es-MX',
|
||||||
|
'Español': 'es',
|
||||||
|
'Spanish': 'es',
|
||||||
|
'Spanish (Spain)': 'es-ES',
|
||||||
|
'French': 'fr',
|
||||||
|
'Greek': 'el',
|
||||||
|
'Arabic': 'ar',
|
||||||
|
'German': 'de',
|
||||||
|
'Croatian': 'hr',
|
||||||
|
'Indonesian': 'id',
|
||||||
|
'Hebrew': 'he',
|
||||||
|
'Russian': 'ru',
|
||||||
|
'Turkish': 'tr',
|
||||||
|
'Swedish': 'se',
|
||||||
|
'Czech': 'cs',
|
||||||
|
'Dutch': 'nl',
|
||||||
|
'Hungarian': 'hu',
|
||||||
|
'Norwegian': 'no',
|
||||||
|
'Polish': 'pl',
|
||||||
|
'Persian': 'fa'}
|
||||||
|
|
||||||
|
self.check_languages(languages)
|
||||||
|
|
||||||
|
def test_subswiki(self):
|
||||||
|
languages = {'English (US)': 'en-US', 'English (UK)': 'en-UK', 'English': 'en',
|
||||||
|
'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
|
||||||
|
'Español (Latinoamérica)': 'es-MX', 'Español (España)': 'es-ES',
|
||||||
|
'Español': 'es', 'Italian': 'it', 'Català': 'ca'}
|
||||||
|
|
||||||
|
self.check_languages(languages)
|
||||||
|
|
||||||
|
def test_tvsubtitles(self):
|
||||||
|
languages = {'English': 'en', 'Español': 'es', 'French': 'fr', 'German': 'de',
|
||||||
|
'Brazilian': 'br', 'Russian': 'ru', 'Ukrainian': 'ua', 'Italian': 'it',
|
||||||
|
'Greek': 'gr', 'Arabic': 'ar', 'Hungarian': 'hu', 'Polish': 'pl',
|
||||||
|
'Turkish': 'tr', 'Dutch': 'nl', 'Portuguese': 'pt', 'Swedish': 'sv',
|
||||||
|
'Danish': 'da', 'Finnish': 'fi', 'Korean': 'ko', 'Chinese': 'cn',
|
||||||
|
'Japanese': 'jp', 'Bulgarian': 'bg', 'Czech': 'cz', 'Romanian': 'ro'}
|
||||||
|
|
||||||
|
self.check_languages(languages)
|
||||||
|
|
||||||
|
def test_opensubtitles(self):
|
||||||
|
opensubtitles_langfile = file_in_same_dir(__file__, 'opensubtitles_languages_2012_05_09.txt')
|
||||||
|
for l in [u(l).strip() for l in io.open(opensubtitles_langfile, encoding='utf-8')][1:]:
|
||||||
|
idlang, alpha2, _, upload_enabled, web_enabled = l.strip().split('\t')
|
||||||
|
# do not test languages that are too esoteric / not widely available
|
||||||
|
if int(upload_enabled) and int(web_enabled):
|
||||||
|
# check that we recognize the opensubtitles language code correctly
|
||||||
|
# and that we are able to output this code from a language
|
||||||
|
self.assertEqual(idlang, Language.fromguessit(idlang).opensubtitles)
|
||||||
|
if alpha2:
|
||||||
|
# check we recognize the opensubtitles 2-letter code correctly
|
||||||
|
self.check_languages({idlang: alpha2})
|
||||||
|
|
||||||
|
def test_tmdb(self):
|
||||||
|
# examples from http://api.themoviedb.org/2.1/language-tags
|
||||||
|
for lang in ['en-US', 'en-CA', 'es-MX', 'fr-PF']:
|
||||||
|
self.assertEqual(lang, str(Language.fromguessit(lang)))
|
||||||
|
|
||||||
|
def test_subtitulos(self):
|
||||||
|
languages = {'English (US)': 'en-US', 'English (UK)': 'en-UK', 'English': 'en',
|
||||||
|
'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
|
||||||
|
'Español (Latinoamérica)': 'es-MX', 'Español (España)': 'es-ES',
|
||||||
|
'Español': 'es', 'Italian': 'it', 'Català': 'ca'}
|
||||||
|
|
||||||
|
self.check_languages(languages)
|
||||||
|
|
||||||
|
def test_thesubdb(self):
|
||||||
|
languages = {'af': 'af', 'cs': 'cs', 'da': 'da', 'de': 'de', 'en': 'en', 'es': 'es', 'fi': 'fi',
|
||||||
|
'fr': 'fr', 'hu': 'hu', 'id': 'id', 'it': 'it', 'la': 'la', 'nl': 'nl', 'no': 'no',
|
||||||
|
'oc': 'oc', 'pl': 'pl', 'pt': 'pt', 'ro': 'ro', 'ru': 'ru', 'sl': 'sl', 'sr': 'sr',
|
||||||
|
'sv': 'sv', 'tr': 'tr'}
|
||||||
|
|
||||||
|
self.check_languages(languages)
|
||||||
|
|
||||||
|
def test_exceptions(self):
|
||||||
|
self.assertEqual(Language.fromguessit('br'), Language.fromguessit('pt(br)'))
|
||||||
|
|
||||||
|
self.assertEqual(Language.fromguessit('unknown'),
|
||||||
|
Language.fromguessit('und'))
|
||||||
|
|
||||||
|
|
||||||
|
suite = allTests(TestLanguage)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
TextTestRunner(verbosity=2).run(suite)
|
||||||
69 libs/guessit/test/test_main.py (new file)
@@ -0,0 +1,69 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *
from guessit.fileutils import split_path, file_in_same_dir
from guessit.textutils import strip_brackets, str_replace, str_fill
from guessit import PY2
from guessit import __main__

if PY2:
    from StringIO import StringIO
else:
    from io import StringIO


class TestMain(TestGuessit):
    def setUp(self):
        self._stdout = sys.stdout
        string_out = StringIO()
        sys.stdout = string_out

    def tearDown(self):
        sys.stdout = self._stdout

    def test_list_properties(self):
        __main__.main(["-p"], False)
        __main__.main(["-V"], False)

    def test_list_transformers(self):
        __main__.main(["--transformers"], False)
        __main__.main(["-V", "--transformers"], False)

    def test_demo(self):
        __main__.main(["-d"], False)

    def test_filename(self):
        __main__.main(["A.Movie.2014.avi"], False)
        __main__.main(["A.Movie.2014.avi", "A.2nd.Movie.2014.avi"], False)
        __main__.main(["-y", "A.Movie.2014.avi"], False)
        __main__.main(["-a", "A.Movie.2014.avi"], False)
        __main__.main(["-v", "A.Movie.2014.avi"], False)
        __main__.main(["-t", "movie", "A.Movie.2014.avi"], False)
        __main__.main(["-t", "episode", "A.Serie.S02E06.avi"], False)
        __main__.main(["-i", "hash_mpc", file_in_same_dir(__file__, "1MB")], False)
        __main__.main(["-i", "hash_md5", file_in_same_dir(__file__, "1MB")], False)

suite = allTests(TestMain)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
libs/guessit/test/test_matchtree.py  (new file, 93 lines)
@@ -0,0 +1,93 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *

from guessit.transfo.guess_release_group import GuessReleaseGroup
from guessit.transfo.guess_properties import GuessProperties
from guessit.matchtree import BaseMatchTree

keywords = yaml.load("""

? Xvid PROPER
: videoCodec: Xvid
  other: PROPER

? PROPER-Xvid
: videoCodec: Xvid
  other: PROPER

""")


def guess_info(string, options=None):
    mtree = MatchTree(string)
    GuessReleaseGroup().process(mtree, options)
    GuessProperties().process(mtree, options)
    return mtree.matched()


class TestMatchTree(TestGuessit):
    def test_base_tree(self):
        t = BaseMatchTree('One Two Three(Three) Four')
        t.partition((3, 7, 20))
        leaves = list(t.leaves())

        self.assertEqual(leaves[0].span, (0, 3))

        self.assertEqual('One', leaves[0].value)
        self.assertEqual(' Two', leaves[1].value)
        self.assertEqual(' Three(Three)', leaves[2].value)
        self.assertEqual(' Four', leaves[3].value)

        leaves[2].partition((1, 6, 7, 12))
        three_leaves = list(leaves[2].leaves())

        self.assertEqual('Three', three_leaves[1].value)
        self.assertEqual('Three', three_leaves[3].value)

        leaves = list(t.leaves())

        self.assertEqual(len(leaves), 8)

        self.assertEqual(leaves[5], three_leaves[3])

        self.assertEqual(t.previous_leaf(leaves[5]), leaves[4])
        self.assertEqual(t.next_leaf(leaves[5]), leaves[6])

        self.assertEqual(t.next_leaves(leaves[5]), [leaves[6], leaves[7]])
        self.assertEqual(t.previous_leaves(leaves[5]), [leaves[4], leaves[3], leaves[2], leaves[1], leaves[0]])

        self.assertEqual(t.next_leaf(leaves[7]), None)
        self.assertEqual(t.previous_leaf(leaves[0]), None)

        self.assertEqual(t.next_leaves(leaves[7]), [])
        self.assertEqual(t.previous_leaves(leaves[0]), [])

    def test_match(self):
        self.checkFields(keywords, guess_info)


suite = allTests(TestMatchTree)


if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
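BaseMatchTree.partition, exercised in test_base_tree above, splits a node at character offsets into adjacent leaves. The same behaviour, shown standalone with only the API the test already imports (values are the ones the test asserts):

# Standalone illustration of the partition behaviour checked in test_base_tree.
from guessit.matchtree import BaseMatchTree

t = BaseMatchTree('One Two Three(Three) Four')
t.partition((3, 7, 20))  # cut points are character offsets into the string
print([leaf.value for leaf in t.leaves()])
# ['One', ' Two', ' Three(Three)', ' Four']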
libs/guessit/test/test_movie.py  (new file, 35 lines)
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestMovie(TestGuessit):
    def testMovies(self):
        self.checkMinimumFieldsCorrect(filetype='movie',
                                       filename='movies.yaml')


suite = allTests(TestMovie)


if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
libs/guessit/test/test_quality.py  (new file, 126 lines)
@@ -0,0 +1,126 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.quality import best_quality, best_quality_properties
from guessit.containers import QualitiesContainer
from guessit.test.guessittest import *


class TestQuality(TestGuessit):
    def test_container(self):
        container = QualitiesContainer()

        container.register_quality('color', 'red', 10)
        container.register_quality('color', 'orange', 20)
        container.register_quality('color', 'green', 30)

        container.register_quality('context', 'sun', 100)
        container.register_quality('context', 'sea', 200)
        container.register_quality('context', 'sex', 300)

        g1 = Guess()
        g1['color'] = 'red'

        g2 = Guess()
        g2['color'] = 'green'

        g3 = Guess()
        g3['color'] = 'orange'

        q3 = container.rate_quality(g3)
        self.assertEqual(q3, 20, "ORANGE should be rated 20. Don't ask why!")

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!")

        g1['context'] = 'sex'
        g2['context'] = 'sun'

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q1 > q2, "SEX should be greater than SUN. Don't ask why!")

        self.assertEqual(container.best_quality(g1, g2), g1, "RED&SEX should be better than GREEN&SUN. Don't ask why!")

        self.assertEqual(container.best_quality_properties(['color'], g1, g2), g2, "GREEN should be better than RED. Don't ask why!")

        self.assertEqual(container.best_quality_properties(['context'], g1, g2), g1, "SEX should be better than SUN. Don't ask why!")

        q1 = container.rate_quality(g1, 'color')
        q2 = container.rate_quality(g2, 'color')

        self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!")

        container.unregister_quality('context', 'sex')
        container.unregister_quality('context', 'sun')

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q2 > q1, "GREEN&SUN should be greater than RED&SEX. Don't ask why!")

        g3['context'] = 'sea'
        container.unregister_quality('context', 'sea')

        q3 = container.rate_quality(g3, 'context')
        self.assertEqual(q3, 0, "Context should be unregistered.")

        container.unregister_quality('color')
        q3 = container.rate_quality(g3, 'color')

        self.assertEqual(q3, 0, "Color should be unregistered.")

        container.clear_qualities()

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q1 == q2 == 0, "Empty quality container should rate each guess to 0")

    def test_quality_transformers(self):
        guess_720p = guessit.guess_file_info("2012.2009.720p.BluRay.x264.DTS WiKi.mkv")
        guess_1080p = guessit.guess_file_info("2012.2009.1080p.BluRay.x264.MP3 WiKi.mkv")

        self.assertTrue('audioCodec' in guess_720p, "audioCodec should be present")
        self.assertTrue('audioCodec' in guess_1080p, "audioCodec should be present")
        self.assertTrue('screenSize' in guess_720p, "screenSize should be present")
        self.assertTrue('screenSize' in guess_1080p, "screenSize should be present")

        best_quality_guess = best_quality(guess_720p, guess_1080p)

        self.assertTrue(guess_1080p == best_quality_guess, "1080p+MP3 is not the best global quality")

        best_quality_guess = best_quality_properties(['screenSize'], guess_720p, guess_1080p)

        self.assertTrue(guess_1080p == best_quality_guess, "1080p is not the best screenSize")

        best_quality_guess = best_quality_properties(['audioCodec'], guess_720p, guess_1080p)

        self.assertTrue(guess_720p == best_quality_guess, "DTS is not the best audioCodec")


suite = allTests(TestQuality)


if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
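The quality machinery tested above ranks competing guesses by per-property weights. A minimal sketch using the same QualitiesContainer API as the test; the property name and weights below are illustrative only, not guessit's built-in defaults:

# Sketch: rank two guesses by a registered quality weight (API as in test_container above).
from guessit.containers import QualitiesContainer
from guessit import Guess

container = QualitiesContainer()
container.register_quality('screenSize', '720p', 10)   # illustrative weights
container.register_quality('screenSize', '1080p', 20)

sd, hd = Guess(), Guess()
sd['screenSize'] = '720p'
hd['screenSize'] = '1080p'

assert container.rate_quality(hd) > container.rate_quality(sd)
assert container.best_quality(sd, hd) == hd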
libs/guessit/test/test_utils.py  (new file, 163 lines)
@@ -0,0 +1,163 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *
from guessit.fileutils import split_path
from guessit.textutils import strip_brackets, str_replace, str_fill, from_camel, is_camel,\
    levenshtein, reorder_title
from guessit import PY2
from guessit.date import search_date, search_year
from datetime import datetime, date, timedelta


class TestUtils(TestGuessit):
    def test_splitpath(self):
        alltests = {False: {'/usr/bin/smewt': ['/', 'usr', 'bin', 'smewt'],
                            'relative_path/to/my_folder/': ['relative_path', 'to', 'my_folder'],
                            '//some/path': ['//', 'some', 'path'],
                            '//some//path': ['//', 'some', 'path'],
                            '///some////path': ['///', 'some', 'path']
                            },
                    True: {'C:\\Program Files\\Smewt\\smewt.exe': ['C:\\', 'Program Files', 'Smewt', 'smewt.exe'],
                           'Documents and Settings\\User\\config': ['Documents and Settings', 'User', 'config'],
                           'C:\\Documents and Settings\\User\\config': ['C:\\', 'Documents and Settings', 'User', 'config'],
                           # http://bugs.python.org/issue19945
                           '\\\\netdrive\\share': ['\\\\', 'netdrive', 'share'] if PY2 else ['\\\\netdrive\\share'],
                           '\\\\netdrive\\share\\folder': ['\\\\', 'netdrive', 'share', 'folder'] if PY2 else ['\\\\netdrive\\share\\', 'folder'],
                           }
                    }
        tests = alltests[sys.platform == 'win32']
        for path, split in tests.items():
            self.assertEqual(split, split_path(path))

    def test_strip_brackets(self):
        allTests = (('', ''),
                    ('[test]', 'test'),
                    ('{test2}', 'test2'),
                    ('(test3)', 'test3'),
                    ('(test4]', '(test4]'),
                    )

        for i, e in allTests:
            self.assertEqual(e, strip_brackets(i))

    def test_levenshtein(self):
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmno"), 0)
        self.assertEqual(levenshtein("abcdef ghijk lmnop", "abcdef ghijk lmno"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmn"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnp"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnq"), 1)
        self.assertEqual(levenshtein("cbcdef ghijk lmno", "abcdef ghijk lmnq"), 2)
        self.assertEqual(levenshtein("cbcdef ghihk lmno", "abcdef ghijk lmnq"), 3)

    def test_reorder_title(self):
        self.assertEqual(reorder_title("Simpsons, The"), "The Simpsons")
        self.assertEqual(reorder_title("Simpsons,The"), "The Simpsons")
        self.assertEqual(reorder_title("Simpsons,Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")
        self.assertEqual(reorder_title("Simpsons, Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")

    def test_camel(self):
        self.assertEqual("", from_camel(""))

        self.assertEqual("Hello world", str_replace("Hello World", 6, 'w'))
        self.assertEqual("Hello *****", str_fill("Hello World", (6, 11), '*'))

        self.assertTrue("This is camel", from_camel("ThisIsCamel"))

        self.assertEqual('camel case', from_camel('camelCase'))
        self.assertEqual('A case', from_camel('ACase'))
        self.assertEqual('MiXedCaSe is not camel case', from_camel('MiXedCaSe is not camelCase'))

        self.assertEqual("This is camel cased title", from_camel("ThisIsCamelCasedTitle"))
        self.assertEqual("This is camel CASED title", from_camel("ThisIsCamelCASEDTitle"))

        self.assertEqual("These are camel CASED title", from_camel("TheseAreCamelCASEDTitle"))

        self.assertEqual("Give a camel case string", from_camel("GiveACamelCaseString"))

        self.assertEqual("Death TO camel case", from_camel("DeathTOCamelCase"))
        self.assertEqual("But i like java too:)", from_camel("ButILikeJavaToo:)"))

        self.assertEqual("Beatdown french DVD rip.mkv", from_camel("BeatdownFrenchDVDRip.mkv"))
        self.assertEqual("DO NOTHING ON UPPER CASE", from_camel("DO NOTHING ON UPPER CASE"))

        self.assertFalse(is_camel("this_is_not_camel"))
        self.assertTrue(is_camel("ThisIsCamel"))

        self.assertEqual("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv", from_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))
        self.assertFalse(is_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))

        self.assertEqual("A2LiNE", from_camel("A2LiNE"))

    def test_date(self):
        self.assertEqual(search_year(' in the year 2000... '), (2000, (13, 17)))
        self.assertEqual(search_year(' they arrived in 1492. '), (None, None))

        today = date.today()
        today_year_2 = int(str(today.year)[2:])

        future = today + timedelta(days=1000)
        future_year_2 = int(str(future.year)[2:])

        past = today - timedelta(days=10000)
        past_year_2 = int(str(past.year)[2:])

        self.assertEqual(search_date(' Something before 2002-04-22 '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' 2002-04-22 Something after '), (date(2002, 4, 22), (1, 11)))

        self.assertEqual(search_date(' This happened on 2002-04-22. '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' This happened on 22-04-2002. '), (date(2002, 4, 22), (18, 28)))

        self.assertEqual(search_date(' This happened on 13-04-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
        self.assertEqual(search_date(' This happened on 22-04-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
        self.assertEqual(search_date(' This happened on 20-04-%s. ' % (past_year_2)), (date(past.year, 4, 20), (18, 26)))

        self.assertEqual(search_date(' This happened on 13-06-14. ', year_first=True), (date(2013, 6, 14), (18, 26)))
        self.assertEqual(search_date(' This happened on 13-05-14. ', year_first=False), (date(2014, 5, 13), (18, 26)))

        self.assertEqual(search_date(' This happened on 04-13-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-22-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-20-%s. ' % (past_year_2)), (date(past.year, 4, 20), (18, 26)))

        self.assertEqual(search_date(' This happened on 35-12-%s. ' % (today_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on 37-18-%s. ' % (future_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on 44-42-%s. ' % (past_year_2)), (None, None))

        self.assertEqual(search_date(' This happened on %s. ' % (today, )), (today, (18, 28)))
        self.assertEqual(search_date(' This happened on %s. ' % (future, )), (future, (18, 28)))
        self.assertEqual(search_date(' This happened on %s. ' % (past, )), (past, (18, 28)))

        self.assertEqual(search_date(' released date: 04-03-1901? '), (None, None))

        self.assertEqual(search_date(' There\'s no date in here. '), (None, None))

        self.assertEqual(search_date(' Something 01-02-03 '), (date(2003, 2, 1), (11, 19)))
        self.assertEqual(search_date(' Something 01-02-03 ', year_first=False, day_first=True), (date(2003, 2, 1), (11, 19)))
        self.assertEqual(search_date(' Something 01-02-03 ', year_first=True), (date(2001, 2, 3), (11, 19)))
        self.assertEqual(search_date(' Something 01-02-03 ', day_first=False), (date(2003, 1, 2), (11, 19)))


suite = allTests(TestUtils)


if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
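test_date above also documents how ambiguous all-numeric dates are resolved. The defaults, restated as a quick reference (expected values are taken directly from the assertions):

# How search_date resolves an ambiguous 'dd-mm-yy' string (values from test_date above).
from guessit.date import search_date

print(search_date(' Something 01-02-03 '))                   # (date(2003, 2, 1), (11, 19)) - day first, year last by default
print(search_date(' Something 01-02-03 ', year_first=True))  # (date(2001, 2, 3), (11, 19))
print(search_date(' Something 01-02-03 ', day_first=False))  # (date(2003, 1, 2), (11, 19))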
@@ -1,24 +1,25 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
-# Smewt - A smart collection manager
-# Copyright (c) 2008-2012 Nicolas Wack <wackou@gmail.com>
+# GuessIt - A library for guessing information from filenames
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
-# Smewt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
+# GuessIt is free software; you can redistribute it and/or modify it under
+# the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
-# Smewt is distributed in the hope that it will be useful,
+# GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
+# Lesser GNU General Public License for more details.
 #
-# You should have received a copy of the GNU General Public License
+# You should have received a copy of the Lesser GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 from guessit import s
 from guessit.patterns import sep
 import functools
@@ -27,6 +28,7 @@ import re
 
 # string-related functions
 
 
+
 def normalize_unicode(s):
     return unicodedata.normalize('NFC', s)
@@ -43,41 +45,63 @@ def strip_brackets(s):
     return s
 
 
-def clean_string(st):
+_dotted_rexp = re.compile(r'(?:\W|^)(([A-Za-z]\.){2,}[A-Za-z]\.?)')
+
+
+def clean_default(st):
     for c in sep:
         # do not remove certain chars
         if c in ['-', ',']:
             continue
 
+        if c == '.':
+            # we should not remove the dots for acronyms and such
+            dotted = _dotted_rexp.search(st)
+            if dotted:
+                s = dotted.group(1)
+                exclude_begin, exclude_end = dotted.span(1)
+
+                st = (st[:exclude_begin].replace(c, ' ') +
+                      st[exclude_begin:exclude_end] +
+                      st[exclude_end:].replace(c, ' '))
+                continue
+
         st = st.replace(c, ' ')
 
     parts = st.split()
     result = ' '.join(p for p in parts if p != '')
 
     # now also remove dashes on the outer part of the string
-    while result and result[0] in sep:
+    while result and result[0] in '-':
         result = result[1:]
-    while result and result[-1] in sep:
+    while result and result[-1] in '-':
         result = result[:-1]
 
     return result
 
 
 _words_rexp = re.compile('\w+', re.UNICODE)
 
 
 def find_words(s):
     return _words_rexp.findall(s.replace('_', ' '))
 
 
-def reorder_title(title):
+def iter_words(s):
+    return _words_rexp.finditer(s.replace('_', ' '))
+
+
+def reorder_title(title, articles=('the',), separators=(',', ', ')):
     ltitle = title.lower()
-    if ltitle[-4:] == ',the':
-        return title[-3:] + ' ' + title[:-4]
-    if ltitle[-5:] == ', the':
-        return title[-3:] + ' ' + title[:-5]
+    for article in articles:
+        for separator in separators:
+            suffix = separator + article
+            if ltitle[-len(suffix):] == suffix:
+                return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
     return title
 
 
 def str_replace(string, pos, c):
-    return string[:pos] + c + string[pos+1:]
+    return string[:pos] + c + string[pos + 1:]
 
 
 def str_fill(string, region, c):
@@ -85,7 +109,6 @@ def str_fill(string, region, c):
     return string[:start] + c * (end - start) + string[end:]
 
 
-
 def levenshtein(a, b):
     if not a:
         return len(b)
@@ -95,25 +118,25 @@ def levenshtein(a, b):
     m = len(a)
     n = len(b)
     d = []
-    for i in range(m+1):
-        d.append([0] * (n+1))
+    for i in range(m + 1):
+        d.append([0] * (n + 1))
 
-    for i in range(m+1):
+    for i in range(m + 1):
         d[i][0] = i
 
-    for j in range(n+1):
+    for j in range(n + 1):
         d[0][j] = j
 
-    for i in range(1, m+1):
-        for j in range(1, n+1):
-            if a[i-1] == b[j-1]:
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            if a[i - 1] == b[j - 1]:
                 cost = 0
             else:
                 cost = 1
 
-            d[i][j] = min(d[i-1][j] + 1, # deletion
-                          d[i][j-1] + 1, # insertion
-                          d[i-1][j-1] + cost # substitution
+            d[i][j] = min(d[i - 1][j] + 1, # deletion
+                          d[i][j - 1] + 1, # insertion
+                          d[i - 1][j - 1] + cost # substitution
                           )
 
     return d[m][n]
@@ -151,7 +174,7 @@ def find_first_level_groups_span(string, enclosing):
                 end = i
                 if not depth:
                     # we emptied our stack, so we have a 1st level group
-                    result.append((start, end+1))
+                    result.append((start, end + 1))
             except IndexError:
                 # we closed a group which was not opened before
                 pass
@@ -172,7 +195,7 @@ def split_on_groups(string, groups):
     """
     if not groups:
-        return [ string ]
+        return [string]
 
     boundaries = sorted(set(functools.reduce(lambda l, x: l + list(x), groups, [])))
     if boundaries[0] != 0:
@@ -180,10 +203,10 @@ def split_on_groups(string, groups):
     if boundaries[-1] != len(string):
         boundaries.append(len(string))
 
-    groups = [ string[start:end] for start, end in zip(boundaries[:-1],
-                                                        boundaries[1:]) ]
+    groups = [string[start:end] for start, end in zip(boundaries[:-1],
                                                       boundaries[1:])]
 
-    return [ g for g in groups if g ] # return only non-empty groups
+    return [g for g in groups if g]  # return only non-empty groups
 
 
 def find_first_level_groups(string, enclosing, blank_sep=None):
@@ -219,6 +242,114 @@ def find_first_level_groups(string, enclosing, blank_sep=None):
     if blank_sep:
         for start, end in groups:
             string = str_replace(string, start, blank_sep)
-            string = str_replace(string, end-1, blank_sep)
+            string = str_replace(string, end - 1, blank_sep)
 
     return split_on_groups(string, groups)
+
+
+_camel_word2_set = set(('is', 'to',))
+_camel_word3_set = set(('the',))
+
+
+def _camel_split_and_lower(string, i):
+    """Retrieves a tuple (need_split, need_lower)
+
+    need_split is True if this char is a first letter in a camelCasedString.
+    need_lower is True if this char should be lowercased.
+    """
+
+    def islower(c):
+        return c.isalpha() and not c.isupper()
+
+    previous_char2 = string[i - 2] if i > 1 else None
+    previous_char = string[i - 1] if i > 0 else None
+    char = string[i]
+    next_char = string[i + 1] if i + 1 < len(string) else None
+    next_char2 = string[i + 2] if i + 2 < len(string) else None
+
+    char_upper = char.isupper()
+    char_lower = islower(char)
+
+    # previous_char2_lower = islower(previous_char2) if previous_char2 else False
+    previous_char2_upper = previous_char2.isupper() if previous_char2 else False
+
+    previous_char_lower = islower(previous_char) if previous_char else False
+    previous_char_upper = previous_char.isupper() if previous_char else False
+
+    next_char_upper = next_char.isupper() if next_char else False
+    next_char_lower = islower(next_char) if next_char else False
+
+    next_char2_upper = next_char2.isupper() if next_char2 else False
+    # next_char2_lower = islower(next_char2) if next_char2 else False
+
+    mixedcase_word = (previous_char_upper and char_lower and next_char_upper) or \
+                     (previous_char_lower and char_upper and next_char_lower and next_char2_upper) or \
+                     (previous_char2_upper and previous_char_lower and char_upper)
+    if mixedcase_word:
+        word2 = (char + next_char).lower() if next_char else None
+        word3 = (char + next_char + next_char2).lower() if next_char and next_char2 else None
+        word2b = (previous_char2 + previous_char).lower() if previous_char2 and previous_char else None
+        if word2 in _camel_word2_set or word2b in _camel_word2_set or word3 in _camel_word3_set:
+            mixedcase_word = False
+
+    uppercase_word = previous_char_upper and char_upper and next_char_upper or (char_upper and next_char_upper and next_char2_upper)
+
+    need_split = char_upper and previous_char_lower and not mixedcase_word
+
+    if not need_split:
+        previous_char_upper = string[i - 1].isupper() if i > 0 else False
+        next_char_lower = (string[i + 1].isalpha() and not string[i + 1].isupper()) if i + 1 < len(string) else False
+        need_split = char_upper and previous_char_upper and next_char_lower
+        uppercase_word = previous_char_upper and not next_char_lower
+
+    need_lower = not uppercase_word and not mixedcase_word and need_split
+
+    return (need_split, need_lower)
+
+
+def is_camel(string):
+    """
+    >>> is_camel('dogEATDog')
+    True
+    >>> is_camel('DeathToCamelCase')
+    True
+    >>> is_camel('death_to_camel_case')
+    False
+    >>> is_camel('TheBest')
+    True
+    >>> is_camel('The Best')
+    False
+    """
+    for i in range(0, len(string)):
+        need_split, _ = _camel_split_and_lower(string, i)
+        if need_split:
+            return True
+    return False
+
+
+def from_camel(string):
+    """
+    >>> from_camel('dogEATDog') == 'dog EAT dog'
+    True
+    >>> from_camel('DeathToCamelCase') == 'Death to camel case'
+    True
+    >>> from_camel('TheBest') == 'The best'
+    True
+    >>> from_camel('MiXedCaSe is not camelCase') == 'MiXedCaSe is not camel case'
+    True
+    """
+    if not string:
+        return string
+    pieces = []
+
+    for i in range(0, len(string)):
+        char = string[i]
+        need_split, need_lower = _camel_split_and_lower(string, i)
+        if need_split:
+            pieces.append(' ')
+
+        if need_lower:
+            pieces.append(char.lower())
+        else:
+            pieces.append(char)
+    return ''.join(pieces)
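The reworked helpers in the diff above are easiest to see on concrete inputs; a small sketch, with expected values taken from the assertions in test_utils.py:

# Exercising the reworked textutils helpers (expected outputs from test_utils.py above).
from guessit.textutils import reorder_title, from_camel, is_camel

print(reorder_title("Simpsons, The"))                                        # 'The Simpsons'
print(reorder_title("Simpsons, Les", articles=('the', 'le', 'la', 'les')))   # 'Les Simpsons'

print(from_camel("BeatdownFrenchDVDRip.mkv"))    # 'Beatdown french DVD rip.mkv'
print(is_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))           # False - dotted release names are left alone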
libs/guessit/tlds-alpha-by-domain.txt  (new file, 341 lines; one TLD per line in the file, wrapped here)
@@ -0,0 +1,341 @@
# Version 2013112900, Last Updated Fri Nov 29 07:07:01 2013 UTC
AC AD AE AERO AF AG AI AL AM AN AO AQ AR ARPA AS ASIA AT AU AW AX AZ
BA BB BD BE BF BG BH BI BIKE BIZ BJ BM BN BO BR BS BT BV BW BY BZ
CA CAMERA CAT CC CD CF CG CH CI CK CL CLOTHING CM CN CO COM CONSTRUCTION CONTRACTORS COOP CR CU CV CW CX CY CZ
DE DIAMONDS DIRECTORY DJ DK DM DO DZ
EC EDU EE EG ENTERPRISES EQUIPMENT ER ES ESTATE ET EU
FI FJ FK FM FO FR
GA GALLERY GB GD GE GF GG GH GI GL GM GN GOV GP GQ GR GRAPHICS GS GT GU GURU GW GY
HK HM HN HOLDINGS HR HT HU
ID IE IL IM IN INFO INT IO IQ IR IS IT
JE JM JO JOBS JP
KE KG KH KI KITCHEN KM KN KP KR KW KY KZ
LA LAND LB LC LI LIGHTING LK LR LS LT LU LV LY
MA MC MD ME MG MH MIL MK ML MM MN MO MOBI MP MQ MR MS MT MU MUSEUM MV MW MX MY MZ
NA NAME NC NE NET NF NG NI NL NO NP NR NU NZ
OM ORG
PA PE PF PG PH PHOTOGRAPHY PK PL PLUMBING PM PN POST PR PRO PS PT PW PY
QA
RE RO RS RU RW
SA SB SC SD SE SEXY SG SH SI SINGLES SJ SK SL SM SN SO SR ST SU SV SX SY SZ
TATTOO TC TD TECHNOLOGY TEL TF TG TH TIPS TJ TK TL TM TN TO TODAY TP TR TRAVEL TT TV TW TZ
UA UG UK US UY UZ
VA VC VE VENTURES VG VI VN VOYAGE VU
WF WS
XN--3E0B707E XN--45BRJ9C XN--80AO21A XN--80ASEHDB XN--80ASWG XN--90A3AC XN--CLCHC0EA0B2G2A9GCD
XN--FIQS8S XN--FIQZ9S XN--FPCRJ9C3D XN--FZC2C9E2C XN--GECRJ9C XN--H2BRJ9C XN--J1AMH XN--J6W193G
XN--KPRW13D XN--KPRY57D XN--L1ACC XN--LGBBAT1AD8J XN--MGB9AWBF XN--MGBA3A4F16A XN--MGBAAM7A8H
XN--MGBAYH7GPA XN--MGBBH1A71E XN--MGBC0A9AZCG XN--MGBERP4A5D4AR XN--MGBX4CD0AB XN--NGBC5AZD
XN--O3CW4H XN--OGBPF8FL XN--P1AI XN--PGBS0DH XN--Q9JYB4C XN--S9BRJ9C XN--UNUP4Y XN--WGBH1C
XN--WGBL6A XN--XKC2AL3HYE2A XN--XKC2DL3A5EE0H XN--YFRO4I67O XN--YGBI2AMMX XXX
YE YT
ZA ZM ZW
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,92 +18,13 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit import base_text_type, Guess
-from guessit.patterns import canonical_form
-from guessit.textutils import clean_string
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def found_property(node, name, confidence):
-    node.guess = Guess({name: node.clean_value}, confidence=confidence, raw=node.value)
-    log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
-
-
-def format_guess(guess):
-    """Format all the found values to their natural type.
-    For instance, a year would be stored as an int value, etc...
-
-    Note that this modifies the dictionary given as input.
-    """
-    for prop, value in guess.items():
-        if prop in ('season', 'episodeNumber', 'year', 'cdNumber',
-                    'cdNumberTotal', 'bonusNumber', 'filmNumber'):
-            guess[prop] = int(guess[prop])
-        elif isinstance(value, base_text_type):
-            if prop in ('edition',):
-                value = clean_string(value)
-            guess[prop] = canonical_form(value).replace('\\', '')
-
-    return guess
-
-
-def find_and_split_node(node, strategy, logger):
-    string = ' %s ' % node.value  # add sentinels
-    for matcher, confidence, args, kwargs in strategy:
-        all_args = [string]
-        if getattr(matcher, 'use_node', False):
-            all_args.append(node)
-        if args:
-            all_args.append(args)
-
-        if kwargs:
-            result, span = matcher(*all_args, **kwargs)
-        else:
-            result, span = matcher(*all_args)
-
-        if result:
-            # readjust span to compensate for sentinels
-            span = (span[0] - 1, span[1] - 1)
-
-            if isinstance(result, Guess):
-                if confidence is None:
-                    confidence = result.confidence(list(result.keys())[0])
-            else:
-                if confidence is None:
-                    confidence = 1.0
-
-            guess = format_guess(Guess(result, confidence=confidence, raw=string[span[0] + 1:span[1] + 1]))
-            msg = 'Found with confidence %.2f: %s' % (confidence, guess)
-            (logger or log).debug(msg)
-
-            node.partition(span)
-            absolute_span = (span[0] + node.offset, span[1] + node.offset)
-            for child in node.children:
-                if child.span == absolute_span:
-                    child.guess = guess
-                else:
-                    find_and_split_node(child, strategy, logger)
-            return
-
-
-class SingleNodeGuesser(object):
-    def __init__(self, guess_func, confidence, logger, *args, **kwargs):
-        self.guess_func = guess_func
-        self.confidence = confidence
-        self.logger = logger
-        self.args = args
-        self.kwargs = kwargs
-
-    def process(self, mtree):
-        # strategy is a list of pairs (guesser, confidence)
-        # - if the guesser returns a guessit.Guess and confidence is specified,
-        # it will override it, otherwise it will leave the guess confidence
-        # - if the guesser returns a simple dict as a guess and confidence is
-        # specified, it will use it, or 1.0 otherwise
-        strategy = [ (self.guess_func, self.confidence, self.args, self.kwargs) ]
-
-        for node in mtree.unidentified_leaves():
-            find_and_split_node(node, strategy, self.logger)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+
+class TransformerException(Exception):
+    def __init__(self, transformer, message):
+        # Call the base class constructor with the parameters it needs
+        Exception.__init__(self, message)
+
+        self.transformer = transformer
libs/guessit/transfo/expected_series.py  (new file, 60 lines)
@@ -0,0 +1,60 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.containers import PropertiesContainer
from guessit.matcher import GuessFinder

from guessit.plugins.transformers import Transformer

import re


class ExpectedSeries(Transformer):
    def __init__(self):
        Transformer.__init__(self, 230)

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        naming_opts.add_argument('-S', '--expected-series', action='append', dest='expected_series',
                                 help='Expected series to parse (can be used multiple times)')

    def should_process(self, mtree, options=None):
        return options and options.get('expected_series')

    def expected_series(self, string, node=None, options=None):
        container = PropertiesContainer(enhance=True, canonical_from_pattern=False)

        for expected_serie in options.get('expected_series'):
            if expected_serie.startswith('re:'):
                expected_serie = expected_serie[3:]
                expected_serie = expected_serie.replace(' ', '-')
                container.register_property('series', expected_serie, enhance=True)
            else:
                expected_serie = re.escape(expected_serie)
                container.register_property('series', expected_serie, enhance=False)

        found = container.find_properties(string, node, options)
        return container.as_guess(found, string)

    def supported_properties(self):
        return ['series']

    def process(self, mtree, options=None):
        GuessFinder(self.expected_series, None, self.log, options).process_nodes(mtree.unidentified_leaves())
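The transformer above only runs when an expected series is supplied, either through the options dict or the -S flag it registers; plain values are matched literally (re.escape'd), while a 're:' prefix switches to regular-expression matching. A hedged usage sketch, reusing the entry point from test_main.py (the file names below are made up for illustration):

# Usage sketch for ExpectedSeries (flags from register_arguments above; file names illustrative).
from guessit import __main__

__main__.main(["-S", "The Simpsons", "the.simpsons.s13e09.avi"], False)   # literal match
__main__.main(["-S", "re:My ?Series", "MySeries.S01E01.avi"], False)      # 're:' prefix -> regex, spaces become '-'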
libs/guessit/transfo/expected_title.py  (new file, 61 lines)
@@ -0,0 +1,61 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.containers import PropertiesContainer
from guessit.matcher import GuessFinder

from guessit.plugins.transformers import Transformer

import re


class ExpectedTitle(Transformer):
    def __init__(self):
        Transformer.__init__(self, 225)

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        naming_opts.add_argument('-T', '--expected-title', action='append', dest='expected_title',
                                 help='Expected title (can be used multiple times)')

    def should_process(self, mtree, options=None):
        return options and options.get('expected_title')

    def expected_titles(self, string, node=None, options=None):
        container = PropertiesContainer(enhance=True, canonical_from_pattern=False)

        for expected_title in options.get('expected_title'):
            if expected_title.startswith('re:'):
                expected_title = expected_title[3:]
                expected_title = expected_title.replace(' ', '-')
                container.register_property('title', expected_title, enhance=True)
            else:
                expected_title = re.escape(expected_title)
                container.register_property('title', expected_title, enhance=False)

        found = container.find_properties(string, node, options)
        return container.as_guess(found, string)

    def supported_properties(self):
        return ['title']

    def process(self, mtree, options=None):
        GuessFinder(self.expected_titles, None, self.log, options).process_nodes(mtree.unidentified_leaves())
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,16 +18,22 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit.transfo import found_property
-import logging
-
-log = logging.getLogger(__name__)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit.matcher import found_property
 
 
-def process(mtree):
-    def previous_group(g):
-        for leaf in mtree.unidentified_leaves()[::-1]:
-            if leaf.node_idx < g.node_idx:
-                return leaf
+class GuessBonusFeatures(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, -150)
+
+    def supported_properties(self):
+        return ['bonusNumber', 'bonusTitle', 'filmNumber', 'filmSeries', 'title', 'series']
+
+    def process(self, mtree, options=None):
+        def previous_group(g):
+            for leaf in reversed(list(mtree.unidentified_leaves())):
+                if leaf.node_idx < g.node_idx:
+                    return leaf
@@ -39,23 +45,23 @@ def process(mtree):
-    def same_group(g1, g2):
-        return g1.node_idx[:2] == g2.node_idx[:2]
-
-    bonus = [ node for node in mtree.leaves() if 'bonusNumber' in node.guess ]
-    if bonus:
-        bonusTitle = next_group(bonus[0])
-        if same_group(bonusTitle, bonus[0]):
-            found_property(bonusTitle, 'bonusTitle', 0.8)
-
-    filmNumber = [ node for node in mtree.leaves()
-                   if 'filmNumber' in node.guess ]
-    if filmNumber:
-        filmSeries = previous_group(filmNumber[0])
-        found_property(filmSeries, 'filmSeries', 0.9)
-
-        title = next_group(filmNumber[0])
-        found_property(title, 'title', 0.9)
-
-    season = [ node for node in mtree.leaves() if 'season' in node.guess ]
-    if season and 'bonusNumber' in mtree.info:
-        series = previous_group(season[0])
-        if same_group(series, season[0]):
-            found_property(series, 'series', 0.9)
+        def same_group(g1, g2):
+            return g1.node_idx[:2] == g2.node_idx[:2]
+
+        bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
+        if bonus:
+            bonus_title = next_group(bonus[0])
+            if bonus_title and same_group(bonus_title, bonus[0]):
+                found_property(bonus_title, 'bonusTitle', confidence=0.8)
+
+        film_number = [node for node in mtree.leaves()
+                       if 'filmNumber' in node.guess]
+        if film_number:
+            film_series = previous_group(film_number[0])
+            found_property(film_series, 'filmSeries', confidence=0.9)
+
+            title = next_group(film_number[0])
+            found_property(title, 'title', confidence=0.9)
+
+        season = [node for node in mtree.leaves() if 'season' in node.guess]
+        if season and 'bonusNumber' in mtree.info:
+            series = previous_group(season[0])
+            if same_group(series, season[0]):
+                found_property(series, 'series', confidence=0.9)
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,31 +18,107 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit.country import Country
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from babelfish import Country
 from guessit import Guess
+from guessit.textutils import iter_words
+from guessit.matcher import GuessFinder, found_guess
+from guessit.language import LNG_COMMON_WORDS
+import babelfish
 import logging
 
 log = logging.getLogger(__name__)
 
-# list of common words which could be interpreted as countries, but which
-# are far too common to be able to say they represent a country
-country_common_words = frozenset([ 'bt', 'bb' ])
-
-def process(mtree):
-    for node in mtree.unidentified_leaves():
-        if len(node.node_idx) == 2:
-            c = node.value[1:-1].lower()
-            if c in country_common_words:
-                continue
-
-            # only keep explicit groups (enclosed in parentheses/brackets)
-            if node.value[0] + node.value[-1] not in ['()', '[]', '{}']:
-                continue
-
-            try:
-                country = Country(c, strict=True)
-            except ValueError:
-                continue
-
-            node.guess = Guess(country=country, confidence=1.0, raw=c)
+
+class GuessCountry(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, -170)
+        self.replace_language = frozenset(['uk'])
+
+    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
+        naming_opts.add_argument('-C', '--allowed-country', action='append', dest='allowed_countries',
+                                 help='Allowed country (can be used multiple times)')
+
+    def supported_properties(self):
+        return ['country']
+
+    def should_process(self, mtree, options=None):
+        options = options or {}
+        return options.get('country', True)
+
+    def _scan_country(self, country, strict=False):
+        """
+        Find a country if it is at the start or end of country string
+        """
+        words_match = list(iter_words(country.lower()))
+        s = ""
+        start = None
+
+        for word_match in words_match:
+            if not start:
+                start = word_match.start(0)
+            s += word_match.group(0)
+            try:
+                return Country.fromguessit(s), (start, word_match.end(0))
+            except babelfish.Error:
+                continue
+
+        words_match.reverse()
+        s = ""
+        end = None
+        for word_match in words_match:
+            if not end:
+                end = word_match.end(0)
+            s = word_match.group(0) + s
+            try:
+                return Country.fromguessit(s), (word_match.start(0), end)
+            except babelfish.Error:
+                continue
+
+        return Country.fromguessit(country), (start, end)
+
+    def is_valid_country(self, country, options=None):
+        if options and options.get('allowed_countries'):
+            allowed_countries = options.get('allowed_countries')
+            return country.name.lower() in allowed_countries or country.alpha2.lower() in allowed_countries
+        else:
+            return (country.name.lower() not in LNG_COMMON_WORDS and
                    country.alpha2.lower() not in LNG_COMMON_WORDS)
|
|
||||||
|
def guess_country(self, string, node=None, options=None):
|
||||||
|
c = string.strip().lower()
|
||||||
|
if c not in LNG_COMMON_WORDS:
|
||||||
|
try:
|
||||||
|
country, country_span = self._scan_country(c, True)
|
||||||
|
if self.is_valid_country(country, options):
|
||||||
|
guess = Guess(country=country, confidence=1.0, input=node.value, span=(country_span[0] + 1, country_span[1] + 1))
|
||||||
|
return guess
|
||||||
|
except babelfish.Error:
|
||||||
|
pass
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_country, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
for node in mtree.leaves_containing('language'):
|
||||||
|
c = node.clean_value.lower()
|
||||||
|
if c in self.replace_language:
|
||||||
|
node.guess.set('language', None)
|
||||||
|
try:
|
||||||
|
country = Country.fromguessit(c)
|
||||||
|
if self.is_valid_country(country, options):
|
||||||
|
guess = Guess(country=country, confidence=0.9, input=node.value, span=node.span)
|
||||||
|
found_guess(node, guess, logger=log)
|
||||||
|
except babelfish.Error:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def post_process(self, mtree, options=None, *args, **kwargs):
|
||||||
|
# if country is in the guessed properties, make it part of the series name
|
||||||
|
series_leaves = list(mtree.leaves_containing('series'))
|
||||||
|
country_leaves = list(mtree.leaves_containing('country'))
|
||||||
|
|
||||||
|
if series_leaves and country_leaves:
|
||||||
|
country_leaf = country_leaves[0]
|
||||||
|
for serie_leaf in series_leaves:
|
||||||
|
serie_leaf.guess['series'] += ' (%s)' % str(country_leaf.guess['country'].guessit)
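The new _scan_country helper grows a candidate from the first words of the raw string, retries from the last words, and keeps whichever span babelfish accepts as a country. A rough standalone illustration of that scanning strategy, with a toy lookup table standing in for babelfish.Country.fromguessit; the table and the helper name are ours, not guessit's:

# toy country table, only for illustration; guessit delegates this to babelfish
KNOWN_COUNTRIES = {
    'us': 'United States',
    'uk': 'United Kingdom',
    'new zealand': 'New Zealand',
}

def scan_country(text):
    words = text.lower().split()
    # grow a candidate from the start of the string...
    for end in range(1, len(words) + 1):
        candidate = ' '.join(words[:end])
        if candidate in KNOWN_COUNTRIES:
            return KNOWN_COUNTRIES[candidate], (0, end)
    # ...then grow one from the end
    for start in range(len(words) - 1, -1, -1):
        candidate = ' '.join(words[start:])
        if candidate in KNOWN_COUNTRIES:
            return KNOWN_COUNTRIES[candidate], (start, len(words))
    return None, None

print(scan_country('new zealand 2010'))   # ('New Zealand', (0, 2))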
@@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@@ -18,21 +18,32 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder
|
||||||
from guessit.date import search_date
|
from guessit.date import search_date
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def guess_date(string):
|
class GuessDate(Transformer):
|
||||||
date, span = search_date(string)
|
def __init__(self):
|
||||||
|
Transformer.__init__(self, 50)
|
||||||
|
|
||||||
|
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
|
||||||
|
naming_opts.add_argument('-Y', '--date-year-first', action='store_true', dest='date_year_first', default=None,
|
||||||
|
help='If short date is found, consider the first digits as the year.')
|
||||||
|
naming_opts.add_argument('-D', '--date-day-first', action='store_true', dest='date_day_first', default=None,
|
||||||
|
help='If short date is found, consider the second digits as the day.')
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return ['date']
|
||||||
|
|
||||||
|
def guess_date(self, string, node=None, options=None):
|
||||||
|
date, span = search_date(string, options.get('date_year_first') if options else False, options.get('date_day_first') if options else False)
|
||||||
if date:
|
if date:
|
||||||
return { 'date': date }, span
|
return {'date': date}, span
|
||||||
else:
|
else:
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
def process(mtree):
|
GuessFinder(self.guess_date, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
SingleNodeGuesser(guess_date, 1.0, log).process(mtree)
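The rewritten transformer threads two new command-line switches (-Y/--date-year-first and -D/--date-day-first) through to search_date, which decide how an ambiguous short date is read. The same ambiguity can be demonstrated with the third-party python-dateutil package, used here purely to illustrate what the flags choose between:

from dateutil import parser

ambiguous = '01-02-2010'
print(parser.parse(ambiguous))                  # 2010-01-02, month read first
print(parser.parse(ambiguous, dayfirst=True))   # 2010-02-01, day read first

short = '10-02-01'
print(parser.parse(short, yearfirst=True))      # 2010-02-01, leading digits are the year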
libs/guessit/transfo/guess_episode_details.py (new file, 64 lines)
@@ -0,0 +1,64 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# GuessIt - A library for guessing information from filenames
|
||||||
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
|
#
|
||||||
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# GuessIt is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# Lesser GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the Lesser GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import found_guess
|
||||||
|
from guessit.containers import PropertiesContainer
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
|
||||||
|
class GuessEpisodeDetails(Transformer):
|
||||||
|
def __init__(self):
|
||||||
|
Transformer.__init__(self, -205)
|
||||||
|
self.container = PropertiesContainer()
|
||||||
|
self.container.register_property('episodeDetails', 'Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired')
|
||||||
|
self.container.register_property('episodeDetails', 'Extras?', canonical_form='Extras')
|
||||||
|
|
||||||
|
def guess_details(self, string, node=None, options=None):
|
||||||
|
properties = self.container.find_properties(string, node, options, 'episodeDetails', multiple=True)
|
||||||
|
guesses = self.container.as_guess(properties, multiple=True)
|
||||||
|
return guesses
|
||||||
|
|
||||||
|
def second_pass_options(self, mtree, options=None):
|
||||||
|
if not mtree.guess.get('type', '').startswith('episode'):
|
||||||
|
for unidentified_leaf in mtree.unidentified_leaves():
|
||||||
|
properties = self.container.find_properties(unidentified_leaf.value, unidentified_leaf, options, 'episodeDetails')
|
||||||
|
guess = self.container.as_guess(properties)
|
||||||
|
if guess:
|
||||||
|
return {'type': 'episode'}
|
||||||
|
return None
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return self.container.get_supported_properties()
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
if (mtree.guess.get('type', '').startswith('episode') and
|
||||||
|
(not mtree.info.get('episodeNumber') or
|
||||||
|
mtree.info.get('season') == 0)):
|
||||||
|
|
||||||
|
for leaf in itertools.chain(mtree.leaves_containing('title'),
|
||||||
|
mtree.unidentified_leaves()):
|
||||||
|
guesses = self.guess_details(leaf.value, leaf, options)
|
||||||
|
for guess in guesses:
|
||||||
|
found_guess(leaf, guess, update_guess=False)
|
||||||
|
|
||||||
|
return None
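This new transformer only registers a short keyword list for episodeDetails, with 'Extras?' collapsing to the canonical form 'Extras', and promotes the file to type episode on a second pass when a keyword is found. A rough standalone equivalent of the keyword matching; the patterns are taken from the register_property calls above, the helper name is ours:

import re

# pattern -> canonical value, mirroring the registrations above
EPISODE_DETAILS = [
    (re.compile(r'\b(special|bonus|omake|ova|oav|pilot|unaired)\b', re.IGNORECASE), None),
    (re.compile(r'\bextras?\b', re.IGNORECASE), 'Extras'),
]

def find_episode_details(name):
    found = []
    for pattern, canonical in EPISODE_DETAILS:
        for match in pattern.finditer(name):
            # keep the canonical form when one is registered, else the matched word
            found.append(canonical or match.group(0).title())
    return found

print(find_episode_details('Show.S00E01.Unaired.Pilot.Extra.avi'))
# ['Unaired', 'Pilot', 'Extras']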
@@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@@ -18,38 +18,53 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit.transfo import found_property
|
|
||||||
from guessit.patterns import non_episode_title, unlikely_series
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.plugins.transformers import Transformer, get_transformer
|
||||||
|
from guessit.textutils import reorder_title
|
||||||
|
|
||||||
|
from guessit.matcher import found_property
|
||||||
|
|
||||||
|
|
||||||
def match_from_epnum_position(mtree, node):
|
class GuessEpisodeInfoFromPosition(Transformer):
|
||||||
|
def __init__(self):
|
||||||
|
Transformer.__init__(self, -200)
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return ['title', 'series']
|
||||||
|
|
||||||
|
def match_from_epnum_position(self, mtree, node, options):
|
||||||
epnum_idx = node.node_idx
|
epnum_idx = node.node_idx
|
||||||
|
|
||||||
# a few helper functions to be able to filter using high-level semantics
|
# a few helper functions to be able to filter using high-level semantics
|
||||||
def before_epnum_in_same_pathgroup():
|
def before_epnum_in_same_pathgroup():
|
||||||
return [ leaf for leaf in mtree.unidentified_leaves()
|
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||||
if (leaf.node_idx[0] == epnum_idx[0] and
|
if (leaf.node_idx[0] == epnum_idx[0] and
|
||||||
leaf.node_idx[1:] < epnum_idx[1:]) ]
|
leaf.node_idx[1:] < epnum_idx[1:])]
|
||||||
|
|
||||||
def after_epnum_in_same_pathgroup():
|
def after_epnum_in_same_pathgroup():
|
||||||
return [ leaf for leaf in mtree.unidentified_leaves()
|
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||||
if (leaf.node_idx[0] == epnum_idx[0] and
|
if (leaf.node_idx[0] == epnum_idx[0] and
|
||||||
leaf.node_idx[1:] > epnum_idx[1:]) ]
|
leaf.node_idx[1:] > epnum_idx[1:])]
|
||||||
|
|
||||||
def after_epnum_in_same_explicitgroup():
|
def after_epnum_in_same_explicitgroup():
|
||||||
return [ leaf for leaf in mtree.unidentified_leaves()
|
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||||
if (leaf.node_idx[:2] == epnum_idx[:2] and
|
if (leaf.node_idx[:2] == epnum_idx[:2] and
|
||||||
leaf.node_idx[2:] > epnum_idx[2:]) ]
|
leaf.node_idx[2:] > epnum_idx[2:])]
|
||||||
|
|
||||||
# epnumber is the first group and there are only 2 after it in same
|
# epnumber is the first group and there are only 2 after it in same
|
||||||
# path group
|
# path group
|
||||||
# -> series title - episode title
|
# -> series title - episode title
|
||||||
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
|
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||||
if n.clean_value.lower() not in non_episode_title ]
|
|
||||||
|
if ('title' not in mtree.info and # no title
|
||||||
|
'series' in mtree.info and # series present
|
||||||
|
before_epnum_in_same_pathgroup() == [] and # no groups before
|
||||||
|
len(title_candidates) == 1): # only 1 group after
|
||||||
|
|
||||||
|
found_property(title_candidates[0], 'title', confidence=0.4)
|
||||||
|
return
|
||||||
|
|
||||||
if ('title' not in mtree.info and # no title
|
if ('title' not in mtree.info and # no title
|
||||||
before_epnum_in_same_pathgroup() == [] and # no groups before
|
before_epnum_in_same_pathgroup() == [] and # no groups before
|
||||||
len(title_candidates) == 2): # only 2 groups after
|
len(title_candidates) == 2): # only 2 groups after
|
||||||
|
|
@@ -65,18 +80,14 @@ def match_from_epnum_position(mtree, node):
|
||||||
found_property(series_candidates[0], 'series', confidence=0.7)
|
found_property(series_candidates[0], 'series', confidence=0.7)
|
||||||
|
|
||||||
# only 1 group after (in the same path group) and it's probably the
|
# only 1 group after (in the same path group) and it's probably the
|
||||||
# episode title
|
# episode title.
|
||||||
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
|
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||||
if n.clean_value.lower() not in non_episode_title ]
|
|
||||||
|
|
||||||
if len(title_candidates) == 1:
|
if len(title_candidates) == 1:
|
||||||
found_property(title_candidates[0], 'title', confidence=0.5)
|
found_property(title_candidates[0], 'title', confidence=0.5)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
# try in the same explicit group, with lower confidence
|
# try in the same explicit group, with lower confidence
|
||||||
title_candidates = [ n for n in after_epnum_in_same_explicitgroup()
|
title_candidates = self._filter_candidates(after_epnum_in_same_explicitgroup(), options)
|
||||||
if n.clean_value.lower() not in non_episode_title
|
|
||||||
]
|
|
||||||
if len(title_candidates) == 1:
|
if len(title_candidates) == 1:
|
||||||
found_property(title_candidates[0], 'title', confidence=0.4)
|
found_property(title_candidates[0], 'title', confidence=0.4)
|
||||||
return
|
return
|
||||||
|
|
@@ -85,8 +96,7 @@ def match_from_epnum_position(mtree, node):
|
||||||
return
|
return
|
||||||
|
|
||||||
# get the one with the longest value
|
# get the one with the longest value
|
||||||
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
|
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||||
if n.clean_value.lower() not in non_episode_title ]
|
|
||||||
if title_candidates:
|
if title_candidates:
|
||||||
maxidx = -1
|
maxidx = -1
|
||||||
maxv = -1
|
maxv = -1
|
||||||
|
|
@@ -96,51 +106,76 @@ def match_from_epnum_position(mtree, node):
|
||||||
maxv = len(c.clean_value)
|
maxv = len(c.clean_value)
|
||||||
found_property(title_candidates[maxidx], 'title', confidence=0.3)
|
found_property(title_candidates[maxidx], 'title', confidence=0.3)
|
||||||
|
|
||||||
|
def should_process(self, mtree, options=None):
|
||||||
|
options = options or {}
|
||||||
|
return not options.get('skip_title') and mtree.guess.get('type', '').startswith('episode')
|
||||||
|
|
||||||
def process(mtree):
|
def _filter_candidates(self, candidates, options):
|
||||||
|
episode_details_transformer = get_transformer('guess_episode_details')
|
||||||
|
if episode_details_transformer:
|
||||||
|
return [n for n in candidates if not episode_details_transformer.container.find_properties(n.value, n, options, re_match=True)]
|
||||||
|
else:
|
||||||
|
return candidates
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
"""
|
||||||
|
try to identify the remaining unknown groups by looking at their
|
||||||
|
position relative to other known elements
|
||||||
|
"""
|
||||||
eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
|
eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
|
||||||
|
|
||||||
|
if not eps:
|
||||||
|
eps = [node for node in mtree.leaves() if 'date' in node.guess]
|
||||||
|
|
||||||
if eps:
|
if eps:
|
||||||
match_from_epnum_position(mtree, eps[0])
|
self.match_from_epnum_position(mtree, eps[0], options)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# if we don't have the episode number, but at least 2 groups in the
|
# if we don't have the episode number, but at least 2 groups in the
|
||||||
# basename, then it's probably series - eptitle
|
# basename, then it's probably series - eptitle
|
||||||
basename = mtree.node_at((-2,))
|
basename = mtree.node_at((-2,))
|
||||||
title_candidates = [ n for n in basename.unidentified_leaves()
|
|
||||||
if n.clean_value.lower() not in non_episode_title
|
|
||||||
]
|
|
||||||
|
|
||||||
if len(title_candidates) >= 2:
|
title_candidates = self._filter_candidates(basename.unidentified_leaves(), options)
|
||||||
found_property(title_candidates[0], 'series', 0.4)
|
|
||||||
found_property(title_candidates[1], 'title', 0.4)
|
if len(title_candidates) >= 2 and 'series' not in mtree.info:
|
||||||
|
found_property(title_candidates[0], 'series', confidence=0.4)
|
||||||
|
found_property(title_candidates[1], 'title', confidence=0.4)
|
||||||
elif len(title_candidates) == 1:
|
elif len(title_candidates) == 1:
|
||||||
# but if there's only one candidate, it's probably the series name
|
# but if there's only one candidate, it's probably the series name
|
||||||
found_property(title_candidates[0], 'series', 0.4)
|
found_property(title_candidates[0], 'series' if 'series' not in mtree.info else 'title', confidence=0.4)
|
||||||
|
|
||||||
# if we only have 1 remaining valid group in the folder containing the
|
# if we only have 1 remaining valid group in the folder containing the
|
||||||
# file, then it's likely that it is the series name
|
# file, then it's likely that it is the series name
|
||||||
try:
|
try:
|
||||||
series_candidates = mtree.node_at((-3,)).unidentified_leaves()
|
series_candidates = list(mtree.node_at((-3,)).unidentified_leaves())
|
||||||
except ValueError:
|
except ValueError:
|
||||||
series_candidates = []
|
series_candidates = []
|
||||||
|
|
||||||
if len(series_candidates) == 1:
|
if len(series_candidates) == 1:
|
||||||
found_property(series_candidates[0], 'series', 0.3)
|
found_property(series_candidates[0], 'series', confidence=0.3)
|
||||||
|
|
||||||
# if there's a path group that only contains the season info, then the
|
# if there's a path group that only contains the season info, then the
|
||||||
# previous one is most likely the series title (ie: ../series/season X/..)
|
# previous one is most likely the series title (ie: ../series/season X/..)
|
||||||
eps = [ node for node in mtree.nodes()
|
eps = [node for node in mtree.nodes()
|
||||||
if 'season' in node.guess and 'episodeNumber' not in node.guess ]
|
if 'season' in node.guess and 'episodeNumber' not in node.guess]
|
||||||
|
|
||||||
if eps:
|
if eps:
|
||||||
previous = [ node for node in mtree.unidentified_leaves()
|
previous = [node for node in mtree.unidentified_leaves()
|
||||||
if node.node_idx[0] == eps[0].node_idx[0] - 1 ]
|
if node.node_idx[0] == eps[0].node_idx[0] - 1]
|
||||||
if len(previous) == 1:
|
if len(previous) == 1:
|
||||||
found_property(previous[0], 'series', 0.5)
|
found_property(previous[0], 'series', confidence=0.5)
|
||||||
|
|
||||||
# reduce the confidence of unlikely series
|
# If we have found title without any serie name, replace it by the serie name.
|
||||||
|
if 'series' not in mtree.info and 'title' in mtree.info:
|
||||||
|
title_leaf = mtree.first_leaf_containing('title')
|
||||||
|
metadata = title_leaf.guess.metadata('title')
|
||||||
|
value = title_leaf.guess['title']
|
||||||
|
del title_leaf.guess['title']
|
||||||
|
title_leaf.guess.set('series', value, metadata=metadata)
|
||||||
|
|
||||||
|
def post_process(self, mtree, options=None):
|
||||||
for node in mtree.nodes():
|
for node in mtree.nodes():
|
||||||
if 'series' in node.guess:
|
if 'series' not in node.guess:
|
||||||
if node.guess['series'].lower() in unlikely_series:
|
continue
|
||||||
new_confidence = node.guess.confidence('series') * 0.5
|
|
||||||
node.guess.set_confidence('series', new_confidence)
|
node.guess['series'] = reorder_title(node.guess['series'])
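All of the positional reasoning above runs on node_idx tuples: the first component is the path group (directory level) and the remaining components order the text groups inside it, so "same path group, after the episode number" is just a tuple comparison. A toy illustration of that filter; the leaf list and indices are invented for the example:

# (node_idx, text) pairs standing in for unidentified leaves of a match tree
leaves = [((1, 0), 'Show Name'),
          ((1, 2), 'Episode Title'),
          ((0, 0), 'TV Shows')]
epnum_idx = (1, 1)   # where something like S01E03 was matched

def after_epnum_in_same_pathgroup():
    return [text for idx, text in leaves
            if idx[0] == epnum_idx[0] and idx[1:] > epnum_idx[1:]]

print(after_epnum_in_same_pathgroup())   # ['Episode Title']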
@@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@@ -18,49 +18,176 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit import Guess
|
|
||||||
from guessit.transfo import SingleNodeGuesser
|
from guessit.plugins.transformers import Transformer
|
||||||
from guessit.patterns import episode_rexps
|
from guessit.matcher import GuessFinder
|
||||||
|
from guessit.patterns import sep, build_or_pattern
|
||||||
|
from guessit.containers import PropertiesContainer, WeakValidator, NoValidator, ChainedValidator, DefaultValidator, \
|
||||||
|
FormatterValidator
|
||||||
|
from guessit.patterns.numeral import numeral, digital_numeral, parse_numeral
|
||||||
import re
|
import re
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
def number_list(s):
|
|
||||||
l = [ int(n) for n in re.sub('[^0-9]+', ' ', s).split() ]
|
|
||||||
|
|
||||||
if len(l) == 2:
|
|
||||||
# it is an episode interval, return all numbers in between
|
|
||||||
return range(l[0], l[1]+1)
|
|
||||||
|
|
||||||
return l
|
|
||||||
|
|
||||||
def guess_episodes_rexps(string):
|
|
||||||
for rexp, confidence, span_adjust in episode_rexps:
|
|
||||||
match = re.search(rexp, string, re.IGNORECASE)
|
|
||||||
if match:
|
|
||||||
span = (match.start() + span_adjust[0],
|
|
||||||
match.end() + span_adjust[1])
|
|
||||||
guess = Guess(match.groupdict(), confidence=confidence, raw=string[span[0]:span[1]])
|
|
||||||
|
|
||||||
# decide whether we have only a single episode number or an
|
|
||||||
# episode list
|
|
||||||
if guess.get('episodeNumber'):
|
|
||||||
eplist = number_list(guess['episodeNumber'])
|
|
||||||
guess.set('episodeNumber', eplist[0], confidence=confidence, raw=string[span[0]:span[1]])
|
|
||||||
|
|
||||||
if len(eplist) > 1:
|
|
||||||
guess.set('episodeList', eplist, confidence=confidence, raw=string[span[0]:span[1]])
|
|
||||||
|
|
||||||
if guess.get('bonusNumber'):
|
|
||||||
eplist = number_list(guess['bonusNumber'])
|
|
||||||
guess.set('bonusNumber', eplist[0], confidence=confidence, raw=string[span[0]:span[1]])
|
|
||||||
|
|
||||||
return guess, span
|
|
||||||
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
|
|
||||||
def process(mtree):
|
class GuessEpisodesRexps(Transformer):
|
||||||
SingleNodeGuesser(guess_episodes_rexps, None, log).process(mtree)
|
def __init__(self):
|
||||||
|
Transformer.__init__(self, 20)
|
||||||
|
|
||||||
|
range_separators = ['-', 'to', 'a']
|
||||||
|
discrete_separators = ['&', 'and', 'et']
|
||||||
|
of_separators = ['of', 'sur', '/', '\\']
|
||||||
|
|
||||||
|
season_words = ['seasons?', 'saisons?', 'series?']
|
||||||
|
episode_words = ['episodes?']
|
||||||
|
|
||||||
|
season_markers = ['s']
|
||||||
|
episode_markers = ['e', 'ep']
|
||||||
|
|
||||||
|
discrete_sep = sep
|
||||||
|
for range_separator in range_separators:
|
||||||
|
discrete_sep = discrete_sep.replace(range_separator, '')
|
||||||
|
discrete_separators.append(discrete_sep)
|
||||||
|
all_separators = list(range_separators)
|
||||||
|
all_separators.extend(discrete_separators)
|
||||||
|
|
||||||
|
self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
|
||||||
|
|
||||||
|
range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
|
||||||
|
discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
|
||||||
|
all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
|
||||||
|
of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)
|
||||||
|
|
||||||
|
season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
|
||||||
|
episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)
|
||||||
|
|
||||||
|
season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
|
||||||
|
episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)
|
||||||
|
|
||||||
|
def list_parser(value, property_list_name, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
|
||||||
|
discrete_elements = filter(lambda x: x != '', discrete_separators_re.split(value))
|
||||||
|
discrete_elements = [x.strip() for x in discrete_elements]
|
||||||
|
|
||||||
|
proper_discrete_elements = []
|
||||||
|
i = 0
|
||||||
|
while i < len(discrete_elements):
|
||||||
|
if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i+1]):
|
||||||
|
proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i+1] + discrete_elements[i+2])
|
||||||
|
i += 3
|
||||||
|
else:
|
||||||
|
match = range_separators_re.search(discrete_elements[i])
|
||||||
|
if match and match.start() == 0:
|
||||||
|
proper_discrete_elements[i-1] = proper_discrete_elements[i-1] + discrete_elements[i]
|
||||||
|
elif match and match.end() == len(discrete_elements[i]):
|
||||||
|
proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1])
|
||||||
|
else:
|
||||||
|
proper_discrete_elements.append(discrete_elements[i])
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
discrete_elements = proper_discrete_elements
|
||||||
|
|
||||||
|
ret = []
|
||||||
|
|
||||||
|
for discrete_element in discrete_elements:
|
||||||
|
range_values = filter(lambda x: x != '', range_separators_re.split(discrete_element))
|
||||||
|
range_values = [x.strip() for x in range_values]
|
||||||
|
if len(range_values) > 1:
|
||||||
|
for x in range(0, len(range_values) - 1):
|
||||||
|
start_range_ep = parse_numeral(range_values[x])
|
||||||
|
end_range_ep = parse_numeral(range_values[x+1])
|
||||||
|
for range_ep in range(start_range_ep, end_range_ep + 1):
|
||||||
|
if range_ep not in ret:
|
||||||
|
ret.append(range_ep)
|
||||||
|
else:
|
||||||
|
discrete_value = parse_numeral(discrete_element)
|
||||||
|
if discrete_value not in ret:
|
||||||
|
ret.append(discrete_value)
|
||||||
|
|
||||||
|
if len(ret) > 1:
|
||||||
|
if not allow_discrete:
|
||||||
|
valid_ret = list()
|
||||||
|
# replace discrete elements by ranges
|
||||||
|
valid_ret.append(ret[0])
|
||||||
|
for i in range(0, len(ret) - 1):
|
||||||
|
previous = valid_ret[len(valid_ret) - 1]
|
||||||
|
if ret[i+1] < previous:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
valid_ret.append(ret[i+1])
|
||||||
|
ret = valid_ret
|
||||||
|
if fill_gaps:
|
||||||
|
ret = list(range(min(ret), max(ret) + 1))
|
||||||
|
if len(ret) > 1:
|
||||||
|
return {None: ret[0], property_list_name: ret}
|
||||||
|
if len(ret) > 0:
|
||||||
|
return ret[0]
|
||||||
|
return None
|
||||||
|
|
||||||
|
def episode_parser_x(value):
|
||||||
|
return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE))
|
||||||
|
|
||||||
|
def episode_parser_e(value):
|
||||||
|
return list_parser(value, 'episodeList', discrete_separators_re=re.compile('e', re.IGNORECASE), fill_gaps=True)
|
||||||
|
|
||||||
|
def episode_parser(value):
|
||||||
|
return list_parser(value, 'episodeList')
|
||||||
|
|
||||||
|
def season_parser(value):
|
||||||
|
return list_parser(value, 'seasonList')
|
||||||
|
|
||||||
|
class ResolutionCollisionValidator(object):
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
return len(match.group(2)) < 3 # limit
|
||||||
|
|
||||||
|
self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep, confidence=1.0, formatter={None: parse_numeral, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))
|
||||||
|
|
||||||
|
self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser}, validator=NoValidator())
|
||||||
|
# self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||||
|
self.container.register_property(None, sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||||
|
self.container.register_property(None, r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||||
|
self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'season': season_parser}, validator=NoValidator())
|
||||||
|
|
||||||
|
self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.7, formatter=parse_numeral)
|
||||||
|
|
||||||
|
|
||||||
|
self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||||
|
self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||||
|
|
||||||
|
self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||||
|
self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||||
|
|
||||||
|
|
||||||
|
self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
|
||||||
|
self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
|
||||||
|
self.container.register_property('episodeNumber', r'^ ?0(\d{1,2})' + sep, confidence=0.4, formatter=parse_numeral)
|
||||||
|
self.container.register_property('episodeNumber', sep + r'(\d{2}) ?$', confidence=0.4, formatter=parse_numeral)
|
||||||
|
self.container.register_property('episodeNumber', sep + r'0(\d{1,2}) ?$', confidence=0.4, formatter=parse_numeral)
|
||||||
|
|
||||||
|
self.container.register_property(None, r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))', confidence=0.7, formatter=parse_numeral)
|
||||||
|
|
||||||
|
self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())
|
||||||
|
|
||||||
|
self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))', confidence=1.0, formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||||
|
|
||||||
|
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
|
||||||
|
naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number', default=False,
|
||||||
|
help='Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
|
||||||
|
'it will be guessed as season 2, episodeNumber 13')
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return ['episodeNumber', 'season', 'episodeList', 'seasonList', 'episodeCount', 'seasonCount', 'version', 'other']
|
||||||
|
|
||||||
|
def guess_episodes_rexps(self, string, node=None, options=None):
|
||||||
|
found = self.container.find_properties(string, node, options)
|
||||||
|
return self.container.as_guess(found, string)
|
||||||
|
|
||||||
|
def should_process(self, mtree, options=None):
|
||||||
|
return mtree.guess.get('type', '').startswith('episode')
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
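The rewritten episode matcher feeds the captured number strings through list_parser, which splits on "discrete" separators (&, and, ...) and expands "range" separators (-, to, ...) into every episode in between. A simplified standalone version of that expansion; the separator lists are trimmed down and the helper name is ours:

import re

RANGE_SEP = re.compile(r'\s*(?:-|to)\s*', re.IGNORECASE)
DISCRETE_SEP = re.compile(r'\s*(?:&|and|,)\s*', re.IGNORECASE)

def parse_episode_list(value):
    episodes = []
    for chunk in DISCRETE_SEP.split(value):
        bounds = [int(n) for n in RANGE_SEP.split(chunk) if n]
        if len(bounds) == 2:
            # a range such as 01-03 expands to every episode in between
            episodes.extend(range(bounds[0], bounds[1] + 1))
        elif bounds:
            episodes.append(bounds[0])
    return episodes

print(parse_episode_list('01-03'))   # [1, 2, 3]
print(parse_episode_list('2 & 5'))   # [2, 5]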
@@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@@ -18,173 +18,196 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit import Guess
|
|
||||||
from guessit.patterns import (subtitle_exts, info_exts, video_exts, episode_rexps,
|
import mimetypes
|
||||||
find_properties, compute_canonical_form)
|
|
||||||
from guessit.date import valid_year
|
|
||||||
from guessit.textutils import clean_string
|
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import mimetypes
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.guess import Guess
|
||||||
|
from guessit.patterns.extension import subtitle_exts, info_exts, video_exts
|
||||||
|
from guessit.transfo import TransformerException
|
||||||
|
from guessit.plugins.transformers import Transformer, get_transformer
|
||||||
|
from guessit.matcher import log_found_guess, found_guess, found_property
|
||||||
|
|
||||||
# List of well known movies and series, hardcoded because they cannot be
|
|
||||||
# guessed appropriately otherwise
|
|
||||||
MOVIES = [ 'OSS 117' ]
|
|
||||||
SERIES = [ 'Band of Brothers' ]
|
|
||||||
|
|
||||||
MOVIES = [ m.lower() for m in MOVIES ]
|
class GuessFiletype(Transformer):
|
||||||
SERIES = [ s.lower() for s in SERIES ]
|
def __init__(self):
|
||||||
|
Transformer.__init__(self, 200)
|
||||||
|
|
||||||
|
# List of well known movies and series, hardcoded because they cannot be
|
||||||
|
# guessed appropriately otherwise
|
||||||
|
MOVIES = ['OSS 117']
|
||||||
|
SERIES = ['Band of Brothers']
|
||||||
|
|
||||||
|
MOVIES = [m.lower() for m in MOVIES]
|
||||||
|
SERIES = [s.lower() for s in SERIES]
|
||||||
|
|
||||||
|
def guess_filetype(self, mtree, options=None):
|
||||||
|
options = options or {}
|
||||||
|
|
||||||
def guess_filetype(mtree, filetype):
|
|
||||||
# put the filetype inside a dummy container to be able to have the
|
# put the filetype inside a dummy container to be able to have the
|
||||||
# following functions work correctly as closures
|
# following functions work correctly as closures
|
||||||
# this is a workaround for python 2 which doesn't have the
|
# this is a workaround for python 2 which doesn't have the
|
||||||
# 'nonlocal' keyword (python 3 does have it)
|
# 'nonlocal' keyword which we could use here in the upgrade_* functions
|
||||||
filetype_container = [filetype]
|
# (python 3 does have it)
|
||||||
|
filetype_container = [mtree.guess.get('type')]
|
||||||
other = {}
|
other = {}
|
||||||
filename = mtree.string
|
filename = mtree.string
|
||||||
|
|
||||||
def upgrade_episode():
|
def upgrade_episode():
|
||||||
if filetype_container[0] == 'video':
|
if filetype_container[0] == 'subtitle':
|
||||||
filetype_container[0] = 'episode'
|
|
||||||
elif filetype_container[0] == 'subtitle':
|
|
||||||
filetype_container[0] = 'episodesubtitle'
|
filetype_container[0] = 'episodesubtitle'
|
||||||
elif filetype_container[0] == 'info':
|
elif filetype_container[0] == 'info':
|
||||||
filetype_container[0] = 'episodeinfo'
|
filetype_container[0] = 'episodeinfo'
|
||||||
|
elif (not filetype_container[0] or
|
||||||
|
filetype_container[0] == 'video'):
|
||||||
|
filetype_container[0] = 'episode'
|
||||||
|
|
||||||
def upgrade_movie():
|
def upgrade_movie():
|
||||||
if filetype_container[0] == 'video':
|
if filetype_container[0] == 'subtitle':
|
||||||
filetype_container[0] = 'movie'
|
|
||||||
elif filetype_container[0] == 'subtitle':
|
|
||||||
filetype_container[0] = 'moviesubtitle'
|
filetype_container[0] = 'moviesubtitle'
|
||||||
elif filetype_container[0] == 'info':
|
elif filetype_container[0] == 'info':
|
||||||
filetype_container[0] = 'movieinfo'
|
filetype_container[0] = 'movieinfo'
|
||||||
|
elif (not filetype_container[0] or
|
||||||
|
filetype_container[0] == 'video'):
|
||||||
|
filetype_container[0] = 'movie'
|
||||||
|
|
||||||
def upgrade_subtitle():
|
def upgrade_subtitle():
|
||||||
if 'movie' in filetype_container[0]:
|
if filetype_container[0] == 'movie':
|
||||||
filetype_container[0] = 'moviesubtitle'
|
filetype_container[0] = 'moviesubtitle'
|
||||||
elif 'episode' in filetype_container[0]:
|
elif filetype_container[0] == 'episode':
|
||||||
filetype_container[0] = 'episodesubtitle'
|
filetype_container[0] = 'episodesubtitle'
|
||||||
else:
|
elif not filetype_container[0]:
|
||||||
filetype_container[0] = 'subtitle'
|
filetype_container[0] = 'subtitle'
|
||||||
|
|
||||||
def upgrade_info():
|
def upgrade_info():
|
||||||
if 'movie' in filetype_container[0]:
|
if filetype_container[0] == 'movie':
|
||||||
filetype_container[0] = 'movieinfo'
|
filetype_container[0] = 'movieinfo'
|
||||||
elif 'episode' in filetype_container[0]:
|
elif filetype_container[0] == 'episode':
|
||||||
filetype_container[0] = 'episodeinfo'
|
filetype_container[0] = 'episodeinfo'
|
||||||
else:
|
elif not filetype_container[0]:
|
||||||
filetype_container[0] = 'info'
|
filetype_container[0] = 'info'
|
||||||
|
|
||||||
def upgrade(type='unknown'):
|
|
||||||
if filetype_container[0] == 'autodetect':
|
|
||||||
filetype_container[0] = type
|
|
||||||
|
|
||||||
|
|
||||||
# look at the extension first
|
# look at the extension first
|
||||||
fileext = os.path.splitext(filename)[1][1:].lower()
|
fileext = os.path.splitext(filename)[1][1:].lower()
|
||||||
if fileext in subtitle_exts:
|
if fileext in subtitle_exts:
|
||||||
upgrade_subtitle()
|
upgrade_subtitle()
|
||||||
other = { 'container': fileext }
|
other = {'container': fileext}
|
||||||
elif fileext in info_exts:
|
elif fileext in info_exts:
|
||||||
upgrade_info()
|
upgrade_info()
|
||||||
other = { 'container': fileext }
|
other = {'container': fileext}
|
||||||
elif fileext in video_exts:
|
elif fileext in video_exts:
|
||||||
upgrade(type='video')
|
other = {'container': fileext}
|
||||||
other = { 'container': fileext }
|
|
||||||
else:
|
else:
|
||||||
upgrade(type='unknown')
|
if fileext and not options.get('name_only'):
|
||||||
other = { 'extension': fileext }
|
other = {'extension': fileext}
|
||||||
|
list(mtree.unidentified_leaves())[-1].guess = Guess(other)
|
||||||
|
|
||||||
|
|
||||||
# check whether we are in a 'Movies', 'Tv Shows', ... folder
|
# check whether we are in a 'Movies', 'Tv Shows', ... folder
|
||||||
folder_rexps = [ (r'Movies?', upgrade_movie),
|
folder_rexps = [(r'Movies?', upgrade_movie),
|
||||||
|
(r'Films?', upgrade_movie),
|
||||||
(r'Tv[ _-]?Shows?', upgrade_episode),
|
(r'Tv[ _-]?Shows?', upgrade_episode),
|
||||||
(r'Series', upgrade_episode)
|
(r'Series?', upgrade_episode),
|
||||||
]
|
(r'Episodes?', upgrade_episode)]
|
||||||
for frexp, upgrade_func in folder_rexps:
|
for frexp, upgrade_func in folder_rexps:
|
||||||
frexp = re.compile(frexp, re.IGNORECASE)
|
frexp = re.compile(frexp, re.IGNORECASE)
|
||||||
for pathgroup in mtree.children:
|
for pathgroup in mtree.children:
|
||||||
if frexp.match(pathgroup.value):
|
if frexp.match(pathgroup.value):
|
||||||
upgrade_func()
|
upgrade_func()
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
# check for a few specific cases which will unintentionally make the
|
# check for a few specific cases which will unintentionally make the
|
||||||
# following heuristics confused (eg: OSS 117 will look like an episode,
|
# following heuristics confused (eg: OSS 117 will look like an episode,
|
||||||
# season 1, epnum 17, when it is in fact a movie)
|
# season 1, epnum 17, when it is in fact a movie)
|
||||||
fname = clean_string(filename).lower()
|
fname = mtree.clean_string(filename).lower()
|
||||||
for m in MOVIES:
|
for m in self.MOVIES:
|
||||||
if m in fname:
|
if m in fname:
|
||||||
log.debug('Found in exception list of movies -> type = movie')
|
self.log.debug('Found in exception list of movies -> type = movie')
|
||||||
upgrade_movie()
|
upgrade_movie()
|
||||||
for s in SERIES:
|
return filetype_container[0], other
|
||||||
|
for s in self.SERIES:
|
||||||
if s in fname:
|
if s in fname:
|
||||||
log.debug('Found in exception list of series -> type = episode')
|
self.log.debug('Found in exception list of series -> type = episode')
|
||||||
upgrade_episode()
|
upgrade_episode()
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
# now look whether there are some specific hints for episode vs movie
|
|
||||||
if filetype_container[0] in ('video', 'subtitle', 'info'):
|
|
||||||
# if we have an episode_rexp (eg: s02e13), it is an episode
|
# if we have an episode_rexp (eg: s02e13), it is an episode
|
||||||
for rexp, _, _ in episode_rexps:
|
episode_transformer = get_transformer('guess_episodes_rexps')
|
||||||
match = re.search(rexp, filename, re.IGNORECASE)
|
if episode_transformer:
|
||||||
if match:
|
filename_parts = list(x.value for x in mtree.unidentified_leaves());
|
||||||
log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode', rexp, match.group())
|
filename_parts.append(filename)
|
||||||
upgrade_episode()
|
for filename_part in filename_parts:
|
||||||
break
|
guess = episode_transformer.guess_episodes_rexps(filename_part)
|
||||||
|
if guess:
|
||||||
# if we have a 3-4 digit number that's not a year, maybe an episode
|
self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess)
|
||||||
match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
|
|
||||||
if match:
|
|
||||||
fullnumber = int(match.group()[1:-1])
|
|
||||||
#season = fullnumber // 100
|
|
||||||
epnumber = fullnumber % 100
|
|
||||||
possible = True
|
|
||||||
|
|
||||||
# check for validity
|
|
||||||
if epnumber > 40:
|
|
||||||
possible = False
|
|
||||||
if valid_year(fullnumber):
|
|
||||||
possible = False
|
|
||||||
|
|
||||||
if possible:
|
|
||||||
log.debug('Found possible episode number: %s (from string "%s") -> type = episode', epnumber, match.group())
|
|
||||||
upgrade_episode()
|
upgrade_episode()
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
|
properties_transformer = get_transformer('guess_properties')
|
||||||
|
if properties_transformer:
|
||||||
# if we have certain properties characteristic of episodes, it is an ep
|
# if we have certain properties characteristic of episodes, it is an ep
|
||||||
for prop, value, _, _ in find_properties(filename):
|
found = properties_transformer.container.find_properties(filename, mtree, options, 'episodeFormat')
|
||||||
log.debug('prop: %s = %s' % (prop, value))
|
guess = properties_transformer.container.as_guess(found, filename)
|
||||||
if prop == 'episodeFormat':
|
if guess:
|
||||||
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
|
self.log.debug('Found characteristic property of episodes: %s"', guess)
|
||||||
upgrade_episode()
|
upgrade_episode()
|
||||||
break
|
return filetype_container[0], other
|
||||||
|
|
||||||
elif compute_canonical_form('format', value) == 'DVB':
|
weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
|
||||||
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
|
if weak_episode_transformer:
|
||||||
|
found = properties_transformer.container.find_properties(filename, mtree, options, 'crc32')
|
||||||
|
guess = properties_transformer.container.as_guess(found, filename)
|
||||||
|
if guess:
|
||||||
|
found = weak_episode_transformer.container.find_properties(filename, mtree, options)
|
||||||
|
guess = weak_episode_transformer.container.as_guess(found, filename)
|
||||||
|
if guess:
|
||||||
|
self.log.debug('Found characteristic property of episodes: %s"', guess)
|
||||||
upgrade_episode()
|
upgrade_episode()
|
||||||
break
|
return filetype_container[0], other
|
||||||
|
|
||||||
|
found = properties_transformer.container.find_properties(filename, mtree, options, 'format')
|
||||||
|
guess = properties_transformer.container.as_guess(found, filename)
|
||||||
|
if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'):
|
||||||
|
# Use weak episodes only if TV or WEB source
|
||||||
|
weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
|
||||||
|
if weak_episode_transformer:
|
||||||
|
guess = weak_episode_transformer.guess_weak_episodes_rexps(filename)
|
||||||
|
if guess:
|
||||||
|
self.log.debug('Found guess_weak_episodes_rexps: %s -> type = episode', guess)
|
||||||
|
upgrade_episode()
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
|
website_transformer = get_transformer('guess_website')
|
||||||
|
if website_transformer:
|
||||||
|
found = website_transformer.container.find_properties(filename, mtree, options, 'website')
|
||||||
|
guess = website_transformer.container.as_guess(found, filename)
|
||||||
|
if guess:
|
||||||
|
for namepart in ('tv', 'serie', 'episode'):
|
||||||
|
if namepart in guess['website']:
|
||||||
# origin-specific type
|
# origin-specific type
|
||||||
if 'tvu.org.ru' in filename:
|
self.log.debug('Found characteristic property of episodes: %s', guess)
|
||||||
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
|
|
||||||
upgrade_episode()
|
upgrade_episode()
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
|
if filetype_container[0] in ('subtitle', 'info') or (not filetype_container[0] and fileext in video_exts):
|
||||||
# if no episode info found, assume it's a movie
|
# if no episode info found, assume it's a movie
|
||||||
log.debug('Nothing characteristic found, assuming type = movie')
|
self.log.debug('Nothing characteristic found, assuming type = movie')
|
||||||
upgrade_movie()
|
upgrade_movie()
|
||||||
|
|
||||||
filetype = filetype_container[0]
|
if not filetype_container[0]:
|
||||||
return filetype, other
|
self.log.debug('Nothing characteristic found, assuming type = unknown')
|
||||||
|
filetype_container[0] = 'unknown'
|
||||||
|
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
def process(mtree, filetype='autodetect'):
|
def process(self, mtree, options=None):
|
||||||
filetype, other = guess_filetype(mtree, filetype)
|
"""guess the file type now (will be useful later)
|
||||||
|
"""
|
||||||
|
filetype, other = self.guess_filetype(mtree, options)
|
||||||
|
|
||||||
mtree.guess.set('type', filetype, confidence=1.0)
|
mtree.guess.set('type', filetype, confidence=1.0)
|
||||||
log.debug('Found with confidence %.2f: %s' % (1.0, mtree.guess))
|
log_found_guess(mtree.guess)
|
||||||
|
|
||||||
filetype_info = Guess(other, confidence=1.0)
|
filetype_info = Guess(other, confidence=1.0)
|
||||||
# guess the mimetype of the filename
|
# guess the mimetype of the filename
|
||||||
|
|
@@ -195,5 +218,20 @@ def process(mtree, filetype='autodetect'):
|
||||||
filetype_info.update({'mimetype': mime}, confidence=1.0)
|
filetype_info.update({'mimetype': mime}, confidence=1.0)
|
||||||
|
|
||||||
node_ext = mtree.node_at((-1,))
|
node_ext = mtree.node_at((-1,))
|
||||||
node_ext.guess = filetype_info
|
found_guess(node_ext, filetype_info)
|
||||||
log.debug('Found with confidence %.2f: %s' % (1.0, node_ext.guess))
|
|
||||||
|
if mtree.guess.get('type') in [None, 'unknown']:
|
||||||
|
if options.get('name_only'):
|
||||||
|
mtree.guess.set('type', 'movie', confidence=0.6)
|
||||||
|
else:
|
||||||
|
raise TransformerException(__name__, 'Unknown file type')
|
||||||
|
|
||||||
|
def post_process(self, mtree, options=None):
|
||||||
|
# now look whether there are some specific hints for episode vs movie
|
||||||
|
# If we have a date and no year, this is a TV Show.
|
||||||
|
if 'date' in mtree.info and 'year' not in mtree.info and mtree.info.get('type') != 'episode':
|
||||||
|
mtree.guess['type'] = 'episode'
|
||||||
|
for type_leaves in mtree.leaves_containing('type'):
|
||||||
|
type_leaves.guess['type'] = 'episode'
|
||||||
|
for title_leaves in mtree.leaves_containing('title'):
|
||||||
|
title_leaves.guess.rename('title', 'series')
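post_process adds one cross-cutting rule on top of the extension and property checks: a guess that carries a full date but no standalone year is treated as a TV broadcast, so the type is forced to episode and any title is renamed to series. The same decision applied to a plain dictionary, just to make the rule concrete; the sample guess is invented:

def apply_date_rule(info):
    # a date without a standalone year usually means a daily or weekly broadcast
    if 'date' in info and 'year' not in info and info.get('type') != 'episode':
        info['type'] = 'episode'
        if 'title' in info:
            info['series'] = info.pop('title')
    return info

print(apply_date_rule({'type': 'movie', 'title': 'The Daily Show', 'date': '2014-06-11'}))
# {'type': 'episode', 'date': '2014-06-11', 'series': 'The Daily Show'}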
@@ -18,40 +18,47 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
from guessit.patterns import find_properties
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder
|
||||||
import re
|
import re
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
_DIGIT = 0
|
||||||
|
_LETTER = 1
|
||||||
|
_OTHER = 2
|
||||||
|
|
||||||
|
|
||||||
def guess_properties(string):
|
class GuessIdnumber(Transformer):
|
||||||
try:
|
def __init__(self):
|
||||||
prop, value, pos, end = find_properties(string)[0]
|
Transformer.__init__(self, 220)
|
||||||
return { prop: value }, (pos, end)
|
|
||||||
except IndexError:
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{10,})') # 1.0, (0, 0))
|
def supported_properties(self):
|
||||||
|
return ['idNumber']
|
||||||
|
|
||||||
def guess_idnumber(string):
|
_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{20,})') # 1.0, (0, 0))
|
||||||
match = _idnum.search(string)
|
|
||||||
|
def guess_idnumber(self, string, node=None, options=None):
|
||||||
|
match = self._idnum.search(string)
|
||||||
if match is not None:
|
if match is not None:
|
||||||
result = match.groupdict()
|
result = match.groupdict()
|
||||||
switch_count = 0
|
switch_count = 0
|
||||||
DIGIT = 0
|
switch_letter_count = 0
|
||||||
LETTER = 1
|
letter_count = 0
|
||||||
OTHER = 2
|
last_letter = None
|
||||||
last = LETTER
|
|
||||||
|
last = _LETTER
|
||||||
for c in result['idNumber']:
|
for c in result['idNumber']:
|
||||||
if c in '0123456789':
|
if c in '0123456789':
|
||||||
ci = DIGIT
|
ci = _DIGIT
|
||||||
elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
|
elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
|
||||||
ci = LETTER
|
ci = _LETTER
|
||||||
|
if c != last_letter:
|
||||||
|
switch_letter_count += 1
|
||||||
|
last_letter = c
|
||||||
|
letter_count += 1
|
||||||
else:
|
else:
|
||||||
ci = OTHER
|
ci = _OTHER
|
||||||
|
|
||||||
if ci != last:
|
if ci != last:
|
||||||
switch_count += 1
|
switch_count += 1
|
||||||
|
|
@ -59,13 +66,14 @@ def guess_idnumber(string):
|
||||||
last = ci
|
last = ci
|
||||||
|
|
||||||
switch_ratio = float(switch_count) / len(result['idNumber'])
|
switch_ratio = float(switch_count) / len(result['idNumber'])
|
||||||
|
letters_ratio = (float(switch_letter_count) / letter_count) if letter_count > 0 else 1
|
||||||
|
|
||||||
# only return the result as probable if we alternate often between
|
# only return the result as probable if we alternate often between
|
||||||
# char type (more likely for hash values than for common words)
|
# char type (more likely for hash values than for common words)
|
||||||
if switch_ratio > 0.4:
|
if switch_ratio > 0.4 and letters_ratio > 0.4:
|
||||||
return result, match.span()
|
return result, match.span()
|
||||||
|
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
def process(mtree):
|
def process(self, mtree, options=None):
|
||||||
SingleNodeGuesser(guess_idnumber, 0.4, log).process(mtree)
|
GuessFinder(self.guess_idnumber, 0.4, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
|
|
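The idNumber transformer above accepts a 20+ character alphanumeric token only when its character classes alternate often enough, which is what separates CRC/hash strings from ordinary words. A standalone sketch of the same two ratios, with the 0.4 thresholds taken from the diff (the helper name is ours):

def looks_like_id(candidate):
    # Count transitions between digit / letter / other, and between
    # distinct letters; hash-like strings alternate far more than words.
    switch_count = switch_letter_count = letter_count = 0
    last_kind, last_letter = 'letter', None
    for c in candidate:
        if c.isdigit():
            kind = 'digit'
        elif c.isalpha():
            kind = 'letter'
            if c != last_letter:
                switch_letter_count += 1
            last_letter = c
            letter_count += 1
        else:
            kind = 'other'
        if kind != last_kind:
            switch_count += 1
        last_kind = kind
    switch_ratio = float(switch_count) / len(candidate)
    letters_ratio = float(switch_letter_count) / letter_count if letter_count else 1
    return switch_ratio > 0.4 and letters_ratio > 0.4

# looks_like_id('1a2b3c4d5e6f7a8b9c0d')   -> True  (hash-like alternation)
# looks_like_id('justalongordinaryword')  -> False (no alternation at all)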
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,38 +18,169 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit import Guess
|
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
from guessit.language import search_language
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.language import search_language, subtitle_prefixes, subtitle_suffixes
|
||||||
|
from guessit.patterns.extension import subtitle_exts
|
||||||
|
from guessit.textutils import find_words
|
||||||
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder
|
||||||
|
|
||||||
|
|
||||||
def guess_language(string, node, skip=None):
|
class GuessLanguage(Transformer):
|
||||||
if skip:
|
def __init__(self):
|
||||||
relative_skip = []
|
Transformer.__init__(self, 30)
|
||||||
for entry in skip:
|
|
||||||
node_idx = entry['node_idx']
|
|
||||||
span = entry['span']
|
|
||||||
if node_idx == node.node_idx[:len(node_idx)]:
|
|
||||||
relative_span = (span[0] - node.offset + 1, span[1] - node.offset + 1)
|
|
||||||
relative_skip.append(relative_span)
|
|
||||||
skip = relative_skip
|
|
||||||
|
|
||||||
language, span, confidence = search_language(string, skip=skip)
|
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
|
||||||
if language:
|
naming_opts.add_argument('-L', '--allowed-languages', action='append', dest='allowed_languages',
|
||||||
return (Guess({'language': language},
|
help='Allowed language (can be used multiple times)')
|
||||||
confidence=confidence,
|
|
||||||
raw= string[span[0]:span[1]]),
|
|
||||||
span)
|
|
||||||
|
|
||||||
return None, None
|
def supported_properties(self):
|
||||||
|
return ['language', 'subtitleLanguage']
|
||||||
|
|
||||||
guess_language.use_node = True
|
def guess_language(self, string, node=None, options=None):
|
||||||
|
allowed_languages = None
|
||||||
|
if options and 'allowed_languages' in options:
|
||||||
|
allowed_languages = options.get('allowed_languages')
|
||||||
|
guess = search_language(string, allowed_languages)
|
||||||
|
return guess
|
||||||
|
|
||||||
|
def _skip_language_on_second_pass(self, mtree, node):
|
||||||
|
"""Check if found node is a valid language node, or if it's a false positive.
|
||||||
|
|
||||||
def process(mtree, *args, **kwargs):
|
:param mtree: Tree detected on first pass.
|
||||||
SingleNodeGuesser(guess_language, None, log, *args, **kwargs).process(mtree)
|
:type mtree: :class:`guessit.matchtree.MatchTree`
|
||||||
# Note: 'language' is promoted to 'subtitleLanguage' in the post_process transfo
|
:param node: Node that contains a language Guess
|
||||||
|
:type node: :class:`guessit.matchtree.MatchTree`
|
||||||
|
|
||||||
|
:return: True if a second pass skipping this node is required
|
||||||
|
:rtype: bool
|
||||||
|
"""
|
||||||
|
unidentified_starts = {}
|
||||||
|
unidentified_ends = {}
|
||||||
|
|
||||||
|
property_starts = {}
|
||||||
|
property_ends = {}
|
||||||
|
|
||||||
|
title_starts = {}
|
||||||
|
title_ends = {}
|
||||||
|
|
||||||
|
for unidentified_node in mtree.unidentified_leaves():
|
||||||
|
unidentified_starts[unidentified_node.span[0]] = unidentified_node
|
||||||
|
unidentified_ends[unidentified_node.span[1]] = unidentified_node
|
||||||
|
|
||||||
|
for property_node in mtree.leaves_containing('year'):
|
||||||
|
property_starts[property_node.span[0]] = property_node
|
||||||
|
property_ends[property_node.span[1]] = property_node
|
||||||
|
|
||||||
|
for title_node in mtree.leaves_containing(['title', 'series']):
|
||||||
|
title_starts[title_node.span[0]] = title_node
|
||||||
|
title_ends[title_node.span[1]] = title_node
|
||||||
|
|
||||||
|
return node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or node.span[1] + 1 in property_starts.keys()) or\
|
||||||
|
node.span[1] in title_starts.keys() and (node.span[0] == node.group_node().span[0] or node.span[0] in unidentified_ends.keys() or node.span[0] in property_ends.keys())
|
||||||
|
|
||||||
|
def second_pass_options(self, mtree, options=None):
|
||||||
|
m = mtree.matched()
|
||||||
|
to_skip_language_nodes = []
|
||||||
|
|
||||||
|
for lang_key in ('language', 'subtitleLanguage'):
|
||||||
|
langs = {}
|
||||||
|
lang_nodes = set(mtree.leaves_containing(lang_key))
|
||||||
|
|
||||||
|
for lang_node in lang_nodes:
|
||||||
|
lang = lang_node.guess.get(lang_key, None)
|
||||||
|
if self._skip_language_on_second_pass(mtree, lang_node):
|
||||||
|
# Language probably split the title. Add to skip for 2nd pass.
|
||||||
|
|
||||||
|
# if filetype is subtitle and the language appears last, just before
|
||||||
|
# the extension, then it is likely a subtitle language
|
||||||
|
parts = mtree.clean_string(lang_node.root.value).split()
|
||||||
|
if m.get('type') in ['moviesubtitle', 'episodesubtitle']:
|
||||||
|
if lang_node.value in parts and \
|
||||||
|
(parts.index(lang_node.value) == len(parts) - 2):
|
||||||
|
continue
|
||||||
|
to_skip_language_nodes.append(lang_node)
|
||||||
|
elif lang not in langs:
|
||||||
|
langs[lang] = lang_node
|
||||||
|
else:
|
||||||
|
# The same language was found. Keep the more confident one,
|
||||||
|
# and add others to skip for 2nd pass.
|
||||||
|
existing_lang_node = langs[lang]
|
||||||
|
to_skip = None
|
||||||
|
if (existing_lang_node.guess.confidence('language') >=
|
||||||
|
lang_node.guess.confidence('language')):
|
||||||
|
# lang_node is to remove
|
||||||
|
to_skip = lang_node
|
||||||
|
else:
|
||||||
|
# existing_lang_node is to remove
|
||||||
|
langs[lang] = lang_node
|
||||||
|
to_skip = existing_lang_node
|
||||||
|
to_skip_language_nodes.append(to_skip)
|
||||||
|
|
||||||
|
if to_skip_language_nodes:
|
||||||
|
# Also skip same value nodes
|
||||||
|
skipped_values = [skip_node.value for skip_node in to_skip_language_nodes]
|
||||||
|
|
||||||
|
for lang_key in ('language', 'subtitleLanguage'):
|
||||||
|
lang_nodes = set(mtree.leaves_containing(lang_key))
|
||||||
|
|
||||||
|
for lang_node in lang_nodes:
|
||||||
|
if lang_node not in to_skip_language_nodes and lang_node.value in skipped_values:
|
||||||
|
to_skip_language_nodes.append(lang_node)
|
||||||
|
return {'skip_nodes': to_skip_language_nodes}
|
||||||
|
return None
|
||||||
|
|
||||||
|
def should_process(self, mtree, options=None):
|
||||||
|
options = options or {}
|
||||||
|
return options.get('language', True)
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_language, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
|
||||||
|
def promote_subtitle(self, node):
|
||||||
|
if 'language' in node.guess:
|
||||||
|
node.guess.set('subtitleLanguage', node.guess['language'],
|
||||||
|
confidence=node.guess.confidence('language'))
|
||||||
|
del node.guess['language']
|
||||||
|
|
||||||
|
def post_process(self, mtree, options=None):
|
||||||
|
# 1- try to promote language to subtitle language where it makes sense
|
||||||
|
for node in mtree.nodes():
|
||||||
|
if 'language' not in node.guess:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# - if we matched a language in a file with a sub extension and that
|
||||||
|
# the group is the last group of the filename, it is probably the
|
||||||
|
# language of the subtitle
|
||||||
|
# (eg: 'xxx.english.srt')
|
||||||
|
if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
|
||||||
|
node == list(mtree.leaves())[-2]):
|
||||||
|
self.promote_subtitle(node)
|
||||||
|
|
||||||
|
# - if we find in the same explicit group
|
||||||
|
# a subtitle prefix before the language,
|
||||||
|
# or a subtitle suffix after the language,
|
||||||
|
# then upgrade the language
|
||||||
|
explicit_group = mtree.node_at(node.node_idx[:2])
|
||||||
|
group_str = explicit_group.value.lower()
|
||||||
|
|
||||||
|
for sub_prefix in subtitle_prefixes:
|
||||||
|
if (sub_prefix in find_words(group_str) and
|
||||||
|
0 <= group_str.find(sub_prefix) < (node.span[0] - explicit_group.span[0])):
|
||||||
|
self.promote_subtitle(node)
|
||||||
|
|
||||||
|
for sub_suffix in subtitle_suffixes:
|
||||||
|
if (sub_suffix in find_words(group_str) and
|
||||||
|
(node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)):
|
||||||
|
self.promote_subtitle(node)
|
||||||
|
|
||||||
|
# - if a language is in an explicit group just preceded by "st",
|
||||||
|
# it is a subtitle language (eg: '...st[fr-eng]...')
|
||||||
|
try:
|
||||||
|
idx = node.node_idx
|
||||||
|
previous = list(mtree.node_at((idx[0], idx[1] - 1)).leaves())[-1]
|
||||||
|
if previous.value.lower()[-2:] == 'st':
|
||||||
|
self.promote_subtitle(node)
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
|
|
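The post_process above promotes a detected language to subtitleLanguage when the file has a subtitle extension and the language token is the last thing before it (or when a subtitle prefix/suffix sits next to it in the same group). A rough standalone illustration of the extension case only; the extension set and function name are ours, not guessit API:

SUBTITLE_EXTS = {'srt', 'sub', 'ssa', 'ass'}

def classify_language(filename, language_token):
    parts = filename.lower().replace('.', ' ').split()
    key = 'language'
    # 'xxx.english.srt' -> the token just before a subtitle extension
    # is almost certainly the subtitle language, not the audio language.
    if len(parts) >= 2 and parts[-1] in SUBTITLE_EXTS and parts[-2] == language_token.lower():
        key = 'subtitleLanguage'
    return {key: language_token}

# classify_language('Movie.2010.English.srt', 'English') -> {'subtitleLanguage': 'English'}
# classify_language('Movie.2010.English.mkv', 'English') -> {'language': 'English'}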
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,45 +18,51 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit import Guess
|
|
||||||
import unicodedata
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import found_property
|
||||||
|
from guessit import u
|
||||||
|
|
||||||
|
|
||||||
def process(mtree):
|
class GuessMovieTitleFromPosition(Transformer):
|
||||||
def found_property(node, name, value, confidence):
|
def __init__(self):
|
||||||
node.guess = Guess({ name: value },
|
Transformer.__init__(self, -200)
|
||||||
confidence=confidence,
|
|
||||||
raw=value)
|
|
||||||
log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
|
|
||||||
|
|
||||||
def found_title(node, confidence):
|
def supported_properties(self):
|
||||||
found_property(node, 'title', node.clean_value, confidence)
|
return ['title']
|
||||||
|
|
||||||
|
def should_process(self, mtree, options=None):
|
||||||
|
options = options or {}
|
||||||
|
return not options.get('skip_title') and not mtree.guess.get('type', '').startswith('episode')
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
"""
|
||||||
|
try to identify the remaining unknown groups by looking at their
|
||||||
|
position relative to other known elements
|
||||||
|
"""
|
||||||
|
if 'title' in mtree.info:
|
||||||
|
return
|
||||||
|
|
||||||
basename = mtree.node_at((-2,))
|
basename = mtree.node_at((-2,))
|
||||||
all_valid = lambda leaf: len(leaf.clean_value) > 0
|
all_valid = lambda leaf: len(leaf.clean_value) > 0
|
||||||
basename_leftover = basename.unidentified_leaves(valid=all_valid)
|
basename_leftover = list(basename.unidentified_leaves(valid=all_valid))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
folder = mtree.node_at((-3,))
|
folder = mtree.node_at((-3,))
|
||||||
folder_leftover = folder.unidentified_leaves()
|
folder_leftover = list(folder.unidentified_leaves())
|
||||||
except ValueError:
|
except ValueError:
|
||||||
folder = None
|
folder = None
|
||||||
folder_leftover = []
|
folder_leftover = []
|
||||||
|
|
||||||
log.debug('folder: %s' % folder_leftover)
|
self.log.debug('folder: %s' % u(folder_leftover))
|
||||||
log.debug('basename: %s' % basename_leftover)
|
self.log.debug('basename: %s' % u(basename_leftover))
|
||||||
|
|
||||||
# specific cases:
|
# specific cases:
|
||||||
# if we find the same group both in the folder name and the filename,
|
# if we find the same group both in the folder name and the filename,
|
||||||
# it's a good candidate for title
|
# it's a good candidate for title
|
||||||
if (folder_leftover and basename_leftover and
|
if folder_leftover and basename_leftover and folder_leftover[0].clean_value == basename_leftover[0].clean_value:
|
||||||
folder_leftover[0].clean_value == basename_leftover[0].clean_value):
|
found_property(folder_leftover[0], 'title', confidence=0.8)
|
||||||
|
|
||||||
found_title(folder_leftover[0], confidence=0.8)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# specific cases:
|
# specific cases:
|
||||||
|
|
@ -64,61 +70,52 @@ def process(mtree):
|
||||||
# group, and the folder only contains 1 unidentified one, then we have
|
# group, and the folder only contains 1 unidentified one, then we have
|
||||||
# a series
|
# a series
|
||||||
# ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
|
# ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
|
||||||
try:
|
if len(folder_leftover) > 0 and len(basename_leftover) > 1:
|
||||||
series = folder_leftover[0]
|
series = folder_leftover[0]
|
||||||
filmNumber = basename_leftover[0]
|
film_number = basename_leftover[0]
|
||||||
title = basename_leftover[1]
|
title = basename_leftover[1]
|
||||||
|
|
||||||
basename_leaves = basename.leaves()
|
basename_leaves = list(basename.leaves())
|
||||||
|
|
||||||
num = int(filmNumber.clean_value)
|
num = None
|
||||||
|
try:
|
||||||
|
num = int(film_number.clean_value)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
log.debug('series: %s' % series.clean_value)
|
if num:
|
||||||
log.debug('title: %s' % title.clean_value)
|
self.log.debug('series: %s' % series.clean_value)
|
||||||
|
self.log.debug('title: %s' % title.clean_value)
|
||||||
if (series.clean_value != title.clean_value and
|
if (series.clean_value != title.clean_value and
|
||||||
series.clean_value != filmNumber.clean_value and
|
series.clean_value != film_number.clean_value and
|
||||||
basename_leaves.index(filmNumber) == 0 and
|
basename_leaves.index(film_number) == 0 and
|
||||||
basename_leaves.index(title) == 1):
|
basename_leaves.index(title) == 1):
|
||||||
|
|
||||||
found_title(title, confidence=0.6)
|
found_property(title, 'title', confidence=0.6)
|
||||||
found_property(series, 'filmSeries',
|
found_property(series, 'filmSeries', confidence=0.6)
|
||||||
series.clean_value, confidence=0.6)
|
found_property(film_number, 'filmNumber', num, confidence=0.6)
|
||||||
found_property(filmNumber, 'filmNumber',
|
|
||||||
num, confidence=0.6)
|
|
||||||
return
|
return
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# specific cases:
|
|
||||||
# - movies/tttttt (yyyy)/tttttt.ccc
|
|
||||||
try:
|
|
||||||
if mtree.node_at((-4, 0)).value.lower() == 'movies':
|
|
||||||
folder = mtree.node_at((-3,))
|
|
||||||
|
|
||||||
# Note:too generic, might solve all the unittests as they all
|
|
||||||
# contain 'movies' in their path
|
|
||||||
#
|
|
||||||
#if containing_folder.is_leaf() and not containing_folder.guess:
|
|
||||||
# containing_folder.guess =
|
|
||||||
# Guess({ 'title': clean_string(containing_folder.value) },
|
|
||||||
# confidence=0.7)
|
|
||||||
|
|
||||||
|
if folder:
|
||||||
year_group = folder.first_leaf_containing('year')
|
year_group = folder.first_leaf_containing('year')
|
||||||
|
if year_group:
|
||||||
groups_before = folder.previous_unidentified_leaves(year_group)
|
groups_before = folder.previous_unidentified_leaves(year_group)
|
||||||
|
if groups_before:
|
||||||
found_title(groups_before[0], confidence=0.8)
|
try:
|
||||||
|
node = next(groups_before)
|
||||||
|
found_property(node, 'title', confidence=0.8)
|
||||||
return
|
return
|
||||||
|
except StopIteration:
|
||||||
except Exception:
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# if we have either format or videoCodec in the folder containing the file
|
# if we have either format or videoCodec in the folder containing the
|
||||||
# or one of its parents, then we should probably look for the title in
|
# file or one of its parents, then we should probably look for the title
|
||||||
# there rather than in the basename
|
# in there rather than in the basename
|
||||||
try:
|
try:
|
||||||
props = mtree.previous_leaves_containing(mtree.children[-2],
|
props = list(mtree.previous_leaves_containing(mtree.children[-2],
|
||||||
[ 'videoCodec', 'format',
|
['videoCodec',
|
||||||
'language' ])
|
'format',
|
||||||
|
'language']))
|
||||||
except IndexError:
|
except IndexError:
|
||||||
props = []
|
props = []
|
||||||
|
|
||||||
|
|
@ -127,48 +124,50 @@ def process(mtree):
|
||||||
if all(g.node_idx[0] == group_idx for g in props):
|
if all(g.node_idx[0] == group_idx for g in props):
|
||||||
# if they're all in the same group, take leftover info from there
|
# if they're all in the same group, take leftover info from there
|
||||||
leftover = mtree.node_at((group_idx,)).unidentified_leaves()
|
leftover = mtree.node_at((group_idx,)).unidentified_leaves()
|
||||||
|
try:
|
||||||
if leftover:
|
found_property(next(leftover), 'title', confidence=0.7)
|
||||||
found_title(leftover[0], confidence=0.7)
|
|
||||||
return
|
return
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
|
||||||
# look for title in basename if there are some remaining undidentified
|
# look for title in basename if there are some remaining unidentified
|
||||||
# groups there
|
# groups there
|
||||||
if basename_leftover:
|
if basename_leftover:
|
||||||
title_candidate = basename_leftover[0]
|
|
||||||
|
|
||||||
# if basename is only one word and the containing folder has at least
|
# if basename is only one word and the containing folder has at least
|
||||||
# 3 words in it, we should take the title from the folder name
|
# 3 words in it, we should take the title from the folder name
|
||||||
# ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
|
# ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
|
||||||
# ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
|
# ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
|
||||||
if (title_candidate.clean_value.count(' ') == 0 and
|
if (basename_leftover[0].clean_value.count(' ') == 0 and
|
||||||
folder_leftover and
|
folder_leftover and folder_leftover[0].clean_value.count(' ') >= 2):
|
||||||
folder_leftover[0].clean_value.count(' ') >= 2):
|
|
||||||
|
|
||||||
found_title(folder_leftover[0], confidence=0.7)
|
found_property(folder_leftover[0], 'title', confidence=0.7)
|
||||||
return
|
return
|
||||||
|
|
||||||
# if there are only 2 unidentified groups, the first of which is inside
|
# if there are several unidentified groups, take the first one that is
|
||||||
# brackets or parentheses, we take the second one for the title:
|
# not inside brackets or parentheses.
|
||||||
# ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
|
# ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
|
||||||
if len(basename_leftover) == 2 and basename_leftover[0].is_explicit():
|
if basename_leftover[0].is_explicit():
|
||||||
found_title(basename_leftover[1], confidence=0.8)
|
for basename_leftover_elt in basename_leftover:
|
||||||
|
if not basename_leftover_elt.is_explicit():
|
||||||
|
found_property(basename_leftover_elt, 'title', confidence=0.8)
|
||||||
return
|
return
|
||||||
|
|
||||||
# if all else fails, take the first remaining unidentified group in the
|
# if all else fails, take the first remaining unidentified group in the
|
||||||
# basename as title
|
# basename as title
|
||||||
found_title(title_candidate, confidence=0.6)
|
found_property(basename_leftover[0], 'title', confidence=0.6)
|
||||||
return
|
return
|
||||||
|
|
||||||
# if there are no leftover groups in the basename, look in the folder name
|
# if there are no leftover groups in the basename, look in the folder name
|
||||||
if folder_leftover:
|
if folder_leftover:
|
||||||
found_title(folder_leftover[0], confidence=0.5)
|
found_property(folder_leftover[0], 'title', confidence=0.5)
|
||||||
return
|
return
|
||||||
|
|
||||||
# if nothing worked, look if we have a very small group at the beginning
|
# if nothing worked, look if we have a very small group at the beginning
|
||||||
# of the basename
|
# of the basename
|
||||||
basename = mtree.node_at((-2,))
|
basename = mtree.node_at((-2,))
|
||||||
basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
|
basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
|
||||||
if basename_leftover:
|
try:
|
||||||
found_title(basename_leftover[0], confidence=0.4)
|
found_property(next(basename_leftover), 'title', confidence=0.4)
|
||||||
return
|
return
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
|
|
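The title-from-position cascade above boils down to a few ordered rules. A simplified sketch of the main ones, taking the already-cleaned leftover tokens as plain lists (confidences copied from the diff; the bracketed-group and film-series cases are left out):

def pick_title(folder_leftover, basename_leftover):
    if folder_leftover and basename_leftover and folder_leftover[0] == basename_leftover[0]:
        return folder_leftover[0], 0.8      # same leftover in folder and file name
    if basename_leftover:
        first = basename_leftover[0]
        # one-word file name inside a wordy folder -> the folder carries the title
        if ' ' not in first and folder_leftover and folder_leftover[0].count(' ') >= 2:
            return folder_leftover[0], 0.7
        return first, 0.6                   # otherwise the file name itself
    if folder_leftover:
        return folder_leftover[0], 0.5      # nothing left in the basename
    return None, 0.0

# pick_title(['Alice in Wonderland DVDRip XviD DiAMOND'], ['dmdaw'])
#   -> ('Alice in Wonderland DVDRip XviD DiAMOND', 0.7)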
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,21 +18,271 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
from guessit.patterns import find_properties
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer, NoValidator, \
|
||||||
|
ChainedValidator, DefaultValidator, OnlyOneValidator, LeftValidator, NeighborValidator
|
||||||
|
from guessit.patterns import sep, build_or_pattern
|
||||||
|
from guessit.patterns.extension import subtitle_exts, video_exts, info_exts
|
||||||
|
from guessit.patterns.numeral import numeral, parse_numeral
|
||||||
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder, found_property
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
def guess_properties(string):
|
class GuessProperties(Transformer):
|
||||||
try:
|
def __init__(self):
|
||||||
prop, value, pos, end = find_properties(string)[0]
|
Transformer.__init__(self, 35)
|
||||||
return { prop: value }, (pos, end)
|
|
||||||
except IndexError:
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
|
self.container = PropertiesContainer()
|
||||||
|
self.qualities = QualitiesContainer()
|
||||||
|
|
||||||
def process(mtree):
|
def register_property(propname, props, **kwargs):
|
||||||
SingleNodeGuesser(guess_properties, 1.0, log).process(mtree)
|
"""props a dict of {value: [patterns]}"""
|
||||||
|
for canonical_form, patterns in props.items():
|
||||||
|
if isinstance(patterns, tuple):
|
||||||
|
patterns2, pattern_kwarg = patterns
|
||||||
|
if kwargs:
|
||||||
|
current_kwarg = dict(kwargs)
|
||||||
|
current_kwarg.update(pattern_kwarg)
|
||||||
|
else:
|
||||||
|
current_kwarg = dict(pattern_kwarg)
|
||||||
|
current_kwarg['canonical_form'] = canonical_form
|
||||||
|
self.container.register_property(propname, *patterns2, **current_kwarg)
|
||||||
|
elif kwargs:
|
||||||
|
current_kwarg = dict(kwargs)
|
||||||
|
current_kwarg['canonical_form'] = canonical_form
|
||||||
|
self.container.register_property(propname, *patterns, **current_kwarg)
|
||||||
|
else:
|
||||||
|
self.container.register_property(propname, *patterns, canonical_form=canonical_form)
|
||||||
|
|
||||||
|
def register_quality(propname, quality_dict):
|
||||||
|
"""props a dict of {canonical_form: quality}"""
|
||||||
|
for canonical_form, quality in quality_dict.items():
|
||||||
|
self.qualities.register_quality(propname, canonical_form, quality)
|
||||||
|
|
||||||
|
register_property('container', {'mp4': ['MP4']})
|
||||||
|
|
||||||
|
# http://en.wikipedia.org/wiki/Pirated_movie_release_types
|
||||||
|
register_property('format', {'VHS': ['VHS', 'VHS-Rip'],
|
||||||
|
'Cam': ['CAM', 'CAMRip', 'HD-CAM'],
|
||||||
|
#'Telesync': ['TELESYNC', 'PDVD'],
|
||||||
|
'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}),
|
||||||
|
'Workprint': ['WORKPRINT', 'WP'],
|
||||||
|
'Telecine': ['TELECINE', 'TC'],
|
||||||
|
'PPV': ['PPV', 'PPV-Rip'], # Pay Per View
|
||||||
|
'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
|
||||||
|
'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
|
||||||
|
'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
|
||||||
|
'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
|
||||||
|
'VOD': ['VOD', 'VOD-Rip'],
|
||||||
|
'WEBRip': ['WEB-Rip'],
|
||||||
|
'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
|
||||||
|
'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
|
||||||
|
'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
|
||||||
|
})
|
||||||
|
|
||||||
|
register_quality('format', {'VHS': -100,
|
||||||
|
'Cam': -90,
|
||||||
|
'Telesync': -80,
|
||||||
|
'Workprint': -70,
|
||||||
|
'Telecine': -60,
|
||||||
|
'PPV': -50,
|
||||||
|
'TV': -30,
|
||||||
|
'DVB': -20,
|
||||||
|
'DVD': 0,
|
||||||
|
'HDTV': 20,
|
||||||
|
'VOD': 40,
|
||||||
|
'WEBRip': 50,
|
||||||
|
'WEB-DL': 60,
|
||||||
|
'HD-DVD': 80,
|
||||||
|
'BluRay': 100
|
||||||
|
})
|
||||||
|
|
||||||
|
register_property('screenSize', {'360p': ['(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)'],
|
||||||
|
'368p': ['(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)'],
|
||||||
|
'480p': ['(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)'],
|
||||||
|
#'480p': (['hr'], {'confidence': 0.2}), # duplicate dict key
|
||||||
|
'576p': ['(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)'],
|
||||||
|
'720p': ['(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)'],
|
||||||
|
'900p': ['(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)'],
|
||||||
|
'1080i': ['(?:\d{3,}(?:\\|\/|x|\*))?1080i'],
|
||||||
|
'1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080p?x?'],
|
||||||
|
'4K': ['(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)']
|
||||||
|
},
|
||||||
|
validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))
|
||||||
|
|
||||||
|
class ResolutionValidator(object):
|
||||||
|
"""Make sure our match is surrounded by separators, or by another entry"""
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
"""
|
||||||
|
span = _get_span(prop, match)
|
||||||
|
span = _trim_span(span, string[span[0]:span[1]])
|
||||||
|
start, end = span
|
||||||
|
|
||||||
|
sep_start = start <= 0 or string[start - 1] in sep
|
||||||
|
sep_end = end >= len(string) or string[end] in sep
|
||||||
|
start_by_other = start in entry_end
|
||||||
|
end_by_other = end in entry_start
|
||||||
|
if (sep_start or start_by_other) and (sep_end or end_by_other):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
_digits_re = re.compile('\d+')
|
||||||
|
|
||||||
|
def resolution_formatter(value):
|
||||||
|
digits = _digits_re.findall(value)
|
||||||
|
return 'x'.join(digits)
|
||||||
|
|
||||||
|
self.container.register_property('screenSize', '\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter, validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))
|
||||||
|
|
||||||
|
register_quality('screenSize', {'360p': -300,
|
||||||
|
'368p': -200,
|
||||||
|
'480p': -100,
|
||||||
|
'576p': 0,
|
||||||
|
'720p': 100,
|
||||||
|
'900p': 130,
|
||||||
|
'1080i': 180,
|
||||||
|
'1080p': 200,
|
||||||
|
'4K': 400
|
||||||
|
})
|
||||||
|
|
||||||
|
_videoCodecProperty = {'Real': ['Rv\d{2}'], # http://en.wikipedia.org/wiki/RealVideo
|
||||||
|
'Mpeg2': ['Mpeg2'],
|
||||||
|
'DivX': ['DVDivX', 'DivX'],
|
||||||
|
'XviD': ['XviD'],
|
||||||
|
'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
|
||||||
|
'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
|
||||||
|
}
|
||||||
|
|
||||||
|
register_property('videoCodec', _videoCodecProperty)
|
||||||
|
|
||||||
|
register_quality('videoCodec', {'Real': -50,
|
||||||
|
'Mpeg2': -30,
|
||||||
|
'DivX': -10,
|
||||||
|
'XviD': 0,
|
||||||
|
'h264': 100,
|
||||||
|
'h265': 150
|
||||||
|
})
|
||||||
|
|
||||||
|
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
|
||||||
|
# http://fr.wikipedia.org/wiki/H.264
|
||||||
|
self.container.register_property('videoProfile', 'BP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit')
|
||||||
|
self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit')
|
||||||
|
self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
|
||||||
|
register_quality('videoProfile', {'BP': -20,
|
||||||
|
'XP': -10,
|
||||||
|
'MP': 0,
|
||||||
|
'HP': 10,
|
||||||
|
'10bit': 15,
|
||||||
|
'Hi422P': 25,
|
||||||
|
'Hi444PP': 35
|
||||||
|
})
|
||||||
|
|
||||||
|
# has nothing to do here (or on filenames for that matter), but some
|
||||||
|
# releases use it and it helps to identify release groups, so we adapt
|
||||||
|
register_property('videoApi', {'DXVA': ['DXVA']})
|
||||||
|
|
||||||
|
register_property('audioCodec', {'MP3': ['MP3', 'LAME', 'LAME(?:\d)+-(?:\d)+'],
|
||||||
|
'DolbyDigital': ['DD'],
|
||||||
|
'AAC': ['AAC'],
|
||||||
|
'AC3': ['AC3'],
|
||||||
|
'Flac': ['FLAC'],
|
||||||
|
'DTS': (['DTS'], {'validator': LeftValidator()}),
|
||||||
|
'TrueHD': ['True-HD']
|
||||||
|
})
|
||||||
|
|
||||||
|
register_quality('audioCodec', {'MP3': 10,
|
||||||
|
'DolbyDigital': 30,
|
||||||
|
'AAC': 35,
|
||||||
|
'AC3': 40,
|
||||||
|
'Flac': 45,
|
||||||
|
'DTS': 60,
|
||||||
|
'TrueHD': 70
|
||||||
|
})
|
||||||
|
|
||||||
|
self.container.register_property('audioProfile', 'HD', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
|
||||||
|
self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
|
||||||
|
self.container.register_property('audioProfile', 'HE', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
|
||||||
|
self.container.register_property('audioProfile', 'LC', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
|
||||||
|
self.container.register_property('audioProfile', 'HQ', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AC3']))
|
||||||
|
|
||||||
|
register_quality('audioProfile', {'HD': 20,
|
||||||
|
'HDMA': 50,
|
||||||
|
'LC': 0,
|
||||||
|
'HQ': 0,
|
||||||
|
'HE': 20
|
||||||
|
})
|
||||||
|
|
||||||
|
register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch', '8ch'],
|
||||||
|
'5.1': ['5[\W_]1', '5ch', '6ch'],
|
||||||
|
'2.0': ['2[\W_]0', '2ch', 'stereo'],
|
||||||
|
'1.0': ['1[\W_]0', '1ch', 'mono']
|
||||||
|
})
|
||||||
|
|
||||||
|
register_quality('audioChannels', {'7.1': 200,
|
||||||
|
'5.1': 100,
|
||||||
|
'2.0': 0,
|
||||||
|
'1.0': -100
|
||||||
|
})
|
||||||
|
|
||||||
|
self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')
|
||||||
|
|
||||||
|
self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False)
|
||||||
|
|
||||||
|
weak_episode_words = ['pt', 'part']
|
||||||
|
self.container.register_property(None, '(' + build_or_pattern(weak_episode_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)
|
||||||
|
|
||||||
|
register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
|
||||||
|
'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
|
||||||
|
'DualAudio': ['Dual-Audio'],
|
||||||
|
'WideScreen': ['ws', 'wide-screen'],
|
||||||
|
'Netflix': ['Netflix', 'NF']
|
||||||
|
})
|
||||||
|
|
||||||
|
self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=NeighborValidator())
|
||||||
|
self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
|
||||||
|
self.container.register_property('other', 'Fansub', canonical_form='Fansub')
|
||||||
|
self.container.register_property('other', 'Fastsub', canonical_form='Fastsub')
|
||||||
|
self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete', canonical_form='Complete')
|
||||||
|
self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
|
||||||
|
self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')
|
||||||
|
|
||||||
|
self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
|
||||||
|
'DDC',
|
||||||
|
'HR', 'PAL', 'SECAM', 'NTSC')
|
||||||
|
self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())
|
||||||
|
|
||||||
|
for prop in self.container.get_properties('format'):
|
||||||
|
self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')
|
||||||
|
|
||||||
|
for exts in (subtitle_exts, info_exts, video_exts):
|
||||||
|
for container in exts:
|
||||||
|
self.container.register_property('container', container, confidence=0.3)
|
||||||
|
|
||||||
|
def guess_properties(self, string, node=None, options=None):
|
||||||
|
found = self.container.find_properties(string, node, options)
|
||||||
|
return self.container.as_guess(found, string)
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return self.container.get_supported_properties()
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
proper_count = 0
|
||||||
|
for other_leaf in mtree.leaves_containing('other'):
|
||||||
|
if 'other' in other_leaf.info and 'Proper' in other_leaf.info['other']:
|
||||||
|
proper_count += 1
|
||||||
|
if proper_count:
|
||||||
|
found_property(mtree, 'properCount', proper_count)
|
||||||
|
|
||||||
|
def rate_quality(self, guess, *props):
|
||||||
|
return self.qualities.rate_quality(guess, *props)
|
||||||
|
|
|
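register_quality above only stores a relative score per canonical form; rate_quality then sums the scores of whatever properties a guess contains, which is what lets callers rank two releases of the same title. A small sketch of that ranking with the format and screenSize scores copied from the diff (the flat-dict rating function is ours):

FORMAT_QUALITY = {'VHS': -100, 'Cam': -90, 'Telesync': -80, 'Workprint': -70,
                  'Telecine': -60, 'PPV': -50, 'TV': -30, 'DVB': -20, 'DVD': 0,
                  'HDTV': 20, 'VOD': 40, 'WEBRip': 50, 'WEB-DL': 60,
                  'HD-DVD': 80, 'BluRay': 100}
SCREEN_QUALITY = {'360p': -300, '368p': -200, '480p': -100, '576p': 0,
                  '720p': 100, '900p': 130, '1080i': 180, '1080p': 200, '4K': 400}

def rate(guess):
    return (FORMAT_QUALITY.get(guess.get('format'), 0)
            + SCREEN_QUALITY.get(guess.get('screenSize'), 0))

# rate({'format': 'BluRay', 'screenSize': '1080p'})  -> 300
# rate({'format': 'HDTV',   'screenSize': '720p'})   -> 120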
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,69 +18,187 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
from guessit.patterns import prop_multi, compute_canonical_form, _dash, _psep
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder, build_guess
|
||||||
|
from guessit.containers import PropertiesContainer
|
||||||
|
from guessit.patterns import sep
|
||||||
|
from guessit.guess import Guess
|
||||||
|
from guessit.textutils import strip_brackets
|
||||||
import re
|
import re
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
def get_patterns(property_name):
|
|
||||||
return [ p.replace(_dash, _psep) for patterns in prop_multi[property_name].values() for p in patterns ]
|
|
||||||
|
|
||||||
CODECS = get_patterns('videoCodec')
|
|
||||||
FORMATS = get_patterns('format')
|
|
||||||
VAPIS = get_patterns('videoApi')
|
|
||||||
|
|
||||||
# RG names following a codec or format, with a potential space or dash inside the name
|
|
||||||
GROUP_NAMES = [ r'(?P<videoCodec>' + codec + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
|
|
||||||
for codec in CODECS ]
|
|
||||||
GROUP_NAMES += [ r'(?P<format>' + fmt + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
|
|
||||||
for fmt in FORMATS ]
|
|
||||||
GROUP_NAMES += [ r'(?P<videoApi>' + api + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
|
|
||||||
for api in VAPIS ]
|
|
||||||
|
|
||||||
GROUP_NAMES2 = [ r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
|
|
||||||
for codec in CODECS ]
|
|
||||||
GROUP_NAMES2 += [ r'\.(?P<format>' + fmt + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
|
|
||||||
for fmt in FORMATS ]
|
|
||||||
GROUP_NAMES2 += [ r'\.(?P<videoApi>' + vapi + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
|
|
||||||
for vapi in VAPIS ]
|
|
||||||
|
|
||||||
GROUP_NAMES = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES ]
|
|
||||||
GROUP_NAMES2 = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES2 ]
|
|
||||||
|
|
||||||
def adjust_metadata(md):
|
|
||||||
return dict((property_name, compute_canonical_form(property_name, value) or value)
|
|
||||||
for property_name, value in md.items())
|
|
||||||
|
|
||||||
|
|
||||||
def guess_release_group(string):
|
class GuessReleaseGroup(Transformer):
|
||||||
# first try to see whether we have both a known codec and a known release group
|
def __init__(self):
|
||||||
for rexp in GROUP_NAMES:
|
Transformer.__init__(self, -190)
|
||||||
match = rexp.search(string)
|
|
||||||
while match:
|
|
||||||
metadata = match.groupdict()
|
|
||||||
# make sure this is an actual release group we caught
|
|
||||||
release_group = (compute_canonical_form('releaseGroup', metadata['releaseGroup']) or
|
|
||||||
compute_canonical_form('weakReleaseGroup', metadata['releaseGroup']))
|
|
||||||
if release_group:
|
|
||||||
return adjust_metadata(metadata), (match.start(1), match.end(2))
|
|
||||||
|
|
||||||
# we didn't find anything conclusive, keep searching
|
self.container = PropertiesContainer(canonical_from_pattern=False)
|
||||||
match = rexp.search(string, match.span()[0]+1)
|
self._allowed_groupname_pattern = '[\w@#€£$&!\?]'
|
||||||
|
self._forbidden_groupname_lambda = [lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
|
||||||
|
lambda elt: self._is_number(elt)]
|
||||||
|
# If the previous property is in this list, the match will be considered safe
|
||||||
|
# and group name can contain a separator.
|
||||||
|
self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels', 'other']
|
||||||
|
self.previous_safe_values = {'other': ['Complete']}
|
||||||
|
self.next_safe_properties = ['extension', 'website']
|
||||||
|
self.next_safe_values = {'format': ['Telesync']}
|
||||||
|
self.container.sep_replace_char = '-'
|
||||||
|
self.container.canonical_from_pattern = False
|
||||||
|
self.container.enhance = True
|
||||||
|
self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+')
|
||||||
|
self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+')
|
||||||
|
self.re_sep = re.compile('(' + sep + ')')
|
||||||
|
|
||||||
# pick anything as releaseGroup as long as we have a codec in front
|
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
|
||||||
# this doesn't include a potential dash ('-') ending the release group
|
naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group',
|
||||||
# eg: [...].X264-HiS@SiLUHD-English.[...]
|
help='Expected release group (can be used multiple times)')
|
||||||
for rexp in GROUP_NAMES2:
|
|
||||||
match = rexp.search(string)
|
|
||||||
if match:
|
|
||||||
return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))
|
|
||||||
|
|
||||||
return None, None
|
def supported_properties(self):
|
||||||
|
return self.container.get_supported_properties()
|
||||||
|
|
||||||
|
def _is_number(self, s):
|
||||||
|
try:
|
||||||
|
int(s)
|
||||||
|
return True
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
def process(mtree):
|
def validate_group_name(self, guess):
|
||||||
SingleNodeGuesser(guess_release_group, 0.8, log).process(mtree)
|
val = guess['releaseGroup']
|
||||||
|
if len(val) > 1:
|
||||||
|
checked_val = ""
|
||||||
|
forbidden = False
|
||||||
|
for elt in self.re_sep.split(val): # separators are in the list because of capturing group
|
||||||
|
if forbidden:
|
||||||
|
# Previous token was forbidden, don't add the separator
|
||||||
|
forbidden = False
|
||||||
|
continue
|
||||||
|
for forbidden_lambda in self._forbidden_groupname_lambda:
|
||||||
|
forbidden = forbidden_lambda(elt.lower())
|
||||||
|
if forbidden:
|
||||||
|
if checked_val:
|
||||||
|
# Removing previous separator
|
||||||
|
checked_val = checked_val[0:len(checked_val) - 1]
|
||||||
|
break
|
||||||
|
if not forbidden:
|
||||||
|
checked_val += elt
|
||||||
|
|
||||||
|
val = checked_val
|
||||||
|
if not val:
|
||||||
|
return False
|
||||||
|
if self.re_sep.match(val[-1]):
|
||||||
|
val = val[:len(val)-1]
|
||||||
|
if self.re_sep.match(val[0]):
|
||||||
|
val = val[1:]
|
||||||
|
guess['releaseGroup'] = val
|
||||||
|
forbidden = False
|
||||||
|
for forbidden_lambda in self._forbidden_groupname_lambda:
|
||||||
|
forbidden = forbidden_lambda(val.lower())
|
||||||
|
if forbidden:
|
||||||
|
break
|
||||||
|
if not forbidden:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def is_leaf_previous(self, leaf, node):
|
||||||
|
if leaf.span[1] <= node.span[0]:
|
||||||
|
for idx in range(leaf.span[1], node.span[0]):
|
||||||
|
if leaf.root.value[idx] not in sep:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def validate_next_leaves(self, node):
|
||||||
|
if 'series' in node.root.info or 'title' in node.root.info:
|
||||||
|
# --expected-series or --expected-title is used.
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Make sure to avoid collision with 'series' or 'title' guessed later. Should be more precise.
|
||||||
|
leaves = node.root.unidentified_leaves()
|
||||||
|
return len(list(leaves)) > 1
|
||||||
|
|
||||||
|
def validate_node(self, leaf, node, safe=False):
|
||||||
|
if not self.is_leaf_previous(leaf, node):
|
||||||
|
return False
|
||||||
|
if not self.validate_next_leaves(node):
|
||||||
|
return False
|
||||||
|
if safe:
|
||||||
|
for k, v in leaf.guess.items():
|
||||||
|
if k in self.previous_safe_values and not v in self.previous_safe_values[k]:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def guess_release_group(self, string, node=None, options=None):
|
||||||
|
if options and options.get('expected_group'):
|
||||||
|
expected_container = PropertiesContainer(enhance=True, canonical_from_pattern=False)
|
||||||
|
for expected_group in options.get('expected_group'):
|
||||||
|
if expected_group.startswith('re:'):
|
||||||
|
expected_group = expected_group[3:]
|
||||||
|
expected_group = expected_group.replace(' ', '-')
|
||||||
|
expected_container.register_property('releaseGroup', expected_group, enhance=True)
|
||||||
|
else:
|
||||||
|
expected_group = re.escape(expected_group)
|
||||||
|
expected_container.register_property('releaseGroup', expected_group, enhance=False)
|
||||||
|
|
||||||
|
found = expected_container.find_properties(string, node, options, 'releaseGroup')
|
||||||
|
guess = expected_container.as_guess(found, string, self.validate_group_name)
|
||||||
|
if guess:
|
||||||
|
return guess
|
||||||
|
|
||||||
|
found = self.container.find_properties(string, node, options, 'releaseGroup')
|
||||||
|
guess = self.container.as_guess(found, string, self.validate_group_name)
|
||||||
|
validated_guess = None
|
||||||
|
if guess:
|
||||||
|
group_node = node.group_node()
|
||||||
|
if group_node:
|
||||||
|
for leaf in group_node.leaves_containing(self.previous_safe_properties):
|
||||||
|
if self.validate_node(leaf, node, True):
|
||||||
|
if leaf.root.value[leaf.span[1]] == '-':
|
||||||
|
guess.metadata().confidence = 1
|
||||||
|
else:
|
||||||
|
guess.metadata().confidence = 0.7
|
||||||
|
validated_guess = guess
|
||||||
|
|
||||||
|
if not validated_guess:
|
||||||
|
# If previous group last leaf is identified as a safe property,
|
||||||
|
# consider the raw value as a releaseGroup
|
||||||
|
previous_group_node = node.previous_group_node()
|
||||||
|
if previous_group_node:
|
||||||
|
for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
|
||||||
|
if self.validate_node(leaf, node, False):
|
||||||
|
guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
|
||||||
|
if self.validate_group_name(guess):
|
||||||
|
node.guess = guess
|
||||||
|
validated_guess = guess
|
||||||
|
|
||||||
|
if validated_guess:
|
||||||
|
# If following group nodes have only one unidentified leaf, it belongs to the release group
|
||||||
|
next_group_node = node
|
||||||
|
|
||||||
|
while True:
|
||||||
|
next_group_node = next_group_node.next_group_node()
|
||||||
|
if next_group_node:
|
||||||
|
leaves = list(next_group_node.leaves())
|
||||||
|
if len(leaves) == 1 and not leaves[0].guess:
|
||||||
|
validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
|
||||||
|
leaves[0].guess = validated_guess
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
if not validated_guess and node.is_explicit() and node.node_last_idx == 0: # first node from group
|
||||||
|
validated_guess = build_guess(node, 'releaseGroup', value=node.value[1:len(node.value)-1])
|
||||||
|
validated_guess.metadata().confidence = 0.4
|
||||||
|
validated_guess.metadata().span = 1, len(node.value)
|
||||||
|
node.guess = validated_guess
|
||||||
|
|
||||||
|
if validated_guess:
|
||||||
|
# Strip brackets
|
||||||
|
validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])
|
||||||
|
|
||||||
|
return validated_guess
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_release_group, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
|
|
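validate_group_name above splits the candidate on separators and refuses tokens that are clearly not a group name ('rip', 'by', 'for', 'par', 'pour', 'bonus', or bare numbers). A compact approximation of that filtering; the separator regex is our stand-in for guessit's sep, and unlike the original we keep scanning instead of stopping at the first forbidden token:

import re

_SEP = re.compile(r'[ .\-_]+')
_FORBIDDEN = {'rip', 'by', 'for', 'par', 'pour', 'bonus'}

def clean_group_name(candidate):
    kept = []
    for token in _SEP.split(candidate):
        if not token or token.lower() in _FORBIDDEN or token.isdigit():
            continue
        kept.append(token)
    return '-'.join(kept) or None

# clean_group_name('HiS@SiLUHD')   -> 'HiS@SiLUHD'
# clean_group_name('rip by 2HD')   -> '2HD'
# clean_group_name('2013')         -> None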
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,33 +18,41 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, \
|
||||||
from guessit import Guess
|
unicode_literals
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
from guessit.patterns import video_rexps, sep
|
|
||||||
import re
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.patterns import _psep
|
||||||
|
from guessit.containers import PropertiesContainer
|
||||||
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder
|
||||||
|
from guessit.patterns.numeral import parse_numeral
|
||||||
|
|
||||||
|
|
||||||
def guess_video_rexps(string):
|
class GuessVideoRexps(Transformer):
|
||||||
string = '-' + string + '-'
|
def __init__(self):
|
||||||
for rexp, confidence, span_adjust in video_rexps:
|
Transformer.__init__(self, 25)
|
||||||
match = re.search(sep + rexp + sep, string, re.IGNORECASE)
|
|
||||||
if match:
|
|
||||||
metadata = match.groupdict()
|
|
||||||
# is this the better place to put it? (maybe, as it is at least
|
|
||||||
# the soonest that we can catch it)
|
|
||||||
if metadata.get('cdNumberTotal', -1) is None:
|
|
||||||
del metadata['cdNumberTotal']
|
|
||||||
span = (match.start() + span_adjust[0],
|
|
||||||
match.end() + span_adjust[1] - 2)
|
|
||||||
return (Guess(metadata, confidence=confidence, raw=string[span[0]:span[1]]),
|
|
||||||
span)
|
|
||||||
|
|
||||||
return None, None
|
self.container = PropertiesContainer(canonical_from_pattern=False)
|
||||||
|
|
||||||
|
self.container.register_property(None, 'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep + '(?P<cdNumberTotal>[0-9]))?', confidence=1.0, enhance=False, global_span=True, formatter=parse_numeral)
|
||||||
|
self.container.register_property('cdNumberTotal', '([1-9])' + _psep + 'cds?', confidence=0.9, enhance=False, formatter=parse_numeral)
|
||||||
|
|
||||||
def process(mtree):
|
self.container.register_property('bonusNumber', 'x([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)
|
||||||
SingleNodeGuesser(guess_video_rexps, None, log).process(mtree)
|
|
||||||
|
self.container.register_property('filmNumber', 'f([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)
|
||||||
|
|
||||||
|
self.container.register_property('edition', 'collector', 'collector-edition', 'edition-collector', canonical_form='Collector Edition')
|
||||||
|
self.container.register_property('edition', 'special-edition', 'edition-special', canonical_form='Special Edition')
|
||||||
|
self.container.register_property('edition', 'criterion', 'criterion-edition', 'edition-criterion', canonical_form='Criterion Edition')
|
||||||
|
self.container.register_property('edition', 'deluxe', 'cdeluxe-edition', 'edition-deluxe', canonical_form='Deluxe Edition')
|
||||||
|
self.container.register_property('edition', 'director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut', canonical_form='Director\'s cut')
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return self.container.get_supported_properties()
|
||||||
|
|
||||||
|
def guess_video_rexps(self, string, node=None, options=None):
|
||||||
|
found = self.container.find_properties(string, node, options)
|
||||||
|
return self.container.as_guess(found, string)
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_video_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
|
|
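The first property registered above captures disc numbering such as 'CD1' or 'cd 2 of 3' and, via parse_numeral, yields integer cdNumber/cdNumberTotal values. A self-contained regex in the same spirit; the separator class below is a simplified stand-in for guessit's _psep:

import re

_PSEP = r'[ .\-_]?'
_CD = re.compile(r'cd' + _PSEP + r'(?P<cdNumber>[0-9])'
                 r'(?:' + _PSEP + r'of' + _PSEP + r'(?P<cdNumberTotal>[0-9]))?',
                 re.IGNORECASE)

m = _CD.search('Some.Movie.1995.CD2of3.XviD.avi')
# m.group('cdNumber') == '2', m.group('cdNumberTotal') == '3'
m = _CD.search('Some.Movie.1995.cd1.avi')
# m.group('cdNumber') == '1', m.group('cdNumberTotal') is None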
||||||
|
|
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,45 +18,64 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit import Guess
-from guessit.transfo import SingleNodeGuesser
-from guessit.patterns import weak_episode_rexps
-import re
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def guess_weak_episodes_rexps(string, node):
-    if 'episodeNumber' in node.root.info:
-        return None, None
-
-    for rexp, span_adjust in weak_episode_rexps:
-        match = re.search(rexp, string, re.IGNORECASE)
-        if match:
-            metadata = match.groupdict()
-            span = (match.start() + span_adjust[0],
-                    match.end() + span_adjust[1])
-
-            epnum = int(metadata['episodeNumber'])
-            if epnum > 100:
-                season, epnum = epnum // 100, epnum % 100
-                # episodes which have a season > 25 are most likely errors
-                # (Simpsons is at 23!)
-                if season > 25:
-                    continue
-                return Guess({ 'season': season,
-                               'episodeNumber': epnum },
-                             confidence=0.6, raw=string[span[0]:span[1]]), span
-            else:
-                return Guess(metadata, confidence=0.3, raw=string[span[0]:span[1]]), span
-
-    return None, None
-
-
-guess_weak_episodes_rexps.use_node = True
-
-
-def process(mtree):
-    SingleNodeGuesser(guess_weak_episodes_rexps, 0.6, log).process(mtree)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit.matcher import GuessFinder
+from guessit.patterns import sep, build_or_pattern
+from guessit.containers import PropertiesContainer, LeavesValidator, NoValidator, WeakValidator
+from guessit.patterns.numeral import numeral, parse_numeral
+from guessit.date import valid_year
+
+import re
+
+
+class GuessWeakEpisodesRexps(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, 15)
+
+        of_separators = ['of', 'sur', '/', '\\']
+        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)
+
+        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
+
+        episode_words = ['episodes?']
+
+        def _formater(episode_number):
+            epnum = parse_numeral(episode_number)
+            if not valid_year(epnum):
+                if epnum > 100:
+                    season, epnum = epnum // 100, epnum % 100
+                    # episodes which have a season > 50 are most likely errors
+                    # (Simpson is at 25!)
+                    if season > 50:
+                        return None
+                    return {'season': season, 'episodeNumber': epnum}
+                else:
+                    return epnum
+
+        self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater, disabler=lambda options: options.get('episode_prefer_number') if options else False)
+        self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}', confidence=0.6, formatter=_formater)
+        self.container.register_property('episodeNumber', '[^0-9](\d{1,3})', confidence=0.6, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
+        self.container.register_property(None, '(' + build_or_pattern(episode_words) + sep + '?(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.4, formatter=parse_numeral)
+        self.container.register_property(None, r'(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')', confidence=0.6, formatter=parse_numeral)
+        self.container.register_property('episodeNumber', r'^' + sep + '?(\d{1,3})' + sep, confidence=0.4, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
+        self.container.register_property('episodeNumber', sep + r'(\d{1,3})' + sep + '?$', confidence=0.4, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
+
+    def supported_properties(self):
+        return self.container.get_supported_properties()
+
+    def guess_weak_episodes_rexps(self, string, node=None, options=None):
+        if node and 'episodeNumber' in node.root.info:
+            return None
+
+        properties = self.container.find_properties(string, node, options)
+        guess = self.container.as_guess(properties, string)
+
+        return guess
+
+    def should_process(self, mtree, options=None):
+        return mtree.guess.get('type', '').startswith('episode')
+
+    def process(self, mtree, options=None):
+        GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options).process_nodes(mtree.unidentified_leaves())
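A standalone sketch of what the new _formater aims to do (the valid_year guard and parse_numeral are left out of this simplification): a bare 3- or 4-digit number is split into season and episode, and implausible seasons are dropped.

def split_weak_episode(epnum):
    # mirror of the formatter's core logic, taking an already-parsed int
    if epnum > 100:
        season, episode = epnum // 100, epnum % 100
        if season > 50:          # almost certainly not a real season
            return None
        return {'season': season, 'episodeNumber': episode}
    return {'episodeNumber': epnum}

print(split_weak_episode(103))   # {'season': 1, 'episodeNumber': 3}
print(split_weak_episode(2349))  # {'season': 23, 'episodeNumber': 49}
print(split_weak_episode(7890))  # None (season 78 is rejected)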
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,22 +18,39 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit.transfo import SingleNodeGuesser
-from guessit.patterns import websites
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def guess_website(string):
-    low = string.lower()
-    for site in websites:
-        pos = low.find(site.lower())
-        if pos != -1:
-            return {'website': site}, (pos, pos + len(site))
-    return None, None
-
-
-def process(mtree):
-    SingleNodeGuesser(guess_website, 1.0, log).process(mtree)
+from __future__ import absolute_import, division, print_function, unicode_literals
+from guessit.patterns import build_or_pattern
+from guessit.containers import PropertiesContainer
+from guessit.plugins.transformers import Transformer
+from guessit.matcher import GuessFinder
+from pkg_resources import resource_stream  # @UnresolvedImport
+
+TLDS = [l.strip().decode('utf-8')
+        for l in resource_stream('guessit', 'tlds-alpha-by-domain.txt').readlines()
+        if b'--' not in l][1:]
+
+
+class GuessWebsite(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, 45)
+
+        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
+
+        tlds_pattern = build_or_pattern(TLDS)  # All registered domain extension
+        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])  # For sure a website extension
+        safe_subdomains_pattern = build_or_pattern(['www'])  # For sure a website subdomain
+        safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])  # Those words before a tlds are sure
+
+        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)+' + r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
+        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
+        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')
+
+    def supported_properties(self):
+        return self.container.get_supported_properties()
+
+    def guess_website(self, string, node=None, options=None):
+        found = self.container.find_properties(string, node, options, 'website')
+        return self.container.as_guess(found, string)
+
+    def process(self, mtree, options=None):
+        GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
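A rough standalone illustration of the tiered idea behind these patterns, using a hand-rolled regex limited to a few "safe" TLDs rather than the full IANA list the transformer loads:

import re

safe_tlds = '|'.join(['com', 'org', 'net'])
website_re = re.compile(r'(?:www\.)?[a-z-]+\.(?:' + safe_tlds + r')', re.IGNORECASE)

for name in ('Show.S01E01.www.example.com.mkv', 'Movie.2013.720p.BluRay.mkv'):
    m = website_re.search(name)
    print(m.group(0) if m else None)   # 'www.example.com', then None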
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,33 +18,40 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit.transfo import SingleNodeGuesser
-from guessit.date import search_year
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def guess_year(string):
-    year, span = search_year(string)
-    if year:
-        return { 'year': year }, span
-    else:
-        return None, None
-
-
-def guess_year_skip_first(string):
-    year, span = search_year(string)
-    if year:
-        year2, span2 = guess_year(string[span[1]:])
-        if year2:
-            return year2, (span2[0]+span[1], span2[1]+span[1])
-
-    return None, None
-
-
-def process(mtree, skip_first_year=False):
-    if skip_first_year:
-        SingleNodeGuesser(guess_year_skip_first, 1.0, log).process(mtree)
-    else:
-        SingleNodeGuesser(guess_year, 1.0, log).process(mtree)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit.matcher import GuessFinder
+from guessit.date import search_year, valid_year
+
+
+class GuessYear(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, -160)
+
+    def supported_properties(self):
+        return ['year']
+
+    def guess_year(self, string, node=None, options=None):
+        year, span = search_year(string)
+        if year:
+            return {'year': year}, span
+        else:
+            return None, None
+
+    def second_pass_options(self, mtree, options=None):
+        year_nodes = list(mtree.leaves_containing('year'))
+        if len(year_nodes) > 1:
+            return {'skip_nodes': year_nodes[:len(year_nodes) - 1]}
+        return None
+
+    def process(self, mtree, options=None):
+        GuessFinder(self.guess_year, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
+
+        # if we found a season number that is a valid year, it is usually safe to assume
+        # we can also set the year property to that value
+        for n in mtree.leaves_containing('season'):
+            g = n.guess
+            season = g['season']
+            if valid_year(season):
+                g['year'] = season
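A standalone sketch of the new post-processing step: when a detected season number also looks like a valid year, it is copied into the year property. The year range used here is an assumption standing in for guessit.date.valid_year.

def valid_year_simple(value):          # assumed stand-in for guessit.date.valid_year
    return 1920 <= value <= 2030

guess = {'season': 2013}
if valid_year_simple(guess['season']):
    guess['year'] = guess['season']
print(guess)   # {'season': 2013, 'year': 2013}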
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
-#
-# GuessIt is free software; you can redistribute it and/or modify it under
-# the terms of the Lesser GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
-#
-# GuessIt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# Lesser GNU General Public License for more details.
-#
-# You should have received a copy of the Lesser GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-
-from __future__ import unicode_literals
-from guessit.patterns import subtitle_exts
-from guessit.textutils import reorder_title, find_words
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def process(mtree):
-    # 1- try to promote language to subtitle language where it makes sense
-    for node in mtree.nodes():
-        if 'language' not in node.guess:
-            continue
-
-        def promote_subtitle():
-            # pylint: disable=W0631
-            node.guess.set('subtitleLanguage', node.guess['language'],
-                           confidence=node.guess.confidence('language'))
-            del node.guess['language']
-
-        # - if we matched a language in a file with a sub extension and that
-        #   the group is the last group of the filename, it is probably the
-        #   language of the subtitle
-        #   (eg: 'xxx.english.srt')
-        if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
-            node == mtree.leaves()[-2]):
-            promote_subtitle()
-
-        # - if we find the word 'sub' before the language, and in the same explicit
-        #   group, then upgrade the language
-        explicit_group = mtree.node_at(node.node_idx[:2])
-        group_str = explicit_group.value.lower()
-
-        if ('sub' in find_words(group_str) and
-            0 <= group_str.find('sub') < (node.span[0] - explicit_group.span[0])):
-            promote_subtitle()
-
-        # - if a language is in an explicit group just preceded by "st",
-        #   it is a subtitle language (eg: '...st[fr-eng]...')
-        try:
-            idx = node.node_idx
-            previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
-            if previous.value.lower()[-2:] == 'st':
-                promote_subtitle()
-        except IndexError:
-            pass
-
-    # 2- ", the" at the end of a series title should be prepended to it
-    for node in mtree.nodes():
-        if 'series' not in node.guess:
-            continue
-
-        node.guess['series'] = reorder_title(node.guess['series'])
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,27 +18,32 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-
-from guessit.textutils import find_first_level_groups
-from guessit.patterns import group_delimiters
-import functools
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def process(mtree):
-    """return the string split into explicit groups, that is, those either
-    between parenthese, square brackets or curly braces, and those separated
-    by a dash."""
-    for c in mtree.children:
-        groups = find_first_level_groups(c.value, group_delimiters[0])
-        for delimiters in group_delimiters:
-            flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
-            groups = functools.reduce(flatten, groups, [])
-
-        # do not do this at this moment, it is not strong enough and can break other
-        # patterns, such as dates, etc...
-        #groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
-
-        c.split_on_components(groups)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit.textutils import find_first_level_groups
+from guessit.patterns import group_delimiters
+from functools import reduce
+
+
+class SplitExplicitGroups(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, 250)
+
+    def process(self, mtree, options=None):
+        """split each of those into explicit groups (separated by parentheses or square brackets)
+
+        :return: return the string split into explicit groups, that is, those either
+        between parenthese, square brackets or curly braces, and those separated
+        by a dash."""
+        for c in mtree.children:
+            groups = find_first_level_groups(c.value, group_delimiters[0])
+            for delimiters in group_delimiters:
+                flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
+                groups = reduce(flatten, groups, [])
+
+            # do not do this at this moment, it is not strong enough and can break other
+            # patterns, such as dates, etc...
+            # groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
+
+            c.split_on_components(groups)
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,24 +18,29 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-
-from guessit.patterns import sep
-import re
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def process(mtree):
-    for node in mtree.unidentified_leaves():
-        indices = []
-
-        didx = 0
-        pattern = re.compile(sep + '-' + sep)
-        match = pattern.search(node.value)
-        while match:
-            span = match.span()
-            indices.extend([ span[0], span[1] ])
-            match = pattern.search(node.value, span[1])
-
-        if indices:
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit.patterns import sep
+import re
+
+
+class SplitOnDash(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, 245)
+
+    def process(self, mtree, options=None):
+        """split into '-' separated subgroups (with required separator chars
+        around the dash)
+        """
+        for node in mtree.unidentified_leaves():
+            indices = []
+
+            pattern = re.compile(sep + '-' + sep)
+            match = pattern.search(node.value)
+            while match:
+                span = match.span()
+                indices.extend([span[0], span[1]])
+                match = pattern.search(node.value, span[1])
+
+            if indices:
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,19 +18,28 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-
-from guessit import fileutils
-import os.path
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def process(mtree):
-    """Returns the filename split into [ dir*, basename, ext ]."""
-    components = fileutils.split_path(mtree.value)
-    basename = components.pop(-1)
-    components += list(os.path.splitext(basename))
-    components[-1] = components[-1][1:]  # remove the '.' from the extension
-
-    mtree.split_on_components(components)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit import fileutils
+from os.path import splitext
+
+
+class SplitPathComponents(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, 255)
+
+    def process(self, mtree, options=None):
+        """first split our path into dirs + basename + ext
+
+        :return: the filename split into [ dir*, basename, ext ]
+        """
+        if not options.get('name_only'):
+            components = fileutils.split_path(mtree.value)
+            basename = components.pop(-1)
+            components += list(splitext(basename))
+            components[-1] = components[-1][1:]  # remove the '.' from the extension
+
+            mtree.split_on_components(components)
+        else:
+            mtree.split_on_components([mtree.value, ''])
@@ -99,13 +99,15 @@ class OpenSubtitlesProvider(Provider):
     def no_operation(self):
         checked(self.server.NoOperation(self.token))
 
-    def query(self, languages, hash=None, size=None, imdb_id=None, query=None):  # @ReservedAssignment
+    def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None):  # @ReservedAssignment
         searches = []
         if hash and size:
             searches.append({'moviehash': hash, 'moviebytesize': str(size)})
         if imdb_id:
             searches.append({'imdbid': imdb_id})
-        if query:
+        if query and season and episode:
+            searches.append({'query': query, 'season': season, 'episode': episode})
+        elif query:
             searches.append({'query': query})
         if not searches:
             raise ValueError('One or more parameter missing')
@@ -126,10 +128,16 @@ class OpenSubtitlesProvider(Provider):
 
     def list_subtitles(self, video, languages):
         query = None
+        season = None
+        episode = None
         if ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
             query = video.name.split(os.sep)[-1]
+            if isinstance(video, Episode):
+                query = video.series
+                season = video.season
+                episode = video.episode
         return self.query(languages, hash=video.hashes.get('opensubtitles'), size=video.size, imdb_id=video.imdb_id,
-                          query=query)
+                          query=query, season=season, episode=episode)
 
     def download_subtitle(self, subtitle):
         response = checked(self.server.DownloadSubtitles(self.token, [subtitle.id]))
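A standalone sketch of the new search-list logic from this change: season and episode are only added to the text search when all three values are known, otherwise a plain query search is used.

def build_searches(hash=None, size=None, imdb_id=None, query=None, season=None, episode=None):
    # same branching as the updated OpenSubtitlesProvider.query()
    searches = []
    if hash and size:
        searches.append({'moviehash': hash, 'moviebytesize': str(size)})
    if imdb_id:
        searches.append({'imdbid': imdb_id})
    if query and season and episode:
        searches.append({'query': query, 'season': season, 'episode': episode})
    elif query:
        searches.append({'query': query})
    if not searches:
        raise ValueError('One or more parameter missing')
    return searches

print(build_searches(query='The Big Bang Theory', season=7, episode=5))
# [{'query': 'The Big Bang Theory', 'season': 7, 'episode': 5}]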
@@ -51,7 +51,14 @@ class Subtitle(object):
             encodings.append('windows-1255')
         elif self.language.alpha3 == 'tur':
             encodings.extend(['iso-8859-9', 'windows-1254'])
+        elif self.language.alpha3 == 'pol':
+            # Eastern European Group 1
+            encodings.extend(['windows-1250'])
+        elif self.language.alpha3 == 'bul':
+            # Eastern European Group 2
+            encodings.extend(['windows-1251'])
         else:
+            # Western European (windows-1252)
             encodings.append('latin-1')
 
         # try to decode
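A standalone sketch of the fallback idea behind this encoding list (the helper below is illustrative, not subliminal's API): try each candidate encoding in order and keep the first one that decodes the subtitle bytes without error.

def decode_with_fallback(content, encodings):
    # content is the raw subtitle bytes; encodings is the ordered candidate list
    for encoding in encodings:
        try:
            return content.decode(encoding), encoding
        except UnicodeDecodeError:
            continue
    return None, None

text, used = decode_with_fallback('zażółć'.encode('windows-1250'), ['utf-8', 'windows-1250'])
print(used)   # windows-1250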