Mirror of https://github.com/clinton-hall/nzbToMedia.git (synced 2025-08-19 12:59:36 -07:00)
update guessit and subliminal libs. Fixes #678
parent ff50e5144c
commit f716323b76
72 changed files with 9350 additions and 3032 deletions
@@ -1,249 +0,0 @@
Afghanistan|AF|AFG|004|ISO 3166-2:AF
Åland Islands|AX|ALA|248|ISO 3166-2:AX
Albania|AL|ALB|008|ISO 3166-2:AL
… (249 deleted ISO 3166-1 entries in the form name|alpha-2|alpha-3|numeric|ISO 3166-2 reference, Afghanistan through Zimbabwe) …
Zimbabwe|ZW|ZWE|716|ISO 3166-2:ZW
@@ -1,485 +0,0 @@
aar||aa|Afar|afar
abk||ab|Abkhazian|abkhaze
ace|||Achinese|aceh
… (485 deleted ISO 639-2 entries in the form bibliographic code|terminologic code|alpha-2|English name|French name, aar through zza) …
zza|||Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki|zaza; dimili; dimli; kirdki; kirmanjki; zazaki
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,70 +18,86 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
+import pkg_resources
+from .__version__ import __version__
 
-__version__ = '0.6.2'
 __all__ = ['Guess', 'Language',
            'guess_file_info', 'guess_video_info',
-           'guess_movie_info', 'guess_episode_info']
+           'guess_movie_info', 'guess_episode_info',
+           'default_options']
 
 
 # Do python3 detection before importing any other module, to be sure that
 # it will then always be available
 # with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/
 import sys
-if sys.version_info[0] >= 3:
-    PY3 = True
+if sys.version_info[0] >= 3:  # pragma: no cover
+    PY2, PY3 = False, True
     unicode_text_type = str
     native_text_type = str
     base_text_type = str
 
     def u(x):
         return str(x)
 
     def s(x):
         return x
 
     class UnicodeMixin(object):
         __str__ = lambda x: x.__unicode__()
     import binascii
 
     def to_hex(x):
         return binascii.hexlify(x).decode('utf-8')
 
-else:
-    PY3 = False
-    __all__ = [ str(s) for s in __all__ ]  # fix imports for python2
+else:  # pragma: no cover
+    PY2, PY3 = True, False
+    __all__ = [str(s) for s in __all__]  # fix imports for python2
     unicode_text_type = unicode
     native_text_type = str
     base_text_type = basestring
 
     def u(x):
         if isinstance(x, str):
             return x.decode('utf-8')
+        if isinstance(x, list):
+            return [u(s) for s in x]
         return unicode(x)
 
     def s(x):
         if isinstance(x, unicode):
             return x.encode('utf-8')
         if isinstance(x, list):
-            return [ s(y) for y in x ]
+            return [s(y) for y in x]
         if isinstance(x, tuple):
             return tuple(s(y) for y in x)
         if isinstance(x, dict):
             return dict((s(key), s(value)) for key, value in x.items())
         return x
 
     class UnicodeMixin(object):
         __str__ = lambda x: unicode(x).encode('utf-8')
 
     def to_hex(x):
         return x.encode('hex')
 
+    range = xrange
 
-from guessit.guess import Guess, merge_all
+from guessit.guess import Guess, smart_merge
 from guessit.language import Language
 from guessit.matcher import IterativeMatcher
-from guessit.textutils import clean_string
+from guessit.textutils import clean_default, is_camel, from_camel
+import babelfish
+import os.path
 import logging
-import json
+from copy import deepcopy
 
 log = logging.getLogger(__name__)
 
 
 class NullHandler(logging.Handler):
     def emit(self, record):
         pass
 
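Not part of the commit, just an illustrative sketch of the text helpers kept in this compatibility block: on Python 2 the updated u() now also maps over lists, and s() converts unicode structures back to byte strings. The filename values below are made up.

from guessit import u, s

names = [b'Dexter.5x02.HDTV.avi', b'Homeland.S01E01.720p.mkv']  # example values
as_text = u(names)     # new behaviour in this version: lists are decoded element-wise on Python 2
as_bytes = s(as_text)  # recurses into lists/tuples/dicts and re-encodes to UTF-8 on Python 2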
@@ -91,137 +107,193 @@ h = NullHandler()
 log.addHandler(h)
 
 
-def _guess_filename(filename, filetype):
-    def find_nodes(tree, props):
-        """Yields all nodes containing any of the given props."""
-        if isinstance(props, base_text_type):
-            props = [props]
-        for node in tree.nodes():
-            if any(prop in node.guess for prop in props):
-                yield node
-
-    def warning(title):
-        log.warning('%s, guesses: %s - %s' % (title, m.nice_string(), m2.nice_string()))
-        return m
-
-    mtree = IterativeMatcher(filename, filetype=filetype)
-
-    m = mtree.matched()
-
-    second_pass_opts = []
-    second_pass_transfo_opts = {}
-
-    # if there are multiple possible years found, we assume the first one is
-    # part of the title, reparse the tree taking this into account
-    years = set(n.value for n in find_nodes(mtree.match_tree, 'year'))
-    if len(years) >= 2:
-        second_pass_opts.append('skip_first_year')
-
-    to_skip_language_nodes = []
-
-    title_nodes = set(n for n in find_nodes(mtree.match_tree, ['title', 'series']))
-    title_spans = {}
-    for title_node in title_nodes:
-        title_spans[title_node.span[0]] = title_node
-        title_spans[title_node.span[1]] = title_node
-
-    for lang_key in ('language', 'subtitleLanguage'):
-        langs = {}
-        lang_nodes = set(n for n in find_nodes(mtree.match_tree, lang_key))
-
-        for lang_node in lang_nodes:
-            lang = lang_node.guess.get(lang_key, None)
-            if len(lang_node.value) > 3 and (lang_node.span[0] in title_spans.keys() or lang_node.span[1] in title_spans.keys()):
-                # Language is next or before title, and is not a language code. Add to skip for 2nd pass.
-
-                # if filetype is subtitle and the language appears last, just before
-                # the extension, then it is likely a subtitle language
-                parts = clean_string(lang_node.root.value).split()
-                if m['type'] in ['moviesubtitle', 'episodesubtitle'] and (parts.index(lang_node.value) == len(parts) - 2):
-                    continue
-
-                to_skip_language_nodes.append(lang_node)
-            elif not lang in langs:
-                langs[lang] = lang_node
-            else:
-                # The same language was found. Keep the more confident one, and add others to skip for 2nd pass.
-                existing_lang_node = langs[lang]
-                to_skip = None
-                if existing_lang_node.guess.confidence('language') >= lang_node.guess.confidence('language'):
-                    # lang_node is to remove
-                    to_skip = lang_node
-                else:
-                    # existing_lang_node is to remove
-                    langs[lang] = lang_node
-                    to_skip = existing_lang_node
-                to_skip_language_nodes.append(to_skip)
-
-    if to_skip_language_nodes:
-        second_pass_transfo_opts['guess_language'] = (
-            ((), { 'skip': [ { 'node_idx': node.parent.node_idx,
-                               'span': node.span }
-                             for node in to_skip_language_nodes ] }))
-
-    if second_pass_opts or second_pass_transfo_opts:
-        # 2nd pass is needed
-        log.info("Running 2nd pass with options: %s" % second_pass_opts)
-        log.info("Transfo options: %s" % second_pass_transfo_opts)
-        mtree = IterativeMatcher(filename, filetype=filetype,
-                                 opts=second_pass_opts,
-                                 transfo_opts=second_pass_transfo_opts)
-
-        m = mtree.matched()
-
-    if 'language' not in m and 'subtitleLanguage' not in m or 'title' not in m:
-        return m
-
-    # if we found some language, make sure we didn't cut a title or sth...
-    mtree2 = IterativeMatcher(filename, filetype=filetype,
-                              opts=['nolanguage', 'nocountry'])
-    m2 = mtree2.matched()
-
-    if m.get('title') != m2.get('title'):
-        title = next(find_nodes(mtree.match_tree, 'title'))
-        title2 = next(find_nodes(mtree2.match_tree, 'title'))
-
-        # if a node is in an explicit group, then the correct title is probably
-        # the other one
-        if title.root.node_at(title.node_idx[:2]).is_explicit():
-            return m2
-        elif title2.root.node_at(title2.node_idx[:2]).is_explicit():
-            return m
-
-    return m
-
-
-def guess_file_info(filename, filetype='autodetect', info=None):
+def _guess_filename(filename, options=None, **kwargs):
+    mtree = _build_filename_mtree(filename, options=options, **kwargs)
+    if options.get('split_camel'):
+        _add_camel_properties(mtree, options=options)
+    return mtree.matched()
+
+
+def _build_filename_mtree(filename, options=None, **kwargs):
+    mtree = IterativeMatcher(filename, options=options, **kwargs)
+    second_pass_options = mtree.second_pass_options
+    if second_pass_options:
+        log.debug("Running 2nd pass")
+        merged_options = dict(options)
+        merged_options.update(second_pass_options)
+        mtree = IterativeMatcher(filename, options=merged_options, **kwargs)
+    return mtree
+
+
+def _add_camel_properties(mtree, options=None, **kwargs):
+    prop = 'title' if mtree.matched().get('type') != 'episode' else 'series'
+    value = mtree.matched().get(prop)
+    _guess_camel_string(mtree, value, options=options, skip_title=False, **kwargs)
+
+    for leaf in mtree.match_tree.unidentified_leaves():
+        value = leaf.value
+        _guess_camel_string(mtree, value, options=options, skip_title=True, **kwargs)
+
+
+def _guess_camel_string(mtree, string, options=None, skip_title=False, **kwargs):
+    if string and is_camel(string):
+        log.debug('"%s" is camel cased. Try to detect more properties.' % (string,))
+        uncameled_value = from_camel(string)
+        merged_options = dict(options)
+        if 'type' in mtree.match_tree.info:
+            current_type = mtree.match_tree.info.get('type')
+            if current_type and current_type != 'unknown':
+                merged_options['type'] = current_type
+        camel_tree = _build_filename_mtree(uncameled_value, options=merged_options, name_only=True, skip_title=skip_title, **kwargs)
+        if len(camel_tree.matched()) > 0:
+            mtree.matched().update(camel_tree.matched())
+            return True
+    return False
+
+
+def guess_video_metadata(filename):
+    """Gets the video metadata properties out of a given file. The file needs to
+    exist on the filesystem to be able to be analyzed. An empty guess is
+    returned otherwise.
+
+    You need to have the Enzyme python package installed for this to work."""
+    result = Guess()
+
+    def found(prop, value):
+        result[prop] = value
+        log.debug('Found with enzyme %s: %s' % (prop, value))
+
+    # first get the size of the file, in bytes
+    try:
+        size = os.stat(filename).st_size
+        found('fileSize', size)
+
+    except Exception as e:
+        log.error('Cannot get video file size: %s' % e)
+        # file probably does not exist, we might as well return now
+        return result
+
+    # then get additional metadata from the file using enzyme, if available
+    try:
+        import enzyme
+
+        with open(filename) as f:
+            mkv = enzyme.MKV(f)
+
+            found('duration', mkv.info.duration.total_seconds())
+
+            if mkv.video_tracks:
+                video_track = mkv.video_tracks[0]
+
+                # resolution
+                if video_track.height in (480, 720, 1080):
+                    if video_track.interlaced:
+                        found('screenSize', '%di' % video_track.height)
+                    else:
+                        found('screenSize', '%dp' % video_track.height)
+                else:
+                    # TODO: do we want this?
+                    #found('screenSize', '%dx%d' % (video_track.width, video_track.height))
+                    pass
+
+                # video codec
+                if video_track.codec_id == 'V_MPEG4/ISO/AVC':
+                    found('videoCodec', 'h264')
+                elif video_track.codec_id == 'V_MPEG4/ISO/SP':
+                    found('videoCodec', 'DivX')
+                elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
+                    found('videoCodec', 'XviD')
+
+            else:
+                log.warning('MKV has no video track')
+
+            if mkv.audio_tracks:
+                audio_track = mkv.audio_tracks[0]
+                # audio codec
+                if audio_track.codec_id == 'A_AC3':
+                    found('audioCodec', 'AC3')
+                elif audio_track.codec_id == 'A_DTS':
+                    found('audioCodec', 'DTS')
+                elif audio_track.codec_id == 'A_AAC':
+                    found('audioCodec', 'AAC')
+            else:
+                log.warning('MKV has no audio track')
+
+            if mkv.subtitle_tracks:
+                embedded_subtitle_languages = set()
+                for st in mkv.subtitle_tracks:
+                    try:
+                        if st.language:
+                            lang = babelfish.Language.fromalpha3b(st.language)
+                        elif st.name:
+                            lang = babelfish.Language.fromname(st.name)
+                        else:
+                            lang = babelfish.Language('und')
+
+                    except babelfish.Error:
+                        lang = babelfish.Language('und')
+
+                    embedded_subtitle_languages.add(lang)
+
+                found('subtitleLanguage', embedded_subtitle_languages)
+            else:
+                log.debug('MKV has no subtitle track')
+
+        return result
+
+    except ImportError:
+        log.error('Cannot get video file metadata, missing dependency: enzyme')
+        log.error('Please install it from PyPI, by doing eg: pip install enzyme')
+        return result
+
+    except IOError as e:
+        log.error('Could not open file: %s' % filename)
+        log.error('Make sure it exists and is available for reading on the filesystem')
+        log.error('Error: %s' % e)
+        return result
+
+    except enzyme.Error as e:
+        log.error('Cannot guess video file metadata')
+        log.error('enzyme.Error while reading file: %s' % filename)
+        log.error('Error: %s' % e)
+        return result
+
+
+default_options = {}
+
+
+def guess_file_info(filename, info=None, options=None, **kwargs):
     """info can contain the names of the various plugins, such as 'filename' to
     detect filename info, or 'hash_md5' to get the md5 hash of the file.
 
-    >>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
-    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
+    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
+    >>> g = guess_file_info(testfile, info = ['hash_md5', 'hash_sha1'])
+    >>> g['hash_md5'], g['hash_sha1']
+    ('64de6b5893cac24456c46a935ef9c359', 'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
     """
+    info = info or 'filename'
+    options = options or {}
+    if default_options:
+        merged_options = deepcopy(default_options)
+        merged_options.update(options)
+        options = merged_options
 
     result = []
     hashers = []
 
     # Force unicode as soon as possible
     filename = u(filename)
 
-    if info is None:
-        info = ['filename']
-
     if isinstance(info, base_text_type):
         info = [info]
 
     for infotype in info:
         if infotype == 'filename':
-            result.append(_guess_filename(filename, filetype))
+            result.append(_guess_filename(filename, options, **kwargs))
 
         elif infotype == 'hash_mpc':
             from guessit.hash_mpc import hash_file
             try:
-                result.append(Guess({'hash_mpc': hash_file(filename)},
+                result.append(Guess({infotype: hash_file(filename)},
                                     confidence=1.0))
             except Exception as e:
                 log.warning('Could not compute MPC-style hash because: %s' % e)
 
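As a quick usage sketch (not part of the diff): the rewritten _guess_filename takes an options dict instead of the old positional filetype argument, and the new guess_video_metadata probes the container with enzyme. The file path below is hypothetical, and enzyme must be installed for the second call to return anything.

from guessit import guess_file_info, guess_video_metadata

# Filename-based guessing; 'options' replaces the old 'filetype' positional argument.
guess = guess_file_info('Dexter.5x02.Hello.Bandit.HDTV.XviD.avi',
                        options={'split_camel': True})
print(guess.get('series'), guess.get('season'), guess.get('episodeNumber'))

# Container-based probe: reads the MKV headers via enzyme (hypothetical path).
meta = guess_video_metadata('/path/to/episode.mkv')
print(meta.get('videoCodec'), meta.get('audioCodec'), meta.get('subtitleLanguage'))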
@@ -229,7 +301,7 @@ def guess_file_info(filename, filetype='autodetect', info=None):
         elif infotype == 'hash_ed2k':
             from guessit.hash_ed2k import hash_file
             try:
-                result.append(Guess({'hash_ed2k': hash_file(filename)},
+                result.append(Guess({infotype: hash_file(filename)},
                                     confidence=1.0))
             except Exception as e:
                 log.warning('Could not compute ed2k hash because: %s' % e)
 
@@ -243,6 +315,11 @@ def guess_file_info(filename, filetype='autodetect', info=None):
             except AttributeError:
                 log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)
 
+        elif infotype == 'video':
+            g = guess_video_metadata(filename)
+            if g:
+                result.append(g)
+
         else:
             log.warning('Invalid infotype: %s' % infotype)
 
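A small combined-call sketch (assumption: the file exists on disk and enzyme is installed); the path is hypothetical. The new 'video' infotype can be requested alongside the existing ones in a single info list:

from guessit import guess_file_info

# 'filename' parses the name, 'video' adds the enzyme container metadata from above,
# and 'hash_md5' goes through the generic hashlib branch of guess_file_info.
info = guess_file_info('/path/to/Show.S01E01.720p.mkv',  # hypothetical path
                       info=['filename', 'video', 'hash_md5'])
print(info.nice_string())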
@@ -265,25 +342,18 @@ def guess_file_info(filename, filetype='autodetect', info=None):
         except Exception as e:
             log.warning('Could not compute hash because: %s' % e)
 
-    result = merge_all(result)
-
-    # last minute adjustments
-
-    # if country is in the guessed properties, make it part of the filename
-    if 'series' in result and 'country' in result:
-        result['series'] += ' (%s)' % result['country'].alpha2.upper()
-
+    result = smart_merge(result)
 
     return result
 
 
-def guess_video_info(filename, info=None):
-    return guess_file_info(filename, 'autodetect', info)
+def guess_video_info(filename, info=None, options=None, **kwargs):
+    return guess_file_info(filename, info=info, options=options, type='video', **kwargs)
 
 
-def guess_movie_info(filename, info=None):
-    return guess_file_info(filename, 'movie', info)
+def guess_movie_info(filename, info=None, options=None, **kwargs):
+    return guess_file_info(filename, info=info, options=options, type='movie', **kwargs)
 
 
-def guess_episode_info(filename, info=None):
-    return guess_file_info(filename, 'episode', info)
+def guess_episode_info(filename, info=None, options=None, **kwargs):
+    return guess_file_info(filename, info=info, options=options, type='episode', **kwargs)
 
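The wrappers above now forward a type keyword instead of the old positional filetype. The snippet below is an illustrative sketch of that equivalence (not part of the commit; the filename is borrowed from the demo list further down):

from guessit import guess_episode_info, guess_file_info

filename = 'Californication.2x05.Vaginatown.HDTV.XviD-0TV.avi'

# guess_episode_info(...) is now just guess_file_info(..., type='episode').
g1 = guess_episode_info(filename)
g2 = guess_file_info(filename, type='episode')
assert g1.get('series') == g2.get('series')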
@@ -2,7 +2,8 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,29 +19,120 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from __future__ import print_function
-from guessit import u
-from guessit import slogging, guess_file_info
-from optparse import OptionParser
+from __future__ import absolute_import, division, print_function, unicode_literals
+from collections import defaultdict
 import logging
-import sys
 import os
-import locale
+
+from guessit import PY2, u, guess_file_info, __version__
+from guessit.options import get_opts
+from guessit.__version__ import __version__
 
 
-def detect_filename(filename, filetype, info=['filename'], advanced = False):
+def guess_file(filename, info='filename', options=None, **kwargs):
+    options = options or {}
     filename = u(filename)
 
-    print('For:', filename)
-    print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string(advanced))
+    if not options.get('yaml') and not options.get('show_property'):
+        print('For:', filename)
+    guess = guess_file_info(filename, info, options, **kwargs)
+
+    if not options.get('unidentified'):
+        try:
+            del guess['unidentified']
+        except KeyError:
+            pass
+
+    if options.get('show_property'):
+        print(guess.get(options.get('show_property'), ''))
+        return
+
+    if options.get('yaml'):
+        import yaml
+        for k, v in guess.items():
+            if isinstance(v, list) and len(v) == 1:
+                guess[k] = v[0]
+        ystr = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False)
+        i = 0
+        for yline in ystr.splitlines():
+            if i == 0:
+                print("? " + yline[:-1])
+            elif i == 1:
+                print(":" + yline[1:])
+            else:
+                print(yline)
+            i += 1
+        return
+    print('GuessIt found:', guess.nice_string(options.get('advanced')))
 
 
-def run_demo(episodes=True, movies=True, advanced=False):
+def _supported_properties():
+    all_properties = defaultdict(list)
+    transformers_properties = []
+
+    from guessit.plugins import transformers
+    for transformer in transformers.all_transformers():
+        supported_properties = transformer.supported_properties()
+        transformers_properties.append((transformer, supported_properties))
+
+        if isinstance(supported_properties, dict):
+            for property_name, possible_values in supported_properties.items():
+                all_properties[property_name].extend(possible_values)
+        else:
+            for property_name in supported_properties:
+                all_properties[property_name]  # just make sure it exists
+
+    return all_properties, transformers_properties
+
+
+def display_transformers():
+    print('GuessIt transformers:')
+    _, transformers_properties = _supported_properties()
+    for transformer, _ in transformers_properties:
+        print('[@] %s (%s)' % (transformer.name, transformer.priority))
+
+
+def display_properties(options):
+    values = options.values
+    transformers = options.transformers
+    name_only = options.name_only
+
+    print('GuessIt properties:')
+    all_properties, transformers_properties = _supported_properties()
+    if name_only:
+        # the 'container' property does not apply when using the --name-only
+        # option
+        del all_properties['container']
+
+    if transformers:
+        for transformer, properties_list in transformers_properties:
+            print('[@] %s (%s)' % (transformer.name, transformer.priority))
+            for property_name in properties_list:
+                property_values = all_properties.get(property_name)
+                print('  [+] %s' % (property_name,))
+                if property_values and values:
+                    _display_property_values(property_name, indent=4)
+    else:
+        properties_list = sorted(all_properties.keys())
+        for property_name in properties_list:
+            property_values = all_properties.get(property_name)
+            print('  [+] %s' % (property_name,))
+            if property_values and values:
+                _display_property_values(property_name, indent=4)
+
+
+def _display_property_values(property_name, indent=2):
+    all_properties, _ = _supported_properties()
+    property_values = all_properties.get(property_name)
+    for property_value in property_values:
+        print(indent * ' ' + '[!] %s' % (property_value,))
+
+
+def run_demo(episodes=True, movies=True, options=None):
     # NOTE: tests should not be added here but rather in the tests/ folder
     # this is just intended as a quick example
     if episodes:
-        testeps = [ 'Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
+        testeps = ['Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
                    'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
|
'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
|
||||||
'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
|
'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
|
||||||
'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
|
'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
|
||||||
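For reference, a hedged sketch of driving the new guess_file helper directly with a plain options dict (assuming this command-line module is guessit/__main__.py; the option keys simply mirror the attributes read above):

    from guessit.__main__ import guess_file

    # Print a single property instead of the whole guess.
    guess_file('Dexter.5x02.Hello,.Bandit.HDTV.XviD.avi',
               options={'show_property': 'series'}, type='episode')

    # Or dump the full guess as YAML instead (needs PyYAML installed).
    guess_file('Dexter.5x02.Hello,.Bandit.HDTV.XviD.avi',
               options={'yaml': True}, type='episode')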
@@ -48,22 +140,20 @@ def run_demo(episodes=True, movies=True, advanced=False):
                    'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
                    'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
                    'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
-                   'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi'
-                   ]
+                   'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi']
 
         for f in testeps:
-            print('-'*80)
-            detect_filename(f, filetype='episode', advanced=advanced)
+            print('-' * 80)
+            guess_file(f, options=options, type='episode')
 
 
     if movies:
-        testmovies = [ 'Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
+        testmovies = ['Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
                       'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
                       'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
                       'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
                       'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
-                      'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', # FIXME: PROPER and R5 get overwritten
-                      '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', # FIXME: title gets overwritten
+                      'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi',
+                      '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv',
                       'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
                       'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
                       'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
@@ -79,48 +169,115 @@ def run_demo(episodes=True, movies=True, advanced=False):
                      ]
 
         for f in testmovies:
-            print('-'*80)
-            detect_filename(f, filetype = 'movie', advanced = advanced)
-
-
-def main():
-    slogging.setupLogging()
-
-    # see http://bugs.python.org/issue2128
-    if sys.version_info.major < 3 and os.name == 'nt':
-        for i, a in enumerate(sys.argv):
-            sys.argv[i] = a.decode(locale.getpreferredencoding())
-
-    parser = OptionParser(usage = 'usage: %prog [options] file1 [file2...]')
-    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
-                      help = 'display debug output')
-    parser.add_option('-i', '--info', dest = 'info', default = 'filename',
-                      help = 'the desired information type: filename, hash_mpc or a hash from python\'s '
-                             'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
-                             'them, comma-separated')
-    parser.add_option('-t', '--type', dest = 'filetype', default = 'autodetect',
-                      help = 'the suggested file type: movie, episode or autodetect')
-    parser.add_option('-a', '--advanced', dest = 'advanced', action='store_true', default = False,
-                      help = 'display advanced information for filename guesses, as json output')
-    parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False,
-                      help = 'run a few builtin tests instead of analyzing a file')
-
-    options, args = parser.parse_args()
-    if options.verbose:
-        logging.getLogger('guessit').setLevel(logging.DEBUG)
-
-    if options.demo:
-        run_demo(episodes=True, movies=True, advanced=options.advanced)
-    else:
-        if args:
-            for filename in args:
-                detect_filename(filename,
-                                filetype = options.filetype,
-                                info = options.info.split(','),
-                                advanced = options.advanced)
-        else:
-            parser.print_help()
+            print('-' * 80)
+            guess_file(f, options=options, type='movie')
+
+
+def submit_bug(filename, options):
+    import requests  # only import when needed
+    from requests.exceptions import RequestException
+
+    try:
+        opts = dict((k, v) for k, v in options.__dict__.items()
+                    if v and k != 'submit_bug')
+
+        r = requests.post('http://localhost:5000/bugs', {'filename': filename,
+                                                          'version': __version__,
+                                                          'options': str(opts)})
+        if r.status_code == 200:
+            print('Successfully submitted file: %s' % r.text)
+        else:
+            print('Could not submit bug at the moment, please try again later.')
+
+    except RequestException as e:
+        print('Could not submit bug at the moment, please try again later.')
+
+
+def main(args=None, setup_logging=True):
+    if setup_logging:
+        from guessit import slogging
+        slogging.setup_logging()
+
+    if PY2:  # pragma: no cover
+        import codecs
+        import locale
+        import sys
+
+        # see http://bugs.python.org/issue2128
+        if os.name == 'nt':
+            for i, a in enumerate(sys.argv):
+                sys.argv[i] = a.decode(locale.getpreferredencoding())
+
+        # see https://github.com/wackou/guessit/issues/43
+        # and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
+        # Wrap sys.stdout into a StreamWriter to allow writing unicode.
+        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
+
+    from guessit.plugins import transformers
+
+    if args:
+        options = get_opts().parse_args(args)
+    else:  # pragma: no cover
+        options = get_opts().parse_args()
+    if options.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    help_required = True
+    if options.properties or options.values:
+        display_properties(options)
+        help_required = False
+    elif options.transformers:
+        display_transformers()
+        help_required = False
+
+    if options.demo:
+        run_demo(episodes=True, movies=True, options=vars(options))
+        help_required = False
+
+    if options.version:
+        print('+-------------------------------------------------------+')
+        print('+ GuessIt ' + __version__ + (28-len(__version__)) * ' ' + '+')
+        print('+-------------------------------------------------------+')
+        print('| Please report any bug or feature request at |')
+        print('| https://github.com/wackou/guessit/issues. |')
+        print('+-------------------------------------------------------+')
+        help_required = False
+
+    if options.yaml:
+        try:
+            import yaml, babelfish
+            def default_representer(dumper, data):
+                return dumper.represent_str(str(data))
+            yaml.SafeDumper.add_representer(babelfish.Language, default_representer)
+            yaml.SafeDumper.add_representer(babelfish.Country, default_representer)
+        except ImportError:  # pragma: no cover
+            print('PyYAML not found. Using default output.')
+
+    filenames = []
+    if options.filename:
+        filenames.extend(options.filename)
+    if options.input_file:
+        input_file = open(options.input_file, 'r')
+        try:
+            filenames.extend([line.strip() for line in input_file.readlines()])
+        finally:
+            input_file.close()
+
+    filenames = filter(lambda f: f, filenames)
+
+    if filenames:
+        help_required = False
+        if options.submit_bug:
+            for filename in filenames:
+                submit_bug(filename, options)
+        else:
+            for filename in filenames:
+                guess_file(filename,
+                           info=options.info.split(','),
+                           options=vars(options))
+
+    if help_required:  # pragma: no cover
+        get_opts().print_help()
 
 
 if __name__ == '__main__':
     main()
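A hedged sketch of the reworked entry point, which now takes an explicit argument list and an opt-out for logging setup (useful for callers such as nzbToMedia that configure logging themselves; module path guessit.__main__ is an assumption):

    from guessit.__main__ import main

    # Parse an argv-style list instead of sys.argv; filenames are positional,
    # and other CLI flags can be appended as needed.
    main(['Series/Dexter.5x02.Hello,.Bandit.HDTV.XviD.avi'], setup_logging=False)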
20  libs/guessit/__version__.py  Normal file

@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# GuessIt - A library for guessing information from filenames
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
+#
+# GuessIt is free software; you can redistribute it and/or modify it under
+# the terms of the Lesser GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GuessIt is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Lesser GNU General Public License for more details.
+#
+# You should have received a copy of the Lesser GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+__version__ = '0.10.2.dev0'
||||||
771
libs/guessit/containers.py
Normal file
771
libs/guessit/containers.py
Normal file
|
|
@ -0,0 +1,771 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# GuessIt - A library for guessing information from filenames
|
||||||
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
|
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||||
|
#
|
||||||
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# GuessIt is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# Lesser GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the Lesser GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
from .patterns import compile_pattern, sep
|
||||||
|
from . import base_text_type
|
||||||
|
from .guess import Guess
|
||||||
|
import types
|
||||||
|
|
||||||
|
|
||||||
|
def _get_span(prop, match):
|
||||||
|
"""Retrieves span for a match"""
|
||||||
|
if not prop.global_span and match.re.groups:
|
||||||
|
start = None
|
||||||
|
end = None
|
||||||
|
for i in range(1, match.re.groups + 1):
|
||||||
|
span = match.span(i)
|
||||||
|
if start is None or span[0] < start:
|
||||||
|
start = span[0]
|
||||||
|
if end is None or span[1] > end:
|
||||||
|
end = span[1]
|
||||||
|
return start, end
|
||||||
|
else:
|
||||||
|
return match.span()
|
||||||
|
start = span[0]
|
||||||
|
end = span[1]
|
||||||
|
|
||||||
|
|
||||||
|
def _trim_span(span, value, blanks = sep):
|
||||||
|
start, end = span
|
||||||
|
|
||||||
|
for i in range(0, len(value)):
|
||||||
|
if value[i] in blanks:
|
||||||
|
start += 1
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
for i in reversed(range(0, len(value))):
|
||||||
|
if value[i] in blanks:
|
||||||
|
end -= 1
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
if end <= start:
|
||||||
|
return -1, -1
|
||||||
|
return start, end
|
||||||
|
|
||||||
|
|
||||||
|
def _get_groups(compiled_re):
|
||||||
|
"""
|
||||||
|
Retrieves groups from re
|
||||||
|
|
||||||
|
:return: list of group names
|
||||||
|
"""
|
||||||
|
if compiled_re.groups:
|
||||||
|
indexgroup = {}
|
||||||
|
for k, i in compiled_re.groupindex.items():
|
||||||
|
indexgroup[i] = k
|
||||||
|
ret = []
|
||||||
|
for i in range(1, compiled_re.groups + 1):
|
||||||
|
ret.append(indexgroup.get(i, i))
|
||||||
|
return ret
|
||||||
|
else:
|
||||||
|
return [None]
|
||||||
|
|
||||||
|
|
||||||
|
class NoValidator(object):
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class LeftValidator(object):
|
||||||
|
"""Make sure our match is starting by separator, or by another entry"""
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
span = _get_span(prop, match)
|
||||||
|
span = _trim_span(span, string[span[0]:span[1]])
|
||||||
|
start, end = span
|
||||||
|
|
||||||
|
sep_start = start <= 0 or string[start - 1] in sep
|
||||||
|
start_by_other = start in entry_end
|
||||||
|
if not sep_start and not start_by_other:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class RightValidator(object):
|
||||||
|
"""Make sure our match is ended by separator, or by another entry"""
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
span = _get_span(prop, match)
|
||||||
|
span = _trim_span(span, string[span[0]:span[1]])
|
||||||
|
start, end = span
|
||||||
|
|
||||||
|
sep_end = end >= len(string) or string[end] in sep
|
||||||
|
end_by_other = end in entry_start
|
||||||
|
if not sep_end and not end_by_other:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class ChainedValidator(object):
|
||||||
|
def __init__(self, *validators):
|
||||||
|
self._validators = validators
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
for validator in self._validators:
|
||||||
|
if not validator.validate(prop, string, node, match, entry_start, entry_end):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class SameKeyValidator(object):
|
||||||
|
def __init__(self, validator_function):
|
||||||
|
self.validator_function = validator_function
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
for key in prop.keys:
|
||||||
|
for same_value_leaf in node.root.leaves_containing(key):
|
||||||
|
ret = self.validator_function(same_value_leaf, key, prop, string, node, match, entry_start, entry_end)
|
||||||
|
if ret is not None:
|
||||||
|
return ret
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class OnlyOneValidator(SameKeyValidator):
|
||||||
|
def __init__(self):
|
||||||
|
super(OnlyOneValidator, self).__init__(lambda same_value_leaf, key, prop, string, node, match, entry_start, entry_end: False)
|
||||||
|
|
||||||
|
|
||||||
|
class DefaultValidator(object):
|
||||||
|
"""Make sure our match is surrounded by separators, or by another entry"""
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
span = _get_span(prop, match)
|
||||||
|
span = _trim_span(span, string[span[0]:span[1]])
|
||||||
|
start, end = span
|
||||||
|
|
||||||
|
sep_start = start <= 0 or string[start - 1] in sep
|
||||||
|
sep_end = end >= len(string) or string[end] in sep
|
||||||
|
start_by_other = start in entry_end
|
||||||
|
end_by_other = end in entry_start
|
||||||
|
if (sep_start or start_by_other) and (sep_end or end_by_other):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionValidator(object):
|
||||||
|
def __init__(self, function):
|
||||||
|
self.function = function
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
return self.function(prop, string, node, match, entry_start, entry_end)
|
||||||
|
|
||||||
|
|
||||||
|
class FormatterValidator(object):
|
||||||
|
def __init__(self, group_name=None, formatted_validator=None):
|
||||||
|
self.group_name = group_name
|
||||||
|
self.formatted_validator = formatted_validator
|
||||||
|
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
if self.group_name:
|
||||||
|
formatted = prop.format(match.group(self.group_name), self.group_name)
|
||||||
|
else:
|
||||||
|
formatted = prop.format(match.group())
|
||||||
|
if self.formatted_validator:
|
||||||
|
return self.formatted_validator(formatted)
|
||||||
|
else:
|
||||||
|
return formatted
|
||||||
|
|
||||||
|
|
||||||
|
def _get_positions(prop, string, node, match, entry_start, entry_end):
|
||||||
|
span = match.span()
|
||||||
|
start = span[0]
|
||||||
|
end = span[1]
|
||||||
|
|
||||||
|
at_start = True
|
||||||
|
at_end = True
|
||||||
|
|
||||||
|
while start > 0:
|
||||||
|
start -= 1
|
||||||
|
if string[start] not in sep:
|
||||||
|
at_start = False
|
||||||
|
break
|
||||||
|
while end < len(string) - 1:
|
||||||
|
end += 1
|
||||||
|
if string[end] not in sep:
|
||||||
|
at_end = False
|
||||||
|
break
|
||||||
|
return at_start, at_end
|
||||||
|
|
||||||
|
|
||||||
|
class WeakValidator(DefaultValidator):
|
||||||
|
"""Make sure our match is surrounded by separators and is the first or last element in the string"""
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
if super(WeakValidator, self).validate(prop, string, node, match, entry_start, entry_end):
|
||||||
|
at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end)
|
||||||
|
return at_start or at_end
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class NeighborValidator(DefaultValidator):
|
||||||
|
"""Make sure the node is next another one"""
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end)
|
||||||
|
|
||||||
|
if at_start:
|
||||||
|
previous_leaf = node.root.previous_leaf(node)
|
||||||
|
if previous_leaf is not None:
|
||||||
|
return True
|
||||||
|
|
||||||
|
if at_end:
|
||||||
|
next_leaf = node.root.next_leaf(node)
|
||||||
|
if next_leaf is not None:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class LeavesValidator(DefaultValidator):
|
||||||
|
def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True):
|
||||||
|
self.previous_lambdas = previous_lambdas if previous_lambdas is not None else []
|
||||||
|
self.next_lambdas = next_lambdas if next_lambdas is not None else []
|
||||||
|
if lambdas:
|
||||||
|
self.previous_lambdas.extend(lambdas)
|
||||||
|
self.next_lambdas.extend(lambdas)
|
||||||
|
self.both_side = both_side
|
||||||
|
self.default_ = default_
|
||||||
|
|
||||||
|
"""Make sure our match is surrounded by separators and validates defined lambdas"""
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
if self.default_:
|
||||||
|
super_ret = super(LeavesValidator, self).validate(prop, string, node, match, entry_start, entry_end)
|
||||||
|
else:
|
||||||
|
super_ret = True
|
||||||
|
if not super_ret:
|
||||||
|
return False
|
||||||
|
|
||||||
|
previous_ = self._validate_previous(prop, string, node, match, entry_start, entry_end)
|
||||||
|
next_ = self._validate_next(prop, string, node, match, entry_start, entry_end)
|
||||||
|
|
||||||
|
if previous_ is None and next_ is None:
|
||||||
|
return super_ret
|
||||||
|
if self.both_side:
|
||||||
|
return previous_ and next_
|
||||||
|
else:
|
||||||
|
return previous_ or next_
|
||||||
|
|
||||||
|
def _validate_previous(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
if self.previous_lambdas:
|
||||||
|
for leaf in node.root.previous_leaves(node):
|
||||||
|
for lambda_ in self.previous_lambdas:
|
||||||
|
ret = self._check_rule(lambda_, leaf)
|
||||||
|
if ret is not None:
|
||||||
|
return ret
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _validate_next(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
if self.next_lambdas:
|
||||||
|
for leaf in node.root.next_leaves(node):
|
||||||
|
for lambda_ in self.next_lambdas:
|
||||||
|
ret = self._check_rule(lambda_, leaf)
|
||||||
|
if ret is not None:
|
||||||
|
return ret
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _check_rule(self, lambda_, previous_leaf):
|
||||||
|
return lambda_(previous_leaf)
|
||||||
|
|
||||||
|
|
||||||
|
class _Property:
|
||||||
|
"""Represents a property configuration."""
|
||||||
|
def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None, disabler=None, confidence_lambda=None):
|
||||||
|
"""
|
||||||
|
:param keys: Keys of the property (format, screenSize, ...)
|
||||||
|
:type keys: string
|
||||||
|
:param canonical_form: Unique value of the property (DVD, 720p, ...)
|
||||||
|
:type canonical_form: string
|
||||||
|
:param pattern: Regexp pattern
|
||||||
|
:type pattern: string
|
||||||
|
:param confidence: confidence
|
||||||
|
:type confidence: float
|
||||||
|
:param enhance: enhance the pattern
|
||||||
|
:type enhance: boolean
|
||||||
|
:param global_span: if True, the whole match span will used to create the Guess.
|
||||||
|
Else, the span from the capturing groups will be used.
|
||||||
|
:type global_span: boolean
|
||||||
|
:param validator: Validator to use
|
||||||
|
:type validator: :class:`DefaultValidator`
|
||||||
|
:param formatter: Formater to use
|
||||||
|
:type formatter: function
|
||||||
|
"""
|
||||||
|
if isinstance(keys, list):
|
||||||
|
self.keys = keys
|
||||||
|
elif isinstance(keys, base_text_type):
|
||||||
|
self.keys = [keys]
|
||||||
|
else:
|
||||||
|
self.keys = []
|
||||||
|
self.canonical_form = canonical_form
|
||||||
|
if pattern is not None:
|
||||||
|
self.pattern = pattern
|
||||||
|
else:
|
||||||
|
self.pattern = canonical_form
|
||||||
|
if self.canonical_form is None and canonical_from_pattern:
|
||||||
|
self.canonical_form = self.pattern
|
||||||
|
self.compiled = compile_pattern(self.pattern, enhance=enhance)
|
||||||
|
for group_name in _get_groups(self.compiled):
|
||||||
|
if isinstance(group_name, base_text_type) and not group_name in self.keys:
|
||||||
|
self.keys.append(group_name)
|
||||||
|
if not self.keys:
|
||||||
|
raise ValueError("No property key is defined")
|
||||||
|
self.confidence = confidence
|
||||||
|
self.confidence_lambda = confidence_lambda
|
||||||
|
self.global_span = global_span
|
||||||
|
self.validator = validator
|
||||||
|
self.formatter = formatter
|
||||||
|
self.disabler = disabler
|
||||||
|
|
||||||
|
def disabled(self, options):
|
||||||
|
if self.disabler:
|
||||||
|
return self.disabler(options)
|
||||||
|
return False
|
||||||
|
|
||||||
|
def format(self, value, group_name=None):
|
||||||
|
"""Retrieves the final value from re group match value"""
|
||||||
|
formatter = None
|
||||||
|
if isinstance(self.formatter, dict):
|
||||||
|
formatter = self.formatter.get(group_name)
|
||||||
|
if formatter is None and group_name is not None:
|
||||||
|
formatter = self.formatter.get(None)
|
||||||
|
else:
|
||||||
|
formatter = self.formatter
|
||||||
|
if isinstance(formatter, types.FunctionType):
|
||||||
|
return formatter(value)
|
||||||
|
elif formatter is not None:
|
||||||
|
return formatter.format(value)
|
||||||
|
return value
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "%s: %s" % (self.keys, self.canonical_form if self.canonical_form else self.pattern)
|
||||||
|
|
||||||
|
|
||||||
|
class PropertiesContainer(object):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
self._properties = []
|
||||||
|
self.default_property_kwargs = kwargs
|
||||||
|
|
||||||
|
def unregister_property(self, name, *canonical_forms):
|
||||||
|
"""Unregister a property canonical forms
|
||||||
|
|
||||||
|
If canonical_forms are specified, only those values will be unregistered
|
||||||
|
|
||||||
|
:param name: Property name to unregister
|
||||||
|
:type name: string
|
||||||
|
:param canonical_forms: Values to unregister
|
||||||
|
:type canonical_forms: varargs of string
|
||||||
|
"""
|
||||||
|
_properties = [prop for prop in self._properties if prop.name == name and (not canonical_forms or prop.canonical_form in canonical_forms)]
|
||||||
|
|
||||||
|
def register_property(self, name, *patterns, **property_params):
|
||||||
|
"""Register property with defined canonical form and patterns.
|
||||||
|
|
||||||
|
:param name: name of the property (format, screenSize, ...)
|
||||||
|
:type name: string
|
||||||
|
:param patterns: regular expression patterns to register for the property canonical_form
|
||||||
|
:type patterns: varargs of string
|
||||||
|
"""
|
||||||
|
properties = []
|
||||||
|
for pattern in patterns:
|
||||||
|
params = dict(self.default_property_kwargs)
|
||||||
|
params.update(property_params)
|
||||||
|
if isinstance(pattern, dict):
|
||||||
|
params.update(pattern)
|
||||||
|
prop = _Property(name, **params)
|
||||||
|
else:
|
||||||
|
prop = _Property(name, pattern, **params)
|
||||||
|
self._properties.append(prop)
|
||||||
|
properties.append(prop)
|
||||||
|
return properties
|
||||||
|
|
||||||
|
def register_canonical_properties(self, name, *canonical_forms, **property_params):
|
||||||
|
"""Register properties from their canonical forms.
|
||||||
|
|
||||||
|
:param name: name of the property (releaseGroup, ...)
|
||||||
|
:type name: string
|
||||||
|
:param canonical_forms: values of the property ('ESiR', 'WAF', 'SEPTiC', ...)
|
||||||
|
:type canonical_forms: varargs of strings
|
||||||
|
"""
|
||||||
|
properties = []
|
||||||
|
for canonical_form in canonical_forms:
|
||||||
|
params = dict(property_params)
|
||||||
|
params['canonical_form'] = canonical_form
|
||||||
|
properties.extend(self.register_property(name, canonical_form, **property_params))
|
||||||
|
return properties
|
||||||
|
|
||||||
|
def unregister_all_properties(self):
|
||||||
|
"""Unregister all defined properties"""
|
||||||
|
self._properties.clear()
|
||||||
|
|
||||||
|
def find_properties(self, string, node, options, name=None, validate=True, re_match=False, sort=True, multiple=False):
|
||||||
|
"""Find all distinct properties for given string
|
||||||
|
|
||||||
|
If no capturing group is defined in the property, value will be grabbed from the entire match.
|
||||||
|
|
||||||
|
If one ore more unnamed capturing group is defined in the property, first capturing group will be used.
|
||||||
|
|
||||||
|
If named capturing group are defined in the property, they will be returned as property key.
|
||||||
|
|
||||||
|
If validate, found properties will be validated by their defined validator
|
||||||
|
|
||||||
|
If re_match, re.match will be used instead of re.search.
|
||||||
|
|
||||||
|
if sort, found properties will be sorted from longer match to shorter match.
|
||||||
|
|
||||||
|
If multiple is False and multiple values are found for the same property, the more confident one will be returned.
|
||||||
|
|
||||||
|
If multiple is False and multiple values are found for the same property and the same confidence, the longer will be returned.
|
||||||
|
|
||||||
|
:param string: input string
|
||||||
|
:type string: string
|
||||||
|
|
||||||
|
:param node: current node of the matching tree
|
||||||
|
:type node: :class:`guessit.matchtree.MatchTree`
|
||||||
|
|
||||||
|
:param name: name of property to find
|
||||||
|
:type name: string
|
||||||
|
|
||||||
|
:param re_match: use re.match instead of re.search
|
||||||
|
:type re_match: bool
|
||||||
|
|
||||||
|
:param multiple: Allows multiple property values to be returned
|
||||||
|
:type multiple: bool
|
||||||
|
|
||||||
|
:return: found properties
|
||||||
|
:rtype: list of tuples (:class:`_Property`, match, list of tuples (property_name, tuple(value_start, value_end)))
|
||||||
|
|
||||||
|
:see: `_Property`
|
||||||
|
:see: `register_property`
|
||||||
|
:see: `register_canonical_properties`
|
||||||
|
"""
|
||||||
|
entry_start = {}
|
||||||
|
entry_end = {}
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
duplicate_matches = {}
|
||||||
|
|
||||||
|
ret = []
|
||||||
|
|
||||||
|
if not string.strip():
|
||||||
|
return ret
|
||||||
|
|
||||||
|
# search all properties
|
||||||
|
for prop in self.get_properties(name):
|
||||||
|
if not prop.disabled(options):
|
||||||
|
valid_match = None
|
||||||
|
if re_match:
|
||||||
|
match = prop.compiled.match(string)
|
||||||
|
if match:
|
||||||
|
entries.append((prop, match))
|
||||||
|
else:
|
||||||
|
matches = list(prop.compiled.finditer(string))
|
||||||
|
duplicate_matches[prop] = matches
|
||||||
|
for match in matches:
|
||||||
|
entries.append((prop, match))
|
||||||
|
|
||||||
|
for prop, match in entries:
|
||||||
|
# compute confidence
|
||||||
|
if prop.confidence_lambda:
|
||||||
|
computed_confidence = prop.confidence_lambda(match)
|
||||||
|
if computed_confidence is not None:
|
||||||
|
prop.confidence = computed_confidence
|
||||||
|
|
||||||
|
if validate:
|
||||||
|
# compute entries start and ends
|
||||||
|
for prop, match in entries:
|
||||||
|
start, end = _get_span(prop, match)
|
||||||
|
|
||||||
|
if start not in entry_start:
|
||||||
|
entry_start[start] = [prop]
|
||||||
|
else:
|
||||||
|
entry_start[start].append(prop)
|
||||||
|
|
||||||
|
if end not in entry_end:
|
||||||
|
entry_end[end] = [prop]
|
||||||
|
else:
|
||||||
|
entry_end[end].append(prop)
|
||||||
|
|
||||||
|
# remove invalid values
|
||||||
|
while True:
|
||||||
|
invalid_entries = []
|
||||||
|
for entry in entries:
|
||||||
|
prop, match = entry
|
||||||
|
if not prop.validator.validate(prop, string, node, match, entry_start, entry_end):
|
||||||
|
invalid_entries.append(entry)
|
||||||
|
if not invalid_entries:
|
||||||
|
break
|
||||||
|
for entry in invalid_entries:
|
||||||
|
prop, match = entry
|
||||||
|
entries.remove(entry)
|
||||||
|
prop_duplicate_matches = duplicate_matches.get(prop)
|
||||||
|
if prop_duplicate_matches:
|
||||||
|
prop_duplicate_matches.remove(match)
|
||||||
|
invalid_span = _get_span(prop, match)
|
||||||
|
start = invalid_span[0]
|
||||||
|
end = invalid_span[1]
|
||||||
|
entry_start[start].remove(prop)
|
||||||
|
if not entry_start.get(start):
|
||||||
|
del entry_start[start]
|
||||||
|
entry_end[end].remove(prop)
|
||||||
|
if not entry_end.get(end):
|
||||||
|
del entry_end[end]
|
||||||
|
|
||||||
|
for prop, prop_duplicate_matches in duplicate_matches.items():
|
||||||
|
# Keeping the last valid match.
|
||||||
|
# Needed for the.100.109.hdtv-lol.mp4
|
||||||
|
for duplicate_match in prop_duplicate_matches[:-1]:
|
||||||
|
entries.remove((prop, duplicate_match))
|
||||||
|
|
||||||
|
if multiple:
|
||||||
|
ret = entries
|
||||||
|
else:
|
||||||
|
# keep only best match if multiple values where found
|
||||||
|
entries_dict = {}
|
||||||
|
for entry in entries:
|
||||||
|
for key in prop.keys:
|
||||||
|
if key not in entries_dict:
|
||||||
|
entries_dict[key] = []
|
||||||
|
entries_dict[key].append(entry)
|
||||||
|
|
||||||
|
for key_entries in entries_dict.values():
|
||||||
|
if multiple:
|
||||||
|
for entry in key_entries:
|
||||||
|
ret.append(entry)
|
||||||
|
else:
|
||||||
|
best_ret = {}
|
||||||
|
|
||||||
|
best_prop, best_match = None, None
|
||||||
|
if len(key_entries) == 1:
|
||||||
|
best_prop, best_match = key_entries[0]
|
||||||
|
else:
|
||||||
|
for prop, match in key_entries:
|
||||||
|
start, end = _get_span(prop, match)
|
||||||
|
if not best_prop or \
|
||||||
|
best_prop.confidence < best_prop.confidence or \
|
||||||
|
best_prop.confidence == best_prop.confidence and \
|
||||||
|
best_match.span()[1] - best_match.span()[0] < match.span()[1] - match.span()[0]:
|
||||||
|
best_prop, best_match = prop, match
|
||||||
|
|
||||||
|
best_ret[best_prop] = best_match
|
||||||
|
|
||||||
|
for prop, match in best_ret.items():
|
||||||
|
ret.append((prop, match))
|
||||||
|
|
||||||
|
if sort:
|
||||||
|
def _sorting(x):
|
||||||
|
_, x_match = x
|
||||||
|
x_start, x_end = x_match.span()
|
||||||
|
return x_start - x_end
|
||||||
|
|
||||||
|
ret.sort(key=_sorting)
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def as_guess(self, found_properties, input=None, filter_=None, sep_replacement=None, multiple=False, *args, **kwargs):
|
||||||
|
if filter_ is None:
|
||||||
|
filter_ = lambda property, *args, **kwargs: True
|
||||||
|
guesses = [] if multiple else None
|
||||||
|
for prop, match in found_properties:
|
||||||
|
first_key = None
|
||||||
|
for key in prop.keys:
|
||||||
|
# First property key will be used as base for effective name
|
||||||
|
if isinstance(key, base_text_type):
|
||||||
|
if first_key is None:
|
||||||
|
first_key = key
|
||||||
|
break
|
||||||
|
property_name = first_key if first_key else None
|
||||||
|
span = _get_span(prop, match)
|
||||||
|
guess = Guess(confidence=prop.confidence, input=input, span=span, prop=property_name)
|
||||||
|
groups = _get_groups(match.re)
|
||||||
|
for group_name in groups:
|
||||||
|
name = group_name if isinstance(group_name, base_text_type) else property_name if property_name not in groups else None
|
||||||
|
if name:
|
||||||
|
value = self._effective_prop_value(prop, group_name, input, match.span(group_name) if group_name else match.span(), sep_replacement)
|
||||||
|
if not value is None:
|
||||||
|
is_string = isinstance(value, base_text_type)
|
||||||
|
if not is_string or is_string and value: # Keep non empty strings and other defined objects
|
||||||
|
if isinstance(value, dict):
|
||||||
|
for k, v in value.items():
|
||||||
|
if k is None:
|
||||||
|
k = name
|
||||||
|
guess[k] = v
|
||||||
|
else:
|
||||||
|
if name in guess:
|
||||||
|
if not isinstance(guess[name], list):
|
||||||
|
guess[name] = [guess[name]]
|
||||||
|
guess[name].append(value)
|
||||||
|
else:
|
||||||
|
guess[name] = value
|
||||||
|
if group_name:
|
||||||
|
guess.metadata(prop).span = match.span(group_name)
|
||||||
|
if filter_(guess):
|
||||||
|
if multiple:
|
||||||
|
guesses.append(guess)
|
||||||
|
else:
|
||||||
|
return guess
|
||||||
|
return guesses
|
||||||
|
|
||||||
|
def _effective_prop_value(self, prop, group_name, input=None, span=None, sep_replacement=None):
|
||||||
|
if prop.canonical_form:
|
||||||
|
return prop.canonical_form
|
||||||
|
if input is None:
|
||||||
|
return None
|
||||||
|
value = input
|
||||||
|
if span is not None:
|
||||||
|
value = value[span[0]:span[1]]
|
||||||
|
value = input[span[0]:span[1]] if input else None
|
||||||
|
if sep_replacement:
|
||||||
|
for sep_char in sep:
|
||||||
|
value = value.replace(sep_char, sep_replacement)
|
||||||
|
if value:
|
||||||
|
value = prop.format(value, group_name)
|
||||||
|
return value
|
||||||
|
|
||||||
|
def get_properties(self, name=None, canonical_form=None):
|
||||||
|
"""Retrieve properties
|
||||||
|
|
||||||
|
:return: Properties
|
||||||
|
:rtype: generator
|
||||||
|
"""
|
||||||
|
for prop in self._properties:
|
||||||
|
if (name is None or name in prop.keys) and (canonical_form is None or prop.canonical_form == canonical_form):
|
||||||
|
yield prop
|
||||||
|
|
||||||
|
def get_supported_properties(self):
|
||||||
|
supported_properties = {}
|
||||||
|
for prop in self.get_properties():
|
||||||
|
for k in prop.keys:
|
||||||
|
values = supported_properties.get(k)
|
||||||
|
if not values:
|
||||||
|
values = set()
|
||||||
|
supported_properties[k] = values
|
||||||
|
if prop.canonical_form:
|
||||||
|
values.add(prop.canonical_form)
|
||||||
|
return supported_properties
|
||||||
|
|
||||||
|
|
||||||
|
class QualitiesContainer():
|
||||||
|
def __init__(self):
|
||||||
|
self._qualities = {}
|
||||||
|
|
||||||
|
def register_quality(self, name, canonical_form, rating):
|
||||||
|
"""Register a quality rating.
|
||||||
|
|
||||||
|
:param name: Name of the property
|
||||||
|
:type name: string
|
||||||
|
:param canonical_form: Value of the property
|
||||||
|
:type canonical_form: string
|
||||||
|
:param rating: Estimated quality rating for the property
|
||||||
|
:type rating: int
|
||||||
|
"""
|
||||||
|
property_qualities = self._qualities.get(name)
|
||||||
|
|
||||||
|
if property_qualities is None:
|
||||||
|
property_qualities = {}
|
||||||
|
self._qualities[name] = property_qualities
|
||||||
|
|
||||||
|
property_qualities[canonical_form] = rating
|
||||||
|
|
||||||
|
def unregister_quality(self, name, *canonical_forms):
|
||||||
|
"""Unregister quality ratings for given property name.
|
||||||
|
|
||||||
|
If canonical_forms are specified, only those values will be unregistered
|
||||||
|
|
||||||
|
:param name: Name of the property
|
||||||
|
:type name: string
|
||||||
|
:param canonical_forms: Value of the property
|
||||||
|
:type canonical_forms: string
|
||||||
|
"""
|
||||||
|
if not canonical_forms:
|
||||||
|
if name in self._qualities:
|
||||||
|
del self._qualities[name]
|
||||||
|
else:
|
||||||
|
property_qualities = self._qualities.get(name)
|
||||||
|
if property_qualities is not None:
|
||||||
|
for property_canonical_form in canonical_forms:
|
||||||
|
if property_canonical_form in property_qualities:
|
||||||
|
del property_qualities[property_canonical_form]
|
||||||
|
if not property_qualities:
|
||||||
|
del self._qualities[name]
|
||||||
|
|
||||||
|
def clear_qualities(self,):
|
||||||
|
"""Unregister all defined quality ratings.
|
||||||
|
"""
|
||||||
|
self._qualities.clear()
|
||||||
|
|
||||||
|
def rate_quality(self, guess, *props):
|
||||||
|
"""Rate the quality of guess.
|
||||||
|
|
||||||
|
:param guess: Guess to rate
|
||||||
|
:type guess: :class:`guessit.guess.Guess`
|
||||||
|
:param props: Properties to include in the rating. if empty, rating will be performed for all guess properties.
|
||||||
|
:type props: varargs of string
|
||||||
|
|
||||||
|
:return: Quality of the guess. The higher, the better.
|
||||||
|
:rtype: int
|
||||||
|
"""
|
||||||
|
rate = 0
|
||||||
|
if not props:
|
||||||
|
props = guess.keys()
|
||||||
|
for prop in props:
|
||||||
|
prop_value = guess.get(prop)
|
||||||
|
prop_qualities = self._qualities.get(prop)
|
||||||
|
if prop_value is not None and prop_qualities is not None:
|
||||||
|
rate += prop_qualities.get(prop_value, 0)
|
||||||
|
return rate
|
||||||
|
|
||||||
|
def best_quality_properties(self, props, *guesses):
|
||||||
|
"""Retrieve the best quality guess, based on given properties
|
||||||
|
|
||||||
|
:param props: Properties to include in the rating
|
||||||
|
:type props: list of strings
|
||||||
|
:param guesses: Guesses to rate
|
||||||
|
:type guesses: :class:`guessit.guess.Guess`
|
||||||
|
|
||||||
|
:return: Best quality guess from all passed guesses
|
||||||
|
:rtype: :class:`guessit.guess.Guess`
|
||||||
|
"""
|
||||||
|
best_guess = None
|
||||||
|
best_rate = None
|
||||||
|
for guess in guesses:
|
||||||
|
rate = self.rate_quality(guess, *props)
|
||||||
|
if best_rate is None or best_rate < rate:
|
||||||
|
best_rate = rate
|
||||||
|
best_guess = guess
|
||||||
|
return best_guess
|
||||||
|
|
||||||
|
def best_quality(self, *guesses):
|
||||||
|
"""Retrieve the best quality guess.
|
||||||
|
|
||||||
|
:param guesses: Guesses to rate
|
||||||
|
:type guesses: :class:`guessit.guess.Guess`
|
||||||
|
|
||||||
|
:return: Best quality guess from all passed guesses
|
||||||
|
:rtype: :class:`guessit.guess.Guess`
|
||||||
|
"""
|
||||||
|
best_guess = None
|
||||||
|
best_rate = None
|
||||||
|
for guess in guesses:
|
||||||
|
rate = self.rate_quality(guess)
|
||||||
|
if best_rate is None or best_rate < rate:
|
||||||
|
best_rate = rate
|
||||||
|
best_guess = guess
|
||||||
|
return best_guess
|
||||||
|
|
||||||
|
|
@ -1,112 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# GuessIt - A library for guessing information from filenames
|
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
|
||||||
#
|
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
|
||||||
# the Free Software Foundation; either version 3 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# GuessIt is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# Lesser GNU General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the Lesser GNU General Public License
|
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
#
|
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from guessit import UnicodeMixin, base_text_type, u
|
|
||||||
from guessit.fileutils import load_file_in_same_dir
|
|
||||||
import logging
|
|
||||||
|
|
||||||
__all__ = [ 'Country' ]
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
# parsed from http://en.wikipedia.org/wiki/ISO_3166-1
|
|
||||||
#
|
|
||||||
# Description of the fields:
|
|
||||||
# "An English name, an alpha-2 code (when given),
|
|
||||||
# an alpha-3 code (when given), a numeric code, and an ISO 31666-2 code
|
|
||||||
# are all separated by pipe (|) characters."
|
|
||||||
_iso3166_contents = load_file_in_same_dir(__file__, 'ISO-3166-1_utf8.txt')
|
|
||||||
|
|
||||||
country_matrix = [ l.strip().split('|')
|
|
||||||
for l in _iso3166_contents.strip().split('\n') ]
|
|
||||||
|
|
||||||
country_matrix += [ [ 'Unknown', 'un', 'unk', '', '' ],
|
|
||||||
[ 'Latin America', '', 'lat', '', '' ]
|
|
||||||
]
|
|
||||||
|
|
||||||
country_to_alpha3 = dict((c[0].lower(), c[2].lower()) for c in country_matrix)
|
|
||||||
country_to_alpha3.update(dict((c[1].lower(), c[2].lower()) for c in country_matrix))
|
|
||||||
country_to_alpha3.update(dict((c[2].lower(), c[2].lower()) for c in country_matrix))
|
|
||||||
|
|
||||||
# add here exceptions / non ISO representations
|
|
||||||
# Note: remember to put those exceptions in lower-case, they won't work otherwise
|
|
||||||
country_to_alpha3.update({ 'latinoamérica': 'lat',
|
|
||||||
'brazilian': 'bra',
|
|
||||||
'españa': 'esp',
|
|
||||||
'uk': 'gbr'
|
|
||||||
})
|
|
||||||
|
|
||||||
country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix)
|
|
||||||
country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Country(UnicodeMixin):
|
|
||||||
"""This class represents a country.
|
|
||||||
|
|
||||||
You can initialize it with pretty much anything, as it knows conversion
|
|
||||||
from ISO-3166 2-letter and 3-letter codes, and an English name.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, country, strict=False):
|
|
||||||
country = u(country.strip().lower())
|
|
||||||
self.alpha3 = country_to_alpha3.get(country)
|
|
||||||
|
|
||||||
if self.alpha3 is None and strict:
|
|
||||||
msg = 'The given string "%s" could not be identified as a country'
|
|
||||||
raise ValueError(msg % country)
|
|
||||||
|
|
||||||
if self.alpha3 is None:
|
|
||||||
self.alpha3 = 'unk'
|
|
||||||
|
|
||||||
|
|
||||||
@property
|
|
||||||
def alpha2(self):
|
|
||||||
return country_alpha3_to_alpha2[self.alpha3]
|
|
||||||
|
|
||||||
@property
|
|
||||||
def english_name(self):
|
|
||||||
return country_alpha3_to_en_name[self.alpha3]
|
|
||||||
|
|
||||||
def __hash__(self):
|
|
||||||
return hash(self.alpha3)
|
|
||||||
|
|
||||||
def __eq__(self, other):
|
|
||||||
if isinstance(other, Country):
|
|
||||||
return self.alpha3 == other.alpha3
|
|
||||||
|
|
||||||
if isinstance(other, base_text_type):
|
|
||||||
try:
|
|
||||||
return self == Country(other)
|
|
||||||
except ValueError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def __ne__(self, other):
|
|
||||||
return not self == other
|
|
||||||
|
|
||||||
def __unicode__(self):
|
|
||||||
return self.english_name
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return 'Country(%s)' % self.english_name
|
|
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,15 +18,38 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import datetime
 
 import re
 
-def valid_year(year):
-    return 1920 < year < datetime.date.today().year + 5
+from dateutil import parser
+
+
+_dsep = r'[-/ \.]'
+_dsep_bis = r'[-/ \.x]'
+
+date_regexps = [
+    re.compile('[^\d](\d{8})[^\d]', re.IGNORECASE),
+    re.compile('[^\d](\d{6})[^\d]', re.IGNORECASE),
+    re.compile('[^\d](\d{2})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
+    re.compile('[^\d](\d{1,2})%s(\d{1,2})%s(\d{2})[^\d]' % (_dsep, _dsep), re.IGNORECASE),
+    re.compile('[^\d](\d{4})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep_bis, _dsep), re.IGNORECASE),
+    re.compile('[^\d](\d{1,2})%s(\d{1,2})%s(\d{4})[^\d]' % (_dsep, _dsep_bis), re.IGNORECASE),
+    re.compile('[^\d](\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4})[^\d]' % (_dsep, _dsep), re.IGNORECASE)]
+
+
+def valid_year(year, today=None):
+    """Check if number is a valid year"""
+    if not today:
+        today = datetime.date.today()
+    return 1920 < year < today.year + 5
 
 
 def search_year(string):
     """Looks for year patterns, and if found return the year and group span.
 
     Assumes there are sentinels at the beginning and end of the string that
     always allow matching a non-digit delimiting the date.
@@ -34,10 +57,10 @@ def search_year(string):
     and now + 5 years, so for instance 2000 would be returned as a valid
     year but 1492 would not.
 
-    >>> search_year('in the year 2000...')
-    (2000, (12, 16))
+    >>> search_year(' in the year 2000... ')
+    (2000, (13, 17))
 
-    >>> search_year('they arrived in 1492.')
+    >>> search_year(' they arrived in 1492. ')
     (None, None)
     """
     match = re.search(r'[^0-9]([0-9]{4})[^0-9]', string)
@@ -49,85 +72,58 @@ def search_year(string):
     return (None, None)
 
 
-def search_date(string):
+def search_date(string, year_first=None, day_first=True):
     """Looks for date patterns, and if found return the date and group span.
 
     Assumes there are sentinels at the beginning and end of the string that
     always allow matching a non-digit delimiting the date.
 
-    >>> search_date('This happened on 2002-04-22.')
-    (datetime.date(2002, 4, 22), (17, 27))
+    Year can be defined on two digit only. It will return the nearest possible
+    date from today.
 
-    >>> search_date('And this on 17-06-1998.')
-    (datetime.date(1998, 6, 17), (12, 22))
+    >>> search_date(' This happened on 2002-04-22. ')
+    (datetime.date(2002, 4, 22), (18, 28))
 
-    >>> search_date('no date in here')
+    >>> search_date(' And this on 17-06-1998. ')
+    (datetime.date(1998, 6, 17), (13, 23))
+
+    >>> search_date(' no date in here ')
     (None, None)
     """
-    dsep = r'[-/ \.]'
-
-    date_rexps = [
-        # 20010823
-        r'[^0-9]' +
-        r'(?P<year>[0-9]{4})' +
-        r'(?P<month>[0-9]{2})' +
-        r'(?P<day>[0-9]{2})' +
-        r'[^0-9]',
-
-        # 2001-08-23
-        r'[^0-9]' +
-        r'(?P<year>[0-9]{4})' + dsep +
-        r'(?P<month>[0-9]{2})' + dsep +
-        r'(?P<day>[0-9]{2})' +
-        r'[^0-9]',
-
-        # 23-08-2001
-        r'[^0-9]' +
-        r'(?P<day>[0-9]{2})' + dsep +
-        r'(?P<month>[0-9]{2})' + dsep +
-        r'(?P<year>[0-9]{4})' +
-        r'[^0-9]',
-
-        # 23-08-01
-        r'[^0-9]' +
-        r'(?P<day>[0-9]{2})' + dsep +
-        r'(?P<month>[0-9]{2})' + dsep +
-        r'(?P<year>[0-9]{2})' +
-        r'[^0-9]',
-    ]
-
-    for drexp in date_rexps:
-        match = re.search(drexp, string)
-        if match:
-            d = match.groupdict()
-            year, month, day = int(d['year']), int(d['month']), int(d['day'])
-            # years specified as 2 digits should be adjusted here
-            if year < 100:
-                if year > (datetime.date.today().year % 100) + 5:
-                    year = 1900 + year
-                else:
-                    year = 2000 + year
-
-            date = None
-            try:
-                date = datetime.date(year, month, day)
-            except ValueError:
-                try:
-                    date = datetime.date(year, day, month)
-                except ValueError:
-                    pass
-
-            if date is None:
-                continue
-
-            # check date plausibility
-            if not 1900 < date.year < datetime.date.today().year + 5:
-                continue
-
-            # looks like we have a valid date
-            # note: span is [+1,-1] because we don't want to include the
-            # non-digit char
-            start, end = match.span()
-            return (date, (start + 1, end - 1))
-
+    start, end = None, None
+    match = None
+    for date_re in date_regexps:
+        s = date_re.search(string)
+        if s and (match is None or s.end() - s.start() > len(match)):
+            start, end = s.start(), s.end()
+            if date_re.groups:
+                match = '-'.join(s.groups())
+            else:
+                match = s.group()
+
+    if match is None:
+        return None, None
+
+    today = datetime.date.today()
+
+    # If day_first/year_first is undefined, parse is made using both possible values.
+    yearfirst_opts = [False, True]
+    if year_first is not None:
+        yearfirst_opts = [year_first]
+
+    dayfirst_opts = [True, False]
+    if day_first is not None:
+        dayfirst_opts = [day_first]
+
+    kwargs_list = ({'dayfirst': d, 'yearfirst': y} for d in dayfirst_opts for y in yearfirst_opts)
+    for kwargs in kwargs_list:
+        try:
+            date = parser.parse(match, **kwargs)
+        except (ValueError, TypeError) as e:  # see https://bugs.launchpad.net/dateutil/+bug/1247643
+            date = None
+            pass
+
+        # check date plausibility
+        if date and valid_year(date.year, today=today):
+            return date.date(), (start+1, end-1)  # compensate for sentinels
+
     return None, None
|
|
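A minimal sketch of the dayfirst/yearfirst fallback used by the new search_date above, assuming only the dateutil dependency; the date_regexps table and the valid_year helper referenced in the hunk are not reproduced here, and parse_ambiguous is a hypothetical name used only for illustration.

import datetime
from dateutil import parser

def parse_ambiguous(text, year_first=None, day_first=True):
    # Try dateutil with the requested dayfirst/yearfirst flags; when a flag is
    # left undefined, fall back to the other combination, as the hunk does.
    yearfirst_opts = [year_first] if year_first is not None else [False, True]
    dayfirst_opts = [day_first] if day_first is not None else [True, False]
    for dayfirst in dayfirst_opts:
        for yearfirst in yearfirst_opts:
            try:
                return parser.parse(text, dayfirst=dayfirst, yearfirst=yearfirst).date()
            except (ValueError, TypeError):
                continue
    return None

print(parse_ambiguous('17-06-1998'))                  # 1998-06-17
print(parse_ambiguous('06-17-1998', day_first=None))  # tries both orders until one parses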
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,7 +18,8 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 from guessit import s, u
 import os.path
 import zipfile
@@ -44,17 +45,13 @@ def split_path(path):
     result = []
     while True:
         head, tail = os.path.split(path)
-        headlen = len(head)
 
-        # on Unix systems, the root folder is '/'
-        if head and head == '/'*headlen and tail == '':
-            return ['/'] + result
+        if not head and not tail:
+            return result
 
-        # on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\
-        if ((headlen == 3 and head[1:] == ':\\') or (headlen == 2 and head == '\\\\')) and tail == '':
-            return [head] + result
-
-        if head == '' and tail == '':
-            return result
+        if not tail and head == path:
+            # Make sure we won't have an infinite loop.
+            result = [head] + result
+            return result
 
         # we just split a directory ending with '/', so tail is empty
@@ -70,8 +67,8 @@ def split_path(path):
 def file_in_same_dir(ref_file, desired_file):
     """Return the path for a file in the same dir as a given reference file.
 
-    >>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings'))
-    '~/smewt/smewt.settings'
+    >>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings')) == os.path.normpath('~/smewt/smewt.settings')
+    True
 
     """
     return os.path.join(*(split_path(ref_file)[:-1] + [desired_file]))
@@ -85,6 +82,6 @@ def load_file_in_same_dir(ref_file, filename):
     if p.endswith('.zip'):
         zfilename = os.path.join(*path[:i + 1])
         zfile = zipfile.ZipFile(zfilename)
-        return zfile.read('/'.join(path[i + 1:]))
+        return u(zfile.read('/'.join(path[i + 1:])))
 
     return u(io.open(os.path.join(*path), encoding='utf-8').read())
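A standalone sketch of the simplified split_path loop from the hunk above, assuming POSIX os.path semantics; the tail of the loop (pushing the last component and re-splitting the head) is not visible in the hunk and is filled in here as an assumption.

import os.path

def split_path(path):
    result = []
    while True:
        head, tail = os.path.split(path)
        if not head and not tail:
            return result
        if not tail and head == path:
            # head stopped shrinking (filesystem root): stop to avoid an infinite loop
            return [head] + result
        if tail:
            result = [tail] + result
        path = head

print(split_path('/usr/local/lib'))  # ['/', 'usr', 'local', 'lib']
print(split_path('a/b/c'))           # ['a', 'b', 'c']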
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,10 +18,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 from guessit import UnicodeMixin, s, u, base_text_type
-from guessit.language import Language
-from guessit.country import Country
+from babelfish import Language, Country
 import json
 import datetime
 import logging
@@ -29,6 +29,111 @@ import logging
 log = logging.getLogger(__name__)
 
 
+class GuessMetadata(object):
+    """GuessMetadata contains confidence, an input string, span and related property.
+
+    If defined on a property of Guess object, it overrides the object defined as global.
+
+    :param parent: The parent metadata, used for undefined properties in self object
+    :type parent: :class: `GuessMedata`
+    :param confidence: The confidence (from 0.0 to 1.0)
+    :type confidence: number
+    :param input: The input string
+    :type input: string
+    :param span: The input string
+    :type span: tuple (int, int)
+    :param prop: The found property definition
+    :type prop: :class `guessit.containers._Property`
+    """
+    def __init__(self, parent=None, confidence=None, input=None, span=None, prop=None, *args, **kwargs):
+        self.parent = parent
+        if confidence is None and self.parent is None:
+            self._confidence = 1.0
+        else:
+            self._confidence = confidence
+        self._input = input
+        self._span = span
+        self._prop = prop
+
+    @property
+    def confidence(self):
+        """The confidence
+
+        :rtype: int
+        :return: confidence value
+        """
+        return self._confidence if self._confidence is not None else self.parent.confidence if self.parent else None
+
+    @confidence.setter
+    def confidence(self, confidence):
+        self._confidence = confidence
+
+    @property
+    def input(self):
+        """The input
+
+        :rtype: string
+        :return: String used to find this guess value
+        """
+        return self._input if self._input is not None else self.parent.input if self.parent else None
+
+    @input.setter
+    def input(self, input):
+        """The input
+
+        :rtype: string
+        """
+        self._input = input
+
+    @property
+    def span(self):
+        """The span
+
+        :rtype: tuple (int, int)
+        :return: span of input string used to find this guess value
+        """
+        return self._span if self._span is not None else self.parent.span if self.parent else None
+
+    @span.setter
+    def span(self, span):
+        """The span
+
+        :rtype: tuple (int, int)
+        :return: span of input string used to find this guess value
+        """
+        self._span = span
+
+    @property
+    def prop(self):
+        """The property
+
+        :rtype: :class:`_Property`
+        :return: The property
+        """
+        return self._prop if self._prop is not None else self.parent.prop if self.parent else None
+
+    @property
+    def raw(self):
+        """Return the raw information (original match from the string,
+        not the cleaned version) associated with the given property name."""
+        if self.input and self.span:
+            return self.input[self.span[0]:self.span[1]]
+        return None
+
+    def __repr__(self, *args, **kwargs):
+        return object.__repr__(self, *args, **kwargs)
+
+
+def _split_kwargs(**kwargs):
+    metadata_args = {}
+    for prop in dir(GuessMetadata):
+        try:
+            metadata_args[prop] = kwargs.pop(prop)
+        except KeyError:
+            pass
+    return metadata_args, kwargs
+
+
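A toy illustration of the parent-fallback pattern GuessMetadata uses (this is not the library class itself): per-property metadata only stores what was explicitly set and defers everything else to the guess-wide metadata object.

class Meta(object):
    def __init__(self, parent=None, confidence=None):
        self.parent = parent
        self._confidence = confidence

    @property
    def confidence(self):
        # fall back to the parent when this object has no value of its own
        if self._confidence is not None:
            return self._confidence
        return self.parent.confidence if self.parent else None

global_meta = Meta(confidence=0.6)                     # guess-wide default
title_meta = Meta(parent=global_meta)                  # nothing set: inherits 0.6
year_meta = Meta(parent=global_meta, confidence=0.9)   # overrides the default

print(title_meta.confidence, year_meta.confidence)     # 0.6 0.9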
 class Guess(UnicodeMixin, dict):
     """A Guess is a dictionary which has an associated confidence for each of
     its values.
 
@@ -37,39 +142,58 @@ class Guess(UnicodeMixin, dict):
     simple dict."""
 
     def __init__(self, *args, **kwargs):
-        try:
-            confidence = kwargs.pop('confidence')
-        except KeyError:
-            confidence = 0
-
-        try:
-            raw = kwargs.pop('raw')
-        except KeyError:
-            raw = None
+        metadata_kwargs, kwargs = _split_kwargs(**kwargs)
+        self._global_metadata = GuessMetadata(**metadata_kwargs)
 
         dict.__init__(self, *args, **kwargs)
 
-        self._confidence = {}
-        self._raw = {}
+        self._metadata = {}
         for prop in self:
-            self._confidence[prop] = confidence
-            self._raw[prop] = raw
+            self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
+
+    def rename(self, old_name, new_name):
+        if old_name in self._metadata:
+            metadata = self._metadata[old_name]
+            del self._metadata[old_name]
+            self._metadata[new_name] = metadata
+        if old_name in self:
+            value = self[old_name]
+            del self[old_name]
+            self[new_name] = value
+            return True
+        return False
 
     def to_dict(self, advanced=False):
+        """Return the guess as a dict containing only base types, ie:
+        where dates, languages, countries, etc. are converted to strings.
+
+        if advanced is True, return the data as a json string containing
+        also the raw information of the properties."""
         data = dict(self)
         for prop, value in data.items():
             if isinstance(value, datetime.date):
                 data[prop] = value.isoformat()
-            elif isinstance(value, (Language, Country, base_text_type)):
+            elif isinstance(value, (UnicodeMixin, base_text_type)):
                 data[prop] = u(value)
+            elif isinstance(value, (Language, Country)):
+                data[prop] = value.guessit
             elif isinstance(value, list):
                 data[prop] = [u(x) for x in value]
             if advanced:
-                data[prop] = {"value": data[prop], "raw": self.raw(prop), "confidence": self.confidence(prop)}
+                metadata = self.metadata(prop)
+                prop_data = {'value': data[prop]}
+                if metadata.raw:
+                    prop_data['raw'] = metadata.raw
+                if metadata.confidence:
+                    prop_data['confidence'] = metadata.confidence
+                data[prop] = prop_data
 
         return data
 
     def nice_string(self, advanced=False):
+        """Return a string with the property names and their values,
+        that also displays the associated confidence to each property.
+
+        FIXME: doc with param"""
         if advanced:
             data = self.to_dict(advanced)
             return json.dumps(data, indent=4)
 
@@ -89,39 +213,54 @@ class Guess(UnicodeMixin, dict):
     def __unicode__(self):
         return u(self.to_dict())
 
-    def confidence(self, prop):
-        return self._confidence.get(prop, -1)
+    def metadata(self, prop=None):
+        """Return the metadata associated with the given property name
+
+        If no property name is given, get the global_metadata
+        """
+        if prop is None:
+            return self._global_metadata
+        if prop not in self._metadata:
+            self._metadata[prop] = GuessMetadata(parent=self._global_metadata)
+        return self._metadata[prop]
+
+    def confidence(self, prop=None):
+        return self.metadata(prop).confidence
+
+    def set_confidence(self, prop, confidence):
+        self.metadata(prop).confidence = confidence
 
     def raw(self, prop):
-        return self._raw.get(prop, None)
+        return self.metadata(prop).raw
 
-    def set(self, prop, value, confidence=None, raw=None):
-        self[prop] = value
-        if confidence is not None:
-            self._confidence[prop] = confidence
-        if raw is not None:
-            self._raw[prop] = raw
-
-    def set_confidence(self, prop, value):
-        self._confidence[prop] = value
-
-    def set_raw(self, prop, value):
-        self._raw[prop] = value
-
-    def update(self, other, confidence=None, raw=None):
+    def set(self, prop_name, value, *args, **kwargs):
+        if value is None:
+            try:
+                del self[prop_name]
+            except KeyError:
+                pass
+            try:
+                del self._metadata[prop_name]
+            except KeyError:
+                pass
+        else:
+            self[prop_name] = value
+            if 'metadata' in kwargs.keys():
+                self._metadata[prop_name] = kwargs['metadata']
+            else:
+                self._metadata[prop_name] = GuessMetadata(parent=self._global_metadata, *args, **kwargs)
+
+    def update(self, other, confidence=None):
         dict.update(self, other)
         if isinstance(other, Guess):
             for prop in other:
-                self._confidence[prop] = other.confidence(prop)
-                self._raw[prop] = other.raw(prop)
+                try:
+                    self._metadata[prop] = other._metadata[prop]
+                except KeyError:
+                    pass
         if confidence is not None:
             for prop in other:
-                self._confidence[prop] = confidence
-
-        if raw is not None:
-            for prop in other:
-                self._raw[prop] = raw
+                self.set_confidence(prop, confidence)
 
     def update_highest_confidence(self, other):
         """Update this guess with the values from the given one. In case
@@ -131,11 +270,10 @@ class Guess(UnicodeMixin, dict):
             raise ValueError('Can only call this function on Guess instances')
 
         for prop in other:
-            if prop in self and self.confidence(prop) >= other.confidence(prop):
+            if prop in self and self.metadata(prop).confidence >= other.metadata(prop).confidence:
                 continue
             self[prop] = other[prop]
-            self._confidence[prop] = other.confidence(prop)
-            self._raw[prop] = other.raw(prop)
+            self._metadata[prop] = other.metadata(prop)
 
 def choose_int(g1, g2):
 
@@ -193,26 +331,26 @@ def choose_string(g1, g2):
     combined_prob = 1 - (1 - c1) * (1 - c2)
 
     if v1l == v2l:
-        return (v1, combined_prob)
+        return v1, combined_prob
 
     # check for common patterns
     elif v1l == 'the ' + v2l:
-        return (v1, combined_prob)
+        return v1, combined_prob
     elif v2l == 'the ' + v1l:
-        return (v2, combined_prob)
+        return v2, combined_prob
 
     # if one string is contained in the other, return the shortest one
     elif v2l in v1l:
-        return (v2, combined_prob)
+        return v2, combined_prob
     elif v1l in v2l:
-        return (v1, combined_prob)
+        return v1, combined_prob
 
     # in case of conflict, return the one with highest confidence
     else:
         if c1 > c2:
-            return (v1, c1 - c2)
+            return v1, c1 - c2
         else:
-            return (v2, c2 - c1)
+            return v2, c2 - c1
 
 
 def _merge_similar_guesses_nocheck(guesses, prop, choose):
 
@@ -226,17 +364,7 @@ def _merge_similar_guesses_nocheck(guesses, prop, choose):
     g1, g2 = similar[0], similar[1]
 
-    other_props = set(g1) & set(g2) - set([prop])
-    if other_props:
-        log.debug('guess 1: %s' % g1)
-        log.debug('guess 2: %s' % g2)
-        for prop in other_props:
-            if g1[prop] != g2[prop]:
-                log.warning('both guesses to be merged have more than one '
-                            'different property in common, bailing out...')
-                return
-
-    # merge all props of s2 into s1, updating the confidence for the
+    # merge only this prop of s2 into s1, updating the confidence for the
     # considered property
     v1, v2 = g1[prop], g2[prop]
     c1, c2 = g1.confidence(prop), g2.confidence(prop)
 
@@ -248,10 +376,11 @@ def _merge_similar_guesses_nocheck(guesses, prop, choose):
     msg = "Updating non-matching property '%s' with confidence %.2f"
     log.debug(msg % (prop, new_confidence))
 
-    g2[prop] = new_value
-    g2.set_confidence(prop, new_confidence)
+    g1.set(prop, new_value, confidence=new_confidence)
+    g2.pop(prop)
 
-    g1.update(g2)
-    guesses.remove(g2)
+    # remove g2 if there are no properties left
+    if not g2.keys():
+        guesses.remove(g2)
 
 
@@ -286,43 +415,53 @@ def merge_all(guesses, append=None):
     instead of being merged.
 
     >>> s(merge_all([ Guess({'season': 2}, confidence=0.6),
-    ...               Guess({'episodeNumber': 13}, confidence=0.8) ]))
-    {'season': 2, 'episodeNumber': 13}
+    ...               Guess({'episodeNumber': 13}, confidence=0.8) ])
+    ...     ) == {'season': 2, 'episodeNumber': 13}
+    True
 
     >>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02),
-    ...               Guess({'season': 1}, confidence=0.2) ]))
-    {'season': 1}
+    ...               Guess({'season': 1}, confidence=0.2) ])
+    ...     ) == {'season': 1}
+    True
 
     >>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8),
     ...               Guess({'releaseGroup': '2HD'}, confidence=0.8) ],
-    ...             append=['other']))
-    {'releaseGroup': '2HD', 'other': ['PROPER']}
+    ...             append=['other'])
+    ...     ) == {'releaseGroup': '2HD', 'other': ['PROPER']}
+    True
 
     """
+    result = Guess()
     if not guesses:
-        return Guess()
+        return result
 
-    result = guesses[0]
     if append is None:
         append = []
 
-    for g in guesses[1:]:
+    for g in guesses:
         # first append our appendable properties
         for prop in append:
             if prop in g:
-                result.set(prop, result.get(prop, []) + [g[prop]],
+                if isinstance(g[prop], (list, set)):
+                    new_values = result.get(prop, []) + list(g[prop])
+                else:
+                    new_values = result.get(prop, []) + [g[prop]]
+
+                result.set(prop, new_values,
                            # TODO: what to do with confidence here? maybe an
                            # arithmetic mean...
-                           confidence=g.confidence(prop),
-                           raw=g.raw(prop))
+                           confidence=g.metadata(prop).confidence,
+                           input=g.metadata(prop).input,
+                           span=g.metadata(prop).span,
+                           prop=g.metadata(prop).prop)
 
                 del g[prop]
 
         # then merge the remaining ones
         dups = set(result) & set(g)
         if dups:
-            log.warning('duplicate properties %s in merged result...' % [ (result[p], g[p]) for p in dups] )
+            log.debug('duplicate properties %s in merged result...' % [(result[p], g[p]) for p in dups])
 
         result.update_highest_confidence(g)
 
@@ -338,8 +477,38 @@ def merge_all(guesses, append=None):
             if isinstance(value, list):
                 result[prop] = list(set(value))
             else:
-                result[prop] = [ value ]
+                result[prop] = [value]
         except KeyError:
             pass
 
     return result
+
+
+def smart_merge(guesses):
+    """First tries to merge well-known similar properties, and then merges
+    the rest with a merge_all call.
+
+    Should be the function to call in most cases, unless one wants to have more
+    control.
+
+    Warning: this function is destructive, ie: it will merge the list in-place.
+    """
+
+    # 1- try to merge similar information together and give it a higher
+    #    confidence
+    for int_part in ('year', 'season', 'episodeNumber'):
+        merge_similar_guesses(guesses, int_part, choose_int)
+
+    for string_part in ('title', 'series', 'container', 'format',
+                        'releaseGroup', 'website', 'audioCodec',
+                        'videoCodec', 'screenSize', 'episodeFormat',
+                        'audioChannels', 'idNumber'):
+        merge_similar_guesses(guesses, string_part, choose_string)
+
+    # 2- merge the rest, potentially discarding information not properly
+    #    merged before
+    result = merge_all(guesses,
+                       append=['language', 'subtitleLanguage', 'other',
+                               'episodeDetails', 'unidentified'])
+
+    return result
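The agreement boost used in the choose_string hunk above combines two confidences as 1 - (1 - c1) * (1 - c2), i.e. the probability that at least one of two independent guesses is right; a quick check of that arithmetic:

def combined_confidence(c1, c2):
    # same formula as combined_prob in choose_string
    return 1 - (1 - c1) * (1 - c2)

print(combined_confidence(0.6, 0.8))  # ~0.92
print(combined_confidence(0.5, 0.5))  # 0.75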
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,17 +18,21 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 from guessit import s, to_hex
 import hashlib
 import os.path
 
+from functools import reduce
+
 
 def hash_file(filename):
     """Returns the ed2k hash of a given file.
 
-    >>> s(hash_file('tests/dummy.srt'))
-    'ed2k://|file|dummy.srt|44|1CA0B9DED3473B926AA93A0A546138BB|/'
+    >>> testfile = os.path.join(os.path.dirname(__file__), 'test/dummy.srt')
+    >>> s(hash_file(testfile))
+    'ed2k://|file|dummy.srt|59|41F58B913AB3973F593BEBA8B8DF6510|/'
     """
     return 'ed2k://|file|%s|%d|%s|/' % (os.path.basename(filename),
                                         os.path.getsize(filename),
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,7 +18,8 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import struct
 import os
 
@@ -28,7 +29,7 @@ def hash_file(filename):
     http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
     and is licensed under the GPL."""
 
-    longlongformat = 'q'  # long long
+    longlongformat = b'q'  # long long
     bytesize = struct.calcsize(longlongformat)
 
     f = open(filename, "rb")
 
@@ -39,18 +40,18 @@ def hash_file(filename):
     if filesize < 65536 * 2:
         raise Exception("SizeError: size is %d, should be > 132K..." % filesize)
 
-    for x in range(65536 / bytesize):
+    for x in range(int(65536 / bytesize)):
         buf = f.read(bytesize)
         (l_value,) = struct.unpack(longlongformat, buf)
         hash_value += l_value
-        hash_value = hash_value & 0xFFFFFFFFFFFFFFFF  # to remain as 64bit number
+        hash_value &= 0xFFFFFFFFFFFFFFFF  # to remain as 64bit number
 
     f.seek(max(0, filesize - 65536), 0)
-    for x in range(65536 / bytesize):
+    for x in range(int(65536 / bytesize)):
         buf = f.read(bytesize)
         (l_value,) = struct.unpack(longlongformat, buf)
         hash_value += l_value
-        hash_value = hash_value & 0xFFFFFFFFFFFFFFFF
+        hash_value &= 0xFFFFFFFFFFFFFFFF
 
     f.close()
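A self-contained sketch of the 64-bit checksum arithmetic behind the OpenSubtitles hash changes above; the int() around the division and the explicit 64-bit mask are the Python 3 fixes the hunk makes. It runs on an in-memory buffer instead of the first and last 64 KiB of a real file, and chunk_sum64 is a hypothetical helper name used only here.

import struct

def chunk_sum64(data):
    fmt = b'q'                                 # native signed 64-bit integer
    size = struct.calcsize(fmt)
    value = 0
    for i in range(int(len(data) / size)):     # int(...) keeps range() happy on Python 3
        (part,) = struct.unpack(fmt, data[i * size:(i + 1) * size])
        value += part
        value &= 0xFFFFFFFFFFFFFFFF            # stay within 64 bits
    return value

print('%016x' % chunk_sum64(b'0123456789abcdef' * 4))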
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,373 +18,284 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from __future__ import unicode_literals
-from guessit import UnicodeMixin, base_text_type, u, s
-from guessit.fileutils import load_file_in_same_dir
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit import UnicodeMixin, base_text_type, u
 from guessit.textutils import find_words
-from guessit.country import Country
+from babelfish import Language, Country
+import babelfish
 import re
 import logging
+from guessit.guess import Guess
 
-__all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language',
-            'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'UNDETERMINED',
-            'search_language', 'guess_language' ]
+__all__ = ['Language', 'UNDETERMINED',
+           'search_language', 'guess_language']
 
 
 log = logging.getLogger(__name__)
 
+UNDETERMINED = babelfish.Language('und')
 
-# downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
-#
-# Description of the fields:
-# "An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given),
-# an alpha-2 code (when given), an English name, and a French name of a language
-# are all separated by pipe (|) characters."
-_iso639_contents = load_file_in_same_dir(__file__, 'ISO-639-2_utf-8.txt')
-
-# drop the BOM from the beginning of the file
-_iso639_contents = _iso639_contents[1:]
-
-language_matrix = [ l.strip().split('|')
-                    for l in _iso639_contents.strip().split('\n') ]
-
-
-# update information in the language matrix
-language_matrix += [['mol', '', 'mo', 'Moldavian', 'moldave'],
-                    ['ass', '', '', 'Assyrian', 'assyrien']]
-
-for lang in language_matrix:
-    # remove unused languages that shadow other common ones with a non-official form
-    if (lang[2] == 'se' or  # Northern Sami shadows Swedish
-        lang[2] == 'br'):   # Breton shadows Brazilian
-        lang[2] = ''
-    # add missing information
-    if lang[0] == 'und':
-        lang[2] = 'un'
-    if lang[0] == 'srp':
-        lang[1] = 'scc'  # from OpenSubtitles
-
-
-lng3 = frozenset(l[0] for l in language_matrix if l[0])
-lng3term = frozenset(l[1] for l in language_matrix if l[1])
-lng2 = frozenset(l[2] for l in language_matrix if l[2])
-lng_en_name = frozenset(lng for l in language_matrix
-                        for lng in l[3].lower().split('; ') if lng)
-lng_fr_name = frozenset(lng for l in language_matrix
-                        for lng in l[4].lower().split('; ') if lng)
-lng_all_names = lng3 | lng3term | lng2 | lng_en_name | lng_fr_name
-
-lng3_to_lng3term = dict((l[0], l[1]) for l in language_matrix if l[1])
-lng3term_to_lng3 = dict((l[1], l[0]) for l in language_matrix if l[1])
-
-lng3_to_lng2 = dict((l[0], l[2]) for l in language_matrix if l[2])
-lng2_to_lng3 = dict((l[2], l[0]) for l in language_matrix if l[2])
-
-# we only return the first given english name, hoping it is the most used one
-lng3_to_lng_en_name = dict((l[0], l[3].split('; ')[0])
-                           for l in language_matrix if l[3])
-lng_en_name_to_lng3 = dict((en_name.lower(), l[0])
-                           for l in language_matrix if l[3]
-                           for en_name in l[3].split('; '))
-
-# we only return the first given french name, hoping it is the most used one
-lng3_to_lng_fr_name = dict((l[0], l[4].split('; ')[0])
-                           for l in language_matrix if l[4])
-lng_fr_name_to_lng3 = dict((fr_name.lower(), l[0])
-                           for l in language_matrix if l[4]
-                           for fr_name in l[4].split('; '))
-
-# contains a list of exceptions: strings that should be parsed as a language
-# but which are not in an ISO form
-lng_exceptions = { 'unknown': ('und', None),
-                   'inconnu': ('und', None),
-                   'unk': ('und', None),
-                   'un': ('und', None),
-                   'gr': ('gre', None),
-                   'greek': ('gre', None),
-                   'esp': ('spa', None),
-                   'español': ('spa', None),
-                   'se': ('swe', None),
-                   'po': ('pt', 'br'),
-                   'pb': ('pt', 'br'),
-                   'pob': ('pt', 'br'),
-                   'br': ('pt', 'br'),
-                   'brazilian': ('pt', 'br'),
-                   'català': ('cat', None),
-                   'cz': ('cze', None),
-                   'ua': ('ukr', None),
-                   'cn': ('chi', None),
-                   'chs': ('chi', None),
-                   'jp': ('jpn', None),
-                   'scr': ('hrv', None)
-                   }
+SYN = {('und', None): ['unknown', 'inconnu', 'unk', 'un'],
+       ('ell', None): ['gr', 'greek'],
+       ('spa', None): ['esp', 'español'],
+       ('fra', None): ['français', 'vf', 'vff', 'vfi'],
+       ('swe', None): ['se'],
+       ('por', 'BR'): ['po', 'pb', 'pob', 'br', 'brazilian'],
+       ('cat', None): ['català'],
+       ('ces', None): ['cz'],
+       ('ukr', None): ['ua'],
+       ('zho', None): ['cn'],
+       ('jpn', None): ['jp'],
+       ('hrv', None): ['scr'],
+       ('mul', None): ['multi', 'dl'],  # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
+       }
-def is_iso_language(language):
-    return language.lower() in lng_all_names
-
-
-def is_language(language):
-    return is_iso_language(language) or language in lng_exceptions
-
-
-def lang_set(languages, strict=False):
-    """Return a set of guessit.Language created from their given string
-    representation.
-
-    if strict is True, then this will raise an exception if any language
-    could not be identified.
-    """
-    return set(Language(l, strict=strict) for l in languages)
-
-
-class Language(UnicodeMixin):
-    """This class represents a human language.
-
-    You can initialize it with pretty much anything, as it knows conversion
-    from ISO-639 2-letter and 3-letter codes, English and French names.
-
-    You can also distinguish languages for specific countries, such as
-    Portuguese and Brazilian Portuguese.
-
-    There are various properties on the language object that give you the
-    representation of the language for a specific usage, such as .alpha3
-    to get the ISO 3-letter code, or .opensubtitles to get the OpenSubtitles
-    language code.
-
-    >>> Language('fr')
-    Language(French)
-
-    >>> s(Language('eng').french_name)
-    'anglais'
-
-    >>> s(Language('pt(br)').country.english_name)
-    'Brazil'
-
-    >>> s(Language('Español (Latinoamérica)').country.english_name)
-    'Latin America'
-
-    >>> Language('Spanish (Latin America)') == Language('Español (Latinoamérica)')
-    True
-
-    >>> s(Language('zz', strict=False).english_name)
-    'Undetermined'
-
-    >>> s(Language('pt(br)').opensubtitles)
-    'pob'
-    """
+class GuessitConverter(babelfish.LanguageReverseConverter):
 
     _with_country_regexp = re.compile('(.*)\((.*)\)')
     _with_country_regexp2 = re.compile('(.*)-(.*)')
 
-    def __init__(self, language, country=None, strict=False, scheme=None):
-        language = u(language.strip().lower())
-        with_country = (Language._with_country_regexp.match(language) or
-                        Language._with_country_regexp2.match(language))
-        if with_country:
-            self.lang = Language(with_country.group(1)).lang
-            self.country = Country(with_country.group(2))
-            return
-
-        self.lang = None
-        self.country = Country(country) if country else None
-
-        # first look for scheme specific languages
-        if scheme == 'opensubtitles':
-            if language == 'br':
-                self.lang = 'bre'
-                return
-            elif language == 'se':
-                self.lang = 'sme'
-                return
-        elif scheme is not None:
-            log.warning('Unrecognized scheme: "%s" - Proceeding with standard one' % scheme)
-
-        # look for ISO language codes
-        if len(language) == 2:
-            self.lang = lng2_to_lng3.get(language)
-        elif len(language) == 3:
-            self.lang = (language
-                         if language in lng3
-                         else lng3term_to_lng3.get(language))
-        else:
-            self.lang = (lng_en_name_to_lng3.get(language) or
-                         lng_fr_name_to_lng3.get(language))
-
-        # general language exceptions
-        if self.lang is None and language in lng_exceptions:
-            lang, country = lng_exceptions[language]
-            self.lang = Language(lang).alpha3
-            self.country = Country(country) if country else None
-
-        msg = 'The given string "%s" could not be identified as a language' % language
-
-        if self.lang is None and strict:
-            raise ValueError(msg)
-
-        if self.lang is None:
-            log.debug(msg)
-            self.lang = 'und'
-
-    @property
-    def alpha2(self):
-        return lng3_to_lng2[self.lang]
-
-    @property
-    def alpha3(self):
-        return self.lang
-
-    @property
-    def alpha3term(self):
-        return lng3_to_lng3term[self.lang]
-
-    @property
-    def english_name(self):
-        return lng3_to_lng_en_name[self.lang]
-
-    @property
-    def french_name(self):
-        return lng3_to_lng_fr_name[self.lang]
-
-    @property
-    def opensubtitles(self):
-        if self.lang == 'por' and self.country and self.country.alpha2 == 'br':
-            return 'pob'
-        elif self.lang in ['gre', 'srp']:
-            return self.alpha3term
-        return self.alpha3
-
-    @property
-    def tmdb(self):
-        if self.country:
-            return '%s-%s' % (self.alpha2, self.country.alpha2.upper())
-        return self.alpha2
-
-    def __hash__(self):
-        return hash(self.lang)
-
-    def __eq__(self, other):
-        if isinstance(other, Language):
-            return self.lang == other.lang
-
-        if isinstance(other, base_text_type):
-            try:
-                return self == Language(other)
-            except ValueError:
-                return False
-
-        return False
-
-    def __ne__(self, other):
-        return not self == other
-
-    def __nonzero__(self):
-        return self.lang != 'und'
-
-    def __unicode__(self):
-        if self.country:
-            return '%s(%s)' % (self.english_name, self.country.alpha2)
-        else:
-            return self.english_name
-
-    def __repr__(self):
-        if self.country:
-            return 'Language(%s, country=%s)' % (self.english_name, self.country)
-        else:
-            return 'Language(%s)' % self.english_name
+    def __init__(self):
+        self.guessit_exceptions = {}
+        for (alpha3, country), synlist in SYN.items():
+            for syn in synlist:
+                self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
+
+    @property
+    def codes(self):
+        return (babelfish.language_converters['alpha3b'].codes |
+                babelfish.language_converters['alpha2'].codes |
+                babelfish.language_converters['name'].codes |
+                babelfish.language_converters['opensubtitles'].codes |
+                babelfish.country_converters['name'].codes |
+                frozenset(self.guessit_exceptions.keys()))
+
+    def convert(self, alpha3, country=None, script=None):
+        return str(babelfish.Language(alpha3, country, script))
+
+    def reverse(self, name):
+        with_country = (GuessitConverter._with_country_regexp.match(name) or
+                        GuessitConverter._with_country_regexp2.match(name))
+
+        name = u(name.lower())
+        if with_country:
+            lang = Language.fromguessit(with_country.group(1).strip())
+            lang.country = babelfish.Country.fromguessit(with_country.group(2).strip())
+            return (lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None)
+
+        # exceptions come first, as they need to override a potential match
+        # with any of the other guessers
+        try:
+            return self.guessit_exceptions[name]
+        except KeyError:
+            pass
+
+        for conv in [babelfish.Language,
+                     babelfish.Language.fromalpha3b,
+                     babelfish.Language.fromalpha2,
+                     babelfish.Language.fromname,
+                     babelfish.Language.fromopensubtitles]:
+            try:
+                c = conv(name)
+                return c.alpha3, c.country, c.script
+            except (ValueError, babelfish.LanguageReverseError):
+                pass
+
+        raise babelfish.LanguageReverseError(name)
+
+
+babelfish.language_converters['guessit'] = GuessitConverter()
+
+
+COUNTRIES_SYN = {'ES': ['españa'],
+                 'GB': ['UK'],
+                 'BR': ['brazilian', 'bra'],
+                 # FIXME: this one is a bit of a stretch, not sure how to do
+                 # it properly, though...
+                 'MX': ['Latinoamérica', 'latin america']
+                 }
+
+
+class GuessitCountryConverter(babelfish.CountryReverseConverter):
+    def __init__(self):
+        self.guessit_exceptions = {}
+
+        for alpha2, synlist in COUNTRIES_SYN.items():
+            for syn in synlist:
+                self.guessit_exceptions[syn.lower()] = alpha2
+
+    @property
+    def codes(self):
+        return (babelfish.country_converters['name'].codes |
+                frozenset(babelfish.COUNTRIES.values()) |
+                frozenset(self.guessit_exceptions.keys()))
+
+    def convert(self, alpha2):
+        if alpha2 == 'GB':
+            return 'UK'
+        return str(Country(alpha2))
+
+    def reverse(self, name):
+        # exceptions come first, as they need to override a potential match
+        # with any of the other guessers
+        try:
+            return self.guessit_exceptions[name.lower()]
+        except KeyError:
+            pass
+
+        try:
+            return babelfish.Country(name.upper()).alpha2
+        except ValueError:
+            pass
+
+        for conv in [babelfish.Country.fromname]:
+            try:
+                return conv(name).alpha2
+            except babelfish.CountryReverseError:
+                pass
+
+        raise babelfish.CountryReverseError(name)
+
+
+babelfish.country_converters['guessit'] = GuessitCountryConverter()
+
+
+# list of common words which could be interpreted as languages, but which
+# are far too common to be able to say they represent a language in the
+# middle of a string (where they most likely carry their commmon meaning)
+LNG_COMMON_WORDS = frozenset([
+    # english words
+    'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
+    'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
+    'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as',
+    'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb', 'bt',
+    'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice', 'ay',
+    # french words
+    'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que',
+    'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me',
+    'ne', 'ma', 'va', 'au',
+    # japanese words,
+    'wa', 'ga', 'ao',
+    # spanish words
+    'la', 'el', 'del', 'por', 'mar',
+    # other
+    'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
+    'vi', 'ben', 'da', 'lt', 'ch',
+    # new from babelfish
+    'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and',
+    'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy',
+    'job', 'gag', 'reel', 'www', 'for', 'ayu', 'csi', 'ren', 'moi', 'sur',
+    'fer', 'fun', 'two', 'big', 'psy', 'air',
+    # movie title
+    'brazil',
+    # release groups
+    'bs',  # Bosnian
+    'kz',
+    # countries
+    'gt', 'lt',
+    # part/pt
+    'pt'
+    ])
+
+LNG_COMMON_WORDS_STRICT = frozenset(['brazil'])
+
+
+subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub']
+subtitle_suffixes = ['subforced', 'fansub', 'hardsub']
+lang_prefixes = ['true']
+
+
+def find_possible_languages(string, allowed_languages=None):
+    """Find possible languages in the string
+
+    :return: list of tuple (property, Language, lang_word, word)
+    """
+    common_words = None
+    if allowed_languages:
+        common_words = LNG_COMMON_WORDS_STRICT
+    else:
+        common_words = LNG_COMMON_WORDS
+
+    words = find_words(string)
+
+    valid_words = []
+    for word in words:
+        lang_word = word.lower()
+        key = 'language'
+        for prefix in subtitle_prefixes:
+            if lang_word.startswith(prefix):
+                lang_word = lang_word[len(prefix):]
+                key = 'subtitleLanguage'
+        for suffix in subtitle_suffixes:
+            if lang_word.endswith(suffix):
+                lang_word = lang_word[:len(suffix)]
+                key = 'subtitleLanguage'
+        for prefix in lang_prefixes:
+            if lang_word.startswith(prefix):
+                lang_word = lang_word[len(prefix):]
+        if lang_word not in common_words:
+            try:
+                lang = Language.fromguessit(lang_word)
+                if allowed_languages:
+                    if lang.name.lower() in allowed_languages or lang.alpha2.lower() in allowed_languages or lang.alpha3.lower() in allowed_languages:
+                        valid_words.append((key, lang, lang_word, word))
+                # Keep language with alpha2 equivalent. Others are probably
+                # uncommon languages.
+                elif lang == 'mul' or hasattr(lang, 'alpha2'):
+                    valid_words.append((key, lang, lang_word, word))
+            except babelfish.Error:
+                pass
+    return valid_words
 
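A hedged usage sketch for the converter registered above: importing the module is what installs it in babelfish's converter registry, after which the synonyms from the SYN table resolve through Language.fromguessit(). The import path assumes the vendored library is importable as guessit.

import guessit.language  # noqa: F401  (registering the 'guessit' converter is an import-time side effect)
from babelfish import Language

print(Language.fromguessit('br'))       # Portuguese with country BR, per the ('por', 'BR') synonyms
print(Language.fromguessit('español'))  # Spanish
print(Language.fromguessit('gr'))       # Greek ('ell')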
-UNDETERMINED = Language('und')
-ALL_LANGUAGES = frozenset(Language(lng) for lng in lng_all_names) - frozenset([UNDETERMINED])
-ALL_LANGUAGES_NAMES = lng_all_names
-
-
-def search_language(string, lang_filter=None, skip=None):
+def search_language(string, allowed_languages=None):
     """Looks for language patterns, and if found return the language object,
     its group span and an associated confidence.
 
     you can specify a list of allowed languages using the lang_filter argument,
     as in lang_filter = [ 'fr', 'eng', 'spanish' ]
 
-    >>> search_language('movie [en].avi')
-    (Language(English), (7, 9), 0.8)
+    >>> search_language('movie [en].avi')['language']
+    <Language [en]>
 
-    >>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es'])
-    (None, None, None)
+    >>> search_language('the zen fat cat and the gay mad men got a new fan', allowed_languages = ['en', 'fr', 'es'])
+
     """
-
-    # list of common words which could be interpreted as languages, but which
-    # are far too common to be able to say they represent a language in the
-    # middle of a string (where they most likely carry their commmon meaning)
-    lng_common_words = frozenset([
-        # english words
-        'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
-        'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
-        'fry', 'cop', 'zen', 'gay', 'fat', 'cherokee', 'got', 'an', 'as',
-        'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi',
-        # french words
-        'bas', 'de', 'le', 'son', 'vo', 'vf', 'ne', 'ca', 'ce', 'et', 'que',
-        'mal', 'est', 'vol', 'or', 'mon', 'se',
-        # spanish words
-        'la', 'el', 'del', 'por', 'mar',
-        # other
-        'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
-        'vi', 'ben', 'da', 'lt'
-        ])
-    sep = r'[](){} \._-+'
-
-    if lang_filter:
-        lang_filter = lang_set(lang_filter)
-
-    slow = ' %s ' % string.lower()
+    if allowed_languages:
+        allowed_languages = set(Language.fromguessit(lang) for lang in allowed_languages)
 
     confidence = 1.0  # for all of them
 
-    for lang in set(find_words(slow)) & lng_all_names:
-
-        if lang in lng_common_words:
-            continue
-
-        pos = slow.find(lang)
-
-        if pos != -1:
-            end = pos + len(lang)
-
-            # skip if span in in skip list
-            while skip and (pos - 1, end - 1) in skip:
-                pos = slow.find(lang, end)
-                if pos == -1:
-                    continue
-                end = pos + len(lang)
-            if pos == -1:
-                continue
-
-            # make sure our word is always surrounded by separators
-            if slow[pos - 1] not in sep or slow[end] not in sep:
-                continue
-
-            language = Language(slow[pos:end])
-            if lang_filter and language not in lang_filter:
-                continue
+    for prop, language, lang, word in find_possible_languages(string, allowed_languages):
+        pos = string.find(word)
+        end = pos + len(word)
 
         # only allow those languages that have a 2-letter code, those that
         # don't are too esoteric and probably false matches
-        if language.lang not in lng3_to_lng2:
-            continue
+        # if language.lang not in lng3_to_lng2:
+        #     continue
 
-        # confidence depends on lng2, lng3, english name, ...
+        # confidence depends on alpha2, alpha3, english name, ...
         if len(lang) == 2:
             confidence = 0.8
         elif len(lang) == 3:
             confidence = 0.9
+        elif prop == 'subtitleLanguage':
+            confidence = 0.6  # Subtitle prefix found with language
         else:
             # Note: we could either be really confident that we found a
             # language or assume that full language names are too
             # common words and lower their confidence accordingly
             confidence = 0.3  # going with the low-confidence route here
 
-        return language, (pos - 1, end - 1), confidence
+        return Guess({prop: language}, confidence=confidence, input=string, span=(pos, end))
 
-    return None, None, None
+    return None
 
 
-def guess_language(text):
+def guess_language(text):  # pragma: no cover
     """Guess the language in which a body of text is written.
 
     This uses the external guess-language python module, and will fail and return
 
@@ -392,7 +303,7 @@ def guess_language(text):
     """
     try:
         from guess_language import guessLanguage
-        return Language(guessLanguage(text))
+        return Language.fromguessit(guessLanguage(text))
 
     except ImportError:
         log.error('Cannot detect the language of the given text body, missing dependency: guess-language')
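A small usage sketch of the new return shape: search_language now returns a Guess (or None) rather than a (language, span, confidence) tuple, with the span and confidence carried in the guess metadata. The values in the comments follow the code in the hunk for the doctest input.

from guessit.language import search_language

guess = search_language('movie [en].avi')
if guess is not None:
    print(guess['language'])                # <Language [en]>
    print(guess.metadata('language').span)  # (7, 9), where 'en' sits in the input
    print(guess.confidence('language'))     # 0.8 for a two-letter code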
@ -2,7 +2,8 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
|
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,29 +19,36 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, \
|
||||||
from guessit import PY3, u, base_text_type
|
unicode_literals
|
||||||
from guessit.matchtree import MatchTree
|
|
||||||
from guessit.textutils import normalize_unicode, clean_string
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from guessit import PY3, u
|
||||||
|
from guessit.transfo import TransformerException
|
||||||
|
from guessit.matchtree import MatchTree
|
||||||
|
from guessit.textutils import normalize_unicode, clean_default
|
||||||
|
from guessit.guess import Guess
|
||||||
|
import inspect
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class IterativeMatcher(object):
|
class IterativeMatcher(object):
|
||||||
def __init__(self, filename, filetype='autodetect', opts=None, transfo_opts=None):
|
|
||||||
"""An iterative matcher tries to match different patterns that appear
|
"""An iterative matcher tries to match different patterns that appear
|
||||||
in the filename.
|
in the filename.
|
||||||
|
|
||||||
The 'filetype' argument indicates which type of file you want to match.
|
The ``filetype`` argument indicates which type of file you want to match.
|
||||||
If it is 'autodetect', the matcher will try to see whether it can guess
|
If it is undefined, the matcher will try to see whether it can guess
|
||||||
that the file corresponds to an episode, or otherwise will assume it is
|
that the file corresponds to an episode, or otherwise will assume it is
|
||||||
a movie.
|
a movie.
|
||||||
|
|
||||||
The recognized 'filetype' values are:
|
The recognized ``filetype`` values are:
|
||||||
[ autodetect, subtitle, info, movie, moviesubtitle, movieinfo, episode,
|
``['subtitle', 'info', 'movie', 'moviesubtitle', 'movieinfo', 'episode',
|
||||||
episodesubtitle, episodeinfo ]
|
'episodesubtitle', 'episodeinfo']``
|
||||||
|
|
||||||
|
``options`` is a dict of options values to be passed to the transformations used
|
||||||
|
by the matcher.
|
||||||
|
|
||||||
The IterativeMatcher works mainly in 2 steps:
|
The IterativeMatcher works mainly in 2 steps:
|
||||||
|
|
||||||
|
|
@ -48,7 +56,7 @@ class IterativeMatcher(object):
|
||||||
which have a semantic meaning, such as episode number, movie title,
|
which have a semantic meaning, such as episode number, movie title,
|
||||||
etc...
|
etc...
|
||||||
|
|
||||||
The match_tree created looks like the following:
|
The match_tree created looks like the following::
|
||||||
|
|
||||||
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
|
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
|
||||||
0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
|
0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
|
||||||
|
|
@ -58,123 +66,241 @@ class IterativeMatcher(object):
|
||||||
[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
|
[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
|
||||||
|
|
||||||
The first 3 lines indicate the group index in which a char in the
|
The first 3 lines indicate the group index in which a char in the
|
||||||
filename is located. So for instance, x264 is the group (0, 4, 1), and
|
filename is located. So for instance, ``x264`` (in the middle) is the group (0, 4, 1), and
|
||||||
it corresponds to a video codec, denoted by the letter'v' in the 4th line.
|
it corresponds to a video codec, denoted by the letter ``v`` in the 4th line.
|
||||||
(for more info, see guess.matchtree.to_string)
|
(for more info, see guess.matchtree.to_string)
|
||||||
|
|
||||||
Second, it tries to merge all this information into a single object
|
Second, it tries to merge all this information into a single object
|
||||||
containing all the found properties, and does some (basic) conflict
|
containing all the found properties, and does some (basic) conflict
|
||||||
resolution when they arise.
|
resolution when they arise.
|
||||||
|
|
||||||
|
|
||||||
When you create the Matcher, you can pass it:
|
|
||||||
- a list 'opts' of option names, that act as global flags
|
|
||||||
- a dict 'transfo_opts' of { transfo_name: (transfo_args, transfo_kwargs) }
|
|
||||||
with which to call the transfo.process() function.
|
|
||||||
"""
|
"""
|
||||||
|
def __init__(self, filename, options=None, **kwargs):
|
||||||
valid_filetypes = ('autodetect', 'subtitle', 'info', 'video',
|
options = dict(options or {})
|
||||||
'movie', 'moviesubtitle', 'movieinfo',
|
for k, v in kwargs.items():
|
||||||
'episode', 'episodesubtitle', 'episodeinfo')
|
if k not in options or not options[k]:
|
||||||
if filetype not in valid_filetypes:
|
options[k] = v # options dict has priority over keyword arguments
|
||||||
raise ValueError("filetype needs to be one of %s" % valid_filetypes)
|
self._validate_options(options)
|
||||||
if not PY3 and not isinstance(filename, unicode):
|
if not PY3 and not isinstance(filename, unicode):
|
||||||
log.warning('Given filename to matcher is not unicode...')
|
log.warning('Given filename to matcher is not unicode...')
|
||||||
filename = filename.decode('utf-8')
|
filename = filename.decode('utf-8')
|
||||||
|
|
||||||
filename = normalize_unicode(filename)
|
filename = normalize_unicode(filename)
|
||||||
|
if options and options.get('clean_function'):
|
||||||
|
clean_function = options.get('clean_function')
|
||||||
|
if not hasattr(clean_function, '__call__'):
|
||||||
|
module, function = clean_function.rsplit('.')
|
||||||
|
if not module:
|
||||||
|
module = 'guessit.textutils'
|
||||||
|
clean_function = getattr(__import__(module), function)
|
||||||
|
if not clean_function:
|
||||||
|
log.error('Can\'t find clean function %s. Default will be used.' % options.get('clean_function'))
|
||||||
|
clean_function = clean_default
|
||||||
|
else:
|
||||||
|
clean_function = clean_default
|
||||||
|
|
||||||
if opts is None:
|
self.match_tree = MatchTree(filename, clean_function=clean_function)
|
||||||
opts = []
|
self.options = options
|
||||||
if not isinstance(opts, list):
|
self._transfo_calls = []
|
||||||
raise ValueError('opts must be a list of option names! Received: type=%s val=%s',
|
|
||||||
type(opts), opts)
|
|
||||||
|
|
||||||
if transfo_opts is None:
|
|
||||||
transfo_opts = {}
|
|
||||||
if not isinstance(transfo_opts, dict):
|
|
||||||
raise ValueError('transfo_opts must be a dict of { transfo_name: (args, kwargs) }. '+
|
|
||||||
'Received: type=%s val=%s', type(transfo_opts), transfo_opts)
|
|
||||||
|
|
||||||
self.match_tree = MatchTree(filename)
|
|
||||||
|
|
||||||
# sanity check: make sure we don't process a (mostly) empty string
|
# sanity check: make sure we don't process a (mostly) empty string
|
||||||
if clean_string(filename) == '':
|
if clean_function(filename).strip() == '':
|
||||||
return
|
return
|
||||||
|
|
||||||
|
from guessit.plugins import transformers
|
||||||
|
|
||||||
|
try:
|
||||||
mtree = self.match_tree
|
mtree = self.match_tree
|
||||||
mtree.guess.set('type', filetype, confidence=1.0)
|
if 'type' in self.options:
|
||||||
|
mtree.guess.set('type', self.options['type'], confidence=0.0)
|
||||||
|
|
||||||
def apply_transfo(transfo_name, *args, **kwargs):
|
# Process
|
||||||
transfo = __import__('guessit.transfo.' + transfo_name,
|
for transformer in transformers.all_transformers():
|
||||||
globals=globals(), locals=locals(),
|
disabled = options.get('disabled_transformers')
|
||||||
fromlist=['process'], level=0)
|
if not disabled or transformer.name not in disabled:
|
||||||
default_args, default_kwargs = transfo_opts.get(transfo_name, ((), {}))
|
self._process(transformer, False)
|
||||||
all_args = args or default_args
|
|
||||||
all_kwargs = dict(default_kwargs)
|
|
||||||
all_kwargs.update(kwargs) # keep all kwargs merged together
|
|
||||||
transfo.process(mtree, *all_args, **all_kwargs)
|
|
||||||
|
|
||||||
# 1- first split our path into dirs + basename + ext
|
# Post-process
|
||||||
apply_transfo('split_path_components')
|
for transformer in transformers.all_transformers():
|
||||||
|
disabled = options.get('disabled_transformers')
|
||||||
# 2- guess the file type now (will be useful later)
|
if not disabled or transformer.name not in disabled:
|
||||||
apply_transfo('guess_filetype', filetype)
|
self._process(transformer, True)
|
||||||
if mtree.guess['type'] == 'unknown':
|
|
||||||
return
|
|
||||||
|
|
||||||
# 3- split each of those into explicit groups (separated by parentheses
|
|
||||||
# or square brackets)
|
|
||||||
apply_transfo('split_explicit_groups')
|
|
||||||
|
|
||||||
# 4- try to match information for specific patterns
|
|
||||||
# NOTE: order needs to comply to the following:
|
|
||||||
# - website before language (eg: tvu.org.ru vs russian)
|
|
||||||
# - language before episodes_rexps
|
|
||||||
# - properties before language (eg: he-aac vs hebrew)
|
|
||||||
# - release_group before properties (eg: XviD-?? vs xvid)
|
|
||||||
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
|
|
||||||
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
|
|
||||||
'guess_properties', 'guess_language',
|
|
||||||
'guess_video_rexps',
|
|
||||||
'guess_episodes_rexps', 'guess_weak_episodes_rexps' ]
|
|
||||||
else:
|
|
||||||
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
|
|
||||||
'guess_properties', 'guess_language',
|
|
||||||
'guess_video_rexps' ]
|
|
||||||
|
|
||||||
if 'nolanguage' in opts:
|
|
||||||
strategy.remove('guess_language')
|
|
||||||
|
|
||||||
|
|
||||||
for name in strategy:
|
|
||||||
apply_transfo(name)
|
|
||||||
|
|
||||||
# more guessers for both movies and episodes
|
|
||||||
apply_transfo('guess_bonus_features')
|
|
||||||
apply_transfo('guess_year', skip_first_year=('skip_first_year' in opts))
|
|
||||||
|
|
||||||
if 'nocountry' not in opts:
|
|
||||||
apply_transfo('guess_country')
|
|
||||||
|
|
||||||
apply_transfo('guess_idnumber')
|
|
||||||
|
|
||||||
|
|
||||||
# split into '-' separated subgroups (with required separator chars
|
|
||||||
# around the dash)
|
|
||||||
apply_transfo('split_on_dash')
|
|
||||||
|
|
||||||
# 5- try to identify the remaining unknown groups by looking at their
|
|
||||||
# position relative to other known elements
|
|
||||||
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
|
|
||||||
apply_transfo('guess_episode_info_from_position')
|
|
||||||
else:
|
|
||||||
apply_transfo('guess_movie_title_from_position')
|
|
||||||
|
|
||||||
# 6- perform some post-processing steps
|
|
||||||
apply_transfo('post_process')
|
|
||||||
|
|
||||||
log.debug('Found match tree:\n%s' % u(mtree))
|
log.debug('Found match tree:\n%s' % u(mtree))
|
||||||
|
except TransformerException as e:
|
||||||
|
log.debug('An error has occurred in Transformer %s: %s' % (e.transformer, e))
|
||||||
|
|
||||||
|
def _process(self, transformer, post=False):
|
||||||
|
|
||||||
|
if not hasattr(transformer, 'should_process') or transformer.should_process(self.match_tree, self.options):
|
||||||
|
if post:
|
||||||
|
transformer.post_process(self.match_tree, self.options)
|
||||||
|
else:
|
||||||
|
transformer.process(self.match_tree, self.options)
|
||||||
|
self._transfo_calls.append(transformer)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def second_pass_options(self):
|
||||||
|
second_pass_options = {}
|
||||||
|
for transformer in self._transfo_calls:
|
||||||
|
if hasattr(transformer, 'second_pass_options'):
|
||||||
|
transformer_second_pass_options = transformer.second_pass_options(self.match_tree, self.options)
|
||||||
|
if transformer_second_pass_options:
|
||||||
|
second_pass_options.update(transformer_second_pass_options)
|
||||||
|
|
||||||
|
return second_pass_options
|
||||||
|
|
||||||
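For illustration, the shape of a transformer object as _process and second_pass_options drive it above. This is a hedged sketch, not code from the diff: only the attribute and method names follow what the matcher actually calls; the class name and bodies are assumptions.

class ExampleTransformer(object):
    name = 'example_transformer'       # checked against 'disabled_transformers'

    def should_process(self, mtree, options=None):
        return True                    # skip expensive work when not relevant

    def process(self, mtree, options=None):
        pass                           # first pass: attach guesses to nodes

    def post_process(self, mtree, options=None):
        pass                           # second pass, after all transformers ran

    def second_pass_options(self, mtree, options=None):
        return {}                      # extra options for a re-run, if any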
|
def _validate_options(self, options):
|
||||||
|
valid_filetypes = ('subtitle', 'info', 'video',
|
||||||
|
'movie', 'moviesubtitle', 'movieinfo',
|
||||||
|
'episode', 'episodesubtitle', 'episodeinfo')
|
||||||
|
|
||||||
|
type_ = options.get('type')
|
||||||
|
if type_ and type_ not in valid_filetypes:
|
||||||
|
raise ValueError("filetype needs to be one of %s" % (valid_filetypes,))
|
||||||
|
|
||||||
def matched(self):
|
def matched(self):
|
||||||
return self.match_tree.matched()
|
return self.match_tree.matched()
|
||||||
|
|
||||||
|
|
||||||
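A short usage sketch of the reworked matcher API (not from the diff; the filename and the disabled transformer name are made up, the option keys are the ones handled by __init__ and _validate_options above).

from guessit.matcher import IterativeMatcher

matcher = IterativeMatcher('Dark.City.1998.DC.BDRip.720p.DTS.x264-CHD.mkv',
                           options={'type': 'movie',
                                    'disabled_transformers': ['guess_country']})
print(matcher.match_tree)               # visual match tree, as in the docstring
print(matcher.matched().nice_string())  # merged Guess with all found properties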
|
def build_guess(node, name, value=None, confidence=1.0):
|
||||||
|
guess = Guess({name: node.clean_value if value is None else value}, confidence=confidence)
|
||||||
|
guess.metadata().input = node.value if value is None else value
|
||||||
|
if value is None:
|
||||||
|
left_offset = 0
|
||||||
|
right_offset = 0
|
||||||
|
|
||||||
|
clean_value = node.clean_value
|
||||||
|
|
||||||
|
for i in range(0, len(node.value)):
|
||||||
|
if clean_value[0] == node.value[i]:
|
||||||
|
break
|
||||||
|
left_offset += 1
|
||||||
|
|
||||||
|
for i in reversed(range(0, len(node.value))):
|
||||||
|
if clean_value[-1] == node.value[i]:
|
||||||
|
break
|
||||||
|
right_offset += 1
|
||||||
|
|
||||||
|
guess.metadata().span = (node.span[0] - node.offset + left_offset, node.span[1] - node.offset - right_offset)
|
||||||
|
return guess
|
||||||
|
|
||||||
|
|
||||||
|
def found_property(node, name, value=None, confidence=1.0, update_guess=True, logger=None):
|
||||||
|
# automatically retrieve the log object from the caller frame
|
||||||
|
if not logger:
|
||||||
|
caller_frame = inspect.stack()[1][0]
|
||||||
|
logger = caller_frame.f_locals['self'].log
|
||||||
|
guess = build_guess(node, name, value, confidence)
|
||||||
|
return found_guess(node, guess, update_guess=update_guess, logger=logger)
|
||||||
|
|
||||||
|
|
||||||
|
def found_guess(node, guess, update_guess=True, logger=None):
|
||||||
|
if node.guess:
|
||||||
|
if update_guess:
|
||||||
|
node.guess.update_highest_confidence(guess)
|
||||||
|
else:
|
||||||
|
child = node.add_child(guess.metadata().span)
|
||||||
|
child.guess = guess
|
||||||
|
else:
|
||||||
|
node.guess = guess
|
||||||
|
log_found_guess(guess, logger)
|
||||||
|
return node.guess
|
||||||
|
|
||||||
|
|
||||||
|
def log_found_guess(guess, logger=None):
|
||||||
|
for k, v in guess.items():
|
||||||
|
(logger or log).debug('Property found: %s=%s (%s) (confidence=%.2f)' %
|
||||||
|
(k, v, guess.raw(k), guess.confidence(k)))
|
||||||
|
|
||||||
|
|
||||||
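An illustrative sketch of how a transformer typically records a property through the helpers above (not from the diff; the tagging rule and the tag_proper name are assumptions, found_property and unidentified_leaves are the functions defined in this file and in matchtree.py).

import logging
from guessit.matcher import found_property

log = logging.getLogger(__name__)

def tag_proper(mtree):
    # Mark leftover 'PROPER' tokens on otherwise unidentified leaves.
    for node in mtree.unidentified_leaves():
        if node.clean_value.lower() == 'proper':
            found_property(node, 'other', value='PROPER', confidence=1.0, logger=log)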
|
def _get_split_spans(node, span):
|
||||||
|
partition_spans = node.get_partition_spans(span)
|
||||||
|
for to_remove_span in partition_spans:
|
||||||
|
if to_remove_span[0] == span[0] and to_remove_span[1] in [span[1], span[1] + 1]:
|
||||||
|
partition_spans.remove(to_remove_span)
|
||||||
|
break
|
||||||
|
return partition_spans
|
||||||
|
|
||||||
|
|
||||||
|
class GuessFinder(object):
|
||||||
|
def __init__(self, guess_func, confidence=None, logger=None, options=None):
|
||||||
|
self.guess_func = guess_func
|
||||||
|
self.confidence = confidence
|
||||||
|
self.logger = logger or log
|
||||||
|
self.options = options
|
||||||
|
|
||||||
|
def process_nodes(self, nodes):
|
||||||
|
for node in nodes:
|
||||||
|
self.process_node(node)
|
||||||
|
|
||||||
|
def process_node(self, node, iterative=True, partial_span=None):
|
||||||
|
if partial_span:
|
||||||
|
value = node.value[partial_span[0]:partial_span[1]]
|
||||||
|
else:
|
||||||
|
value = node.value
|
||||||
|
string = ' %s ' % value # add sentinels
|
||||||
|
|
||||||
|
if not self.options:
|
||||||
|
matcher_result = self.guess_func(string, node)
|
||||||
|
else:
|
||||||
|
matcher_result = self.guess_func(string, node, self.options)
|
||||||
|
|
||||||
|
if matcher_result:
|
||||||
|
if not isinstance(matcher_result, Guess):
|
||||||
|
result, span = matcher_result
|
||||||
|
else:
|
||||||
|
result, span = matcher_result, matcher_result.metadata().span
|
||||||
|
|
||||||
|
if result:
|
||||||
|
# readjust span to compensate for sentinels
|
||||||
|
span = (span[0] - 1, span[1] - 1)
|
||||||
|
|
||||||
|
# readjust span to compensate for partial_span
|
||||||
|
if partial_span:
|
||||||
|
span = (span[0] + partial_span[0], span[1] + partial_span[0])
|
||||||
|
|
||||||
|
partition_spans = None
|
||||||
|
if self.options and 'skip_nodes' in self.options:
|
||||||
|
skip_nodes = self.options.get('skip_nodes')
|
||||||
|
for skip_node in skip_nodes:
|
||||||
|
if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\
|
||||||
|
skip_node.span == span or\
|
||||||
|
skip_node.span == (span[0] + skip_node.offset, span[1] + skip_node.offset):
|
||||||
|
if partition_spans is None:
|
||||||
|
partition_spans = _get_split_spans(node, skip_node.span)
|
||||||
|
else:
|
||||||
|
new_partition_spans = []
|
||||||
|
for partition_span in partition_spans:
|
||||||
|
tmp_node = MatchTree(value, span=partition_span, parent=node)
|
||||||
|
tmp_partitions_spans = _get_split_spans(tmp_node, skip_node.span)
|
||||||
|
new_partition_spans.extend(tmp_partitions_spans)
|
||||||
|
partition_spans.extend(new_partition_spans)
|
||||||
|
|
||||||
|
if not partition_spans:
|
||||||
|
# restore sentinels compensation
|
||||||
|
|
||||||
|
if isinstance(result, Guess):
|
||||||
|
guess = result
|
||||||
|
else:
|
||||||
|
guess = Guess(result, confidence=self.confidence, input=string, span=span)
|
||||||
|
|
||||||
|
if not iterative:
|
||||||
|
found_guess(node, guess, logger=self.logger)
|
||||||
|
else:
|
||||||
|
absolute_span = (span[0] + node.offset, span[1] + node.offset)
|
||||||
|
node.partition(span)
|
||||||
|
if node.is_leaf():
|
||||||
|
found_guess(node, guess, logger=self.logger)
|
||||||
|
else:
|
||||||
|
found_child = None
|
||||||
|
for child in node.children:
|
||||||
|
if child.span == absolute_span:
|
||||||
|
found_guess(child, guess, logger=self.logger)
|
||||||
|
found_child = child
|
||||||
|
break
|
||||||
|
for child in node.children:
|
||||||
|
if child is not found_child:
|
||||||
|
self.process_node(child)
|
||||||
|
else:
|
||||||
|
for partition_span in partition_spans:
|
||||||
|
self.process_node(node, partial_span=partition_span)
|
||||||
|
|
|
||||||
|
|
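A hedged sketch of how GuessFinder is meant to wrap a guess function (not from the diff; guess_year and its regular expression are assumptions for the example, and the (result, span) return shape is the one process_node unpacks above).

import re
from guessit.matcher import GuessFinder

def guess_year(string, node, options=None):
    # Look for a plausible 4-digit year anywhere in the given substring.
    match = re.search(r'(?:19|20)\d{2}', string)
    if match:
        return {'year': int(match.group())}, match.span()
    return None, None

# Typically called from a transformer's process():
#   GuessFinder(guess_year, confidence=1.0, options=options).process_nodes(
#       mtree.unidentified_leaves())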
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,12 +18,15 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit import UnicodeMixin, base_text_type, Guess
|
|
||||||
from guessit.textutils import clean_string, str_fill
|
import guessit # @UnusedImport needed for doctests
|
||||||
|
from guessit import UnicodeMixin, base_text_type
|
||||||
|
from guessit.textutils import clean_default, str_fill
|
||||||
from guessit.patterns import group_delimiters
|
from guessit.patterns import group_delimiters
|
||||||
from guessit.guess import (merge_similar_guesses, merge_all,
|
from guessit.guess import (merge_similar_guesses, smart_merge,
|
||||||
choose_int, choose_string)
|
choose_int, choose_string, Guess)
|
||||||
|
from itertools import takewhile
|
||||||
import copy
|
import copy
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
@ -31,23 +34,71 @@ log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class BaseMatchTree(UnicodeMixin):
|
class BaseMatchTree(UnicodeMixin):
|
||||||
"""A MatchTree represents the hierarchical split of a string into its
|
"""A BaseMatchTree is a tree covering the filename, where each
|
||||||
constituent semantic groups."""
|
node represents a substring in the filename and can have a ``Guess``
|
||||||
|
associated with it that contains the information that has been guessed
|
||||||
|
in this node. Nodes can be further split into subnodes until a proper
|
||||||
|
split has been found.
|
||||||
|
|
||||||
def __init__(self, string='', span=None, parent=None):
|
Each node has the following attributes:
|
||||||
|
- string = the original string of which this node represents a region
|
||||||
|
- span = a pair of (begin, end) indices delimiting the substring
|
||||||
|
- parent = parent node
|
||||||
|
- children = list of children nodes
|
||||||
|
- guess = Guess()
|
||||||
|
|
||||||
|
BaseMatchTrees are displayed in the following way:
|
||||||
|
|
||||||
|
>>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
|
||||||
|
>>> print(guessit.IterativeMatcher(path).match_tree)
|
||||||
|
000000 1111111111111111 2222222222222222222222222222222222222222222 333
|
||||||
|
000000 0000000000111111 0000000000111111222222222222222222222222222 000
|
||||||
|
011112 011112000011111222222222222222222 000
|
||||||
|
011112222222222222
|
||||||
|
0000011112222
|
||||||
|
01112 0111
|
||||||
|
Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
|
||||||
|
tttttttttt yyyy yyyy fffff ssss aaa vvvv rrr ccc
|
||||||
|
Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
|
||||||
|
|
||||||
|
The last line contains the filename, which you can use as a reference.
|
||||||
|
The previous line contains the type of property that has been found.
|
||||||
|
The line before that contains the filename, where all the found groups
|
||||||
|
have been blanked. Basically, what is left on this line are the leftover
|
||||||
|
groups which could not be identified.
|
||||||
|
|
||||||
|
The lines before that indicate the indices of the groups in the tree.
|
||||||
|
|
||||||
|
For instance, the part of the filename 'BDRip' is the leaf with index
|
||||||
|
``(2, 2, 1)`` (read from top to bottom), and its meaning is 'format'
|
||||||
|
(as shown by the ``f``'s on the last-but-one line).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, string='', span=None, parent=None, clean_function=None):
|
||||||
self.string = string
|
self.string = string
|
||||||
self.span = span or (0, len(string))
|
self.span = span or (0, len(string))
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.children = []
|
self.children = []
|
||||||
self.guess = Guess()
|
self.guess = Guess()
|
||||||
|
self._clean_value = None
|
||||||
|
self._clean_function = clean_function or clean_default
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def value(self):
|
def value(self):
|
||||||
|
"""Return the substring that this node matches."""
|
||||||
return self.string[self.span[0]:self.span[1]]
|
return self.string[self.span[0]:self.span[1]]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def clean_value(self):
|
def clean_value(self):
|
||||||
return clean_string(self.value)
|
"""Return a cleaned value of the matched substring, with better
|
||||||
|
presentation formatting (punctuation marks removed, duplicate
|
||||||
|
spaces, ...)"""
|
||||||
|
if self._clean_value is None:
|
||||||
|
self._clean_value = self.clean_string(self.value)
|
||||||
|
return self._clean_value
|
||||||
|
|
||||||
|
def clean_string(self, string):
|
||||||
|
return self._clean_function(string)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def offset(self):
|
def offset(self):
|
||||||
|
|
@ -55,6 +106,8 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def info(self):
|
def info(self):
|
||||||
|
"""Return a dict containing all the info guessed by this node,
|
||||||
|
subnodes included."""
|
||||||
result = dict(self.guess)
|
result = dict(self.guess)
|
||||||
|
|
||||||
for c in self.children:
|
for c in self.children:
|
||||||
|
|
@ -64,6 +117,7 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def root(self):
|
def root(self):
|
||||||
|
"""Return the root node of the tree."""
|
||||||
if not self.parent:
|
if not self.parent:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
@ -71,28 +125,43 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def depth(self):
|
def depth(self):
|
||||||
|
"""Return the depth of this node."""
|
||||||
if self.is_leaf():
|
if self.is_leaf():
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
return 1 + max(c.depth for c in self.children)
|
return 1 + max(c.depth for c in self.children)
|
||||||
|
|
||||||
def is_leaf(self):
|
def is_leaf(self):
|
||||||
|
"""Return whether this node is a leaf or not."""
|
||||||
return self.children == []
|
return self.children == []
|
||||||
|
|
||||||
def add_child(self, span):
|
def add_child(self, span):
|
||||||
child = MatchTree(self.string, span=span, parent=self)
|
"""Add a new child node to this node with the given span."""
|
||||||
|
child = MatchTree(self.string, span=span, parent=self, clean_function=self._clean_function)
|
||||||
self.children.append(child)
|
self.children.append(child)
|
||||||
|
return child
|
||||||
|
|
||||||
def partition(self, indices):
|
def get_partition_spans(self, indices):
|
||||||
|
"""Return the list of absolute spans for the regions of the original
|
||||||
|
string defined by splitting this node at the given indices (relative
|
||||||
|
to this node)"""
|
||||||
indices = sorted(indices)
|
indices = sorted(indices)
|
||||||
if indices[0] != 0:
|
if indices[0] != 0:
|
||||||
indices.insert(0, 0)
|
indices.insert(0, 0)
|
||||||
if indices[-1] != len(self.value):
|
if indices[-1] != len(self.value):
|
||||||
indices.append(len(self.value))
|
indices.append(len(self.value))
|
||||||
|
|
||||||
|
spans = []
|
||||||
for start, end in zip(indices[:-1], indices[1:]):
|
for start, end in zip(indices[:-1], indices[1:]):
|
||||||
self.add_child(span=(self.offset + start,
|
spans.append((self.offset + start,
|
||||||
self.offset + end))
|
self.offset + end))
|
||||||
|
return spans
|
||||||
|
|
||||||
|
def partition(self, indices):
|
||||||
|
"""Partition this node by splitting it at the given indices,
|
||||||
|
relative to this node."""
|
||||||
|
for partition_span in self.get_partition_spans(indices):
|
||||||
|
self.add_child(span=partition_span)
|
||||||
|
|
||||||
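A small worked example of the two methods above (not from the diff; the string and indices are made up, and the resulting spans are worked out by hand from the code).

node = BaseMatchTree('abcdef')       # span (0, 6), offset 0
node.get_partition_spans([2, 4])     # -> [(0, 2), (2, 4), (4, 6)]
node.partition([2, 4])               # adds three children with those spans
[child.value for child in node.children]   # -> ['ab', 'cd', 'ef']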
def split_on_components(self, components):
|
def split_on_components(self, components):
|
||||||
offset = 0
|
offset = 0
|
||||||
|
|
@ -104,6 +173,7 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
offset = end
|
offset = end
|
||||||
|
|
||||||
def nodes_at_depth(self, depth):
|
def nodes_at_depth(self, depth):
|
||||||
|
"""Return all the nodes at a given depth in the tree"""
|
||||||
if depth == 0:
|
if depth == 0:
|
||||||
yield self
|
yield self
|
||||||
|
|
||||||
|
|
@ -113,38 +183,109 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def node_idx(self):
|
def node_idx(self):
|
||||||
|
"""Return this node's index in the tree, as a tuple.
|
||||||
|
If this node is the root of the tree, then return ()."""
|
||||||
if self.parent is None:
|
if self.parent is None:
|
||||||
return ()
|
return ()
|
||||||
return self.parent.node_idx + (self.parent.children.index(self),)
|
return self.parent.node_idx + (self.node_last_idx,)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def node_last_idx(self):
|
||||||
|
if self.parent is None:
|
||||||
|
return None
|
||||||
|
return self.parent.children.index(self)
|
||||||
|
|
||||||
def node_at(self, idx):
|
def node_at(self, idx):
|
||||||
|
"""Return the node at the given index in the subtree rooted at
|
||||||
|
this node."""
|
||||||
if not idx:
|
if not idx:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return self.children[idx[0]].node_at(idx[1:])
|
return self.children[idx[0]].node_at(idx[1:])
|
||||||
except:
|
except IndexError:
|
||||||
raise ValueError('Non-existent node index: %s' % (idx,))
|
raise ValueError('Non-existent node index: %s' % (idx,))
|
||||||
|
|
||||||
def nodes(self):
|
def nodes(self):
|
||||||
|
"""Return all the nodes and subnodes in this tree."""
|
||||||
yield self
|
yield self
|
||||||
for child in self.children:
|
for child in self.children:
|
||||||
for node in child.nodes():
|
for node in child.nodes():
|
||||||
yield node
|
yield node
|
||||||
|
|
||||||
def _leaves(self):
|
def leaves(self):
|
||||||
|
"""Return a generator over all the nodes that are leaves."""
|
||||||
if self.is_leaf():
|
if self.is_leaf():
|
||||||
yield self
|
yield self
|
||||||
else:
|
else:
|
||||||
for child in self.children:
|
for child in self.children:
|
||||||
# pylint: disable=W0212
|
# pylint: disable=W0212
|
||||||
for leaf in child._leaves():
|
for leaf in child.leaves():
|
||||||
yield leaf
|
yield leaf
|
||||||
|
|
||||||
def leaves(self):
|
def group_node(self):
|
||||||
return list(self._leaves())
|
return self._other_group_node(0)
|
||||||
|
|
||||||
|
def previous_group_node(self):
|
||||||
|
return self._other_group_node(-1)
|
||||||
|
|
||||||
|
def next_group_node(self):
|
||||||
|
return self._other_group_node(+1)
|
||||||
|
|
||||||
|
def _other_group_node(self, offset):
|
||||||
|
if len(self.node_idx) > 1:
|
||||||
|
group_idx = self.node_idx[:2]
|
||||||
|
if group_idx[1] + offset >= 0:
|
||||||
|
other_group_idx = (group_idx[0], group_idx[1] + offset)
|
||||||
|
try:
|
||||||
|
other_group_node = self.root.node_at(other_group_idx)
|
||||||
|
return other_group_node
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
def previous_leaf(self, leaf):
|
||||||
|
"""Return previous leaf for this node"""
|
||||||
|
return self._other_leaf(leaf, -1)
|
||||||
|
|
||||||
|
def next_leaf(self, leaf):
|
||||||
|
"""Return next leaf for this node"""
|
||||||
|
return self._other_leaf(leaf, +1)
|
||||||
|
|
||||||
|
def _other_leaf(self, leaf, offset):
|
||||||
|
leaves = list(self.leaves())
|
||||||
|
index = leaves.index(leaf) + offset
|
||||||
|
if index > 0 and index < len(leaves):
|
||||||
|
return leaves[index]
|
||||||
|
return None
|
||||||
|
|
||||||
|
def previous_leaves(self, leaf):
|
||||||
|
"""Return previous leaves for this node"""
|
||||||
|
leaves = list(self.leaves())
|
||||||
|
index = leaves.index(leaf)
|
||||||
|
if index > 0 and index < len(leaves):
|
||||||
|
previous_leaves = leaves[:index]
|
||||||
|
previous_leaves.reverse()
|
||||||
|
return previous_leaves
|
||||||
|
return []
|
||||||
|
|
||||||
|
def next_leaves(self, leaf):
|
||||||
|
"""Return next leaves for this node"""
|
||||||
|
leaves = list(self.leaves())
|
||||||
|
index = leaves.index(leaf)
|
||||||
|
if index > 0 and index < len(leaves):
|
||||||
|
return leaves[index + 1:len(leaves)]
|
||||||
|
return []
|
||||||
|
|
||||||
def to_string(self):
|
def to_string(self):
|
||||||
|
"""Return a readable string representation of this tree.
|
||||||
|
|
||||||
|
The result is a multi-line string, where the lines are:
|
||||||
|
- line 1 -> N-2: each line contains the nodes at the given depth in the tree
|
||||||
|
- line N-2: original string where all the found groups have been blanked
|
||||||
|
- line N-1: type of property that has been found
|
||||||
|
- line N: the original string, which you can use as a reference.
|
||||||
|
"""
|
||||||
empty_line = ' ' * len(self.string)
|
empty_line = ' ' * len(self.string)
|
||||||
|
|
||||||
def to_hex(x):
|
def to_hex(x):
|
||||||
|
|
@ -153,14 +294,17 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def meaning(result):
|
def meaning(result):
|
||||||
mmap = { 'episodeNumber': 'E',
|
mmap = {'episodeNumber': 'E',
|
||||||
'season': 'S',
|
'season': 'S',
|
||||||
'extension': 'e',
|
'extension': 'e',
|
||||||
'format': 'f',
|
'format': 'f',
|
||||||
'language': 'l',
|
'language': 'l',
|
||||||
'country': 'C',
|
'country': 'C',
|
||||||
'videoCodec': 'v',
|
'videoCodec': 'v',
|
||||||
|
'videoProfile': 'v',
|
||||||
'audioCodec': 'a',
|
'audioCodec': 'a',
|
||||||
|
'audioProfile': 'a',
|
||||||
|
'audioChannels': 'a',
|
||||||
'website': 'w',
|
'website': 'w',
|
||||||
'container': 'c',
|
'container': 'c',
|
||||||
'series': 'T',
|
'series': 'T',
|
||||||
|
|
@ -168,7 +312,8 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
'date': 'd',
|
'date': 'd',
|
||||||
'year': 'y',
|
'year': 'y',
|
||||||
'releaseGroup': 'r',
|
'releaseGroup': 'r',
|
||||||
'screenSize': 's'
|
'screenSize': 's',
|
||||||
|
'other': 'o'
|
||||||
}
|
}
|
||||||
|
|
||||||
if result is None:
|
if result is None:
|
||||||
|
|
@ -180,7 +325,7 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
return 'x'
|
return 'x'
|
||||||
|
|
||||||
lines = [ empty_line ] * (self.depth + 2) # +2: remaining, meaning
|
lines = [empty_line] * (self.depth + 2) # +2: remaining, meaning
|
||||||
lines[-2] = self.string
|
lines[-2] = self.string
|
||||||
|
|
||||||
for node in self.nodes():
|
for node in self.nodes():
|
||||||
|
|
@ -198,63 +343,61 @@ class BaseMatchTree(UnicodeMixin):
|
||||||
|
|
||||||
lines.append(self.string)
|
lines.append(self.string)
|
||||||
|
|
||||||
return '\n'.join(lines)
|
return '\n'.join(l.rstrip() for l in lines)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return self.to_string()
|
return self.to_string()
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '<MatchTree: root=%s>' % self.value
|
||||||
|
|
||||||
|
|
||||||
class MatchTree(BaseMatchTree):
|
class MatchTree(BaseMatchTree):
|
||||||
"""The MatchTree contains a few "utility" methods which are not necessary
|
"""The MatchTree contains a few "utility" methods which are not necessary
|
||||||
for the BaseMatchTree, but add a lot of convenience for writing
|
for the BaseMatchTree, but add a lot of convenience for writing
|
||||||
higher-level rules."""
|
higher-level rules.
|
||||||
|
"""
|
||||||
|
|
||||||
def _unidentified_leaves(self,
|
def unidentified_leaves(self,
|
||||||
valid=lambda leaf: len(leaf.clean_value) >= 2):
|
valid=lambda leaf: len(leaf.clean_value) > 0):
|
||||||
for leaf in self._leaves():
|
"""Return a generator of leaves that are not empty."""
|
||||||
|
for leaf in self.leaves():
|
||||||
if not leaf.guess and valid(leaf):
|
if not leaf.guess and valid(leaf):
|
||||||
yield leaf
|
yield leaf
|
||||||
|
|
||||||
def unidentified_leaves(self,
|
def leaves_containing(self, property_name):
|
||||||
valid=lambda leaf: len(leaf.clean_value) >= 2):
|
"""Return a generator of leaves that guessed the given property."""
|
||||||
return list(self._unidentified_leaves(valid))
|
|
||||||
|
|
||||||
def _leaves_containing(self, property_name):
|
|
||||||
if isinstance(property_name, base_text_type):
|
if isinstance(property_name, base_text_type):
|
||||||
property_name = [ property_name ]
|
property_name = [property_name]
|
||||||
|
|
||||||
for leaf in self._leaves():
|
for leaf in self.leaves():
|
||||||
for prop in property_name:
|
for prop in property_name:
|
||||||
if prop in leaf.guess:
|
if prop in leaf.guess:
|
||||||
yield leaf
|
yield leaf
|
||||||
break
|
break
|
||||||
|
|
||||||
def leaves_containing(self, property_name):
|
|
||||||
return list(self._leaves_containing(property_name))
|
|
||||||
|
|
||||||
def first_leaf_containing(self, property_name):
|
def first_leaf_containing(self, property_name):
|
||||||
|
"""Return the first leaf containing the given property."""
|
||||||
try:
|
try:
|
||||||
return next(self._leaves_containing(property_name))
|
return next(self.leaves_containing(property_name))
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _previous_unidentified_leaves(self, node):
|
|
||||||
node_idx = node.node_idx
|
|
||||||
for leaf in self._unidentified_leaves():
|
|
||||||
if leaf.node_idx < node_idx:
|
|
||||||
yield leaf
|
|
||||||
|
|
||||||
def previous_unidentified_leaves(self, node):
|
def previous_unidentified_leaves(self, node):
|
||||||
return list(self._previous_unidentified_leaves(node))
|
"""Return a generator of non-empty leaves that are before the given
|
||||||
|
node (in the string)."""
|
||||||
def _previous_leaves_containing(self, node, property_name):
|
|
||||||
node_idx = node.node_idx
|
node_idx = node.node_idx
|
||||||
for leaf in self._leaves_containing(property_name):
|
for leaf in self.unidentified_leaves():
|
||||||
if leaf.node_idx < node_idx:
|
if leaf.node_idx < node_idx:
|
||||||
yield leaf
|
yield leaf
|
||||||
|
|
||||||
def previous_leaves_containing(self, node, property_name):
|
def previous_leaves_containing(self, node, property_name):
|
||||||
return list(self._previous_leaves_containing(node, property_name))
|
"""Return a generator of leaves containing the given property that are
|
||||||
|
before the given node (in the string)."""
|
||||||
|
node_idx = node.node_idx
|
||||||
|
for leaf in self.leaves_containing(property_name):
|
||||||
|
if leaf.node_idx < node_idx:
|
||||||
|
yield leaf
|
||||||
|
|
||||||
def is_explicit(self):
|
def is_explicit(self):
|
||||||
"""Return whether the group was explicitly enclosed by
|
"""Return whether the group was explicitly enclosed by
|
||||||
|
|
@ -262,26 +405,22 @@ class MatchTree(BaseMatchTree):
|
||||||
return (self.value[0] + self.value[-1]) in group_delimiters
|
return (self.value[0] + self.value[-1]) in group_delimiters
|
||||||
|
|
||||||
def matched(self):
|
def matched(self):
|
||||||
|
"""Return a single guess that contains all the info found in the
|
||||||
|
nodes of this tree, trying to merge properties as good as possible.
|
||||||
|
"""
|
||||||
|
if not getattr(self, '_matched_result', None):
|
||||||
# we need to make a copy here, as the merge functions work in place and
|
# we need to make a copy here, as the merge functions work in place and
|
||||||
# calling them on the match tree would modify it
|
# calling them on the match tree would modify it
|
||||||
parts = [node.guess for node in self.nodes() if node.guess]
|
parts = [copy.copy(node.guess) for node in self.nodes() if node.guess]
|
||||||
parts = copy.deepcopy(parts)
|
|
||||||
|
|
||||||
# 1- try to merge similar information together and give it a higher
|
result = smart_merge(parts)
|
||||||
# confidence
|
|
||||||
for int_part in ('year', 'season', 'episodeNumber'):
|
|
||||||
merge_similar_guesses(parts, int_part, choose_int)
|
|
||||||
|
|
||||||
for string_part in ('title', 'series', 'container', 'format',
|
|
||||||
'releaseGroup', 'website', 'audioCodec',
|
|
||||||
'videoCodec', 'screenSize', 'episodeFormat',
|
|
||||||
'audioChannels', 'idNumber'):
|
|
||||||
merge_similar_guesses(parts, string_part, choose_string)
|
|
||||||
|
|
||||||
# 2- merge the rest, potentially discarding information not properly
|
|
||||||
# merged before
|
|
||||||
result = merge_all(parts,
|
|
||||||
append=['language', 'subtitleLanguage', 'other'])
|
|
||||||
|
|
||||||
log.debug('Final result: ' + result.nice_string())
|
log.debug('Final result: ' + result.nice_string())
|
||||||
return result
|
self._matched_result = result
|
||||||
|
|
||||||
|
for unidentified_leaves in self.unidentified_leaves():
|
||||||
|
if 'unidentified' not in self._matched_result:
|
||||||
|
self._matched_result['unidentified'] = []
|
||||||
|
self._matched_result['unidentified'].append(unidentified_leaves.clean_value)
|
||||||
|
|
||||||
|
return self._matched_result
|
||||||
|
|
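For illustration, the kind of merged result matched() produces for a filename like the one in the BaseMatchTree docstring. This is a sketch, not output from the diff; the exact keys and values are assumptions.

tree = IterativeMatcher('Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv').match_tree
info = tree.matched()
# info is a Guess (a dict subclass), roughly:
# {'type': 'movie', 'title': 'Dark City', 'year': 1998, 'format': 'BluRay',
#  'screenSize': '720p', 'audioCodec': 'DTS', 'videoCodec': 'h264',
#  'releaseGroup': 'CHD', 'container': 'mkv', 'unidentified': ['DC']}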
|
||||||
libs/guessit/options.py (new file, 69 lines)
|
|
@ -0,0 +1,69 @@
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
|
|
||||||
|
def build_opts(transformers=None):
|
||||||
|
opts = ArgumentParser()
|
||||||
|
opts.add_argument(dest='filename', help='Filename or release name to guess', nargs='*')
|
||||||
|
|
||||||
|
naming_opts = opts.add_argument_group("Naming")
|
||||||
|
naming_opts.add_argument('-t', '--type', dest='type', default=None,
|
||||||
|
help='The suggested file type: movie, episode. If undefined, type will be guessed.')
|
||||||
|
naming_opts.add_argument('-n', '--name-only', dest='name_only', action='store_true', default=False,
|
||||||
|
help='Parse files as name only. Disable folder parsing, extension parsing, and file content analysis.')
|
||||||
|
naming_opts.add_argument('-c', '--split-camel', dest='split_camel', action='store_true', default=False,
|
||||||
|
help='Split camel case part of filename.')
|
||||||
|
|
||||||
|
naming_opts.add_argument('-X', '--disabled-transformer', action='append', dest='disabled_transformers',
|
||||||
|
help='Transformer to disable (can be used multiple time)')
|
||||||
|
|
||||||
|
output_opts = opts.add_argument_group("Output")
|
||||||
|
output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False,
|
||||||
|
help='Display debug output')
|
||||||
|
output_opts.add_argument('-P', '--show-property', dest='show_property', default=None,
|
||||||
|
help='Display the value of a single property (title, series, videoCodec, year, type ...)'),
|
||||||
|
output_opts.add_argument('-u', '--unidentified', dest='unidentified', action='store_true', default=False,
|
||||||
|
help='Display the unidentified parts.'),
|
||||||
|
output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=False,
|
||||||
|
help='Display advanced information for filename guesses, as json output')
|
||||||
|
output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=False,
|
||||||
|
help='Display information for filename guesses as yaml output (like unit-test)')
|
||||||
|
output_opts.add_argument('-f', '--input-file', dest='input_file', default=False,
|
||||||
|
help='Read filenames from an input file.')
|
||||||
|
output_opts.add_argument('-d', '--demo', action='store_true', dest='demo', default=False,
|
||||||
|
help='Run a few builtin tests instead of analyzing a file')
|
||||||
|
|
||||||
|
information_opts = opts.add_argument_group("Information")
|
||||||
|
information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=False,
|
||||||
|
help='Display properties that can be guessed.')
|
||||||
|
information_opts.add_argument('-V', '--values', dest='values', action='store_true', default=False,
|
||||||
|
help='Display property values that can be guessed.')
|
||||||
|
information_opts.add_argument('-s', '--transformers', dest='transformers', action='store_true', default=False,
|
||||||
|
help='Display transformers that can be used.')
|
||||||
|
information_opts.add_argument('--version', dest='version', action='store_true', default=False,
|
||||||
|
help='Display the guessit version.')
|
||||||
|
|
||||||
|
webservice_opts = opts.add_argument_group("guessit.io")
|
||||||
|
webservice_opts.add_argument('-b', '--bug', action='store_true', dest='submit_bug', default=False,
|
||||||
|
help='Submit a wrong detection to the guessit.io service')
|
||||||
|
|
||||||
|
other_opts = opts.add_argument_group("Other features")
|
||||||
|
other_opts.add_argument('-i', '--info', dest='info', default='filename',
|
||||||
|
help='The desired information type: filename, video, hash_mpc or a hash from python\'s '
|
||||||
|
'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
|
||||||
|
'them, comma-separated')
|
||||||
|
|
||||||
|
if transformers:
|
||||||
|
for transformer in transformers:
|
||||||
|
transformer.register_arguments(opts, naming_opts, output_opts, information_opts, webservice_opts, other_opts)
|
||||||
|
|
||||||
|
return opts, naming_opts, output_opts, information_opts, webservice_opts, other_opts
|
||||||
|
_opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts = None, None, None, None, None, None
|
||||||
|
|
||||||
|
|
||||||
|
def reload(transformers=None):
|
||||||
|
global _opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts
|
||||||
|
_opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts = build_opts(transformers)
|
||||||
|
|
||||||
|
|
||||||
|
def get_opts():
|
||||||
|
return _opts
|
||||||
|
|
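A usage sketch for the option parser defined above (not from the diff; the import path and the argv values are assumptions).

from guessit import options

options.reload()                      # build the default parser
parser = options.get_opts()
args = parser.parse_args(['-t', 'episode', '-v', 'Show.Name.S02E13.720p.mkv'])
print(args.type, args.verbose, args.filename)
# -> episode True ['Show.Name.S02E13.720p.mkv']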
@ -1,250 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# GuessIt - A library for guessing information from filenames
|
|
||||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
|
||||||
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
|
|
||||||
#
|
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
|
||||||
# the Free Software Foundation; either version 3 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# GuessIt is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# Lesser GNU General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the Lesser GNU General Public License
|
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
#
|
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ]
|
|
||||||
|
|
||||||
info_exts = [ 'nfo' ]
|
|
||||||
|
|
||||||
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
|
|
||||||
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
|
|
||||||
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv']
|
|
||||||
|
|
||||||
group_delimiters = [ '()', '[]', '{}' ]
|
|
||||||
|
|
||||||
# separator character regexp
|
|
||||||
sep = r'[][,)(}{+ /\._-]' # regexp art, hehe :D
|
|
||||||
|
|
||||||
# character used to represent a deleted char (when matching groups)
|
|
||||||
deleted = '_'
|
|
||||||
|
|
||||||
# format: [ (regexp, confidence, span_adjust) ]
|
|
||||||
episode_rexps = [ # ... Season 2 ...
|
|
||||||
(r'season (?P<season>[0-9]+)', 1.0, (0, 0)),
|
|
||||||
(r'saison (?P<season>[0-9]+)', 1.0, (0, 0)),
|
|
||||||
|
|
||||||
# ... s02e13 ...
|
|
||||||
(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<episodeNumber>(?:-?[eE-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
|
|
||||||
|
|
||||||
# ... s03-x02 ... # FIXME: redundant? remove it?
|
|
||||||
#(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<bonusNumber>(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
|
|
||||||
|
|
||||||
# ... 2x13 ...
|
|
||||||
(r'[^0-9](?P<season>[0-9]{1,2})[^0-9 .-]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),
|
|
||||||
|
|
||||||
# ... s02 ...
|
|
||||||
#(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)),
|
|
||||||
(r's(?P<season>[0-9]{1,2})[^0-9]', 0.6, (0, -1)),
|
|
||||||
|
|
||||||
# v2 or v3 for some mangas which have multiples rips
|
|
||||||
(r'(?P<episodeNumber>[0-9]{1,3})v[23]' + sep, 0.6, (0, 0)),
|
|
||||||
|
|
||||||
# ... ep 23 ...
|
|
||||||
('ep' + sep + r'(?P<episodeNumber>[0-9]{1,2})[^0-9]', 0.7, (0, -1)),
|
|
||||||
|
|
||||||
# ... e13 ... for a mini-series without a season number
|
|
||||||
(sep + r'e(?P<episodeNumber>[0-9]{1,2})' + sep, 0.6, (1, -1))
|
|
||||||
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
weak_episode_rexps = [ # ... 213 or 0106 ...
|
|
||||||
(sep + r'(?P<episodeNumber>[0-9]{2,4})' + sep, (1, -1))
|
|
||||||
]
|
|
||||||
|
|
||||||
non_episode_title = [ 'extras', 'rip' ]
|
|
||||||
|
|
||||||
|
|
||||||
video_rexps = [ # cd number
|
|
||||||
(r'cd ?(?P<cdNumber>[0-9])( ?of ?(?P<cdNumberTotal>[0-9]))?', 1.0, (0, 0)),
|
|
||||||
(r'(?P<cdNumberTotal>[1-9]) cds?', 0.9, (0, 0)),
|
|
||||||
|
|
||||||
# special editions
|
|
||||||
(r'edition' + sep + r'(?P<edition>collector)', 1.0, (0, 0)),
|
|
||||||
(r'(?P<edition>collector)' + sep + 'edition', 1.0, (0, 0)),
|
|
||||||
(r'(?P<edition>special)' + sep + 'edition', 1.0, (0, 0)),
|
|
||||||
(r'(?P<edition>criterion)' + sep + 'edition', 1.0, (0, 0)),
|
|
||||||
|
|
||||||
# director's cut
|
|
||||||
(r"(?P<edition>director'?s?" + sep + "cut)", 1.0, (0, 0)),
|
|
||||||
|
|
||||||
# video size
|
|
||||||
(r'(?P<width>[0-9]{3,4})x(?P<height>[0-9]{3,4})', 0.9, (0, 0)),
|
|
||||||
|
|
||||||
# website
|
|
||||||
(r'(?P<website>www(\.[a-zA-Z0-9]+){2,3})', 0.8, (0, 0)),
|
|
||||||
|
|
||||||
# bonusNumber: ... x01 ...
|
|
||||||
(r'x(?P<bonusNumber>[0-9]{1,2})', 1.0, (0, 0)),
|
|
||||||
|
|
||||||
# filmNumber: ... f01 ...
|
|
||||||
(r'f(?P<filmNumber>[0-9]{1,2})', 1.0, (0, 0))
|
|
||||||
]
|
|
||||||
|
|
||||||
websites = [ 'tvu.org.ru', 'emule-island.com', 'UsaBit.com', 'www.divx-overnet.com',
|
|
||||||
'sharethefiles.com' ]
|
|
||||||
|
|
||||||
unlikely_series = [ 'series' ]
|
|
||||||
|
|
||||||
|
|
||||||
# prop_multi is a dict of { property_name: { canonical_form: [ pattern ] } }
|
|
||||||
# pattern is a string considered as a regexp, with the addition that dashes are
|
|
||||||
# replaced with '([ \.-_])?' which matches more types of separators (or none)
|
|
||||||
# note: simpler patterns need to be at the end of the list to not shadow more
|
|
||||||
# complete ones, eg: 'AAC' needs to come after 'He-AAC'
|
|
||||||
# ie: from most specific to less specific
|
|
||||||
prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ],
|
|
||||||
'HD-DVD': [ 'HD-(?:DVD)?-Rip', 'HD-DVD' ],
|
|
||||||
'BluRay': [ 'Blu-ray', 'B[DR]Rip' ],
|
|
||||||
'HDTV': [ 'HD-TV' ],
|
|
||||||
'DVB': [ 'DVB-Rip', 'DVB', 'PD-TV' ],
|
|
||||||
'WEBRip': [ 'WEB-Rip' ],
|
|
||||||
'Screener': [ 'DVD-SCR', 'Screener' ],
|
|
||||||
'VHS': [ 'VHS' ],
|
|
||||||
'WEB-DL': [ 'WEB-DL' ] },
|
|
||||||
|
|
||||||
'is3D': { True: [ '3D' ] },
|
|
||||||
|
|
||||||
'screenSize': { '480p': [ '480[pi]?' ],
|
|
||||||
'720p': [ '720[pi]?' ],
|
|
||||||
'1080i': [ '1080i' ],
|
|
||||||
'1080p': [ '1080p', '1080[^i]' ] },
|
|
||||||
|
|
||||||
'videoCodec': { 'XviD': [ 'Xvid' ],
|
|
||||||
'DivX': [ 'DVDivX', 'DivX' ],
|
|
||||||
'h264': [ '[hx]-264' ],
|
|
||||||
'Rv10': [ 'Rv10' ],
|
|
||||||
'Mpeg2': [ 'Mpeg2' ] },
|
|
||||||
|
|
||||||
# has nothing to do here (or on filenames for that matter), but some
|
|
||||||
# releases use it and it helps to identify release groups, so we adapt
|
|
||||||
'videoApi': { 'DXVA': [ 'DXVA' ] },
|
|
||||||
|
|
||||||
'audioCodec': { 'AC3': [ 'AC3' ],
|
|
||||||
'DTS': [ 'DTS' ],
|
|
||||||
'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] },
|
|
||||||
|
|
||||||
'audioChannels': { '5.1': [ r'5\.1', 'DD5[._ ]1', '5ch' ] },
|
|
||||||
|
|
||||||
'episodeFormat': { 'Minisode': [ 'Minisodes?' ] }
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
# prop_single dict of { property_name: [ canonical_form ] }
|
|
||||||
prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'PUKKA',
|
|
||||||
'CHD', 'ViTE', 'TLF', 'FLAiTE',
|
|
||||||
'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS',
|
|
||||||
'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL',
|
|
||||||
'CtrlHD', 'POD', 'WiKi','IMMERSE', 'FQM',
|
|
||||||
'2HD', 'CTU', 'HALCYON', 'EbP', 'SiTV',
|
|
||||||
'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV',
|
|
||||||
'TLA', 'NTB', 'ASAP', 'MOMENTUM', 'FoV', 'D-Z0N3',
|
|
||||||
'TrollHD', 'ECI'
|
|
||||||
],
|
|
||||||
|
|
||||||
# potentially confusing release group names (they are words)
|
|
||||||
'weakReleaseGroup': [ 'DEiTY', 'FiNaLe', 'UnSeeN', 'KiNGS', 'CLUE', 'DIMENSION',
|
|
||||||
'SAiNTS', 'ARROW', 'EuReKA', 'SiNNERS', 'DiRTY', 'REWARD',
|
|
||||||
'REPTiLE',
|
|
||||||
],
|
|
||||||
|
|
||||||
'other': [ 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'Audiofixed', 'R5',
|
|
||||||
'complete', 'classic', # not so sure about these ones, could appear in a title
|
|
||||||
'ws' ] # widescreen
|
|
||||||
}
|
|
||||||
|
|
||||||
_dash = '-'
|
|
||||||
_psep = '[-. _]?'
|
|
||||||
|
|
||||||
def _to_rexp(prop):
|
|
||||||
return re.compile(prop.replace(_dash, _psep), re.IGNORECASE)
|
|
||||||
|
|
||||||
# properties_rexps dict of { property_name: { canonical_form: [ rexp ] } }
|
|
||||||
# containing the rexps compiled from both prop_multi and prop_single
|
|
||||||
properties_rexps = dict((type, dict((canonical_form,
|
|
||||||
[ _to_rexp(pattern) for pattern in patterns ])
|
|
||||||
for canonical_form, patterns in props.items()))
|
|
||||||
for type, props in prop_multi.items())
|
|
||||||
|
|
||||||
properties_rexps.update(dict((type, dict((canonical_form, [ _to_rexp(canonical_form) ])
|
|
||||||
for canonical_form in props))
|
|
||||||
for type, props in prop_single.items()))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def find_properties(string):
|
|
||||||
result = []
|
|
||||||
for property_name, props in properties_rexps.items():
|
|
||||||
# FIXME: this should be done in a more flexible way...
|
|
||||||
if property_name in ['weakReleaseGroup']:
|
|
||||||
continue
|
|
||||||
|
|
||||||
for canonical_form, rexps in props.items():
|
|
||||||
for value_rexp in rexps:
|
|
||||||
match = value_rexp.search(string)
|
|
||||||
if match:
|
|
||||||
start, end = match.span()
|
|
||||||
# make sure our word is always surrounded by separators
|
|
||||||
# note: sep is a regexp, but in this case using it as
|
|
||||||
# a char sequence achieves the same goal
|
|
||||||
if ((start > 0 and string[start-1] not in sep) or
|
|
||||||
(end < len(string) and string[end] not in sep)):
|
|
||||||
continue
|
|
||||||
|
|
||||||
result.append((property_name, canonical_form, start, end))
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
property_synonyms = { 'Special Edition': [ 'Special' ],
|
|
||||||
'Collector Edition': [ 'Collector' ],
|
|
||||||
'Criterion Edition': [ 'Criterion' ]
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def revert_synonyms():
|
|
||||||
reverse = {}
|
|
||||||
|
|
||||||
for canonical, synonyms in property_synonyms.items():
|
|
||||||
for synonym in synonyms:
|
|
||||||
reverse[synonym.lower()] = canonical
|
|
||||||
|
|
||||||
return reverse
|
|
||||||
|
|
||||||
|
|
||||||
reverse_synonyms = revert_synonyms()
|
|
||||||
|
|
||||||
|
|
||||||
def canonical_form(string):
|
|
||||||
return reverse_synonyms.get(string.lower(), string)
|
|
||||||
|
|
||||||
|
|
||||||
def compute_canonical_form(property_name, value):
|
|
||||||
"""Return the canonical form of a property given its type if it is a valid
|
|
||||||
one, None otherwise."""
|
|
||||||
if isinstance(value, basestring):
|
|
||||||
for canonical_form, rexps in properties_rexps[property_name].items():
|
|
||||||
for rexp in rexps:
|
|
||||||
if rexp.match(value):
|
|
||||||
return canonical_form
|
|
||||||
return None
|
|
||||||
libs/guessit/patterns/__init__.py (new executable file, 77 lines)
|
|
@ -0,0 +1,77 @@
|
||||||
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

import re

from guessit import base_text_type

group_delimiters = ['()', '[]', '{}']

# separator character regexp
sep = r'[][,)(}:{+ /~/\._-]'  # regexp art, hehe :D

_dash = '-'
_psep = '[\W_]?'


def build_or_pattern(patterns, escape=False):
    """Build a or pattern string from a list of possible patterns
    """
    or_pattern = []
    for pattern in patterns:
        if not or_pattern:
            or_pattern.append('(?:')
        else:
            or_pattern.append('|')
        or_pattern.append('(?:%s)' % re.escape(pattern) if escape else pattern)
    or_pattern.append(')')
    return ''.join(or_pattern)


def compile_pattern(pattern, enhance=True):
    """Compile and enhance a pattern

    :param pattern: Pattern to compile (regexp).
    :type pattern: string

    :param pattern: Enhance pattern before compiling.
    :type pattern: string

    :return: The compiled pattern
    :rtype: regular expression object
    """
    return re.compile(enhance_pattern(pattern) if enhance else pattern, re.IGNORECASE)


def enhance_pattern(pattern):
    """Enhance pattern to match more equivalent values.

    '-' are replaced by '[\W_]?', which matches more types of separators (or none)

    :param pattern: Pattern to enhance (regexp).
    :type pattern: string

    :return: The enhanced pattern
    :rtype: string
    """
    return pattern.replace(_dash, _psep)
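A hedged usage sketch (not part of the commit) of the helpers defined in libs/guessit/patterns/__init__.py above, assuming the bundled libs/ directory is importable:

# Hypothetical usage sketch only; nothing below is part of the committed diff.
from guessit.patterns import build_or_pattern, compile_pattern, enhance_pattern

assert build_or_pattern(['HDTV', 'WEB-DL']) == '(?:HDTV|WEB-DL)'
# enhance_pattern() widens every '-' into an optional separator class
assert enhance_pattern('WEB-DL') == 'WEB[\W_]?DL'
# so the compiled pattern also matches 'WEB DL', 'web_dl', 'webdl', ...
assert compile_pattern('WEB-DL').match('web dl') is not None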
32  libs/guessit/patterns/extension.py  Normal file
@@ -0,0 +1,32 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

subtitle_exts = ['srt', 'idx', 'sub', 'ssa', 'ass']

info_exts = ['nfo']

video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
              'iso']
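A small hedged sketch (not in the commit) showing how the extension lists above can be used; is_video() is a made-up helper name for illustration:

# Hypothetical usage sketch only; is_video() is not part of guessit.
import os.path
from guessit.patterns.extension import video_exts

def is_video(filename):
    # compare the extension (without the leading dot) against the known list
    return os.path.splitext(filename)[1][1:].lower() in video_exts

assert is_video('Dark.City.1998.720p.mkv')
assert not is_video('Dark.City.1998.720p.srt')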
150  libs/guessit/patterns/numeral.py  Normal file
@@ -0,0 +1,150 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

import re

digital_numeral = '\d{1,4}'

roman_numeral = "(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})"

english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]

french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]

french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]


def __build_word_numeral(*args, **kwargs):
    re_ = None
    for word_list in args:
        for word in word_list:
            if not re_:
                re_ = '(?:(?=\w+)'
            else:
                re_ += '|'
            re_ += word
    re_ += ')'
    return re_


word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)

numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'

__romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
)

__romanNumeralPattern = re.compile('^' + roman_numeral + '$')


def __parse_roman(value):
    """convert Roman numeral to integer"""
    if not __romanNumeralPattern.search(value):
        raise ValueError('Invalid Roman numeral: %s' % value)

    result = 0
    index = 0
    for num, integer in __romanNumeralMap:
        while value[index:index + len(num)] == num:
            result += integer
            index += len(num)
    return result


def __parse_word(value):
    """Convert Word numeral to integer"""
    for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
        try:
            return word_list.index(value.lower())
        except ValueError:
            pass
    raise ValueError


_clean_re = re.compile('[^\d]*(\d+)[^\d]*')


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """Parse a numeric value into integer.

    input can be an integer as a string, a roman numeral or a word

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string

    :return: Numeric value, or None if value can't be parsed
    :rtype: int
    """
    if int_enabled:
        try:
            if clean:
                match = _clean_re.match(value)
                if match:
                    clean_value = match.group(1)
                    return int(clean_value)
            return int(value)
        except ValueError:
            pass
    if roman_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_roman(word.upper())
                    except ValueError:
                        pass
            return __parse_roman(value)
        except ValueError:
            pass
    if word_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_word(word)
                    except ValueError:
                        pass
            return __parse_word(value)
        except ValueError:
            pass
    raise ValueError('Invalid numeral: ' + value)
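A hedged sketch (not part of the commit) of what parse_numeral() above accepts:

# Hypothetical usage sketch only; nothing below is part of the committed diff.
from guessit.patterns.numeral import parse_numeral

assert parse_numeral('3') == 3          # plain digits
assert parse_numeral('XIV') == 14       # roman numerals
assert parse_numeral('twelve') == 12    # english words
assert parse_numeral('treize') == 13    # french words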
21  libs/guessit/plugins/__init__.py  Normal file
@@ -0,0 +1,21 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
219  libs/guessit/plugins/transformers.py  Normal file
@@ -0,0 +1,219 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.options import reload as reload_options

from stevedore import ExtensionManager
from pkg_resources import EntryPoint

from stevedore.extension import Extension
from logging import getLogger

log = getLogger(__name__)


class Transformer(object):  # pragma: no cover
    def __init__(self, priority=0):
        self.priority = priority
        self.log = getLogger(self.name)

    @property
    def name(self):
        return self.__class__.__name__

    def supported_properties(self):
        return {}

    def second_pass_options(self, mtree, options=None):
        return None

    def should_process(self, mtree, options=None):
        return True

    def process(self, mtree, options=None):
        pass

    def post_process(self, mtree, options=None):
        pass

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        pass

    def rate_quality(self, guess, *props):
        return 0


class CustomTransformerExtensionManager(ExtensionManager):
    def __init__(self, namespace='guessit.transformer', invoke_on_load=True,
                 invoke_args=(), invoke_kwds={}, propagate_map_exceptions=True, on_load_failure_callback=None,
                 verify_requirements=False):
        super(CustomTransformerExtensionManager, self).__init__(namespace=namespace,
                                                                invoke_on_load=invoke_on_load,
                                                                invoke_args=invoke_args,
                                                                invoke_kwds=invoke_kwds,
                                                                propagate_map_exceptions=propagate_map_exceptions,
                                                                on_load_failure_callback=on_load_failure_callback,
                                                                verify_requirements=verify_requirements)

    def order_extensions(self, extensions):
        """Order the loaded transformers

        It should follow those rules
           - website before language (eg: tvu.org.ru vs russian)
           - language before episodes_rexps
           - properties before language (eg: he-aac vs hebrew)
           - release_group before properties (eg: XviD-?? vs xvid)
        """
        extensions.sort(key=lambda ext: -ext.obj.priority)
        return extensions

    def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds, verify_requirements=True):
        if not ep.dist:
            # `require` argument of ep.load() is deprecated in newer versions of setuptools
            if hasattr(ep, 'resolve'):
                plugin = ep.resolve()
            elif hasattr(ep, '_load'):
                plugin = ep._load()
            else:
                plugin = ep.load(require=False)
        else:
            plugin = ep.load()
        if invoke_on_load:
            obj = plugin(*invoke_args, **invoke_kwds)
        else:
            obj = None
        return Extension(ep.name, ep, plugin, obj)

    def _load_plugins(self, invoke_on_load, invoke_args, invoke_kwds, verify_requirements):
        return self.order_extensions(super(CustomTransformerExtensionManager, self)._load_plugins(invoke_on_load, invoke_args, invoke_kwds, verify_requirements))

    def objects(self):
        return self.map(self._get_obj)

    def _get_obj(self, ext):
        return ext.obj

    def object(self, name):
        try:
            return self[name].obj
        except KeyError:
            return None

    def register_module(self, name=None, module_name=None, attrs=(), entry_point=None):
        if entry_point:
            ep = EntryPoint.parse(entry_point)
        else:
            ep = EntryPoint(name, module_name, attrs)
        loaded = self._load_one_plugin(ep, invoke_on_load=True, invoke_args=(), invoke_kwds={})
        if loaded:
            self.extensions.append(loaded)
            self.extensions = self.order_extensions(self.extensions)
            self._extensions_by_name = None


class DefaultTransformerExtensionManager(CustomTransformerExtensionManager):
    @property
    def _internal_entry_points(self):
        return ['split_path_components = guessit.transfo.split_path_components:SplitPathComponents',
                'guess_filetype = guessit.transfo.guess_filetype:GuessFiletype',
                'split_explicit_groups = guessit.transfo.split_explicit_groups:SplitExplicitGroups',
                'guess_date = guessit.transfo.guess_date:GuessDate',
                'guess_website = guessit.transfo.guess_website:GuessWebsite',
                'guess_release_group = guessit.transfo.guess_release_group:GuessReleaseGroup',
                'guess_properties = guessit.transfo.guess_properties:GuessProperties',
                'guess_language = guessit.transfo.guess_language:GuessLanguage',
                'guess_video_rexps = guessit.transfo.guess_video_rexps:GuessVideoRexps',
                'guess_episodes_rexps = guessit.transfo.guess_episodes_rexps:GuessEpisodesRexps',
                'guess_weak_episodes_rexps = guessit.transfo.guess_weak_episodes_rexps:GuessWeakEpisodesRexps',
                'guess_bonus_features = guessit.transfo.guess_bonus_features:GuessBonusFeatures',
                'guess_year = guessit.transfo.guess_year:GuessYear',
                'guess_country = guessit.transfo.guess_country:GuessCountry',
                'guess_idnumber = guessit.transfo.guess_idnumber:GuessIdnumber',
                'split_on_dash = guessit.transfo.split_on_dash:SplitOnDash',
                'guess_episode_info_from_position = guessit.transfo.guess_episode_info_from_position:GuessEpisodeInfoFromPosition',
                'guess_movie_title_from_position = guessit.transfo.guess_movie_title_from_position:GuessMovieTitleFromPosition',
                'guess_episode_details = guessit.transfo.guess_episode_details:GuessEpisodeDetails',
                'expected_series = guessit.transfo.expected_series:ExpectedSeries',
                'expected_title = guessit.transfo.expected_title:ExpectedTitle',]

    def _find_entry_points(self, namespace):
        entry_points = {}
        # Internal entry points
        if namespace == self.namespace:
            for internal_entry_point_str in self._internal_entry_points:
                internal_entry_point = EntryPoint.parse(internal_entry_point_str)
                entry_points[internal_entry_point.name] = internal_entry_point

        # Package entry points
        setuptools_entrypoints = super(DefaultTransformerExtensionManager, self)._find_entry_points(namespace)
        for setuptools_entrypoint in setuptools_entrypoints:
            entry_points[setuptools_entrypoint.name] = setuptools_entrypoint

        return list(entry_points.values())

_extensions = None


def all_transformers():
    return _extensions.objects()


def get_transformer(name):
    return _extensions.object(name)


def add_transformer(name, module_name, class_name):
    """
    Add a transformer

    :param name: the name of the transformer. ie: 'guess_regexp_id'
    :param name: the module name. ie: 'flexget.utils.parsers.transformers.guess_regexp_id'
    :param class_name: the class name. ie: 'GuessRegexpId'
    """
    _extensions.register_module(name, module_name, (class_name,))


def add_transformer(entry_point):
    """
    Add a transformer

    :param entry_point: entry point spec format. ie: 'guess_regexp_id = flexget.utils.parsers.transformers.guess_regexp_id:GuessRegexpId'
    """
    _extensions.register_module(entry_point = entry_point)


def reload(custom=False):
    """
    Reload extension manager with default or custom one.
    :param custom: if True, custom manager will be used, else default one.
    Default manager will load default extensions from guessit and setuptools packaging extensions
    Custom manager will not load default extensions from guessit, using only setuptools packaging extensions.
    :type custom: boolean
    """
    global _extensions
    if custom:
        _extensions = CustomTransformerExtensionManager()
    else:
        _extensions = DefaultTransformerExtensionManager()
    reload_options(all_transformers())

reload()
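A hedged sketch (not part of the commit) of how a third-party transformer could plug into the extension manager above; MyTransformer and its entry-point path are invented names for illustration:

# Hypothetical usage sketch only; MyTransformer and 'mypackage...' are made-up names.
from guessit.plugins.transformers import Transformer, add_transformer

class MyTransformer(Transformer):
    def __init__(self):
        Transformer.__init__(self, priority=10)  # higher priority sorts earlier

    def process(self, mtree, options=None):
        pass  # inspect or annotate the match tree here

# the entry-point form of add_transformer() (the later definition above) would register it:
# add_transformer('my_transformer = mypackage.transformers:MyTransformer')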
65  libs/guessit/quality.py  Normal file
@@ -0,0 +1,65 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.plugins.transformers import all_transformers


def best_quality_properties(props, *guesses):
    """Retrieve the best quality guess, based on given properties

    :param props: Properties to include in the rating
    :type props: list of strings
    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses
    :rtype: :class:`guessit.guess.Guess`
    """
    best_guess = None
    best_rate = None
    for guess in guesses:
        for transformer in all_transformers():
            rate = transformer.rate_quality(guess, *props)
            if best_rate is None or best_rate < rate:
                best_rate = rate
                best_guess = guess
    return best_guess


def best_quality(*guesses):
    """Retrieve the best quality guess.

    :param guesses: Guesses to rate
    :type guesses: :class:`guessit.guess.Guess`

    :return: Best quality guess from all passed guesses
    :rtype: :class:`guessit.guess.Guess`
    """
    best_guess = None
    best_rate = None
    for guess in guesses:
        for transformer in all_transformers():
            rate = transformer.rate_quality(guess)
            if best_rate is None or best_rate < rate:
                best_rate = rate
                best_guess = guess
    return best_guess
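A hedged sketch (not part of the commit) of the quality helpers above in use:

# Hypothetical usage sketch only; nothing below is part of the committed diff.
from guessit import guess_file_info
from guessit.quality import best_quality

g1 = guess_file_info('Dark.City.1998.DVDRip.XviD.avi')
g2 = guess_file_info('Dark.City.1998.720p.BluRay.x264.mkv')
# every loaded transformer rates each guess; the guess that earns the single
# highest rate_quality() score is returned
best = best_quality(g1, g2)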
@@ -1,28 +1,28 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
-# Smewt - A smart collection manager
-# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
+# GuessIt - A library for guessing information from filenames
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
-# Smewt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
+# GuessIt is free software; you can redistribute it and/or modify it under
+# the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
-# Smewt is distributed in the hope that it will be useful,
+# GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
+# Lesser GNU General Public License for more details.
 #
-# You should have received a copy of the GNU General Public License
+# You should have received a copy of the Lesser GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import logging
 import sys
-import os, os.path
+import os
 
 
 GREEN_FONT = "\x1B[0;32m"
 YELLOW_FONT = "\x1B[0;33m"
@@ -31,7 +31,7 @@ RED_FONT = "\x1B[0;31m"
 RESET_FONT = "\x1B[0m"
 
 
-def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):
+def setup_logging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):  # pragma: no cover
     """Set up a nice colored logger as the main application logger."""
 
     class SimpleFormatter(logging.Formatter):
BIN  libs/guessit/test/1MB  Normal file
Binary file not shown.
26  libs/guessit/test/__init__.py  Normal file
@@ -0,0 +1,26 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

import logging
from guessit.slogging import setup_logging
setup_logging()
logging.disable(logging.INFO)
40  libs/guessit/test/__main__.py  Normal file
@@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.test import (test_api, test_autodetect, test_autodetect_all, test_doctests,
                          test_episode, test_hashes, test_language, test_main,
                          test_matchtree, test_movie, test_quality, test_utils)
from unittest import TextTestRunner


import logging

def main():
    for suite in [test_api.suite, test_autodetect.suite,
                  test_autodetect_all.suite, test_doctests.suite,
                  test_episode.suite, test_hashes.suite, test_language.suite,
                  test_main.suite, test_matchtree.suite, test_movie.suite,
                  test_quality.suite, test_utils.suite]:
        TextTestRunner(verbosity=2).run(suite)


if __name__ == '__main__':
    main()
489  libs/guessit/test/autodetect.yaml  Normal file
@@ -0,0 +1,489 @@
|
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
|
||||||
|
: type: movie
|
||||||
|
title: Fear and Loathing in Las Vegas
|
||||||
|
year: 1998
|
||||||
|
screenSize: 720p
|
||||||
|
format: HD-DVD
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: ESiR
|
||||||
|
|
||||||
|
? Leopard.dmg
|
||||||
|
: type: unknown
|
||||||
|
extension: dmg
|
||||||
|
|
||||||
|
? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
|
||||||
|
: type: episode
|
||||||
|
series: Duckman
|
||||||
|
season: 1
|
||||||
|
episodeNumber: 1
|
||||||
|
title: I, Duckman
|
||||||
|
date: 2002-11-07
|
||||||
|
|
||||||
|
? Series/Neverwhere/Neverwhere.05.Down.Street.[tvu.org.ru].avi
|
||||||
|
: type: episode
|
||||||
|
series: Neverwhere
|
||||||
|
episodeNumber: 5
|
||||||
|
title: Down Street
|
||||||
|
website: tvu.org.ru
|
||||||
|
|
||||||
|
? Neverwhere.05.Down.Street.[tvu.org.ru].avi
|
||||||
|
: type: episode
|
||||||
|
series: Neverwhere
|
||||||
|
episodeNumber: 5
|
||||||
|
title: Down Street
|
||||||
|
website: tvu.org.ru
|
||||||
|
|
||||||
|
? Series/Breaking Bad/Minisodes/Breaking.Bad.(Minisodes).01.Good.Cop.Bad.Cop.WEBRip.XviD.avi
|
||||||
|
: type: episode
|
||||||
|
series: Breaking Bad
|
||||||
|
episodeFormat: Minisode
|
||||||
|
episodeNumber: 1
|
||||||
|
title: Good Cop Bad Cop
|
||||||
|
format: WEBRip
|
||||||
|
videoCodec: XviD
|
||||||
|
|
||||||
|
? Series/Kaamelott/Kaamelott - Livre V - Ep 23 - Le Forfait.avi
|
||||||
|
: type: episode
|
||||||
|
series: Kaamelott
|
||||||
|
episodeNumber: 23
|
||||||
|
title: Le Forfait
|
||||||
|
|
||||||
|
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
|
||||||
|
: type: movie
|
||||||
|
title: The Doors
|
||||||
|
year: 1991
|
||||||
|
date: 2008-03-09
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
audioCodec: AC3
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: HiS@SiLUHD
|
||||||
|
language: english
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
|
||||||
|
: type: movie
|
||||||
|
title: M.A.S.H.
|
||||||
|
year: 1970
|
||||||
|
videoCodec: DivX
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? the.mentalist.501.hdtv-lol.mp4
|
||||||
|
: type: episode
|
||||||
|
series: The Mentalist
|
||||||
|
season: 5
|
||||||
|
episodeNumber: 1
|
||||||
|
format: HDTV
|
||||||
|
releaseGroup: LOL
|
||||||
|
|
||||||
|
? the.simpsons.2401.hdtv-lol.mp4
|
||||||
|
: type: episode
|
||||||
|
series: The Simpsons
|
||||||
|
season: 24
|
||||||
|
episodeNumber: 1
|
||||||
|
format: HDTV
|
||||||
|
releaseGroup: LOL
|
||||||
|
|
||||||
|
? Homeland.S02E01.HDTV.x264-EVOLVE.mp4
|
||||||
|
: type: episode
|
||||||
|
series: Homeland
|
||||||
|
season: 2
|
||||||
|
episodeNumber: 1
|
||||||
|
format: HDTV
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: EVOLVE
|
||||||
|
|
||||||
|
? /media/Band_of_Brothers-e01-Currahee.mkv
|
||||||
|
: type: episode
|
||||||
|
series: Band of Brothers
|
||||||
|
episodeNumber: 1
|
||||||
|
title: Currahee
|
||||||
|
|
||||||
|
? /media/Band_of_Brothers-x02-We_Stand_Alone_Together.mkv
|
||||||
|
: type: episode
|
||||||
|
series: Band of Brothers
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: We Stand Alone Together
|
||||||
|
|
||||||
|
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
|
||||||
|
: type: movie
|
||||||
|
title: Casino Royale
|
||||||
|
filmSeries: James Bond
|
||||||
|
filmNumber: 21
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: Stunts
|
||||||
|
|
||||||
|
? /TV Shows/new.girl.117.hdtv-lol.mp4
|
||||||
|
: type: episode
|
||||||
|
series: New Girl
|
||||||
|
season: 1
|
||||||
|
episodeNumber: 17
|
||||||
|
format: HDTV
|
||||||
|
releaseGroup: LOL
|
||||||
|
|
||||||
|
? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi
|
||||||
|
: type: episode
|
||||||
|
series: The Office (US)
|
||||||
|
country: US
|
||||||
|
season: 1
|
||||||
|
episodeNumber: 3
|
||||||
|
title: Health Care
|
||||||
|
format: HDTV
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: LOL
|
||||||
|
|
||||||
|
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
|
||||||
|
: type: movie
|
||||||
|
title: The Insider
|
||||||
|
year: 1999
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: 60 Minutes Interview-1996
|
||||||
|
|
||||||
|
? OSS_117--Cairo,_Nest_of_Spies.mkv
|
||||||
|
: type: movie
|
||||||
|
title: OSS 117--Cairo, Nest of Spies
|
||||||
|
|
||||||
|
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
|
||||||
|
: type: movie
|
||||||
|
title: Rush Beyond The Lighted Stage
|
||||||
|
bonusNumber: 9
|
||||||
|
bonusTitle: Between Sun and Moon-2002 Hartford
|
||||||
|
|
||||||
|
? House.Hunters.International.S56E06.720p.hdtv.x264.mp4
|
||||||
|
: type: episode
|
||||||
|
series: House Hunters International
|
||||||
|
season: 56
|
||||||
|
episodeNumber: 6
|
||||||
|
screenSize: 720p
|
||||||
|
format: HDTV
|
||||||
|
videoCodec: h264
|
||||||
|
|
||||||
|
? White.House.Down.2013.1080p.BluRay.DTS-HD.MA.5.1.x264-PublicHD.mkv
|
||||||
|
: type: movie
|
||||||
|
title: White House Down
|
||||||
|
year: 2013
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
audioCodec: DTS
|
||||||
|
audioProfile: HDMA
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: PublicHD
|
||||||
|
audioChannels: "5.1"
|
||||||
|
|
||||||
|
? White.House.Down.2013.1080p.BluRay.DTSHD.MA.5.1.x264-PublicHD.mkv
|
||||||
|
: type: movie
|
||||||
|
title: White House Down
|
||||||
|
year: 2013
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
audioCodec: DTS
|
||||||
|
audioProfile: HDMA
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: PublicHD
|
||||||
|
audioChannels: "5.1"
|
||||||
|
|
||||||
|
? Hostages.S01E01.Pilot.for.Air.720p.WEB-DL.DD5.1.H.264-NTb.nfo
|
||||||
|
: type: episodeinfo
|
||||||
|
series: Hostages
|
||||||
|
title: Pilot for Air
|
||||||
|
season: 1
|
||||||
|
episodeNumber: 1
|
||||||
|
screenSize: 720p
|
||||||
|
format: WEB-DL
|
||||||
|
audioChannels: "5.1"
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: DolbyDigital
|
||||||
|
releaseGroup: NTb
|
||||||
|
|
||||||
|
? Despicable.Me.2.2013.1080p.BluRay.x264-VeDeTT.nfo
|
||||||
|
: type: movieinfo
|
||||||
|
title: Despicable Me 2
|
||||||
|
year: 2013
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: VeDeTT
|
||||||
|
|
||||||
|
? Le Cinquieme Commando 1971 SUBFORCED FRENCH DVDRiP XViD AC3 Bandix.mkv
|
||||||
|
: type: movie
|
||||||
|
audioCodec: AC3
|
||||||
|
format: DVD
|
||||||
|
releaseGroup: Bandix
|
||||||
|
subtitleLanguage: French
|
||||||
|
title: Le Cinquieme Commando
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 1971
|
||||||
|
|
||||||
|
? Le Seigneur des Anneaux - La Communauté de l'Anneau - Version Longue - BDRip.mkv
|
||||||
|
: type: movie
|
||||||
|
format: BluRay
|
||||||
|
title: Le Seigneur des Anneaux
|
||||||
|
|
||||||
|
? La petite bande (Michel Deville - 1983) VF PAL MP4 x264 AAC.mkv
|
||||||
|
: type: movie
|
||||||
|
audioCodec: AAC
|
||||||
|
language: French
|
||||||
|
title: La petite bande
|
||||||
|
videoCodec: h264
|
||||||
|
year: 1983
|
||||||
|
|
||||||
|
? Retour de Flammes (Gregor Schnitzler 2003) FULL DVD.iso
|
||||||
|
: type: movie
|
||||||
|
format: DVD
|
||||||
|
title: Retour de Flammes
|
||||||
|
type: movie
|
||||||
|
year: 2003
|
||||||
|
|
||||||
|
? A.Common.Title.Special.2014.avi
|
||||||
|
: type: movie
|
||||||
|
year: 2014
|
||||||
|
title: A Common Title Special
|
||||||
|
|
||||||
|
? A.Common.Title.2014.Special.avi
|
||||||
|
: type: episode
|
||||||
|
year: 2014
|
||||||
|
series: A Common Title
|
||||||
|
title: Special
|
||||||
|
episodeDetails: Special
|
||||||
|
|
||||||
|
? A.Common.Title.2014.Special.Edition.avi
|
||||||
|
: type: movie
|
||||||
|
year: 2014
|
||||||
|
title: A Common Title
|
||||||
|
edition: Special Edition
|
||||||
|
|
||||||
|
? Downton.Abbey.2013.Christmas.Special.HDTV.x264-FoV.mp4
|
||||||
|
: type: episode
|
||||||
|
year: 2013
|
||||||
|
series: Downton Abbey
|
||||||
|
title: Christmas Special
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: FoV
|
||||||
|
format: HDTV
|
||||||
|
episodeDetails: Special
|
||||||
|
|
||||||
|
? Doctor_Who_2013_Christmas_Special.The_Time_of_The_Doctor.HD
|
||||||
|
: options: -n
|
||||||
|
type: episode
|
||||||
|
series: Doctor Who
|
||||||
|
other: HD
|
||||||
|
episodeDetails: Special
|
||||||
|
title: Christmas Special The Time of The Doctor
|
||||||
|
year: 2013
|
||||||
|
|
||||||
|
? Doctor Who 2005 50th Anniversary Special The Day of the Doctor 3.avi
|
||||||
|
: type: episode
|
||||||
|
series: Doctor Who
|
||||||
|
episodeDetails: Special
|
||||||
|
title: 50th Anniversary Special The Day of the Doctor 3
|
||||||
|
year: 2005
|
||||||
|
|
||||||
|
? Robot Chicken S06-Born Again Virgin Christmas Special HDTV x264.avi
|
||||||
|
: type: episode
|
||||||
|
series: Robot Chicken
|
||||||
|
format: HDTV
|
||||||
|
season: 6
|
||||||
|
title: Born Again Virgin Christmas Special
|
||||||
|
videoCodec: h264
|
||||||
|
episodeDetails: Special
|
||||||
|
|
||||||
|
? Wicked.Tuna.S03E00.Head.To.Tail.Special.HDTV.x264-YesTV
|
||||||
|
: options: -n
|
||||||
|
type: episode
|
||||||
|
series: Wicked Tuna
|
||||||
|
title: Head To Tail Special
|
||||||
|
releaseGroup: YesTV
|
||||||
|
season: 3
|
||||||
|
episodeNumber: 0
|
||||||
|
videoCodec: h264
|
||||||
|
format: HDTV
|
||||||
|
episodeDetails: Special
|
||||||
|
|
||||||
|
? The.Voice.UK.S03E12.HDTV.x264-C4TV
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 12
|
||||||
|
videoCodec: h264
|
||||||
|
format: HDTV
|
||||||
|
series: The Voice (UK)
|
||||||
|
releaseGroup: C4TV
|
||||||
|
season: 3
|
||||||
|
country: United Kingdom
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? /tmp/star.trek.9/star.trek.9.mkv
|
||||||
|
: type: movie
|
||||||
|
title: star trek 9
|
||||||
|
|
||||||
|
? star.trek.9.mkv
|
||||||
|
: type: movie
|
||||||
|
title: star trek 9
|
||||||
|
|
||||||
|
? FlexGet.S01E02.TheName.HDTV.xvid
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 2
|
||||||
|
format: HDTV
|
||||||
|
season: 1
|
||||||
|
series: FlexGet
|
||||||
|
title: TheName
|
||||||
|
type: episode
|
||||||
|
videoCodec: XviD
|
||||||
|
|
||||||
|
? FlexGet.S01E02.TheName.HDTV.xvid
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 2
|
||||||
|
format: HDTV
|
||||||
|
season: 1
|
||||||
|
series: FlexGet
|
||||||
|
title: TheName
|
||||||
|
type: episode
|
||||||
|
videoCodec: XviD
|
||||||
|
|
||||||
|
? some.series.S03E14.Title.Here.720p
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 14
|
||||||
|
screenSize: 720p
|
||||||
|
season: 3
|
||||||
|
series: some series
|
||||||
|
title: Title Here
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? '[the.group] Some.Series.S03E15.Title.Two.720p'
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 15
|
||||||
|
releaseGroup: the.group
|
||||||
|
screenSize: 720p
|
||||||
|
season: 3
|
||||||
|
series: Some Series
|
||||||
|
title: Title Two
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? 'HD 720p: Some series.S03E16.Title.Three'
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 16
|
||||||
|
other: HD
|
||||||
|
screenSize: 720p
|
||||||
|
season: 3
|
||||||
|
series: Some series
|
||||||
|
title: Title Three
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? Something.Season.2.1of4.Ep.Title.HDTV.torrent
|
||||||
|
: episodeCount: 4
|
||||||
|
episodeNumber: 1
|
||||||
|
format: HDTV
|
||||||
|
season: 2
|
||||||
|
series: Something
|
||||||
|
title: Title
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? Show-A (US) - Episode Title S02E09 hdtv
|
||||||
|
: options: -n
|
||||||
|
country: US
|
||||||
|
episodeNumber: 9
|
||||||
|
format: HDTV
|
||||||
|
season: 2
|
||||||
|
series: Show-A (US)
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? Jack's.Show.S03E01.blah.1080p
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 1
|
||||||
|
screenSize: 1080p
|
||||||
|
season: 3
|
||||||
|
series: Jack's Show
|
||||||
|
title: blah
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? FlexGet.epic
|
||||||
|
: options: -n
|
||||||
|
title: FlexGet epic
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? FlexGet.Apt.1
|
||||||
|
: options: -n
|
||||||
|
title: FlexGet Apt 1
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? FlexGet.aptitude
|
||||||
|
: options: -n
|
||||||
|
title: FlexGet aptitude
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? FlexGet.Step1
|
||||||
|
: options: -n
|
||||||
|
title: FlexGet Step1
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720 * 432].avi
|
||||||
|
: format: DVD
|
||||||
|
screenSize: 720x432
|
||||||
|
title: El Bosque Animado
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 1987
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi
|
||||||
|
: format: DVD
|
||||||
|
screenSize: 720x432
|
||||||
|
title: El Bosque Animado
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 1987
|
||||||
|
type: movie
|
||||||
|
|
||||||
|
? 2009.shoot.fruit.chan.multi.dvd9.pal
|
||||||
|
: options: -n
|
||||||
|
format: DVD
|
||||||
|
language: mul
|
||||||
|
other: PAL
|
||||||
|
title: shoot fruit chan
|
||||||
|
type: movie
|
||||||
|
year: 2009
|
||||||
|
|
||||||
|
? 2009.shoot.fruit.chan.multi.dvd5.pal
|
||||||
|
: options: -n
|
||||||
|
format: DVD
|
||||||
|
language: mul
|
||||||
|
other: PAL
|
||||||
|
title: shoot fruit chan
|
||||||
|
type: movie
|
||||||
|
year: 2009
|
||||||
|
|
||||||
|
? The.Flash.2014.S01E01.PREAIR.WEBRip.XviD-EVO.avi
|
||||||
|
: episodeNumber: 1
|
||||||
|
format: WEBRip
|
||||||
|
other: Preair
|
||||||
|
releaseGroup: EVO
|
||||||
|
season: 1
|
||||||
|
series: The Flash
|
||||||
|
type: episode
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 2014
|
||||||
|
|
||||||
|
? Ice.Lake.Rebels.S01E06.Ice.Lake.Games.720p.HDTV.x264-DHD
|
||||||
|
: options: -n
|
||||||
|
episodeNumber: 6
|
||||||
|
format: HDTV
|
||||||
|
releaseGroup: DHD
|
||||||
|
screenSize: 720p
|
||||||
|
season: 1
|
||||||
|
series: Ice Lake Rebels
|
||||||
|
title: Ice Lake Games
|
||||||
|
type: episode
|
||||||
|
videoCodec: h264
|
||||||
|
|
||||||
|
? The League - S06E10 - Epi Sexy.mkv
|
||||||
|
: episodeNumber: 10
|
||||||
|
season: 6
|
||||||
|
series: The League
|
||||||
|
title: Epi Sexy
|
||||||
|
type: episode
|
||||||
|
|
||||||
|
? Stay (2005) [1080p]/Stay.2005.1080p.BluRay.x264.YIFY.mp4
|
||||||
|
: format: BluRay
|
||||||
|
releaseGroup: YIFY
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Stay
|
||||||
|
type: movie
|
||||||
|
videoCodec: h264
|
||||||
|
year: 2005
|
||||||
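A hedged sketch (not part of the commit) of how one autodetect.yaml entry above can be checked by hand; the bundled guessittest.py harness performs the full comparison:

# Hypothetical usage sketch only; nothing below is part of the committed diff.
import yaml
from guessit import guess_file_info

fixtures = yaml.load(open('libs/guessit/test/autodetect.yaml'))
filename = 'Homeland.S02E01.HDTV.x264-EVOLVE.mp4'
expected = fixtures[filename]      # {'type': 'episode', 'series': 'Homeland', ...}
guess = guess_file_info(filename)
assert guess['series'] == expected['series']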
1  libs/guessit/test/dummy.srt  Normal file
@@ -0,0 +1 @@
Just a dummy srt file (used for unittests: do not remove!)
1174  libs/guessit/test/episodes.yaml  Normal file
File diff suppressed because it is too large
187  libs/guessit/test/guessittest.py  Normal file
@@ -0,0 +1,187 @@
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# GuessIt - A library for guessing information from filenames
|
||||||
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
|
#
|
||||||
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# GuessIt is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# Lesser GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the Lesser GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
from guessit import base_text_type, u
|
||||||
|
from collections import defaultdict
|
||||||
|
from unittest import TestCase, TestLoader, TextTestRunner
|
||||||
|
import shlex
|
||||||
|
import babelfish
|
||||||
|
import yaml, logging, sys, os
|
||||||
|
from os.path import *
|
||||||
|
|
||||||
|
|
||||||
|
def currentPath():
|
||||||
|
'''Returns the path in which the calling file is located.'''
|
||||||
|
return dirname(join(os.getcwd(), sys._getframe(1).f_globals['__file__']))
|
||||||
|
|
||||||
|
|
||||||
|
def addImportPath(path):
|
||||||
|
'''Function that adds the specified path to the import path. The path can be
|
||||||
|
absolute or relative to the calling file.'''
|
||||||
|
importPath = abspath(join(currentPath(), path))
|
||||||
|
sys.path = [importPath] + sys.path
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
from guessit.plugins import transformers
|
||||||
|
from guessit.options import get_opts
|
||||||
|
import guessit
|
||||||
|
from guessit import *
|
||||||
|
from guessit.matcher import *
|
||||||
|
from guessit.fileutils import *
|
||||||
|
|
||||||
|
|
||||||
|
def allTests(testClass):
|
||||||
|
return TestLoader().loadTestsFromTestCase(testClass)
|
||||||
|
|
||||||
|
|
||||||
|
class TestGuessit(TestCase):
|
||||||
|
|
||||||
|
def checkMinimumFieldsCorrect(self, filename, filetype=None, remove_type=True,
|
||||||
|
exclude_files=None):
|
||||||
|
groundTruth = yaml.load(load_file_in_same_dir(__file__, filename))
|
||||||
|
|
||||||
|
def guess_func(string, options=None):
|
||||||
|
return guess_file_info(string, options=options, type=filetype)
|
||||||
|
|
||||||
|
return self.checkFields(groundTruth, guess_func, remove_type, exclude_files)
|
||||||
|
|
||||||
|
def checkFields(self, groundTruth, guess_func, remove_type=True,
|
||||||
|
exclude_files=None):
|
||||||
|
total = 0
|
||||||
|
exclude_files = exclude_files or []
|
||||||
|
|
||||||
|
fails = defaultdict(list)
|
||||||
|
additionals = defaultdict(list)
|
||||||
|
|
||||||
|
for filename, required_fields in groundTruth.items():
|
||||||
|
filename = u(filename)
|
||||||
|
if filename in exclude_files:
|
||||||
|
continue
|
||||||
|
|
||||||
|
log.debug('\n' + '-' * 120)
|
||||||
|
log.info('Guessing information for file: %s' % filename)
|
||||||
|
|
||||||
|
options = required_fields.pop('options') if 'options' in required_fields else None
|
||||||
|
|
||||||
|
if options:
|
||||||
|
args = shlex.split(options)
|
||||||
|
options = get_opts().parse_args(args)
|
||||||
|
options = vars(options)
|
||||||
|
try:
|
||||||
|
found = guess_func(filename, options)
|
||||||
|
except Exception as e:
|
||||||
|
fails[filename].append("An exception has occured in %s: %s" % (filename, e))
|
||||||
|
log.exception("An exception has occured in %s: %s" % (filename, e))
|
||||||
|
continue
|
||||||
|
|
||||||
|
total = total + 1
|
||||||
|
|
||||||
|
# no need for these in the unittests
|
||||||
|
if remove_type:
|
||||||
|
try:
|
||||||
|
del found['type']
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
for prop in ('container', 'mimetype', 'unidentified'):
|
||||||
|
if prop in found:
|
||||||
|
del found[prop]
|
||||||
|
|
||||||
|
# props which are list of just 1 elem should be opened for easier writing of the tests
|
||||||
|
for prop in ('language', 'subtitleLanguage', 'other', 'episodeDetails', 'unidentified'):
|
||||||
|
value = found.get(prop, None)
|
||||||
|
if isinstance(value, list) and len(value) == 1:
|
||||||
|
found[prop] = value[0]
|
||||||
|
|
||||||
|
# look for missing properties
|
||||||
|
for prop, value in required_fields.items():
|
||||||
|
if prop not in found:
|
||||||
|
log.debug("Prop '%s' not found in: %s" % (prop, filename))
|
||||||
|
fails[filename].append("'%s' not found in: %s" % (prop, filename))
|
||||||
|
continue
|
||||||
|
|
||||||
|
# if both properties are strings, do a case-insensitive comparison
|
||||||
|
if (isinstance(value, base_text_type) and
|
||||||
|
isinstance(found[prop], base_text_type)):
|
||||||
|
if value.lower() != found[prop].lower():
|
||||||
|
log.debug("Wrong prop value [str] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
|
||||||
|
elif isinstance(value, list) and isinstance(found[prop], list):
|
||||||
|
if found[prop] and isinstance(found[prop][0], babelfish.Language):
|
||||||
|
# list of languages
|
||||||
|
s1 = set(Language.fromguessit(s) for s in value)
|
||||||
|
s2 = set(found[prop])
|
||||||
|
else:
|
||||||
|
# by default we assume list of strings and do a case-insensitive
|
||||||
|
# comparison on their elements
|
||||||
|
s1 = set(u(s).lower() for s in value)
|
||||||
|
s2 = set(u(s).lower() for s in found[prop])
|
||||||
|
|
||||||
|
if s1 != s2:
|
||||||
|
log.debug("Wrong prop value [list] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
|
||||||
|
elif isinstance(found[prop], babelfish.Language):
|
||||||
|
try:
|
||||||
|
if babelfish.Language.fromguessit(value) != found[prop]:
|
||||||
|
raise ValueError
|
||||||
|
except:
|
||||||
|
log.debug("Wrong prop value [Language] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
|
||||||
|
elif isinstance(found[prop], babelfish.Country):
|
||||||
|
try:
|
||||||
|
if babelfish.Country.fromguessit(value) != found[prop]:
|
||||||
|
raise ValueError
|
||||||
|
except:
|
||||||
|
log.debug("Wrong prop value [Country] for '%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
fails[filename].append("'%s': expected = '%s' - received = '%s'" % (prop, u(value), u(found[prop])))
|
||||||
|
|
||||||
|
|
||||||
|
# otherwise, just compare their values directly
|
||||||
|
else:
|
||||||
|
if found[prop] != value:
|
||||||
|
log.debug("Wrong prop value for '%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))
|
||||||
|
fails[filename].append("'%s': expected = '%s' [%s] - received = '%s' [%s]" % (prop, u(value), type(value), u(found[prop]), type(found[prop])))
|
||||||
|
|
||||||
|
# look for additional properties
|
||||||
|
for prop, value in found.items():
|
||||||
|
if prop not in required_fields:
|
||||||
|
log.debug("Found additional info for prop = '%s': '%s'" % (prop, u(value)))
|
||||||
|
additionals[filename].append("'%s': '%s'" % (prop, u(value)))
|
||||||
|
|
||||||
|
correct = total - len(fails)
|
||||||
|
log.info('SUMMARY: Guessed correctly %d out of %d filenames' % (correct, total))
|
||||||
|
|
||||||
|
for failed_entry, failed_properties in fails.items():
|
||||||
|
log.error('---- ' + failed_entry + ' ----')
|
||||||
|
for failed_property in failed_properties:
|
||||||
|
log.error("FAILED: " + failed_property)
|
||||||
|
|
||||||
|
for additional_entry, additional_properties in additionals.items():
|
||||||
|
log.warning('---- ' + additional_entry + ' ----')
|
||||||
|
for additional_property in additional_properties:
|
||||||
|
log.warning("ADDITIONAL: " + additional_property)
|
||||||
|
|
||||||
|
self.assertTrue(correct == total,
|
||||||
|
msg='Correct: %d < Total: %d' % (correct, total))
|
||||||
754
libs/guessit/test/movies.yaml
Normal file
754
libs/guessit/test/movies.yaml
Normal file
|
|
@ -0,0 +1,754 @@
|
||||||
|
|
||||||
|
? Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv
|
||||||
|
: title: Fear and Loathing in Las Vegas
|
||||||
|
year: 1998
|
||||||
|
screenSize: 720p
|
||||||
|
format: HD-DVD
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: ESiR
|
||||||
|
|
||||||
|
? Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi
|
||||||
|
: title: El Dia de la Bestia
|
||||||
|
year: 1995
|
||||||
|
format: DVD
|
||||||
|
language: spanish
|
||||||
|
videoCodec: DivX
|
||||||
|
releaseGroup: Artik[SEDG]
|
||||||
|
|
||||||
|
? Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
|
||||||
|
: title: Dark City
|
||||||
|
year: 1998
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: CHD
|
||||||
|
|
||||||
|
? Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv
|
||||||
|
: title: Sin City
|
||||||
|
year: 2005
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AC3
|
||||||
|
releaseGroup: SEPTiC
|
||||||
|
|
||||||
|
|
||||||
|
? Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi
|
||||||
|
: title: Borat
|
||||||
|
year: 2006
|
||||||
|
other: PROPER
|
||||||
|
format: DVD
|
||||||
|
other: [ R5, Proper ]
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: PUKKA
|
||||||
|
|
||||||
|
|
||||||
|
? "[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv"
|
||||||
|
: title: Le Prestige
|
||||||
|
format: DVD
|
||||||
|
videoCodec: h264
|
||||||
|
videoProfile: HP
|
||||||
|
audioCodec: AAC
|
||||||
|
audioProfile: HE
|
||||||
|
language: [ french, english ]
|
||||||
|
subtitleLanguage: [ french, english ]
|
||||||
|
releaseGroup: XCT
|
||||||
|
|
||||||
|
? Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi
|
||||||
|
: title: Battle Royale
|
||||||
|
year: 2000
|
||||||
|
edition: special edition
|
||||||
|
cdNumber: 1
|
||||||
|
cdNumberTotal: 2
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: ZeaL
|
||||||
|
|
||||||
|
? Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.avi
|
||||||
|
: title: Brazil
|
||||||
|
edition: Criterion Edition
|
||||||
|
year: 1985
|
||||||
|
cdNumber: 2
|
||||||
|
|
||||||
|
? Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv
|
||||||
|
: title: Persepolis
|
||||||
|
year: 2007
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AAC
|
||||||
|
language: [ French, English ]
|
||||||
|
subtitleLanguage: [ French, English ]
|
||||||
|
releaseGroup: XCT
|
||||||
|
|
||||||
|
? Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv
|
||||||
|
: title: Toy Story
|
||||||
|
year: 1995
|
||||||
|
format: HDTV
|
||||||
|
screenSize: 720p
|
||||||
|
language: [ english, spanish ]
|
||||||
|
|
||||||
|
? Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi
|
||||||
|
: title: Office Space
|
||||||
|
year: 1999
|
||||||
|
format: DVD
|
||||||
|
language: [ english, spanish ]
|
||||||
|
videoCodec: XviD
|
||||||
|
audioCodec: AC3
|
||||||
|
|
||||||
|
? Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.avi
|
||||||
|
: title: Wild Zero
|
||||||
|
year: 2000
|
||||||
|
videoCodec: DivX
|
||||||
|
releaseGroup: EPiC
|
||||||
|
|
||||||
|
? movies/Baraka_Edition_Collector.avi
|
||||||
|
: title: Baraka
|
||||||
|
edition: collector edition
|
||||||
|
|
||||||
|
? Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director's.Cut).CD1.DVDRip.XviD.AC3-WAF.avi
|
||||||
|
: title: Blade Runner
|
||||||
|
year: 1982
|
||||||
|
edition: Director's Cut
|
||||||
|
cdNumber: 1
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
audioCodec: AC3
|
||||||
|
releaseGroup: WAF
|
||||||
|
|
||||||
|
? movies/American.The.Bill.Hicks.Story.2009.DVDRip.XviD-EPiSODE.[UsaBit.com]/UsaBit.com_esd-americanbh.avi
|
||||||
|
: title: American The Bill Hicks Story
|
||||||
|
year: 2009
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: EPiSODE
|
||||||
|
website: UsaBit.com
|
||||||
|
|
||||||
|
? movies/Charlie.And.Boots.DVDRip.XviD-TheWretched/wthd-cab.avi
|
||||||
|
: title: Charlie And Boots
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: TheWretched
|
||||||
|
|
||||||
|
? movies/Steig Larsson Millenium Trilogy (2009) BRrip 720 AAC x264/(1)The Girl With The Dragon Tattoo (2009) BRrip 720 AAC x264.mkv
|
||||||
|
: title: The Girl With The Dragon Tattoo
|
||||||
|
filmSeries: Steig Larsson Millenium Trilogy
|
||||||
|
filmNumber: 1
|
||||||
|
year: 2009
|
||||||
|
format: BluRay
|
||||||
|
audioCodec: AAC
|
||||||
|
videoCodec: h264
|
||||||
|
screenSize: 720p
|
||||||
|
|
||||||
|
? movies/Greenberg.REPACK.LiMiTED.DVDRip.XviD-ARROW/arw-repack-greenberg.dvdrip.xvid.avi
|
||||||
|
: title: Greenberg
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: ARROW
|
||||||
|
other: ['Proper', 'Limited']
|
||||||
|
|
||||||
|
? Movies/Fr - Paris 2054, Renaissance (2005) - De Christian Volckman - (Film Divx Science Fiction Fantastique Thriller Policier N&B).avi
|
||||||
|
: title: Paris 2054, Renaissance
|
||||||
|
year: 2005
|
||||||
|
language: french
|
||||||
|
videoCodec: DivX
|
||||||
|
|
||||||
|
? Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
|
||||||
|
: title: Avida
|
||||||
|
year: 2006
|
||||||
|
language: french
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: PROD
|
||||||
|
|
||||||
|
? Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
|
||||||
|
: title: Alice in Wonderland
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: DiAMOND
|
||||||
|
|
||||||
|
? Movies/Ne.Le.Dis.A.Personne.Fr 2 cd/personnea_mp.avi
|
||||||
|
: title: Ne Le Dis A Personne
|
||||||
|
language: french
|
||||||
|
cdNumberTotal: 2
|
||||||
|
|
||||||
|
? Movies/Bunker Palace Hôtel (Enki Bilal) (1989)/Enki Bilal - Bunker Palace Hotel (Fr Vhs Rip).avi
|
||||||
|
: title: Bunker Palace Hôtel
|
||||||
|
year: 1989
|
||||||
|
language: french
|
||||||
|
format: VHS
|
||||||
|
|
||||||
|
? Movies/21 (2008)/21.(2008).DVDRip.x264.AC3-FtS.[sharethefiles.com].mkv
|
||||||
|
: title: "21"
|
||||||
|
year: 2008
|
||||||
|
format: DVD
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AC3
|
||||||
|
releaseGroup: FtS
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/9 (2009)/9.2009.Blu-ray.DTS.720p.x264.HDBRiSe.[sharethefiles.com].mkv
|
||||||
|
: title: "9"
|
||||||
|
year: 2009
|
||||||
|
format: BluRay
|
||||||
|
audioCodec: DTS
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: HDBRiSe
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam/Mamma.Mia.2008.DVDRip.AC3.XviD-CrazyTeam.avi
|
||||||
|
: title: Mamma Mia
|
||||||
|
year: 2008
|
||||||
|
format: DVD
|
||||||
|
audioCodec: AC3
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: CrazyTeam
|
||||||
|
|
||||||
|
? Movies/M.A.S.H. (1970)/MASH.(1970).[Divx.5.02][Dual-Subtitulos][DVDRip].ogm
|
||||||
|
: title: M.A.S.H.
|
||||||
|
year: 1970
|
||||||
|
videoCodec: DivX
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? Movies/The Doors (1991)/09.03.08.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
|
||||||
|
: title: The Doors
|
||||||
|
year: 1991
|
||||||
|
date: 2008-03-09
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
audioCodec: AC3
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: HiS@SiLUHD
|
||||||
|
language: english
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/The Doors (1991)/08.03.09.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv
|
||||||
|
: options: --date-year-first
|
||||||
|
title: The Doors
|
||||||
|
year: 1991
|
||||||
|
date: 2008-03-09
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
audioCodec: AC3
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: HiS@SiLUHD
|
||||||
|
language: english
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/Ratatouille/video_ts-ratatouille.srt
|
||||||
|
: title: Ratatouille
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? Movies/001 __ A classer/Fantomas se déchaine - Louis de Funès.avi
|
||||||
|
: title: Fantomas se déchaine
|
||||||
|
|
||||||
|
? Movies/Comme une Image (2004)/Comme.Une.Image.FRENCH.DVDRiP.XViD-NTK.par-www.divx-overnet.com.avi
|
||||||
|
: title: Comme une Image
|
||||||
|
year: 2004
|
||||||
|
language: french
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: NTK
|
||||||
|
website: www.divx-overnet.com
|
||||||
|
|
||||||
|
? Movies/Fantastic Mr Fox/Fantastic.Mr.Fox.2009.DVDRip.{x264+LC-AAC.5.1}{Fr-Eng}{Sub.Fr-Eng}-™.[sharethefiles.com].mkv
|
||||||
|
: title: Fantastic Mr Fox
|
||||||
|
year: 2009
|
||||||
|
format: DVD
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AAC
|
||||||
|
audioProfile: LC
|
||||||
|
audioChannels: "5.1"
|
||||||
|
language: [ french, english ]
|
||||||
|
subtitleLanguage: [ french, english ]
|
||||||
|
website: sharethefiles.com
|
||||||
|
|
||||||
|
? Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi
|
||||||
|
: title: Somewhere
|
||||||
|
year: 2010
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
releaseGroup: iLG
|
||||||
|
|
||||||
|
? Movies/Moon_(2009).mkv
|
||||||
|
: title: Moon
|
||||||
|
year: 2009
|
||||||
|
|
||||||
|
? Movies/Moon_(2009)-x01.mkv
|
||||||
|
: title: Moon
|
||||||
|
year: 2009
|
||||||
|
bonusNumber: 1
|
||||||
|
|
||||||
|
? Movies/Moon_(2009)-x02-Making_Of.mkv
|
||||||
|
: title: Moon
|
||||||
|
year: 2009
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: Making Of
|
||||||
|
|
||||||
|
? movies/James_Bond-f17-Goldeneye.mkv
|
||||||
|
: title: Goldeneye
|
||||||
|
filmSeries: James Bond
|
||||||
|
filmNumber: 17
|
||||||
|
|
||||||
|
? /movies/James_Bond-f21-Casino_Royale.mkv
|
||||||
|
: title: Casino Royale
|
||||||
|
filmSeries: James Bond
|
||||||
|
filmNumber: 21
|
||||||
|
|
||||||
|
? /movies/James_Bond-f21-Casino_Royale-x01-Becoming_Bond.mkv
|
||||||
|
: title: Casino Royale
|
||||||
|
filmSeries: James Bond
|
||||||
|
filmNumber: 21
|
||||||
|
bonusNumber: 1
|
||||||
|
bonusTitle: Becoming Bond
|
||||||
|
|
||||||
|
? /movies/James_Bond-f21-Casino_Royale-x02-Stunts.mkv
|
||||||
|
: title: Casino Royale
|
||||||
|
filmSeries: James Bond
|
||||||
|
filmNumber: 21
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: Stunts
|
||||||
|
|
||||||
|
? OSS_117--Cairo,_Nest_of_Spies.mkv
|
||||||
|
: title: OSS 117--Cairo, Nest of Spies
|
||||||
|
|
||||||
|
? The Godfather Part III.mkv
|
||||||
|
: title: The Godfather
|
||||||
|
part: 3
|
||||||
|
|
||||||
|
? Foobar Part VI.mkv
|
||||||
|
: title: Foobar
|
||||||
|
part: 6
|
||||||
|
|
||||||
|
? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4
|
||||||
|
: title: The Insider
|
||||||
|
year: 1999
|
||||||
|
bonusNumber: 2
|
||||||
|
bonusTitle: 60 Minutes Interview-1996
|
||||||
|
|
||||||
|
? Rush.._Beyond_The_Lighted_Stage-x09-Between_Sun_and_Moon-2002_Hartford.mkv
|
||||||
|
: title: Rush Beyond The Lighted Stage
|
||||||
|
bonusNumber: 9
|
||||||
|
bonusTitle: Between Sun and Moon-2002 Hartford
|
||||||
|
|
||||||
|
? /public/uTorrent/Downloads Finished/Movies/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX/Indiana.Jones.and.the.Temple.of.Doom.1984.HDTV.720p.x264.AC3.5.1-REDµX.mkv
|
||||||
|
: title: Indiana Jones and the Temple of Doom
|
||||||
|
year: 1984
|
||||||
|
format: HDTV
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AC3
|
||||||
|
audioChannels: "5.1"
|
||||||
|
releaseGroup: REDµX
|
||||||
|
|
||||||
|
? The.Director’s.Notebook.2006.Blu-Ray.x264.DXVA.720p.AC3-de[42].mkv
|
||||||
|
: title: The Director’s Notebook
|
||||||
|
year: 2006
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
videoApi: DXVA
|
||||||
|
screenSize: 720p
|
||||||
|
audioCodec: AC3
|
||||||
|
releaseGroup: de[42]
|
||||||
|
|
||||||
|
? Movies/Cosmopolis.2012.LiMiTED.720p.BluRay.x264-AN0NYM0US[bb]/ano-cosmo.720p.mkv
|
||||||
|
: title: Cosmopolis
|
||||||
|
year: 2012
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: AN0NYM0US[bb]
|
||||||
|
format: BluRay
|
||||||
|
other: LIMITED
|
||||||
|
|
||||||
|
? movies/La Science des Rêves (2006)/La.Science.Des.Reves.FRENCH.DVDRip.XviD-MP-AceBot.avi
|
||||||
|
: title: La Science des Rêves
|
||||||
|
year: 2006
|
||||||
|
format: DVD
|
||||||
|
videoCodec: XviD
|
||||||
|
videoProfile: MP
|
||||||
|
releaseGroup: AceBot
|
||||||
|
language: French
|
||||||
|
|
||||||
|
? The_Italian_Job.mkv
|
||||||
|
: title: The Italian Job
|
||||||
|
|
||||||
|
? The.Rum.Diary.2011.1080p.BluRay.DTS.x264.D-Z0N3.mkv
|
||||||
|
: title: The Rum Diary
|
||||||
|
year: 2011
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: DTS
|
||||||
|
releaseGroup: D-Z0N3
|
||||||
|
|
||||||
|
? Life.Of.Pi.2012.1080p.BluRay.DTS.x264.D-Z0N3.mkv
|
||||||
|
: title: Life Of Pi
|
||||||
|
year: 2012
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: DTS
|
||||||
|
releaseGroup: D-Z0N3
|
||||||
|
|
||||||
|
? The.Kings.Speech.2010.1080p.BluRay.DTS.x264.D Z0N3.mkv
|
||||||
|
: title: The Kings Speech
|
||||||
|
year: 2010
|
||||||
|
screenSize: 1080p
|
||||||
|
format: BluRay
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: D Z0N3
|
||||||
|
|
||||||
|
? Street.Kings.2008.BluRay.1080p.DTS.x264.dxva EuReKA.mkv
|
||||||
|
: title: Street Kings
|
||||||
|
year: 2008
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 1080p
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
videoApi: DXVA
|
||||||
|
releaseGroup: EuReKa
|
||||||
|
|
||||||
|
? 2001.A.Space.Odyssey.1968.HDDVD.1080p.DTS.x264.dxva EuReKA.mkv
|
||||||
|
: title: 2001 A Space Odyssey
|
||||||
|
year: 1968
|
||||||
|
format: HD-DVD
|
||||||
|
screenSize: 1080p
|
||||||
|
audioCodec: DTS
|
||||||
|
videoCodec: h264
|
||||||
|
videoApi: DXVA
|
||||||
|
releaseGroup: EuReKa
|
||||||
|
|
||||||
|
? 2012.2009.720p.BluRay.x264.DTS WiKi.mkv
|
||||||
|
: title: "2012"
|
||||||
|
year: 2009
|
||||||
|
screenSize: 720p
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: DTS
|
||||||
|
releaseGroup: WiKi
|
||||||
|
|
||||||
|
? /share/Download/movie/Dead Man Down (2013) BRRiP XViD DD5_1 Custom NLSubs =-_lt Q_o_Q gt-=_/XD607ebb-BRc59935-5155473f-1c5f49/XD607ebb-BRc59935-5155473f-1c5f49.avi
|
||||||
|
: title: Dead Man Down
|
||||||
|
year: 2013
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: XviD
|
||||||
|
audioChannels: "5.1"
|
||||||
|
audioCodec: DolbyDigital
|
||||||
|
idNumber: XD607ebb-BRc59935-5155473f-1c5f49
|
||||||
|
|
||||||
|
? Pacific.Rim.3D.2013.COMPLETE.BLURAY-PCH.avi
|
||||||
|
: title: Pacific Rim
|
||||||
|
year: 2013
|
||||||
|
format: BluRay
|
||||||
|
other:
|
||||||
|
- complete
|
||||||
|
- 3D
|
||||||
|
releaseGroup: PCH
|
||||||
|
|
||||||
|
? Immersion.French.2011.STV.READNFO.QC.FRENCH.ENGLISH.NTSC.DVDR.nfo
|
||||||
|
: title: Immersion French
|
||||||
|
year: 2011
|
||||||
|
language:
|
||||||
|
- French
|
||||||
|
- English
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? Immersion.French.2011.STV.READNFO.QC.FRENCH.NTSC.DVDR.nfo
|
||||||
|
: title: Immersion French
|
||||||
|
year: 2011
|
||||||
|
language: French
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? Immersion.French.2011.STV.READNFO.QC.NTSC.DVDR.nfo
|
||||||
|
: title: Immersion French
|
||||||
|
year: 2011
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? French.Immersion.2011.STV.READNFO.QC.ENGLISH.NTSC.DVDR.nfo
|
||||||
|
: title: French Immersion
|
||||||
|
year: 2011
|
||||||
|
language: ENGLISH
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? Howl's_Moving_Castle_(2004)_[720p,HDTV,x264,DTS]-FlexGet.avi
|
||||||
|
: videoCodec: h264
|
||||||
|
format: HDTV
|
||||||
|
title: Howl's Moving Castle
|
||||||
|
screenSize: 720p
|
||||||
|
year: 2004
|
||||||
|
audioCodec: DTS
|
||||||
|
releaseGroup: FlexGet
|
||||||
|
|
||||||
|
? Pirates de langkasuka.2008.FRENCH.1920X1080.h264.AVC.AsiaRa.mkv
|
||||||
|
: screenSize: 1080p
|
||||||
|
year: 2008
|
||||||
|
language: French
|
||||||
|
videoCodec: h264
|
||||||
|
title: Pirates de langkasuka
|
||||||
|
releaseGroup: AsiaRa
|
||||||
|
|
||||||
|
? Masala (2013) Telugu Movie HD DVDScr XviD - Exclusive.avi
|
||||||
|
: year: 2013
|
||||||
|
videoCodec: XviD
|
||||||
|
title: Masala
|
||||||
|
format: HD-DVD
|
||||||
|
other: screener
|
||||||
|
language: Telugu
|
||||||
|
releaseGroup: Exclusive
|
||||||
|
|
||||||
|
? Django Unchained 2012 DVDSCR X264 AAC-P2P.nfo
|
||||||
|
: year: 2012
|
||||||
|
other: screener
|
||||||
|
videoCodec: h264
|
||||||
|
title: Django Unchained
|
||||||
|
audioCodec: AAC
|
||||||
|
format: DVD
|
||||||
|
releaseGroup: P2P
|
||||||
|
|
||||||
|
? Ejecutiva.En.Apuros(2009).BLURAY.SCR.Xvid.Spanish.LanzamientosD.nfo
|
||||||
|
: year: 2009
|
||||||
|
other: screener
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: XviD
|
||||||
|
language: Spanish
|
||||||
|
title: Ejecutiva En Apuros
|
||||||
|
|
||||||
|
? Die.Schluempfe.2.German.DL.1080p.BluRay.x264-EXQUiSiTE.mkv
|
||||||
|
: title: Die Schluempfe 2
|
||||||
|
format: BluRay
|
||||||
|
language:
|
||||||
|
- Multiple languages
|
||||||
|
- German
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: EXQUiSiTE
|
||||||
|
screenSize: 1080p
|
||||||
|
|
||||||
|
? Rocky 1976 French SubForced BRRip x264 AC3-FUNKY.mkv
|
||||||
|
: title: Rocky
|
||||||
|
year: 1976
|
||||||
|
subtitleLanguage: French
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
audioCodec: AC3
|
||||||
|
releaseGroup: FUNKY
|
||||||
|
|
||||||
|
? REDLINE (BD 1080p H264 10bit FLAC) [3xR].mkv
|
||||||
|
: title: REDLINE
|
||||||
|
format: BluRay
|
||||||
|
videoCodec: h264
|
||||||
|
videoProfile: 10bit
|
||||||
|
audioCodec: Flac
|
||||||
|
screenSize: 1080p
|
||||||
|
|
||||||
|
? The.Lizzie.McGuire.Movie.(2003).HR.DVDRiP.avi
|
||||||
|
: title: The Lizzie McGuire Movie
|
||||||
|
year: 2003
|
||||||
|
format: DVD
|
||||||
|
other: HR
|
||||||
|
|
||||||
|
? Hua.Mulan.BRRIP.MP4.x264.720p-HR.avi
|
||||||
|
: title: Hua Mulan
|
||||||
|
videoCodec: h264
|
||||||
|
format: BluRay
|
||||||
|
screenSize: 720p
|
||||||
|
other: HR
|
||||||
|
|
||||||
|
? Dr.Seuss.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
|
||||||
|
: videoCodec: XviD
|
||||||
|
title: Dr Seuss The Lorax
|
||||||
|
format: DVD
|
||||||
|
other: LiNE
|
||||||
|
year: 2012
|
||||||
|
audioCodec: AC3
|
||||||
|
audioProfile: HQ
|
||||||
|
releaseGroup: Hive-CM8
|
||||||
|
|
||||||
|
|
||||||
|
? "Star Wars: Episode IV - A New Hope (2004) Special Edition.MKV"
|
||||||
|
: title: Star Wars Episode IV
|
||||||
|
year: 2004
|
||||||
|
edition: Special Edition
|
||||||
|
|
||||||
|
? Dr.LiNE.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4
|
||||||
|
: videoCodec: XviD
|
||||||
|
title: Dr LiNE The Lorax
|
||||||
|
format: DVD
|
||||||
|
other: LiNE
|
||||||
|
year: 2012
|
||||||
|
audioCodec: AC3
|
||||||
|
audioProfile: HQ
|
||||||
|
releaseGroup: Hive-CM8
|
||||||
|
|
||||||
|
? Perfect Child-2007-TRUEFRENCH-TVRip.Xvid-h@mster.avi
|
||||||
|
: releaseGroup: h@mster
|
||||||
|
title: Perfect Child
|
||||||
|
videoCodec: XviD
|
||||||
|
language: French
|
||||||
|
format: TV
|
||||||
|
year: 2007
|
||||||
|
|
||||||
|
? entre.ciel.et.terre.(1994).dvdrip.h264.aac-psypeon.avi
|
||||||
|
: audioCodec: AAC
|
||||||
|
format: DVD
|
||||||
|
releaseGroup: psypeon
|
||||||
|
title: entre ciel et terre
|
||||||
|
videoCodec: h264
|
||||||
|
year: 1994
|
||||||
|
|
||||||
|
? Yves.Saint.Laurent.2013.FRENCH.DVDSCR.MD.XviD-ViVARiUM.avi
|
||||||
|
: format: DVD
|
||||||
|
language: French
|
||||||
|
other: Screener
|
||||||
|
releaseGroup: ViVARiUM
|
||||||
|
title: Yves Saint Laurent
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 2013
|
||||||
|
|
||||||
|
? Echec et Mort - Hard to Kill - Steven Seagal Multi 1080p BluRay x264 CCATS.avi
|
||||||
|
: format: BluRay
|
||||||
|
language: Multiple languages
|
||||||
|
releaseGroup: CCATS
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Echec et Mort
|
||||||
|
videoCodec: h264
|
||||||
|
|
||||||
|
? Paparazzi - Timsit/Lindon (MKV 1080p tvripHD)
|
||||||
|
: options: -n
|
||||||
|
title: Paparazzi
|
||||||
|
screenSize: 1080p
|
||||||
|
format: HDTV
|
||||||
|
|
||||||
|
? some.movie.720p.bluray.x264-mind
|
||||||
|
: options: -n
|
||||||
|
title: some movie
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
releaseGroup: mind
|
||||||
|
format: BluRay
|
||||||
|
|
||||||
|
? Dr LiNE The Lorax 720p h264 BluRay
|
||||||
|
: options: -n
|
||||||
|
title: Dr LiNE The Lorax
|
||||||
|
screenSize: 720p
|
||||||
|
videoCodec: h264
|
||||||
|
format: BluRay
|
||||||
|
|
||||||
|
? BeatdownFrenchDVDRip.mkv
|
||||||
|
: options: -c
|
||||||
|
title: Beatdown
|
||||||
|
language: French
|
||||||
|
format: DVD
|
||||||
|
|
||||||
|
? YvesSaintLaurent2013FrenchDVDScrXvid.avi
|
||||||
|
: options: -c
|
||||||
|
format: DVD
|
||||||
|
language: French
|
||||||
|
other: Screener
|
||||||
|
title: Yves saint laurent
|
||||||
|
videoCodec: XviD
|
||||||
|
year: 2013
|
||||||
|
|
||||||
|
? Elle.s.en.va.720p.mkv
|
||||||
|
: screenSize: 720p
|
||||||
|
title: Elle s en va
|
||||||
|
|
||||||
|
? FooBar.7.PDTV-FlexGet
|
||||||
|
: options: -n
|
||||||
|
format: DVB
|
||||||
|
releaseGroup: FlexGet
|
||||||
|
title: FooBar 7
|
||||||
|
|
||||||
|
? h265 - HEVC Riddick Unrated Director Cut French 1080p DTS.mkv
|
||||||
|
: audioCodec: DTS
|
||||||
|
edition: Director's cut
|
||||||
|
language: fr
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Riddick Unrated
|
||||||
|
videoCodec: h265
|
||||||
|
|
||||||
|
? "[h265 - HEVC] Riddick Unrated Director Cut French [1080p DTS].mkv"
|
||||||
|
: audioCodec: DTS
|
||||||
|
edition: Director's cut
|
||||||
|
language: fr
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Riddick Unrated
|
||||||
|
videoCodec: h265
|
||||||
|
|
||||||
|
? Barbecue-2014-French-mHD-1080p
|
||||||
|
: options: -n
|
||||||
|
language: fr
|
||||||
|
other: mHD
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Barbecue
|
||||||
|
year: 2014
|
||||||
|
|
||||||
|
? Underworld Quadrilogie VO+VFF+VFQ 1080p HDlight.x264~Tonyk~Monde Infernal
|
||||||
|
: options: -n
|
||||||
|
language:
|
||||||
|
- fr
|
||||||
|
- vo
|
||||||
|
other: HDLight
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Underworld Quadrilogie
|
||||||
|
videoCodec: h264
|
||||||
|
|
||||||
|
? A Bout Portant (The Killers).PAL.Multi.DVD-R-KZ
|
||||||
|
: options: -n
|
||||||
|
format: DVD
|
||||||
|
language: mul
|
||||||
|
releaseGroup: KZ
|
||||||
|
title: A Bout Portant
|
||||||
|
|
||||||
|
? "Mise à Sac (Alain Cavalier, 1967) [Vhs.Rip.Vff]"
|
||||||
|
: options: -n
|
||||||
|
format: VHS
|
||||||
|
language: fr
|
||||||
|
title: "Mise à Sac"
|
||||||
|
year: 1967
|
||||||
|
|
||||||
|
? A Bout Portant (The Killers).PAL.Multi.DVD-R-KZ
|
||||||
|
: options: -n
|
||||||
|
format: DVD
|
||||||
|
language: mul
|
||||||
|
releaseGroup: KZ
|
||||||
|
title: A Bout Portant
|
||||||
|
|
||||||
|
? Youth.In.Revolt.(Be.Bad).2009.MULTI.1080p.LAME3*92-MEDIOZZ
|
||||||
|
: options: -n
|
||||||
|
audioCodec: MP3
|
||||||
|
language: mul
|
||||||
|
releaseGroup: MEDIOZZ
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Youth In Revolt
|
||||||
|
year: 2009
|
||||||
|
|
||||||
|
? La Defense Lincoln (The Lincoln Lawyer) 2011 [DVDRIP][Vostfr]
|
||||||
|
: options: -n
|
||||||
|
format: DVD
|
||||||
|
subtitleLanguage: fr
|
||||||
|
title: La Defense Lincoln
|
||||||
|
year: 2011
|
||||||
|
|
||||||
|
? '[h265 - HEVC] Fight Club French 1080p DTS.'
|
||||||
|
: options: -n
|
||||||
|
audioCodec: DTS
|
||||||
|
language: fr
|
||||||
|
screenSize: 1080p
|
||||||
|
title: Fight Club
|
||||||
|
videoCodec: h265
|
||||||
|
|
||||||
|
? Love Gourou (Mike Myers) - FR
|
||||||
|
: options: -n
|
||||||
|
language: fr
|
||||||
|
title: Love Gourou
|
||||||
|
|
||||||
|
? '[h265 - hevc] transformers 2 1080p french ac3 6ch.'
|
||||||
|
: options: -n
|
||||||
|
audioChannels: '5.1'
|
||||||
|
audioCodec: AC3
|
||||||
|
language: fr
|
||||||
|
screenSize: 1080p
|
||||||
|
title: transformers 2
|
||||||
|
videoCodec: h265
|
||||||
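Each fixture above pairs a raw release path (the "?" line) with the fields guessit is expected to extract (the ":" block). Below is a minimal, hedged sketch of checking one such entry by hand against the vendored library; the filename and expected values are copied from the Dark City fixture earlier in this file, and guess_movie_info is the same entry point used by test_api.py later in this changeset. It is an illustration, not part of the diff.

# Sketch only: assumes the vendored guessit package is importable from the checkout.
import guessit

filename = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
expected = {'title': 'Dark City', 'year': 1998, 'format': 'BluRay',
            'screenSize': '720p', 'audioCodec': 'DTS',
            'videoCodec': 'h264', 'releaseGroup': 'CHD'}

guess = guessit.guess_movie_info(filename)
for key, value in expected.items():
    # every expected field from the YAML entry should appear in the guess
    assert guess.get(key) == value, (key, guess.get(key), value)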
473 libs/guessit/test/opensubtitles_languages_2012_05_09.txt (new file)
@@ -0,0 +1,473 @@
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
|
||||||
|
aar aa Afar, afar 0 0
|
||||||
|
abk ab Abkhazian 0 0
|
||||||
|
ace Achinese 0 0
|
||||||
|
ach Acoli 0 0
|
||||||
|
ada Adangme 0 0
|
||||||
|
ady adyghé 0 0
|
||||||
|
afa Afro-Asiatic (Other) 0 0
|
||||||
|
afh Afrihili 0 0
|
||||||
|
afr af Afrikaans 0 0
|
||||||
|
ain Ainu 0 0
|
||||||
|
aka ak Akan 0 0
|
||||||
|
akk Akkadian 0 0
|
||||||
|
alb sq Albanian 1 1
|
||||||
|
ale Aleut 0 0
|
||||||
|
alg Algonquian languages 0 0
|
||||||
|
alt Southern Altai 0 0
|
||||||
|
amh am Amharic 0 0
|
||||||
|
ang English, Old (ca.450-1100) 0 0
|
||||||
|
apa Apache languages 0 0
|
||||||
|
ara ar Arabic 1 1
|
||||||
|
arc Aramaic 0 0
|
||||||
|
arg an Aragonese 0 0
|
||||||
|
arm hy Armenian 1 0
|
||||||
|
arn Araucanian 0 0
|
||||||
|
arp Arapaho 0 0
|
||||||
|
art Artificial (Other) 0 0
|
||||||
|
arw Arawak 0 0
|
||||||
|
asm as Assamese 0 0
|
||||||
|
ast Asturian, Bable 0 0
|
||||||
|
ath Athapascan languages 0 0
|
||||||
|
aus Australian languages 0 0
|
||||||
|
ava av Avaric 0 0
|
||||||
|
ave ae Avestan 0 0
|
||||||
|
awa Awadhi 0 0
|
||||||
|
aym ay Aymara 0 0
|
||||||
|
aze az Azerbaijani 0 0
|
||||||
|
bad Banda 0 0
|
||||||
|
bai Bamileke languages 0 0
|
||||||
|
bak ba Bashkir 0 0
|
||||||
|
bal Baluchi 0 0
|
||||||
|
bam bm Bambara 0 0
|
||||||
|
ban Balinese 0 0
|
||||||
|
baq eu Basque 1 1
|
||||||
|
bas Basa 0 0
|
||||||
|
bat Baltic (Other) 0 0
|
||||||
|
bej Beja 0 0
|
||||||
|
bel be Belarusian 0 0
|
||||||
|
bem Bemba 0 0
|
||||||
|
ben bn Bengali 1 0
|
||||||
|
ber Berber (Other) 0 0
|
||||||
|
bho Bhojpuri 0 0
|
||||||
|
bih bh Bihari 0 0
|
||||||
|
bik Bikol 0 0
|
||||||
|
bin Bini 0 0
|
||||||
|
bis bi Bislama 0 0
|
||||||
|
bla Siksika 0 0
|
||||||
|
bnt Bantu (Other) 0 0
|
||||||
|
bos bs Bosnian 1 0
|
||||||
|
bra Braj 0 0
|
||||||
|
bre br Breton 1 0
|
||||||
|
btk Batak (Indonesia) 0 0
|
||||||
|
bua Buriat 0 0
|
||||||
|
bug Buginese 0 0
|
||||||
|
bul bg Bulgarian 1 1
|
||||||
|
bur my Burmese 0 0
|
||||||
|
byn Blin 0 0
|
||||||
|
cad Caddo 0 0
|
||||||
|
cai Central American Indian (Other) 0 0
|
||||||
|
car Carib 0 0
|
||||||
|
cat ca Catalan 1 1
|
||||||
|
cau Caucasian (Other) 0 0
|
||||||
|
ceb Cebuano 0 0
|
||||||
|
cel Celtic (Other) 0 0
|
||||||
|
cha ch Chamorro 0 0
|
||||||
|
chb Chibcha 0 0
|
||||||
|
che ce Chechen 0 0
|
||||||
|
chg Chagatai 0 0
|
||||||
|
chi zh Chinese 1 1
|
||||||
|
chk Chuukese 0 0
|
||||||
|
chm Mari 0 0
|
||||||
|
chn Chinook jargon 0 0
|
||||||
|
cho Choctaw 0 0
|
||||||
|
chp Chipewyan 0 0
|
||||||
|
chr Cherokee 0 0
|
||||||
|
chu cu Church Slavic 0 0
|
||||||
|
chv cv Chuvash 0 0
|
||||||
|
chy Cheyenne 0 0
|
||||||
|
cmc Chamic languages 0 0
|
||||||
|
cop Coptic 0 0
|
||||||
|
cor kw Cornish 0 0
|
||||||
|
cos co Corsican 0 0
|
||||||
|
cpe Creoles and pidgins, English based (Other) 0 0
|
||||||
|
cpf Creoles and pidgins, French-based (Other) 0 0
|
||||||
|
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
|
||||||
|
cre cr Cree 0 0
|
||||||
|
crh Crimean Tatar 0 0
|
||||||
|
crp Creoles and pidgins (Other) 0 0
|
||||||
|
csb Kashubian 0 0
|
||||||
|
cus Cushitic (Other)' couchitiques, autres langues 0 0
|
||||||
|
cze cs Czech 1 1
|
||||||
|
dak Dakota 0 0
|
||||||
|
dan da Danish 1 1
|
||||||
|
dar Dargwa 0 0
|
||||||
|
day Dayak 0 0
|
||||||
|
del Delaware 0 0
|
||||||
|
den Slave (Athapascan) 0 0
|
||||||
|
dgr Dogrib 0 0
|
||||||
|
din Dinka 0 0
|
||||||
|
div dv Divehi 0 0
|
||||||
|
doi Dogri 0 0
|
||||||
|
dra Dravidian (Other) 0 0
|
||||||
|
dua Duala 0 0
|
||||||
|
dum Dutch, Middle (ca.1050-1350) 0 0
|
||||||
|
dut nl Dutch 1 1
|
||||||
|
dyu Dyula 0 0
|
||||||
|
dzo dz Dzongkha 0 0
|
||||||
|
efi Efik 0 0
|
||||||
|
egy Egyptian (Ancient) 0 0
|
||||||
|
eka Ekajuk 0 0
|
||||||
|
elx Elamite 0 0
|
||||||
|
eng en English 1 1
|
||||||
|
enm English, Middle (1100-1500) 0 0
|
||||||
|
epo eo Esperanto 1 0
|
||||||
|
est et Estonian 1 1
|
||||||
|
ewe ee Ewe 0 0
|
||||||
|
ewo Ewondo 0 0
|
||||||
|
fan Fang 0 0
|
||||||
|
fao fo Faroese 0 0
|
||||||
|
fat Fanti 0 0
|
||||||
|
fij fj Fijian 0 0
|
||||||
|
fil Filipino 0 0
|
||||||
|
fin fi Finnish 1 1
|
||||||
|
fiu Finno-Ugrian (Other) 0 0
|
||||||
|
fon Fon 0 0
|
||||||
|
fre fr French 1 1
|
||||||
|
frm French, Middle (ca.1400-1600) 0 0
|
||||||
|
fro French, Old (842-ca.1400) 0 0
|
||||||
|
fry fy Frisian 0 0
|
||||||
|
ful ff Fulah 0 0
|
||||||
|
fur Friulian 0 0
|
||||||
|
gaa Ga 0 0
|
||||||
|
gay Gayo 0 0
|
||||||
|
gba Gbaya 0 0
|
||||||
|
gem Germanic (Other) 0 0
|
||||||
|
geo ka Georgian 1 1
|
||||||
|
ger de German 1 1
|
||||||
|
gez Geez 0 0
|
||||||
|
gil Gilbertese 0 0
|
||||||
|
gla gd Gaelic 0 0
|
||||||
|
gle ga Irish 0 0
|
||||||
|
glg gl Galician 1 1
|
||||||
|
glv gv Manx 0 0
|
||||||
|
gmh German, Middle High (ca.1050-1500) 0 0
|
||||||
|
goh German, Old High (ca.750-1050) 0 0
|
||||||
|
gon Gondi 0 0
|
||||||
|
gor Gorontalo 0 0
|
||||||
|
got Gothic 0 0
|
||||||
|
grb Grebo 0 0
|
||||||
|
grc Greek, Ancient (to 1453) 0 0
|
||||||
|
ell el Greek 1 1
|
||||||
|
grn gn Guarani 0 0
|
||||||
|
guj gu Gujarati 0 0
|
||||||
|
gwi Gwich´in 0 0
|
||||||
|
hai Haida 0 0
|
||||||
|
hat ht Haitian 0 0
|
||||||
|
hau ha Hausa 0 0
|
||||||
|
haw Hawaiian 0 0
|
||||||
|
heb he Hebrew 1 1
|
||||||
|
her hz Herero 0 0
|
||||||
|
hil Hiligaynon 0 0
|
||||||
|
him Himachali 0 0
|
||||||
|
hin hi Hindi 1 1
|
||||||
|
hit Hittite 0 0
|
||||||
|
hmn Hmong 0 0
|
||||||
|
hmo ho Hiri Motu 0 0
|
||||||
|
hrv hr Croatian 1 1
|
||||||
|
hun hu Hungarian 1 1
|
||||||
|
hup Hupa 0 0
|
||||||
|
iba Iban 0 0
|
||||||
|
ibo ig Igbo 0 0
|
||||||
|
ice is Icelandic 1 1
|
||||||
|
ido io Ido 0 0
|
||||||
|
iii ii Sichuan Yi 0 0
|
||||||
|
ijo Ijo 0 0
|
||||||
|
iku iu Inuktitut 0 0
|
||||||
|
ile ie Interlingue 0 0
|
||||||
|
ilo Iloko 0 0
|
||||||
|
ina ia Interlingua (International Auxiliary Language Asso 0 0
|
||||||
|
inc Indic (Other) 0 0
|
||||||
|
ind id Indonesian 1 1
|
||||||
|
ine Indo-European (Other) 0 0
|
||||||
|
inh Ingush 0 0
|
||||||
|
ipk ik Inupiaq 0 0
|
||||||
|
ira Iranian (Other) 0 0
|
||||||
|
iro Iroquoian languages 0 0
|
||||||
|
ita it Italian 1 1
|
||||||
|
jav jv Javanese 0 0
|
||||||
|
jpn ja Japanese 1 1
|
||||||
|
jpr Judeo-Persian 0 0
|
||||||
|
jrb Judeo-Arabic 0 0
|
||||||
|
kaa Kara-Kalpak 0 0
|
||||||
|
kab Kabyle 0 0
|
||||||
|
kac Kachin 0 0
|
||||||
|
kal kl Kalaallisut 0 0
|
||||||
|
kam Kamba 0 0
|
||||||
|
kan kn Kannada 0 0
|
||||||
|
kar Karen 0 0
|
||||||
|
kas ks Kashmiri 0 0
|
||||||
|
kau kr Kanuri 0 0
|
||||||
|
kaw Kawi 0 0
|
||||||
|
kaz kk Kazakh 1 0
|
||||||
|
kbd Kabardian 0 0
|
||||||
|
kha Khasi 0 0
|
||||||
|
khi Khoisan (Other) 0 0
|
||||||
|
khm km Khmer 1 1
|
||||||
|
kho Khotanese 0 0
|
||||||
|
kik ki Kikuyu 0 0
|
||||||
|
kin rw Kinyarwanda 0 0
|
||||||
|
kir ky Kirghiz 0 0
|
||||||
|
kmb Kimbundu 0 0
|
||||||
|
kok Konkani 0 0
|
||||||
|
kom kv Komi 0 0
|
||||||
|
kon kg Kongo 0 0
|
||||||
|
kor ko Korean 1 1
|
||||||
|
kos Kosraean 0 0
|
||||||
|
kpe Kpelle 0 0
|
||||||
|
krc Karachay-Balkar 0 0
|
||||||
|
kro Kru 0 0
|
||||||
|
kru Kurukh 0 0
|
||||||
|
kua kj Kuanyama 0 0
|
||||||
|
kum Kumyk 0 0
|
||||||
|
kur ku Kurdish 0 0
|
||||||
|
kut Kutenai 0 0
|
||||||
|
lad Ladino 0 0
|
||||||
|
lah Lahnda 0 0
|
||||||
|
lam Lamba 0 0
|
||||||
|
lao lo Lao 0 0
|
||||||
|
lat la Latin 0 0
|
||||||
|
lav lv Latvian 1 0
|
||||||
|
lez Lezghian 0 0
|
||||||
|
lim li Limburgan 0 0
|
||||||
|
lin ln Lingala 0 0
|
||||||
|
lit lt Lithuanian 1 0
|
||||||
|
lol Mongo 0 0
|
||||||
|
loz Lozi 0 0
|
||||||
|
ltz lb Luxembourgish 1 0
|
||||||
|
lua Luba-Lulua 0 0
|
||||||
|
lub lu Luba-Katanga 0 0
|
||||||
|
lug lg Ganda 0 0
|
||||||
|
lui Luiseno 0 0
|
||||||
|
lun Lunda 0 0
|
||||||
|
luo Luo (Kenya and Tanzania) 0 0
|
||||||
|
lus lushai 0 0
|
||||||
|
mac mk Macedonian 1 1
|
||||||
|
mad Madurese 0 0
|
||||||
|
mag Magahi 0 0
|
||||||
|
mah mh Marshallese 0 0
|
||||||
|
mai Maithili 0 0
|
||||||
|
mak Makasar 0 0
|
||||||
|
mal ml Malayalam 0 0
|
||||||
|
man Mandingo 0 0
|
||||||
|
mao mi Maori 0 0
|
||||||
|
map Austronesian (Other) 0 0
|
||||||
|
mar mr Marathi 0 0
|
||||||
|
mas Masai 0 0
|
||||||
|
may ms Malay 1 1
|
||||||
|
mdf Moksha 0 0
|
||||||
|
mdr Mandar 0 0
|
||||||
|
men Mende 0 0
|
||||||
|
mga Irish, Middle (900-1200) 0 0
|
||||||
|
mic Mi'kmaq 0 0
|
||||||
|
min Minangkabau 0 0
|
||||||
|
mis Miscellaneous languages 0 0
|
||||||
|
mkh Mon-Khmer (Other) 0 0
|
||||||
|
mlg mg Malagasy 0 0
|
||||||
|
mlt mt Maltese 0 0
|
||||||
|
mnc Manchu 0 0
|
||||||
|
mni Manipuri 0 0
|
||||||
|
mno Manobo languages 0 0
|
||||||
|
moh Mohawk 0 0
|
||||||
|
mol mo Moldavian 0 0
|
||||||
|
mon mn Mongolian 1 0
|
||||||
|
mos Mossi 0 0
|
||||||
|
mwl Mirandese 0 0
|
||||||
|
mul Multiple languages 0 0
|
||||||
|
mun Munda languages 0 0
|
||||||
|
mus Creek 0 0
|
||||||
|
mwr Marwari 0 0
|
||||||
|
myn Mayan languages 0 0
|
||||||
|
myv Erzya 0 0
|
||||||
|
nah Nahuatl 0 0
|
||||||
|
nai North American Indian 0 0
|
||||||
|
nap Neapolitan 0 0
|
||||||
|
nau na Nauru 0 0
|
||||||
|
nav nv Navajo 0 0
|
||||||
|
nbl nr Ndebele, South 0 0
|
||||||
|
nde nd Ndebele, North 0 0
|
||||||
|
ndo ng Ndonga 0 0
|
||||||
|
nds Low German 0 0
|
||||||
|
nep ne Nepali 0 0
|
||||||
|
new Nepal Bhasa 0 0
|
||||||
|
nia Nias 0 0
|
||||||
|
nic Niger-Kordofanian (Other) 0 0
|
||||||
|
niu Niuean 0 0
|
||||||
|
nno nn Norwegian Nynorsk 0 0
|
||||||
|
nob nb Norwegian Bokmal 0 0
|
||||||
|
nog Nogai 0 0
|
||||||
|
non Norse, Old 0 0
|
||||||
|
nor no Norwegian 1 1
|
||||||
|
nso Northern Sotho 0 0
|
||||||
|
nub Nubian languages 0 0
|
||||||
|
nwc Classical Newari 0 0
|
||||||
|
nya ny Chichewa 0 0
|
||||||
|
nym Nyamwezi 0 0
|
||||||
|
nyn Nyankole 0 0
|
||||||
|
nyo Nyoro 0 0
|
||||||
|
nzi Nzima 0 0
|
||||||
|
oci oc Occitan 1 1
|
||||||
|
oji oj Ojibwa 0 0
|
||||||
|
ori or Oriya 0 0
|
||||||
|
orm om Oromo 0 0
|
||||||
|
osa Osage 0 0
|
||||||
|
oss os Ossetian 0 0
|
||||||
|
ota Turkish, Ottoman (1500-1928) 0 0
|
||||||
|
oto Otomian languages 0 0
|
||||||
|
paa Papuan (Other) 0 0
|
||||||
|
pag Pangasinan 0 0
|
||||||
|
pal Pahlavi 0 0
|
||||||
|
pam Pampanga 0 0
|
||||||
|
pan pa Panjabi 0 0
|
||||||
|
pap Papiamento 0 0
|
||||||
|
pau Palauan 0 0
|
||||||
|
peo Persian, Old (ca.600-400 B.C.) 0 0
|
||||||
|
per fa Persian 1 1
|
||||||
|
phi Philippine (Other) 0 0
|
||||||
|
phn Phoenician 0 0
|
||||||
|
pli pi Pali 0 0
|
||||||
|
pol pl Polish 1 1
|
||||||
|
pon Pohnpeian 0 0
|
||||||
|
por pt Portuguese 1 1
|
||||||
|
pra Prakrit languages 0 0
|
||||||
|
pro Provençal, Old (to 1500) 0 0
|
||||||
|
pus ps Pushto 0 0
|
||||||
|
que qu Quechua 0 0
|
||||||
|
raj Rajasthani 0 0
|
||||||
|
rap Rapanui 0 0
|
||||||
|
rar Rarotongan 0 0
|
||||||
|
roa Romance (Other) 0 0
|
||||||
|
roh rm Raeto-Romance 0 0
|
||||||
|
rom Romany 0 0
|
||||||
|
run rn Rundi 0 0
|
||||||
|
rup Aromanian 0 0
|
||||||
|
rus ru Russian 1 1
|
||||||
|
sad Sandawe 0 0
|
||||||
|
sag sg Sango 0 0
|
||||||
|
sah Yakut 0 0
|
||||||
|
sai South American Indian (Other) 0 0
|
||||||
|
sal Salishan languages 0 0
|
||||||
|
sam Samaritan Aramaic 0 0
|
||||||
|
san sa Sanskrit 0 0
|
||||||
|
sas Sasak 0 0
|
||||||
|
sat Santali 0 0
|
||||||
|
scc sr Serbian 1 1
|
||||||
|
scn Sicilian 0 0
|
||||||
|
sco Scots 0 0
|
||||||
|
sel Selkup 0 0
|
||||||
|
sem Semitic (Other) 0 0
|
||||||
|
sga Irish, Old (to 900) 0 0
|
||||||
|
sgn Sign Languages 0 0
|
||||||
|
shn Shan 0 0
|
||||||
|
sid Sidamo 0 0
|
||||||
|
sin si Sinhalese 1 1
|
||||||
|
sio Siouan languages 0 0
|
||||||
|
sit Sino-Tibetan (Other) 0 0
|
||||||
|
sla Slavic (Other) 0 0
|
||||||
|
slo sk Slovak 1 1
|
||||||
|
slv sl Slovenian 1 1
|
||||||
|
sma Southern Sami 0 0
|
||||||
|
sme se Northern Sami 0 0
|
||||||
|
smi Sami languages (Other) 0 0
|
||||||
|
smj Lule Sami 0 0
|
||||||
|
smn Inari Sami 0 0
|
||||||
|
smo sm Samoan 0 0
|
||||||
|
sms Skolt Sami 0 0
|
||||||
|
sna sn Shona 0 0
|
||||||
|
snd sd Sindhi 0 0
|
||||||
|
snk Soninke 0 0
|
||||||
|
sog Sogdian 0 0
|
||||||
|
som so Somali 0 0
|
||||||
|
son Songhai 0 0
|
||||||
|
sot st Sotho, Southern 0 0
|
||||||
|
spa es Spanish 1 1
|
||||||
|
srd sc Sardinian 0 0
|
||||||
|
srr Serer 0 0
|
||||||
|
ssa Nilo-Saharan (Other) 0 0
|
||||||
|
ssw ss Swati 0 0
|
||||||
|
suk Sukuma 0 0
|
||||||
|
sun su Sundanese 0 0
|
||||||
|
sus Susu 0 0
|
||||||
|
sux Sumerian 0 0
|
||||||
|
swa sw Swahili 1 0
|
||||||
|
swe sv Swedish 1 1
|
||||||
|
syr Syriac 1 0
|
||||||
|
tah ty Tahitian 0 0
|
||||||
|
tai Tai (Other) 0 0
|
||||||
|
tam ta Tamil 0 0
|
||||||
|
tat tt Tatar 0 0
|
||||||
|
tel te Telugu 0 0
|
||||||
|
tem Timne 0 0
|
||||||
|
ter Tereno 0 0
|
||||||
|
tet Tetum 0 0
|
||||||
|
tgk tg Tajik 0 0
|
||||||
|
tgl tl Tagalog 1 1
|
||||||
|
tha th Thai 1 1
|
||||||
|
tib bo Tibetan 0 0
|
||||||
|
tig Tigre 0 0
|
||||||
|
tir ti Tigrinya 0 0
|
||||||
|
tiv Tiv 0 0
|
||||||
|
tkl Tokelau 0 0
|
||||||
|
tlh Klingon 0 0
|
||||||
|
tli Tlingit 0 0
|
||||||
|
tmh Tamashek 0 0
|
||||||
|
tog Tonga (Nyasa) 0 0
|
||||||
|
ton to Tonga (Tonga Islands) 0 0
|
||||||
|
tpi Tok Pisin 0 0
|
||||||
|
tsi Tsimshian 0 0
|
||||||
|
tsn tn Tswana 0 0
|
||||||
|
tso ts Tsonga 0 0
|
||||||
|
tuk tk Turkmen 0 0
|
||||||
|
tum Tumbuka 0 0
|
||||||
|
tup Tupi languages 0 0
|
||||||
|
tur tr Turkish 1 1
|
||||||
|
tut Altaic (Other) 0 0
|
||||||
|
tvl Tuvalu 0 0
|
||||||
|
twi tw Twi 0 0
|
||||||
|
tyv Tuvinian 0 0
|
||||||
|
udm Udmurt 0 0
|
||||||
|
uga Ugaritic 0 0
|
||||||
|
uig ug Uighur 0 0
|
||||||
|
ukr uk Ukrainian 1 1
|
||||||
|
umb Umbundu 0 0
|
||||||
|
und Undetermined 0 0
|
||||||
|
urd ur Urdu 1 0
|
||||||
|
uzb uz Uzbek 0 0
|
||||||
|
vai Vai 0 0
|
||||||
|
ven ve Venda 0 0
|
||||||
|
vie vi Vietnamese 1 1
|
||||||
|
vol vo Volapük 0 0
|
||||||
|
vot Votic 0 0
|
||||||
|
wak Wakashan languages 0 0
|
||||||
|
wal Walamo 0 0
|
||||||
|
war Waray 0 0
|
||||||
|
was Washo 0 0
|
||||||
|
wel cy Welsh 0 0
|
||||||
|
wen Sorbian languages 0 0
|
||||||
|
wln wa Walloon 0 0
|
||||||
|
wol wo Wolof 0 0
|
||||||
|
xal Kalmyk 0 0
|
||||||
|
xho xh Xhosa 0 0
|
||||||
|
yao Yao 0 0
|
||||||
|
yap Yapese 0 0
|
||||||
|
yid yi Yiddish 0 0
|
||||||
|
yor yo Yoruba 0 0
|
||||||
|
ypk Yupik languages 0 0
|
||||||
|
zap Zapotec 0 0
|
||||||
|
zen Zenaga 0 0
|
||||||
|
zha za Zhuang 0 0
|
||||||
|
znd Zande 0 0
|
||||||
|
zul zu Zulu 0 0
|
||||||
|
zun Zuni 0 0
|
||||||
|
rum ro Romanian 1 1
|
||||||
|
pob pb Brazilian 1 1
|
||||||
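The table above is the raw opensubtitles language dump that test_language.py (further down in this changeset) feeds to Language.fromguessit; in the actual file the five columns are tab-separated and the first row is a header. A small sketch of reading it follows; load_languages is a hypothetical helper written for illustration, not part of the library.

# Sketch only: assumes the file is UTF-8 and tab-separated, matching how the
# test_opensubtitles test below splits each row on '\t'.
import io

def load_languages(path='opensubtitles_languages_2012_05_09.txt'):
    rows = []
    with io.open(path, encoding='utf-8') as f:
        next(f)  # skip the "IdSubLanguage ISO639 ..." header row
        for line in f:
            idlang, alpha2, name, upload, web = line.rstrip('\n').split('\t')
            rows.append({'id': idlang, 'alpha2': alpha2, 'name': name,
                         'upload_enabled': upload == '1', 'web_enabled': web == '1'})
    return rows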
54 libs/guessit/test/test_api.py (new file)
@@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestApi(TestGuessit):
    def test_api(self):
        movie_path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'

        movie_info = guessit.guess_movie_info(movie_path)
        video_info = guessit.guess_video_info(movie_path)
        episode_info = guessit.guess_episode_info(movie_path)
        file_info = guessit.guess_file_info(movie_path)

        self.assertEqual(guessit.guess_file_info(movie_path, type='movie'), movie_info)
        self.assertEqual(guessit.guess_file_info(movie_path, type='video'), video_info)
        self.assertEqual(guessit.guess_file_info(movie_path, type='episode'), episode_info)

        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'movie'}), movie_info)
        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'video'}), video_info)
        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'episode'}), episode_info)

        self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'episode'}, type='movie'), episode_info)  # kwargs priority other options

        movie_path_name_only = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD'
        file_info_name_only = guessit.guess_file_info(movie_path_name_only, options={"name_only": True})

        self.assertFalse('container' in file_info_name_only)
        self.assertTrue('container' in file_info)

suite = allTests(TestApi)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
45 libs/guessit/test/test_autodetect.py (new file)
@@ -0,0 +1,45 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestAutoDetect(TestGuessit):
    def testEmpty(self):
        result = guessit.guess_file_info('')
        self.assertEqual(result, {})

        result = guessit.guess_file_info('___-__')
        self.assertEqual(result, {})

        result = guessit.guess_file_info('__-.avc')
        self.assertEqual(result, {'type': 'unknown', 'extension': 'avc'})

    def testAutoDetect(self):
        self.checkMinimumFieldsCorrect(filename='autodetect.yaml',
                                       remove_type=False)


suite = allTests(TestAutoDetect)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
46 libs/guessit/test/test_autodetect_all.py (new file)
@@ -0,0 +1,46 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *

IGNORE_EPISODES = []
IGNORE_MOVIES = []


class TestAutoDetectAll(TestGuessit):
    def testAutoMatcher(self):
        self.checkMinimumFieldsCorrect(filename='autodetect.yaml',
                                       remove_type=False)

    def testAutoMatcherMovies(self):
        self.checkMinimumFieldsCorrect(filename='movies.yaml',
                                       exclude_files=IGNORE_MOVIES)

    def testAutoMatcherEpisodes(self):
        self.checkMinimumFieldsCorrect(filename='episodes.yaml',
                                       exclude_files=IGNORE_EPISODES)


suite = allTests(TestAutoDetectAll)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
45 libs/guessit/test/test_doctests.py (new file)
@@ -0,0 +1,45 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *
import guessit
import guessit.hash_ed2k
import unittest
import doctest


def load_tests(loader, tests, ignore):
    tests.addTests(doctest.DocTestSuite(guessit))
    tests.addTests(doctest.DocTestSuite(guessit.date))
    tests.addTests(doctest.DocTestSuite(guessit.fileutils))
    tests.addTests(doctest.DocTestSuite(guessit.guess))
    tests.addTests(doctest.DocTestSuite(guessit.hash_ed2k))
    tests.addTests(doctest.DocTestSuite(guessit.language))
    tests.addTests(doctest.DocTestSuite(guessit.matchtree))
    tests.addTests(doctest.DocTestSuite(guessit.textutils))
    return tests

suite = unittest.TestSuite()
load_tests(None, suite, None)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
35 libs/guessit/test/test_episode.py (new file)
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestEpisode(TestGuessit):
    def testEpisodes(self):
        self.checkMinimumFieldsCorrect(filetype='episode',
                                       filename='episodes.yaml')


suite = allTests(TestEpisode)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
46 libs/guessit/test/test_hashes.py (new file)
@@ -0,0 +1,46 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestHashes(TestGuessit):
    def test_hashes(self):
        hashes = (
            ('hash_mpc', '1MB', u'8542ad406c15c8bd'),  # TODO: Check if this value is valid
            ('hash_ed2k', '1MB', u'ed2k://|file|1MB|1048576|AA3CC5552A9931A76B61A41D306735F7|/'),  # TODO: Check if this value is valid
            ('hash_md5', '1MB', u'5d8dcbca8d8ac21766f28797d6c3954c'),
            ('hash_sha1', '1MB', u'51d2b8f3248d7ee495b7750c8da5aa3b3819de9d'),
            ('hash_md5', 'dummy.srt', u'64de6b5893cac24456c46a935ef9c359'),
            ('hash_sha1', 'dummy.srt', u'a703fc0fa4518080505809bf562c6fc6f7b3c98c')
        )

        for hash_type, filename, expected_value in hashes:
            guess = guess_file_info(file_in_same_dir(__file__, filename), hash_type)
            computed_value = guess.get(hash_type)
            self.assertEqual(expected_value, guess.get(hash_type), "Invalid %s for %s: %s != %s" % (hash_type, filename, computed_value, expected_value))


suite = allTests(TestHashes)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
130 libs/guessit/test/test_language.py (new file)
@@ -0,0 +1,130 @@
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# GuessIt - A library for guessing information from filenames
|
||||||
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
|
#
|
||||||
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# GuessIt is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# Lesser GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the Lesser GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
from guessit.test.guessittest import *
|
||||||
|
|
||||||
|
import io
|
||||||
|
|
||||||
|
|
||||||
|
class TestLanguage(TestGuessit):
|
||||||
|
|
||||||
|
def check_languages(self, languages):
|
||||||
|
for lang1, lang2 in languages.items():
|
||||||
|
self.assertEqual(Language.fromguessit(lang1),
|
||||||
|
Language.fromguessit(lang2))
|
||||||
|
|
||||||
|
def test_addic7ed(self):
|
||||||
|
languages = {'English': 'en',
|
||||||
|
'English (US)': 'en-US',
|
||||||
|
'English (UK)': 'en-UK',
|
||||||
|
'Italian': 'it',
|
||||||
|
'Portuguese': 'pt',
|
||||||
|
'Portuguese (Brazilian)': 'pt-BR',
|
||||||
|
'Romanian': 'ro',
|
||||||
|
'Español (Latinoamérica)': 'es-MX',
|
||||||
|
'Español (España)': 'es-ES',
|
||||||
|
'Spanish (Latin America)': 'es-MX',
|
||||||
|
'Español': 'es',
|
||||||
|
'Spanish': 'es',
|
||||||
|
'Spanish (Spain)': 'es-ES',
|
||||||
|
'French': 'fr',
|
||||||
|
'Greek': 'el',
|
||||||
|
'Arabic': 'ar',
|
||||||
|
'German': 'de',
|
||||||
|
'Croatian': 'hr',
|
||||||
|
'Indonesian': 'id',
|
||||||
|
'Hebrew': 'he',
|
||||||
|
'Russian': 'ru',
|
||||||
|
'Turkish': 'tr',
|
||||||
|
'Swedish': 'se',
|
||||||
|
'Czech': 'cs',
|
||||||
|
'Dutch': 'nl',
|
||||||
|
'Hungarian': 'hu',
|
||||||
|
'Norwegian': 'no',
|
||||||
|
'Polish': 'pl',
|
||||||
|
'Persian': 'fa'}
|
||||||
|
|
||||||
|
self.check_languages(languages)
|
||||||
|
|
||||||
|
def test_subswiki(self):
|
||||||
|
languages = {'English (US)': 'en-US', 'English (UK)': 'en-UK', 'English': 'en',
|
||||||
|
'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
|
||||||
|
'Español (Latinoamérica)': 'es-MX', 'Español (España)': 'es-ES',
|
||||||
|
'Español': 'es', 'Italian': 'it', 'Català': 'ca'}
|
||||||
|
|
||||||
|
self.check_languages(languages)
|
||||||
|
|
||||||
|
def test_tvsubtitles(self):
|
||||||
|
languages = {'English': 'en', 'Español': 'es', 'French': 'fr', 'German': 'de',
|
||||||
|
'Brazilian': 'br', 'Russian': 'ru', 'Ukrainian': 'ua', 'Italian': 'it',
|
||||||
|
'Greek': 'gr', 'Arabic': 'ar', 'Hungarian': 'hu', 'Polish': 'pl',
|
||||||
|
'Turkish': 'tr', 'Dutch': 'nl', 'Portuguese': 'pt', 'Swedish': 'sv',
|
||||||
|
'Danish': 'da', 'Finnish': 'fi', 'Korean': 'ko', 'Chinese': 'cn',
|
||||||
|
'Japanese': 'jp', 'Bulgarian': 'bg', 'Czech': 'cz', 'Romanian': 'ro'}
|
||||||
|
|
||||||
|
self.check_languages(languages)
|
||||||
|
|
||||||
|
def test_opensubtitles(self):
|
||||||
|
opensubtitles_langfile = file_in_same_dir(__file__, 'opensubtitles_languages_2012_05_09.txt')
|
||||||
|
for l in [u(l).strip() for l in io.open(opensubtitles_langfile, encoding='utf-8')][1:]:
|
||||||
|
idlang, alpha2, _, upload_enabled, web_enabled = l.strip().split('\t')
|
||||||
|
# do not test languages that are too esoteric / not widely available
|
||||||
|
if int(upload_enabled) and int(web_enabled):
|
||||||
|
# check that we recognize the opensubtitles language code correctly
|
||||||
|
# and that we are able to output this code from a language
|
||||||
|
self.assertEqual(idlang, Language.fromguessit(idlang).opensubtitles)
|
||||||
|
if alpha2:
|
||||||
|
# check we recognize the opensubtitles 2-letter code correctly
|
||||||
|
self.check_languages({idlang: alpha2})
|
||||||
|
|
||||||
|
def test_tmdb(self):
|
||||||
|
# examples from http://api.themoviedb.org/2.1/language-tags
|
||||||
|
for lang in ['en-US', 'en-CA', 'es-MX', 'fr-PF']:
|
||||||
|
self.assertEqual(lang, str(Language.fromguessit(lang)))
|
||||||
|
|
||||||
|
def test_subtitulos(self):
|
||||||
|
languages = {'English (US)': 'en-US', 'English (UK)': 'en-UK', 'English': 'en',
|
||||||
|
'French': 'fr', 'Brazilian': 'po', 'Portuguese': 'pt',
|
||||||
|
'Español (Latinoamérica)': 'es-MX', 'Español (España)': 'es-ES',
|
||||||
|
'Español': 'es', 'Italian': 'it', 'Català': 'ca'}
|
||||||
|
|
||||||
|
self.check_languages(languages)
|
||||||
|
|
||||||
|
def test_thesubdb(self):
|
||||||
|
languages = {'af': 'af', 'cs': 'cs', 'da': 'da', 'de': 'de', 'en': 'en', 'es': 'es', 'fi': 'fi',
|
||||||
|
'fr': 'fr', 'hu': 'hu', 'id': 'id', 'it': 'it', 'la': 'la', 'nl': 'nl', 'no': 'no',
|
||||||
|
'oc': 'oc', 'pl': 'pl', 'pt': 'pt', 'ro': 'ro', 'ru': 'ru', 'sl': 'sl', 'sr': 'sr',
|
||||||
|
'sv': 'sv', 'tr': 'tr'}
|
||||||
|
|
||||||
|
self.check_languages(languages)
|
||||||
|
|
||||||
|
def test_exceptions(self):
|
||||||
|
self.assertEqual(Language.fromguessit('br'), Language.fromguessit('pt(br)'))
|
||||||
|
|
||||||
|
self.assertEqual(Language.fromguessit('unknown'),
|
||||||
|
Language.fromguessit('und'))
|
||||||
|
|
||||||
|
|
||||||
|
suite = allTests(TestLanguage)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
TextTestRunner(verbosity=2).run(suite)
|
||||||
69 libs/guessit/test/test_main.py (new file)
@@ -0,0 +1,69 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2014 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *
from guessit.fileutils import split_path, file_in_same_dir
from guessit.textutils import strip_brackets, str_replace, str_fill
from guessit import PY2
from guessit import __main__

if PY2:
    from StringIO import StringIO
else:
    from io import StringIO


class TestMain(TestGuessit):
    def setUp(self):
        self._stdout = sys.stdout
        string_out = StringIO()
        sys.stdout = string_out

    def tearDown(self):
        sys.stdout = self._stdout

    def test_list_properties(self):
        __main__.main(["-p"], False)
        __main__.main(["-V"], False)

    def test_list_transformers(self):
        __main__.main(["--transformers"], False)
        __main__.main(["-V", "--transformers"], False)

    def test_demo(self):
        __main__.main(["-d"], False)

    def test_filename(self):
        __main__.main(["A.Movie.2014.avi"], False)
        __main__.main(["A.Movie.2014.avi", "A.2nd.Movie.2014.avi"], False)
        __main__.main(["-y", "A.Movie.2014.avi"], False)
        __main__.main(["-a", "A.Movie.2014.avi"], False)
        __main__.main(["-v", "A.Movie.2014.avi"], False)
        __main__.main(["-t", "movie", "A.Movie.2014.avi"], False)
        __main__.main(["-t", "episode", "A.Serie.S02E06.avi"], False)
        __main__.main(["-i", "hash_mpc", file_in_same_dir(__file__, "1MB")], False)
        __main__.main(["-i", "hash_md5", file_in_same_dir(__file__, "1MB")], False)

suite = allTests(TestMain)

if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
libs/guessit/test/test_matchtree.py  (new file, 93 lines)
@@ -0,0 +1,93 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *

from guessit.transfo.guess_release_group import GuessReleaseGroup
from guessit.transfo.guess_properties import GuessProperties
from guessit.matchtree import BaseMatchTree

keywords = yaml.load("""

? Xvid PROPER
: videoCodec: Xvid
  other: PROPER

? PROPER-Xvid
: videoCodec: Xvid
  other: PROPER

""")


def guess_info(string, options=None):
    mtree = MatchTree(string)
    GuessReleaseGroup().process(mtree, options)
    GuessProperties().process(mtree, options)
    return mtree.matched()


class TestMatchTree(TestGuessit):
    def test_base_tree(self):
        t = BaseMatchTree('One Two Three(Three) Four')
        t.partition((3, 7, 20))
        leaves = list(t.leaves())

        self.assertEqual(leaves[0].span, (0, 3))

        self.assertEqual('One', leaves[0].value)
        self.assertEqual(' Two', leaves[1].value)
        self.assertEqual(' Three(Three)', leaves[2].value)
        self.assertEqual(' Four', leaves[3].value)

        leaves[2].partition((1, 6, 7, 12))
        three_leaves = list(leaves[2].leaves())

        self.assertEqual('Three', three_leaves[1].value)
        self.assertEqual('Three', three_leaves[3].value)

        leaves = list(t.leaves())

        self.assertEqual(len(leaves), 8)

        self.assertEqual(leaves[5], three_leaves[3])

        self.assertEqual(t.previous_leaf(leaves[5]), leaves[4])
        self.assertEqual(t.next_leaf(leaves[5]), leaves[6])

        self.assertEqual(t.next_leaves(leaves[5]), [leaves[6], leaves[7]])
        self.assertEqual(t.previous_leaves(leaves[5]), [leaves[4], leaves[3], leaves[2], leaves[1], leaves[0]])

        self.assertEqual(t.next_leaf(leaves[7]), None)
        self.assertEqual(t.previous_leaf(leaves[0]), None)

        self.assertEqual(t.next_leaves(leaves[7]), [])
        self.assertEqual(t.previous_leaves(leaves[0]), [])

    def test_match(self):
        self.checkFields(keywords, guess_info)


suite = allTests(TestMatchTree)


if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
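BaseMatchTree.partition, exercised in test_base_tree above, splits a node at character offsets into adjacent leaves. The same behaviour, shown standalone with only the API the test already imports (values are the ones the test asserts):

# Standalone illustration of the partition behaviour checked in test_base_tree.
from guessit.matchtree import BaseMatchTree

t = BaseMatchTree('One Two Three(Three) Four')
t.partition((3, 7, 20))  # cut points are character offsets into the string
print([leaf.value for leaf in t.leaves()])
# ['One', ' Two', ' Three(Three)', ' Four']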
libs/guessit/test/test_movie.py  (new file, 35 lines)
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *


class TestMovie(TestGuessit):
    def testMovies(self):
        self.checkMinimumFieldsCorrect(filetype='movie',
                                       filename='movies.yaml')


suite = allTests(TestMovie)


if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
libs/guessit/test/test_quality.py  (new file, 126 lines)
@@ -0,0 +1,126 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.quality import best_quality, best_quality_properties
from guessit.containers import QualitiesContainer
from guessit.test.guessittest import *


class TestQuality(TestGuessit):
    def test_container(self):
        container = QualitiesContainer()

        container.register_quality('color', 'red', 10)
        container.register_quality('color', 'orange', 20)
        container.register_quality('color', 'green', 30)

        container.register_quality('context', 'sun', 100)
        container.register_quality('context', 'sea', 200)
        container.register_quality('context', 'sex', 300)

        g1 = Guess()
        g1['color'] = 'red'

        g2 = Guess()
        g2['color'] = 'green'

        g3 = Guess()
        g3['color'] = 'orange'

        q3 = container.rate_quality(g3)
        self.assertEqual(q3, 20, "ORANGE should be rated 20. Don't ask why!")

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!")

        g1['context'] = 'sex'
        g2['context'] = 'sun'

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q1 > q2, "SEX should be greater than SUN. Don't ask why!")

        self.assertEqual(container.best_quality(g1, g2), g1, "RED&SEX should be better than GREEN&SUN. Don't ask why!")

        self.assertEqual(container.best_quality_properties(['color'], g1, g2), g2, "GREEN should be better than RED. Don't ask why!")

        self.assertEqual(container.best_quality_properties(['context'], g1, g2), g1, "SEX should be better than SUN. Don't ask why!")

        q1 = container.rate_quality(g1, 'color')
        q2 = container.rate_quality(g2, 'color')

        self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!")

        container.unregister_quality('context', 'sex')
        container.unregister_quality('context', 'sun')

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q2 > q1, "GREEN&SUN should be greater than RED&SEX. Don't ask why!")

        g3['context'] = 'sea'
        container.unregister_quality('context', 'sea')

        q3 = container.rate_quality(g3, 'context')
        self.assertEqual(q3, 0, "Context should be unregistered.")

        container.unregister_quality('color')
        q3 = container.rate_quality(g3, 'color')

        self.assertEqual(q3, 0, "Color should be unregistered.")

        container.clear_qualities()

        q1 = container.rate_quality(g1)
        q2 = container.rate_quality(g2)

        self.assertTrue(q1 == q2 == 0, "Empty quality container should rate each guess to 0")

    def test_quality_transformers(self):
        guess_720p = guessit.guess_file_info("2012.2009.720p.BluRay.x264.DTS WiKi.mkv")
        guess_1080p = guessit.guess_file_info("2012.2009.1080p.BluRay.x264.MP3 WiKi.mkv")

        self.assertTrue('audioCodec' in guess_720p, "audioCodec should be present")
        self.assertTrue('audioCodec' in guess_1080p, "audioCodec should be present")
        self.assertTrue('screenSize' in guess_720p, "screenSize should be present")
        self.assertTrue('screenSize' in guess_1080p, "screenSize should be present")

        best_quality_guess = best_quality(guess_720p, guess_1080p)

        self.assertTrue(guess_1080p == best_quality_guess, "1080p+MP3 is not the best global quality")

        best_quality_guess = best_quality_properties(['screenSize'], guess_720p, guess_1080p)

        self.assertTrue(guess_1080p == best_quality_guess, "1080p is not the best screenSize")

        best_quality_guess = best_quality_properties(['audioCodec'], guess_720p, guess_1080p)

        self.assertTrue(guess_720p == best_quality_guess, "DTS is not the best audioCodec")


suite = allTests(TestQuality)


if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
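The quality machinery tested above ranks competing guesses by per-property weights. A minimal sketch using the same QualitiesContainer API as the test; the property name and weights below are illustrative only, not guessit's built-in defaults:

# Sketch: rank two guesses by a registered quality weight (API as in test_container above).
from guessit.containers import QualitiesContainer
from guessit import Guess

container = QualitiesContainer()
container.register_quality('screenSize', '720p', 10)   # illustrative weights
container.register_quality('screenSize', '1080p', 20)

sd, hd = Guess(), Guess()
sd['screenSize'] = '720p'
hd['screenSize'] = '1080p'

assert container.rate_quality(hd) > container.rate_quality(sd)
assert container.best_quality(sd, hd) == hd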
libs/guessit/test/test_utils.py  (new file, 163 lines)
@@ -0,0 +1,163 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.test.guessittest import *
from guessit.fileutils import split_path
from guessit.textutils import strip_brackets, str_replace, str_fill, from_camel, is_camel,\
    levenshtein, reorder_title
from guessit import PY2
from guessit.date import search_date, search_year
from datetime import datetime, date, timedelta


class TestUtils(TestGuessit):
    def test_splitpath(self):
        alltests = {False: {'/usr/bin/smewt': ['/', 'usr', 'bin', 'smewt'],
                            'relative_path/to/my_folder/': ['relative_path', 'to', 'my_folder'],
                            '//some/path': ['//', 'some', 'path'],
                            '//some//path': ['//', 'some', 'path'],
                            '///some////path': ['///', 'some', 'path']
                            },
                    True: {'C:\\Program Files\\Smewt\\smewt.exe': ['C:\\', 'Program Files', 'Smewt', 'smewt.exe'],
                           'Documents and Settings\\User\\config': ['Documents and Settings', 'User', 'config'],
                           'C:\\Documents and Settings\\User\\config': ['C:\\', 'Documents and Settings', 'User', 'config'],
                           # http://bugs.python.org/issue19945
                           '\\\\netdrive\\share': ['\\\\', 'netdrive', 'share'] if PY2 else ['\\\\netdrive\\share'],
                           '\\\\netdrive\\share\\folder': ['\\\\', 'netdrive', 'share', 'folder'] if PY2 else ['\\\\netdrive\\share\\', 'folder'],
                           }
                    }
        tests = alltests[sys.platform == 'win32']
        for path, split in tests.items():
            self.assertEqual(split, split_path(path))

    def test_strip_brackets(self):
        allTests = (('', ''),
                    ('[test]', 'test'),
                    ('{test2}', 'test2'),
                    ('(test3)', 'test3'),
                    ('(test4]', '(test4]'),
                    )

        for i, e in allTests:
            self.assertEqual(e, strip_brackets(i))

    def test_levenshtein(self):
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmno"), 0)
        self.assertEqual(levenshtein("abcdef ghijk lmnop", "abcdef ghijk lmno"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmn"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnp"), 1)
        self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnq"), 1)
        self.assertEqual(levenshtein("cbcdef ghijk lmno", "abcdef ghijk lmnq"), 2)
        self.assertEqual(levenshtein("cbcdef ghihk lmno", "abcdef ghijk lmnq"), 3)

    def test_reorder_title(self):
        self.assertEqual(reorder_title("Simpsons, The"), "The Simpsons")
        self.assertEqual(reorder_title("Simpsons,The"), "The Simpsons")
        self.assertEqual(reorder_title("Simpsons,Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")
        self.assertEqual(reorder_title("Simpsons, Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons")

    def test_camel(self):
        self.assertEqual("", from_camel(""))

        self.assertEqual("Hello world", str_replace("Hello World", 6, 'w'))
        self.assertEqual("Hello *****", str_fill("Hello World", (6, 11), '*'))

        self.assertTrue("This is camel", from_camel("ThisIsCamel"))

        self.assertEqual('camel case', from_camel('camelCase'))
        self.assertEqual('A case', from_camel('ACase'))
        self.assertEqual('MiXedCaSe is not camel case', from_camel('MiXedCaSe is not camelCase'))

        self.assertEqual("This is camel cased title", from_camel("ThisIsCamelCasedTitle"))
        self.assertEqual("This is camel CASED title", from_camel("ThisIsCamelCASEDTitle"))

        self.assertEqual("These are camel CASED title", from_camel("TheseAreCamelCASEDTitle"))

        self.assertEqual("Give a camel case string", from_camel("GiveACamelCaseString"))

        self.assertEqual("Death TO camel case", from_camel("DeathTOCamelCase"))
        self.assertEqual("But i like java too:)", from_camel("ButILikeJavaToo:)"))

        self.assertEqual("Beatdown french DVD rip.mkv", from_camel("BeatdownFrenchDVDRip.mkv"))
        self.assertEqual("DO NOTHING ON UPPER CASE", from_camel("DO NOTHING ON UPPER CASE"))

        self.assertFalse(is_camel("this_is_not_camel"))
        self.assertTrue(is_camel("ThisIsCamel"))

        self.assertEqual("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv", from_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))
        self.assertFalse(is_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))

        self.assertEqual("A2LiNE", from_camel("A2LiNE"))

    def test_date(self):
        self.assertEqual(search_year(' in the year 2000... '), (2000, (13, 17)))
        self.assertEqual(search_year(' they arrived in 1492. '), (None, None))

        today = date.today()
        today_year_2 = int(str(today.year)[2:])

        future = today + timedelta(days=1000)
        future_year_2 = int(str(future.year)[2:])

        past = today - timedelta(days=10000)
        past_year_2 = int(str(past.year)[2:])

        self.assertEqual(search_date(' Something before 2002-04-22 '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' 2002-04-22 Something after '), (date(2002, 4, 22), (1, 11)))

        self.assertEqual(search_date(' This happened on 2002-04-22. '), (date(2002, 4, 22), (18, 28)))
        self.assertEqual(search_date(' This happened on 22-04-2002. '), (date(2002, 4, 22), (18, 28)))

        self.assertEqual(search_date(' This happened on 13-04-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
        self.assertEqual(search_date(' This happened on 22-04-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
        self.assertEqual(search_date(' This happened on 20-04-%s. ' % (past_year_2)), (date(past.year, 4, 20), (18, 26)))

        self.assertEqual(search_date(' This happened on 13-06-14. ', year_first=True), (date(2013, 6, 14), (18, 26)))
        self.assertEqual(search_date(' This happened on 13-05-14. ', year_first=False), (date(2014, 5, 13), (18, 26)))

        self.assertEqual(search_date(' This happened on 04-13-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-22-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26)))
        self.assertEqual(search_date(' This happened on 04-20-%s. ' % (past_year_2)), (date(past.year, 4, 20), (18, 26)))

        self.assertEqual(search_date(' This happened on 35-12-%s. ' % (today_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on 37-18-%s. ' % (future_year_2,)), (None, None))
        self.assertEqual(search_date(' This happened on 44-42-%s. ' % (past_year_2)), (None, None))

        self.assertEqual(search_date(' This happened on %s. ' % (today, )), (today, (18, 28)))
        self.assertEqual(search_date(' This happened on %s. ' % (future, )), (future, (18, 28)))
        self.assertEqual(search_date(' This happened on %s. ' % (past, )), (past, (18, 28)))

        self.assertEqual(search_date(' released date: 04-03-1901? '), (None, None))

        self.assertEqual(search_date(' There\'s no date in here. '), (None, None))

        self.assertEqual(search_date(' Something 01-02-03 '), (date(2003, 2, 1), (11, 19)))
        self.assertEqual(search_date(' Something 01-02-03 ', year_first=False, day_first=True), (date(2003, 2, 1), (11, 19)))
        self.assertEqual(search_date(' Something 01-02-03 ', year_first=True), (date(2001, 2, 3), (11, 19)))
        self.assertEqual(search_date(' Something 01-02-03 ', day_first=False), (date(2003, 1, 2), (11, 19)))


suite = allTests(TestUtils)


if __name__ == '__main__':
    TextTestRunner(verbosity=2).run(suite)
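test_date above also documents how ambiguous all-numeric dates are resolved. The defaults, restated as a quick reference (expected values are taken directly from the assertions):

# How search_date resolves an ambiguous 'dd-mm-yy' string (values from test_date above).
from guessit.date import search_date

print(search_date(' Something 01-02-03 '))                   # (date(2003, 2, 1), (11, 19)) - day first, year last by default
print(search_date(' Something 01-02-03 ', year_first=True))  # (date(2001, 2, 3), (11, 19))
print(search_date(' Something 01-02-03 ', day_first=False))  # (date(2003, 1, 2), (11, 19))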
@@ -1,24 +1,25 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
-# Smewt - A smart collection manager
-# Copyright (c) 2008-2012 Nicolas Wack <wackou@gmail.com>
+# GuessIt - A library for guessing information from filenames
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
-# Smewt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
+# GuessIt is free software; you can redistribute it and/or modify it under
+# the terms of the Lesser GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
-# Smewt is distributed in the hope that it will be useful,
+# GuessIt is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
+# Lesser GNU General Public License for more details.
 #
-# You should have received a copy of the GNU General Public License
+# You should have received a copy of the Lesser GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 from guessit import s
 from guessit.patterns import sep
 import functools
@@ -27,6 +28,7 @@ import re
 
 # string-related functions
 
 
+
 def normalize_unicode(s):
     return unicodedata.normalize('NFC', s)
@@ -43,41 +45,63 @@ def strip_brackets(s):
     return s
 
 
-def clean_string(st):
+_dotted_rexp = re.compile(r'(?:\W|^)(([A-Za-z]\.){2,}[A-Za-z]\.?)')
+
+
+def clean_default(st):
     for c in sep:
         # do not remove certain chars
         if c in ['-', ',']:
             continue
 
+        if c == '.':
+            # we should not remove the dots for acronyms and such
+            dotted = _dotted_rexp.search(st)
+            if dotted:
+                s = dotted.group(1)
+                exclude_begin, exclude_end = dotted.span(1)
+
+                st = (st[:exclude_begin].replace(c, ' ') +
+                      st[exclude_begin:exclude_end] +
+                      st[exclude_end:].replace(c, ' '))
+                continue
+
         st = st.replace(c, ' ')
 
     parts = st.split()
     result = ' '.join(p for p in parts if p != '')
 
     # now also remove dashes on the outer part of the string
-    while result and result[0] in sep:
+    while result and result[0] in '-':
         result = result[1:]
-    while result and result[-1] in sep:
+    while result and result[-1] in '-':
         result = result[:-1]
 
     return result
 
 
 _words_rexp = re.compile('\w+', re.UNICODE)
 
 
 def find_words(s):
     return _words_rexp.findall(s.replace('_', ' '))
 
 
-def reorder_title(title):
+def iter_words(s):
+    return _words_rexp.finditer(s.replace('_', ' '))
+
+
+def reorder_title(title, articles=('the',), separators=(',', ', ')):
     ltitle = title.lower()
-    if ltitle[-4:] == ',the':
-        return title[-3:] + ' ' + title[:-4]
-    if ltitle[-5:] == ', the':
-        return title[-3:] + ' ' + title[:-5]
+    for article in articles:
+        for separator in separators:
+            suffix = separator + article
+            if ltitle[-len(suffix):] == suffix:
+                return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
     return title
 
 
 def str_replace(string, pos, c):
-    return string[:pos] + c + string[pos+1:]
+    return string[:pos] + c + string[pos + 1:]
 
 
 def str_fill(string, region, c):
@@ -85,7 +109,6 @@ def str_fill(string, region, c):
     return string[:start] + c * (end - start) + string[end:]
 
 
-
 def levenshtein(a, b):
     if not a:
         return len(b)
@@ -95,25 +118,25 @@ def levenshtein(a, b):
     m = len(a)
     n = len(b)
     d = []
-    for i in range(m+1):
-        d.append([0] * (n+1))
+    for i in range(m + 1):
+        d.append([0] * (n + 1))
 
-    for i in range(m+1):
+    for i in range(m + 1):
         d[i][0] = i
 
-    for j in range(n+1):
+    for j in range(n + 1):
         d[0][j] = j
 
-    for i in range(1, m+1):
-        for j in range(1, n+1):
-            if a[i-1] == b[j-1]:
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            if a[i - 1] == b[j - 1]:
                 cost = 0
             else:
                 cost = 1
 
-            d[i][j] = min(d[i-1][j] + 1, # deletion
-                          d[i][j-1] + 1, # insertion
-                          d[i-1][j-1] + cost # substitution
+            d[i][j] = min(d[i - 1][j] + 1, # deletion
+                          d[i][j - 1] + 1, # insertion
+                          d[i - 1][j - 1] + cost # substitution
                           )
 
     return d[m][n]
@@ -151,7 +174,7 @@ def find_first_level_groups_span(string, enclosing):
                 end = i
                 if not depth:
                     # we emptied our stack, so we have a 1st level group
-                    result.append((start, end+1))
+                    result.append((start, end + 1))
             except IndexError:
                 # we closed a group which was not opened before
                 pass
@@ -172,7 +195,7 @@ def split_on_groups(string, groups):
     """
     if not groups:
-        return [ string ]
+        return [string]
 
     boundaries = sorted(set(functools.reduce(lambda l, x: l + list(x), groups, [])))
     if boundaries[0] != 0:
@@ -180,10 +203,10 @@ def split_on_groups(string, groups):
     if boundaries[-1] != len(string):
         boundaries.append(len(string))
 
-    groups = [ string[start:end] for start, end in zip(boundaries[:-1],
-                                                        boundaries[1:]) ]
+    groups = [string[start:end] for start, end in zip(boundaries[:-1],
                                                       boundaries[1:])]
 
-    return [ g for g in groups if g ] # return only non-empty groups
+    return [g for g in groups if g]  # return only non-empty groups
 
 
 def find_first_level_groups(string, enclosing, blank_sep=None):
@@ -219,6 +242,114 @@ def find_first_level_groups(string, enclosing, blank_sep=None):
     if blank_sep:
         for start, end in groups:
             string = str_replace(string, start, blank_sep)
-            string = str_replace(string, end-1, blank_sep)
+            string = str_replace(string, end - 1, blank_sep)
 
     return split_on_groups(string, groups)
+
+
+_camel_word2_set = set(('is', 'to',))
+_camel_word3_set = set(('the',))
+
+
+def _camel_split_and_lower(string, i):
+    """Retrieves a tuple (need_split, need_lower)
+
+    need_split is True if this char is a first letter in a camelCasedString.
+    need_lower is True if this char should be lowercased.
+    """
+
+    def islower(c):
+        return c.isalpha() and not c.isupper()
+
+    previous_char2 = string[i - 2] if i > 1 else None
+    previous_char = string[i - 1] if i > 0 else None
+    char = string[i]
+    next_char = string[i + 1] if i + 1 < len(string) else None
+    next_char2 = string[i + 2] if i + 2 < len(string) else None
+
+    char_upper = char.isupper()
+    char_lower = islower(char)
+
+    # previous_char2_lower = islower(previous_char2) if previous_char2 else False
+    previous_char2_upper = previous_char2.isupper() if previous_char2 else False
+
+    previous_char_lower = islower(previous_char) if previous_char else False
+    previous_char_upper = previous_char.isupper() if previous_char else False
+
+    next_char_upper = next_char.isupper() if next_char else False
+    next_char_lower = islower(next_char) if next_char else False
+
+    next_char2_upper = next_char2.isupper() if next_char2 else False
+    # next_char2_lower = islower(next_char2) if next_char2 else False
+
+    mixedcase_word = (previous_char_upper and char_lower and next_char_upper) or \
+                     (previous_char_lower and char_upper and next_char_lower and next_char2_upper) or \
+                     (previous_char2_upper and previous_char_lower and char_upper)
+    if mixedcase_word:
+        word2 = (char + next_char).lower() if next_char else None
+        word3 = (char + next_char + next_char2).lower() if next_char and next_char2 else None
+        word2b = (previous_char2 + previous_char).lower() if previous_char2 and previous_char else None
+        if word2 in _camel_word2_set or word2b in _camel_word2_set or word3 in _camel_word3_set:
+            mixedcase_word = False
+
+    uppercase_word = previous_char_upper and char_upper and next_char_upper or (char_upper and next_char_upper and next_char2_upper)
+
+    need_split = char_upper and previous_char_lower and not mixedcase_word
+
+    if not need_split:
+        previous_char_upper = string[i - 1].isupper() if i > 0 else False
+        next_char_lower = (string[i + 1].isalpha() and not string[i + 1].isupper()) if i + 1 < len(string) else False
+        need_split = char_upper and previous_char_upper and next_char_lower
+        uppercase_word = previous_char_upper and not next_char_lower
+
+    need_lower = not uppercase_word and not mixedcase_word and need_split
+
+    return (need_split, need_lower)
+
+
+def is_camel(string):
+    """
+    >>> is_camel('dogEATDog')
+    True
+    >>> is_camel('DeathToCamelCase')
+    True
+    >>> is_camel('death_to_camel_case')
+    False
+    >>> is_camel('TheBest')
+    True
+    >>> is_camel('The Best')
+    False
+    """
+    for i in range(0, len(string)):
+        need_split, _ = _camel_split_and_lower(string, i)
+        if need_split:
+            return True
+    return False
+
+
+def from_camel(string):
+    """
+    >>> from_camel('dogEATDog') == 'dog EAT dog'
+    True
+    >>> from_camel('DeathToCamelCase') == 'Death to camel case'
+    True
+    >>> from_camel('TheBest') == 'The best'
+    True
+    >>> from_camel('MiXedCaSe is not camelCase') == 'MiXedCaSe is not camel case'
+    True
+    """
+    if not string:
+        return string
+    pieces = []
+
+    for i in range(0, len(string)):
+        char = string[i]
+        need_split, need_lower = _camel_split_and_lower(string, i)
+        if need_split:
+            pieces.append(' ')
+
+        if need_lower:
+            pieces.append(char.lower())
+        else:
+            pieces.append(char)
+    return ''.join(pieces)
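The reworked helpers in the diff above are easiest to see on concrete inputs; a small sketch, with expected values taken from the assertions in test_utils.py:

# Exercising the reworked textutils helpers (expected outputs from test_utils.py above).
from guessit.textutils import reorder_title, from_camel, is_camel

print(reorder_title("Simpsons, The"))                                        # 'The Simpsons'
print(reorder_title("Simpsons, Les", articles=('the', 'le', 'la', 'les')))   # 'Les Simpsons'

print(from_camel("BeatdownFrenchDVDRip.mkv"))    # 'Beatdown french DVD rip.mkv'
print(is_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv"))           # False - dotted release names are left alone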
libs/guessit/tlds-alpha-by-domain.txt  (new file, 341 lines; one TLD per line in the file, wrapped here)
@@ -0,0 +1,341 @@
# Version 2013112900, Last Updated Fri Nov 29 07:07:01 2013 UTC
AC AD AE AERO AF AG AI AL AM AN AO AQ AR ARPA AS ASIA AT AU AW AX AZ
BA BB BD BE BF BG BH BI BIKE BIZ BJ BM BN BO BR BS BT BV BW BY BZ
CA CAMERA CAT CC CD CF CG CH CI CK CL CLOTHING CM CN CO COM CONSTRUCTION CONTRACTORS COOP CR CU CV CW CX CY CZ
DE DIAMONDS DIRECTORY DJ DK DM DO DZ
EC EDU EE EG ENTERPRISES EQUIPMENT ER ES ESTATE ET EU
FI FJ FK FM FO FR
GA GALLERY GB GD GE GF GG GH GI GL GM GN GOV GP GQ GR GRAPHICS GS GT GU GURU GW GY
HK HM HN HOLDINGS HR HT HU
ID IE IL IM IN INFO INT IO IQ IR IS IT
JE JM JO JOBS JP
KE KG KH KI KITCHEN KM KN KP KR KW KY KZ
LA LAND LB LC LI LIGHTING LK LR LS LT LU LV LY
MA MC MD ME MG MH MIL MK ML MM MN MO MOBI MP MQ MR MS MT MU MUSEUM MV MW MX MY MZ
NA NAME NC NE NET NF NG NI NL NO NP NR NU NZ
OM ORG
PA PE PF PG PH PHOTOGRAPHY PK PL PLUMBING PM PN POST PR PRO PS PT PW PY
QA
RE RO RS RU RW
SA SB SC SD SE SEXY SG SH SI SINGLES SJ SK SL SM SN SO SR ST SU SV SX SY SZ
TATTOO TC TD TECHNOLOGY TEL TF TG TH TIPS TJ TK TL TM TN TO TODAY TP TR TRAVEL TT TV TW TZ
UA UG UK US UY UZ
VA VC VE VENTURES VG VI VN VOYAGE VU
WF WS
XN--3E0B707E XN--45BRJ9C XN--80AO21A XN--80ASEHDB XN--80ASWG XN--90A3AC XN--CLCHC0EA0B2G2A9GCD
XN--FIQS8S XN--FIQZ9S XN--FPCRJ9C3D XN--FZC2C9E2C XN--GECRJ9C XN--H2BRJ9C XN--J1AMH XN--J6W193G
XN--KPRW13D XN--KPRY57D XN--L1ACC XN--LGBBAT1AD8J XN--MGB9AWBF XN--MGBA3A4F16A XN--MGBAAM7A8H
XN--MGBAYH7GPA XN--MGBBH1A71E XN--MGBC0A9AZCG XN--MGBERP4A5D4AR XN--MGBX4CD0AB XN--NGBC5AZD
XN--O3CW4H XN--OGBPF8FL XN--P1AI XN--PGBS0DH XN--Q9JYB4C XN--S9BRJ9C XN--UNUP4Y XN--WGBH1C
XN--WGBL6A XN--XKC2AL3HYE2A XN--XKC2DL3A5EE0H XN--YFRO4I67O XN--YGBI2AMMX XXX
YE YT
ZA ZM ZW
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,92 +18,13 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit import base_text_type, Guess
-from guessit.patterns import canonical_form
-from guessit.textutils import clean_string
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def found_property(node, name, confidence):
-    node.guess = Guess({name: node.clean_value}, confidence=confidence, raw=node.value)
-    log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
-
-
-def format_guess(guess):
-    """Format all the found values to their natural type.
-    For instance, a year would be stored as an int value, etc...
-
-    Note that this modifies the dictionary given as input.
-    """
-    for prop, value in guess.items():
-        if prop in ('season', 'episodeNumber', 'year', 'cdNumber',
-                    'cdNumberTotal', 'bonusNumber', 'filmNumber'):
-            guess[prop] = int(guess[prop])
-        elif isinstance(value, base_text_type):
-            if prop in ('edition',):
-                value = clean_string(value)
-            guess[prop] = canonical_form(value).replace('\\', '')
-
-    return guess
-
-
-def find_and_split_node(node, strategy, logger):
-    string = ' %s ' % node.value  # add sentinels
-    for matcher, confidence, args, kwargs in strategy:
-        all_args = [string]
-        if getattr(matcher, 'use_node', False):
-            all_args.append(node)
-        if args:
-            all_args.append(args)
-
-        if kwargs:
-            result, span = matcher(*all_args, **kwargs)
-        else:
-            result, span = matcher(*all_args)
-
-        if result:
-            # readjust span to compensate for sentinels
-            span = (span[0] - 1, span[1] - 1)
-
-            if isinstance(result, Guess):
-                if confidence is None:
-                    confidence = result.confidence(list(result.keys())[0])
-            else:
-                if confidence is None:
-                    confidence = 1.0
-
-            guess = format_guess(Guess(result, confidence=confidence, raw=string[span[0] + 1:span[1] + 1]))
-            msg = 'Found with confidence %.2f: %s' % (confidence, guess)
-            (logger or log).debug(msg)
-
-            node.partition(span)
-            absolute_span = (span[0] + node.offset, span[1] + node.offset)
-            for child in node.children:
-                if child.span == absolute_span:
-                    child.guess = guess
-                else:
-                    find_and_split_node(child, strategy, logger)
-            return
-
-
-class SingleNodeGuesser(object):
-    def __init__(self, guess_func, confidence, logger, *args, **kwargs):
-        self.guess_func = guess_func
-        self.confidence = confidence
-        self.logger = logger
-        self.args = args
-        self.kwargs = kwargs
-
-    def process(self, mtree):
-        # strategy is a list of pairs (guesser, confidence)
-        # - if the guesser returns a guessit.Guess and confidence is specified,
-        # it will override it, otherwise it will leave the guess confidence
-        # - if the guesser returns a simple dict as a guess and confidence is
-        # specified, it will use it, or 1.0 otherwise
-        strategy = [ (self.guess_func, self.confidence, self.args, self.kwargs) ]
-
-        for node in mtree.unidentified_leaves():
-            find_and_split_node(node, strategy, self.logger)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+
+class TransformerException(Exception):
+    def __init__(self, transformer, message):
+        # Call the base class constructor with the parameters it needs
+        Exception.__init__(self, message)
+
+        self.transformer = transformer
libs/guessit/transfo/expected_series.py  (new file, 60 lines)
@@ -0,0 +1,60 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals
from guessit.containers import PropertiesContainer
from guessit.matcher import GuessFinder

from guessit.plugins.transformers import Transformer

import re


class ExpectedSeries(Transformer):
    def __init__(self):
        Transformer.__init__(self, 230)

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        naming_opts.add_argument('-S', '--expected-series', action='append', dest='expected_series',
                                 help='Expected series to parse (can be used multiple times)')

    def should_process(self, mtree, options=None):
        return options and options.get('expected_series')

    def expected_series(self, string, node=None, options=None):
        container = PropertiesContainer(enhance=True, canonical_from_pattern=False)

        for expected_serie in options.get('expected_series'):
            if expected_serie.startswith('re:'):
                expected_serie = expected_serie[3:]
                expected_serie = expected_serie.replace(' ', '-')
                container.register_property('series', expected_serie, enhance=True)
            else:
                expected_serie = re.escape(expected_serie)
                container.register_property('series', expected_serie, enhance=False)

        found = container.find_properties(string, node, options)
        return container.as_guess(found, string)

    def supported_properties(self):
        return ['series']

    def process(self, mtree, options=None):
        GuessFinder(self.expected_series, None, self.log, options).process_nodes(mtree.unidentified_leaves())
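The transformer above only runs when an expected series is supplied, either through the options dict or the -S flag it registers; plain values are matched literally (re.escape'd), while a 're:' prefix switches to regular-expression matching. A hedged usage sketch, reusing the entry point from test_main.py (the file names below are made up for illustration):

# Usage sketch for ExpectedSeries (flags from register_arguments above; file names illustrative).
from guessit import __main__

__main__.main(["-S", "The Simpsons", "the.simpsons.s13e09.avi"], False)   # literal match
__main__.main(["-S", "re:My ?Series", "MySeries.S01E01.avi"], False)      # 're:' prefix -> regex, spaces become '-'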
libs/guessit/transfo/expected_title.py  (new file, 61 lines)
@@ -0,0 +1,61 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import, division, print_function, unicode_literals

from guessit.containers import PropertiesContainer
from guessit.matcher import GuessFinder

from guessit.plugins.transformers import Transformer

import re


class ExpectedTitle(Transformer):
    def __init__(self):
        Transformer.__init__(self, 225)

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        naming_opts.add_argument('-T', '--expected-title', action='append', dest='expected_title',
                                 help='Expected title (can be used multiple times)')

    def should_process(self, mtree, options=None):
        return options and options.get('expected_title')

    def expected_titles(self, string, node=None, options=None):
        container = PropertiesContainer(enhance=True, canonical_from_pattern=False)

        for expected_title in options.get('expected_title'):
            if expected_title.startswith('re:'):
                expected_title = expected_title[3:]
                expected_title = expected_title.replace(' ', '-')
                container.register_property('title', expected_title, enhance=True)
            else:
                expected_title = re.escape(expected_title)
                container.register_property('title', expected_title, enhance=False)

        found = container.find_properties(string, node, options)
        return container.as_guess(found, string)

    def supported_properties(self):
        return ['title']

    def process(self, mtree, options=None):
        GuessFinder(self.expected_titles, None, self.log, options).process_nodes(mtree.unidentified_leaves())
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,16 +18,22 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit.transfo import found_property
-import logging
-
-log = logging.getLogger(__name__)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit.matcher import found_property
 
 
-def process(mtree):
-    def previous_group(g):
-        for leaf in mtree.unidentified_leaves()[::-1]:
-            if leaf.node_idx < g.node_idx:
-                return leaf
+class GuessBonusFeatures(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, -150)
+
+    def supported_properties(self):
+        return ['bonusNumber', 'bonusTitle', 'filmNumber', 'filmSeries', 'title', 'series']
+
+    def process(self, mtree, options=None):
+        def previous_group(g):
+            for leaf in reversed(list(mtree.unidentified_leaves())):
+                if leaf.node_idx < g.node_idx:
+                    return leaf
@@ -39,23 +45,23 @@ def process(mtree):
-    def same_group(g1, g2):
-        return g1.node_idx[:2] == g2.node_idx[:2]
-
-    bonus = [ node for node in mtree.leaves() if 'bonusNumber' in node.guess ]
-    if bonus:
-        bonusTitle = next_group(bonus[0])
-        if same_group(bonusTitle, bonus[0]):
-            found_property(bonusTitle, 'bonusTitle', 0.8)
-
-    filmNumber = [ node for node in mtree.leaves()
-                   if 'filmNumber' in node.guess ]
-    if filmNumber:
-        filmSeries = previous_group(filmNumber[0])
-        found_property(filmSeries, 'filmSeries', 0.9)
-
-        title = next_group(filmNumber[0])
-        found_property(title, 'title', 0.9)
-
-    season = [ node for node in mtree.leaves() if 'season' in node.guess ]
-    if season and 'bonusNumber' in mtree.info:
-        series = previous_group(season[0])
-        if same_group(series, season[0]):
-            found_property(series, 'series', 0.9)
+        def same_group(g1, g2):
+            return g1.node_idx[:2] == g2.node_idx[:2]
+
+        bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
+        if bonus:
+            bonus_title = next_group(bonus[0])
+            if bonus_title and same_group(bonus_title, bonus[0]):
+                found_property(bonus_title, 'bonusTitle', confidence=0.8)
+
+        film_number = [node for node in mtree.leaves()
+                       if 'filmNumber' in node.guess]
+        if film_number:
+            film_series = previous_group(film_number[0])
+            found_property(film_series, 'filmSeries', confidence=0.9)
+
+            title = next_group(film_number[0])
+            found_property(title, 'title', confidence=0.9)
+
+        season = [node for node in mtree.leaves() if 'season' in node.guess]
+        if season and 'bonusNumber' in mtree.info:
+            series = previous_group(season[0])
+            if same_group(series, season[0]):
+                found_property(series, 'series', confidence=0.9)
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,31 +18,107 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit.country import Country
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from babelfish import Country
 from guessit import Guess
+from guessit.textutils import iter_words
+from guessit.matcher import GuessFinder, found_guess
+from guessit.language import LNG_COMMON_WORDS
+import babelfish
 import logging
 
 log = logging.getLogger(__name__)
 
-# list of common words which could be interpreted as countries, but which
-# are far too common to be able to say they represent a country
-country_common_words = frozenset([ 'bt', 'bb' ])
-
-def process(mtree):
-    for node in mtree.unidentified_leaves():
-        if len(node.node_idx) == 2:
-            c = node.value[1:-1].lower()
-            if c in country_common_words:
-                continue
-
-            # only keep explicit groups (enclosed in parentheses/brackets)
-            if node.value[0] + node.value[-1] not in ['()', '[]', '{}']:
-                continue
-
-            try:
-                country = Country(c, strict=True)
-            except ValueError:
-                continue
-
-            node.guess = Guess(country=country, confidence=1.0, raw=c)
+
+class GuessCountry(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, -170)
+        self.replace_language = frozenset(['uk'])
+
+    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
+        naming_opts.add_argument('-C', '--allowed-country', action='append', dest='allowed_countries',
+                                 help='Allowed country (can be used multiple times)')
+
+    def supported_properties(self):
+        return ['country']
+
+    def should_process(self, mtree, options=None):
+        options = options or {}
+        return options.get('country', True)
+
+    def _scan_country(self, country, strict=False):
+        """
+        Find a country if it is at the start or end of country string
+        """
+        words_match = list(iter_words(country.lower()))
+        s = ""
+        start = None
+
+        for word_match in words_match:
+            if not start:
+                start = word_match.start(0)
+            s += word_match.group(0)
+            try:
+                return Country.fromguessit(s), (start, word_match.end(0))
+            except babelfish.Error:
+                continue
+
+        words_match.reverse()
+        s = ""
+        end = None
+        for word_match in words_match:
+            if not end:
+                end = word_match.end(0)
+            s = word_match.group(0) + s
+            try:
+                return Country.fromguessit(s), (word_match.start(0), end)
+            except babelfish.Error:
+                continue
+
+        return Country.fromguessit(country), (start, end)
+
+    def is_valid_country(self, country, options=None):
+        if options and options.get('allowed_countries'):
+            allowed_countries = options.get('allowed_countries')
+            return country.name.lower() in allowed_countries or country.alpha2.lower() in allowed_countries
+        else:
+            return (country.name.lower() not in LNG_COMMON_WORDS and
                    country.alpha2.lower() not in LNG_COMMON_WORDS)
|
|
||||||
|
def guess_country(self, string, node=None, options=None):
|
||||||
|
c = string.strip().lower()
|
||||||
|
if c not in LNG_COMMON_WORDS:
|
||||||
|
try:
|
||||||
|
country, country_span = self._scan_country(c, True)
|
||||||
|
if self.is_valid_country(country, options):
|
||||||
|
guess = Guess(country=country, confidence=1.0, input=node.value, span=(country_span[0] + 1, country_span[1] + 1))
|
||||||
|
return guess
|
||||||
|
except babelfish.Error:
|
||||||
|
pass
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_country, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
for node in mtree.leaves_containing('language'):
|
||||||
|
c = node.clean_value.lower()
|
||||||
|
if c in self.replace_language:
|
||||||
|
node.guess.set('language', None)
|
||||||
|
try:
|
||||||
|
country = Country.fromguessit(c)
|
||||||
|
if self.is_valid_country(country, options):
|
||||||
|
guess = Guess(country=country, confidence=0.9, input=node.value, span=node.span)
|
||||||
|
found_guess(node, guess, logger=log)
|
||||||
|
except babelfish.Error:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def post_process(self, mtree, options=None, *args, **kwargs):
|
||||||
|
# if country is in the guessed properties, make it part of the series name
|
||||||
|
series_leaves = list(mtree.leaves_containing('series'))
|
||||||
|
country_leaves = list(mtree.leaves_containing('country'))
|
||||||
|
|
||||||
|
if series_leaves and country_leaves:
|
||||||
|
country_leaf = country_leaves[0]
|
||||||
|
for serie_leaf in series_leaves:
|
||||||
|
serie_leaf.guess['series'] += ' (%s)' % str(country_leaf.guess['country'].guessit)
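The new _scan_country helper grows a candidate from the first words of the raw string, retries from the last words, and keeps whichever span babelfish accepts as a country. A rough standalone illustration of that scanning strategy, with a toy lookup table standing in for babelfish.Country.fromguessit; the table and the helper name are ours, not guessit's:

# toy country table, only for illustration; guessit delegates this to babelfish
KNOWN_COUNTRIES = {
    'us': 'United States',
    'uk': 'United Kingdom',
    'new zealand': 'New Zealand',
}

def scan_country(text):
    words = text.lower().split()
    # grow a candidate from the start of the string...
    for end in range(1, len(words) + 1):
        candidate = ' '.join(words[:end])
        if candidate in KNOWN_COUNTRIES:
            return KNOWN_COUNTRIES[candidate], (0, end)
    # ...then grow one from the end
    for start in range(len(words) - 1, -1, -1):
        candidate = ' '.join(words[start:])
        if candidate in KNOWN_COUNTRIES:
            return KNOWN_COUNTRIES[candidate], (start, len(words))
    return None, None

print(scan_country('new zealand 2010'))   # ('New Zealand', (0, 2))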
@@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@@ -18,21 +18,32 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder
|
||||||
from guessit.date import search_date
|
from guessit.date import search_date
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def guess_date(string):
|
class GuessDate(Transformer):
|
||||||
date, span = search_date(string)
|
def __init__(self):
|
||||||
|
Transformer.__init__(self, 50)
|
||||||
|
|
||||||
|
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
|
||||||
|
naming_opts.add_argument('-Y', '--date-year-first', action='store_true', dest='date_year_first', default=None,
|
||||||
|
help='If short date is found, consider the first digits as the year.')
|
||||||
|
naming_opts.add_argument('-D', '--date-day-first', action='store_true', dest='date_day_first', default=None,
|
||||||
|
help='If short date is found, consider the second digits as the day.')
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return ['date']
|
||||||
|
|
||||||
|
def guess_date(self, string, node=None, options=None):
|
||||||
|
date, span = search_date(string, options.get('date_year_first') if options else False, options.get('date_day_first') if options else False)
|
||||||
if date:
|
if date:
|
||||||
return { 'date': date }, span
|
return {'date': date}, span
|
||||||
else:
|
else:
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
def process(mtree):
|
GuessFinder(self.guess_date, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
SingleNodeGuesser(guess_date, 1.0, log).process(mtree)
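The rewritten transformer threads two new command-line switches (-Y/--date-year-first and -D/--date-day-first) through to search_date, which decide how an ambiguous short date is read. The same ambiguity can be demonstrated with the third-party python-dateutil package, used here purely to illustrate what the flags choose between:

from dateutil import parser

ambiguous = '01-02-2010'
print(parser.parse(ambiguous))                  # 2010-01-02, month read first
print(parser.parse(ambiguous, dayfirst=True))   # 2010-02-01, day read first

short = '10-02-01'
print(parser.parse(short, yearfirst=True))      # 2010-02-01, leading digits are the year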
libs/guessit/transfo/guess_episode_details.py (new file, 64 lines)
@@ -0,0 +1,64 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# GuessIt - A library for guessing information from filenames
|
||||||
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
|
#
|
||||||
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# GuessIt is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# Lesser GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the Lesser GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import found_guess
|
||||||
|
from guessit.containers import PropertiesContainer
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
|
||||||
|
class GuessEpisodeDetails(Transformer):
|
||||||
|
def __init__(self):
|
||||||
|
Transformer.__init__(self, -205)
|
||||||
|
self.container = PropertiesContainer()
|
||||||
|
self.container.register_property('episodeDetails', 'Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired')
|
||||||
|
self.container.register_property('episodeDetails', 'Extras?', canonical_form='Extras')
|
||||||
|
|
||||||
|
def guess_details(self, string, node=None, options=None):
|
||||||
|
properties = self.container.find_properties(string, node, options, 'episodeDetails', multiple=True)
|
||||||
|
guesses = self.container.as_guess(properties, multiple=True)
|
||||||
|
return guesses
|
||||||
|
|
||||||
|
def second_pass_options(self, mtree, options=None):
|
||||||
|
if not mtree.guess.get('type', '').startswith('episode'):
|
||||||
|
for unidentified_leaf in mtree.unidentified_leaves():
|
||||||
|
properties = self.container.find_properties(unidentified_leaf.value, unidentified_leaf, options, 'episodeDetails')
|
||||||
|
guess = self.container.as_guess(properties)
|
||||||
|
if guess:
|
||||||
|
return {'type': 'episode'}
|
||||||
|
return None
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return self.container.get_supported_properties()
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
if (mtree.guess.get('type', '').startswith('episode') and
|
||||||
|
(not mtree.info.get('episodeNumber') or
|
||||||
|
mtree.info.get('season') == 0)):
|
||||||
|
|
||||||
|
for leaf in itertools.chain(mtree.leaves_containing('title'),
|
||||||
|
mtree.unidentified_leaves()):
|
||||||
|
guesses = self.guess_details(leaf.value, leaf, options)
|
||||||
|
for guess in guesses:
|
||||||
|
found_guess(leaf, guess, update_guess=False)
|
||||||
|
|
||||||
|
return None
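This new transformer only registers a short keyword list for episodeDetails, with 'Extras?' collapsing to the canonical form 'Extras', and promotes the file to type episode on a second pass when a keyword is found. A rough standalone equivalent of the keyword matching; the patterns are taken from the register_property calls above, the helper name is ours:

import re

# pattern -> canonical value, mirroring the registrations above
EPISODE_DETAILS = [
    (re.compile(r'\b(special|bonus|omake|ova|oav|pilot|unaired)\b', re.IGNORECASE), None),
    (re.compile(r'\bextras?\b', re.IGNORECASE), 'Extras'),
]

def find_episode_details(name):
    found = []
    for pattern, canonical in EPISODE_DETAILS:
        for match in pattern.finditer(name):
            # keep the canonical form when one is registered, else the matched word
            found.append(canonical or match.group(0).title())
    return found

print(find_episode_details('Show.S00E01.Unaired.Pilot.Extra.avi'))
# ['Unaired', 'Pilot', 'Extras']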
@@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@@ -18,38 +18,53 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit.transfo import found_property
|
|
||||||
from guessit.patterns import non_episode_title, unlikely_series
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.plugins.transformers import Transformer, get_transformer
|
||||||
|
from guessit.textutils import reorder_title
|
||||||
|
|
||||||
|
from guessit.matcher import found_property
|
||||||
|
|
||||||
|
|
||||||
def match_from_epnum_position(mtree, node):
|
class GuessEpisodeInfoFromPosition(Transformer):
|
||||||
|
def __init__(self):
|
||||||
|
Transformer.__init__(self, -200)
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return ['title', 'series']
|
||||||
|
|
||||||
|
def match_from_epnum_position(self, mtree, node, options):
|
||||||
epnum_idx = node.node_idx
|
epnum_idx = node.node_idx
|
||||||
|
|
||||||
# a few helper functions to be able to filter using high-level semantics
|
# a few helper functions to be able to filter using high-level semantics
|
||||||
def before_epnum_in_same_pathgroup():
|
def before_epnum_in_same_pathgroup():
|
||||||
return [ leaf for leaf in mtree.unidentified_leaves()
|
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||||
if (leaf.node_idx[0] == epnum_idx[0] and
|
if (leaf.node_idx[0] == epnum_idx[0] and
|
||||||
leaf.node_idx[1:] < epnum_idx[1:]) ]
|
leaf.node_idx[1:] < epnum_idx[1:])]
|
||||||
|
|
||||||
def after_epnum_in_same_pathgroup():
|
def after_epnum_in_same_pathgroup():
|
||||||
return [ leaf for leaf in mtree.unidentified_leaves()
|
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||||
if (leaf.node_idx[0] == epnum_idx[0] and
|
if (leaf.node_idx[0] == epnum_idx[0] and
|
||||||
leaf.node_idx[1:] > epnum_idx[1:]) ]
|
leaf.node_idx[1:] > epnum_idx[1:])]
|
||||||
|
|
||||||
def after_epnum_in_same_explicitgroup():
|
def after_epnum_in_same_explicitgroup():
|
||||||
return [ leaf for leaf in mtree.unidentified_leaves()
|
return [leaf for leaf in mtree.unidentified_leaves(lambda x: len(x.clean_value) > 1)
|
||||||
if (leaf.node_idx[:2] == epnum_idx[:2] and
|
if (leaf.node_idx[:2] == epnum_idx[:2] and
|
||||||
leaf.node_idx[2:] > epnum_idx[2:]) ]
|
leaf.node_idx[2:] > epnum_idx[2:])]
|
||||||
|
|
||||||
# epnumber is the first group and there are only 2 after it in same
|
# epnumber is the first group and there are only 2 after it in same
|
||||||
# path group
|
# path group
|
||||||
# -> series title - episode title
|
# -> series title - episode title
|
||||||
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
|
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||||
if n.clean_value.lower() not in non_episode_title ]
|
|
||||||
|
if ('title' not in mtree.info and # no title
|
||||||
|
'series' in mtree.info and # series present
|
||||||
|
before_epnum_in_same_pathgroup() == [] and # no groups before
|
||||||
|
len(title_candidates) == 1): # only 1 group after
|
||||||
|
|
||||||
|
found_property(title_candidates[0], 'title', confidence=0.4)
|
||||||
|
return
|
||||||
|
|
||||||
if ('title' not in mtree.info and # no title
|
if ('title' not in mtree.info and # no title
|
||||||
before_epnum_in_same_pathgroup() == [] and # no groups before
|
before_epnum_in_same_pathgroup() == [] and # no groups before
|
||||||
len(title_candidates) == 2): # only 2 groups after
|
len(title_candidates) == 2): # only 2 groups after
|
||||||
|
|
@@ -65,18 +80,14 @@ def match_from_epnum_position(mtree, node):
|
||||||
found_property(series_candidates[0], 'series', confidence=0.7)
|
found_property(series_candidates[0], 'series', confidence=0.7)
|
||||||
|
|
||||||
# only 1 group after (in the same path group) and it's probably the
|
# only 1 group after (in the same path group) and it's probably the
|
||||||
# episode title
|
# episode title.
|
||||||
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
|
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||||
if n.clean_value.lower() not in non_episode_title ]
|
|
||||||
|
|
||||||
if len(title_candidates) == 1:
|
if len(title_candidates) == 1:
|
||||||
found_property(title_candidates[0], 'title', confidence=0.5)
|
found_property(title_candidates[0], 'title', confidence=0.5)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
# try in the same explicit group, with lower confidence
|
# try in the same explicit group, with lower confidence
|
||||||
title_candidates = [ n for n in after_epnum_in_same_explicitgroup()
|
title_candidates = self._filter_candidates(after_epnum_in_same_explicitgroup(), options)
|
||||||
if n.clean_value.lower() not in non_episode_title
|
|
||||||
]
|
|
||||||
if len(title_candidates) == 1:
|
if len(title_candidates) == 1:
|
||||||
found_property(title_candidates[0], 'title', confidence=0.4)
|
found_property(title_candidates[0], 'title', confidence=0.4)
|
||||||
return
|
return
|
||||||
|
|
@@ -85,8 +96,7 @@ def match_from_epnum_position(mtree, node):
|
||||||
return
|
return
|
||||||
|
|
||||||
# get the one with the longest value
|
# get the one with the longest value
|
||||||
title_candidates = [ n for n in after_epnum_in_same_pathgroup()
|
title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup(), options)
|
||||||
if n.clean_value.lower() not in non_episode_title ]
|
|
||||||
if title_candidates:
|
if title_candidates:
|
||||||
maxidx = -1
|
maxidx = -1
|
||||||
maxv = -1
|
maxv = -1
|
||||||
|
|
@@ -96,51 +106,76 @@ def match_from_epnum_position(mtree, node):
|
||||||
maxv = len(c.clean_value)
|
maxv = len(c.clean_value)
|
||||||
found_property(title_candidates[maxidx], 'title', confidence=0.3)
|
found_property(title_candidates[maxidx], 'title', confidence=0.3)
|
||||||
|
|
||||||
|
def should_process(self, mtree, options=None):
|
||||||
|
options = options or {}
|
||||||
|
return not options.get('skip_title') and mtree.guess.get('type', '').startswith('episode')
|
||||||
|
|
||||||
def process(mtree):
|
def _filter_candidates(self, candidates, options):
|
||||||
|
episode_details_transformer = get_transformer('guess_episode_details')
|
||||||
|
if episode_details_transformer:
|
||||||
|
return [n for n in candidates if not episode_details_transformer.container.find_properties(n.value, n, options, re_match=True)]
|
||||||
|
else:
|
||||||
|
return candidates
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
"""
|
||||||
|
try to identify the remaining unknown groups by looking at their
|
||||||
|
position relative to other known elements
|
||||||
|
"""
|
||||||
eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
|
eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]
|
||||||
|
|
||||||
|
if not eps:
|
||||||
|
eps = [node for node in mtree.leaves() if 'date' in node.guess]
|
||||||
|
|
||||||
if eps:
|
if eps:
|
||||||
match_from_epnum_position(mtree, eps[0])
|
self.match_from_epnum_position(mtree, eps[0], options)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# if we don't have the episode number, but at least 2 groups in the
|
# if we don't have the episode number, but at least 2 groups in the
|
||||||
# basename, then it's probably series - eptitle
|
# basename, then it's probably series - eptitle
|
||||||
basename = mtree.node_at((-2,))
|
basename = mtree.node_at((-2,))
|
||||||
title_candidates = [ n for n in basename.unidentified_leaves()
|
|
||||||
if n.clean_value.lower() not in non_episode_title
|
|
||||||
]
|
|
||||||
|
|
||||||
if len(title_candidates) >= 2:
|
title_candidates = self._filter_candidates(basename.unidentified_leaves(), options)
|
||||||
found_property(title_candidates[0], 'series', 0.4)
|
|
||||||
found_property(title_candidates[1], 'title', 0.4)
|
if len(title_candidates) >= 2 and 'series' not in mtree.info:
|
||||||
|
found_property(title_candidates[0], 'series', confidence=0.4)
|
||||||
|
found_property(title_candidates[1], 'title', confidence=0.4)
|
||||||
elif len(title_candidates) == 1:
|
elif len(title_candidates) == 1:
|
||||||
# but if there's only one candidate, it's probably the series name
|
# but if there's only one candidate, it's probably the series name
|
||||||
found_property(title_candidates[0], 'series', 0.4)
|
found_property(title_candidates[0], 'series' if 'series' not in mtree.info else 'title', confidence=0.4)
|
||||||
|
|
||||||
# if we only have 1 remaining valid group in the folder containing the
|
# if we only have 1 remaining valid group in the folder containing the
|
||||||
# file, then it's likely that it is the series name
|
# file, then it's likely that it is the series name
|
||||||
try:
|
try:
|
||||||
series_candidates = mtree.node_at((-3,)).unidentified_leaves()
|
series_candidates = list(mtree.node_at((-3,)).unidentified_leaves())
|
||||||
except ValueError:
|
except ValueError:
|
||||||
series_candidates = []
|
series_candidates = []
|
||||||
|
|
||||||
if len(series_candidates) == 1:
|
if len(series_candidates) == 1:
|
||||||
found_property(series_candidates[0], 'series', 0.3)
|
found_property(series_candidates[0], 'series', confidence=0.3)
|
||||||
|
|
||||||
# if there's a path group that only contains the season info, then the
|
# if there's a path group that only contains the season info, then the
|
||||||
# previous one is most likely the series title (ie: ../series/season X/..)
|
# previous one is most likely the series title (ie: ../series/season X/..)
|
||||||
eps = [ node for node in mtree.nodes()
|
eps = [node for node in mtree.nodes()
|
||||||
if 'season' in node.guess and 'episodeNumber' not in node.guess ]
|
if 'season' in node.guess and 'episodeNumber' not in node.guess]
|
||||||
|
|
||||||
if eps:
|
if eps:
|
||||||
previous = [ node for node in mtree.unidentified_leaves()
|
previous = [node for node in mtree.unidentified_leaves()
|
||||||
if node.node_idx[0] == eps[0].node_idx[0] - 1 ]
|
if node.node_idx[0] == eps[0].node_idx[0] - 1]
|
||||||
if len(previous) == 1:
|
if len(previous) == 1:
|
||||||
found_property(previous[0], 'series', 0.5)
|
found_property(previous[0], 'series', confidence=0.5)
|
||||||
|
|
||||||
# reduce the confidence of unlikely series
|
# If we have found title without any serie name, replace it by the serie name.
|
||||||
|
if 'series' not in mtree.info and 'title' in mtree.info:
|
||||||
|
title_leaf = mtree.first_leaf_containing('title')
|
||||||
|
metadata = title_leaf.guess.metadata('title')
|
||||||
|
value = title_leaf.guess['title']
|
||||||
|
del title_leaf.guess['title']
|
||||||
|
title_leaf.guess.set('series', value, metadata=metadata)
|
||||||
|
|
||||||
|
def post_process(self, mtree, options=None):
|
||||||
for node in mtree.nodes():
|
for node in mtree.nodes():
|
||||||
if 'series' in node.guess:
|
if 'series' not in node.guess:
|
||||||
if node.guess['series'].lower() in unlikely_series:
|
continue
|
||||||
new_confidence = node.guess.confidence('series') * 0.5
|
|
||||||
node.guess.set_confidence('series', new_confidence)
|
node.guess['series'] = reorder_title(node.guess['series'])
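All of the positional reasoning above runs on node_idx tuples: the first component is the path group (directory level) and the remaining components order the text groups inside it, so "same path group, after the episode number" is just a tuple comparison. A toy illustration of that filter; the leaf list and indices are invented for the example:

# (node_idx, text) pairs standing in for unidentified leaves of a match tree
leaves = [((1, 0), 'Show Name'),
          ((1, 2), 'Episode Title'),
          ((0, 0), 'TV Shows')]
epnum_idx = (1, 1)   # where something like S01E03 was matched

def after_epnum_in_same_pathgroup():
    return [text for idx, text in leaves
            if idx[0] == epnum_idx[0] and idx[1:] > epnum_idx[1:]]

print(after_epnum_in_same_pathgroup())   # ['Episode Title']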
@@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@@ -18,49 +18,176 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit import Guess
|
|
||||||
from guessit.transfo import SingleNodeGuesser
|
from guessit.plugins.transformers import Transformer
|
||||||
from guessit.patterns import episode_rexps
|
from guessit.matcher import GuessFinder
|
||||||
|
from guessit.patterns import sep, build_or_pattern
|
||||||
|
from guessit.containers import PropertiesContainer, WeakValidator, NoValidator, ChainedValidator, DefaultValidator, \
|
||||||
|
FormatterValidator
|
||||||
|
from guessit.patterns.numeral import numeral, digital_numeral, parse_numeral
|
||||||
import re
|
import re
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
def number_list(s):
|
|
||||||
l = [ int(n) for n in re.sub('[^0-9]+', ' ', s).split() ]
|
|
||||||
|
|
||||||
if len(l) == 2:
|
|
||||||
# it is an episode interval, return all numbers in between
|
|
||||||
return range(l[0], l[1]+1)
|
|
||||||
|
|
||||||
return l
|
|
||||||
|
|
||||||
def guess_episodes_rexps(string):
|
|
||||||
for rexp, confidence, span_adjust in episode_rexps:
|
|
||||||
match = re.search(rexp, string, re.IGNORECASE)
|
|
||||||
if match:
|
|
||||||
span = (match.start() + span_adjust[0],
|
|
||||||
match.end() + span_adjust[1])
|
|
||||||
guess = Guess(match.groupdict(), confidence=confidence, raw=string[span[0]:span[1]])
|
|
||||||
|
|
||||||
# decide whether we have only a single episode number or an
|
|
||||||
# episode list
|
|
||||||
if guess.get('episodeNumber'):
|
|
||||||
eplist = number_list(guess['episodeNumber'])
|
|
||||||
guess.set('episodeNumber', eplist[0], confidence=confidence, raw=string[span[0]:span[1]])
|
|
||||||
|
|
||||||
if len(eplist) > 1:
|
|
||||||
guess.set('episodeList', eplist, confidence=confidence, raw=string[span[0]:span[1]])
|
|
||||||
|
|
||||||
if guess.get('bonusNumber'):
|
|
||||||
eplist = number_list(guess['bonusNumber'])
|
|
||||||
guess.set('bonusNumber', eplist[0], confidence=confidence, raw=string[span[0]:span[1]])
|
|
||||||
|
|
||||||
return guess, span
|
|
||||||
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
|
|
||||||
def process(mtree):
|
class GuessEpisodesRexps(Transformer):
|
||||||
SingleNodeGuesser(guess_episodes_rexps, None, log).process(mtree)
|
def __init__(self):
|
||||||
|
Transformer.__init__(self, 20)
|
||||||
|
|
||||||
|
range_separators = ['-', 'to', 'a']
|
||||||
|
discrete_separators = ['&', 'and', 'et']
|
||||||
|
of_separators = ['of', 'sur', '/', '\\']
|
||||||
|
|
||||||
|
season_words = ['seasons?', 'saisons?', 'series?']
|
||||||
|
episode_words = ['episodes?']
|
||||||
|
|
||||||
|
season_markers = ['s']
|
||||||
|
episode_markers = ['e', 'ep']
|
||||||
|
|
||||||
|
discrete_sep = sep
|
||||||
|
for range_separator in range_separators:
|
||||||
|
discrete_sep = discrete_sep.replace(range_separator, '')
|
||||||
|
discrete_separators.append(discrete_sep)
|
||||||
|
all_separators = list(range_separators)
|
||||||
|
all_separators.extend(discrete_separators)
|
||||||
|
|
||||||
|
self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
|
||||||
|
|
||||||
|
range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
|
||||||
|
discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
|
||||||
|
all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
|
||||||
|
of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)
|
||||||
|
|
||||||
|
season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
|
||||||
|
episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)
|
||||||
|
|
||||||
|
season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
|
||||||
|
episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)
|
||||||
|
|
||||||
|
def list_parser(value, property_list_name, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
|
||||||
|
discrete_elements = filter(lambda x: x != '', discrete_separators_re.split(value))
|
||||||
|
discrete_elements = [x.strip() for x in discrete_elements]
|
||||||
|
|
||||||
|
proper_discrete_elements = []
|
||||||
|
i = 0
|
||||||
|
while i < len(discrete_elements):
|
||||||
|
if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i+1]):
|
||||||
|
proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i+1] + discrete_elements[i+2])
|
||||||
|
i += 3
|
||||||
|
else:
|
||||||
|
match = range_separators_re.search(discrete_elements[i])
|
||||||
|
if match and match.start() == 0:
|
||||||
|
proper_discrete_elements[i-1] = proper_discrete_elements[i-1] + discrete_elements[i]
|
||||||
|
elif match and match.end() == len(discrete_elements[i]):
|
||||||
|
proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1])
|
||||||
|
else:
|
||||||
|
proper_discrete_elements.append(discrete_elements[i])
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
discrete_elements = proper_discrete_elements
|
||||||
|
|
||||||
|
ret = []
|
||||||
|
|
||||||
|
for discrete_element in discrete_elements:
|
||||||
|
range_values = filter(lambda x: x != '', range_separators_re.split(discrete_element))
|
||||||
|
range_values = [x.strip() for x in range_values]
|
||||||
|
if len(range_values) > 1:
|
||||||
|
for x in range(0, len(range_values) - 1):
|
||||||
|
start_range_ep = parse_numeral(range_values[x])
|
||||||
|
end_range_ep = parse_numeral(range_values[x+1])
|
||||||
|
for range_ep in range(start_range_ep, end_range_ep + 1):
|
||||||
|
if range_ep not in ret:
|
||||||
|
ret.append(range_ep)
|
||||||
|
else:
|
||||||
|
discrete_value = parse_numeral(discrete_element)
|
||||||
|
if discrete_value not in ret:
|
||||||
|
ret.append(discrete_value)
|
||||||
|
|
||||||
|
if len(ret) > 1:
|
||||||
|
if not allow_discrete:
|
||||||
|
valid_ret = list()
|
||||||
|
# replace discrete elements by ranges
|
||||||
|
valid_ret.append(ret[0])
|
||||||
|
for i in range(0, len(ret) - 1):
|
||||||
|
previous = valid_ret[len(valid_ret) - 1]
|
||||||
|
if ret[i+1] < previous:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
valid_ret.append(ret[i+1])
|
||||||
|
ret = valid_ret
|
||||||
|
if fill_gaps:
|
||||||
|
ret = list(range(min(ret), max(ret) + 1))
|
||||||
|
if len(ret) > 1:
|
||||||
|
return {None: ret[0], property_list_name: ret}
|
||||||
|
if len(ret) > 0:
|
||||||
|
return ret[0]
|
||||||
|
return None
|
||||||
|
|
||||||
|
def episode_parser_x(value):
|
||||||
|
return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE))
|
||||||
|
|
||||||
|
def episode_parser_e(value):
|
||||||
|
return list_parser(value, 'episodeList', discrete_separators_re=re.compile('e', re.IGNORECASE), fill_gaps=True)
|
||||||
|
|
||||||
|
def episode_parser(value):
|
||||||
|
return list_parser(value, 'episodeList')
|
||||||
|
|
||||||
|
def season_parser(value):
|
||||||
|
return list_parser(value, 'seasonList')
|
||||||
|
|
||||||
|
class ResolutionCollisionValidator(object):
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
return len(match.group(2)) < 3 # limit
|
||||||
|
|
||||||
|
self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep, confidence=1.0, formatter={None: parse_numeral, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))
|
||||||
|
|
||||||
|
self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser}, validator=NoValidator())
|
||||||
|
# self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||||
|
self.container.register_property(None, sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||||
|
self.container.register_property(None, r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||||
|
self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'season': season_parser}, validator=NoValidator())
|
||||||
|
|
||||||
|
self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.7, formatter=parse_numeral)
|
||||||
|
|
||||||
|
|
||||||
|
self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||||
|
self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||||
|
|
||||||
|
self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||||
|
self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
|
||||||
|
|
||||||
|
|
||||||
|
self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
|
||||||
|
self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
|
||||||
|
self.container.register_property('episodeNumber', r'^ ?0(\d{1,2})' + sep, confidence=0.4, formatter=parse_numeral)
|
||||||
|
self.container.register_property('episodeNumber', sep + r'(\d{2}) ?$', confidence=0.4, formatter=parse_numeral)
|
||||||
|
self.container.register_property('episodeNumber', sep + r'0(\d{1,2}) ?$', confidence=0.4, formatter=parse_numeral)
|
||||||
|
|
||||||
|
self.container.register_property(None, r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
|
||||||
|
self.container.register_property(None, r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))', confidence=0.7, formatter=parse_numeral)
|
||||||
|
|
||||||
|
self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())
|
||||||
|
|
||||||
|
self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))', confidence=1.0, formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
|
||||||
|
|
||||||
|
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
|
||||||
|
naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number', default=False,
|
||||||
|
help='Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
|
||||||
|
'it will be guessed as season 2, episodeNumber 13')
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return ['episodeNumber', 'season', 'episodeList', 'seasonList', 'episodeCount', 'seasonCount', 'version', 'other']
|
||||||
|
|
||||||
|
def guess_episodes_rexps(self, string, node=None, options=None):
|
||||||
|
found = self.container.find_properties(string, node, options)
|
||||||
|
return self.container.as_guess(found, string)
|
||||||
|
|
||||||
|
def should_process(self, mtree, options=None):
|
||||||
|
return mtree.guess.get('type', '').startswith('episode')
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
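The rewritten episode matcher feeds the captured number strings through list_parser, which splits on "discrete" separators (&, and, ...) and expands "range" separators (-, to, ...) into every episode in between. A simplified standalone version of that expansion; the separator lists are trimmed down and the helper name is ours:

import re

RANGE_SEP = re.compile(r'\s*(?:-|to)\s*', re.IGNORECASE)
DISCRETE_SEP = re.compile(r'\s*(?:&|and|,)\s*', re.IGNORECASE)

def parse_episode_list(value):
    episodes = []
    for chunk in DISCRETE_SEP.split(value):
        bounds = [int(n) for n in RANGE_SEP.split(chunk) if n]
        if len(bounds) == 2:
            # a range such as 01-03 expands to every episode in between
            episodes.extend(range(bounds[0], bounds[1] + 1))
        elif bounds:
            episodes.append(bounds[0])
    return episodes

print(parse_episode_list('01-03'))   # [1, 2, 3]
print(parse_episode_list('2 & 5'))   # [2, 5]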
@@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@@ -18,173 +18,196 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit import Guess
|
|
||||||
from guessit.patterns import (subtitle_exts, info_exts, video_exts, episode_rexps,
|
import mimetypes
|
||||||
find_properties, compute_canonical_form)
|
|
||||||
from guessit.date import valid_year
|
|
||||||
from guessit.textutils import clean_string
|
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import mimetypes
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.guess import Guess
|
||||||
|
from guessit.patterns.extension import subtitle_exts, info_exts, video_exts
|
||||||
|
from guessit.transfo import TransformerException
|
||||||
|
from guessit.plugins.transformers import Transformer, get_transformer
|
||||||
|
from guessit.matcher import log_found_guess, found_guess, found_property
|
||||||
|
|
||||||
# List of well known movies and series, hardcoded because they cannot be
|
|
||||||
# guessed appropriately otherwise
|
|
||||||
MOVIES = [ 'OSS 117' ]
|
|
||||||
SERIES = [ 'Band of Brothers' ]
|
|
||||||
|
|
||||||
MOVIES = [ m.lower() for m in MOVIES ]
|
class GuessFiletype(Transformer):
|
||||||
SERIES = [ s.lower() for s in SERIES ]
|
def __init__(self):
|
||||||
|
Transformer.__init__(self, 200)
|
||||||
|
|
||||||
|
# List of well known movies and series, hardcoded because they cannot be
|
||||||
|
# guessed appropriately otherwise
|
||||||
|
MOVIES = ['OSS 117']
|
||||||
|
SERIES = ['Band of Brothers']
|
||||||
|
|
||||||
|
MOVIES = [m.lower() for m in MOVIES]
|
||||||
|
SERIES = [s.lower() for s in SERIES]
|
||||||
|
|
||||||
|
def guess_filetype(self, mtree, options=None):
|
||||||
|
options = options or {}
|
||||||
|
|
||||||
def guess_filetype(mtree, filetype):
|
|
||||||
# put the filetype inside a dummy container to be able to have the
|
# put the filetype inside a dummy container to be able to have the
|
||||||
# following functions work correctly as closures
|
# following functions work correctly as closures
|
||||||
# this is a workaround for python 2 which doesn't have the
|
# this is a workaround for python 2 which doesn't have the
|
||||||
# 'nonlocal' keyword (python 3 does have it)
|
# 'nonlocal' keyword which we could use here in the upgrade_* functions
|
||||||
filetype_container = [filetype]
|
# (python 3 does have it)
|
||||||
|
filetype_container = [mtree.guess.get('type')]
|
||||||
other = {}
|
other = {}
|
||||||
filename = mtree.string
|
filename = mtree.string
|
||||||
|
|
||||||
def upgrade_episode():
|
def upgrade_episode():
|
||||||
if filetype_container[0] == 'video':
|
if filetype_container[0] == 'subtitle':
|
||||||
filetype_container[0] = 'episode'
|
|
||||||
elif filetype_container[0] == 'subtitle':
|
|
||||||
filetype_container[0] = 'episodesubtitle'
|
filetype_container[0] = 'episodesubtitle'
|
||||||
elif filetype_container[0] == 'info':
|
elif filetype_container[0] == 'info':
|
||||||
filetype_container[0] = 'episodeinfo'
|
filetype_container[0] = 'episodeinfo'
|
||||||
|
elif (not filetype_container[0] or
|
||||||
|
filetype_container[0] == 'video'):
|
||||||
|
filetype_container[0] = 'episode'
|
||||||
|
|
||||||
def upgrade_movie():
|
def upgrade_movie():
|
||||||
if filetype_container[0] == 'video':
|
if filetype_container[0] == 'subtitle':
|
||||||
filetype_container[0] = 'movie'
|
|
||||||
elif filetype_container[0] == 'subtitle':
|
|
||||||
filetype_container[0] = 'moviesubtitle'
|
filetype_container[0] = 'moviesubtitle'
|
||||||
elif filetype_container[0] == 'info':
|
elif filetype_container[0] == 'info':
|
||||||
filetype_container[0] = 'movieinfo'
|
filetype_container[0] = 'movieinfo'
|
||||||
|
elif (not filetype_container[0] or
|
||||||
|
filetype_container[0] == 'video'):
|
||||||
|
filetype_container[0] = 'movie'
|
||||||
|
|
||||||
def upgrade_subtitle():
|
def upgrade_subtitle():
|
||||||
if 'movie' in filetype_container[0]:
|
if filetype_container[0] == 'movie':
|
||||||
filetype_container[0] = 'moviesubtitle'
|
filetype_container[0] = 'moviesubtitle'
|
||||||
elif 'episode' in filetype_container[0]:
|
elif filetype_container[0] == 'episode':
|
||||||
filetype_container[0] = 'episodesubtitle'
|
filetype_container[0] = 'episodesubtitle'
|
||||||
else:
|
elif not filetype_container[0]:
|
||||||
filetype_container[0] = 'subtitle'
|
filetype_container[0] = 'subtitle'
|
||||||
|
|
||||||
def upgrade_info():
|
def upgrade_info():
|
||||||
if 'movie' in filetype_container[0]:
|
if filetype_container[0] == 'movie':
|
||||||
filetype_container[0] = 'movieinfo'
|
filetype_container[0] = 'movieinfo'
|
||||||
elif 'episode' in filetype_container[0]:
|
elif filetype_container[0] == 'episode':
|
||||||
filetype_container[0] = 'episodeinfo'
|
filetype_container[0] = 'episodeinfo'
|
||||||
else:
|
elif not filetype_container[0]:
|
||||||
filetype_container[0] = 'info'
|
filetype_container[0] = 'info'
|
||||||
|
|
||||||
def upgrade(type='unknown'):
|
|
||||||
if filetype_container[0] == 'autodetect':
|
|
||||||
filetype_container[0] = type
|
|
||||||
|
|
||||||
|
|
||||||
# look at the extension first
|
# look at the extension first
|
||||||
fileext = os.path.splitext(filename)[1][1:].lower()
|
fileext = os.path.splitext(filename)[1][1:].lower()
|
||||||
if fileext in subtitle_exts:
|
if fileext in subtitle_exts:
|
||||||
upgrade_subtitle()
|
upgrade_subtitle()
|
||||||
other = { 'container': fileext }
|
other = {'container': fileext}
|
||||||
elif fileext in info_exts:
|
elif fileext in info_exts:
|
||||||
upgrade_info()
|
upgrade_info()
|
||||||
other = { 'container': fileext }
|
other = {'container': fileext}
|
||||||
elif fileext in video_exts:
|
elif fileext in video_exts:
|
||||||
upgrade(type='video')
|
other = {'container': fileext}
|
||||||
other = { 'container': fileext }
|
|
||||||
else:
|
else:
|
||||||
upgrade(type='unknown')
|
if fileext and not options.get('name_only'):
|
||||||
other = { 'extension': fileext }
|
other = {'extension': fileext}
|
||||||
|
list(mtree.unidentified_leaves())[-1].guess = Guess(other)
|
||||||
|
|
||||||
|
|
||||||
# check whether we are in a 'Movies', 'Tv Shows', ... folder
|
# check whether we are in a 'Movies', 'Tv Shows', ... folder
|
||||||
folder_rexps = [ (r'Movies?', upgrade_movie),
|
folder_rexps = [(r'Movies?', upgrade_movie),
|
||||||
|
(r'Films?', upgrade_movie),
|
||||||
(r'Tv[ _-]?Shows?', upgrade_episode),
|
(r'Tv[ _-]?Shows?', upgrade_episode),
|
||||||
(r'Series', upgrade_episode)
|
(r'Series?', upgrade_episode),
|
||||||
]
|
(r'Episodes?', upgrade_episode)]
|
||||||
for frexp, upgrade_func in folder_rexps:
|
for frexp, upgrade_func in folder_rexps:
|
||||||
frexp = re.compile(frexp, re.IGNORECASE)
|
frexp = re.compile(frexp, re.IGNORECASE)
|
||||||
for pathgroup in mtree.children:
|
for pathgroup in mtree.children:
|
||||||
if frexp.match(pathgroup.value):
|
if frexp.match(pathgroup.value):
|
||||||
upgrade_func()
|
upgrade_func()
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
# check for a few specific cases which will unintentionally make the
|
# check for a few specific cases which will unintentionally make the
|
||||||
# following heuristics confused (eg: OSS 117 will look like an episode,
|
# following heuristics confused (eg: OSS 117 will look like an episode,
|
||||||
# season 1, epnum 17, when it is in fact a movie)
|
# season 1, epnum 17, when it is in fact a movie)
|
||||||
fname = clean_string(filename).lower()
|
fname = mtree.clean_string(filename).lower()
|
||||||
for m in MOVIES:
|
for m in self.MOVIES:
|
||||||
if m in fname:
|
if m in fname:
|
||||||
log.debug('Found in exception list of movies -> type = movie')
|
self.log.debug('Found in exception list of movies -> type = movie')
|
||||||
upgrade_movie()
|
upgrade_movie()
|
||||||
for s in SERIES:
|
return filetype_container[0], other
|
||||||
|
for s in self.SERIES:
|
||||||
if s in fname:
|
if s in fname:
|
||||||
log.debug('Found in exception list of series -> type = episode')
|
self.log.debug('Found in exception list of series -> type = episode')
|
||||||
upgrade_episode()
|
upgrade_episode()
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
# now look whether there are some specific hints for episode vs movie
|
|
||||||
if filetype_container[0] in ('video', 'subtitle', 'info'):
|
|
||||||
# if we have an episode_rexp (eg: s02e13), it is an episode
|
# if we have an episode_rexp (eg: s02e13), it is an episode
|
||||||
for rexp, _, _ in episode_rexps:
|
episode_transformer = get_transformer('guess_episodes_rexps')
|
||||||
match = re.search(rexp, filename, re.IGNORECASE)
|
if episode_transformer:
|
||||||
if match:
|
filename_parts = list(x.value for x in mtree.unidentified_leaves());
|
||||||
log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode', rexp, match.group())
|
filename_parts.append(filename)
|
||||||
upgrade_episode()
|
for filename_part in filename_parts:
|
||||||
break
|
guess = episode_transformer.guess_episodes_rexps(filename_part)
|
||||||
|
if guess:
|
||||||
# if we have a 3-4 digit number that's not a year, maybe an episode
|
self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess)
|
||||||
match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
|
|
||||||
if match:
|
|
||||||
fullnumber = int(match.group()[1:-1])
|
|
||||||
#season = fullnumber // 100
|
|
||||||
epnumber = fullnumber % 100
|
|
||||||
possible = True
|
|
||||||
|
|
||||||
# check for validity
|
|
||||||
if epnumber > 40:
|
|
||||||
possible = False
|
|
||||||
if valid_year(fullnumber):
|
|
||||||
possible = False
|
|
||||||
|
|
||||||
if possible:
|
|
||||||
log.debug('Found possible episode number: %s (from string "%s") -> type = episode', epnumber, match.group())
|
|
||||||
upgrade_episode()
|
upgrade_episode()
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
|
properties_transformer = get_transformer('guess_properties')
|
||||||
|
if properties_transformer:
|
||||||
# if we have certain properties characteristic of episodes, it is an ep
|
# if we have certain properties characteristic of episodes, it is an ep
|
||||||
for prop, value, _, _ in find_properties(filename):
|
found = properties_transformer.container.find_properties(filename, mtree, options, 'episodeFormat')
|
||||||
log.debug('prop: %s = %s' % (prop, value))
|
guess = properties_transformer.container.as_guess(found, filename)
|
||||||
if prop == 'episodeFormat':
|
if guess:
|
||||||
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
|
self.log.debug('Found characteristic property of episodes: %s"', guess)
|
||||||
upgrade_episode()
|
upgrade_episode()
|
||||||
break
|
return filetype_container[0], other
|
||||||
|
|
||||||
elif compute_canonical_form('format', value) == 'DVB':
|
weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
|
||||||
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
|
if weak_episode_transformer:
|
||||||
|
found = properties_transformer.container.find_properties(filename, mtree, options, 'crc32')
|
||||||
|
guess = properties_transformer.container.as_guess(found, filename)
|
||||||
|
if guess:
|
||||||
|
found = weak_episode_transformer.container.find_properties(filename, mtree, options)
|
||||||
|
guess = weak_episode_transformer.container.as_guess(found, filename)
|
||||||
|
if guess:
|
||||||
|
self.log.debug('Found characteristic property of episodes: %s"', guess)
|
||||||
upgrade_episode()
|
upgrade_episode()
|
||||||
break
|
return filetype_container[0], other
|
||||||
|
|
||||||
|
found = properties_transformer.container.find_properties(filename, mtree, options, 'format')
|
||||||
|
guess = properties_transformer.container.as_guess(found, filename)
|
||||||
|
if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'):
|
||||||
|
# Use weak episodes only if TV or WEB source
|
||||||
|
weak_episode_transformer = get_transformer('guess_weak_episodes_rexps')
|
||||||
|
if weak_episode_transformer:
|
||||||
|
guess = weak_episode_transformer.guess_weak_episodes_rexps(filename)
|
||||||
|
if guess:
|
||||||
|
self.log.debug('Found guess_weak_episodes_rexps: %s -> type = episode', guess)
|
||||||
|
upgrade_episode()
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
|
website_transformer = get_transformer('guess_website')
|
||||||
|
if website_transformer:
|
||||||
|
found = website_transformer.container.find_properties(filename, mtree, options, 'website')
|
||||||
|
guess = website_transformer.container.as_guess(found, filename)
|
||||||
|
if guess:
|
||||||
|
for namepart in ('tv', 'serie', 'episode'):
|
||||||
|
if namepart in guess['website']:
|
||||||
# origin-specific type
|
# origin-specific type
|
||||||
if 'tvu.org.ru' in filename:
|
self.log.debug('Found characteristic property of episodes: %s', guess)
|
||||||
log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
|
|
||||||
upgrade_episode()
|
upgrade_episode()
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
|
if filetype_container[0] in ('subtitle', 'info') or (not filetype_container[0] and fileext in video_exts):
|
||||||
# if no episode info found, assume it's a movie
|
# if no episode info found, assume it's a movie
|
||||||
log.debug('Nothing characteristic found, assuming type = movie')
|
self.log.debug('Nothing characteristic found, assuming type = movie')
|
||||||
upgrade_movie()
|
upgrade_movie()
|
||||||
|
|
||||||
filetype = filetype_container[0]
|
if not filetype_container[0]:
|
||||||
return filetype, other
|
self.log.debug('Nothing characteristic found, assuming type = unknown')
|
||||||
|
filetype_container[0] = 'unknown'
|
||||||
|
|
||||||
|
return filetype_container[0], other
|
||||||
|
|
||||||
def process(mtree, filetype='autodetect'):
|
def process(self, mtree, options=None):
|
||||||
filetype, other = guess_filetype(mtree, filetype)
|
"""guess the file type now (will be useful later)
|
||||||
|
"""
|
||||||
|
filetype, other = self.guess_filetype(mtree, options)
|
||||||
|
|
||||||
mtree.guess.set('type', filetype, confidence=1.0)
|
mtree.guess.set('type', filetype, confidence=1.0)
|
||||||
log.debug('Found with confidence %.2f: %s' % (1.0, mtree.guess))
|
log_found_guess(mtree.guess)
|
||||||
|
|
||||||
filetype_info = Guess(other, confidence=1.0)
|
filetype_info = Guess(other, confidence=1.0)
|
||||||
# guess the mimetype of the filename
|
# guess the mimetype of the filename
|
||||||
|
|
@@ -195,5 +218,20 @@ def process(mtree, filetype='autodetect'):
|
||||||
filetype_info.update({'mimetype': mime}, confidence=1.0)
|
filetype_info.update({'mimetype': mime}, confidence=1.0)
|
||||||
|
|
||||||
node_ext = mtree.node_at((-1,))
|
node_ext = mtree.node_at((-1,))
|
||||||
node_ext.guess = filetype_info
|
found_guess(node_ext, filetype_info)
|
||||||
log.debug('Found with confidence %.2f: %s' % (1.0, node_ext.guess))
|
|
||||||
|
if mtree.guess.get('type') in [None, 'unknown']:
|
||||||
|
if options.get('name_only'):
|
||||||
|
mtree.guess.set('type', 'movie', confidence=0.6)
|
||||||
|
else:
|
||||||
|
raise TransformerException(__name__, 'Unknown file type')
|
||||||
|
|
||||||
|
def post_process(self, mtree, options=None):
|
||||||
|
# now look whether there are some specific hints for episode vs movie
|
||||||
|
# If we have a date and no year, this is a TV Show.
|
||||||
|
if 'date' in mtree.info and 'year' not in mtree.info and mtree.info.get('type') != 'episode':
|
||||||
|
mtree.guess['type'] = 'episode'
|
||||||
|
for type_leaves in mtree.leaves_containing('type'):
|
||||||
|
type_leaves.guess['type'] = 'episode'
|
||||||
|
for title_leaves in mtree.leaves_containing('title'):
|
||||||
|
title_leaves.guess.rename('title', 'series')
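post_process adds one cross-cutting rule on top of the extension and property checks: a guess that carries a full date but no standalone year is treated as a TV broadcast, so the type is forced to episode and any title is renamed to series. The same decision applied to a plain dictionary, just to make the rule concrete; the sample guess is invented:

def apply_date_rule(info):
    # a date without a standalone year usually means a daily or weekly broadcast
    if 'date' in info and 'year' not in info and info.get('type') != 'episode':
        info['type'] = 'episode'
        if 'title' in info:
            info['series'] = info.pop('title')
    return info

print(apply_date_rule({'type': 'movie', 'title': 'The Daily Show', 'date': '2014-06-11'}))
# {'type': 'episode', 'date': '2014-06-11', 'series': 'The Daily Show'}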
@@ -18,40 +18,47 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
from guessit.patterns import find_properties
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder
|
||||||
import re
|
import re
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
_DIGIT = 0
|
||||||
|
_LETTER = 1
|
||||||
|
_OTHER = 2
|
||||||
|
|
||||||
|
|
||||||
def guess_properties(string):
|
class GuessIdnumber(Transformer):
|
||||||
try:
|
def __init__(self):
|
||||||
prop, value, pos, end = find_properties(string)[0]
|
Transformer.__init__(self, 220)
|
||||||
return { prop: value }, (pos, end)
|
|
||||||
except IndexError:
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{10,})') # 1.0, (0, 0))
|
def supported_properties(self):
|
||||||
|
return ['idNumber']
|
||||||
|
|
||||||
def guess_idnumber(string):
|
_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{20,})') # 1.0, (0, 0))
|
||||||
match = _idnum.search(string)
|
|
||||||
|
def guess_idnumber(self, string, node=None, options=None):
|
||||||
|
match = self._idnum.search(string)
|
||||||
if match is not None:
|
if match is not None:
|
||||||
result = match.groupdict()
|
result = match.groupdict()
|
||||||
switch_count = 0
|
switch_count = 0
|
||||||
DIGIT = 0
|
switch_letter_count = 0
|
||||||
LETTER = 1
|
letter_count = 0
|
||||||
OTHER = 2
|
last_letter = None
|
||||||
last = LETTER
|
|
||||||
|
last = _LETTER
|
||||||
for c in result['idNumber']:
|
for c in result['idNumber']:
|
||||||
if c in '0123456789':
|
if c in '0123456789':
|
||||||
ci = DIGIT
|
ci = _DIGIT
|
||||||
elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
|
elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
|
||||||
ci = LETTER
|
ci = _LETTER
|
||||||
|
if c != last_letter:
|
||||||
|
switch_letter_count += 1
|
||||||
|
last_letter = c
|
||||||
|
letter_count += 1
|
||||||
else:
|
else:
|
||||||
ci = OTHER
|
ci = _OTHER
|
||||||
|
|
||||||
if ci != last:
|
if ci != last:
|
||||||
switch_count += 1
|
switch_count += 1
|
||||||
|
|
@ -59,13 +66,14 @@ def guess_idnumber(string):
|
||||||
last = ci
|
last = ci
|
||||||
|
|
||||||
switch_ratio = float(switch_count) / len(result['idNumber'])
|
switch_ratio = float(switch_count) / len(result['idNumber'])
|
||||||
|
letters_ratio = (float(switch_letter_count) / letter_count) if letter_count > 0 else 1
|
||||||
|
|
||||||
# only return the result as probable if we alternate often between
|
# only return the result as probable if we alternate often between
|
||||||
# char type (more likely for hash values than for common words)
|
# char type (more likely for hash values than for common words)
|
||||||
if switch_ratio > 0.4:
|
if switch_ratio > 0.4 and letters_ratio > 0.4:
|
||||||
return result, match.span()
|
return result, match.span()
|
||||||
|
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
def process(mtree):
|
def process(self, mtree, options=None):
|
||||||
SingleNodeGuesser(guess_idnumber, 0.4, log).process(mtree)
|
GuessFinder(self.guess_idnumber, 0.4, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
|
|
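The idNumber transformer above accepts a 20+ character alphanumeric token only when its character classes alternate often enough, which is what separates CRC/hash strings from ordinary words. A standalone sketch of the same two ratios, with the 0.4 thresholds taken from the diff (the helper name is ours):

def looks_like_id(candidate):
    # Count transitions between digit / letter / other, and between
    # distinct letters; hash-like strings alternate far more than words.
    switch_count = switch_letter_count = letter_count = 0
    last_kind, last_letter = 'letter', None
    for c in candidate:
        if c.isdigit():
            kind = 'digit'
        elif c.isalpha():
            kind = 'letter'
            if c != last_letter:
                switch_letter_count += 1
            last_letter = c
            letter_count += 1
        else:
            kind = 'other'
        if kind != last_kind:
            switch_count += 1
        last_kind = kind
    switch_ratio = float(switch_count) / len(candidate)
    letters_ratio = float(switch_letter_count) / letter_count if letter_count else 1
    return switch_ratio > 0.4 and letters_ratio > 0.4

# looks_like_id('1a2b3c4d5e6f7a8b9c0d')   -> True  (hash-like alternation)
# looks_like_id('justalongordinaryword')  -> False (no alternation at all)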
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,38 +18,169 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit import Guess
|
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
from guessit.language import search_language
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.language import search_language, subtitle_prefixes, subtitle_suffixes
|
||||||
|
from guessit.patterns.extension import subtitle_exts
|
||||||
|
from guessit.textutils import find_words
|
||||||
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder
|
||||||
|
|
||||||
|
|
||||||
def guess_language(string, node, skip=None):
|
class GuessLanguage(Transformer):
|
||||||
if skip:
|
def __init__(self):
|
||||||
relative_skip = []
|
Transformer.__init__(self, 30)
|
||||||
for entry in skip:
|
|
||||||
node_idx = entry['node_idx']
|
|
||||||
span = entry['span']
|
|
||||||
if node_idx == node.node_idx[:len(node_idx)]:
|
|
||||||
relative_span = (span[0] - node.offset + 1, span[1] - node.offset + 1)
|
|
||||||
relative_skip.append(relative_span)
|
|
||||||
skip = relative_skip
|
|
||||||
|
|
||||||
language, span, confidence = search_language(string, skip=skip)
|
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
|
||||||
if language:
|
naming_opts.add_argument('-L', '--allowed-languages', action='append', dest='allowed_languages',
|
||||||
return (Guess({'language': language},
|
help='Allowed language (can be used multiple times)')
|
||||||
confidence=confidence,
|
|
||||||
raw= string[span[0]:span[1]]),
|
|
||||||
span)
|
|
||||||
|
|
||||||
return None, None
|
def supported_properties(self):
|
||||||
|
return ['language', 'subtitleLanguage']
|
||||||
|
|
||||||
guess_language.use_node = True
|
def guess_language(self, string, node=None, options=None):
|
||||||
|
allowed_languages = None
|
||||||
|
if options and 'allowed_languages' in options:
|
||||||
|
allowed_languages = options.get('allowed_languages')
|
||||||
|
guess = search_language(string, allowed_languages)
|
||||||
|
return guess
|
||||||
|
|
||||||
|
def _skip_language_on_second_pass(self, mtree, node):
|
||||||
|
"""Check if found node is a valid language node, or if it's a false positive.
|
||||||
|
|
||||||
def process(mtree, *args, **kwargs):
|
:param mtree: Tree detected on first pass.
|
||||||
SingleNodeGuesser(guess_language, None, log, *args, **kwargs).process(mtree)
|
:type mtree: :class:`guessit.matchtree.MatchTree`
|
||||||
# Note: 'language' is promoted to 'subtitleLanguage' in the post_process transfo
|
:param node: Node that contains a language Guess
|
||||||
|
:type node: :class:`guessit.matchtree.MatchTree`
|
||||||
|
|
||||||
|
:return: True if a second pass skipping this node is required
|
||||||
|
:rtype: bool
|
||||||
|
"""
|
||||||
|
unidentified_starts = {}
|
||||||
|
unidentified_ends = {}
|
||||||
|
|
||||||
|
property_starts = {}
|
||||||
|
property_ends = {}
|
||||||
|
|
||||||
|
title_starts = {}
|
||||||
|
title_ends = {}
|
||||||
|
|
||||||
|
for unidentified_node in mtree.unidentified_leaves():
|
||||||
|
unidentified_starts[unidentified_node.span[0]] = unidentified_node
|
||||||
|
unidentified_ends[unidentified_node.span[1]] = unidentified_node
|
||||||
|
|
||||||
|
for property_node in mtree.leaves_containing('year'):
|
||||||
|
property_starts[property_node.span[0]] = property_node
|
||||||
|
property_ends[property_node.span[1]] = property_node
|
||||||
|
|
||||||
|
for title_node in mtree.leaves_containing(['title', 'series']):
|
||||||
|
title_starts[title_node.span[0]] = title_node
|
||||||
|
title_ends[title_node.span[1]] = title_node
|
||||||
|
|
||||||
|
return node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or node.span[1] + 1 in property_starts.keys()) or\
|
||||||
|
node.span[1] in title_starts.keys() and (node.span[0] == node.group_node().span[0] or node.span[0] in unidentified_ends.keys() or node.span[0] in property_ends.keys())
|
||||||
|
|
||||||
|
def second_pass_options(self, mtree, options=None):
|
||||||
|
m = mtree.matched()
|
||||||
|
to_skip_language_nodes = []
|
||||||
|
|
||||||
|
for lang_key in ('language', 'subtitleLanguage'):
|
||||||
|
langs = {}
|
||||||
|
lang_nodes = set(mtree.leaves_containing(lang_key))
|
||||||
|
|
||||||
|
for lang_node in lang_nodes:
|
||||||
|
lang = lang_node.guess.get(lang_key, None)
|
||||||
|
if self._skip_language_on_second_pass(mtree, lang_node):
|
||||||
|
# Language probably split the title. Add to skip for 2nd pass.
|
||||||
|
|
||||||
|
# if filetype is subtitle and the language appears last, just before
|
||||||
|
# the extension, then it is likely a subtitle language
|
||||||
|
parts = mtree.clean_string(lang_node.root.value).split()
|
||||||
|
if m.get('type') in ['moviesubtitle', 'episodesubtitle']:
|
||||||
|
if lang_node.value in parts and \
|
||||||
|
(parts.index(lang_node.value) == len(parts) - 2):
|
||||||
|
continue
|
||||||
|
to_skip_language_nodes.append(lang_node)
|
||||||
|
elif lang not in langs:
|
||||||
|
langs[lang] = lang_node
|
||||||
|
else:
|
||||||
|
# The same language was found. Keep the more confident one,
|
||||||
|
# and add others to skip for 2nd pass.
|
||||||
|
existing_lang_node = langs[lang]
|
||||||
|
to_skip = None
|
||||||
|
if (existing_lang_node.guess.confidence('language') >=
|
||||||
|
lang_node.guess.confidence('language')):
|
||||||
|
# lang_node is to remove
|
||||||
|
to_skip = lang_node
|
||||||
|
else:
|
||||||
|
# existing_lang_node is to remove
|
||||||
|
langs[lang] = lang_node
|
||||||
|
to_skip = existing_lang_node
|
||||||
|
to_skip_language_nodes.append(to_skip)
|
||||||
|
|
||||||
|
if to_skip_language_nodes:
|
||||||
|
# Also skip same value nodes
|
||||||
|
skipped_values = [skip_node.value for skip_node in to_skip_language_nodes]
|
||||||
|
|
||||||
|
for lang_key in ('language', 'subtitleLanguage'):
|
||||||
|
lang_nodes = set(mtree.leaves_containing(lang_key))
|
||||||
|
|
||||||
|
for lang_node in lang_nodes:
|
||||||
|
if lang_node not in to_skip_language_nodes and lang_node.value in skipped_values:
|
||||||
|
to_skip_language_nodes.append(lang_node)
|
||||||
|
return {'skip_nodes': to_skip_language_nodes}
|
||||||
|
return None
|
||||||
|
|
||||||
|
def should_process(self, mtree, options=None):
|
||||||
|
options = options or {}
|
||||||
|
return options.get('language', True)
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_language, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
|
||||||
|
def promote_subtitle(self, node):
|
||||||
|
if 'language' in node.guess:
|
||||||
|
node.guess.set('subtitleLanguage', node.guess['language'],
|
||||||
|
confidence=node.guess.confidence('language'))
|
||||||
|
del node.guess['language']
|
||||||
|
|
||||||
|
def post_process(self, mtree, options=None):
|
||||||
|
# 1- try to promote language to subtitle language where it makes sense
|
||||||
|
for node in mtree.nodes():
|
||||||
|
if 'language' not in node.guess:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# - if we matched a language in a file with a sub extension and that
|
||||||
|
# the group is the last group of the filename, it is probably the
|
||||||
|
# language of the subtitle
|
||||||
|
# (eg: 'xxx.english.srt')
|
||||||
|
if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
|
||||||
|
node == list(mtree.leaves())[-2]):
|
||||||
|
self.promote_subtitle(node)
|
||||||
|
|
||||||
|
# - if we find in the same explicit group
|
||||||
|
# a subtitle prefix before the language,
|
||||||
|
# or a subtitle suffix after the language,
|
||||||
|
# then upgrade the language
|
||||||
|
explicit_group = mtree.node_at(node.node_idx[:2])
|
||||||
|
group_str = explicit_group.value.lower()
|
||||||
|
|
||||||
|
for sub_prefix in subtitle_prefixes:
|
||||||
|
if (sub_prefix in find_words(group_str) and
|
||||||
|
0 <= group_str.find(sub_prefix) < (node.span[0] - explicit_group.span[0])):
|
||||||
|
self.promote_subtitle(node)
|
||||||
|
|
||||||
|
for sub_suffix in subtitle_suffixes:
|
||||||
|
if (sub_suffix in find_words(group_str) and
|
||||||
|
(node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)):
|
||||||
|
self.promote_subtitle(node)
|
||||||
|
|
||||||
|
# - if a language is in an explicit group just preceded by "st",
|
||||||
|
# it is a subtitle language (eg: '...st[fr-eng]...')
|
||||||
|
try:
|
||||||
|
idx = node.node_idx
|
||||||
|
previous = list(mtree.node_at((idx[0], idx[1] - 1)).leaves())[-1]
|
||||||
|
if previous.value.lower()[-2:] == 'st':
|
||||||
|
self.promote_subtitle(node)
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
|
|
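The post_process above promotes a detected language to subtitleLanguage when the file has a subtitle extension and the language token is the last thing before it (or when a subtitle prefix/suffix sits next to it in the same group). A rough standalone illustration of the extension case only; the extension set and function name are ours, not guessit API:

SUBTITLE_EXTS = {'srt', 'sub', 'ssa', 'ass'}

def classify_language(filename, language_token):
    parts = filename.lower().replace('.', ' ').split()
    key = 'language'
    # 'xxx.english.srt' -> the token just before a subtitle extension
    # is almost certainly the subtitle language, not the audio language.
    if len(parts) >= 2 and parts[-1] in SUBTITLE_EXTS and parts[-2] == language_token.lower():
        key = 'subtitleLanguage'
    return {key: language_token}

# classify_language('Movie.2010.English.srt', 'English') -> {'subtitleLanguage': 'English'}
# classify_language('Movie.2010.English.mkv', 'English') -> {'language': 'English'}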
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,45 +18,51 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit import Guess
|
|
||||||
import unicodedata
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import found_property
|
||||||
|
from guessit import u
|
||||||
|
|
||||||
|
|
||||||
def process(mtree):
|
class GuessMovieTitleFromPosition(Transformer):
|
||||||
def found_property(node, name, value, confidence):
|
def __init__(self):
|
||||||
node.guess = Guess({ name: value },
|
Transformer.__init__(self, -200)
|
||||||
confidence=confidence,
|
|
||||||
raw=value)
|
|
||||||
log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
|
|
||||||
|
|
||||||
def found_title(node, confidence):
|
def supported_properties(self):
|
||||||
found_property(node, 'title', node.clean_value, confidence)
|
return ['title']
|
||||||
|
|
||||||
|
def should_process(self, mtree, options=None):
|
||||||
|
options = options or {}
|
||||||
|
return not options.get('skip_title') and not mtree.guess.get('type', '').startswith('episode')
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
"""
|
||||||
|
try to identify the remaining unknown groups by looking at their
|
||||||
|
position relative to other known elements
|
||||||
|
"""
|
||||||
|
if 'title' in mtree.info:
|
||||||
|
return
|
||||||
|
|
||||||
basename = mtree.node_at((-2,))
|
basename = mtree.node_at((-2,))
|
||||||
all_valid = lambda leaf: len(leaf.clean_value) > 0
|
all_valid = lambda leaf: len(leaf.clean_value) > 0
|
||||||
basename_leftover = basename.unidentified_leaves(valid=all_valid)
|
basename_leftover = list(basename.unidentified_leaves(valid=all_valid))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
folder = mtree.node_at((-3,))
|
folder = mtree.node_at((-3,))
|
||||||
folder_leftover = folder.unidentified_leaves()
|
folder_leftover = list(folder.unidentified_leaves())
|
||||||
except ValueError:
|
except ValueError:
|
||||||
folder = None
|
folder = None
|
||||||
folder_leftover = []
|
folder_leftover = []
|
||||||
|
|
||||||
log.debug('folder: %s' % folder_leftover)
|
self.log.debug('folder: %s' % u(folder_leftover))
|
||||||
log.debug('basename: %s' % basename_leftover)
|
self.log.debug('basename: %s' % u(basename_leftover))
|
||||||
|
|
||||||
# specific cases:
|
# specific cases:
|
||||||
# if we find the same group both in the folder name and the filename,
|
# if we find the same group both in the folder name and the filename,
|
||||||
# it's a good candidate for title
|
# it's a good candidate for title
|
||||||
if (folder_leftover and basename_leftover and
|
if folder_leftover and basename_leftover and folder_leftover[0].clean_value == basename_leftover[0].clean_value:
|
||||||
folder_leftover[0].clean_value == basename_leftover[0].clean_value):
|
found_property(folder_leftover[0], 'title', confidence=0.8)
|
||||||
|
|
||||||
found_title(folder_leftover[0], confidence=0.8)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# specific cases:
|
# specific cases:
|
||||||
|
|
@ -64,61 +70,52 @@ def process(mtree):
|
||||||
# group, and the folder only contains 1 unidentified one, then we have
|
# group, and the folder only contains 1 unidentified one, then we have
|
||||||
# a series
|
# a series
|
||||||
# ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
|
# ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
|
||||||
try:
|
if len(folder_leftover) > 0 and len(basename_leftover) > 1:
|
||||||
series = folder_leftover[0]
|
series = folder_leftover[0]
|
||||||
filmNumber = basename_leftover[0]
|
film_number = basename_leftover[0]
|
||||||
title = basename_leftover[1]
|
title = basename_leftover[1]
|
||||||
|
|
||||||
basename_leaves = basename.leaves()
|
basename_leaves = list(basename.leaves())
|
||||||
|
|
||||||
num = int(filmNumber.clean_value)
|
num = None
|
||||||
|
try:
|
||||||
|
num = int(film_number.clean_value)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
log.debug('series: %s' % series.clean_value)
|
if num:
|
||||||
log.debug('title: %s' % title.clean_value)
|
self.log.debug('series: %s' % series.clean_value)
|
||||||
|
self.log.debug('title: %s' % title.clean_value)
|
||||||
if (series.clean_value != title.clean_value and
|
if (series.clean_value != title.clean_value and
|
||||||
series.clean_value != filmNumber.clean_value and
|
series.clean_value != film_number.clean_value and
|
||||||
basename_leaves.index(filmNumber) == 0 and
|
basename_leaves.index(film_number) == 0 and
|
||||||
basename_leaves.index(title) == 1):
|
basename_leaves.index(title) == 1):
|
||||||
|
|
||||||
found_title(title, confidence=0.6)
|
found_property(title, 'title', confidence=0.6)
|
||||||
found_property(series, 'filmSeries',
|
found_property(series, 'filmSeries', confidence=0.6)
|
||||||
series.clean_value, confidence=0.6)
|
found_property(film_number, 'filmNumber', num, confidence=0.6)
|
||||||
found_property(filmNumber, 'filmNumber',
|
|
||||||
num, confidence=0.6)
|
|
||||||
return
|
return
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# specific cases:
|
|
||||||
# - movies/tttttt (yyyy)/tttttt.ccc
|
|
||||||
try:
|
|
||||||
if mtree.node_at((-4, 0)).value.lower() == 'movies':
|
|
||||||
folder = mtree.node_at((-3,))
|
|
||||||
|
|
||||||
# Note:too generic, might solve all the unittests as they all
|
|
||||||
# contain 'movies' in their path
|
|
||||||
#
|
|
||||||
#if containing_folder.is_leaf() and not containing_folder.guess:
|
|
||||||
# containing_folder.guess =
|
|
||||||
# Guess({ 'title': clean_string(containing_folder.value) },
|
|
||||||
# confidence=0.7)
|
|
||||||
|
|
||||||
|
if folder:
|
||||||
year_group = folder.first_leaf_containing('year')
|
year_group = folder.first_leaf_containing('year')
|
||||||
|
if year_group:
|
||||||
groups_before = folder.previous_unidentified_leaves(year_group)
|
groups_before = folder.previous_unidentified_leaves(year_group)
|
||||||
|
if groups_before:
|
||||||
found_title(groups_before[0], confidence=0.8)
|
try:
|
||||||
|
node = next(groups_before)
|
||||||
|
found_property(node, 'title', confidence=0.8)
|
||||||
return
|
return
|
||||||
|
except StopIteration:
|
||||||
except Exception:
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# if we have either format or videoCodec in the folder containing the file
|
# if we have either format or videoCodec in the folder containing the
|
||||||
# or one of its parents, then we should probably look for the title in
|
# file or one of its parents, then we should probably look for the title
|
||||||
# there rather than in the basename
|
# in there rather than in the basename
|
||||||
try:
|
try:
|
||||||
props = mtree.previous_leaves_containing(mtree.children[-2],
|
props = list(mtree.previous_leaves_containing(mtree.children[-2],
|
||||||
[ 'videoCodec', 'format',
|
['videoCodec',
|
||||||
'language' ])
|
'format',
|
||||||
|
'language']))
|
||||||
except IndexError:
|
except IndexError:
|
||||||
props = []
|
props = []
|
||||||
|
|
||||||
|
|
@ -127,48 +124,50 @@ def process(mtree):
|
||||||
if all(g.node_idx[0] == group_idx for g in props):
|
if all(g.node_idx[0] == group_idx for g in props):
|
||||||
# if they're all in the same group, take leftover info from there
|
# if they're all in the same group, take leftover info from there
|
||||||
leftover = mtree.node_at((group_idx,)).unidentified_leaves()
|
leftover = mtree.node_at((group_idx,)).unidentified_leaves()
|
||||||
|
try:
|
||||||
if leftover:
|
found_property(next(leftover), 'title', confidence=0.7)
|
||||||
found_title(leftover[0], confidence=0.7)
|
|
||||||
return
|
return
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
|
||||||
# look for title in basename if there are some remaining undidentified
|
# look for title in basename if there are some remaining unidentified
|
||||||
# groups there
|
# groups there
|
||||||
if basename_leftover:
|
if basename_leftover:
|
||||||
title_candidate = basename_leftover[0]
|
|
||||||
|
|
||||||
# if basename is only one word and the containing folder has at least
|
# if basename is only one word and the containing folder has at least
|
||||||
# 3 words in it, we should take the title from the folder name
|
# 3 words in it, we should take the title from the folder name
|
||||||
# ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
|
# ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
|
||||||
# ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
|
# ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here?
|
||||||
if (title_candidate.clean_value.count(' ') == 0 and
|
if (basename_leftover[0].clean_value.count(' ') == 0 and
|
||||||
folder_leftover and
|
folder_leftover and folder_leftover[0].clean_value.count(' ') >= 2):
|
||||||
folder_leftover[0].clean_value.count(' ') >= 2):
|
|
||||||
|
|
||||||
found_title(folder_leftover[0], confidence=0.7)
|
found_property(folder_leftover[0], 'title', confidence=0.7)
|
||||||
return
|
return
|
||||||
|
|
||||||
# if there are only 2 unidentified groups, the first of which is inside
|
# if there are several unidentified groups, take the first one that is
|
||||||
# brackets or parentheses, we take the second one for the title:
|
# not inside brackets or parentheses.
|
||||||
# ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
|
# ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
|
||||||
if len(basename_leftover) == 2 and basename_leftover[0].is_explicit():
|
if basename_leftover[0].is_explicit():
|
||||||
found_title(basename_leftover[1], confidence=0.8)
|
for basename_leftover_elt in basename_leftover:
|
||||||
|
if not basename_leftover_elt.is_explicit():
|
||||||
|
found_property(basename_leftover_elt, 'title', confidence=0.8)
|
||||||
return
|
return
|
||||||
|
|
||||||
# if all else fails, take the first remaining unidentified group in the
|
# if all else fails, take the first remaining unidentified group in the
|
||||||
# basename as title
|
# basename as title
|
||||||
found_title(title_candidate, confidence=0.6)
|
found_property(basename_leftover[0], 'title', confidence=0.6)
|
||||||
return
|
return
|
||||||
|
|
||||||
# if there are no leftover groups in the basename, look in the folder name
|
# if there are no leftover groups in the basename, look in the folder name
|
||||||
if folder_leftover:
|
if folder_leftover:
|
||||||
found_title(folder_leftover[0], confidence=0.5)
|
found_property(folder_leftover[0], 'title', confidence=0.5)
|
||||||
return
|
return
|
||||||
|
|
||||||
# if nothing worked, look if we have a very small group at the beginning
|
# if nothing worked, look if we have a very small group at the beginning
|
||||||
# of the basename
|
# of the basename
|
||||||
basename = mtree.node_at((-2,))
|
basename = mtree.node_at((-2,))
|
||||||
basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
|
basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
|
||||||
if basename_leftover:
|
try:
|
||||||
found_title(basename_leftover[0], confidence=0.4)
|
found_property(next(basename_leftover), 'title', confidence=0.4)
|
||||||
return
|
return
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
|
|
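The title-from-position cascade above boils down to a few ordered rules. A simplified sketch of the main ones, taking the already-cleaned leftover tokens as plain lists (confidences copied from the diff; the bracketed-group and film-series cases are left out):

def pick_title(folder_leftover, basename_leftover):
    if folder_leftover and basename_leftover and folder_leftover[0] == basename_leftover[0]:
        return folder_leftover[0], 0.8      # same leftover in folder and file name
    if basename_leftover:
        first = basename_leftover[0]
        # one-word file name inside a wordy folder -> the folder carries the title
        if ' ' not in first and folder_leftover and folder_leftover[0].count(' ') >= 2:
            return folder_leftover[0], 0.7
        return first, 0.6                   # otherwise the file name itself
    if folder_leftover:
        return folder_leftover[0], 0.5      # nothing left in the basename
    return None, 0.0

# pick_title(['Alice in Wonderland DVDRip XviD DiAMOND'], ['dmdaw'])
#   -> ('Alice in Wonderland DVDRip XviD DiAMOND', 0.7)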
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,21 +18,271 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
from guessit.patterns import find_properties
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer, NoValidator, \
|
||||||
|
ChainedValidator, DefaultValidator, OnlyOneValidator, LeftValidator, NeighborValidator
|
||||||
|
from guessit.patterns import sep, build_or_pattern
|
||||||
|
from guessit.patterns.extension import subtitle_exts, video_exts, info_exts
|
||||||
|
from guessit.patterns.numeral import numeral, parse_numeral
|
||||||
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder, found_property
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
def guess_properties(string):
|
class GuessProperties(Transformer):
|
||||||
try:
|
def __init__(self):
|
||||||
prop, value, pos, end = find_properties(string)[0]
|
Transformer.__init__(self, 35)
|
||||||
return { prop: value }, (pos, end)
|
|
||||||
except IndexError:
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
|
self.container = PropertiesContainer()
|
||||||
|
self.qualities = QualitiesContainer()
|
||||||
|
|
||||||
def process(mtree):
|
def register_property(propname, props, **kwargs):
|
||||||
SingleNodeGuesser(guess_properties, 1.0, log).process(mtree)
|
"""props a dict of {value: [patterns]}"""
|
||||||
|
for canonical_form, patterns in props.items():
|
||||||
|
if isinstance(patterns, tuple):
|
||||||
|
patterns2, pattern_kwarg = patterns
|
||||||
|
if kwargs:
|
||||||
|
current_kwarg = dict(kwargs)
|
||||||
|
current_kwarg.update(pattern_kwarg)
|
||||||
|
else:
|
||||||
|
current_kwarg = dict(pattern_kwarg)
|
||||||
|
current_kwarg['canonical_form'] = canonical_form
|
||||||
|
self.container.register_property(propname, *patterns2, **current_kwarg)
|
||||||
|
elif kwargs:
|
||||||
|
current_kwarg = dict(kwargs)
|
||||||
|
current_kwarg['canonical_form'] = canonical_form
|
||||||
|
self.container.register_property(propname, *patterns, **current_kwarg)
|
||||||
|
else:
|
||||||
|
self.container.register_property(propname, *patterns, canonical_form=canonical_form)
|
||||||
|
|
||||||
|
def register_quality(propname, quality_dict):
|
||||||
|
"""props a dict of {canonical_form: quality}"""
|
||||||
|
for canonical_form, quality in quality_dict.items():
|
||||||
|
self.qualities.register_quality(propname, canonical_form, quality)
|
||||||
|
|
||||||
|
register_property('container', {'mp4': ['MP4']})
|
||||||
|
|
||||||
|
# http://en.wikipedia.org/wiki/Pirated_movie_release_types
|
||||||
|
register_property('format', {'VHS': ['VHS', 'VHS-Rip'],
|
||||||
|
'Cam': ['CAM', 'CAMRip', 'HD-CAM'],
|
||||||
|
#'Telesync': ['TELESYNC', 'PDVD'],
|
||||||
|
'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}),
|
||||||
|
'Workprint': ['WORKPRINT', 'WP'],
|
||||||
|
'Telecine': ['TELECINE', 'TC'],
|
||||||
|
'PPV': ['PPV', 'PPV-Rip'], # Pay Per View
|
||||||
|
'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
|
||||||
|
'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
|
||||||
|
'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
|
||||||
|
'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
|
||||||
|
'VOD': ['VOD', 'VOD-Rip'],
|
||||||
|
'WEBRip': ['WEB-Rip'],
|
||||||
|
'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
|
||||||
|
'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
|
||||||
|
'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
|
||||||
|
})
|
||||||
|
|
||||||
|
register_quality('format', {'VHS': -100,
|
||||||
|
'Cam': -90,
|
||||||
|
'Telesync': -80,
|
||||||
|
'Workprint': -70,
|
||||||
|
'Telecine': -60,
|
||||||
|
'PPV': -50,
|
||||||
|
'TV': -30,
|
||||||
|
'DVB': -20,
|
||||||
|
'DVD': 0,
|
||||||
|
'HDTV': 20,
|
||||||
|
'VOD': 40,
|
||||||
|
'WEBRip': 50,
|
||||||
|
'WEB-DL': 60,
|
||||||
|
'HD-DVD': 80,
|
||||||
|
'BluRay': 100
|
||||||
|
})
|
||||||
|
|
||||||
|
register_property('screenSize', {'360p': ['(?:\d{3,}(?:\\|\/|x|\*))?360(?:i|p?x?)'],
|
||||||
|
'368p': ['(?:\d{3,}(?:\\|\/|x|\*))?368(?:i|p?x?)'],
|
||||||
|
'480p': ['(?:\d{3,}(?:\\|\/|x|\*))?480(?:i|p?x?)'],
|
||||||
|
#'480p': (['hr'], {'confidence': 0.2}), # duplicate dict key
|
||||||
|
'576p': ['(?:\d{3,}(?:\\|\/|x|\*))?576(?:i|p?x?)'],
|
||||||
|
'720p': ['(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)'],
|
||||||
|
'900p': ['(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)'],
|
||||||
|
'1080i': ['(?:\d{3,}(?:\\|\/|x|\*))?1080i'],
|
||||||
|
'1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080p?x?'],
|
||||||
|
'4K': ['(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)']
|
||||||
|
},
|
||||||
|
validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))
|
||||||
|
|
||||||
|
class ResolutionValidator(object):
|
||||||
|
"""Make sure our match is surrounded by separators, or by another entry"""
|
||||||
|
def validate(self, prop, string, node, match, entry_start, entry_end):
|
||||||
|
"""
|
||||||
|
span = _get_span(prop, match)
|
||||||
|
span = _trim_span(span, string[span[0]:span[1]])
|
||||||
|
start, end = span
|
||||||
|
|
||||||
|
sep_start = start <= 0 or string[start - 1] in sep
|
||||||
|
sep_end = end >= len(string) or string[end] in sep
|
||||||
|
start_by_other = start in entry_end
|
||||||
|
end_by_other = end in entry_start
|
||||||
|
if (sep_start or start_by_other) and (sep_end or end_by_other):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
_digits_re = re.compile('\d+')
|
||||||
|
|
||||||
|
def resolution_formatter(value):
|
||||||
|
digits = _digits_re.findall(value)
|
||||||
|
return 'x'.join(digits)
|
||||||
|
|
||||||
|
self.container.register_property('screenSize', '\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter, validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))
|
||||||
|
|
||||||
|
register_quality('screenSize', {'360p': -300,
|
||||||
|
'368p': -200,
|
||||||
|
'480p': -100,
|
||||||
|
'576p': 0,
|
||||||
|
'720p': 100,
|
||||||
|
'900p': 130,
|
||||||
|
'1080i': 180,
|
||||||
|
'1080p': 200,
|
||||||
|
'4K': 400
|
||||||
|
})
|
||||||
|
|
||||||
|
_videoCodecProperty = {'Real': ['Rv\d{2}'], # http://en.wikipedia.org/wiki/RealVideo
|
||||||
|
'Mpeg2': ['Mpeg2'],
|
||||||
|
'DivX': ['DVDivX', 'DivX'],
|
||||||
|
'XviD': ['XviD'],
|
||||||
|
'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
|
||||||
|
'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
|
||||||
|
}
|
||||||
|
|
||||||
|
register_property('videoCodec', _videoCodecProperty)
|
||||||
|
|
||||||
|
register_quality('videoCodec', {'Real': -50,
|
||||||
|
'Mpeg2': -30,
|
||||||
|
'DivX': -10,
|
||||||
|
'XviD': 0,
|
||||||
|
'h264': 100,
|
||||||
|
'h265': 150
|
||||||
|
})
|
||||||
|
|
||||||
|
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
|
||||||
|
# http://fr.wikipedia.org/wiki/H.264
|
||||||
|
self.container.register_property('videoProfile', 'BP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit')
|
||||||
|
self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit')
|
||||||
|
self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess]))
|
||||||
|
|
||||||
|
register_quality('videoProfile', {'BP': -20,
|
||||||
|
'XP': -10,
|
||||||
|
'MP': 0,
|
||||||
|
'HP': 10,
|
||||||
|
'10bit': 15,
|
||||||
|
'Hi422P': 25,
|
||||||
|
'Hi444PP': 35
|
||||||
|
})
|
||||||
|
|
||||||
|
# has nothing to do here (or on filenames for that matter), but some
|
||||||
|
# releases use it and it helps to identify release groups, so we adapt
|
||||||
|
register_property('videoApi', {'DXVA': ['DXVA']})
|
||||||
|
|
||||||
|
register_property('audioCodec', {'MP3': ['MP3', 'LAME', 'LAME(?:\d)+-(?:\d)+'],
|
||||||
|
'DolbyDigital': ['DD'],
|
||||||
|
'AAC': ['AAC'],
|
||||||
|
'AC3': ['AC3'],
|
||||||
|
'Flac': ['FLAC'],
|
||||||
|
'DTS': (['DTS'], {'validator': LeftValidator()}),
|
||||||
|
'TrueHD': ['True-HD']
|
||||||
|
})
|
||||||
|
|
||||||
|
register_quality('audioCodec', {'MP3': 10,
|
||||||
|
'DolbyDigital': 30,
|
||||||
|
'AAC': 35,
|
||||||
|
'AC3': 40,
|
||||||
|
'Flac': 45,
|
||||||
|
'DTS': 60,
|
||||||
|
'TrueHD': 70
|
||||||
|
})
|
||||||
|
|
||||||
|
self.container.register_property('audioProfile', 'HD', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
|
||||||
|
self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'DTS']))
|
||||||
|
self.container.register_property('audioProfile', 'HE', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
|
||||||
|
self.container.register_property('audioProfile', 'LC', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AAC']))
|
||||||
|
self.container.register_property('audioProfile', 'HQ', validator=LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == 'AC3']))
|
||||||
|
|
||||||
|
register_quality('audioProfile', {'HD': 20,
|
||||||
|
'HDMA': 50,
|
||||||
|
'LC': 0,
|
||||||
|
'HQ': 0,
|
||||||
|
'HE': 20
|
||||||
|
})
|
||||||
|
|
||||||
|
register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch', '8ch'],
|
||||||
|
'5.1': ['5[\W_]1', '5ch', '6ch'],
|
||||||
|
'2.0': ['2[\W_]0', '2ch', 'stereo'],
|
||||||
|
'1.0': ['1[\W_]0', '1ch', 'mono']
|
||||||
|
})
|
||||||
|
|
||||||
|
register_quality('audioChannels', {'7.1': 200,
|
||||||
|
'5.1': 100,
|
||||||
|
'2.0': 0,
|
||||||
|
'1.0': -100
|
||||||
|
})
|
||||||
|
|
||||||
|
self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')
|
||||||
|
|
||||||
|
self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False)
|
||||||
|
|
||||||
|
weak_episode_words = ['pt', 'part']
|
||||||
|
self.container.register_property(None, '(' + build_or_pattern(weak_episode_words) + sep + '?(?P<part>' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)
|
||||||
|
|
||||||
|
register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
|
||||||
|
'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
|
||||||
|
'DualAudio': ['Dual-Audio'],
|
||||||
|
'WideScreen': ['ws', 'wide-screen'],
|
||||||
|
'Netflix': ['Netflix', 'NF']
|
||||||
|
})
|
||||||
|
|
||||||
|
self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=NeighborValidator())
|
||||||
|
self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
|
||||||
|
self.container.register_property('other', 'Fansub', canonical_form='Fansub')
|
||||||
|
self.container.register_property('other', 'Fastsub', canonical_form='Fastsub')
|
||||||
|
self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete', canonical_form='Complete')
|
||||||
|
self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
|
||||||
|
self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')
|
||||||
|
|
||||||
|
self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
|
||||||
|
'DDC',
|
||||||
|
'HR', 'PAL', 'SECAM', 'NTSC')
|
||||||
|
self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())
|
||||||
|
|
||||||
|
for prop in self.container.get_properties('format'):
|
||||||
|
self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')
|
||||||
|
|
||||||
|
for exts in (subtitle_exts, info_exts, video_exts):
|
||||||
|
for container in exts:
|
||||||
|
self.container.register_property('container', container, confidence=0.3)
|
||||||
|
|
||||||
|
def guess_properties(self, string, node=None, options=None):
|
||||||
|
found = self.container.find_properties(string, node, options)
|
||||||
|
return self.container.as_guess(found, string)
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return self.container.get_supported_properties()
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_properties, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
proper_count = 0
|
||||||
|
for other_leaf in mtree.leaves_containing('other'):
|
||||||
|
if 'other' in other_leaf.info and 'Proper' in other_leaf.info['other']:
|
||||||
|
proper_count += 1
|
||||||
|
if proper_count:
|
||||||
|
found_property(mtree, 'properCount', proper_count)
|
||||||
|
|
||||||
|
def rate_quality(self, guess, *props):
|
||||||
|
return self.qualities.rate_quality(guess, *props)
|
||||||
|
|
|
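register_quality above only stores a relative score per canonical form; rate_quality then sums the scores of whatever properties a guess contains, which is what lets callers rank two releases of the same title. A small sketch of that ranking with the format and screenSize scores copied from the diff (the flat-dict rating function is ours):

FORMAT_QUALITY = {'VHS': -100, 'Cam': -90, 'Telesync': -80, 'Workprint': -70,
                  'Telecine': -60, 'PPV': -50, 'TV': -30, 'DVB': -20, 'DVD': 0,
                  'HDTV': 20, 'VOD': 40, 'WEBRip': 50, 'WEB-DL': 60,
                  'HD-DVD': 80, 'BluRay': 100}
SCREEN_QUALITY = {'360p': -300, '368p': -200, '480p': -100, '576p': 0,
                  '720p': 100, '900p': 130, '1080i': 180, '1080p': 200, '4K': 400}

def rate(guess):
    return (FORMAT_QUALITY.get(guess.get('format'), 0)
            + SCREEN_QUALITY.get(guess.get('screenSize'), 0))

# rate({'format': 'BluRay', 'screenSize': '1080p'})  -> 300
# rate({'format': 'HDTV',   'screenSize': '720p'})   -> 120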
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,69 +18,187 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
from guessit.patterns import prop_multi, compute_canonical_form, _dash, _psep
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder, build_guess
|
||||||
|
from guessit.containers import PropertiesContainer
|
||||||
|
from guessit.patterns import sep
|
||||||
|
from guessit.guess import Guess
|
||||||
|
from guessit.textutils import strip_brackets
|
||||||
import re
|
import re
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
def get_patterns(property_name):
|
|
||||||
return [ p.replace(_dash, _psep) for patterns in prop_multi[property_name].values() for p in patterns ]
|
|
||||||
|
|
||||||
CODECS = get_patterns('videoCodec')
|
|
||||||
FORMATS = get_patterns('format')
|
|
||||||
VAPIS = get_patterns('videoApi')
|
|
||||||
|
|
||||||
# RG names following a codec or format, with a potential space or dash inside the name
|
|
||||||
GROUP_NAMES = [ r'(?P<videoCodec>' + codec + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
|
|
||||||
for codec in CODECS ]
|
|
||||||
GROUP_NAMES += [ r'(?P<format>' + fmt + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
|
|
||||||
for fmt in FORMATS ]
|
|
||||||
GROUP_NAMES += [ r'(?P<videoApi>' + api + r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'
|
|
||||||
for api in VAPIS ]
|
|
||||||
|
|
||||||
GROUP_NAMES2 = [ r'\.(?P<videoCodec>' + codec + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
|
|
||||||
for codec in CODECS ]
|
|
||||||
GROUP_NAMES2 += [ r'\.(?P<format>' + fmt + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
|
|
||||||
for fmt in FORMATS ]
|
|
||||||
GROUP_NAMES2 += [ r'\.(?P<videoApi>' + vapi + r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'
|
|
||||||
for vapi in VAPIS ]
|
|
||||||
|
|
||||||
GROUP_NAMES = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES ]
|
|
||||||
GROUP_NAMES2 = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES2 ]
|
|
||||||
|
|
||||||
def adjust_metadata(md):
|
|
||||||
return dict((property_name, compute_canonical_form(property_name, value) or value)
|
|
||||||
for property_name, value in md.items())
|
|
||||||
|
|
||||||
|
|
||||||
def guess_release_group(string):
|
class GuessReleaseGroup(Transformer):
|
||||||
# first try to see whether we have both a known codec and a known release group
|
def __init__(self):
|
||||||
for rexp in GROUP_NAMES:
|
Transformer.__init__(self, -190)
|
||||||
match = rexp.search(string)
|
|
||||||
while match:
|
|
||||||
metadata = match.groupdict()
|
|
||||||
# make sure this is an actual release group we caught
|
|
||||||
release_group = (compute_canonical_form('releaseGroup', metadata['releaseGroup']) or
|
|
||||||
compute_canonical_form('weakReleaseGroup', metadata['releaseGroup']))
|
|
||||||
if release_group:
|
|
||||||
return adjust_metadata(metadata), (match.start(1), match.end(2))
|
|
||||||
|
|
||||||
# we didn't find anything conclusive, keep searching
|
self.container = PropertiesContainer(canonical_from_pattern=False)
|
||||||
match = rexp.search(string, match.span()[0]+1)
|
self._allowed_groupname_pattern = '[\w@#€£$&!\?]'
|
||||||
|
self._forbidden_groupname_lambda = [lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
|
||||||
|
lambda elt: self._is_number(elt)]
|
||||||
|
# If the previous property is in this list, the match will be considered safe
|
||||||
|
# and group name can contain a separator.
|
||||||
|
self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels', 'other']
|
||||||
|
self.previous_safe_values = {'other': ['Complete']}
|
||||||
|
self.next_safe_properties = ['extension', 'website']
|
||||||
|
self.next_safe_values = {'format': ['Telesync']}
|
||||||
|
self.container.sep_replace_char = '-'
|
||||||
|
self.container.canonical_from_pattern = False
|
||||||
|
self.container.enhance = True
|
||||||
|
self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+')
|
||||||
|
self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+')
|
||||||
|
self.re_sep = re.compile('(' + sep + ')')
|
||||||
|
|
||||||
# pick anything as releaseGroup as long as we have a codec in front
|
def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
|
||||||
# this doesn't include a potential dash ('-') ending the release group
|
naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group',
|
||||||
# eg: [...].X264-HiS@SiLUHD-English.[...]
|
help='Expected release group (can be used multiple times)')
|
||||||
for rexp in GROUP_NAMES2:
|
|
||||||
match = rexp.search(string)
|
|
||||||
if match:
|
|
||||||
return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))
|
|
||||||
|
|
||||||
return None, None
|
def supported_properties(self):
|
||||||
|
return self.container.get_supported_properties()
|
||||||
|
|
||||||
|
def _is_number(self, s):
|
||||||
|
try:
|
||||||
|
int(s)
|
||||||
|
return True
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
def process(mtree):
|
def validate_group_name(self, guess):
|
||||||
SingleNodeGuesser(guess_release_group, 0.8, log).process(mtree)
|
val = guess['releaseGroup']
|
||||||
|
if len(val) > 1:
|
||||||
|
checked_val = ""
|
||||||
|
forbidden = False
|
||||||
|
for elt in self.re_sep.split(val): # separators are in the list because of capturing group
|
||||||
|
if forbidden:
|
||||||
|
# Previous token was forbidden, don't add the separator
|
||||||
|
forbidden = False
|
||||||
|
continue
|
||||||
|
for forbidden_lambda in self._forbidden_groupname_lambda:
|
||||||
|
forbidden = forbidden_lambda(elt.lower())
|
||||||
|
if forbidden:
|
||||||
|
if checked_val:
|
||||||
|
# Removing previous separator
|
||||||
|
checked_val = checked_val[0:len(checked_val) - 1]
|
||||||
|
break
|
||||||
|
if not forbidden:
|
||||||
|
checked_val += elt
|
||||||
|
|
||||||
|
val = checked_val
|
||||||
|
if not val:
|
||||||
|
return False
|
||||||
|
if self.re_sep.match(val[-1]):
|
||||||
|
val = val[:len(val)-1]
|
||||||
|
if self.re_sep.match(val[0]):
|
||||||
|
val = val[1:]
|
||||||
|
guess['releaseGroup'] = val
|
||||||
|
forbidden = False
|
||||||
|
for forbidden_lambda in self._forbidden_groupname_lambda:
|
||||||
|
forbidden = forbidden_lambda(val.lower())
|
||||||
|
if forbidden:
|
||||||
|
break
|
||||||
|
if not forbidden:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def is_leaf_previous(self, leaf, node):
|
||||||
|
if leaf.span[1] <= node.span[0]:
|
||||||
|
for idx in range(leaf.span[1], node.span[0]):
|
||||||
|
if leaf.root.value[idx] not in sep:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def validate_next_leaves(self, node):
|
||||||
|
if 'series' in node.root.info or 'title' in node.root.info:
|
||||||
|
# --expected-series or --expected-title is used.
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Make sure to avoid collision with 'series' or 'title' guessed later. Should be more precise.
|
||||||
|
leaves = node.root.unidentified_leaves()
|
||||||
|
return len(list(leaves)) > 1
|
||||||
|
|
||||||
|
def validate_node(self, leaf, node, safe=False):
|
||||||
|
if not self.is_leaf_previous(leaf, node):
|
||||||
|
return False
|
||||||
|
if not self.validate_next_leaves(node):
|
||||||
|
return False
|
||||||
|
if safe:
|
||||||
|
for k, v in leaf.guess.items():
|
||||||
|
if k in self.previous_safe_values and not v in self.previous_safe_values[k]:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def guess_release_group(self, string, node=None, options=None):
|
||||||
|
if options and options.get('expected_group'):
|
||||||
|
expected_container = PropertiesContainer(enhance=True, canonical_from_pattern=False)
|
||||||
|
for expected_group in options.get('expected_group'):
|
||||||
|
if expected_group.startswith('re:'):
|
||||||
|
expected_group = expected_group[3:]
|
||||||
|
expected_group = expected_group.replace(' ', '-')
|
||||||
|
expected_container.register_property('releaseGroup', expected_group, enhance=True)
|
||||||
|
else:
|
||||||
|
expected_group = re.escape(expected_group)
|
||||||
|
expected_container.register_property('releaseGroup', expected_group, enhance=False)
|
||||||
|
|
||||||
|
found = expected_container.find_properties(string, node, options, 'releaseGroup')
|
||||||
|
guess = expected_container.as_guess(found, string, self.validate_group_name)
|
||||||
|
if guess:
|
||||||
|
return guess
|
||||||
|
|
||||||
|
found = self.container.find_properties(string, node, options, 'releaseGroup')
|
||||||
|
guess = self.container.as_guess(found, string, self.validate_group_name)
|
||||||
|
validated_guess = None
|
||||||
|
if guess:
|
||||||
|
group_node = node.group_node()
|
||||||
|
if group_node:
|
||||||
|
for leaf in group_node.leaves_containing(self.previous_safe_properties):
|
||||||
|
if self.validate_node(leaf, node, True):
|
||||||
|
if leaf.root.value[leaf.span[1]] == '-':
|
||||||
|
guess.metadata().confidence = 1
|
||||||
|
else:
|
||||||
|
guess.metadata().confidence = 0.7
|
||||||
|
validated_guess = guess
|
||||||
|
|
||||||
|
if not validated_guess:
|
||||||
|
# If previous group last leaf is identified as a safe property,
|
||||||
|
# consider the raw value as a releaseGroup
|
||||||
|
previous_group_node = node.previous_group_node()
|
||||||
|
if previous_group_node:
|
||||||
|
for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
|
||||||
|
if self.validate_node(leaf, node, False):
|
||||||
|
guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
|
||||||
|
if self.validate_group_name(guess):
|
||||||
|
node.guess = guess
|
||||||
|
validated_guess = guess
|
||||||
|
|
||||||
|
if validated_guess:
|
||||||
|
# If following group nodes have only one unidentified leaf, it belongs to the release group
|
||||||
|
next_group_node = node
|
||||||
|
|
||||||
|
while True:
|
||||||
|
next_group_node = next_group_node.next_group_node()
|
||||||
|
if next_group_node:
|
||||||
|
leaves = list(next_group_node.leaves())
|
||||||
|
if len(leaves) == 1 and not leaves[0].guess:
|
||||||
|
validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
|
||||||
|
leaves[0].guess = validated_guess
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
if not validated_guess and node.is_explicit() and node.node_last_idx == 0: # first node from group
|
||||||
|
validated_guess = build_guess(node, 'releaseGroup', value=node.value[1:len(node.value)-1])
|
||||||
|
validated_guess.metadata().confidence = 0.4
|
||||||
|
validated_guess.metadata().span = 1, len(node.value)
|
||||||
|
node.guess = validated_guess
|
||||||
|
|
||||||
|
if validated_guess:
|
||||||
|
# Strip brackets
|
||||||
|
validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])
|
||||||
|
|
||||||
|
return validated_guess
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_release_group, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
|
|
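validate_group_name above splits the candidate on separators and refuses tokens that are clearly not a group name ('rip', 'by', 'for', 'par', 'pour', 'bonus', or bare numbers). A compact approximation of that filtering; the separator regex is our stand-in for guessit's sep, and unlike the original we keep scanning instead of stopping at the first forbidden token:

import re

_SEP = re.compile(r'[ .\-_]+')
_FORBIDDEN = {'rip', 'by', 'for', 'par', 'pour', 'bonus'}

def clean_group_name(candidate):
    kept = []
    for token in _SEP.split(candidate):
        if not token or token.lower() in _FORBIDDEN or token.isdigit():
            continue
        kept.append(token)
    return '-'.join(kept) or None

# clean_group_name('HiS@SiLUHD')   -> 'HiS@SiLUHD'
# clean_group_name('rip by 2HD')   -> '2HD'
# clean_group_name('2013')         -> None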
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# GuessIt - A library for guessing information from filenames
|
# GuessIt - A library for guessing information from filenames
|
||||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||||
#
|
#
|
||||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||||
# the terms of the Lesser GNU General Public License as published by
|
# the terms of the Lesser GNU General Public License as published by
|
||||||
|
|
@ -18,33 +18,41 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
#
|
#
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import absolute_import, division, print_function, \
|
||||||
from guessit import Guess
|
unicode_literals
|
||||||
from guessit.transfo import SingleNodeGuesser
|
|
||||||
from guessit.patterns import video_rexps, sep
|
|
||||||
import re
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
from guessit.patterns import _psep
|
||||||
|
from guessit.containers import PropertiesContainer
|
||||||
|
from guessit.plugins.transformers import Transformer
|
||||||
|
from guessit.matcher import GuessFinder
|
||||||
|
from guessit.patterns.numeral import parse_numeral
|
||||||
|
|
||||||
|
|
||||||
def guess_video_rexps(string):
|
class GuessVideoRexps(Transformer):
|
||||||
string = '-' + string + '-'
|
def __init__(self):
|
||||||
for rexp, confidence, span_adjust in video_rexps:
|
Transformer.__init__(self, 25)
|
||||||
match = re.search(sep + rexp + sep, string, re.IGNORECASE)
|
|
||||||
if match:
|
|
||||||
metadata = match.groupdict()
|
|
||||||
# is this the better place to put it? (maybe, as it is at least
|
|
||||||
# the soonest that we can catch it)
|
|
||||||
if metadata.get('cdNumberTotal', -1) is None:
|
|
||||||
del metadata['cdNumberTotal']
|
|
||||||
span = (match.start() + span_adjust[0],
|
|
||||||
match.end() + span_adjust[1] - 2)
|
|
||||||
return (Guess(metadata, confidence=confidence, raw=string[span[0]:span[1]]),
|
|
||||||
span)
|
|
||||||
|
|
||||||
return None, None
|
self.container = PropertiesContainer(canonical_from_pattern=False)
|
||||||
|
|
||||||
|
self.container.register_property(None, 'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep + '(?P<cdNumberTotal>[0-9]))?', confidence=1.0, enhance=False, global_span=True, formatter=parse_numeral)
|
||||||
|
self.container.register_property('cdNumberTotal', '([1-9])' + _psep + 'cds?', confidence=0.9, enhance=False, formatter=parse_numeral)
|
||||||
|
|
||||||
def process(mtree):
|
self.container.register_property('bonusNumber', 'x([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)
|
||||||
SingleNodeGuesser(guess_video_rexps, None, log).process(mtree)
|
|
||||||
|
self.container.register_property('filmNumber', 'f([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)
|
||||||
|
|
||||||
|
self.container.register_property('edition', 'collector', 'collector-edition', 'edition-collector', canonical_form='Collector Edition')
|
||||||
|
self.container.register_property('edition', 'special-edition', 'edition-special', canonical_form='Special Edition')
|
||||||
|
self.container.register_property('edition', 'criterion', 'criterion-edition', 'edition-criterion', canonical_form='Criterion Edition')
|
||||||
|
self.container.register_property('edition', 'deluxe', 'cdeluxe-edition', 'edition-deluxe', canonical_form='Deluxe Edition')
|
||||||
|
self.container.register_property('edition', 'director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut', canonical_form='Director\'s cut')
|
||||||
|
|
||||||
|
def supported_properties(self):
|
||||||
|
return self.container.get_supported_properties()
|
||||||
|
|
||||||
|
def guess_video_rexps(self, string, node=None, options=None):
|
||||||
|
found = self.container.find_properties(string, node, options)
|
||||||
|
return self.container.as_guess(found, string)
|
||||||
|
|
||||||
|
def process(self, mtree, options=None):
|
||||||
|
GuessFinder(self.guess_video_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
|
||||||
|
|
|
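The first property registered above captures disc numbering such as 'CD1' or 'cd 2 of 3' and, via parse_numeral, yields integer cdNumber/cdNumberTotal values. A self-contained regex in the same spirit; the separator class below is a simplified stand-in for guessit's _psep:

import re

_PSEP = r'[ .\-_]?'
_CD = re.compile(r'cd' + _PSEP + r'(?P<cdNumber>[0-9])'
                 r'(?:' + _PSEP + r'of' + _PSEP + r'(?P<cdNumberTotal>[0-9]))?',
                 re.IGNORECASE)

m = _CD.search('Some.Movie.1995.CD2of3.XviD.avi')
# m.group('cdNumber') == '2', m.group('cdNumberTotal') == '3'
m = _CD.search('Some.Movie.1995.cd1.avi')
# m.group('cdNumber') == '1', m.group('cdNumberTotal') is None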
||||||
|
|
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,45 +18,64 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit import Guess
-from guessit.transfo import SingleNodeGuesser
-from guessit.patterns import weak_episode_rexps
-import re
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def guess_weak_episodes_rexps(string, node):
-    if 'episodeNumber' in node.root.info:
-        return None, None
-
-    for rexp, span_adjust in weak_episode_rexps:
-        match = re.search(rexp, string, re.IGNORECASE)
-        if match:
-            metadata = match.groupdict()
-            span = (match.start() + span_adjust[0],
-                    match.end() + span_adjust[1])
-
-            epnum = int(metadata['episodeNumber'])
-            if epnum > 100:
-                season, epnum = epnum // 100, epnum % 100
-                # episodes which have a season > 25 are most likely errors
-                # (Simpsons is at 23!)
-                if season > 25:
-                    continue
-                return Guess({ 'season': season,
-                               'episodeNumber': epnum },
-                             confidence=0.6, raw=string[span[0]:span[1]]), span
-            else:
-                return Guess(metadata, confidence=0.3, raw=string[span[0]:span[1]]), span
-
-    return None, None
-
-
-guess_weak_episodes_rexps.use_node = True
-
-
-def process(mtree):
-    SingleNodeGuesser(guess_weak_episodes_rexps, 0.6, log).process(mtree)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit.matcher import GuessFinder
+from guessit.patterns import sep, build_or_pattern
+from guessit.containers import PropertiesContainer, LeavesValidator, NoValidator, WeakValidator
+from guessit.patterns.numeral import numeral, parse_numeral
+from guessit.date import valid_year
+
+import re
+
+
+class GuessWeakEpisodesRexps(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, 15)
+
+        of_separators = ['of', 'sur', '/', '\\']
+        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)
+
+        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
+
+        episode_words = ['episodes?']
+
+        def _formater(episode_number):
+            epnum = parse_numeral(episode_number)
+            if not valid_year(epnum):
+                if epnum > 100:
+                    season, epnum = epnum // 100, epnum % 100
+                    # episodes which have a season > 50 are most likely errors
+                    # (Simpson is at 25!)
+                    if season > 50:
+                        return None
+                    return {'season': season, 'episodeNumber': epnum}
+                else:
+                    return epnum
+
+        self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater, disabler=lambda options: options.get('episode_prefer_number') if options else False)
+        self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}', confidence=0.6, formatter=_formater)
+        self.container.register_property('episodeNumber', '[^0-9](\d{1,3})', confidence=0.6, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
+        self.container.register_property(None, '(' + build_or_pattern(episode_words) + sep + '?(?P<episodeNumber>' + numeral + '))[^0-9]', confidence=0.4, formatter=parse_numeral)
+        self.container.register_property(None, r'(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')', confidence=0.6, formatter=parse_numeral)
+        self.container.register_property('episodeNumber', r'^' + sep + '?(\d{1,3})' + sep, confidence=0.4, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
+        self.container.register_property('episodeNumber', sep + r'(\d{1,3})' + sep + '?$', confidence=0.4, formatter=parse_numeral, disabler=lambda options: not options.get('episode_prefer_number') if options else True)
+
+    def supported_properties(self):
+        return self.container.get_supported_properties()
+
+    def guess_weak_episodes_rexps(self, string, node=None, options=None):
+        if node and 'episodeNumber' in node.root.info:
+            return None
+
+        properties = self.container.find_properties(string, node, options)
+        guess = self.container.as_guess(properties, string)
+
+        return guess
+
+    def should_process(self, mtree, options=None):
+        return mtree.guess.get('type', '').startswith('episode')
+
+    def process(self, mtree, options=None):
+        GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options).process_nodes(mtree.unidentified_leaves())
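A standalone sketch of what the new _formater aims to do (the valid_year guard and parse_numeral are left out of this simplification): a bare 3- or 4-digit number is split into season and episode, and implausible seasons are dropped.

def split_weak_episode(epnum):
    # mirror of the formatter's core logic, taking an already-parsed int
    if epnum > 100:
        season, episode = epnum // 100, epnum % 100
        if season > 50:          # almost certainly not a real season
            return None
        return {'season': season, 'episodeNumber': episode}
    return {'episodeNumber': epnum}

print(split_weak_episode(103))   # {'season': 1, 'episodeNumber': 3}
print(split_weak_episode(2349))  # {'season': 23, 'episodeNumber': 49}
print(split_weak_episode(7890))  # None (season 78 is rejected)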
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Rémi Alvergnat <toilal.dev@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,22 +18,39 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit.transfo import SingleNodeGuesser
-from guessit.patterns import websites
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def guess_website(string):
-    low = string.lower()
-    for site in websites:
-        pos = low.find(site.lower())
-        if pos != -1:
-            return {'website': site}, (pos, pos + len(site))
-    return None, None
-
-
-def process(mtree):
-    SingleNodeGuesser(guess_website, 1.0, log).process(mtree)
+from __future__ import absolute_import, division, print_function, unicode_literals
+from guessit.patterns import build_or_pattern
+from guessit.containers import PropertiesContainer
+from guessit.plugins.transformers import Transformer
+from guessit.matcher import GuessFinder
+from pkg_resources import resource_stream  # @UnresolvedImport
+
+TLDS = [l.strip().decode('utf-8')
+        for l in resource_stream('guessit', 'tlds-alpha-by-domain.txt').readlines()
+        if b'--' not in l][1:]
+
+
+class GuessWebsite(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, 45)
+
+        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)
+
+        tlds_pattern = build_or_pattern(TLDS)  # All registered domain extension
+        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])  # For sure a website extension
+        safe_subdomains_pattern = build_or_pattern(['www'])  # For sure a website subdomain
+        safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])  # Those words before a tlds are sure
+
+        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)+' + r'(?:[a-z-]+\.)+' + r'(?:' + tlds_pattern + r')+')
+        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_tlds_pattern + r')+')
+        self.container.register_property('website', '(?:' + safe_subdomains_pattern + '\.)*' + r'[a-z-]+\.' + r'(?:' + safe_prefix_tlds_pattern + r'\.)+' + r'(?:' + tlds_pattern + r')+')
+
+    def supported_properties(self):
+        return self.container.get_supported_properties()
+
+    def guess_website(self, string, node=None, options=None):
+        found = self.container.find_properties(string, node, options, 'website')
+        return self.container.as_guess(found, string)
+
+    def process(self, mtree, options=None):
+        GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
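A rough standalone illustration of the tiered idea behind these patterns, using a hand-rolled regex limited to a few "safe" TLDs rather than the full IANA list the transformer loads:

import re

safe_tlds = '|'.join(['com', 'org', 'net'])
website_re = re.compile(r'(?:www\.)?[a-z-]+\.(?:' + safe_tlds + r')', re.IGNORECASE)

for name in ('Show.S01E01.www.example.com.mkv', 'Movie.2013.720p.BluRay.mkv'):
    m = website_re.search(name)
    print(m.group(0) if m else None)   # 'www.example.com', then None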
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,33 +18,40 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-from guessit.transfo import SingleNodeGuesser
-from guessit.date import search_year
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def guess_year(string):
-    year, span = search_year(string)
-    if year:
-        return { 'year': year }, span
-    else:
-        return None, None
-
-
-def guess_year_skip_first(string):
-    year, span = search_year(string)
-    if year:
-        year2, span2 = guess_year(string[span[1]:])
-        if year2:
-            return year2, (span2[0]+span[1], span2[1]+span[1])
-
-    return None, None
-
-
-def process(mtree, skip_first_year=False):
-    if skip_first_year:
-        SingleNodeGuesser(guess_year_skip_first, 1.0, log).process(mtree)
-    else:
-        SingleNodeGuesser(guess_year, 1.0, log).process(mtree)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit.matcher import GuessFinder
+from guessit.date import search_year, valid_year
+
+
+class GuessYear(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, -160)
+
+    def supported_properties(self):
+        return ['year']
+
+    def guess_year(self, string, node=None, options=None):
+        year, span = search_year(string)
+        if year:
+            return {'year': year}, span
+        else:
+            return None, None
+
+    def second_pass_options(self, mtree, options=None):
+        year_nodes = list(mtree.leaves_containing('year'))
+        if len(year_nodes) > 1:
+            return {'skip_nodes': year_nodes[:len(year_nodes) - 1]}
+        return None
+
+    def process(self, mtree, options=None):
+        GuessFinder(self.guess_year, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves())
+
+        # if we found a season number that is a valid year, it is usually safe to assume
+        # we can also set the year property to that value
+        for n in mtree.leaves_containing('season'):
+            g = n.guess
+            season = g['season']
+            if valid_year(season):
+                g['year'] = season
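A standalone sketch of the new post-processing step: when a detected season number also looks like a valid year, it is copied into the year property. The year range used here is an assumption standing in for guessit.date.valid_year.

def valid_year_simple(value):          # assumed stand-in for guessit.date.valid_year
    return 1920 <= value <= 2030

guess = {'season': 2013}
if valid_year_simple(guess['season']):
    guess['year'] = guess['season']
print(guess)   # {'season': 2013, 'year': 2013}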
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
-#
-# GuessIt is free software; you can redistribute it and/or modify it under
-# the terms of the Lesser GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
-#
-# GuessIt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# Lesser GNU General Public License for more details.
-#
-# You should have received a copy of the Lesser GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-
-from __future__ import unicode_literals
-from guessit.patterns import subtitle_exts
-from guessit.textutils import reorder_title, find_words
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def process(mtree):
-    # 1- try to promote language to subtitle language where it makes sense
-    for node in mtree.nodes():
-        if 'language' not in node.guess:
-            continue
-
-        def promote_subtitle():
-            # pylint: disable=W0631
-            node.guess.set('subtitleLanguage', node.guess['language'],
-                           confidence=node.guess.confidence('language'))
-            del node.guess['language']
-
-        # - if we matched a language in a file with a sub extension and that
-        #   the group is the last group of the filename, it is probably the
-        #   language of the subtitle
-        #   (eg: 'xxx.english.srt')
-        if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
-            node == mtree.leaves()[-2]):
-            promote_subtitle()
-
-        # - if we find the word 'sub' before the language, and in the same explicit
-        #   group, then upgrade the language
-        explicit_group = mtree.node_at(node.node_idx[:2])
-        group_str = explicit_group.value.lower()
-
-        if ('sub' in find_words(group_str) and
-            0 <= group_str.find('sub') < (node.span[0] - explicit_group.span[0])):
-            promote_subtitle()
-
-        # - if a language is in an explicit group just preceded by "st",
-        #   it is a subtitle language (eg: '...st[fr-eng]...')
-        try:
-            idx = node.node_idx
-            previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
-            if previous.value.lower()[-2:] == 'st':
-                promote_subtitle()
-        except IndexError:
-            pass
-
-    # 2- ", the" at the end of a series title should be prepended to it
-    for node in mtree.nodes():
-        if 'series' not in node.guess:
-            continue
-
-        node.guess['series'] = reorder_title(node.guess['series'])
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,27 +18,32 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-
-from guessit.textutils import find_first_level_groups
-from guessit.patterns import group_delimiters
-import functools
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def process(mtree):
-    """return the string split into explicit groups, that is, those either
-    between parenthese, square brackets or curly braces, and those separated
-    by a dash."""
-    for c in mtree.children:
-        groups = find_first_level_groups(c.value, group_delimiters[0])
-        for delimiters in group_delimiters:
-            flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
-            groups = functools.reduce(flatten, groups, [])
-
-        # do not do this at this moment, it is not strong enough and can break other
-        # patterns, such as dates, etc...
-        #groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
-
-        c.split_on_components(groups)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit.textutils import find_first_level_groups
+from guessit.patterns import group_delimiters
+from functools import reduce
+
+
+class SplitExplicitGroups(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, 250)
+
+    def process(self, mtree, options=None):
+        """split each of those into explicit groups (separated by parentheses or square brackets)
+
+        :return: return the string split into explicit groups, that is, those either
+        between parenthese, square brackets or curly braces, and those separated
+        by a dash."""
+        for c in mtree.children:
+            groups = find_first_level_groups(c.value, group_delimiters[0])
+            for delimiters in group_delimiters:
+                flatten = lambda l, x: l + find_first_level_groups(x, delimiters)
+                groups = reduce(flatten, groups, [])
+
+            # do not do this at this moment, it is not strong enough and can break other
+            # patterns, such as dates, etc...
+            # groups = functools.reduce(lambda l, x: l + x.split('-'), groups, [])
+
+            c.split_on_components(groups)
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,24 +18,29 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-
-from guessit.patterns import sep
-import re
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def process(mtree):
-    for node in mtree.unidentified_leaves():
-        indices = []
-
-        didx = 0
-        pattern = re.compile(sep + '-' + sep)
-        match = pattern.search(node.value)
-        while match:
-            span = match.span()
-            indices.extend([ span[0], span[1] ])
-            match = pattern.search(node.value, span[1])
-
-        if indices:
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit.patterns import sep
+import re
+
+
+class SplitOnDash(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, 245)
+
+    def process(self, mtree, options=None):
+        """split into '-' separated subgroups (with required separator chars
+        around the dash)
+        """
+        for node in mtree.unidentified_leaves():
+            indices = []
+
+            pattern = re.compile(sep + '-' + sep)
+            match = pattern.search(node.value)
+            while match:
+                span = match.span()
+                indices.extend([span[0], span[1]])
+                match = pattern.search(node.value, span[1])
+
+            if indices:
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # GuessIt - A library for guessing information from filenames
-# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
+# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
 #
 # GuessIt is free software; you can redistribute it and/or modify it under
 # the terms of the Lesser GNU General Public License as published by
@@ -18,19 +18,28 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import unicode_literals
-
-from guessit import fileutils
-import os.path
-import logging
-
-log = logging.getLogger(__name__)
-
-
-def process(mtree):
-    """Returns the filename split into [ dir*, basename, ext ]."""
-    components = fileutils.split_path(mtree.value)
-    basename = components.pop(-1)
-    components += list(os.path.splitext(basename))
-    components[-1] = components[-1][1:]  # remove the '.' from the extension
-
-    mtree.split_on_components(components)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from guessit.plugins.transformers import Transformer
+from guessit import fileutils
+from os.path import splitext
+
+
+class SplitPathComponents(Transformer):
+    def __init__(self):
+        Transformer.__init__(self, 255)
+
+    def process(self, mtree, options=None):
+        """first split our path into dirs + basename + ext
+
+        :return: the filename split into [ dir*, basename, ext ]
+        """
+        if not options.get('name_only'):
+            components = fileutils.split_path(mtree.value)
+            basename = components.pop(-1)
+            components += list(splitext(basename))
+            components[-1] = components[-1][1:]  # remove the '.' from the extension
+
+            mtree.split_on_components(components)
+        else:
+            mtree.split_on_components([mtree.value, ''])
@@ -99,13 +99,15 @@ class OpenSubtitlesProvider(Provider):
     def no_operation(self):
         checked(self.server.NoOperation(self.token))
 
-    def query(self, languages, hash=None, size=None, imdb_id=None, query=None):  # @ReservedAssignment
+    def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None):  # @ReservedAssignment
         searches = []
         if hash and size:
             searches.append({'moviehash': hash, 'moviebytesize': str(size)})
         if imdb_id:
             searches.append({'imdbid': imdb_id})
-        if query:
+        if query and season and episode:
+            searches.append({'query': query, 'season': season, 'episode': episode})
+        elif query:
             searches.append({'query': query})
         if not searches:
             raise ValueError('One or more parameter missing')
@@ -126,10 +128,16 @@ class OpenSubtitlesProvider(Provider):
 
     def list_subtitles(self, video, languages):
         query = None
+        season = None
+        episode = None
         if ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
             query = video.name.split(os.sep)[-1]
+            if isinstance(video, Episode):
+                query = video.series
+                season = video.season
+                episode = video.episode
         return self.query(languages, hash=video.hashes.get('opensubtitles'), size=video.size, imdb_id=video.imdb_id,
-                          query=query)
+                          query=query, season=season, episode=episode)
 
     def download_subtitle(self, subtitle):
         response = checked(self.server.DownloadSubtitles(self.token, [subtitle.id]))
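A standalone sketch of the new search-list logic from this change: season and episode are only added to the text search when all three values are known, otherwise a plain query search is used.

def build_searches(hash=None, size=None, imdb_id=None, query=None, season=None, episode=None):
    # same branching as the updated OpenSubtitlesProvider.query()
    searches = []
    if hash and size:
        searches.append({'moviehash': hash, 'moviebytesize': str(size)})
    if imdb_id:
        searches.append({'imdbid': imdb_id})
    if query and season and episode:
        searches.append({'query': query, 'season': season, 'episode': episode})
    elif query:
        searches.append({'query': query})
    if not searches:
        raise ValueError('One or more parameter missing')
    return searches

print(build_searches(query='The Big Bang Theory', season=7, episode=5))
# [{'query': 'The Big Bang Theory', 'season': 7, 'episode': 5}]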
@@ -51,7 +51,14 @@ class Subtitle(object):
             encodings.append('windows-1255')
         elif self.language.alpha3 == 'tur':
             encodings.extend(['iso-8859-9', 'windows-1254'])
+        elif self.language.alpha3 == 'pol':
+            # Eastern European Group 1
+            encodings.extend(['windows-1250'])
+        elif self.language.alpha3 == 'bul':
+            # Eastern European Group 2
+            encodings.extend(['windows-1251'])
         else:
+            # Western European (windows-1252)
             encodings.append('latin-1')
 
         # try to decode
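A standalone sketch of the fallback idea behind this encoding list (the helper below is illustrative, not subliminal's API): try each candidate encoding in order and keep the first one that decodes the subtitle bytes without error.

def decode_with_fallback(content, encodings):
    # content is the raw subtitle bytes; encodings is the ordered candidate list
    for encoding in encodings:
        try:
            return content.decode(encoding), encoding
        except UnicodeDecodeError:
            continue
    return None, None

text, used = decode_with_fallback('zażółć'.encode('windows-1250'), ['utf-8', 'windows-1250'])
print(used)   # windows-1250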