mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-14 18:47:09 -07:00
Renamed lib folder to libs
This commit is contained in:
parent
4a9a378d52
commit
74a71a2cb0
117 changed files with 0 additions and 0 deletions
249
libs/guessit/ISO-3166-1_utf8.txt
Normal file
249
libs/guessit/ISO-3166-1_utf8.txt
Normal file
|
@ -0,0 +1,249 @@
|
|||
Afghanistan|AF|AFG|004|ISO 3166-2:AF
|
||||
Åland Islands|AX|ALA|248|ISO 3166-2:AX
|
||||
Albania|AL|ALB|008|ISO 3166-2:AL
|
||||
Algeria|DZ|DZA|012|ISO 3166-2:DZ
|
||||
American Samoa|AS|ASM|016|ISO 3166-2:AS
|
||||
Andorra|AD|AND|020|ISO 3166-2:AD
|
||||
Angola|AO|AGO|024|ISO 3166-2:AO
|
||||
Anguilla|AI|AIA|660|ISO 3166-2:AI
|
||||
Antarctica|AQ|ATA|010|ISO 3166-2:AQ
|
||||
Antigua and Barbuda|AG|ATG|028|ISO 3166-2:AG
|
||||
Argentina|AR|ARG|032|ISO 3166-2:AR
|
||||
Armenia|AM|ARM|051|ISO 3166-2:AM
|
||||
Aruba|AW|ABW|533|ISO 3166-2:AW
|
||||
Australia|AU|AUS|036|ISO 3166-2:AU
|
||||
Austria|AT|AUT|040|ISO 3166-2:AT
|
||||
Azerbaijan|AZ|AZE|031|ISO 3166-2:AZ
|
||||
Bahamas|BS|BHS|044|ISO 3166-2:BS
|
||||
Bahrain|BH|BHR|048|ISO 3166-2:BH
|
||||
Bangladesh|BD|BGD|050|ISO 3166-2:BD
|
||||
Barbados|BB|BRB|052|ISO 3166-2:BB
|
||||
Belarus|BY|BLR|112|ISO 3166-2:BY
|
||||
Belgium|BE|BEL|056|ISO 3166-2:BE
|
||||
Belize|BZ|BLZ|084|ISO 3166-2:BZ
|
||||
Benin|BJ|BEN|204|ISO 3166-2:BJ
|
||||
Bermuda|BM|BMU|060|ISO 3166-2:BM
|
||||
Bhutan|BT|BTN|064|ISO 3166-2:BT
|
||||
Bolivia, Plurinational State of|BO|BOL|068|ISO 3166-2:BO
|
||||
Bonaire, Sint Eustatius and Saba|BQ|BES|535|ISO 3166-2:BQ
|
||||
Bosnia and Herzegovina|BA|BIH|070|ISO 3166-2:BA
|
||||
Botswana|BW|BWA|072|ISO 3166-2:BW
|
||||
Bouvet Island|BV|BVT|074|ISO 3166-2:BV
|
||||
Brazil|BR|BRA|076|ISO 3166-2:BR
|
||||
British Indian Ocean Territory|IO|IOT|086|ISO 3166-2:IO
|
||||
Brunei Darussalam|BN|BRN|096|ISO 3166-2:BN
|
||||
Bulgaria|BG|BGR|100|ISO 3166-2:BG
|
||||
Burkina Faso|BF|BFA|854|ISO 3166-2:BF
|
||||
Burundi|BI|BDI|108|ISO 3166-2:BI
|
||||
Cambodia|KH|KHM|116|ISO 3166-2:KH
|
||||
Cameroon|CM|CMR|120|ISO 3166-2:CM
|
||||
Canada|CA|CAN|124|ISO 3166-2:CA
|
||||
Cape Verde|CV|CPV|132|ISO 3166-2:CV
|
||||
Cayman Islands|KY|CYM|136|ISO 3166-2:KY
|
||||
Central African Republic|CF|CAF|140|ISO 3166-2:CF
|
||||
Chad|TD|TCD|148|ISO 3166-2:TD
|
||||
Chile|CL|CHL|152|ISO 3166-2:CL
|
||||
China|CN|CHN|156|ISO 3166-2:CN
|
||||
Christmas Island|CX|CXR|162|ISO 3166-2:CX
|
||||
Cocos (Keeling) Islands|CC|CCK|166|ISO 3166-2:CC
|
||||
Colombia|CO|COL|170|ISO 3166-2:CO
|
||||
Comoros|KM|COM|174|ISO 3166-2:KM
|
||||
Congo|CG|COG|178|ISO 3166-2:CG
|
||||
Congo, the Democratic Republic of the|CD|COD|180|ISO 3166-2:CD
|
||||
Cook Islands|CK|COK|184|ISO 3166-2:CK
|
||||
Costa Rica|CR|CRI|188|ISO 3166-2:CR
|
||||
Côte d'Ivoire|CI|CIV|384|ISO 3166-2:CI
|
||||
Croatia|HR|HRV|191|ISO 3166-2:HR
|
||||
Cuba|CU|CUB|192|ISO 3166-2:CU
|
||||
Curaçao|CW|CUW|531|ISO 3166-2:CW
|
||||
Cyprus|CY|CYP|196|ISO 3166-2:CY
|
||||
Czech Republic|CZ|CZE|203|ISO 3166-2:CZ
|
||||
Denmark|DK|DNK|208|ISO 3166-2:DK
|
||||
Djibouti|DJ|DJI|262|ISO 3166-2:DJ
|
||||
Dominica|DM|DMA|212|ISO 3166-2:DM
|
||||
Dominican Republic|DO|DOM|214|ISO 3166-2:DO
|
||||
Ecuador|EC|ECU|218|ISO 3166-2:EC
|
||||
Egypt|EG|EGY|818|ISO 3166-2:EG
|
||||
El Salvador|SV|SLV|222|ISO 3166-2:SV
|
||||
Equatorial Guinea|GQ|GNQ|226|ISO 3166-2:GQ
|
||||
Eritrea|ER|ERI|232|ISO 3166-2:ER
|
||||
Estonia|EE|EST|233|ISO 3166-2:EE
|
||||
Ethiopia|ET|ETH|231|ISO 3166-2:ET
|
||||
Falkland Islands (Malvinas)|FK|FLK|238|ISO 3166-2:FK
|
||||
Faroe Islands|FO|FRO|234|ISO 3166-2:FO
|
||||
Fiji|FJ|FJI|242|ISO 3166-2:FJ
|
||||
Finland|FI|FIN|246|ISO 3166-2:FI
|
||||
France|FR|FRA|250|ISO 3166-2:FR
|
||||
French Guiana|GF|GUF|254|ISO 3166-2:GF
|
||||
French Polynesia|PF|PYF|258|ISO 3166-2:PF
|
||||
French Southern Territories|TF|ATF|260|ISO 3166-2:TF
|
||||
Gabon|GA|GAB|266|ISO 3166-2:GA
|
||||
Gambia|GM|GMB|270|ISO 3166-2:GM
|
||||
Georgia|GE|GEO|268|ISO 3166-2:GE
|
||||
Germany|DE|DEU|276|ISO 3166-2:DE
|
||||
Ghana|GH|GHA|288|ISO 3166-2:GH
|
||||
Gibraltar|GI|GIB|292|ISO 3166-2:GI
|
||||
Greece|GR|GRC|300|ISO 3166-2:GR
|
||||
Greenland|GL|GRL|304|ISO 3166-2:GL
|
||||
Grenada|GD|GRD|308|ISO 3166-2:GD
|
||||
Guadeloupe|GP|GLP|312|ISO 3166-2:GP
|
||||
Guam|GU|GUM|316|ISO 3166-2:GU
|
||||
Guatemala|GT|GTM|320|ISO 3166-2:GT
|
||||
Guernsey|GG|GGY|831|ISO 3166-2:GG
|
||||
Guinea|GN|GIN|324|ISO 3166-2:GN
|
||||
Guinea-Bissau|GW|GNB|624|ISO 3166-2:GW
|
||||
Guyana|GY|GUY|328|ISO 3166-2:GY
|
||||
Haiti|HT|HTI|332|ISO 3166-2:HT
|
||||
Heard Island and McDonald Islands|HM|HMD|334|ISO 3166-2:HM
|
||||
Holy See (Vatican City State)|VA|VAT|336|ISO 3166-2:VA
|
||||
Honduras|HN|HND|340|ISO 3166-2:HN
|
||||
Hong Kong|HK|HKG|344|ISO 3166-2:HK
|
||||
Hungary|HU|HUN|348|ISO 3166-2:HU
|
||||
Iceland|IS|ISL|352|ISO 3166-2:IS
|
||||
India|IN|IND|356|ISO 3166-2:IN
|
||||
Indonesia|ID|IDN|360|ISO 3166-2:ID
|
||||
Iran, Islamic Republic of|IR|IRN|364|ISO 3166-2:IR
|
||||
Iraq|IQ|IRQ|368|ISO 3166-2:IQ
|
||||
Ireland|IE|IRL|372|ISO 3166-2:IE
|
||||
Isle of Man|IM|IMN|833|ISO 3166-2:IM
|
||||
Israel|IL|ISR|376|ISO 3166-2:IL
|
||||
Italy|IT|ITA|380|ISO 3166-2:IT
|
||||
Jamaica|JM|JAM|388|ISO 3166-2:JM
|
||||
Japan|JP|JPN|392|ISO 3166-2:JP
|
||||
Jersey|JE|JEY|832|ISO 3166-2:JE
|
||||
Jordan|JO|JOR|400|ISO 3166-2:JO
|
||||
Kazakhstan|KZ|KAZ|398|ISO 3166-2:KZ
|
||||
Kenya|KE|KEN|404|ISO 3166-2:KE
|
||||
Kiribati|KI|KIR|296|ISO 3166-2:KI
|
||||
Korea, Democratic People's Republic of|KP|PRK|408|ISO 3166-2:KP
|
||||
Korea, Republic of|KR|KOR|410|ISO 3166-2:KR
|
||||
Kuwait|KW|KWT|414|ISO 3166-2:KW
|
||||
Kyrgyzstan|KG|KGZ|417|ISO 3166-2:KG
|
||||
Lao People's Democratic Republic|LA|LAO|418|ISO 3166-2:LA
|
||||
Latvia|LV|LVA|428|ISO 3166-2:LV
|
||||
Lebanon|LB|LBN|422|ISO 3166-2:LB
|
||||
Lesotho|LS|LSO|426|ISO 3166-2:LS
|
||||
Liberia|LR|LBR|430|ISO 3166-2:LR
|
||||
Libya|LY|LBY|434|ISO 3166-2:LY
|
||||
Liechtenstein|LI|LIE|438|ISO 3166-2:LI
|
||||
Lithuania|LT|LTU|440|ISO 3166-2:LT
|
||||
Luxembourg|LU|LUX|442|ISO 3166-2:LU
|
||||
Macao|MO|MAC|446|ISO 3166-2:MO
|
||||
Macedonia, the former Yugoslav Republic of|MK|MKD|807|ISO 3166-2:MK
|
||||
Madagascar|MG|MDG|450|ISO 3166-2:MG
|
||||
Malawi|MW|MWI|454|ISO 3166-2:MW
|
||||
Malaysia|MY|MYS|458|ISO 3166-2:MY
|
||||
Maldives|MV|MDV|462|ISO 3166-2:MV
|
||||
Mali|ML|MLI|466|ISO 3166-2:ML
|
||||
Malta|MT|MLT|470|ISO 3166-2:MT
|
||||
Marshall Islands|MH|MHL|584|ISO 3166-2:MH
|
||||
Martinique|MQ|MTQ|474|ISO 3166-2:MQ
|
||||
Mauritania|MR|MRT|478|ISO 3166-2:MR
|
||||
Mauritius|MU|MUS|480|ISO 3166-2:MU
|
||||
Mayotte|YT|MYT|175|ISO 3166-2:YT
|
||||
Mexico|MX|MEX|484|ISO 3166-2:MX
|
||||
Micronesia, Federated States of|FM|FSM|583|ISO 3166-2:FM
|
||||
Moldova, Republic of|MD|MDA|498|ISO 3166-2:MD
|
||||
Monaco|MC|MCO|492|ISO 3166-2:MC
|
||||
Mongolia|MN|MNG|496|ISO 3166-2:MN
|
||||
Montenegro|ME|MNE|499|ISO 3166-2:ME
|
||||
Montserrat|MS|MSR|500|ISO 3166-2:MS
|
||||
Morocco|MA|MAR|504|ISO 3166-2:MA
|
||||
Mozambique|MZ|MOZ|508|ISO 3166-2:MZ
|
||||
Myanmar|MM|MMR|104|ISO 3166-2:MM
|
||||
Namibia|NA|NAM|516|ISO 3166-2:NA
|
||||
Nauru|NR|NRU|520|ISO 3166-2:NR
|
||||
Nepal|NP|NPL|524|ISO 3166-2:NP
|
||||
Netherlands|NL|NLD|528|ISO 3166-2:NL
|
||||
New Caledonia|NC|NCL|540|ISO 3166-2:NC
|
||||
New Zealand|NZ|NZL|554|ISO 3166-2:NZ
|
||||
Nicaragua|NI|NIC|558|ISO 3166-2:NI
|
||||
Niger|NE|NER|562|ISO 3166-2:NE
|
||||
Nigeria|NG|NGA|566|ISO 3166-2:NG
|
||||
Niue|NU|NIU|570|ISO 3166-2:NU
|
||||
Norfolk Island|NF|NFK|574|ISO 3166-2:NF
|
||||
Northern Mariana Islands|MP|MNP|580|ISO 3166-2:MP
|
||||
Norway|NO|NOR|578|ISO 3166-2:NO
|
||||
Oman|OM|OMN|512|ISO 3166-2:OM
|
||||
Pakistan|PK|PAK|586|ISO 3166-2:PK
|
||||
Palau|PW|PLW|585|ISO 3166-2:PW
|
||||
Palestinian Territory, Occupied|PS|PSE|275|ISO 3166-2:PS
|
||||
Panama|PA|PAN|591|ISO 3166-2:PA
|
||||
Papua New Guinea|PG|PNG|598|ISO 3166-2:PG
|
||||
Paraguay|PY|PRY|600|ISO 3166-2:PY
|
||||
Peru|PE|PER|604|ISO 3166-2:PE
|
||||
Philippines|PH|PHL|608|ISO 3166-2:PH
|
||||
Pitcairn|PN|PCN|612|ISO 3166-2:PN
|
||||
Poland|PL|POL|616|ISO 3166-2:PL
|
||||
Portugal|PT|PRT|620|ISO 3166-2:PT
|
||||
Puerto Rico|PR|PRI|630|ISO 3166-2:PR
|
||||
Qatar|QA|QAT|634|ISO 3166-2:QA
|
||||
Réunion|RE|REU|638|ISO 3166-2:RE
|
||||
Romania|RO|ROU|642|ISO 3166-2:RO
|
||||
Russian Federation|RU|RUS|643|ISO 3166-2:RU
|
||||
Rwanda|RW|RWA|646|ISO 3166-2:RW
|
||||
Saint Barthélemy|BL|BLM|652|ISO 3166-2:BL
|
||||
Saint Helena, Ascension and Tristan da Cunha|SH|SHN|654|ISO 3166-2:SH
|
||||
Saint Kitts and Nevis|KN|KNA|659|ISO 3166-2:KN
|
||||
Saint Lucia|LC|LCA|662|ISO 3166-2:LC
|
||||
Saint Martin (French part)|MF|MAF|663|ISO 3166-2:MF
|
||||
Saint Pierre and Miquelon|PM|SPM|666|ISO 3166-2:PM
|
||||
Saint Vincent and the Grenadines|VC|VCT|670|ISO 3166-2:VC
|
||||
Samoa|WS|WSM|882|ISO 3166-2:WS
|
||||
San Marino|SM|SMR|674|ISO 3166-2:SM
|
||||
Sao Tome and Principe|ST|STP|678|ISO 3166-2:ST
|
||||
Saudi Arabia|SA|SAU|682|ISO 3166-2:SA
|
||||
Senegal|SN|SEN|686|ISO 3166-2:SN
|
||||
Serbia|RS|SRB|688|ISO 3166-2:RS
|
||||
Seychelles|SC|SYC|690|ISO 3166-2:SC
|
||||
Sierra Leone|SL|SLE|694|ISO 3166-2:SL
|
||||
Singapore|SG|SGP|702|ISO 3166-2:SG
|
||||
Sint Maarten (Dutch part)|SX|SXM|534|ISO 3166-2:SX
|
||||
Slovakia|SK|SVK|703|ISO 3166-2:SK
|
||||
Slovenia|SI|SVN|705|ISO 3166-2:SI
|
||||
Solomon Islands|SB|SLB|090|ISO 3166-2:SB
|
||||
Somalia|SO|SOM|706|ISO 3166-2:SO
|
||||
South Africa|ZA|ZAF|710|ISO 3166-2:ZA
|
||||
South Georgia and the South Sandwich Islands|GS|SGS|239|ISO 3166-2:GS
|
||||
South Sudan|SS|SSD|728|ISO 3166-2:SS
|
||||
Spain|ES|ESP|724|ISO 3166-2:ES
|
||||
Sri Lanka|LK|LKA|144|ISO 3166-2:LK
|
||||
Sudan|SD|SDN|729|ISO 3166-2:SD
|
||||
Suriname|SR|SUR|740|ISO 3166-2:SR
|
||||
Svalbard and Jan Mayen|SJ|SJM|744|ISO 3166-2:SJ
|
||||
Swaziland|SZ|SWZ|748|ISO 3166-2:SZ
|
||||
Sweden|SE|SWE|752|ISO 3166-2:SE
|
||||
Switzerland|CH|CHE|756|ISO 3166-2:CH
|
||||
Syrian Arab Republic|SY|SYR|760|ISO 3166-2:SY
|
||||
Taiwan, Province of China|TW|TWN|158|ISO 3166-2:TW
|
||||
Tajikistan|TJ|TJK|762|ISO 3166-2:TJ
|
||||
Tanzania, United Republic of|TZ|TZA|834|ISO 3166-2:TZ
|
||||
Thailand|TH|THA|764|ISO 3166-2:TH
|
||||
Timor-Leste|TL|TLS|626|ISO 3166-2:TL
|
||||
Togo|TG|TGO|768|ISO 3166-2:TG
|
||||
Tokelau|TK|TKL|772|ISO 3166-2:TK
|
||||
Tonga|TO|TON|776|ISO 3166-2:TO
|
||||
Trinidad and Tobago|TT|TTO|780|ISO 3166-2:TT
|
||||
Tunisia|TN|TUN|788|ISO 3166-2:TN
|
||||
Turkey|TR|TUR|792|ISO 3166-2:TR
|
||||
Turkmenistan|TM|TKM|795|ISO 3166-2:TM
|
||||
Turks and Caicos Islands|TC|TCA|796|ISO 3166-2:TC
|
||||
Tuvalu|TV|TUV|798|ISO 3166-2:TV
|
||||
Uganda|UG|UGA|800|ISO 3166-2:UG
|
||||
Ukraine|UA|UKR|804|ISO 3166-2:UA
|
||||
United Arab Emirates|AE|ARE|784|ISO 3166-2:AE
|
||||
United Kingdom|GB|GBR|826|ISO 3166-2:GB
|
||||
United States|US|USA|840|ISO 3166-2:US
|
||||
United States Minor Outlying Islands|UM|UMI|581|ISO 3166-2:UM
|
||||
Uruguay|UY|URY|858|ISO 3166-2:UY
|
||||
Uzbekistan|UZ|UZB|860|ISO 3166-2:UZ
|
||||
Vanuatu|VU|VUT|548|ISO 3166-2:VU
|
||||
Venezuela, Bolivarian Republic of|VE|VEN|862|ISO 3166-2:VE
|
||||
Viet Nam|VN|VNM|704|ISO 3166-2:VN
|
||||
Virgin Islands, British|VG|VGB|092|ISO 3166-2:VG
|
||||
Virgin Islands, U.S.|VI|VIR|850|ISO 3166-2:VI
|
||||
Wallis and Futuna|WF|WLF|876|ISO 3166-2:WF
|
||||
Western Sahara|EH|ESH|732|ISO 3166-2:EH
|
||||
Yemen|YE|YEM|887|ISO 3166-2:YE
|
||||
Zambia|ZM|ZMB|894|ISO 3166-2:ZM
|
||||
Zimbabwe|ZW|ZWE|716|ISO 3166-2:ZW
|
485
libs/guessit/ISO-639-2_utf-8.txt
Normal file
485
libs/guessit/ISO-639-2_utf-8.txt
Normal file
|
@ -0,0 +1,485 @@
|
|||
aar||aa|Afar|afar
|
||||
abk||ab|Abkhazian|abkhaze
|
||||
ace|||Achinese|aceh
|
||||
ach|||Acoli|acoli
|
||||
ada|||Adangme|adangme
|
||||
ady|||Adyghe; Adygei|adyghé
|
||||
afa|||Afro-Asiatic languages|afro-asiatiques, langues
|
||||
afh|||Afrihili|afrihili
|
||||
afr||af|Afrikaans|afrikaans
|
||||
ain|||Ainu|aïnou
|
||||
aka||ak|Akan|akan
|
||||
akk|||Akkadian|akkadien
|
||||
alb|sqi|sq|Albanian|albanais
|
||||
ale|||Aleut|aléoute
|
||||
alg|||Algonquian languages|algonquines, langues
|
||||
alt|||Southern Altai|altai du Sud
|
||||
amh||am|Amharic|amharique
|
||||
ang|||English, Old (ca.450-1100)|anglo-saxon (ca.450-1100)
|
||||
anp|||Angika|angika
|
||||
apa|||Apache languages|apaches, langues
|
||||
ara||ar|Arabic|arabe
|
||||
arc|||Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)|araméen d'empire (700-300 BCE)
|
||||
arg||an|Aragonese|aragonais
|
||||
arm|hye|hy|Armenian|arménien
|
||||
arn|||Mapudungun; Mapuche|mapudungun; mapuche; mapuce
|
||||
arp|||Arapaho|arapaho
|
||||
art|||Artificial languages|artificielles, langues
|
||||
arw|||Arawak|arawak
|
||||
asm||as|Assamese|assamais
|
||||
ast|||Asturian; Bable; Leonese; Asturleonese|asturien; bable; léonais; asturoléonais
|
||||
ath|||Athapascan languages|athapascanes, langues
|
||||
aus|||Australian languages|australiennes, langues
|
||||
ava||av|Avaric|avar
|
||||
ave||ae|Avestan|avestique
|
||||
awa|||Awadhi|awadhi
|
||||
aym||ay|Aymara|aymara
|
||||
aze||az|Azerbaijani|azéri
|
||||
bad|||Banda languages|banda, langues
|
||||
bai|||Bamileke languages|bamiléké, langues
|
||||
bak||ba|Bashkir|bachkir
|
||||
bal|||Baluchi|baloutchi
|
||||
bam||bm|Bambara|bambara
|
||||
ban|||Balinese|balinais
|
||||
baq|eus|eu|Basque|basque
|
||||
bas|||Basa|basa
|
||||
bat|||Baltic languages|baltes, langues
|
||||
bej|||Beja; Bedawiyet|bedja
|
||||
bel||be|Belarusian|biélorusse
|
||||
bem|||Bemba|bemba
|
||||
ben||bn|Bengali|bengali
|
||||
ber|||Berber languages|berbères, langues
|
||||
bho|||Bhojpuri|bhojpuri
|
||||
bih||bh|Bihari languages|langues biharis
|
||||
bik|||Bikol|bikol
|
||||
bin|||Bini; Edo|bini; edo
|
||||
bis||bi|Bislama|bichlamar
|
||||
bla|||Siksika|blackfoot
|
||||
bnt|||Bantu (Other)|bantoues, autres langues
|
||||
bos||bs|Bosnian|bosniaque
|
||||
bra|||Braj|braj
|
||||
bre||br|Breton|breton
|
||||
btk|||Batak languages|batak, langues
|
||||
bua|||Buriat|bouriate
|
||||
bug|||Buginese|bugi
|
||||
bul||bg|Bulgarian|bulgare
|
||||
bur|mya|my|Burmese|birman
|
||||
byn|||Blin; Bilin|blin; bilen
|
||||
cad|||Caddo|caddo
|
||||
cai|||Central American Indian languages|amérindiennes de L'Amérique centrale, langues
|
||||
car|||Galibi Carib|karib; galibi; carib
|
||||
cat||ca|Catalan; Valencian|catalan; valencien
|
||||
cau|||Caucasian languages|caucasiennes, langues
|
||||
ceb|||Cebuano|cebuano
|
||||
cel|||Celtic languages|celtiques, langues; celtes, langues
|
||||
cha||ch|Chamorro|chamorro
|
||||
chb|||Chibcha|chibcha
|
||||
che||ce|Chechen|tchétchène
|
||||
chg|||Chagatai|djaghataï
|
||||
chi|zho|zh|Chinese|chinois
|
||||
chk|||Chuukese|chuuk
|
||||
chm|||Mari|mari
|
||||
chn|||Chinook jargon|chinook, jargon
|
||||
cho|||Choctaw|choctaw
|
||||
chp|||Chipewyan; Dene Suline|chipewyan
|
||||
chr|||Cherokee|cherokee
|
||||
chu||cu|Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic|slavon d'église; vieux slave; slavon liturgique; vieux bulgare
|
||||
chv||cv|Chuvash|tchouvache
|
||||
chy|||Cheyenne|cheyenne
|
||||
cmc|||Chamic languages|chames, langues
|
||||
cop|||Coptic|copte
|
||||
cor||kw|Cornish|cornique
|
||||
cos||co|Corsican|corse
|
||||
cpe|||Creoles and pidgins, English based|créoles et pidgins basés sur l'anglais
|
||||
cpf|||Creoles and pidgins, French-based |créoles et pidgins basés sur le français
|
||||
cpp|||Creoles and pidgins, Portuguese-based |créoles et pidgins basés sur le portugais
|
||||
cre||cr|Cree|cree
|
||||
crh|||Crimean Tatar; Crimean Turkish|tatar de Crimé
|
||||
crp|||Creoles and pidgins |créoles et pidgins
|
||||
csb|||Kashubian|kachoube
|
||||
cus|||Cushitic languages|couchitiques, langues
|
||||
cze|ces|cs|Czech|tchèque
|
||||
dak|||Dakota|dakota
|
||||
dan||da|Danish|danois
|
||||
dar|||Dargwa|dargwa
|
||||
day|||Land Dayak languages|dayak, langues
|
||||
del|||Delaware|delaware
|
||||
den|||Slave (Athapascan)|esclave (athapascan)
|
||||
dgr|||Dogrib|dogrib
|
||||
din|||Dinka|dinka
|
||||
div||dv|Divehi; Dhivehi; Maldivian|maldivien
|
||||
doi|||Dogri|dogri
|
||||
dra|||Dravidian languages|dravidiennes, langues
|
||||
dsb|||Lower Sorbian|bas-sorabe
|
||||
dua|||Duala|douala
|
||||
dum|||Dutch, Middle (ca.1050-1350)|néerlandais moyen (ca. 1050-1350)
|
||||
dut|nld|nl|Dutch; Flemish|néerlandais; flamand
|
||||
dyu|||Dyula|dioula
|
||||
dzo||dz|Dzongkha|dzongkha
|
||||
efi|||Efik|efik
|
||||
egy|||Egyptian (Ancient)|égyptien
|
||||
eka|||Ekajuk|ekajuk
|
||||
elx|||Elamite|élamite
|
||||
eng||en|English|anglais
|
||||
enm|||English, Middle (1100-1500)|anglais moyen (1100-1500)
|
||||
epo||eo|Esperanto|espéranto
|
||||
est||et|Estonian|estonien
|
||||
ewe||ee|Ewe|éwé
|
||||
ewo|||Ewondo|éwondo
|
||||
fan|||Fang|fang
|
||||
fao||fo|Faroese|féroïen
|
||||
fat|||Fanti|fanti
|
||||
fij||fj|Fijian|fidjien
|
||||
fil|||Filipino; Pilipino|filipino; pilipino
|
||||
fin||fi|Finnish|finnois
|
||||
fiu|||Finno-Ugrian languages|finno-ougriennes, langues
|
||||
fon|||Fon|fon
|
||||
fre|fra|fr|French|français
|
||||
frm|||French, Middle (ca.1400-1600)|français moyen (1400-1600)
|
||||
fro|||French, Old (842-ca.1400)|français ancien (842-ca.1400)
|
||||
frr|||Northern Frisian|frison septentrional
|
||||
frs|||Eastern Frisian|frison oriental
|
||||
fry||fy|Western Frisian|frison occidental
|
||||
ful||ff|Fulah|peul
|
||||
fur|||Friulian|frioulan
|
||||
gaa|||Ga|ga
|
||||
gay|||Gayo|gayo
|
||||
gba|||Gbaya|gbaya
|
||||
gem|||Germanic languages|germaniques, langues
|
||||
geo|kat|ka|Georgian|géorgien
|
||||
ger|deu|de|German|allemand
|
||||
gez|||Geez|guèze
|
||||
gil|||Gilbertese|kiribati
|
||||
gla||gd|Gaelic; Scottish Gaelic|gaélique; gaélique écossais
|
||||
gle||ga|Irish|irlandais
|
||||
glg||gl|Galician|galicien
|
||||
glv||gv|Manx|manx; mannois
|
||||
gmh|||German, Middle High (ca.1050-1500)|allemand, moyen haut (ca. 1050-1500)
|
||||
goh|||German, Old High (ca.750-1050)|allemand, vieux haut (ca. 750-1050)
|
||||
gon|||Gondi|gond
|
||||
gor|||Gorontalo|gorontalo
|
||||
got|||Gothic|gothique
|
||||
grb|||Grebo|grebo
|
||||
grc|||Greek, Ancient (to 1453)|grec ancien (jusqu'à 1453)
|
||||
gre|ell|el|Greek, Modern (1453-)|grec moderne (après 1453)
|
||||
grn||gn|Guarani|guarani
|
||||
gsw|||Swiss German; Alemannic; Alsatian|suisse alémanique; alémanique; alsacien
|
||||
guj||gu|Gujarati|goudjrati
|
||||
gwi|||Gwich'in|gwich'in
|
||||
hai|||Haida|haida
|
||||
hat||ht|Haitian; Haitian Creole|haïtien; créole haïtien
|
||||
hau||ha|Hausa|haoussa
|
||||
haw|||Hawaiian|hawaïen
|
||||
heb||he|Hebrew|hébreu
|
||||
her||hz|Herero|herero
|
||||
hil|||Hiligaynon|hiligaynon
|
||||
him|||Himachali languages; Western Pahari languages|langues himachalis; langues paharis occidentales
|
||||
hin||hi|Hindi|hindi
|
||||
hit|||Hittite|hittite
|
||||
hmn|||Hmong; Mong|hmong
|
||||
hmo||ho|Hiri Motu|hiri motu
|
||||
hrv||hr|Croatian|croate
|
||||
hsb|||Upper Sorbian|haut-sorabe
|
||||
hun||hu|Hungarian|hongrois
|
||||
hup|||Hupa|hupa
|
||||
iba|||Iban|iban
|
||||
ibo||ig|Igbo|igbo
|
||||
ice|isl|is|Icelandic|islandais
|
||||
ido||io|Ido|ido
|
||||
iii||ii|Sichuan Yi; Nuosu|yi de Sichuan
|
||||
ijo|||Ijo languages|ijo, langues
|
||||
iku||iu|Inuktitut|inuktitut
|
||||
ile||ie|Interlingue; Occidental|interlingue
|
||||
ilo|||Iloko|ilocano
|
||||
ina||ia|Interlingua (International Auxiliary Language Association)|interlingua (langue auxiliaire internationale)
|
||||
inc|||Indic languages|indo-aryennes, langues
|
||||
ind||id|Indonesian|indonésien
|
||||
ine|||Indo-European languages|indo-européennes, langues
|
||||
inh|||Ingush|ingouche
|
||||
ipk||ik|Inupiaq|inupiaq
|
||||
ira|||Iranian languages|iraniennes, langues
|
||||
iro|||Iroquoian languages|iroquoises, langues
|
||||
ita||it|Italian|italien
|
||||
jav||jv|Javanese|javanais
|
||||
jbo|||Lojban|lojban
|
||||
jpn||ja|Japanese|japonais
|
||||
jpr|||Judeo-Persian|judéo-persan
|
||||
jrb|||Judeo-Arabic|judéo-arabe
|
||||
kaa|||Kara-Kalpak|karakalpak
|
||||
kab|||Kabyle|kabyle
|
||||
kac|||Kachin; Jingpho|kachin; jingpho
|
||||
kal||kl|Kalaallisut; Greenlandic|groenlandais
|
||||
kam|||Kamba|kamba
|
||||
kan||kn|Kannada|kannada
|
||||
kar|||Karen languages|karen, langues
|
||||
kas||ks|Kashmiri|kashmiri
|
||||
kau||kr|Kanuri|kanouri
|
||||
kaw|||Kawi|kawi
|
||||
kaz||kk|Kazakh|kazakh
|
||||
kbd|||Kabardian|kabardien
|
||||
kha|||Khasi|khasi
|
||||
khi|||Khoisan languages|khoïsan, langues
|
||||
khm||km|Central Khmer|khmer central
|
||||
kho|||Khotanese; Sakan|khotanais; sakan
|
||||
kik||ki|Kikuyu; Gikuyu|kikuyu
|
||||
kin||rw|Kinyarwanda|rwanda
|
||||
kir||ky|Kirghiz; Kyrgyz|kirghiz
|
||||
kmb|||Kimbundu|kimbundu
|
||||
kok|||Konkani|konkani
|
||||
kom||kv|Komi|kom
|
||||
kon||kg|Kongo|kongo
|
||||
kor||ko|Korean|coréen
|
||||
kos|||Kosraean|kosrae
|
||||
kpe|||Kpelle|kpellé
|
||||
krc|||Karachay-Balkar|karatchai balkar
|
||||
krl|||Karelian|carélien
|
||||
kro|||Kru languages|krou, langues
|
||||
kru|||Kurukh|kurukh
|
||||
kua||kj|Kuanyama; Kwanyama|kuanyama; kwanyama
|
||||
kum|||Kumyk|koumyk
|
||||
kur||ku|Kurdish|kurde
|
||||
kut|||Kutenai|kutenai
|
||||
lad|||Ladino|judéo-espagnol
|
||||
lah|||Lahnda|lahnda
|
||||
lam|||Lamba|lamba
|
||||
lao||lo|Lao|lao
|
||||
lat||la|Latin|latin
|
||||
lav||lv|Latvian|letton
|
||||
lez|||Lezghian|lezghien
|
||||
lim||li|Limburgan; Limburger; Limburgish|limbourgeois
|
||||
lin||ln|Lingala|lingala
|
||||
lit||lt|Lithuanian|lituanien
|
||||
lol|||Mongo|mongo
|
||||
loz|||Lozi|lozi
|
||||
ltz||lb|Luxembourgish; Letzeburgesch|luxembourgeois
|
||||
lua|||Luba-Lulua|luba-lulua
|
||||
lub||lu|Luba-Katanga|luba-katanga
|
||||
lug||lg|Ganda|ganda
|
||||
lui|||Luiseno|luiseno
|
||||
lun|||Lunda|lunda
|
||||
luo|||Luo (Kenya and Tanzania)|luo (Kenya et Tanzanie)
|
||||
lus|||Lushai|lushai
|
||||
mac|mkd|mk|Macedonian|macédonien
|
||||
mad|||Madurese|madourais
|
||||
mag|||Magahi|magahi
|
||||
mah||mh|Marshallese|marshall
|
||||
mai|||Maithili|maithili
|
||||
mak|||Makasar|makassar
|
||||
mal||ml|Malayalam|malayalam
|
||||
man|||Mandingo|mandingue
|
||||
mao|mri|mi|Maori|maori
|
||||
map|||Austronesian languages|austronésiennes, langues
|
||||
mar||mr|Marathi|marathe
|
||||
mas|||Masai|massaï
|
||||
may|msa|ms|Malay|malais
|
||||
mdf|||Moksha|moksa
|
||||
mdr|||Mandar|mandar
|
||||
men|||Mende|mendé
|
||||
mga|||Irish, Middle (900-1200)|irlandais moyen (900-1200)
|
||||
mic|||Mi'kmaq; Micmac|mi'kmaq; micmac
|
||||
min|||Minangkabau|minangkabau
|
||||
mis|||Uncoded languages|langues non codées
|
||||
mkh|||Mon-Khmer languages|môn-khmer, langues
|
||||
mlg||mg|Malagasy|malgache
|
||||
mlt||mt|Maltese|maltais
|
||||
mnc|||Manchu|mandchou
|
||||
mni|||Manipuri|manipuri
|
||||
mno|||Manobo languages|manobo, langues
|
||||
moh|||Mohawk|mohawk
|
||||
mon||mn|Mongolian|mongol
|
||||
mos|||Mossi|moré
|
||||
mul|||Multiple languages|multilingue
|
||||
mun|||Munda languages|mounda, langues
|
||||
mus|||Creek|muskogee
|
||||
mwl|||Mirandese|mirandais
|
||||
mwr|||Marwari|marvari
|
||||
myn|||Mayan languages|maya, langues
|
||||
myv|||Erzya|erza
|
||||
nah|||Nahuatl languages|nahuatl, langues
|
||||
nai|||North American Indian languages|nord-amérindiennes, langues
|
||||
nap|||Neapolitan|napolitain
|
||||
nau||na|Nauru|nauruan
|
||||
nav||nv|Navajo; Navaho|navaho
|
||||
nbl||nr|Ndebele, South; South Ndebele|ndébélé du Sud
|
||||
nde||nd|Ndebele, North; North Ndebele|ndébélé du Nord
|
||||
ndo||ng|Ndonga|ndonga
|
||||
nds|||Low German; Low Saxon; German, Low; Saxon, Low|bas allemand; bas saxon; allemand, bas; saxon, bas
|
||||
nep||ne|Nepali|népalais
|
||||
new|||Nepal Bhasa; Newari|nepal bhasa; newari
|
||||
nia|||Nias|nias
|
||||
nic|||Niger-Kordofanian languages|nigéro-kordofaniennes, langues
|
||||
niu|||Niuean|niué
|
||||
nno||nn|Norwegian Nynorsk; Nynorsk, Norwegian|norvégien nynorsk; nynorsk, norvégien
|
||||
nob||nb|Bokmål, Norwegian; Norwegian Bokmål|norvégien bokmål
|
||||
nog|||Nogai|nogaï; nogay
|
||||
non|||Norse, Old|norrois, vieux
|
||||
nor||no|Norwegian|norvégien
|
||||
nqo|||N'Ko|n'ko
|
||||
nso|||Pedi; Sepedi; Northern Sotho|pedi; sepedi; sotho du Nord
|
||||
nub|||Nubian languages|nubiennes, langues
|
||||
nwc|||Classical Newari; Old Newari; Classical Nepal Bhasa|newari classique
|
||||
nya||ny|Chichewa; Chewa; Nyanja|chichewa; chewa; nyanja
|
||||
nym|||Nyamwezi|nyamwezi
|
||||
nyn|||Nyankole|nyankolé
|
||||
nyo|||Nyoro|nyoro
|
||||
nzi|||Nzima|nzema
|
||||
oci||oc|Occitan (post 1500); Provençal|occitan (après 1500); provençal
|
||||
oji||oj|Ojibwa|ojibwa
|
||||
ori||or|Oriya|oriya
|
||||
orm||om|Oromo|galla
|
||||
osa|||Osage|osage
|
||||
oss||os|Ossetian; Ossetic|ossète
|
||||
ota|||Turkish, Ottoman (1500-1928)|turc ottoman (1500-1928)
|
||||
oto|||Otomian languages|otomi, langues
|
||||
paa|||Papuan languages|papoues, langues
|
||||
pag|||Pangasinan|pangasinan
|
||||
pal|||Pahlavi|pahlavi
|
||||
pam|||Pampanga; Kapampangan|pampangan
|
||||
pan||pa|Panjabi; Punjabi|pendjabi
|
||||
pap|||Papiamento|papiamento
|
||||
pau|||Palauan|palau
|
||||
peo|||Persian, Old (ca.600-400 B.C.)|perse, vieux (ca. 600-400 av. J.-C.)
|
||||
per|fas|fa|Persian|persan
|
||||
phi|||Philippine languages|philippines, langues
|
||||
phn|||Phoenician|phénicien
|
||||
pli||pi|Pali|pali
|
||||
pol||pl|Polish|polonais
|
||||
pon|||Pohnpeian|pohnpei
|
||||
por||pt|Portuguese|portugais
|
||||
pra|||Prakrit languages|prâkrit, langues
|
||||
pro|||Provençal, Old (to 1500)|provençal ancien (jusqu'à 1500)
|
||||
pus||ps|Pushto; Pashto|pachto
|
||||
qaa-qtz|||Reserved for local use|réservée à l'usage local
|
||||
que||qu|Quechua|quechua
|
||||
raj|||Rajasthani|rajasthani
|
||||
rap|||Rapanui|rapanui
|
||||
rar|||Rarotongan; Cook Islands Maori|rarotonga; maori des îles Cook
|
||||
roa|||Romance languages|romanes, langues
|
||||
roh||rm|Romansh|romanche
|
||||
rom|||Romany|tsigane
|
||||
rum|ron|ro|Romanian; Moldavian; Moldovan|roumain; moldave
|
||||
run||rn|Rundi|rundi
|
||||
rup|||Aromanian; Arumanian; Macedo-Romanian|aroumain; macédo-roumain
|
||||
rus||ru|Russian|russe
|
||||
sad|||Sandawe|sandawe
|
||||
sag||sg|Sango|sango
|
||||
sah|||Yakut|iakoute
|
||||
sai|||South American Indian (Other)|indiennes d'Amérique du Sud, autres langues
|
||||
sal|||Salishan languages|salishennes, langues
|
||||
sam|||Samaritan Aramaic|samaritain
|
||||
san||sa|Sanskrit|sanskrit
|
||||
sas|||Sasak|sasak
|
||||
sat|||Santali|santal
|
||||
scn|||Sicilian|sicilien
|
||||
sco|||Scots|écossais
|
||||
sel|||Selkup|selkoupe
|
||||
sem|||Semitic languages|sémitiques, langues
|
||||
sga|||Irish, Old (to 900)|irlandais ancien (jusqu'à 900)
|
||||
sgn|||Sign Languages|langues des signes
|
||||
shn|||Shan|chan
|
||||
sid|||Sidamo|sidamo
|
||||
sin||si|Sinhala; Sinhalese|singhalais
|
||||
sio|||Siouan languages|sioux, langues
|
||||
sit|||Sino-Tibetan languages|sino-tibétaines, langues
|
||||
sla|||Slavic languages|slaves, langues
|
||||
slo|slk|sk|Slovak|slovaque
|
||||
slv||sl|Slovenian|slovène
|
||||
sma|||Southern Sami|sami du Sud
|
||||
sme||se|Northern Sami|sami du Nord
|
||||
smi|||Sami languages|sames, langues
|
||||
smj|||Lule Sami|sami de Lule
|
||||
smn|||Inari Sami|sami d'Inari
|
||||
smo||sm|Samoan|samoan
|
||||
sms|||Skolt Sami|sami skolt
|
||||
sna||sn|Shona|shona
|
||||
snd||sd|Sindhi|sindhi
|
||||
snk|||Soninke|soninké
|
||||
sog|||Sogdian|sogdien
|
||||
som||so|Somali|somali
|
||||
son|||Songhai languages|songhai, langues
|
||||
sot||st|Sotho, Southern|sotho du Sud
|
||||
spa||es|Spanish; Castilian|espagnol; castillan
|
||||
srd||sc|Sardinian|sarde
|
||||
srn|||Sranan Tongo|sranan tongo
|
||||
srp||sr|Serbian|serbe
|
||||
srr|||Serer|sérère
|
||||
ssa|||Nilo-Saharan languages|nilo-sahariennes, langues
|
||||
ssw||ss|Swati|swati
|
||||
suk|||Sukuma|sukuma
|
||||
sun||su|Sundanese|soundanais
|
||||
sus|||Susu|soussou
|
||||
sux|||Sumerian|sumérien
|
||||
swa||sw|Swahili|swahili
|
||||
swe||sv|Swedish|suédois
|
||||
syc|||Classical Syriac|syriaque classique
|
||||
syr|||Syriac|syriaque
|
||||
tah||ty|Tahitian|tahitien
|
||||
tai|||Tai languages|tai, langues
|
||||
tam||ta|Tamil|tamoul
|
||||
tat||tt|Tatar|tatar
|
||||
tel||te|Telugu|télougou
|
||||
tem|||Timne|temne
|
||||
ter|||Tereno|tereno
|
||||
tet|||Tetum|tetum
|
||||
tgk||tg|Tajik|tadjik
|
||||
tgl||tl|Tagalog|tagalog
|
||||
tha||th|Thai|thaï
|
||||
tib|bod|bo|Tibetan|tibétain
|
||||
tig|||Tigre|tigré
|
||||
tir||ti|Tigrinya|tigrigna
|
||||
tiv|||Tiv|tiv
|
||||
tkl|||Tokelau|tokelau
|
||||
tlh|||Klingon; tlhIngan-Hol|klingon
|
||||
tli|||Tlingit|tlingit
|
||||
tmh|||Tamashek|tamacheq
|
||||
tog|||Tonga (Nyasa)|tonga (Nyasa)
|
||||
ton||to|Tonga (Tonga Islands)|tongan (Îles Tonga)
|
||||
tpi|||Tok Pisin|tok pisin
|
||||
tsi|||Tsimshian|tsimshian
|
||||
tsn||tn|Tswana|tswana
|
||||
tso||ts|Tsonga|tsonga
|
||||
tuk||tk|Turkmen|turkmène
|
||||
tum|||Tumbuka|tumbuka
|
||||
tup|||Tupi languages|tupi, langues
|
||||
tur||tr|Turkish|turc
|
||||
tut|||Altaic languages|altaïques, langues
|
||||
tvl|||Tuvalu|tuvalu
|
||||
twi||tw|Twi|twi
|
||||
tyv|||Tuvinian|touva
|
||||
udm|||Udmurt|oudmourte
|
||||
uga|||Ugaritic|ougaritique
|
||||
uig||ug|Uighur; Uyghur|ouïgour
|
||||
ukr||uk|Ukrainian|ukrainien
|
||||
umb|||Umbundu|umbundu
|
||||
und|||Undetermined|indéterminée
|
||||
urd||ur|Urdu|ourdou
|
||||
uzb||uz|Uzbek|ouszbek
|
||||
vai|||Vai|vaï
|
||||
ven||ve|Venda|venda
|
||||
vie||vi|Vietnamese|vietnamien
|
||||
vol||vo|Volapük|volapük
|
||||
vot|||Votic|vote
|
||||
wak|||Wakashan languages|wakashanes, langues
|
||||
wal|||Walamo|walamo
|
||||
war|||Waray|waray
|
||||
was|||Washo|washo
|
||||
wel|cym|cy|Welsh|gallois
|
||||
wen|||Sorbian languages|sorabes, langues
|
||||
wln||wa|Walloon|wallon
|
||||
wol||wo|Wolof|wolof
|
||||
xal|||Kalmyk; Oirat|kalmouk; oïrat
|
||||
xho||xh|Xhosa|xhosa
|
||||
yao|||Yao|yao
|
||||
yap|||Yapese|yapois
|
||||
yid||yi|Yiddish|yiddish
|
||||
yor||yo|Yoruba|yoruba
|
||||
ypk|||Yupik languages|yupik, langues
|
||||
zap|||Zapotec|zapotèque
|
||||
zbl|||Blissymbols; Blissymbolics; Bliss|symboles Bliss; Bliss
|
||||
zen|||Zenaga|zenaga
|
||||
zha||za|Zhuang; Chuang|zhuang; chuang
|
||||
znd|||Zande languages|zandé, langues
|
||||
zul||zu|Zulu|zoulou
|
||||
zun|||Zuni|zuni
|
||||
zxx|||No linguistic content; Not applicable|pas de contenu linguistique; non applicable
|
||||
zza|||Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki|zaza; dimili; dimli; kirdki; kirmanjki; zazaki
|
289
libs/guessit/__init__.py
Normal file
289
libs/guessit/__init__.py
Normal file
|
@ -0,0 +1,289 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Package version string.
__version__ = '0.6.2'

# Names exported by ``from guessit import *``.
__all__ = [
    'Guess',
    'Language',
    'guess_file_info',
    'guess_video_info',
    'guess_movie_info',
    'guess_episode_info',
]
|
||||
|
||||
|
||||
# Do python3 detection before importing any other module, to be sure that
|
||||
# it will then always be available
|
||||
# with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/
|
||||
import sys
|
||||
if sys.version_info[0] >= 3:
    # Python 3: every text alias is the builtin ``str``.
    PY3 = True
    unicode_text_type = native_text_type = base_text_type = str

    def u(x):
        """Return ``str(x)``."""
        return str(x)

    def s(x):
        """Return ``x`` unchanged."""
        return x

    class UnicodeMixin(object):
        """Mixin whose ``__str__`` returns the result of ``__unicode__()``."""

        def __str__(self):
            return self.__unicode__()

    import binascii

    def to_hex(x):
        """Return the hexadecimal digits of ``x`` as text."""
        hex_digits = binascii.hexlify(x)
        return hex_digits.decode('utf-8')

else:
    # Python 2: ``unicode`` and ``str`` are distinct types.
    PY3 = False
    # fix imports for python2
    __all__ = list(map(str, __all__))
    unicode_text_type = unicode
    native_text_type = str
    base_text_type = basestring

    def u(x):
        """Return ``x`` as unicode, decoding ``str`` input as UTF-8."""
        return x.decode('utf-8') if isinstance(x, str) else unicode(x)

    def s(x):
        """Return ``x`` with unicode values encoded to UTF-8 ``str``.

        Lists, tuples and dicts are converted recursively (dict keys and
        values alike); any other value is returned unchanged.
        """
        if isinstance(x, unicode):
            return x.encode('utf-8')
        if isinstance(x, list):
            return [s(item) for item in x]
        if isinstance(x, tuple):
            return tuple(s(item) for item in x)
        if isinstance(x, dict):
            return dict((s(k), s(v)) for k, v in x.items())
        return x

    class UnicodeMixin(object):
        """Mixin whose ``__str__`` returns ``unicode(self)`` encoded as UTF-8."""

        def __str__(self):
            return unicode(self).encode('utf-8')

    def to_hex(x):
        """Return ``x`` encoded with the ``hex`` codec."""
        return x.encode('hex')
|
||||
|
||||
|
||||
from guessit.guess import Guess, merge_all
|
||||
from guessit.language import Language
|
||||
from guessit.matcher import IterativeMatcher
|
||||
from guessit.textutils import clean_string
|
||||
import logging
|
||||
import json
|
||||
|
||||
log = logging.getLogger(__name__)


class NullHandler(logging.Handler):
    """Logging handler that discards every record it is given."""

    def emit(self, record):
        """Ignore record."""
        return None


# let's be a nicely behaving library: register a do-nothing handler on the
# package logger
h = NullHandler()
log.addHandler(h)
|
||||
|
||||
|
||||
def _guess_filename(filename, filetype):
    """Guess the properties of filename by matching it, in up to three passes.

    A first IterativeMatcher pass is inspected to decide whether a second
    pass with extra options is needed (several candidate years, or language
    nodes that should be ignored). If the result holds both a language and a
    title, a further language-free pass is run to check that language
    detection did not alter the title.

    filename -- the name to analyze
    filetype -- the file type hint, passed as is to IterativeMatcher

    Returns the value of matched() for the selected match tree.
    """
    def find_nodes(tree, props):
        """Yields all nodes containing any of the given props."""
        if isinstance(props, base_text_type):
            props = [props]
        for node in tree.nodes():
            if any(prop in node.guess for prop in props):
                yield node

    mtree = IterativeMatcher(filename, filetype=filetype)

    m = mtree.matched()

    second_pass_opts = []
    second_pass_transfo_opts = {}

    # if there are multiple possible years found, we assume the first one is
    # part of the title, reparse the tree taking this into account
    years = set(n.value for n in find_nodes(mtree.match_tree, 'year'))
    if len(years) >= 2:
        second_pass_opts.append('skip_first_year')

    to_skip_language_nodes = []

    # index the title/series nodes by both ends of their span, to detect
    # language nodes that start or end exactly where a title does
    title_spans = {}
    for title_node in set(find_nodes(mtree.match_tree, ['title', 'series'])):
        title_spans[title_node.span[0]] = title_node
        title_spans[title_node.span[1]] = title_node

    for lang_key in ('language', 'subtitleLanguage'):
        langs = {}
        lang_nodes = set(find_nodes(mtree.match_tree, lang_key))

        for lang_node in lang_nodes:
            lang = lang_node.guess.get(lang_key, None)
            if len(lang_node.value) > 3 and (lang_node.span[0] in title_spans or
                                             lang_node.span[1] in title_spans):
                # Language is next or before title, and is not a language
                # code. Add to skip for 2nd pass.

                # if filetype is subtitle and the language appears last, just
                # before the extension, then it is likely a subtitle language
                parts = clean_string(lang_node.root.value).split()
                if (m['type'] in ['moviesubtitle', 'episodesubtitle'] and
                        parts.index(lang_node.value) == len(parts) - 2):
                    continue

                to_skip_language_nodes.append(lang_node)
            elif lang not in langs:
                langs[lang] = lang_node
            else:
                # The same language was found. Keep the more confident one,
                # and add others to skip for 2nd pass.
                existing_lang_node = langs[lang]
                # compare the confidences of the property being processed:
                # always asking for 'language' gives -1 for both nodes when
                # lang_key is 'subtitleLanguage'
                if (existing_lang_node.guess.confidence(lang_key) >=
                        lang_node.guess.confidence(lang_key)):
                    # lang_node is to remove
                    to_skip = lang_node
                else:
                    # existing_lang_node is to remove
                    langs[lang] = lang_node
                    to_skip = existing_lang_node
                to_skip_language_nodes.append(to_skip)

    if to_skip_language_nodes:
        second_pass_transfo_opts['guess_language'] = (
            ((), {'skip': [{'node_idx': node.parent.node_idx,
                            'span': node.span}
                           for node in to_skip_language_nodes]}))

    if second_pass_opts or second_pass_transfo_opts:
        # 2nd pass is needed
        log.info("Running 2nd pass with options: %s" % second_pass_opts)
        log.info("Transfo options: %s" % second_pass_transfo_opts)
        mtree = IterativeMatcher(filename, filetype=filetype,
                                 opts=second_pass_opts,
                                 transfo_opts=second_pass_transfo_opts)

    m = mtree.matched()

    if 'language' not in m and 'subtitleLanguage' not in m or 'title' not in m:
        return m

    # if we found some language, make sure we didn't cut a title or sth...
    mtree2 = IterativeMatcher(filename, filetype=filetype,
                              opts=['nolanguage', 'nocountry'])
    m2 = mtree2.matched()

    if m.get('title') != m2.get('title'):
        title = next(find_nodes(mtree.match_tree, 'title'), None)
        title2 = next(find_nodes(mtree2.match_tree, 'title'), None)

        # without a title node on both sides there is nothing to compare
        # (e.g. the language-free pass found no title): keep the main guess
        if title is None or title2 is None:
            return m

        # if a node is in an explicit group, then the correct title is probably
        # the other one
        if title.root.node_at(title.node_idx[:2]).is_explicit():
            return m2
        elif title2.root.node_at(title2.node_idx[:2]).is_explicit():
            return m

    return m
|
||||
|
||||
|
||||
def guess_file_info(filename, filetype, info=None):
    """info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    filename -- path of the file to analyze; converted to unicode first
    filetype -- the file type hint passed on to the filename guesser
    info -- a plugin name or a list of plugin names; None means ['filename']

    Plugins that fail only log a warning. Returns the merge of all the
    guesses that could be computed.

    >>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
    """
    result = []
    hashers = []

    # Force unicode as soon as possible
    filename = u(filename)

    if info is None:
        info = ['filename']

    if isinstance(info, base_text_type):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            result.append(_guess_filename(filename, filetype))

        elif infotype == 'hash_mpc':
            from guessit.hash_mpc import hash_file
            try:
                result.append(Guess({'hash_mpc': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s' % e)

        elif infotype == 'hash_ed2k':
            from guessit.hash_ed2k import hash_file
            try:
                result.append(Guess({'hash_ed2k': hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s' % e)

        elif infotype.startswith('hash_'):
            import hashlib
            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
                hashers.append((infotype, hasher))
            except (AttributeError, TypeError):
                # TypeError: the attribute exists but is not a no-argument
                # hash constructor (e.g. 'hash_new' -> hashlib.new)
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)

        else:
            log.warning('Invalid infotype: %s' % infotype)

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192
            # feed every hasher object: going through dict(hashers) would
            # drop all but one of them when an info type is requested twice,
            # leaving the others with the digest of empty input
            hasherobjs = [hasher for _, hasher in hashers]

            with open(filename, 'rb') as f:
                chunk = f.read(blocksize)
                while chunk:
                    for hasher in hasherobjs:
                        hasher.update(chunk)
                    chunk = f.read(blocksize)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s' % e)

    result = merge_all(result)

    # last minute adjustments

    # if country is in the guessed properties, make it part of the filename
    if 'series' in result and 'country' in result:
        result['series'] += ' (%s)' % result['country'].alpha2.upper()

    return result
|
||||
|
||||
|
||||
def guess_video_info(filename, info=None):
    """Guess the properties of filename, using the 'autodetect' file type."""
    return guess_file_info(filename, filetype='autodetect', info=info)
|
||||
|
||||
|
||||
def guess_movie_info(filename, info=None):
    """Guess the properties of filename, using the 'movie' file type."""
    return guess_file_info(filename, filetype='movie', info=info)
|
||||
|
||||
|
||||
def guess_episode_info(filename, info=None):
    """Guess the properties of filename, using the 'episode' file type."""
    return guess_file_info(filename, filetype='episode', info=info)
|
126
libs/guessit/__main__.py
Normal file
126
libs/guessit/__main__.py
Normal file
|
@ -0,0 +1,126 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
from guessit import u
|
||||
from guessit import slogging, guess_file_info
|
||||
from optparse import OptionParser
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
import locale
|
||||
|
||||
|
||||
def detect_filename(filename, filetype, info=None, advanced = False):
    """Print filename followed by what guess_file_info found for it.

    filename -- the name to analyze; converted to unicode first
    filetype -- the file type hint passed to guess_file_info
    info -- list of the information types to compute; None (the default)
            stands for ['filename']
    advanced -- passed to nice_string() when formatting the result
    """
    # build the default here rather than in the signature, so that no list
    # object is shared between calls
    if info is None:
        info = ['filename']

    filename = u(filename)

    print('For:', filename)
    print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string(advanced))
|
||||
|
||||
|
||||
def run_demo(episodes=True, movies=True, advanced=False):
    """Print the guesses obtained for a few sample filenames.

    episodes -- whether to run the sample episode filenames
    movies -- whether to run the sample movie filenames
    advanced -- passed on to detect_filename for every sample
    """
    # NOTE: tests should not be added here but rather in the tests/ folder
    #       this is just intended as a quick example
    separator = '-' * 80

    def show(samples, filetype):
        # each sample is preceded by a separator line
        for sample in samples:
            print(separator)
            detect_filename(sample, filetype=filetype, advanced=advanced)

    if episodes:
        show([
            'Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi',
            'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi',
            'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi',
            'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi',
            'Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi',
            'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg',
            'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi',
            'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi',
            'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi',
        ], 'episode')

    if movies:
        show([
            'Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv',
            'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi',
            'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi',
            'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv',
            'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv',
            'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi',  # FIXME: PROPER and R5 get overwritten
            '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv',  # FIXME: title gets overwritten
            'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi',
            'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt',
            'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv',
            'Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv',
            'Movies/Pirates of the Caribbean: The Curse of the Black Pearl (2003)/Pirates.Of.The.Carribean.DC.2003.iNT.DVDRip.XviD.AC3-NDRT.CD1.avi',
            'Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi',
            'Movies/The NeverEnding Story (1984)/The.NeverEnding.Story.1.1984.DVDRip.AC3.Xvid-Monteque.avi',
            'Movies/Juno (2007)/Juno KLAXXON.avi',
            'Movies/Chat noir, chat blanc (1998)/Chat noir, Chat blanc - Emir Kusturica (VO - VF - sub FR - Chapters).mkv',
            'Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.srt',
            'Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi',
            'testsmewt_bugs/movies/Baraka_Edition_Collector.avi',
        ], 'movie')
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse the options and print the guesses.

    With --demo the builtin samples are run; otherwise every file given as
    argument is analyzed, and the help is printed when none was given.
    """
    slogging.setupLogging()

    # see http://bugs.python.org/issue2128
    # note: index access is used instead of the .major attribute, which
    # only exists since python 2.7
    if sys.version_info[0] < 3 and os.name == 'nt':
        for i, a in enumerate(sys.argv):
            sys.argv[i] = a.decode(locale.getpreferredencoding())

    parser = OptionParser(usage = 'usage: %prog [options] file1 [file2...]')
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
                      help = 'display debug output')
    parser.add_option('-i', '--info', dest = 'info', default = 'filename',
                      help = 'the desired information type: filename, hash_mpc or a hash from python\'s '
                             'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of '
                             'them, comma-separated')
    parser.add_option('-t', '--type', dest = 'filetype', default = 'autodetect',
                      help = 'the suggested file type: movie, episode or autodetect')
    parser.add_option('-a', '--advanced', dest = 'advanced', action='store_true', default = False,
                      help = 'display advanced information for filename guesses, as json output')
    parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False,
                      help = 'run a few builtin tests instead of analyzing a file')

    options, args = parser.parse_args()
    if options.verbose:
        logging.getLogger('guessit').setLevel(logging.DEBUG)

    if options.demo:
        run_demo(episodes=True, movies=True, advanced=options.advanced)
    else:
        if args:
            for filename in args:
                detect_filename(filename,
                                filetype = options.filetype,
                                info = options.info.split(','),
                                advanced = options.advanced)

        else:
            parser.print_help()


if __name__ == '__main__':
    main()
|
112
libs/guessit/country.py
Normal file
112
libs/guessit/country.py
Normal file
|
@ -0,0 +1,112 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import UnicodeMixin, base_text_type, u
|
||||
from guessit.fileutils import load_file_in_same_dir
|
||||
import logging
|
||||
|
||||
__all__ = [ 'Country' ]
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# parsed from http://en.wikipedia.org/wiki/ISO_3166-1
|
||||
#
|
||||
# Description of the fields:
|
||||
# "An English name, an alpha-2 code (when given),
|
||||
# an alpha-3 code (when given), a numeric code, and an ISO 3166-2 code
|
||||
# are all separated by pipe (|) characters."
|
||||
_iso3166_contents = load_file_in_same_dir(__file__, 'ISO-3166-1_utf8.txt')

# one row per country: [english name, alpha-2, alpha-3, numeric, ISO 3166-2]
country_matrix = [ l.strip().split('|')
                   for l in _iso3166_contents.strip().split('\n') ]

# pseudo-countries which are not part of ISO 3166-1
country_matrix += [ [ 'Unknown', 'un', 'unk', '', '' ],
                    [ 'Latin America', '', 'lat', '', '' ]
                    ]

# maps a lower-case english name, alpha-2 code or alpha-3 code to the
# lower-case alpha-3 code. Empty fields are skipped: 'Latin America' has no
# alpha-2 code, and registering it would make the empty string a valid
# identifier for that entry.
country_to_alpha3 = dict((c[0].lower(), c[2].lower()) for c in country_matrix if c[0])
country_to_alpha3.update(dict((c[1].lower(), c[2].lower()) for c in country_matrix if c[1]))
country_to_alpha3.update(dict((c[2].lower(), c[2].lower()) for c in country_matrix if c[2]))

# add here exceptions / non ISO representations
# Note: remember to put those exceptions in lower-case, they won't work otherwise
country_to_alpha3.update({ 'latinoamérica': 'lat',
                           'brazilian': 'bra',
                           'españa': 'esp',
                           'uk': 'gbr'
                           })

# reverse tables, keyed by lower-case alpha-3 code
country_alpha3_to_en_name = dict((c[2].lower(), c[0]) for c in country_matrix)
country_alpha3_to_alpha2 = dict((c[2].lower(), c[1].lower()) for c in country_matrix)
|
||||
|
||||
|
||||
|
||||
class Country(UnicodeMixin):
    """A country, identified by its lower-case 3-letter code (alpha3).

    It can be created from any key of country_to_alpha3: an ISO-3166
    2-letter or 3-letter code, an English name, or one of the extra
    spellings registered in that table.
    """

    def __init__(self, country, strict=False):
        """Look up country after stripping and lower-casing it.

        An unrecognized value raises ValueError when strict is true, and
        yields the 'unk' country otherwise.
        """
        key = u(country.strip().lower())
        alpha3 = country_to_alpha3.get(key)

        if alpha3 is None:
            if strict:
                msg = 'The given string "%s" could not be identified as a country'
                raise ValueError(msg % key)
            alpha3 = 'unk'

        self.alpha3 = alpha3

    @property
    def alpha2(self):
        """The 2-letter code found in country_alpha3_to_alpha2."""
        return country_alpha3_to_alpha2[self.alpha3]

    @property
    def english_name(self):
        """The English name found in country_alpha3_to_en_name."""
        return country_alpha3_to_en_name[self.alpha3]

    def __hash__(self):
        return hash(self.alpha3)

    def __eq__(self, other):
        """Compare with another Country, or with a string naming a country."""
        if isinstance(other, Country):
            return self.alpha3 == other.alpha3

        if not isinstance(other, base_text_type):
            return False

        # strings are converted to a Country first
        try:
            return self == Country(other)
        except ValueError:
            return False

    def __ne__(self, other):
        return not self == other

    def __unicode__(self):
        return self.english_name

    def __repr__(self):
        return 'Country(%s)' % self.english_name
|
133
libs/guessit/date.py
Normal file
133
libs/guessit/date.py
Normal file
|
@ -0,0 +1,133 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import datetime
|
||||
import re
|
||||
|
||||
def valid_year(year):
    """Return whether year is a plausible production year, that is after
    1920 and less than 5 years after the current year."""
    return 1920 < year < datetime.date.today().year + 5


def search_year(string):
    """Looks for year patterns, and if found return the year and group span.
    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.

    Note this only looks for valid production years, that is between 1920
    and now + 5 years, so for instance 2000 would be returned as a valid
    year but 1492 would not.

    Every group of exactly 4 digits is examined in turn, so that a valid
    year is still found when it comes after a 4-digit number which is not
    one (eg: '1080p').

    >>> search_year('in the year 2000...')
    (2000, (12, 16))

    >>> search_year('they arrived in 1492.')
    (None, None)

    >>> search_year(' 1080p 2010 ')
    (2010, (7, 11))
    """
    # the delimiters are matched with lookarounds, so that a single
    # non-digit char can end a candidate and start the next one
    for match in re.finditer(r'(?<=[^0-9])([0-9]{4})(?=[^0-9])', string):
        year = int(match.group(1))
        if valid_year(year):
            return (year, match.span(1))

    return (None, None)
|
||||
|
||||
|
||||
def search_date(string):
    """Find the first plausible date in string.

    The formats YYYYMMDD, YYYY-MM-DD, DD-MM-YYYY and DD-MM-YY are tried in
    that order (with '-', '/', ' ' or '.' as separator). The date must be
    surrounded by non-digit chars, so sentinels are expected at both ends of
    the string. Returns the date and the span of its chars, or (None, None).

    >>> search_date('This happened on 2002-04-22.')
    (datetime.date(2002, 4, 22), (17, 27))

    >>> search_date('And this on 17-06-1998.')
    (datetime.date(1998, 6, 17), (12, 22))

    >>> search_date('no date in here')
    (None, None)
    """
    sep = r'[-/ \.]'
    edge = r'[^0-9]'
    year4 = r'(?P<year>[0-9]{4})'
    year2 = r'(?P<year>[0-9]{2})'
    month2 = r'(?P<month>[0-9]{2})'
    day2 = r'(?P<day>[0-9]{2})'

    patterns = (
        edge + year4 + month2 + day2 + edge,              # 20010823
        edge + year4 + sep + month2 + sep + day2 + edge,  # 2001-08-23
        edge + day2 + sep + month2 + sep + year4 + edge,  # 23-08-2001
        edge + day2 + sep + month2 + sep + year2 + edge,  # 23-08-01
    )

    current_year = datetime.date.today().year

    for pattern in patterns:
        found = re.search(pattern, string)
        if found is None:
            continue

        year = int(found.group('year'))
        month = int(found.group('month'))
        day = int(found.group('day'))

        # years specified as 2 digits should be adjusted here
        if year < 100:
            if year > (current_year % 100) + 5:
                year += 1900
            else:
                year += 2000

        # try the fields as matched first, then with day and month swapped
        date = None
        for mm, dd in ((month, day), (day, month)):
            try:
                date = datetime.date(year, mm, dd)
            except ValueError:
                continue
            break

        if date is None:
            continue

        # check date plausibility
        if not 1900 < date.year < current_year + 5:
            continue

        # looks like we have a valid date
        # note: span is [+1,-1] because we don't want to include the
        # non-digit char
        begin, finish = found.span()
        return (date, (begin + 1, finish - 1))

    return None, None
|
90
libs/guessit/fileutils.py
Normal file
90
libs/guessit/fileutils.py
Normal file
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import s, u
|
||||
import os.path
|
||||
import zipfile
|
||||
import io
|
||||
|
||||
|
||||
def split_path(path):
    r"""Split path into the list of its folders followed by the filename
    (or by the last folder, if path is a folder path).

    For an absolute path, the first element is:
     - the '/' root folder on Unix systems
     - the drive letter on Windows systems (eg: r'C:\')
     - the mount point '\\' on Windows systems (eg: r'\\host\share')

    >>> s(split_path('/usr/bin/smewt'))
    ['/', 'usr', 'bin', 'smewt']

    >>> s(split_path('relative_path/to/my_folder/'))
    ['relative_path', 'to', 'my_folder']

    """
    parts = []
    while True:
        head, tail = os.path.split(path)

        if tail:
            # a path fragment was split off: record it and keep splitting
            parts.insert(0, tail)
            path = head
            continue

        # no tail: either a root was reached, or the path ended with '/'
        size = len(head)

        # on Unix systems, the root folder is '/'
        if head and head == '/' * size:
            return ['/'] + parts

        # on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\
        if (size == 3 and head[1:] == ':\\') or (size == 2 and head == '\\\\'):
            return [head] + parts

        if head == '':
            return parts

        # we just split a directory ending with '/'
        path = head
|
||||
|
||||
|
||||
def file_in_same_dir(ref_file, desired_file):
    """Build the path of desired_file as a sibling of ref_file, ie: the
    last component of ref_file is replaced with desired_file.

    >>> s(file_in_same_dir('~/smewt/smewt.db', 'smewt.settings'))
    '~/smewt/smewt.settings'

    """
    components = split_path(ref_file)[:-1]
    components.append(desired_file)
    return os.path.join(*components)
|
||||
|
||||
|
||||
def load_file_in_same_dir(ref_file, filename):
    """Load a given file. Works even when the file is contained inside a zip.

    ref_file -- path of a file located in the folder to load from
    filename -- name of the file to load from that folder

    Returns the contents of the file decoded as UTF-8 text, whether it was
    read from the filesystem or from a zip archive.
    """
    path = split_path(ref_file)[:-1] + [filename]

    for i, p in enumerate(path):
        if p.endswith('.zip'):
            # everything up to this component is the archive, the rest is
            # the name of the member inside it
            zfilename = os.path.join(*path[:i + 1])
            zfile = zipfile.ZipFile(zfilename)
            try:
                # ZipFile.read returns raw bytes: decode them, to return
                # text like the plain file case below
                return zfile.read('/'.join(path[i + 1:])).decode('utf-8')
            finally:
                zfile.close()

    with io.open(os.path.join(*path), encoding='utf-8') as f:
        return u(f.read())
|
345
libs/guessit/guess.py
Normal file
345
libs/guessit/guess.py
Normal file
|
@ -0,0 +1,345 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import UnicodeMixin, s, u, base_text_type
|
||||
from guessit.language import Language
|
||||
from guessit.country import Country
|
||||
import json
|
||||
import datetime
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Guess(UnicodeMixin, dict):
    """A Guess is a dictionary which has an associated confidence for each of
    its values.

    Next to its confidence, each property also has a 'raw' value, which is
    kept as given and is None by default.

    As it is a subclass of dict, you can use it everywhere you expect a
    simple dict."""

    def __init__(self, *args, **kwargs):
        """Create the guess like a dict would be created.

        The 'confidence' (default 0) and 'raw' (default None) keyword
        arguments are not stored as properties: they are the confidence and
        raw value given to every initial property.
        """
        confidence = kwargs.pop('confidence', 0)
        raw = kwargs.pop('raw', None)

        dict.__init__(self, *args, **kwargs)

        self._confidence = {}
        self._raw = {}
        for prop in self:
            self._confidence[prop] = confidence
            self._raw[prop] = raw

    def to_dict(self, advanced=False):
        """Return a plain dict copy of this guess with converted values.

        Dates are converted to their ISO format; languages, countries,
        strings and the items of lists are converted to unicode text. When
        advanced is true, each value is replaced by a dict with the keys
        "value", "raw" and "confidence".
        """
        data = dict(self)
        for prop, value in self.items():
            if isinstance(value, datetime.date):
                data[prop] = value.isoformat()
            elif isinstance(value, (Language, Country, base_text_type)):
                data[prop] = u(value)
            elif isinstance(value, list):
                data[prop] = [u(x) for x in value]
            if advanced:
                data[prop] = {"value": data[prop], "raw": self.raw(prop), "confidence": self.confidence(prop)}

        return data

    def nice_string(self, advanced=False):
        """Return this guess formatted as indented json.

        When advanced is true, this is the json dump of to_dict(True).
        Otherwise each top-level property line is prefixed with its
        confidence, formatted as [x.xx].
        """
        if advanced:
            data = self.to_dict(advanced)
            return json.dumps(data, indent=4)

        data = self.to_dict()

        # json.dumps(indent=4) starts each top-level property line with 4
        # spaces and the opening quote of its name; the prefix must be 5
        # chars long to ever be equal to the 5-char slice it is compared to
        prop_prefix = '    "'

        parts = json.dumps(data, indent=4).split('\n')
        for i, p in enumerate(parts):
            if p[:5] != prop_prefix:
                continue

            prop = p.split('"')[1]
            parts[i] = ('    [%.2f] "' % self.confidence(prop)) + p[5:]

        return '\n'.join(parts)

    def __unicode__(self):
        return u(self.to_dict())

    def confidence(self, prop):
        """Return the confidence of prop, or -1 if it has none."""
        return self._confidence.get(prop, -1)

    def raw(self, prop):
        """Return the raw value of prop, or None if it has none."""
        return self._raw.get(prop, None)

    def set(self, prop, value, confidence=None, raw=None):
        """Set the value of prop; its confidence and raw value are only
        changed when the corresponding argument is not None."""
        self[prop] = value
        if confidence is not None:
            self._confidence[prop] = confidence
        if raw is not None:
            self._raw[prop] = raw

    def set_confidence(self, prop, value):
        """Set the confidence of prop."""
        self._confidence[prop] = value

    def set_raw(self, prop, value):
        """Set the raw value of prop."""
        self._raw[prop] = value

    def update(self, other, confidence=None, raw=None):
        """Update this guess with the properties of other.

        If other is a Guess, its confidences and raw values are copied as
        well. A confidence or raw argument which is not None is then applied
        to all the properties of other, overriding the copied ones.
        """
        dict.update(self, other)
        if isinstance(other, Guess):
            for prop in other:
                self._confidence[prop] = other.confidence(prop)
                self._raw[prop] = other.raw(prop)

        if confidence is not None:
            for prop in other:
                self._confidence[prop] = confidence

        if raw is not None:
            for prop in other:
                self._raw[prop] = raw

    def update_highest_confidence(self, other):
        """Update this guess with the values from the given one. In case
        a property is present in both, only the one with the highest
        confidence is kept (the existing one in case of a tie).

        Raises ValueError if other is not a Guess."""
        if not isinstance(other, Guess):
            raise ValueError('Can only call this function on Guess instances')

        for prop in other:
            if prop in self and self.confidence(prop) >= other.confidence(prop):
                continue
            self[prop] = other[prop]
            self._confidence[prop] = other.confidence(prop)
            self._raw[prop] = other.raw(prop)
|
||||
|
||||
|
||||
def choose_int(g1, g2):
    """Pick between two (value, confidence) pairs holding integers.

    Used by merge_similar_guesses. Equal values reinforce each other and
    the combined confidence is ``1 - (1 - c1) * (1 - c2)``. Different
    values conflict: the more confident value is returned with the
    difference of the two confidences (the second pair wins a tie).
    """
    (first, conf1), (second, conf2) = g1, g2

    if first == second:
        return (first, 1 - (1 - conf1) * (1 - conf2))

    if conf1 > conf2:
        return (first, conf1 - conf2)
    return (second, conf2 - conf1)
|
||||
|
||||
|
||||
def choose_string(g1, g2):
    """Function used by merge_similar_guesses to choose between 2 possible
    properties when they are strings.

    Each argument is a ``(value, confidence)`` pair. If one value is empty
    (None, '' or whitespace only), the other pair is returned unchanged.

    If the 2 strings are similar, the first one is returned (or the one
    carrying the leading 'the'); if one is contained in the other, the
    shorter one is returned. In both cases the confidence is increased.

    If the 2 strings are dissimilar, the one with the higher confidence is returned, with
    a weaker confidence.

    Note that here, 'similar' means that 2 strings are either equal, or that they
    differ very little, such as one string being the other one with the 'the' word
    prepended to it.

    >>> s(choose_string(('Hello', 0.75), ('World', 0.5)))
    ('Hello', 0.25)

    >>> s(choose_string(('Hello', 0.5), ('hello', 0.5)))
    ('Hello', 0.75)

    >>> s(choose_string(('Hello', 0.4), ('Hello World', 0.4)))
    ('Hello', 0.64)

    >>> s(choose_string(('simpsons', 0.5), ('The Simpsons', 0.5)))
    ('The Simpsons', 0.75)

    """
    v1, c1 = g1  # value, confidence
    v2, c2 = g2

    # Strip before the emptiness test: a whitespace-only value would
    # otherwise become '' and, being "contained" in any string, win the
    # merge with an increased confidence.
    v1 = v1.strip() if v1 else v1
    v2 = v2.strip() if v2 else v2

    if not v1:
        return g2
    elif not v2:
        return g1

    v1l, v2l = v1.lower(), v2.lower()

    combined_prob = 1 - (1 - c1) * (1 - c2)

    if v1l == v2l:
        return (v1, combined_prob)

    # check for common patterns
    elif v1l == 'the ' + v2l:
        return (v1, combined_prob)
    elif v2l == 'the ' + v1l:
        return (v2, combined_prob)

    # if one string is contained in the other, return the shortest one
    elif v2l in v1l:
        return (v2, combined_prob)
    elif v1l in v2l:
        return (v1, combined_prob)

    # in case of conflict, return the one with highest confidence
    else:
        if c1 > c2:
            return (v1, c1 - c2)
        else:
            return (v2, c2 - c1)
|
||||
|
||||
|
||||
def _merge_similar_guesses_nocheck(guesses, prop, choose):
    """Merge the first two guesses of ``guesses`` that contain ``prop``.

    ``choose`` is called with two ``(value, confidence)`` pairs and returns
    the merged ``(value, confidence)``, increasing or decreasing the
    confidence depending on whether the values are similar. The second
    guess is folded into the first one and removed from ``guesses``.

    If the two guesses share another property with different values, a
    warning is logged and nothing is merged.

    This function assumes there are at least 2 valid guesses."""

    similar = [guess for guess in guesses if prop in guess]

    g1, g2 = similar[0], similar[1]

    other_props = (set(g1) & set(g2)) - set([prop])
    if other_props:
        log.debug('guess 1: %s' % g1)
        log.debug('guess 2: %s' % g2)
        # NOTE: the loop variable must not be called ``prop``; rebinding
        # the parameter here made the merge below run on the wrong property.
        for other_prop in other_props:
            if g1[other_prop] != g2[other_prop]:
                log.warning('both guesses to be merged have more than one '
                            'different property in common, bailing out...')
                return

    # merge all props of g2 into g1, updating the confidence for the
    # considered property
    v1, v2 = g1[prop], g2[prop]
    c1, c2 = g1.confidence(prop), g2.confidence(prop)

    new_value, new_confidence = choose((v1, c1), (v2, c2))
    if new_confidence >= c1:
        msg = "Updating matching property '%s' with confidence %.2f"
    else:
        msg = "Updating non-matching property '%s' with confidence %.2f"
    log.debug(msg % (prop, new_confidence))

    g2[prop] = new_value
    g2.set_confidence(prop, new_confidence)

    g1.update(g2)
    guesses.remove(g2)
|
||||
|
||||
|
||||
def merge_similar_guesses(guesses, prop, choose):
    """Merge, in place, the guesses of ``guesses`` that contain ``prop``.

    The confidence of the merged property is increased or decreased by
    ``choose`` depending on whether the values are similar. With more than
    two candidates, pairs are merged repeatedly for as long as each pass
    shrinks the list."""
    candidates = [guess for guess in guesses if prop in guess]
    count = len(candidates)

    if count < 2:
        # nothing to merge
        return

    if count == 2:
        _merge_similar_guesses_nocheck(guesses, prop, choose)
        return

    log.debug('complex merge, trying our best...')
    size_before = len(guesses)
    _merge_similar_guesses_nocheck(guesses, prop, choose)
    if len(guesses) < size_before:
        # recurse only when the previous call actually did something,
        # otherwise we end up in an infinite loop
        merge_similar_guesses(guesses, prop, choose)
|
||||
|
||||
|
||||
def merge_all(guesses, append=None):
    """Merge all the guesses in a single result, remove very unlikely values,
    and return it.
    You can specify a list of properties that should be appended into a list
    instead of being merged.

    The first guess of ``guesses`` is used as the result and is modified in
    place; appended properties are deleted from the other guesses. An empty
    ``guesses`` yields an empty Guess.

    >>> s(merge_all([ Guess({'season': 2}, confidence=0.6),
    ...               Guess({'episodeNumber': 13}, confidence=0.8) ]))
    {'season': 2, 'episodeNumber': 13}

    >>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02),
    ...               Guess({'season': 1}, confidence=0.2) ]))
    {'season': 1}

    >>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8),
    ...               Guess({'releaseGroup': '2HD'}, confidence=0.8) ],
    ...             append=['other']))
    {'releaseGroup': '2HD', 'other': ['PROPER']}


    """
    if not guesses:
        return Guess()

    result = guesses[0]
    if append is None:
        append = []

    for g in guesses[1:]:
        # first append our appendable properties
        for prop in append:
            if prop in g:
                # The value accumulated so far may still be a scalar (e.g.
                # when it comes from the first guess); wrap it so that the
                # concatenation below cannot raise a TypeError.
                existing = result.get(prop, [])
                if not isinstance(existing, list):
                    existing = [existing]

                result.set(prop, existing + [g[prop]],
                           # TODO: what to do with confidence here? maybe an
                           # arithmetic mean...
                           confidence=g.confidence(prop),
                           raw=g.raw(prop))

                del g[prop]

        # then merge the remaining ones
        dups = set(result) & set(g)
        if dups:
            log.warning('duplicate properties %s in merged result...' % [ (result[p], g[p]) for p in dups] )

        result.update_highest_confidence(g)

    # delete very unlikely values
    for p in list(result.keys()):
        if result.confidence(p) < 0.05:
            del result[p]

    # make sure our appendable properties contain unique values
    for prop in append:
        try:
            value = result[prop]
            if isinstance(value, list):
                result[prop] = list(set(value))
            else:
                result[prop] = [ value ]
        except KeyError:
            pass

    return result
|
65
libs/guessit/hash_ed2k.py
Normal file
65
libs/guessit/hash_ed2k.py
Normal file
|
@ -0,0 +1,65 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import s, to_hex
|
||||
import hashlib
|
||||
import os.path
|
||||
|
||||
|
||||
def hash_file(filename):
    """Build the ed2k link of a given file.

    The link is made of the file's base name, its size in bytes and its
    upper-cased ed2k hash.

    >>> s(hash_file('tests/dummy.srt'))
    'ed2k://|file|dummy.srt|44|1CA0B9DED3473B926AA93A0A546138BB|/'
    """
    name = os.path.basename(filename)
    size = os.path.getsize(filename)
    digest = hash_filehash(filename).upper()
    return 'ed2k://|file|%s|%d|%s|/' % (name, size, digest)
|
||||
|
||||
|
||||
def hash_filehash(filename):
    """Returns the ed2k hash of a given file, as a lowercase hex string.

    The file is read in chunks of 9728000 bytes and each chunk is hashed
    with MD4. A file made of a single chunk hashes to that chunk's MD4;
    otherwise the result is the MD4 of the concatenated chunk digests.

    Requires MD4 support in hashlib (``hashlib.new('md4')`` raises
    ValueError when the underlying library does not provide it).

    This function is taken from:
    http://www.radicand.org/blog/orz/2010/2/21/edonkey2000-hash-in-python/
    """
    chunk_size = 9728000

    def md4_hash(data):
        m = hashlib.new('md4')
        m.update(data)
        return m

    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (EOF)
        chunk_hashes = [md4_hash(chunk)
                        for chunk in iter(lambda: f.read(chunk_size), b'')]

    if len(chunk_hashes) == 1:
        return chunk_hashes[0].hexdigest()

    # Digests are bytes: join them as bytes. Concatenating onto a text ""
    # (unicode under unicode_literals) fails, and ``reduce`` is not a
    # builtin on Python 3.
    return md4_hash(b''.join(h.digest() for h in chunk_hashes)).hexdigest()
|
57
libs/guessit/hash_mpc.py
Normal file
57
libs/guessit/hash_mpc.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import struct
|
||||
import os
|
||||
|
||||
|
||||
def hash_file(filename):
    """Return the 64-bit hash of a file as a 16-digit lowercase hex string.

    The hash is the file size plus the sum of the 64-bit little-endian
    words of the first and the last 64 KiB of the file, truncated to 64
    bits.

    Raises Exception if the file is smaller than 128 KiB.

    This function is taken from:
    http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
    and is licensed under the GPL."""

    # '<' pins the byte order to little-endian, so the result does not
    # depend on the host architecture
    longlongformat = '<q'  # long long
    bytesize = struct.calcsize(longlongformat)
    chunk_size = 65536
    # integer division: ``/`` yields a float on Python 3
    block_format = '<%dq' % (chunk_size // bytesize)

    # the context manager closes the file even when the size check raises
    with open(filename, "rb") as f:
        filesize = os.path.getsize(filename)
        hash_value = filesize

        if filesize < chunk_size * 2:
            raise Exception("SizeError: size is %d, should be > 132K..." % filesize)

        for offset in (0, max(0, filesize - chunk_size)):
            f.seek(offset, 0)
            for l_value in struct.unpack(block_format, f.read(chunk_size)):
                # mask to remain a 64bit number
                hash_value = (hash_value + l_value) & 0xFFFFFFFFFFFFFFFF

    return "%016x" % hash_value
|
400
libs/guessit/language.py
Normal file
400
libs/guessit/language.py
Normal file
|
@ -0,0 +1,400 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import UnicodeMixin, base_text_type, u, s
|
||||
from guessit.fileutils import load_file_in_same_dir
|
||||
from guessit.textutils import find_words
|
||||
from guessit.country import Country
|
||||
import re
|
||||
import logging
|
||||
|
||||
__all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language',
|
||||
'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'UNDETERMINED',
|
||||
'search_language', 'guess_language' ]
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# downloaded from http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
||||
#
|
||||
# Description of the fields:
|
||||
# "An alpha-3 (bibliographic) code, an alpha-3 (terminologic) code (when given),
|
||||
# an alpha-2 code (when given), an English name, and a French name of a language
|
||||
# are all separated by pipe (|) characters."
|
||||
# The first character of the file is dropped because it is the BOM.
_iso639_contents = load_file_in_same_dir(__file__, 'ISO-639-2_utf-8.txt')[1:]

# One row per language:
#   [alpha-3 (bibliographic), alpha-3 (terminologic), alpha-2,
#    English name(s), French name(s)]
language_matrix = []
for _line in _iso639_contents.strip().split('\n'):
    language_matrix.append(_line.strip().split('|'))

# entries missing from the downloaded table
language_matrix.extend([['mol', '', 'mo', 'Moldavian', 'moldave'],
                        ['ass', '', '', 'Assyrian', 'assyrien']])

for lang in language_matrix:
    # drop 2-letter codes of unused languages that shadow a common,
    # non-official form: 'se' (Northern Sami shadows Swedish) and
    # 'br' (Breton shadows Brazilian)
    if lang[2] in ('se', 'br'):
        lang[2] = ''
    # add missing information
    if lang[0] == 'und':
        lang[2] = 'un'
    if lang[0] == 'srp':
        lang[1] = 'scc'  # from OpenSubtitles


# sets of every known code / name
lng3 = frozenset(row[0] for row in language_matrix if row[0])
lng3term = frozenset(row[1] for row in language_matrix if row[1])
lng2 = frozenset(row[2] for row in language_matrix if row[2])
lng_en_name = frozenset(name
                        for row in language_matrix
                        for name in row[3].lower().split('; ')
                        if name)
lng_fr_name = frozenset(name
                        for row in language_matrix
                        for name in row[4].lower().split('; ')
                        if name)
lng_all_names = lng3 | lng3term | lng2 | lng_en_name | lng_fr_name

# conversions between the different codes
lng3_to_lng3term = dict((row[0], row[1]) for row in language_matrix if row[1])
lng3term_to_lng3 = dict((row[1], row[0]) for row in language_matrix if row[1])

lng3_to_lng2 = dict((row[0], row[2]) for row in language_matrix if row[2])
lng2_to_lng3 = dict((row[2], row[0]) for row in language_matrix if row[2])

# we only return the first given english name, hoping it is the most used one
lng3_to_lng_en_name = dict((row[0], row[3].split('; ')[0])
                           for row in language_matrix if row[3])
lng_en_name_to_lng3 = dict((en_name.lower(), row[0])
                           for row in language_matrix if row[3]
                           for en_name in row[3].split('; '))

# we only return the first given french name, hoping it is the most used one
lng3_to_lng_fr_name = dict((row[0], row[4].split('; ')[0])
                           for row in language_matrix if row[4])
lng_fr_name_to_lng3 = dict((fr_name.lower(), row[0])
                           for row in language_matrix if row[4]
                           for fr_name in row[4].split('; '))

# Exceptions: strings that should be parsed as a language but which are
# not in an ISO form. Maps the string to (language code, country or None).
lng_exceptions = {
    'unknown': ('und', None),
    'inconnu': ('und', None),
    'unk': ('und', None),
    'un': ('und', None),
    'gr': ('gre', None),
    'greek': ('gre', None),
    'esp': ('spa', None),
    'español': ('spa', None),
    'se': ('swe', None),
    'po': ('pt', 'br'),
    'pb': ('pt', 'br'),
    'pob': ('pt', 'br'),
    'br': ('pt', 'br'),
    'brazilian': ('pt', 'br'),
    'català': ('cat', None),
    'cz': ('cze', None),
    'ua': ('ukr', None),
    'cn': ('chi', None),
    'chs': ('chi', None),
    'jp': ('jpn', None),
    'scr': ('hrv', None),
}
|
||||
|
||||
|
||||
def is_iso_language(language):
    """Tell whether ``language``, compared case-insensitively, is one of
    the known ISO codes or names (``lng_all_names``)."""
    lowered = language.lower()
    return lowered in lng_all_names
|
||||
|
||||
def is_language(language):
    """Tell whether ``language`` is an ISO code or name, or one of the
    non-ISO strings listed in ``lng_exceptions``.

    Both lookups are case-insensitive, in line with ``is_iso_language`` and
    with ``Language``, which lower-cases its input before consulting
    ``lng_exceptions``.
    """
    return is_iso_language(language) or language.lower() in lng_exceptions
|
||||
|
||||
def lang_set(languages, strict=False):
    """Build a set of guessit.Language objects out of their string
    representations.

    ``strict`` is passed on to each Language: when True, an exception is
    raised if any language could not be identified.
    """
    result = set()
    for name in languages:
        result.add(Language(name, strict=strict))
    return result
|
||||
|
||||
|
||||
class Language(UnicodeMixin):
    """This class represents a human language.

    You can initialize it with pretty much anything, as it knows conversion
    from ISO-639 2-letter and 3-letter codes, English and French names.

    You can also distinguish languages for specific countries, such as
    Portuguese and Brazilian Portuguese.

    There are various properties on the language object that give you the
    representation of the language for a specific usage, such as .alpha3
    to get the ISO 3-letter code, or .opensubtitles to get the OpenSubtitles
    language code.

    A string that cannot be identified yields the undetermined language
    ('und'), or raises ValueError when ``strict`` is True. Two languages
    compare equal (and hash alike) when they have the same 3-letter code;
    the country is not taken into account. The undetermined language is
    falsy.

    >>> Language('fr')
    Language(French)

    >>> s(Language('eng').french_name)
    'anglais'

    >>> s(Language('pt(br)').country.english_name)
    'Brazil'

    >>> s(Language('Español (Latinoamérica)').country.english_name)
    'Latin America'

    >>> Language('Spanish (Latin America)') == Language('Español (Latinoamérica)')
    True

    >>> s(Language('zz', strict=False).english_name)
    'Undetermined'

    >>> s(Language('pt(br)').opensubtitles)
    'pob'
    """

    # "language(country)" and "language-country" forms; raw strings so that
    # '\(' is a regex escape and not an (invalid) string escape
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self, language, country=None, strict=False, scheme=None):
        language = u(language.strip().lower())
        with_country = (Language._with_country_regexp.match(language) or
                        Language._with_country_regexp2.match(language))
        if with_country:
            self.lang = Language(with_country.group(1)).lang
            self.country = Country(with_country.group(2))
            return

        self.lang = None
        self.country = Country(country) if country else None

        # first look for scheme specific languages
        if scheme == 'opensubtitles':
            if language == 'br':
                self.lang = 'bre'
                return
            elif language == 'se':
                self.lang = 'sme'
                return
        elif scheme is not None:
            log.warning('Unrecognized scheme: "%s" - Proceeding with standard one' % scheme)

        # look for ISO language codes
        if len(language) == 2:
            self.lang = lng2_to_lng3.get(language)
        elif len(language) == 3:
            self.lang = (language
                         if language in lng3
                         else lng3term_to_lng3.get(language))
        else:
            self.lang = (lng_en_name_to_lng3.get(language) or
                         lng_fr_name_to_lng3.get(language))

        # general language exceptions
        if self.lang is None and language in lng_exceptions:
            lang, country = lng_exceptions[language]
            self.lang = Language(lang).alpha3
            self.country = Country(country) if country else None

        msg = 'The given string "%s" could not be identified as a language' % language

        if self.lang is None and strict:
            raise ValueError(msg)

        if self.lang is None:
            log.debug(msg)
            self.lang = 'und'

    @property
    def alpha2(self):
        """ISO 2-letter code; raises KeyError if the language has none."""
        return lng3_to_lng2[self.lang]

    @property
    def alpha3(self):
        """ISO 3-letter code."""
        return self.lang

    @property
    def alpha3term(self):
        """ISO 3-letter terminologic code; raises KeyError if there is none."""
        return lng3_to_lng3term[self.lang]

    @property
    def english_name(self):
        """First English name listed for the language."""
        return lng3_to_lng_en_name[self.lang]

    @property
    def french_name(self):
        """First French name listed for the language."""
        return lng3_to_lng_fr_name[self.lang]

    @property
    def opensubtitles(self):
        """Language code as used by OpenSubtitles."""
        if self.lang == 'por' and self.country and self.country.alpha2 == 'br':
            return 'pob'
        elif self.lang in ['gre', 'srp']:
            return self.alpha3term
        return self.alpha3

    @property
    def tmdb(self):
        """Code of the form 'xx' or 'xx-CC' when a country is set."""
        if self.country:
            return '%s-%s' % (self.alpha2, self.country.alpha2.upper())
        return self.alpha2

    def __hash__(self):
        return hash(self.lang)

    def __eq__(self, other):
        if isinstance(other, Language):
            return self.lang == other.lang

        if isinstance(other, base_text_type):
            try:
                return self == Language(other)
            except ValueError:
                return False

        return False

    def __ne__(self, other):
        return not self == other

    def __bool__(self):
        return self.lang != 'und'

    # Python 2 looks up __nonzero__, Python 3 looks up __bool__; without
    # __bool__ the undetermined language was truthy on Python 3.
    __nonzero__ = __bool__

    def __unicode__(self):
        if self.country:
            return '%s(%s)' % (self.english_name, self.country.alpha2)
        else:
            return self.english_name

    def __repr__(self):
        if self.country:
            return 'Language(%s, country=%s)' % (self.english_name, self.country)
        else:
            return 'Language(%s)' % self.english_name
|
||||
|
||||
|
||||
# the language used when nothing could be identified
UNDETERMINED = Language('und')

# every string recognized as an ISO code or name
ALL_LANGUAGES_NAMES = lng_all_names

# every identifiable language, i.e. all of them but the undetermined one
ALL_LANGUAGES = frozenset(
    Language(lng) for lng in lng_all_names).difference([UNDETERMINED])
|
||||
|
||||
def search_language(string, lang_filter=None, skip=None):
    """Looks for language patterns, and if found return the language object,
    its group span and an associated confidence.

    you can specify a list of allowed languages using the lang_filter argument,
    as in lang_filter = [ 'fr', 'eng', 'spanish' ]

    ``skip`` is an optional collection of (start, end) spans that must not
    be reported. A candidate word is looked for at every position in the
    string: the first occurrence that is surrounded by separators and is
    not in ``skip`` is used.

    Returns ``(None, None, None)`` when no language is found.

    >>> search_language('movie [en].avi')
    (Language(English), (7, 9), 0.8)

    >>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es'])
    (None, None, None)
    """

    # list of common words which could be interpreted as languages, but which
    # are far too common to be able to say they represent a language in the
    # middle of a string (where they most likely carry their common meaning)
    lng_common_words = frozenset([
        # english words
        'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to',
        'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan',
        'fry', 'cop', 'zen', 'gay', 'fat', 'cherokee', 'got', 'an', 'as',
        'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi',
        # french words
        'bas', 'de', 'le', 'son', 'vo', 'vf', 'ne', 'ca', 'ce', 'et', 'que',
        'mal', 'est', 'vol', 'or', 'mon', 'se',
        # spanish words
        'la', 'el', 'del', 'por', 'mar',
        # other
        'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii',
        'vi', 'ben', 'da', 'lt'
        ])
    # separator characters (used for membership tests, not as a regexp)
    sep = r'[](){} \._-+'

    if lang_filter:
        lang_filter = lang_set(lang_filter)

    # pad with spaces so that pos - 1 and end are always valid indices;
    # reported spans are shifted back by one to account for the padding
    slow = ' %s ' % string.lower()

    for lang in set(find_words(slow)) & lng_all_names:

        if lang in lng_common_words:
            continue

        # Look at every occurrence, not only the first one: the first hit
        # may be a mere substring of a longer word (or a skipped span)
        # while a later one is a proper standalone match.
        pos = slow.find(lang)
        while pos != -1:
            end = pos + len(lang)
            skipped = bool(skip) and (pos - 1, end - 1) in skip
            # make sure our word is always surrounded by separators
            if not skipped and slow[pos - 1] in sep and slow[end] in sep:
                break
            pos = slow.find(lang, end)

        if pos == -1:
            continue

        language = Language(slow[pos:end])
        if lang_filter and language not in lang_filter:
            continue

        # only allow those languages that have a 2-letter code, those that
        # don't are too esoteric and probably false matches
        if language.lang not in lng3_to_lng2:
            continue

        # confidence depends on lng2, lng3, english name, ...
        if len(lang) == 2:
            confidence = 0.8
        elif len(lang) == 3:
            confidence = 0.9
        else:
            # Note: we could either be really confident that we found a
            #       language or assume that full language names are too
            #       common words and lower their confidence accordingly
            confidence = 0.3 # going with the low-confidence route here

        return language, (pos - 1, end - 1), confidence

    return None, None, None
|
||||
|
||||
|
||||
def guess_language(text):
    """Detect the language in which ``text`` is written.

    Detection is delegated to the external guess-language python module.
    When that module cannot be imported, an error is logged and
    Language(Undetermined) is returned.
    """
    try:
        from guess_language import guessLanguage as detect
        detected = detect(text)
        return Language(detected)

    except ImportError:
        log.error('Cannot detect the language of the given text body, missing dependency: guess-language')
        log.error('Please install it from PyPI, by doing eg: pip install guess-language')
        return UNDETERMINED
|
180
libs/guessit/matcher.py
Normal file
180
libs/guessit/matcher.py
Normal file
|
@ -0,0 +1,180 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import PY3, u, base_text_type
|
||||
from guessit.matchtree import MatchTree
|
||||
from guessit.textutils import normalize_unicode, clean_string
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IterativeMatcher(object):
|
||||
def __init__(self, filename, filetype='autodetect', opts=None, transfo_opts=None):
|
||||
"""An iterative matcher tries to match different patterns that appear
|
||||
in the filename.
|
||||
|
||||
The 'filetype' argument indicates which type of file you want to match.
|
||||
If it is 'autodetect', the matcher will try to see whether it can guess
|
||||
that the file corresponds to an episode, or otherwise will assume it is
|
||||
a movie.
|
||||
|
||||
The recognized 'filetype' values are:
|
||||
[ autodetect, subtitle, info, movie, moviesubtitle, movieinfo, episode,
|
||||
episodesubtitle, episodeinfo ]
|
||||
|
||||
|
||||
The IterativeMatcher works mainly in 2 steps:
|
||||
|
||||
First, it splits the filename into a match_tree, which is a tree of groups
|
||||
which have a semantic meaning, such as episode number, movie title,
|
||||
etc...
|
||||
|
||||
The match_tree created looks like the following:
|
||||
|
||||
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
|
||||
0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
|
||||
0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000
|
||||
__________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___
|
||||
xxxxxttttttttttttt ffffff vvvv xxxxxx ll lll xx xxx ccc
|
||||
[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
|
||||
|
||||
The first 3 lines indicates the group index in which a char in the
|
||||
filename is located. So for instance, x264 is the group (0, 4, 1), and
|
||||
it corresponds to a video codec, denoted by the letter'v' in the 4th line.
|
||||
(for more info, see guess.matchtree.to_string)
|
||||
|
||||
Second, it tries to merge all this information into a single object
|
||||
containing all the found properties, and does some (basic) conflict
|
||||
resolution when they arise.
|
||||
|
||||
|
||||
When you create the Matcher, you can pass it:
|
||||
- a list 'opts' of option names, that act as global flags
|
||||
- a dict 'transfo_opts' of { transfo_name: (transfo_args, transfo_kwargs) }
|
||||
with which to call the transfo.process() function.
|
||||
"""
|
||||
|
||||
valid_filetypes = ('autodetect', 'subtitle', 'info', 'video',
|
||||
'movie', 'moviesubtitle', 'movieinfo',
|
||||
'episode', 'episodesubtitle', 'episodeinfo')
|
||||
if filetype not in valid_filetypes:
|
||||
raise ValueError("filetype needs to be one of %s" % valid_filetypes)
|
||||
if not PY3 and not isinstance(filename, unicode):
|
||||
log.warning('Given filename to matcher is not unicode...')
|
||||
filename = filename.decode('utf-8')
|
||||
|
||||
filename = normalize_unicode(filename)
|
||||
|
||||
if opts is None:
|
||||
opts = []
|
||||
if not isinstance(opts, list):
|
||||
raise ValueError('opts must be a list of option names! Received: type=%s val=%s',
|
||||
type(opts), opts)
|
||||
|
||||
if transfo_opts is None:
|
||||
transfo_opts = {}
|
||||
if not isinstance(transfo_opts, dict):
|
||||
raise ValueError('transfo_opts must be a dict of { transfo_name: (args, kwargs) }. '+
|
||||
'Received: type=%s val=%s', type(transfo_opts), transfo_opts)
|
||||
|
||||
self.match_tree = MatchTree(filename)
|
||||
|
||||
# sanity check: make sure we don't process a (mostly) empty string
|
||||
if clean_string(filename) == '':
|
||||
return
|
||||
|
||||
mtree = self.match_tree
|
||||
mtree.guess.set('type', filetype, confidence=1.0)
|
||||
|
||||
def apply_transfo(transfo_name, *args, **kwargs):
|
||||
transfo = __import__('guessit.transfo.' + transfo_name,
|
||||
globals=globals(), locals=locals(),
|
||||
fromlist=['process'], level=0)
|
||||
default_args, default_kwargs = transfo_opts.get(transfo_name, ((), {}))
|
||||
all_args = args or default_args
|
||||
all_kwargs = dict(default_kwargs)
|
||||
all_kwargs.update(kwargs) # keep all kwargs merged together
|
||||
transfo.process(mtree, *all_args, **all_kwargs)
|
||||
|
||||
# 1- first split our path into dirs + basename + ext
|
||||
apply_transfo('split_path_components')
|
||||
|
||||
# 2- guess the file type now (will be useful later)
|
||||
apply_transfo('guess_filetype', filetype)
|
||||
if mtree.guess['type'] == 'unknown':
|
||||
return
|
||||
|
||||
# 3- split each of those into explicit groups (separated by parentheses
|
||||
# or square brackets)
|
||||
apply_transfo('split_explicit_groups')
|
||||
|
||||
# 4- try to match information for specific patterns
|
||||
# NOTE: order needs to comply to the following:
|
||||
# - website before language (eg: tvu.org.ru vs russian)
|
||||
# - language before episodes_rexps
|
||||
# - properties before language (eg: he-aac vs hebrew)
|
||||
# - release_group before properties (eg: XviD-?? vs xvid)
|
||||
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
|
||||
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
|
||||
'guess_properties', 'guess_language',
|
||||
'guess_video_rexps',
|
||||
'guess_episodes_rexps', 'guess_weak_episodes_rexps' ]
|
||||
else:
|
||||
strategy = [ 'guess_date', 'guess_website', 'guess_release_group',
|
||||
'guess_properties', 'guess_language',
|
||||
'guess_video_rexps' ]
|
||||
|
||||
if 'nolanguage' in opts:
|
||||
strategy.remove('guess_language')
|
||||
|
||||
|
||||
for name in strategy:
|
||||
apply_transfo(name)
|
||||
|
||||
# more guessers for both movies and episodes
|
||||
apply_transfo('guess_bonus_features')
|
||||
apply_transfo('guess_year', skip_first_year=('skip_first_year' in opts))
|
||||
|
||||
if 'nocountry' not in opts:
|
||||
apply_transfo('guess_country')
|
||||
|
||||
apply_transfo('guess_idnumber')
|
||||
|
||||
|
||||
# split into '-' separated subgroups (with required separator chars
|
||||
# around the dash)
|
||||
apply_transfo('split_on_dash')
|
||||
|
||||
# 5- try to identify the remaining unknown groups by looking at their
|
||||
# position relative to other known elements
|
||||
if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'):
|
||||
apply_transfo('guess_episode_info_from_position')
|
||||
else:
|
||||
apply_transfo('guess_movie_title_from_position')
|
||||
|
||||
# 6- perform some post-processing steps
|
||||
apply_transfo('post_process')
|
||||
|
||||
log.debug('Found match tree:\n%s' % u(mtree))
|
||||
|
||||
def matched(self):
    """Return the final guess for the filename given to this matcher.

    The work is delegated to the ``matched()`` method of the match tree
    stored in ``self.match_tree``.
    """
    tree = self.match_tree
    return tree.matched()
|
287
libs/guessit/matchtree.py
Normal file
287
libs/guessit/matchtree.py
Normal file
|
@ -0,0 +1,287 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import UnicodeMixin, base_text_type, Guess
|
||||
from guessit.textutils import clean_string, str_fill
|
||||
from guessit.patterns import group_delimiters
|
||||
from guessit.guess import (merge_similar_guesses, merge_all,
|
||||
choose_int, choose_string)
|
||||
import copy
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseMatchTree(UnicodeMixin):
    """A MatchTree represents the hierarchical split of a string into its
    constituent semantic groups.

    Every node keeps a reference to the full original ``string`` and covers
    the half-open ``span`` (start, end) of it. ``children`` are nodes covering
    sub-spans, and ``guess`` holds the properties found for this node.
    """

    def __init__(self, string='', span=None, parent=None):
        self.string = string
        # a falsy span (None, empty tuple) selects the whole string
        self.span = span or (0, len(string))
        self.parent = parent
        self.children = []
        self.guess = Guess()

    @property
    def value(self):
        """The slice of the full string covered by this node."""
        return self.string[self.span[0]:self.span[1]]

    @property
    def clean_value(self):
        """The node value after being passed through ``clean_string``."""
        return clean_string(self.value)

    @property
    def offset(self):
        """Start position of this node inside the full string."""
        return self.span[0]

    @property
    def info(self):
        """A dict with this node's guess merged with the info of all its
        descendants; values from later children override earlier ones."""
        result = dict(self.guess)

        for c in self.children:
            result.update(c.info)

        return result

    @property
    def root(self):
        """The topmost ancestor of this node (the node itself if it has no
        parent)."""
        if not self.parent:
            return self

        return self.parent.root

    @property
    def depth(self):
        """Height of the subtree rooted at this node (0 for a leaf)."""
        if self.is_leaf():
            return 0

        return 1 + max(c.depth for c in self.children)

    def is_leaf(self):
        """Return whether this node has no children."""
        return self.children == []

    def add_child(self, span):
        """Append a new child node covering ``span`` (absolute positions in
        the full string)."""
        child = MatchTree(self.string, span=span, parent=self)
        self.children.append(child)

    def partition(self, indices):
        """Split this node into consecutive children at the given indices.

        ``indices`` are positions relative to this node's value. The outer
        boundaries 0 and ``len(self.value)`` are added when missing, so an
        empty ``indices`` yields a single child covering the whole node (or
        no child at all when the node value is empty).
        """
        indices = sorted(indices)
        length = len(self.value)
        # guard against an empty list before looking at the first boundary
        if not indices or indices[0] != 0:
            indices.insert(0, 0)
        if indices[-1] != length:
            indices.append(length)

        for start, end in zip(indices[:-1], indices[1:]):
            self.add_child(span=(self.offset + start,
                                 self.offset + end))

    def split_on_components(self, components):
        """Add one child per component, each located with ``str.find`` in
        this node's value, searching from the end of the previous one."""
        offset = 0
        for c in components:
            start = self.value.find(c, offset)
            end = start + len(c)
            self.add_child(span=(self.offset + start,
                                 self.offset + end))
            offset = end

    def nodes_at_depth(self, depth):
        """Yield the descendants located exactly ``depth`` levels below this
        node (the node itself for ``depth == 0``)."""
        if depth == 0:
            yield self
            # deeper nodes can never be at the requested depth: stop here
            # instead of walking the rest of the subtree for nothing
            return

        for child in self.children:
            for node in child.nodes_at_depth(depth - 1):
                yield node

    @property
    def node_idx(self):
        """Tuple of child positions leading from the root to this node
        (the empty tuple for the root)."""
        if self.parent is None:
            return ()
        return self.parent.node_idx + (self.parent.children.index(self),)

    def node_at(self, idx):
        """Return the descendant addressed by ``idx``, a sequence of child
        positions as produced by ``node_idx``.

        Raises ValueError if no such node exists.
        """
        if not idx:
            return self

        try:
            return self.children[idx[0]].node_at(idx[1:])
        except (IndexError, TypeError, ValueError):
            # ValueError comes from the recursive call; re-raise it with the
            # index as seen from this node
            raise ValueError('Non-existent node index: %s' % (idx,))

    def nodes(self):
        """Yield this node and all its descendants, depth-first, parents
        before their children."""
        yield self
        for child in self.children:
            for node in child.nodes():
                yield node

    def _leaves(self):
        """Yield the leaves of the subtree rooted at this node, in order."""
        if self.is_leaf():
            yield self
        else:
            for child in self.children:
                # pylint: disable=W0212
                for leaf in child._leaves():
                    yield leaf

    def leaves(self):
        """Return the leaves of the subtree rooted at this node as a list."""
        return list(self._leaves())

    def to_string(self):
        """Return a multi-line text rendering of the tree.

        The output contains, in order: one line per tree level showing the
        child index of each node over its span, the string with the spans of
        nodes having a guess replaced by '_', a line with a one-letter code
        describing each guess, and finally the unmodified string.
        """
        empty_line = ' ' * len(self.string)

        def to_hex(x):
            # single-char representation: digits, then 'A' for 10, 'B' ...
            if isinstance(x, int):
                return str(x) if x < 10 else chr(55 + x)
            return x

        def meaning(result):
            mmap = {'episodeNumber': 'E',
                    'season': 'S',
                    'extension': 'e',
                    'format': 'f',
                    'language': 'l',
                    'country': 'C',
                    'videoCodec': 'v',
                    'audioCodec': 'a',
                    'website': 'w',
                    'container': 'c',
                    'series': 'T',
                    'title': 't',
                    'date': 'd',
                    'year': 'y',
                    'releaseGroup': 'r',
                    'screenSize': 's'
                    }

            if result is None:
                return ' '

            for prop, l in mmap.items():
                if prop in result:
                    return l

            # a guess holding none of the known properties
            return 'x'

        lines = [empty_line] * (self.depth + 2)  # +2: remaining, meaning
        lines[-2] = self.string

        for node in self.nodes():
            if node == self:
                continue

            idx = node.node_idx
            depth = len(idx) - 1
            if idx:
                lines[depth] = str_fill(lines[depth], node.span,
                                        to_hex(idx[-1]))
            if node.guess:
                lines[-2] = str_fill(lines[-2], node.span, '_')
                lines[-1] = str_fill(lines[-1], node.span, meaning(node.guess))

        lines.append(self.string)

        return '\n'.join(lines)

    def __unicode__(self):
        return self.to_string()
|
||||
|
||||
|
||||
class MatchTree(BaseMatchTree):
    """The MatchTree contains a few "utility" methods which are not necessary
    for the BaseMatchTree, but add a lot of convenience for writing
    higher-level rules."""

    def _unidentified_leaves(self,
                             valid=lambda leaf: len(leaf.clean_value) >= 2):
        """Yield the leaves that have no guess yet and satisfy ``valid``
        (by default: a clean value of at least 2 characters)."""
        for leaf in self._leaves():
            if not leaf.guess and valid(leaf):
                yield leaf

    def unidentified_leaves(self,
                            valid=lambda leaf: len(leaf.clean_value) >= 2):
        """List version of ``_unidentified_leaves``."""
        return list(self._unidentified_leaves(valid))

    def _leaves_containing(self, property_name):
        """Yield the leaves whose guess contains ``property_name``, which is
        either a single property name or an iterable of names (any of them
        is enough for a leaf to be yielded, once)."""
        if isinstance(property_name, base_text_type):
            property_name = [property_name]

        for leaf in self._leaves():
            for prop in property_name:
                if prop in leaf.guess:
                    yield leaf
                    break

    def leaves_containing(self, property_name):
        """List version of ``_leaves_containing``."""
        return list(self._leaves_containing(property_name))

    def first_leaf_containing(self, property_name):
        """Return the first leaf whose guess contains ``property_name``, or
        None if there is none."""
        try:
            return next(self._leaves_containing(property_name))
        except StopIteration:
            return None

    def _previous_unidentified_leaves(self, node):
        """Yield the unidentified leaves whose ``node_idx`` sorts before the
        one of ``node``."""
        node_idx = node.node_idx
        for leaf in self._unidentified_leaves():
            if leaf.node_idx < node_idx:
                yield leaf

    def previous_unidentified_leaves(self, node):
        """List version of ``_previous_unidentified_leaves``."""
        return list(self._previous_unidentified_leaves(node))

    def _previous_leaves_containing(self, node, property_name):
        """Yield the leaves containing ``property_name`` whose ``node_idx``
        sorts before the one of ``node``."""
        node_idx = node.node_idx
        for leaf in self._leaves_containing(property_name):
            if leaf.node_idx < node_idx:
                yield leaf

    def previous_leaves_containing(self, node, property_name):
        """List version of ``_previous_leaves_containing``."""
        return list(self._previous_leaves_containing(node, property_name))

    def is_explicit(self):
        """Return whether the group was explicitly enclosed by
        parentheses/square brackets/etc.

        A node with an empty value is never explicit.
        """
        value = self.value
        if not value:
            # nothing to index: avoid an IndexError on value[0]
            return False
        return (value[0] + value[-1]) in group_delimiters

    def matched(self):
        """Merge the guesses of all the nodes of the tree into a single
        guess and return it. The tree itself is left unmodified."""
        # we need to make a copy here, as the merge functions work in place and
        # calling them on the match tree would modify it
        parts = [node.guess for node in self.nodes() if node.guess]
        parts = copy.deepcopy(parts)

        # 1- try to merge similar information together and give it a higher
        #    confidence
        for int_part in ('year', 'season', 'episodeNumber'):
            merge_similar_guesses(parts, int_part, choose_int)

        for string_part in ('title', 'series', 'container', 'format',
                            'releaseGroup', 'website', 'audioCodec',
                            'videoCodec', 'screenSize', 'episodeFormat',
                            'audioChannels', 'idNumber'):
            merge_similar_guesses(parts, string_part, choose_string)

        # 2- merge the rest, potentially discarding information not properly
        #    merged before
        result = merge_all(parts,
                           append=['language', 'subtitleLanguage', 'other'])

        log.debug('Final result: ' + result.nice_string())
        return result
|
250
libs/guessit/patterns.py
Normal file
250
libs/guessit/patterns.py
Normal file
|
@ -0,0 +1,250 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
# Copyright (c) 2011 Ricard Marxer <ricardmp@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
|
||||
subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ]
|
||||
|
||||
info_exts = [ 'nfo' ]
|
||||
|
||||
video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
|
||||
'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
|
||||
'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv']
|
||||
|
||||
group_delimiters = [ '()', '[]', '{}' ]
|
||||
|
||||
# separator character regexp
|
||||
sep = r'[][,)(}{+ /\._-]' # regexp art, hehe :D
|
||||
|
||||
# character used to represent a deleted char (when matching groups)
|
||||
deleted = '_'
|
||||
|
||||
# format: [ (regexp, confidence, span_adjust) ]
|
||||
episode_rexps = [ # ... Season 2 ...
|
||||
(r'season (?P<season>[0-9]+)', 1.0, (0, 0)),
|
||||
(r'saison (?P<season>[0-9]+)', 1.0, (0, 0)),
|
||||
|
||||
# ... s02e13 ...
|
||||
(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<episodeNumber>(?:-?[eE-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
|
||||
|
||||
# ... s03-x02 ... # FIXME: redundant? remove it?
|
||||
#(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<bonusNumber>(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
|
||||
|
||||
# ... 2x13 ...
|
||||
(r'[^0-9](?P<season>[0-9]{1,2})[^0-9 .-]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),
|
||||
|
||||
# ... s02 ...
|
||||
#(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)),
|
||||
(r's(?P<season>[0-9]{1,2})[^0-9]', 0.6, (0, -1)),
|
||||
|
||||
# v2 or v3 for some mangas which have multiple rips
|
||||
(r'(?P<episodeNumber>[0-9]{1,3})v[23]' + sep, 0.6, (0, 0)),
|
||||
|
||||
# ... ep 23 ...
|
||||
('ep' + sep + r'(?P<episodeNumber>[0-9]{1,2})[^0-9]', 0.7, (0, -1)),
|
||||
|
||||
# ... e13 ... for a mini-series without a season number
|
||||
(sep + r'e(?P<episodeNumber>[0-9]{1,2})' + sep, 0.6, (1, -1))
|
||||
|
||||
]
|
||||
|
||||
|
||||
weak_episode_rexps = [ # ... 213 or 0106 ...
|
||||
(sep + r'(?P<episodeNumber>[0-9]{2,4})' + sep, (1, -1))
|
||||
]
|
||||
|
||||
non_episode_title = [ 'extras', 'rip' ]
|
||||
|
||||
|
||||
video_rexps = [ # cd number
|
||||
(r'cd ?(?P<cdNumber>[0-9])( ?of ?(?P<cdNumberTotal>[0-9]))?', 1.0, (0, 0)),
|
||||
(r'(?P<cdNumberTotal>[1-9]) cds?', 0.9, (0, 0)),
|
||||
|
||||
# special editions
|
||||
(r'edition' + sep + r'(?P<edition>collector)', 1.0, (0, 0)),
|
||||
(r'(?P<edition>collector)' + sep + 'edition', 1.0, (0, 0)),
|
||||
(r'(?P<edition>special)' + sep + 'edition', 1.0, (0, 0)),
|
||||
(r'(?P<edition>criterion)' + sep + 'edition', 1.0, (0, 0)),
|
||||
|
||||
# director's cut
|
||||
(r"(?P<edition>director'?s?" + sep + "cut)", 1.0, (0, 0)),
|
||||
|
||||
# video size
|
||||
(r'(?P<width>[0-9]{3,4})x(?P<height>[0-9]{3,4})', 0.9, (0, 0)),
|
||||
|
||||
# website
|
||||
(r'(?P<website>www(\.[a-zA-Z0-9]+){2,3})', 0.8, (0, 0)),
|
||||
|
||||
# bonusNumber: ... x01 ...
|
||||
(r'x(?P<bonusNumber>[0-9]{1,2})', 1.0, (0, 0)),
|
||||
|
||||
# filmNumber: ... f01 ...
|
||||
(r'f(?P<filmNumber>[0-9]{1,2})', 1.0, (0, 0))
|
||||
]
|
||||
|
||||
websites = [ 'tvu.org.ru', 'emule-island.com', 'UsaBit.com', 'www.divx-overnet.com',
|
||||
'sharethefiles.com' ]
|
||||
|
||||
unlikely_series = [ 'series' ]
|
||||
|
||||
|
||||
# prop_multi is a dict of { property_name: { canonical_form: [ pattern ] } }
|
||||
# pattern is a string considered as a regexp, with the addition that dashes are
|
||||
# replaced with '[-. _]?' which matches more types of separators (or none)
|
||||
# note: simpler patterns need to be at the end of the list to not shadow more
|
||||
# complete ones, eg: 'AAC' needs to come after 'He-AAC'
|
||||
# ie: from most specific to less specific
|
||||
prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ],
|
||||
'HD-DVD': [ 'HD-(?:DVD)?-Rip', 'HD-DVD' ],
|
||||
'BluRay': [ 'Blu-ray', 'B[DR]Rip' ],
|
||||
'HDTV': [ 'HD-TV' ],
|
||||
'DVB': [ 'DVB-Rip', 'DVB', 'PD-TV' ],
|
||||
'WEBRip': [ 'WEB-Rip' ],
|
||||
'Screener': [ 'DVD-SCR', 'Screener' ],
|
||||
'VHS': [ 'VHS' ],
|
||||
'WEB-DL': [ 'WEB-DL' ] },
|
||||
|
||||
'is3D': { True: [ '3D' ] },
|
||||
|
||||
'screenSize': { '480p': [ '480[pi]?' ],
|
||||
'720p': [ '720[pi]?' ],
|
||||
'1080i': [ '1080i' ],
|
||||
'1080p': [ '1080p', '1080[^i]' ] },
|
||||
|
||||
'videoCodec': { 'XviD': [ 'Xvid' ],
|
||||
'DivX': [ 'DVDivX', 'DivX' ],
|
||||
'h264': [ '[hx]-264' ],
|
||||
'Rv10': [ 'Rv10' ],
|
||||
'Mpeg2': [ 'Mpeg2' ] },
|
||||
|
||||
# has nothing to do here (or on filenames for that matter), but some
|
||||
# releases use it and it helps to identify release groups, so we adapt
|
||||
'videoApi': { 'DXVA': [ 'DXVA' ] },
|
||||
|
||||
'audioCodec': { 'AC3': [ 'AC3' ],
|
||||
'DTS': [ 'DTS' ],
|
||||
'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] },
|
||||
|
||||
'audioChannels': { '5.1': [ r'5\.1', 'DD5[._ ]1', '5ch' ] },
|
||||
|
||||
'episodeFormat': { 'Minisode': [ 'Minisodes?' ] }
|
||||
|
||||
}
|
||||
|
||||
# prop_single dict of { property_name: [ canonical_form ] }
|
||||
prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'PUKKA',
|
||||
'CHD', 'ViTE', 'TLF', 'FLAiTE',
|
||||
'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS',
|
||||
'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL',
|
||||
'CtrlHD', 'POD', 'WiKi','IMMERSE', 'FQM',
|
||||
'2HD', 'CTU', 'HALCYON', 'EbP', 'SiTV',
|
||||
'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV',
|
||||
'TLA', 'NTB', 'ASAP', 'MOMENTUM', 'FoV', 'D-Z0N3',
|
||||
'TrollHD', 'ECI'
|
||||
],
|
||||
|
||||
# potentially confusing release group names (they are words)
|
||||
'weakReleaseGroup': [ 'DEiTY', 'FiNaLe', 'UnSeeN', 'KiNGS', 'CLUE', 'DIMENSION',
|
||||
'SAiNTS', 'ARROW', 'EuReKA', 'SiNNERS', 'DiRTY', 'REWARD',
|
||||
'REPTiLE',
|
||||
],
|
||||
|
||||
'other': [ 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'Audiofixed', 'R5',
|
||||
'complete', 'classic', # not so sure about these ones, could appear in a title
|
||||
'ws' ] # widescreen
|
||||
}
|
||||
|
||||
_dash = '-'
|
||||
_psep = '[-. _]?'
|
||||
|
||||
def _to_rexp(prop):
    """Compile a property pattern into a case-insensitive regexp.

    Every occurrence of ``_dash`` in the pattern is replaced by ``_psep``
    before compiling.
    """
    pattern = prop.replace(_dash, _psep)
    return re.compile(pattern, re.IGNORECASE)
|
||||
|
||||
# properties_rexps dict of { property_name: { canonical_form: [ rexp ] } }
|
||||
# containing the rexps compiled from both prop_multi and prop_single
|
||||
# first the multi-pattern properties: one compiled rexp per listed pattern
properties_rexps = dict(
    (prop_name, dict((canonical, [_to_rexp(pattern) for pattern in patterns])
                     for canonical, patterns in forms.items()))
    for prop_name, forms in prop_multi.items())

# then the single-form properties: the canonical form is its own pattern
properties_rexps.update(
    (prop_name, dict((canonical, [_to_rexp(canonical)])
                     for canonical in forms))
    for prop_name, forms in prop_single.items())
|
||||
|
||||
|
||||
|
||||
def find_properties(string):
    """Return the properties found in ``string`` as a list of
    (property_name, canonical_form, start, end) tuples.

    Each rexp of ``properties_rexps`` is searched once; a match is kept only
    when it is delimited by separator chars or by the string boundaries.
    """
    found = []
    length = len(string)

    for property_name, forms in properties_rexps.items():
        # FIXME: this should be done in a more flexible way...
        if property_name in ['weakReleaseGroup']:
            continue

        for canonical, rexps in forms.items():
            for value_rexp in rexps:
                hit = value_rexp.search(string)
                if not hit:
                    continue

                start, end = hit.span()
                # make sure our word is always surrounded by separators
                # note: sep is a regexp, but in this case using it as
                #       a char sequence achieves the same goal
                left_ok = start <= 0 or string[start - 1] in sep
                right_ok = end >= length or string[end] in sep
                if left_ok and right_ok:
                    found.append((property_name, canonical, start, end))

    return found
|
||||
|
||||
|
||||
# canonical form -> list of synonyms accepted for it
property_synonyms = {
    'Special Edition': ['Special'],
    'Collector Edition': ['Collector'],
    'Criterion Edition': ['Criterion'],
}


def revert_synonyms():
    """Return the reverse lookup of ``property_synonyms``: a dict mapping
    each lowercased synonym to its canonical form."""
    return dict((synonym.lower(), canonical)
                for canonical, synonyms in property_synonyms.items()
                for synonym in synonyms)


reverse_synonyms = revert_synonyms()
|
||||
|
||||
|
||||
def canonical_form(string):
    """Return the canonical form registered for ``string`` in
    ``reverse_synonyms`` (case-insensitive lookup), or ``string`` itself
    when it is not a known synonym."""
    key = string.lower()
    if key in reverse_synonyms:
        return reverse_synonyms[key]
    return string
|
||||
|
||||
|
||||
def compute_canonical_form(property_name, value):
    """Return the canonical form of a property given its type if it is a valid
    one, None otherwise.

    ``value`` is matched (anchored at its start) against every rexp known for
    ``property_name`` in ``properties_rexps``. Non-string values always give
    None. A ``property_name`` missing from ``properties_rexps`` raises
    KeyError.
    """
    # `basestring` only exists on Python 2; on Python 3 all text is `str`
    try:
        text_types = basestring  # noqa: F821
    except NameError:
        text_types = str

    if isinstance(value, text_types):
        for canonical, rexps in properties_rexps[property_name].items():
            for rexp in rexps:
                if rexp.match(value):
                    return canonical
    return None
|
89
libs/guessit/slogging.py
Normal file
89
libs/guessit/slogging.py
Normal file
|
@ -0,0 +1,89 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Smewt - A smart collection manager
|
||||
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# Smewt is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Smewt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import logging
|
||||
import sys
|
||||
import os, os.path
|
||||
|
||||
|
||||
GREEN_FONT = "\x1B[0;32m"
|
||||
YELLOW_FONT = "\x1B[0;33m"
|
||||
BLUE_FONT = "\x1B[0;34m"
|
||||
RED_FONT = "\x1B[0;31m"
|
||||
RESET_FONT = "\x1B[0m"
|
||||
|
||||
|
||||
def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):
    """Set up a nice colored logger as the main application logger.

    A new handler is added to the root logger:
     - if ``filename`` is given, a file handler writing to it (the file is
       truncated and its parent directory created when needed), always with
       the plain formatter;
     - otherwise a stream handler, using the colored formatter when
       ``colored`` is true and the platform is not win32.

    ``with_time``, ``with_thread`` and ``with_lineno`` respectively add the
    timestamp, the thread name and the line number to each record.
    """

    class SimpleFormatter(logging.Formatter):
        def __init__(self, with_time, with_thread):
            self.fmt = (('%(asctime)s ' if with_time else '') +
                        '%(levelname)-8s ' +
                        '[%(name)s:%(funcName)s' +
                        (':%(lineno)s' if with_lineno else '') + ']' +
                        ('[%(threadName)s]' if with_thread else '') +
                        ' -- %(message)s')
            logging.Formatter.__init__(self, self.fmt)

    class ColoredFormatter(logging.Formatter):
        def __init__(self, with_time, with_thread):
            # '-CC-' is a placeholder replaced by the level color in format()
            self.fmt = (('%(asctime)s ' if with_time else '') +
                        '-CC-%(levelname)-8s ' +
                        BLUE_FONT + '[%(name)s:%(funcName)s' +
                        (':%(lineno)s' if with_lineno else '') + ']' +
                        RESET_FONT + ('[%(threadName)s]' if with_thread else '') +
                        ' -- %(message)s')

            logging.Formatter.__init__(self, self.fmt)

        def format(self, record):
            modpath = record.name.split('.')
            record.mname = modpath[0]
            record.mmodule = '.'.join(modpath[1:])
            result = logging.Formatter.format(self, record)
            if record.levelno == logging.DEBUG:
                color = BLUE_FONT
            elif record.levelno == logging.INFO:
                color = GREEN_FONT
            elif record.levelno == logging.WARNING:
                color = YELLOW_FONT
            else:
                color = RED_FONT

            result = result.replace('-CC-', color)
            return result

    if filename is not None:
        # make sure we can write to our log file
        logdir = os.path.dirname(filename)
        # dirname is '' for a bare file name: there is nothing to create then
        # (and os.makedirs('') would raise)
        if logdir and not os.path.exists(logdir):
            os.makedirs(logdir)
        ch = logging.FileHandler(filename, mode='w')
        ch.setFormatter(SimpleFormatter(with_time, with_thread))
    else:
        ch = logging.StreamHandler()
        if colored and sys.platform != 'win32':
            ch.setFormatter(ColoredFormatter(with_time, with_thread))
        else:
            ch.setFormatter(SimpleFormatter(with_time, with_thread))

    logging.getLogger().addHandler(ch)
|
224
libs/guessit/textutils.py
Normal file
224
libs/guessit/textutils.py
Normal file
|
@ -0,0 +1,224 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Smewt - A smart collection manager
|
||||
# Copyright (c) 2008-2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# Smewt is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Smewt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import s
|
||||
from guessit.patterns import sep
|
||||
import functools
|
||||
import unicodedata
|
||||
import re
|
||||
|
||||
# string-related functions
|
||||
|
||||
def normalize_unicode(s):
    """Return the unicode string ``s`` normalized to the NFC form."""
    normalized = unicodedata.normalize('NFC', s)
    return normalized
|
||||
|
||||
|
||||
def strip_brackets(s):
    """Return ``s`` without its first and last chars when they form a
    matching pair of '[]', '()' or '{}'; otherwise return ``s`` unchanged."""
    if not s:
        return s

    pairs = (('[', ']'), ('(', ')'), ('{', '}'))
    if (s[0], s[-1]) in pairs:
        return s[1:-1]

    return s
|
||||
|
||||
|
||||
def clean_string(st):
    """Return ``st`` with separator chars turned into single spaces.

    Every char of ``sep`` (used here as a plain sequence of chars) except
    '-' and ',' is replaced by a space, runs of whitespace are collapsed,
    and any char of ``sep`` left at either end of the result is removed.
    """
    kept = ('-', ',')  # do not remove certain chars
    for char in sep:
        if char not in kept:
            st = st.replace(char, ' ')

    collapsed = ' '.join(st.split())

    # now also remove dashes (and commas) on the outer part of the string
    return collapsed.strip(sep)
|
||||
|
||||
|
||||
# raw string: '\w' in a normal literal is an invalid escape sequence
_words_rexp = re.compile(r'\w+', re.UNICODE)


def find_words(s):
    """Return the list of words found in ``s``.

    A word is a maximal run of word characters; underscores are treated as
    separators rather than as part of a word.
    """
    return _words_rexp.findall(s.replace('_', ' '))
|
||||
|
||||
|
||||
def reorder_title(title):
    """Move a trailing ',the' or ', the' (case-insensitive) to the front of
    the title, eg: 'Matrix, The' becomes 'The Matrix'. Other titles are
    returned unchanged."""
    lowered = title.lower()
    for suffix in (',the', ', the'):
        if lowered.endswith(suffix):
            # the article is always the last 3 chars of the title
            return title[-3:] + ' ' + title[:-len(suffix)]
    return title
|
||||
|
||||
|
||||
def str_replace(string, pos, c):
    """Return a copy of ``string`` where the char at index ``pos`` has been
    replaced by ``c``."""
    head = string[:pos]
    tail = string[pos + 1:]
    return head + c + tail
|
||||
|
||||
|
||||
def str_fill(string, region, c):
    """Return a copy of ``string`` where the half-open ``region``
    (start, end) has been filled with repetitions of ``c``."""
    start, end = region
    filler = c * (end - start)
    return string[:start] + filler + string[end:]
|
||||
|
||||
|
||||
|
||||
def levenshtein(a, b):
    """Return the Levenshtein (edit) distance between ``a`` and ``b``: the
    minimal number of deletions, insertions and substitutions needed to turn
    one into the other."""
    if not a:
        return len(b)
    if not b:
        return len(a)

    # only the previous row of the distance matrix is needed at any time
    previous = list(range(len(b) + 1))
    for i, item_a in enumerate(a, 1):
        current = [i]
        for j, item_b in enumerate(b, 1):
            cost = 0 if item_a == item_b else 1
            current.append(min(previous[j] + 1,         # deletion
                               current[j - 1] + 1,      # insertion
                               previous[j - 1] + cost)) # substitution
        previous = current

    return previous[-1]
|
||||
|
||||
|
||||
# group-related functions
|
||||
|
||||
def find_first_level_groups_span(string, enclosing):
    """Return the list of (start, end) spans of the outermost groups
    delimited by the two ``enclosing`` characters (opening, closing).

    Nested groups are not reported separately, ie: '(ab(c)(d))' gives a single
    span covering the whole string. Closing chars without a matching opening
    one are ignored, as are groups that are never closed.

    >>> find_first_level_groups_span('abcd', '()')
    []

    >>> find_first_level_groups_span('abc(de)fgh', '()')
    [(3, 7)]

    >>> find_first_level_groups_span('(ab(c)(d))', '()')
    [(0, 10)]

    >>> find_first_level_groups_span('ab[c]de[f]gh(i)', '[]')
    [(2, 5), (7, 10)]
    """
    opening, closing = enclosing
    open_positions = []  # stack of indices where a group was opened
    spans = []
    for pos, char in enumerate(string):
        if char == opening:
            open_positions.append(pos)
        elif char == closing:
            if not open_positions:
                # we closed a group which was not opened before
                continue
            start = open_positions.pop()
            if not open_positions:
                # we emptied our stack, so we have a 1st level group
                spans.append((start, pos + 1))

    return spans
|
||||
|
||||
|
||||
def split_on_groups(string, groups):
    """Cut ``string`` at every boundary of the given (start, end) groups and
    return the resulting non-empty pieces, in order.

    >>> s(split_on_groups('0123456789', [ (2, 4) ]))
    ['01', '23', '456789']

    >>> s(split_on_groups('0123456789', [ (2, 4), (4, 6) ]))
    ['01', '23', '45', '6789']

    >>> s(split_on_groups('0123456789', [ (5, 7), (2, 4) ]))
    ['01', '23', '4', '56', '789']

    """
    if not groups:
        return [string]

    # all the distinct positions where the string has to be cut
    boundaries = sorted({pos for group in groups for pos in group})
    if boundaries[0] != 0:
        boundaries.insert(0, 0)
    if boundaries[-1] != len(string):
        boundaries.append(len(string))

    pieces = (string[start:end]
              for start, end in zip(boundaries, boundaries[1:]))

    return [piece for piece in pieces if piece]  # only non-empty groups
|
||||
|
||||
|
||||
def find_first_level_groups(string, enclosing, blank_sep=None):
    """Split ``string`` around its outermost groups delimited by the two
    ``enclosing`` characters, and return the list of pieces.

    If ``blank_sep`` is given (and not empty), the enclosing chars of each
    found group are replaced by it in the returned pieces; None means they
    are left as they are.

    Nested groups are not split, ie: '(ab(c)(d))' gives a single group
    containing the whole string.

    >>> s(find_first_level_groups('', '()'))
    ['']

    >>> s(find_first_level_groups('abcd', '()'))
    ['abcd']

    >>> s(find_first_level_groups('abc(de)fgh', '()'))
    ['abc', '(de)', 'fgh']

    >>> s(find_first_level_groups('(ab(c)(d))', '()', blank_sep = '_'))
    ['_ab(c)(d)_']

    >>> s(find_first_level_groups('ab[c]de[f]gh(i)', '[]'))
    ['ab', '[c]', 'de', '[f]', 'gh(i)']

    >>> s(find_first_level_groups('()[]()', '()', blank_sep = '-'))
    ['--', '[]', '--']

    """
    spans = find_first_level_groups_span(string, enclosing)
    if blank_sep:
        for start, end in spans:
            # blank the opening char, then the closing one
            for pos in (start, end - 1):
                string = str_replace(string, pos, blank_sep)

    return split_on_groups(string, spans)
|
109
libs/guessit/transfo/__init__.py
Normal file
109
libs/guessit/transfo/__init__.py
Normal file
|
@ -0,0 +1,109 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import base_text_type, Guess
|
||||
from guessit.patterns import canonical_form
|
||||
from guessit.textutils import clean_string
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def found_property(node, name, confidence):
    """Store on `node` a guess mapping `name` to the node's cleaned value.

    The guess is built from `node.clean_value`, carries the given
    `confidence` and keeps `node.value` as its raw text. The result is
    logged at debug level.
    """
    guess = Guess({name: node.clean_value}, confidence=confidence, raw=node.value)
    node.guess = guess
    log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))
|
||||
|
||||
|
||||
def format_guess(guess):
    """Convert the values found in `guess` to their natural type.

    Numeric properties (season, year, ...) are converted with int(); text
    values are put in canonical form with backslashes removed, the
    'edition' property being cleaned first.

    The dictionary given as input is modified in place and also returned.
    """
    numeric_props = ('season', 'episodeNumber', 'year', 'cdNumber',
                     'cdNumberTotal', 'bonusNumber', 'filmNumber')

    for prop, value in guess.items():
        if prop in numeric_props:
            guess[prop] = int(guess[prop])
            continue

        if not isinstance(value, base_text_type):
            continue

        if prop == 'edition':
            value = clean_string(value)
        guess[prop] = canonical_form(value).replace('\\', '')

    return guess
|
||||
|
||||
|
||||
def find_and_split_node(node, strategy, logger):
    """Apply the matchers of `strategy` to `node`, splitting it on a match.

    `strategy` is a sequence of (matcher, confidence, args, kwargs) tuples.
    Each matcher is called on the node value padded with one space on each
    side; it also receives the node itself when it has a true `use_node`
    attribute. On the first truthy result the node is partitioned on the
    matched span, the child covering exactly that span receives the guess,
    the remaining children are processed recursively and the function
    returns.

    Messages go to `logger` if given, to the module logger otherwise.
    """
    padded = ' %s ' % node.value  # add sentinels

    for matcher, confidence, args, kwargs in strategy:
        call_args = [padded]
        if getattr(matcher, 'use_node', False):
            call_args.append(node)
        if args:
            # NOTE(review): the whole `args` tuple is passed as a single
            # positional argument, it is not unpacked -- confirm intended
            call_args.append(args)

        if kwargs:
            result, span = matcher(*call_args, **kwargs)
        else:
            result, span = matcher(*call_args)

        if not result:
            continue

        # the raw text is sliced with the span as reported on the padded string
        raw = padded[span[0]:span[1]]
        # readjust span to compensate for sentinels
        span = (span[0] - 1, span[1] - 1)

        if confidence is None:
            if isinstance(result, Guess):
                # reuse the confidence of the first key of the returned Guess
                confidence = result.confidence(list(result.keys())[0])
            else:
                confidence = 1.0

        guess = format_guess(Guess(result, confidence=confidence, raw=raw))
        msg = 'Found with confidence %.2f: %s' % (confidence, guess)
        (logger or log).debug(msg)

        node.partition(span)
        matched_span = (span[0] + node.offset, span[1] + node.offset)
        for child in node.children:
            if child.span == matched_span:
                child.guess = guess
            else:
                find_and_split_node(child, strategy, logger)
        return
|
||||
|
||||
|
||||
class SingleNodeGuesser(object):
    """Run a single guessing function over the unidentified leaves of a tree."""

    def __init__(self, guess_func, confidence, logger, *args, **kwargs):
        """Remember the guesser, its confidence, the logger and extra args."""
        self.guess_func, self.confidence, self.logger = guess_func, confidence, logger
        self.args = args
        self.kwargs = kwargs

    def process(self, mtree):
        """Apply the guesser to every unidentified leaf of `mtree`."""
        # The strategy holds a single (guesser, confidence, args, kwargs) entry:
        # - if the guesser returns a guessit.Guess and confidence is specified,
        #   it will override it, otherwise it will leave the guess confidence
        # - if the guesser returns a simple dict as a guess and confidence is
        #   specified, it will use it, or 1.0 otherwise
        single_strategy = [(self.guess_func, self.confidence, self.args, self.kwargs)]

        for leaf in mtree.unidentified_leaves():
            find_and_split_node(leaf, single_strategy, self.logger)
|
61
libs/guessit/transfo/guess_bonus_features.py
Normal file
61
libs/guessit/transfo/guess_bonus_features.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.transfo import found_property
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def process(mtree):
    """Guess bonus title, film series/title and series from neighbouring groups.

    - the unidentified leaf following a 'bonusNumber' leaf, if it shares its
      first two index levels, becomes the 'bonusTitle';
    - the unidentified leaves right before and after a 'filmNumber' leaf
      become the 'filmSeries' and the 'title';
    - when the tree has both a 'season' leaf and bonus information, the
      unidentified leaf before the season, if it shares its first two index
      levels, becomes the 'series'.

    A missing neighbour is skipped silently.
    """
    def previous_group(g):
        # closest unidentified leaf located before g, None if there is none
        for leaf in mtree.unidentified_leaves()[::-1]:
            if leaf.node_idx < g.node_idx:
                return leaf
        return None

    def next_group(g):
        # closest unidentified leaf located after g, None if there is none
        for leaf in mtree.unidentified_leaves():
            if leaf.node_idx > g.node_idx:
                return leaf
        return None

    def same_group(g1, g2):
        return g1.node_idx[:2] == g2.node_idx[:2]

    bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess]
    if bonus:
        bonus_title = next_group(bonus[0])
        # the bonus number may be the last group: nothing to tag then
        if bonus_title is not None and same_group(bonus_title, bonus[0]):
            found_property(bonus_title, 'bonusTitle', 0.8)

    film_number = [node for node in mtree.leaves()
                   if 'filmNumber' in node.guess]
    if film_number:
        film_series = previous_group(film_number[0])
        if film_series is not None:
            found_property(film_series, 'filmSeries', 0.9)

        # looked up after the series has been tagged, as the series leaf is
        # no longer unidentified at this point
        title = next_group(film_number[0])
        if title is not None:
            found_property(title, 'title', 0.9)

    season = [node for node in mtree.leaves() if 'season' in node.guess]
    if season and 'bonusNumber' in mtree.info:
        series = previous_group(season[0])
        if series is not None and same_group(series, season[0]):
            found_property(series, 'series', 0.9)
|
48
libs/guessit/transfo/guess_country.py
Normal file
48
libs/guessit/transfo/guess_country.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.country import Country
|
||||
from guessit import Guess
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Words that match a country code but show up far too often in file names
# to be trusted as a country.
country_common_words = frozenset(('bt', 'bb'))
|
||||
|
||||
def process(mtree):
    """Tag explicit groups such as '(us)' or '[fr]' as countries.

    Only unidentified leaves whose index has exactly two levels are
    considered. The text between the first and last character is
    lowercased, ignored when it is one of `country_common_words`, and must
    be enclosed in (), [] or {}. It is kept only if Country() accepts it in
    strict mode.
    """
    for node in mtree.unidentified_leaves():
        if len(node.node_idx) != 2:
            continue

        inner = node.value[1:-1].lower()
        if inner in country_common_words:
            continue

        # only keep explicit groups (enclosed in parentheses/brackets)
        enclosing = node.value[0] + node.value[-1]
        if enclosing not in ('()', '[]', '{}'):
            continue

        try:
            country = Country(inner, strict=True)
        except ValueError:
            continue

        node.guess = Guess(country=country, confidence=1.0, raw=inner)
|
38
libs/guessit/transfo/guess_date.py
Normal file
38
libs/guessit/transfo/guess_date.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.date import search_date
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def guess_date(string):
    """Look for a date in `string`.

    Returns ({'date': date}, span) when search_date() finds one, and
    (None, None) otherwise.
    """
    date, span = search_date(string)
    if not date:
        return None, None
    return {'date': date}, span
|
||||
|
||||
|
||||
def process(mtree):
    """Run the date guesser, with confidence 1.0, on the leaves of `mtree`."""
    guesser = SingleNodeGuesser(guess_date, 1.0, log)
    guesser.process(mtree)
|
146
libs/guessit/transfo/guess_episode_info_from_position.py
Normal file
146
libs/guessit/transfo/guess_episode_info_from_position.py
Normal file
|
@ -0,0 +1,146 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.transfo import found_property
|
||||
from guessit.patterns import non_episode_title, unlikely_series
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def match_from_epnum_position(mtree, node):
    """Guess series and episode title from their position around `node`.

    `node` is the leaf holding the episode number. Unidentified leaves of
    the same path group are tagged as 'series' or 'title' depending on
    whether they come before or after it, the confidence reflecting how
    clear-cut the layout is.
    """
    epnum_idx = node.node_idx

    # a few helper functions to be able to filter using high-level semantics
    def before_epnum_in_same_pathgroup():
        found = []
        for leaf in mtree.unidentified_leaves():
            if (leaf.node_idx[0] == epnum_idx[0] and
                    leaf.node_idx[1:] < epnum_idx[1:]):
                found.append(leaf)
        return found

    def after_epnum_in_same_pathgroup():
        found = []
        for leaf in mtree.unidentified_leaves():
            if (leaf.node_idx[0] == epnum_idx[0] and
                    leaf.node_idx[1:] > epnum_idx[1:]):
                found.append(leaf)
        return found

    def after_epnum_in_same_explicitgroup():
        found = []
        for leaf in mtree.unidentified_leaves():
            if (leaf.node_idx[:2] == epnum_idx[:2] and
                    leaf.node_idx[2:] > epnum_idx[2:]):
                found.append(leaf)
        return found

    def possible_titles(leaves):
        # drop the values known not to be episode titles
        return [n for n in leaves
                if n.clean_value.lower() not in non_episode_title]

    # epnumber is the first group and there are only 2 after it in same
    # path group
    # -> series title - episode title
    title_candidates = possible_titles(after_epnum_in_same_pathgroup())
    if ('title' not in mtree.info and
            not before_epnum_in_same_pathgroup() and
            len(title_candidates) == 2):
        series_node, title_node = title_candidates
        found_property(series_node, 'series', confidence=0.4)
        found_property(title_node, 'title', confidence=0.4)
        return

    # if we have at least 1 valid group before the episodeNumber, then it's
    # probably the series name
    series_candidates = before_epnum_in_same_pathgroup()
    if series_candidates:
        found_property(series_candidates[0], 'series', confidence=0.7)

    # only 1 group after (in the same path group) and it's probably the
    # episode title
    title_candidates = possible_titles(after_epnum_in_same_pathgroup())
    if len(title_candidates) == 1:
        found_property(title_candidates[0], 'title', confidence=0.5)
        return

    # try in the same explicit group, with lower confidence (lower still
    # when several candidates compete)
    title_candidates = possible_titles(after_epnum_in_same_explicitgroup())
    if title_candidates:
        confidence = 0.4 if len(title_candidates) == 1 else 0.3
        found_property(title_candidates[0], 'title', confidence=confidence)
        return

    # get the one with the longest value (the first one in case of a tie)
    title_candidates = possible_titles(after_epnum_in_same_pathgroup())
    if title_candidates:
        longest = max(title_candidates, key=lambda c: len(c.clean_value))
        found_property(longest, 'title', confidence=0.3)
|
||||
|
||||
|
||||
def process(mtree):
    """Guess series and episode titles from the position of the groups.

    When a leaf holds an episode number, the guessing is delegated to
    match_from_epnum_position(); otherwise the first unidentified groups of
    the basename are used. The parent folder and a "season only" path group
    are then used as further hints for the series, and the confidence of
    series names listed in `unlikely_series` is halved.
    """
    with_epnum = [leaf for leaf in mtree.leaves()
                  if 'episodeNumber' in leaf.guess]
    if with_epnum:
        match_from_epnum_position(mtree, with_epnum[0])
    else:
        # if we don't have the episode number, but at least 2 groups in the
        # basename, then it's probably series - eptitle
        basename = mtree.node_at((-2,))
        title_candidates = [n for n in basename.unidentified_leaves()
                            if n.clean_value.lower() not in non_episode_title]

        if title_candidates:
            # a lone candidate is probably the series name as well
            found_property(title_candidates[0], 'series', 0.4)
            if len(title_candidates) >= 2:
                found_property(title_candidates[1], 'title', 0.4)

    # if we only have 1 remaining valid group in the folder containing the
    # file, then it's likely that it is the series name
    try:
        series_candidates = mtree.node_at((-3,)).unidentified_leaves()
    except ValueError:
        series_candidates = []

    if len(series_candidates) == 1:
        found_property(series_candidates[0], 'series', 0.3)

    # if there's a path group that only contains the season info, then the
    # previous one is most likely the series title (ie: ../series/season X/..)
    season_only = [n for n in mtree.nodes()
                   if 'season' in n.guess and 'episodeNumber' not in n.guess]
    if season_only:
        previous = [leaf for leaf in mtree.unidentified_leaves()
                    if leaf.node_idx[0] == season_only[0].node_idx[0] - 1]
        if len(previous) == 1:
            found_property(previous[0], 'series', 0.5)

    # reduce the confidence of unlikely series
    for n in mtree.nodes():
        if 'series' in n.guess and n.guess['series'].lower() in unlikely_series:
            halved = n.guess.confidence('series') * 0.5
            n.guess.set_confidence('series', halved)
|
66
libs/guessit/transfo/guess_episodes_rexps.py
Normal file
66
libs/guessit/transfo/guess_episodes_rexps.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import Guess
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.patterns import episode_rexps
|
||||
import re
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
def number_list(s):
    """Return the list of integers found in the string `s`.

    Any run of non-digit characters acts as a separator. When exactly two
    numbers are found they are taken as the bounds of an episode interval
    and every number in between (bounds included) is returned.

    The result is always a real list, on python 2 and python 3 alike.
    """
    numbers = [int(n) for n in re.sub('[^0-9]+', ' ', s).split()]

    if len(numbers) == 2:
        # it is an episode interval, return all numbers in between
        # (list() because python 3's range() is not a list)
        return list(range(numbers[0], numbers[1] + 1))

    return numbers
|
||||
|
||||
def guess_episodes_rexps(string):
    """Match `string` against the episode regexps, first match wins.

    Returns (guess, span) for the first entry of `episode_rexps` that
    matches (case-insensitively), the span being the match span shifted by
    the adjustment attached to the regexp. Returns (None, None) when
    nothing matches.
    """
    for rexp, confidence, span_adjust in episode_rexps:
        match = re.search(rexp, string, re.IGNORECASE)
        if not match:
            continue

        span = (match.start() + span_adjust[0],
                match.end() + span_adjust[1])
        raw = string[span[0]:span[1]]
        guess = Guess(match.groupdict(), confidence=confidence, raw=raw)

        # decide whether we have only a single episode number or an
        # episode list
        if guess.get('episodeNumber'):
            episodes = number_list(guess['episodeNumber'])
            guess.set('episodeNumber', episodes[0], confidence=confidence, raw=raw)
            if len(episodes) > 1:
                guess.set('episodeList', episodes, confidence=confidence, raw=raw)

        if guess.get('bonusNumber'):
            bonuses = number_list(guess['bonusNumber'])
            guess.set('bonusNumber', bonuses[0], confidence=confidence, raw=raw)

        return guess, span

    return None, None
|
||||
|
||||
|
||||
def process(mtree):
    """Run the episode regexps guesser on the leaves of `mtree`.

    No confidence is forced (None): the one attached to each regexp is used.
    """
    guesser = SingleNodeGuesser(guess_episodes_rexps, None, log)
    guesser.process(mtree)
|
199
libs/guessit/transfo/guess_filetype.py
Normal file
199
libs/guessit/transfo/guess_filetype.py
Normal file
|
@ -0,0 +1,199 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import Guess
|
||||
from guessit.patterns import (subtitle_exts, info_exts, video_exts, episode_rexps,
|
||||
find_properties, compute_canonical_form)
|
||||
from guessit.date import valid_year
|
||||
from guessit.textutils import clean_string
|
||||
import os.path
|
||||
import re
|
||||
import mimetypes
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Well known movies and series, hardcoded because they cannot be guessed
# appropriately otherwise. The public lists hold the lowercased names.
_KNOWN_MOVIES = ('OSS 117',)
_KNOWN_SERIES = ('Band of Brothers',)

MOVIES = [name.lower() for name in _KNOWN_MOVIES]
SERIES = [name.lower() for name in _KNOWN_SERIES]
|
||||
|
||||
def guess_filetype(mtree, filetype):
    """Work out the type of the file whose path is held by `mtree`.

    Starting from `filetype` ('autodetect' lets the extension decide), the
    type is refined using, in order: the file extension, the path groups
    ('Movies', 'Tv Shows', 'Series'), the hardcoded MOVIES / SERIES lists
    and, while the type is still one of the generic 'video', 'subtitle' or
    'info', episode-specific hints found in the filename. A generic type
    for which no episode hint is found becomes the matching movie type.

    Returns a (filetype, other) pair; `other` is a dict holding the
    'container' for a known extension, the 'extension' otherwise.
    """
    # put the filetype inside a dummy container to be able to have the
    # following functions work correctly as closures
    # this is a workaround for python 2 which doesn't have the
    # 'nonlocal' keyword (python 3 does have it)
    filetype_container = [filetype]
    filename = mtree.string
    generic_types = ('video', 'subtitle', 'info')

    def upgrade_episode():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'episode'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'episodesubtitle'
        elif filetype_container[0] == 'info':
            filetype_container[0] = 'episodeinfo'

    def upgrade_movie():
        if filetype_container[0] == 'video':
            filetype_container[0] = 'movie'
        elif filetype_container[0] == 'subtitle':
            filetype_container[0] = 'moviesubtitle'
        elif filetype_container[0] == 'info':
            filetype_container[0] = 'movieinfo'

    def upgrade_subtitle():
        if 'movie' in filetype_container[0]:
            filetype_container[0] = 'moviesubtitle'
        elif 'episode' in filetype_container[0]:
            filetype_container[0] = 'episodesubtitle'
        else:
            filetype_container[0] = 'subtitle'

    def upgrade_info():
        if 'movie' in filetype_container[0]:
            filetype_container[0] = 'movieinfo'
        elif 'episode' in filetype_container[0]:
            filetype_container[0] = 'episodeinfo'
        else:
            filetype_container[0] = 'info'

    def upgrade(new_type='unknown'):
        # only an undecided type may be replaced
        if filetype_container[0] == 'autodetect':
            filetype_container[0] = new_type

    # look at the extension first
    fileext = os.path.splitext(filename)[1][1:].lower()
    if fileext in subtitle_exts:
        upgrade_subtitle()
        other = {'container': fileext}
    elif fileext in info_exts:
        upgrade_info()
        other = {'container': fileext}
    elif fileext in video_exts:
        upgrade('video')
        other = {'container': fileext}
    else:
        upgrade('unknown')
        other = {'extension': fileext}

    # check whether we are in a 'Movies', 'Tv Shows', ... folder
    folder_rexps = [(r'Movies?', upgrade_movie),
                    (r'Tv[ _-]?Shows?', upgrade_episode),
                    (r'Series', upgrade_episode)]
    for frexp, upgrade_func in folder_rexps:
        frexp = re.compile(frexp, re.IGNORECASE)
        for pathgroup in mtree.children:
            if frexp.match(pathgroup.value):
                upgrade_func()

    # check for a few specific cases which will unintentionally make the
    # following heuristics confused (eg: OSS 117 will look like an episode,
    # season 1, epnum 17, when it is in fact a movie)
    fname = clean_string(filename).lower()
    for m in MOVIES:
        if m in fname:
            log.debug('Found in exception list of movies -> type = movie')
            upgrade_movie()
    for s in SERIES:
        if s in fname:
            log.debug('Found in exception list of series -> type = episode')
            upgrade_episode()

    # now look whether there are some specific hints for episode vs movie
    if filetype_container[0] in generic_types:
        # if we have an episode_rexp (eg: s02e13), it is an episode
        for rexp, _, _ in episode_rexps:
            match = re.search(rexp, filename, re.IGNORECASE)
            if match:
                log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode', rexp, match.group())
                upgrade_episode()
                break

        # if we have a 3-4 digit number that's not a year, maybe an episode
        match = re.search(r'[^0-9]([0-9]{3,4})[^0-9]', filename)
        if match:
            fullnumber = int(match.group()[1:-1])
            epnumber = fullnumber % 100

            # check for validity
            possible = epnumber <= 40 and not valid_year(fullnumber)

            if possible:
                log.debug('Found possible episode number: %s (from string "%s") -> type = episode', epnumber, match.group())
                upgrade_episode()

        # if we have certain properties characteristic of episodes, it is an ep
        for prop, value, _, _ in find_properties(filename):
            log.debug('prop: %s = %s', prop, value)
            if prop == 'episodeFormat':
                log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
                upgrade_episode()
                break

            elif compute_canonical_form('format', value) == 'DVB':
                log.debug('Found characteristic property of episodes: %s = "%s"', prop, value)
                upgrade_episode()
                break

        # origin-specific type
        if 'tvu.org.ru' in filename:
            # do not reference prop/value here: they are unbound when
            # find_properties() returned nothing
            log.debug('Found characteristic origin of episodes: "%s"', 'tvu.org.ru')
            upgrade_episode()

        # if no episode info found, assume it's a movie
        if filetype_container[0] in generic_types:
            log.debug('Nothing characteristic found, assuming type = movie')
            upgrade_movie()

    filetype = filetype_container[0]
    return filetype, other
|
||||
|
||||
|
||||
def process(mtree, filetype='autodetect'):
    """Set the file type on the root of `mtree` and describe its extension.

    The type returned by guess_filetype() is stored, with confidence 1.0,
    on the guess of the tree itself. The container/extension information,
    along with the mimetype when one can be determined, becomes the guess
    of the last node of the tree.
    """
    detected, other = guess_filetype(mtree, filetype)

    mtree.guess.set('type', detected, confidence=1.0)
    log.debug('Found with confidence %.2f: %s' % (1.0, mtree.guess))

    extension_guess = Guess(other, confidence=1.0)

    # guess the mimetype of the filename
    # TODO: handle other mimetypes not found on the default type_maps
    # mimetypes.types_map['.srt']='text/subtitle'
    mime = mimetypes.guess_type(mtree.string, strict=False)[0]
    if mime is not None:
        extension_guess.update({'mimetype': mime}, confidence=1.0)

    last_node = mtree.node_at((-1,))
    last_node.guess = extension_guess
    log.debug('Found with confidence %.2f: %s' % (1.0, last_node.guess))
|
71
libs/guessit/transfo/guess_idnumber.py
Normal file
71
libs/guessit/transfo/guess_idnumber.py
Normal file
|
@ -0,0 +1,71 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2013 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.patterns import find_properties
|
||||
import re
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def guess_properties(string):
    """Return the first property found in `string`.

    The result is ({prop: value}, (pos, end)) for the first entry given by
    find_properties(), or (None, None) when there is none.
    """
    try:
        prop, value, pos, end = find_properties(string)[0]
    except IndexError:
        # nothing was found
        return None, None

    return {prop: value}, (pos, end)
|
||||
|
||||
# an id number is a run of at least 10 letters, digits or dashes
_idnum = re.compile(r'(?P<idNumber>[a-zA-Z0-9-]{10,})')


def guess_idnumber(string):
    """Look in `string` for something that resembles an id / hash value.

    The first run of 10 or more letters, digits and dashes is examined and
    the number of times it switches between digit, letter and other
    character is counted (the count starts as if a letter came before the
    first character). The candidate is accepted only when it switches on
    more than 40% of its characters.

    Returns ({'idNumber': text}, span) when accepted, (None, None) otherwise.
    """
    found = _idnum.search(string)
    if found is None:
        return None, None

    DIGIT, LETTER, OTHER = 0, 1, 2

    def char_class(ch):
        if ch in '0123456789':
            return DIGIT
        if ch in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
            return LETTER
        return OTHER

    result = found.groupdict()
    candidate = result['idNumber']

    # character classes, preceded by the assumed initial LETTER
    classes = [LETTER] + [char_class(ch) for ch in candidate]
    switch_count = sum(1 for before, after in zip(classes, classes[1:])
                       if before != after)
    switch_ratio = float(switch_count) / len(candidate)

    # only return the result as probable if we alternate often between
    # char type (more likely for hash values than for common words)
    if switch_ratio > 0.4:
        return result, found.span()

    return None, None
|
||||
|
||||
def process(mtree):
    """Run the id number guesser, with confidence 0.4, on the leaves of `mtree`."""
    guesser = SingleNodeGuesser(guess_idnumber, 0.4, log)
    guesser.process(mtree)
|
55
libs/guessit/transfo/guess_language.py
Normal file
55
libs/guessit/transfo/guess_language.py
Normal file
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import Guess
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.language import search_language
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def guess_language(string, node, skip=None):
    """Look for a language in `string`, the padded value of `node`.

    `skip` is an optional list of dicts with a 'node_idx' and a 'span' key.
    The entries whose 'node_idx' is a prefix of the index of `node` have
    their span translated to the coordinates of `string` (minus the node
    offset, plus one) and handed to search_language(); the other entries
    are dropped.

    Returns (Guess, span) when a language is found, (None, None) otherwise.
    """
    if skip:
        local_skip = []
        for entry in skip:
            prefix = entry['node_idx']
            absolute = entry['span']
            if node.node_idx[:len(prefix)] != prefix:
                continue
            local_skip.append((absolute[0] - node.offset + 1,
                               absolute[1] - node.offset + 1))
        skip = local_skip

    language, span, confidence = search_language(string, skip=skip)
    if not language:
        return None, None

    guess = Guess({'language': language},
                  confidence=confidence,
                  raw=string[span[0]:span[1]])
    return guess, span


# find_and_split_node() passes the node to matchers carrying this flag
guess_language.use_node = True
|
||||
|
||||
|
||||
def process(mtree, *args, **kwargs):
    """Apply guess_language to `mtree` through a SingleNodeGuesser.

    Any extra positional or keyword argument is forwarded to the
    SingleNodeGuesser constructor.
    """
    # Note: 'language' is promoted to 'subtitleLanguage' in the post_process transfo
    guesser = SingleNodeGuesser(guess_language, None, log, *args, **kwargs)
    guesser.process(mtree)
|
174
libs/guessit/transfo/guess_movie_title_from_position.py
Normal file
174
libs/guessit/transfo/guess_movie_title_from_position.py
Normal file
|
@ -0,0 +1,174 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import Guess
|
||||
import unicodedata
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def process(mtree):
    """Guess the movie title from the position of the unidentified groups.

    The heuristics below are tried in order; the first one that applies sets
    a 'title' guess on one leaf of `mtree` (plus 'filmSeries' and
    'filmNumber' guesses for the numbered film series case) and the function
    returns right away. Nothing is returned.
    """
    def found_property(node, name, value, confidence):
        # attach a single-property guess to the node and trace it
        node.guess = Guess({name: value}, confidence=confidence, raw=value)
        log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))

    def found_title(node, confidence):
        found_property(node, 'title', node.clean_value, confidence)

    def has_clean_value(leaf):
        return len(leaf.clean_value) > 0

    basename = mtree.node_at((-2,))
    basename_leftover = basename.unidentified_leaves(valid=has_clean_value)

    folder_leftover = []
    try:
        folder_leftover = mtree.node_at((-3,)).unidentified_leaves()
    except ValueError:
        # NOTE(review): presumably raised when there is no containing
        # folder in the path -- confirm against node_at()
        pass

    log.debug('folder: %s' % folder_leftover)
    log.debug('basename: %s' % basename_leftover)

    # specific case: the same group is the first leftover of both the folder
    # name and the filename, which makes it a good candidate for the title
    if folder_leftover and basename_leftover:
        folder_first = folder_leftover[0]
        if folder_first.clean_value == basename_leftover[0].clean_value:
            found_title(folder_first, confidence=0.8)
            return

    # specific case: the basename starts with a number followed by an
    # unidentified group, and the folder has an unidentified group too:
    # this is a film series
    # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
    try:
        series, number_leaf, title = (folder_leftover[0],
                                      basename_leftover[0],
                                      basename_leftover[1])
        leaves = basename.leaves()
        num = int(number_leaf.clean_value)

        log.debug('series: %s' % series.clean_value)
        log.debug('title: %s' % title.clean_value)

        distinct = (series.clean_value != title.clean_value and
                    series.clean_value != number_leaf.clean_value)
        if distinct and leaves.index(number_leaf) == 0 and leaves.index(title) == 1:
            found_title(title, confidence=0.6)
            found_property(series, 'filmSeries', series.clean_value,
                           confidence=0.6)
            found_property(number_leaf, 'filmNumber', num, confidence=0.6)
            return
    except Exception:
        # missing leftovers, a non-numeric first group, ...: the heuristic
        # simply does not apply
        pass

    # specific case:
    #  - movies/tttttt (yyyy)/tttttt.ccc
    try:
        if mtree.node_at((-4, 0)).value.lower() == 'movies':
            movie_folder = mtree.node_at((-3,))

            # Note: taking the whole containing folder as title whenever it
            # is an unguessed leaf was considered too generic (it might solve
            # all the unittests, as they all contain 'movies' in their path),
            # so only the groups located before the year are looked at.
            year_leaf = movie_folder.first_leaf_containing('year')
            before_year = movie_folder.previous_unidentified_leaves(year_leaf)

            found_title(before_year[0], confidence=0.8)
            return
    except Exception:
        pass

    # if we have either format or videoCodec in the folder containing the
    # file or one of its parents, then we should probably look for the title
    # in there rather than in the basename
    try:
        wanted = ['videoCodec', 'format', 'language']
        props = mtree.previous_leaves_containing(mtree.children[-2], wanted)
    except IndexError:
        props = []

    if props:
        group_idx = props[0].node_idx[0]
        same_group = all(leaf.node_idx[0] == group_idx for leaf in props)
        if same_group:
            # they all live in the same group, take leftover info from there
            candidates = mtree.node_at((group_idx,)).unidentified_leaves()
            if candidates:
                found_title(candidates[0], confidence=0.7)
                return

    # look for the title in the basename if there are some remaining
    # unidentified groups there
    if basename_leftover:
        title_candidate = basename_leftover[0]

        # if basename is only one word and the containing folder has at least
        # 3 words in it, we should take the title from the folder name
        # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
        # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi  <-- TODO: gets caught here?
        single_word = ' ' not in title_candidate.clean_value
        if (single_word and folder_leftover and
                folder_leftover[0].clean_value.count(' ') >= 2):
            found_title(folder_leftover[0], confidence=0.7)
            return

        # if there are only 2 unidentified groups, the first of which is
        # inside brackets or parentheses, we take the second one for the title:
        # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
        if len(basename_leftover) == 2 and title_candidate.is_explicit():
            found_title(basename_leftover[1], confidence=0.8)
            return

        # if all else fails, take the first remaining unidentified group in
        # the basename as title
        found_title(title_candidate, confidence=0.6)
        return

    # no leftover group in the basename, look in the folder name
    if folder_leftover:
        found_title(folder_leftover[0], confidence=0.5)
        return

    # if nothing worked, accept any unidentified leaf of the basename, even
    # one that was filtered out above
    basename = mtree.node_at((-2,))
    any_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
    if any_leftover:
        found_title(any_leftover[0], confidence=0.4)
        return
|
38
libs/guessit/transfo/guess_properties.py
Normal file
38
libs/guessit/transfo/guess_properties.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.patterns import find_properties
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def guess_properties(string):
    """Return the first property found in `string`.

    The result is a ({property: value}, (start, end)) pair built from the
    first entry returned by find_properties(), or (None, None) when there is
    no such entry.
    """
    try:
        first = find_properties(string)[0]
    except IndexError:
        # nothing was found
        return None, None

    name, value, start, end = first
    return {name: value}, (start, end)
|
||||
|
||||
|
||||
def process(mtree):
    """Apply guess_properties to `mtree` through a SingleNodeGuesser."""
    # NOTE(review): 1.0 is presumably the confidence given to the guesses --
    # confirm against SingleNodeGuesser
    guesser = SingleNodeGuesser(guess_properties, 1.0, log)
    guesser.process(mtree)
|
86
libs/guessit/transfo/guess_release_group.py
Normal file
86
libs/guessit/transfo/guess_release_group.py
Normal file
|
@ -0,0 +1,86 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.patterns import prop_multi, compute_canonical_form, _dash, _psep
|
||||
import re
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
def get_patterns(property_name):
    """Return all the patterns registered in prop_multi for `property_name`.

    The pattern lists found in the values of prop_multi[property_name] are
    flattened into a single list, with every occurrence of _dash replaced by
    _psep in each pattern.
    """
    flattened = []
    for patterns in prop_multi[property_name].values():
        flattened.extend(pattern.replace(_dash, _psep) for pattern in patterns)
    return flattened
|
||||
|
||||
CODECS = get_patterns('videoCodec')
FORMATS = get_patterns('format')
VAPIS = get_patterns('videoApi')

# the properties that can introduce a release group, in matching order
_RG_PREFIXES = (('videoCodec', CODECS),
                ('format', FORMATS),
                ('videoApi', VAPIS))

# RG names following a codec or format, with a potential space or dash inside the name
_RG_AFTER_SEP = r')[ \.-](?P<releaseGroup>.+?([- \.].*?)??)[ \.]'

# RG names directly following '<property>-', the property being preceded by a dot
_RG_AFTER_DASH = r')-(?P<releaseGroup>.*?)(-(.*?))?[ \.]'

# In both lists the property is the first capturing group opened and the
# release group is opened right after it closes.
GROUP_NAMES = [ re.compile(r'(?P<' + prop + r'>' + pattern + _RG_AFTER_SEP,
                           re.IGNORECASE)
                for prop, patterns in _RG_PREFIXES
                for pattern in patterns ]

GROUP_NAMES2 = [ re.compile(r'\.(?P<' + prop + r'>' + pattern + _RG_AFTER_DASH,
                            re.IGNORECASE)
                 for prop, patterns in _RG_PREFIXES
                 for pattern in patterns ]
|
||||
|
||||
def adjust_metadata(md):
    """Return a copy of `md` with its values in canonical form.

    Each value is replaced by what compute_canonical_form() returns for it,
    unless that result is falsy, in which case the value is kept as is.
    """
    adjusted = {}
    for name, value in md.items():
        canonical = compute_canonical_form(name, value)
        adjusted[name] = canonical or value
    return adjusted
|
||||
|
||||
|
||||
def guess_release_group(string):
    """Look for a release group following a codec, format or video API.

    Returns a (metadata, span) pair where metadata is the canonicalized
    dict of named groups and span goes from the start of the first group
    to the end of the second one, or (None, None) when nothing matches.
    """
    # first try to see whether we have both a known codec and a known release group
    for rexp in GROUP_NAMES:
        match = rexp.search(string)
        while match is not None:
            found = match.groupdict()
            candidate = found['releaseGroup']
            # make sure this is an actual release group we caught
            is_known = any(compute_canonical_form(kind, candidate)
                           for kind in ('releaseGroup', 'weakReleaseGroup'))
            if is_known:
                return adjust_metadata(found), (match.start(1), match.end(2))

            # we didn't find anything conclusive, keep searching one
            # character further than where this match started
            match = rexp.search(string, match.start() + 1)

    # pick anything as releaseGroup as long as we have a codec in front
    # this doesn't include a potential dash ('-') ending the release group
    # eg: [...].X264-HiS@SiLUHD-English.[...]
    for rexp in GROUP_NAMES2:
        match = rexp.search(string)
        if match is None:
            continue
        return adjust_metadata(match.groupdict()), (match.start(1), match.end(2))

    return None, None
|
||||
|
||||
|
||||
def process(mtree):
    """Apply guess_release_group to `mtree` through a SingleNodeGuesser."""
    # NOTE(review): 0.8 is presumably the confidence given to the guesses --
    # confirm against SingleNodeGuesser
    guesser = SingleNodeGuesser(guess_release_group, 0.8, log)
    guesser.process(mtree)
|
50
libs/guessit/transfo/guess_video_rexps.py
Normal file
50
libs/guessit/transfo/guess_video_rexps.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import Guess
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.patterns import video_rexps, sep
|
||||
import re
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def guess_video_rexps(string):
    """Match `string` against the video regexps and return the first hit.

    Each entry of video_rexps is a (regexp, confidence, span_adjust) triple.
    The regexp is searched case-insensitively and has to be surrounded by
    separators.

    Returns a (Guess, span) pair for the first regexp that matches, span
    being expressed in the coordinates of `string`, or (None, None) when no
    regexp matches.
    """
    # pad the string so that a pattern located at its very beginning or end
    # still has a separator on both sides
    padded = '-' + string + '-'

    for rexp, confidence, span_adjust in video_rexps:
        match = re.search(sep + rexp + sep, padded, re.IGNORECASE)
        if not match:
            continue

        metadata = match.groupdict()
        # is this the better place to put it? (maybe, as it is at least
        # the soonest that we can catch it)
        if metadata.get('cdNumberTotal', -1) is None:
            del metadata['cdNumberTotal']

        # The match includes the two surrounding separators and was found in
        # the padded string. Its start in `padded` is the start of the
        # pattern in `string`, and its end has to be moved back by 2 (the
        # leading pad plus the trailing separator).
        # NOTE(review): this assumes `sep` matches exactly one character,
        # as the original span arithmetic already did -- confirm.
        span = (match.start() + span_adjust[0],
                match.end() + span_adjust[1] - 2)

        # the span refers to the unpadded string, so the raw text has to be
        # sliced from it too (slicing the padded string gave a text shifted
        # by one character to the left)
        raw = string[span[0]:span[1]]
        return Guess(metadata, confidence=confidence, raw=raw), span

    return None, None
|
||||
|
||||
|
||||
def process(mtree):
    """Apply guess_video_rexps to `mtree` through a SingleNodeGuesser."""
    # no confidence is given here: guess_video_rexps() returns Guess objects
    # that already carry the confidence of the regexp that matched
    guesser = SingleNodeGuesser(guess_video_rexps, None, log)
    guesser.process(mtree)
|
62
libs/guessit/transfo/guess_weak_episodes_rexps.py
Normal file
62
libs/guessit/transfo/guess_weak_episodes_rexps.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import Guess
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.patterns import weak_episode_rexps
|
||||
import re
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def guess_weak_episodes_rexps(string, node):
    """Look for an episode number in `string` using the weak regexps.

    Nothing is searched when the tree `node` belongs to already has an
    'episodeNumber'. A number above 100 is read as a season followed by a
    two-digit episode number.

    Returns a (Guess, span) pair, or (None, None) when nothing is found.
    """
    if 'episodeNumber' in node.root.info:
        return None, None

    for rexp, span_adjust in weak_episode_rexps:
        match = re.search(rexp, string, re.IGNORECASE)
        if not match:
            continue

        metadata = match.groupdict()
        span = (match.start() + span_adjust[0],
                match.end() + span_adjust[1])
        raw = string[span[0]:span[1]]

        number = int(metadata['episodeNumber'])
        if number <= 100:
            return Guess(metadata, confidence=0.3, raw=raw), span

        season, episode = divmod(number, 100)
        # episodes which have a season > 25 are most likely errors
        # (Simpsons is at 23!)
        if season > 25:
            continue

        return Guess({ 'season': season,
                       'episodeNumber': episode },
                     confidence=0.6, raw=raw), span

    return None, None


# guess_weak_episodes_rexps() takes the node as second argument
# NOTE(review): presumably read by SingleNodeGuesser to decide whether to
# pass the node along -- confirm.
guess_weak_episodes_rexps.use_node = True
|
||||
|
||||
|
||||
def process(mtree):
    """Apply guess_weak_episodes_rexps to `mtree` through a SingleNodeGuesser."""
    # NOTE(review): 0.6 is presumably a default confidence; the guesses
    # returned by guess_weak_episodes_rexps() carry their own -- confirm
    guesser = SingleNodeGuesser(guess_weak_episodes_rexps, 0.6, log)
    guesser.process(mtree)
|
39
libs/guessit/transfo/guess_website.py
Normal file
39
libs/guessit/transfo/guess_website.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.patterns import websites
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def guess_website(string):
    """Return the first known website whose name appears in `string`.

    The comparison is case-insensitive. The result is a
    ({'website': site}, (start, end)) pair, or (None, None) when no website
    is found.
    """
    haystack = string.lower()
    for site in websites:
        start = haystack.find(site.lower())
        if start == -1:
            continue
        return {'website': site}, (start, start + len(site))

    return None, None
|
||||
|
||||
|
||||
def process(mtree):
    """Apply guess_website to `mtree` through a SingleNodeGuesser."""
    # NOTE(review): 1.0 is presumably the confidence given to the guesses --
    # confirm against SingleNodeGuesser
    guesser = SingleNodeGuesser(guess_website, 1.0, log)
    guesser.process(mtree)
|
50
libs/guessit/transfo/guess_year.py
Normal file
50
libs/guessit/transfo/guess_year.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.transfo import SingleNodeGuesser
|
||||
from guessit.date import search_year
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def guess_year(string):
    """Return ({'year': year}, span) for the year found by search_year().

    (None, None) is returned when search_year() does not report a year.
    """
    year, span = search_year(string)
    if not year:
        return None, None

    return { 'year': year }, span
|
||||
|
||||
def guess_year_skip_first(string):
    """Return the guess for the second year found in `string`.

    The first year reported by search_year() is skipped and guess_year() is
    run on what follows it. The span of the result is shifted back to the
    coordinates of `string`. (None, None) is returned when there are fewer
    than two years.
    """
    first_year, first_span = search_year(string)
    if not first_year:
        return None, None

    # only look at what comes after the first year
    offset = first_span[1]
    second_guess, second_span = guess_year(string[offset:])
    if not second_guess:
        return None, None

    return second_guess, (second_span[0] + offset, second_span[1] + offset)
|
||||
|
||||
|
||||
def process(mtree, skip_first_year=False):
    """Apply the year guesser to `mtree` through a SingleNodeGuesser.

    When `skip_first_year` is true, guess_year_skip_first is used instead of
    guess_year.
    """
    guesser_func = guess_year_skip_first if skip_first_year else guess_year
    # NOTE(review): 1.0 is presumably the confidence given to the guesses --
    # confirm against SingleNodeGuesser
    SingleNodeGuesser(guesser_func, 1.0, log).process(mtree)
|
73
libs/guessit/transfo/post_process.py
Normal file
73
libs/guessit/transfo/post_process.py
Normal file
|
@ -0,0 +1,73 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.patterns import subtitle_exts
|
||||
from guessit.textutils import reorder_title, find_words
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def process(mtree):
    """Post-process the guesses stored in the nodes of `mtree`.

    1- 'language' guesses are promoted to 'subtitleLanguage' where it makes
       sense
    2- 'series' guesses are passed through reorder_title(), so that a
       ", the" at the end of a series title gets prepended to it

    The guesses are modified in place; nothing is returned.
    """
    def promote_subtitle(node):
        """Turn the 'language' guess of `node` into a 'subtitleLanguage' one.

        Several of the rules below can apply to the same node. Once the
        language has been promoted there is nothing left to do, so further
        calls are no-ops instead of looking up the 'language' key that was
        just deleted.
        """
        if 'language' not in node.guess:
            return
        node.guess.set('subtitleLanguage', node.guess['language'],
                       confidence=node.guess.confidence('language'))
        del node.guess['language']

    # 1- try to promote language to subtitle language where it makes sense
    for node in mtree.nodes():
        if 'language' not in node.guess:
            continue

        # - if we matched a language in a file with a sub extension and that
        #   the group is the last group of the filename, it is probably the
        #   language of the subtitle
        #   (eg: 'xxx.english.srt')
        if (mtree.node_at((-1,)).value.lower() in subtitle_exts and
                node == mtree.leaves()[-2]):
            promote_subtitle(node)

        # - if we find the word 'sub' before the language, and in the same
        #   explicit group, then upgrade the language
        explicit_group = mtree.node_at(node.node_idx[:2])
        group_str = explicit_group.value.lower()

        if ('sub' in find_words(group_str) and
                0 <= group_str.find('sub') < (node.span[0] - explicit_group.span[0])):
            promote_subtitle(node)

        # - if a language is in an explicit group just preceded by "st",
        #   it is a subtitle language (eg: '...st[fr-eng]...')
        try:
            idx = node.node_idx
            previous = mtree.node_at((idx[0], idx[1] - 1)).leaves()[-1]
            if previous.value.lower()[-2:] == 'st':
                promote_subtitle(node)
        except IndexError:
            # no usable previous group
            pass

    # 2- ", the" at the end of a series title should be prepended to it
    for node in mtree.nodes():
        if 'series' not in node.guess:
            continue

        node.guess['series'] = reorder_title(node.guess['series'])
|
44
libs/guessit/transfo/split_explicit_groups.py
Normal file
44
libs/guessit/transfo/split_explicit_groups.py
Normal file
|
@ -0,0 +1,44 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.textutils import find_first_level_groups
|
||||
from guessit.patterns import group_delimiters
|
||||
import functools
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def process(mtree):
    """Split each child of `mtree` into its explicit groups, that is, those
    found by find_first_level_groups() for each pair of group_delimiters
    (parentheses, square brackets, curly braces, ...).

    The children are split in place; nothing is returned."""
    for child in mtree.children:
        groups = find_first_level_groups(child.value, group_delimiters[0])

        # split again every group found so far, one kind of delimiters at a time
        for delimiters in group_delimiters:
            refined = []
            for group in groups:
                refined = refined + find_first_level_groups(group, delimiters)
            groups = refined

        # Splitting the groups on '-' as well is not done at the moment: it
        # is not strong enough and can break other patterns, such as dates,
        # etc...

        child.split_on_components(groups)
|
42
libs/guessit/transfo/split_on_dash.py
Normal file
42
libs/guessit/transfo/split_on_dash.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit.patterns import sep
|
||||
import re
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def process(mtree):
    """Partition the unidentified leaves of `mtree` around their dashes.

    Only the dashes that have a separator on both sides are considered. Each
    leaf is partitioned at the start and end indices of every such match.
    """
    spaced_dash = re.compile(sep + '-' + sep)

    for leaf in mtree.unidentified_leaves():
        boundaries = []
        # non-overlapping matches, from left to right
        for match in spaced_dash.finditer(leaf.value):
            boundaries.extend(match.span())

        if boundaries:
            leaf.partition(boundaries)
|
36
libs/guessit/transfo/split_path_components.py
Normal file
36
libs/guessit/transfo/split_path_components.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# GuessIt - A library for guessing information from filenames
|
||||
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
|
||||
#
|
||||
# GuessIt is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the Lesser GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GuessIt is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# Lesser GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the Lesser GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from guessit import fileutils
|
||||
import os.path
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def process(mtree):
    """Split `mtree` on the components [ dir*, basename, ext ] of its value."""
    parts = fileutils.split_path(mtree.value)

    # replace the last component by its basename and its extension
    stem, extension = os.path.splitext(parts.pop())
    parts.extend([stem, extension[1:]])  # [1:] removes the '.' from the extension

    mtree.split_on_components(parts)
|
Loading…
Add table
Add a link
Reference in a new issue