Move common libs to libs/common

This commit is contained in:
Labrys of Knossos 2018-12-16 13:30:24 -05:00
commit 1f4bd41bcc
1612 changed files with 962 additions and 10 deletions

View file

@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
__title__ = 'babelfish'
__version__ = '0.5.5-dev'
__author__ = 'Antoine Bertin'
__license__ = 'BSD'
__copyright__ = 'Copyright 2015 the BabelFish authors'
import sys
if sys.version_info[0] >= 3:
basestr = str
else:
basestr = basestring
from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
CountryReverseConverter)
from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
from .script import SCRIPTS, SCRIPT_MATRIX, Script

View file

@ -0,0 +1,287 @@
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
import collections
from pkg_resources import iter_entry_points, EntryPoint
from ..exceptions import LanguageConvertError, LanguageReverseError
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
class CaseInsensitiveDict(collections.MutableMapping):
"""A case-insensitive ``dict``-like object.
Implements all methods and operations of
``collections.MutableMapping`` as well as dict's ``copy``. Also
provides ``lower_items``.
All keys are expected to be strings. The structure remembers the
case of the last key to be set, and ``iter(instance)``,
``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
will contain case-sensitive keys. However, querying and contains
testing is case insensitive:
cid = CaseInsensitiveDict()
cid['English'] = 'eng'
cid['ENGLISH'] == 'eng' # True
list(cid) == ['English'] # True
If the constructor, ``.update``, or equality comparison
operations are given keys that have equal ``.lower()``s, the
behavior is undefined.
"""
def __init__(self, data=None, **kwargs):
self._store = dict()
if data is None:
data = {}
self.update(data, **kwargs)
def __setitem__(self, key, value):
# Use the lowercased key for lookups, but store the actual
# key alongside the value.
self._store[key.lower()] = (key, value)
def __getitem__(self, key):
return self._store[key.lower()][1]
def __delitem__(self, key):
del self._store[key.lower()]
def __iter__(self):
return (casedkey for casedkey, mappedvalue in self._store.values())
def __len__(self):
return len(self._store)
def lower_items(self):
"""Like iteritems(), but with all lowercase keys."""
return (
(lowerkey, keyval[1])
for (lowerkey, keyval)
in self._store.items()
)
def __eq__(self, other):
if isinstance(other, collections.Mapping):
other = CaseInsensitiveDict(other)
else:
return NotImplemented
# Compare insensitively
return dict(self.lower_items()) == dict(other.lower_items())
# Copy is required
def copy(self):
return CaseInsensitiveDict(self._store.values())
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
class LanguageConverter(object):
"""A :class:`LanguageConverter` supports converting an alpha3 language code with an
alpha2 country code and a script code into a custom code
.. attribute:: codes
Set of possible custom codes
"""
def convert(self, alpha3, country=None, script=None):
"""Convert an alpha3 language code with an alpha2 country code and a script code
into a custom code
:param string alpha3: ISO-639-3 language code
:param country: ISO-3166 country code, if any
:type country: string or None
:param script: ISO-15924 script code, if any
:type script: string or None
:return: the corresponding custom code
:rtype: string
:raise: :class:`~babelfish.exceptions.LanguageConvertError`
"""
raise NotImplementedError
class LanguageReverseConverter(LanguageConverter):
"""A :class:`LanguageConverter` able to reverse a custom code into a alpha3
ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
"""
def reverse(self, code):
"""Reverse a custom code into alpha3, country and script code
:param string code: custom code to reverse
:return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
:rtype: tuple
:raise: :class:`~babelfish.exceptions.LanguageReverseError`
"""
raise NotImplementedError
class LanguageEquivalenceConverter(LanguageReverseConverter):
"""A :class:`LanguageEquivalenceConverter` is a utility class that allows you to easily define a
:class:`LanguageReverseConverter` by only specifying the dict from alpha3 to their corresponding symbols.
You must specify the dict of equivalence as a class variable named SYMBOLS.
If you also set the class variable CASE_SENSITIVE to ``True`` then the reverse conversion function will be
case-sensitive (it is case-insensitive by default).
Example::
class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}
"""
CASE_SENSITIVE = False
def __init__(self):
self.codes = set()
self.to_symbol = {}
if self.CASE_SENSITIVE:
self.from_symbol = {}
else:
self.from_symbol = CaseInsensitiveDict()
for alpha3, symbol in self.SYMBOLS.items():
self.to_symbol[alpha3] = symbol
self.from_symbol[symbol] = (alpha3, None, None)
self.codes.add(symbol)
def convert(self, alpha3, country=None, script=None):
try:
return self.to_symbol[alpha3]
except KeyError:
raise LanguageConvertError(alpha3, country, script)
def reverse(self, code):
try:
return self.from_symbol[code]
except KeyError:
raise LanguageReverseError(code)
class CountryConverter(object):
"""A :class:`CountryConverter` supports converting an alpha2 country code
into a custom code
.. attribute:: codes
Set of possible custom codes
"""
def convert(self, alpha2):
"""Convert an alpha2 country code into a custom code
:param string alpha2: ISO-3166-1 language code
:return: the corresponding custom code
:rtype: string
:raise: :class:`~babelfish.exceptions.CountryConvertError`
"""
raise NotImplementedError
class CountryReverseConverter(CountryConverter):
"""A :class:`CountryConverter` able to reverse a custom code into a alpha2
ISO-3166-1 country code
"""
def reverse(self, code):
"""Reverse a custom code into alpha2 code
:param string code: custom code to reverse
:return: the corresponding alpha2 ISO-3166-1 country code
:rtype: string
:raise: :class:`~babelfish.exceptions.CountryReverseError`
"""
raise NotImplementedError
class ConverterManager(object):
"""Manager for babelfish converters behaving like a dict with lazy loading
Loading is done in this order:
* Entry point converters
* Registered converters
* Internal converters
.. attribute:: entry_point
The entry point where to look for converters
.. attribute:: internal_converters
Internal converters with entry point syntax
"""
entry_point = ''
internal_converters = []
def __init__(self):
#: Registered converters with entry point syntax
self.registered_converters = []
#: Loaded converters
self.converters = {}
def __getitem__(self, name):
"""Get a converter, lazy loading it if necessary"""
if name in self.converters:
return self.converters[name]
for ep in iter_entry_points(self.entry_point):
if ep.name == name:
self.converters[ep.name] = ep.load()()
return self.converters[ep.name]
for ep in (EntryPoint.parse(c) for c in self.registered_converters + self.internal_converters):
if ep.name == name:
# `require` argument of ep.load() is deprecated in newer versions of setuptools
if hasattr(ep, 'resolve'):
plugin = ep.resolve()
elif hasattr(ep, '_load'):
plugin = ep._load()
else:
plugin = ep.load(require=False)
self.converters[ep.name] = plugin()
return self.converters[ep.name]
raise KeyError(name)
def __setitem__(self, name, converter):
"""Load a converter"""
self.converters[name] = converter
def __delitem__(self, name):
"""Unload a converter"""
del self.converters[name]
def __iter__(self):
"""Iterator over loaded converters"""
return iter(self.converters)
def register(self, entry_point):
"""Register a converter
:param string entry_point: converter to register (entry point syntax)
:raise: ValueError if already registered
"""
if entry_point in self.registered_converters:
raise ValueError('Already registered')
self.registered_converters.insert(0, entry_point)
def unregister(self, entry_point):
"""Unregister a converter
:param string entry_point: converter to unregister (entry point syntax)
"""
self.registered_converters.remove(entry_point)
def __contains__(self, name):
return name in self.converters

View file

@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha2Converter(LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.alpha2:
SYMBOLS[iso_language.alpha3] = iso_language.alpha2

View file

@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha3BConverter(LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.alpha3b:
SYMBOLS[iso_language.alpha3] = iso_language.alpha3b

View file

@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class Alpha3TConverter(LanguageEquivalenceConverter):
CASE_SENSITIVE = True
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.alpha3t:
SYMBOLS[iso_language.alpha3] = iso_language.alpha3t

View file

@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import CountryReverseConverter, CaseInsensitiveDict
from ..country import COUNTRY_MATRIX
from ..exceptions import CountryConvertError, CountryReverseError
class CountryNameConverter(CountryReverseConverter):
def __init__(self):
self.codes = set()
self.to_name = {}
self.from_name = CaseInsensitiveDict()
for country in COUNTRY_MATRIX:
self.codes.add(country.name)
self.to_name[country.alpha2] = country.name
self.from_name[country.name] = country.alpha2
def convert(self, alpha2):
if alpha2 not in self.to_name:
raise CountryConvertError(alpha2)
return self.to_name[alpha2]
def reverse(self, name):
if name not in self.from_name:
raise CountryReverseError(name)
return self.from_name[name]

View file

@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageEquivalenceConverter
from ..language import LANGUAGE_MATRIX
class NameConverter(LanguageEquivalenceConverter):
CASE_SENSITIVE = False
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
if iso_language.name:
SYMBOLS[iso_language.alpha3] = iso_language.name

View file

@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageReverseConverter, CaseInsensitiveDict
from ..exceptions import LanguageReverseError
from ..language import language_converters
class OpenSubtitlesConverter(LanguageReverseConverter):
def __init__(self):
self.alpha3b_converter = language_converters['alpha3b']
self.alpha2_converter = language_converters['alpha2']
self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
'scc': ('srp', None), 'mne': ('srp', 'ME')})
self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(['pob', 'pb', 'scc', 'mne']))
def convert(self, alpha3, country=None, script=None):
alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
if (alpha3b, country) in self.to_opensubtitles:
return self.to_opensubtitles[(alpha3b, country)]
return alpha3b
def reverse(self, opensubtitles):
if opensubtitles in self.from_opensubtitles:
return self.from_opensubtitles[opensubtitles]
for conv in [self.alpha3b_converter, self.alpha2_converter]:
try:
return conv.reverse(opensubtitles)
except LanguageReverseError:
pass
raise LanguageReverseError(opensubtitles)

View file

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageConverter
from ..exceptions import LanguageConvertError
from ..language import LANGUAGE_MATRIX
class ScopeConverter(LanguageConverter):
FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
SYMBOLS[iso_language.alpha3] = iso_language.scope
codes = set(SYMBOLS.values())
def convert(self, alpha3, country=None, script=None):
if self.SYMBOLS[alpha3] in self.FULLNAME:
return self.FULLNAME[self.SYMBOLS[alpha3]]
raise LanguageConvertError(alpha3, country, script)

View file

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from . import LanguageConverter
from ..exceptions import LanguageConvertError
from ..language import LANGUAGE_MATRIX
class LanguageTypeConverter(LanguageConverter):
FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
SYMBOLS = {}
for iso_language in LANGUAGE_MATRIX:
SYMBOLS[iso_language.alpha3] = iso_language.type
codes = set(SYMBOLS.values())
def convert(self, alpha3, country=None, script=None):
if self.SYMBOLS[alpha3] in self.FULLNAME:
return self.FULLNAME[self.SYMBOLS[alpha3]]
raise LanguageConvertError(alpha3, country, script)

View file

@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from . import basestr
COUNTRIES = {}
COUNTRY_MATRIX = []
#: The namedtuple used in the :data:`COUNTRY_MATRIX`
IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])
f = resource_stream('babelfish', 'data/iso-3166-1.txt')
f.readline()
for l in f:
iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
COUNTRIES[iso_country.alpha2] = iso_country.name
COUNTRY_MATRIX.append(iso_country)
f.close()
class CountryConverterManager(ConverterManager):
""":class:`~babelfish.converters.ConverterManager` for country converters"""
entry_point = 'babelfish.country_converters'
internal_converters = ['name = babelfish.converters.countryname:CountryNameConverter']
country_converters = CountryConverterManager()
class CountryMeta(type):
"""The :class:`Country` metaclass
Dynamically redirect :meth:`Country.frommycode` to :meth:`Country.fromcode` with the ``mycode`` `converter`
"""
def __getattr__(cls, name):
if name.startswith('from'):
return partial(cls.fromcode, converter=name[4:])
return type.__getattribute__(cls, name)
class Country(CountryMeta(str('CountryBase'), (object,), {})):
"""A country on Earth
A country is represented by a 2-letter code from the ISO-3166 standard
:param string country: 2-letter ISO-3166 country code
"""
def __init__(self, country):
if country not in COUNTRIES:
raise ValueError('%r is not a valid country' % country)
#: ISO-3166 2-letter country code
self.alpha2 = country
@classmethod
def fromcode(cls, code, converter):
"""Create a :class:`Country` by its `code` using `converter` to
:meth:`~babelfish.converters.CountryReverseConverter.reverse` it
:param string code: the code to reverse
:param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
:return: the corresponding :class:`Country` instance
:rtype: :class:`Country`
"""
return cls(country_converters[converter].reverse(code))
def __getstate__(self):
return self.alpha2
def __setstate__(self, state):
self.alpha2 = state
def __getattr__(self, name):
try:
return country_converters[name].convert(self.alpha2)
except KeyError:
raise AttributeError(name)
def __hash__(self):
return hash(self.alpha2)
def __eq__(self, other):
if isinstance(other, basestr):
return str(self) == other
if not isinstance(other, Country):
return False
return self.alpha2 == other.alpha2
def __ne__(self, other):
return not self == other
def __repr__(self):
return '<Country [%s]>' % self
def __str__(self):
return self.alpha2

View file

@ -0,0 +1,250 @@
Country Name;ISO 3166-1-alpha-2 code
AFGHANISTAN;AF
ÅLAND ISLANDS;AX
ALBANIA;AL
ALGERIA;DZ
AMERICAN SAMOA;AS
ANDORRA;AD
ANGOLA;AO
ANGUILLA;AI
ANTARCTICA;AQ
ANTIGUA AND BARBUDA;AG
ARGENTINA;AR
ARMENIA;AM
ARUBA;AW
AUSTRALIA;AU
AUSTRIA;AT
AZERBAIJAN;AZ
BAHAMAS;BS
BAHRAIN;BH
BANGLADESH;BD
BARBADOS;BB
BELARUS;BY
BELGIUM;BE
BELIZE;BZ
BENIN;BJ
BERMUDA;BM
BHUTAN;BT
BOLIVIA, PLURINATIONAL STATE OF;BO
BONAIRE, SINT EUSTATIUS AND SABA;BQ
BOSNIA AND HERZEGOVINA;BA
BOTSWANA;BW
BOUVET ISLAND;BV
BRAZIL;BR
BRITISH INDIAN OCEAN TERRITORY;IO
BRUNEI DARUSSALAM;BN
BULGARIA;BG
BURKINA FASO;BF
BURUNDI;BI
CAMBODIA;KH
CAMEROON;CM
CANADA;CA
CAPE VERDE;CV
CAYMAN ISLANDS;KY
CENTRAL AFRICAN REPUBLIC;CF
CHAD;TD
CHILE;CL
CHINA;CN
CHRISTMAS ISLAND;CX
COCOS (KEELING) ISLANDS;CC
COLOMBIA;CO
COMOROS;KM
CONGO;CG
CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
COOK ISLANDS;CK
COSTA RICA;CR
CÔTE D'IVOIRE;CI
CROATIA;HR
CUBA;CU
CURAÇAO;CW
CYPRUS;CY
CZECH REPUBLIC;CZ
DENMARK;DK
DJIBOUTI;DJ
DOMINICA;DM
DOMINICAN REPUBLIC;DO
ECUADOR;EC
EGYPT;EG
EL SALVADOR;SV
EQUATORIAL GUINEA;GQ
ERITREA;ER
ESTONIA;EE
ETHIOPIA;ET
FALKLAND ISLANDS (MALVINAS);FK
FAROE ISLANDS;FO
FIJI;FJ
FINLAND;FI
FRANCE;FR
FRENCH GUIANA;GF
FRENCH POLYNESIA;PF
FRENCH SOUTHERN TERRITORIES;TF
GABON;GA
GAMBIA;GM
GEORGIA;GE
GERMANY;DE
GHANA;GH
GIBRALTAR;GI
GREECE;GR
GREENLAND;GL
GRENADA;GD
GUADELOUPE;GP
GUAM;GU
GUATEMALA;GT
GUERNSEY;GG
GUINEA;GN
GUINEA-BISSAU;GW
GUYANA;GY
HAITI;HT
HEARD ISLAND AND MCDONALD ISLANDS;HM
HOLY SEE (VATICAN CITY STATE);VA
HONDURAS;HN
HONG KONG;HK
HUNGARY;HU
ICELAND;IS
INDIA;IN
INDONESIA;ID
IRAN, ISLAMIC REPUBLIC OF;IR
IRAQ;IQ
IRELAND;IE
ISLE OF MAN;IM
ISRAEL;IL
ITALY;IT
JAMAICA;JM
JAPAN;JP
JERSEY;JE
JORDAN;JO
KAZAKHSTAN;KZ
KENYA;KE
KIRIBATI;KI
KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
KOREA, REPUBLIC OF;KR
KUWAIT;KW
KYRGYZSTAN;KG
LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
LATVIA;LV
LEBANON;LB
LESOTHO;LS
LIBERIA;LR
LIBYA;LY
LIECHTENSTEIN;LI
LITHUANIA;LT
LUXEMBOURG;LU
MACAO;MO
MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
MADAGASCAR;MG
MALAWI;MW
MALAYSIA;MY
MALDIVES;MV
MALI;ML
MALTA;MT
MARSHALL ISLANDS;MH
MARTINIQUE;MQ
MAURITANIA;MR
MAURITIUS;MU
MAYOTTE;YT
MEXICO;MX
MICRONESIA, FEDERATED STATES OF;FM
MOLDOVA, REPUBLIC OF;MD
MONACO;MC
MONGOLIA;MN
MONTENEGRO;ME
MONTSERRAT;MS
MOROCCO;MA
MOZAMBIQUE;MZ
MYANMAR;MM
NAMIBIA;NA
NAURU;NR
NEPAL;NP
NETHERLANDS;NL
NEW CALEDONIA;NC
NEW ZEALAND;NZ
NICARAGUA;NI
NIGER;NE
NIGERIA;NG
NIUE;NU
NORFOLK ISLAND;NF
NORTHERN MARIANA ISLANDS;MP
NORWAY;NO
OMAN;OM
PAKISTAN;PK
PALAU;PW
PALESTINE, STATE OF;PS
PANAMA;PA
PAPUA NEW GUINEA;PG
PARAGUAY;PY
PERU;PE
PHILIPPINES;PH
PITCAIRN;PN
POLAND;PL
PORTUGAL;PT
PUERTO RICO;PR
QATAR;QA
RÉUNION;RE
ROMANIA;RO
RUSSIAN FEDERATION;RU
RWANDA;RW
SAINT BARTHÉLEMY;BL
SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
SAINT KITTS AND NEVIS;KN
SAINT LUCIA;LC
SAINT MARTIN (FRENCH PART);MF
SAINT PIERRE AND MIQUELON;PM
SAINT VINCENT AND THE GRENADINES;VC
SAMOA;WS
SAN MARINO;SM
SAO TOME AND PRINCIPE;ST
SAUDI ARABIA;SA
SENEGAL;SN
SERBIA;RS
SEYCHELLES;SC
SIERRA LEONE;SL
SINGAPORE;SG
SINT MAARTEN (DUTCH PART);SX
SLOVAKIA;SK
SLOVENIA;SI
SOLOMON ISLANDS;SB
SOMALIA;SO
SOUTH AFRICA;ZA
SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
SOUTH SUDAN;SS
SPAIN;ES
SRI LANKA;LK
SUDAN;SD
SURINAME;SR
SVALBARD AND JAN MAYEN;SJ
SWAZILAND;SZ
SWEDEN;SE
SWITZERLAND;CH
SYRIAN ARAB REPUBLIC;SY
TAIWAN, PROVINCE OF CHINA;TW
TAJIKISTAN;TJ
TANZANIA, UNITED REPUBLIC OF;TZ
THAILAND;TH
TIMOR-LESTE;TL
TOGO;TG
TOKELAU;TK
TONGA;TO
TRINIDAD AND TOBAGO;TT
TUNISIA;TN
TURKEY;TR
TURKMENISTAN;TM
TURKS AND CAICOS ISLANDS;TC
TUVALU;TV
UGANDA;UG
UKRAINE;UA
UNITED ARAB EMIRATES;AE
UNITED KINGDOM;GB
UNITED STATES;US
UNITED STATES MINOR OUTLYING ISLANDS;UM
URUGUAY;UY
UZBEKISTAN;UZ
VANUATU;VU
VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
VIET NAM;VN
VIRGIN ISLANDS, BRITISH;VG
VIRGIN ISLANDS, U.S.;VI
WALLIS AND FUTUNA;WF
WESTERN SAHARA;EH
YEMEN;YE
ZAMBIA;ZM
ZIMBABWE;ZW

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,176 @@
#
# ISO 15924 - Codes for the representation of names of scripts
# Codes pour la représentation des noms décritures
# Format:
# Code;N°;English Name;Nom français;PVA;Date
#
Afak;439;Afaka;afaka;;2010-12-21
Aghb;239;Caucasian Albanian;aghbanien;;2012-10-16
Ahom;338;Ahom, Tai Ahom;âhom;;2012-11-01
Arab;160;Arabic;arabe;Arabic;2004-05-01
Armi;124;Imperial Aramaic;araméen impérial;Imperial_Aramaic;2009-06-01
Armn;230;Armenian;arménien;Armenian;2004-05-01
Avst;134;Avestan;avestique;Avestan;2009-06-01
Bali;360;Balinese;balinais;Balinese;2006-10-10
Bamu;435;Bamum;bamoum;Bamum;2009-06-01
Bass;259;Bassa Vah;bassa;;2010-03-26
Batk;365;Batak;batik;Batak;2010-07-23
Beng;325;Bengali;bengalî;Bengali;2004-05-01
Blis;550;Blissymbols;symboles Bliss;;2004-05-01
Bopo;285;Bopomofo;bopomofo;Bopomofo;2004-05-01
Brah;300;Brahmi;brahma;Brahmi;2010-07-23
Brai;570;Braille;braille;Braille;2004-05-01
Bugi;367;Buginese;bouguis;Buginese;2006-06-21
Buhd;372;Buhid;bouhide;Buhid;2004-05-01
Cakm;349;Chakma;chakma;Chakma;2012-02-06
Cans;440;Unified Canadian Aboriginal Syllabics;syllabaire autochtone canadien unifié;Canadian_Aboriginal;2004-05-29
Cari;201;Carian;carien;Carian;2007-07-02
Cham;358;Cham;cham (čam, tcham);Cham;2009-11-11
Cher;445;Cherokee;tchérokî;Cherokee;2004-05-01
Cirt;291;Cirth;cirth;;2004-05-01
Copt;204;Coptic;copte;Coptic;2006-06-21
Cprt;403;Cypriot;syllabaire chypriote;Cypriot;2004-05-01
Cyrl;220;Cyrillic;cyrillique;Cyrillic;2004-05-01
Cyrs;221;Cyrillic (Old Church Slavonic variant);cyrillique (variante slavonne);;2004-05-01
Deva;315;Devanagari (Nagari);dévanâgarî;Devanagari;2004-05-01
Dsrt;250;Deseret (Mormon);déseret (mormon);Deseret;2004-05-01
Dupl;755;Duployan shorthand, Duployan stenography;sténographie Duployé;;2010-07-18
Egyd;070;Egyptian demotic;démotique égyptien;;2004-05-01
Egyh;060;Egyptian hieratic;hiératique égyptien;;2004-05-01
Egyp;050;Egyptian hieroglyphs;hiéroglyphes égyptiens;Egyptian_Hieroglyphs;2009-06-01
Elba;226;Elbasan;elbasan;;2010-07-18
Ethi;430;Ethiopic (Geʻez);éthiopien (geʻez, guèze);Ethiopic;2004-10-25
Geor;240;Georgian (Mkhedruli);géorgien (mkhédrouli);Georgian;2004-05-29
Geok;241;Khutsuri (Asomtavruli and Nuskhuri);khoutsouri (assomtavrouli et nouskhouri);Georgian;2012-10-16
Glag;225;Glagolitic;glagolitique;Glagolitic;2006-06-21
Goth;206;Gothic;gotique;Gothic;2004-05-01
Gran;343;Grantha;grantha;;2009-11-11
Grek;200;Greek;grec;Greek;2004-05-01
Gujr;320;Gujarati;goudjarâtî (gujrâtî);Gujarati;2004-05-01
Guru;310;Gurmukhi;gourmoukhî;Gurmukhi;2004-05-01
Hang;286;Hangul (Hangŭl, Hangeul);hangûl (hangŭl, hangeul);Hangul;2004-05-29
Hani;500;Han (Hanzi, Kanji, Hanja);idéogrammes han (sinogrammes);Han;2009-02-23
Hano;371;Hanunoo (Hanunóo);hanounóo;Hanunoo;2004-05-29
Hans;501;Han (Simplified variant);idéogrammes han (variante simplifiée);;2004-05-29
Hant;502;Han (Traditional variant);idéogrammes han (variante traditionnelle);;2004-05-29
Hatr;127;Hatran;hatrénien;;2012-11-01
Hebr;125;Hebrew;hébreu;Hebrew;2004-05-01
Hira;410;Hiragana;hiragana;Hiragana;2004-05-01
Hluw;080;Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs);hiéroglyphes anatoliens (hiéroglyphes louvites, hiéroglyphes hittites);;2011-12-09
Hmng;450;Pahawh Hmong;pahawh hmong;;2004-05-01
Hrkt;412;Japanese syllabaries (alias for Hiragana + Katakana);syllabaires japonais (alias pour hiragana + katakana);Katakana_Or_Hiragana;2011-06-21
Hung;176;Old Hungarian (Hungarian Runic);runes hongroises (ancien hongrois);;2012-10-16
Inds;610;Indus (Harappan);indus;;2004-05-01
Ital;210;Old Italic (Etruscan, Oscan, etc.);ancien italique (étrusque, osque, etc.);Old_Italic;2004-05-29
Java;361;Javanese;javanais;Javanese;2009-06-01
Jpan;413;Japanese (alias for Han + Hiragana + Katakana);japonais (alias pour han + hiragana + katakana);;2006-06-21
Jurc;510;Jurchen;jurchen;;2010-12-21
Kali;357;Kayah Li;kayah li;Kayah_Li;2007-07-02
Kana;411;Katakana;katakana;Katakana;2004-05-01
Khar;305;Kharoshthi;kharochthî;Kharoshthi;2006-06-21
Khmr;355;Khmer;khmer;Khmer;2004-05-29
Khoj;322;Khojki;khojkî;;2011-06-21
Knda;345;Kannada;kannara (canara);Kannada;2004-05-29
Kore;287;Korean (alias for Hangul + Han);coréen (alias pour hangûl + han);;2007-06-13
Kpel;436;Kpelle;kpèllé;;2010-03-26
Kthi;317;Kaithi;kaithî;Kaithi;2009-06-01
Lana;351;Tai Tham (Lanna);taï tham (lanna);Tai_Tham;2009-06-01
Laoo;356;Lao;laotien;Lao;2004-05-01
Latf;217;Latin (Fraktur variant);latin (variante brisée);;2004-05-01
Latg;216;Latin (Gaelic variant);latin (variante gaélique);;2004-05-01
Latn;215;Latin;latin;Latin;2004-05-01
Lepc;335;Lepcha (Róng);lepcha (róng);Lepcha;2007-07-02
Limb;336;Limbu;limbou;Limbu;2004-05-29
Lina;400;Linear A;linéaire A;;2004-05-01
Linb;401;Linear B;linéaire B;Linear_B;2004-05-29
Lisu;399;Lisu (Fraser);lisu (Fraser);Lisu;2009-06-01
Loma;437;Loma;loma;;2010-03-26
Lyci;202;Lycian;lycien;Lycian;2007-07-02
Lydi;116;Lydian;lydien;Lydian;2007-07-02
Mahj;314;Mahajani;mahâjanî;;2012-10-16
Mand;140;Mandaic, Mandaean;mandéen;Mandaic;2010-07-23
Mani;139;Manichaean;manichéen;;2007-07-15
Maya;090;Mayan hieroglyphs;hiéroglyphes mayas;;2004-05-01
Mend;438;Mende Kikakui;mendé kikakui;;2013-10-12
Merc;101;Meroitic Cursive;cursif méroïtique;Meroitic_Cursive;2012-02-06
Mero;100;Meroitic Hieroglyphs;hiéroglyphes méroïtiques;Meroitic_Hieroglyphs;2012-02-06
Mlym;347;Malayalam;malayâlam;Malayalam;2004-05-01
Modi;323;Modi, Moḍī;modî;;2013-10-12
Moon;218;Moon (Moon code, Moon script, Moon type);écriture Moon;;2006-12-11
Mong;145;Mongolian;mongol;Mongolian;2004-05-01
Mroo;199;Mro, Mru;mro;;2010-12-21
Mtei;337;Meitei Mayek (Meithei, Meetei);meitei mayek;Meetei_Mayek;2009-06-01
Mult;323; Multani;multanî;;2012-11-01
Mymr;350;Myanmar (Burmese);birman;Myanmar;2004-05-01
Narb;106;Old North Arabian (Ancient North Arabian);nord-arabique;;2010-03-26
Nbat;159;Nabataean;nabatéen;;2010-03-26
Nkgb;420;Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba);nakhi géba;;2009-02-23
Nkoo;165;NKo;nko;Nko;2006-10-10
Nshu;499;Nüshu;nüshu;;2010-12-21
Ogam;212;Ogham;ogam;Ogham;2004-05-01
Olck;261;Ol Chiki (Ol Cemet, Ol, Santali);ol tchiki;Ol_Chiki;2007-07-02
Orkh;175;Old Turkic, Orkhon Runic;orkhon;Old_Turkic;2009-06-01
Orya;327;Oriya;oriyâ;Oriya;2004-05-01
Osma;260;Osmanya;osmanais;Osmanya;2004-05-01
Palm;126;Palmyrene;palmyrénien;;2010-03-26
Pauc;263;Pau Cin Hau;paou chin haou;;2013-10-12
Perm;227;Old Permic;ancien permien;;2004-05-01
Phag;331;Phags-pa;phags pa;Phags_Pa;2006-10-10
Phli;131;Inscriptional Pahlavi;pehlevi des inscriptions;Inscriptional_Pahlavi;2009-06-01
Phlp;132;Psalter Pahlavi;pehlevi des psautiers;;2007-11-26
Phlv;133;Book Pahlavi;pehlevi des livres;;2007-07-15
Phnx;115;Phoenician;phénicien;Phoenician;2006-10-10
Plrd;282;Miao (Pollard);miao (Pollard);Miao;2012-02-06
Prti;130;Inscriptional Parthian;parthe des inscriptions;Inscriptional_Parthian;2009-06-01
Qaaa;900;Reserved for private use (start);réservé à lusage privé (début);;2004-05-29
Qabx;949;Reserved for private use (end);réservé à lusage privé (fin);;2004-05-29
Rjng;363;Rejang (Redjang, Kaganga);redjang (kaganga);Rejang;2009-02-23
Roro;620;Rongorongo;rongorongo;;2004-05-01
Runr;211;Runic;runique;Runic;2004-05-01
Samr;123;Samaritan;samaritain;Samaritan;2009-06-01
Sara;292;Sarati;sarati;;2004-05-29
Sarb;105;Old South Arabian;sud-arabique, himyarite;Old_South_Arabian;2009-06-01
Saur;344;Saurashtra;saurachtra;Saurashtra;2007-07-02
Sgnw;095;SignWriting;SignÉcriture, SignWriting;;2006-10-10
Shaw;281;Shavian (Shaw);shavien (Shaw);Shavian;2004-05-01
Shrd;319;Sharada, Śāradā;charada, shard;Sharada;2012-02-06
Sidd;302;Siddham, Siddhaṃ, Siddhamātṛkā;siddham;;2013-10-12
Sind;318;Khudawadi, Sindhi;khoudawadî, sindhî;;2010-12-21
Sinh;348;Sinhala;singhalais;Sinhala;2004-05-01
Sora;398;Sora Sompeng;sora sompeng;Sora_Sompeng;2012-02-06
Sund;362;Sundanese;sundanais;Sundanese;2007-07-02
Sylo;316;Syloti Nagri;sylotî nâgrî;Syloti_Nagri;2006-06-21
Syrc;135;Syriac;syriaque;Syriac;2004-05-01
Syre;138;Syriac (Estrangelo variant);syriaque (variante estranghélo);;2004-05-01
Syrj;137;Syriac (Western variant);syriaque (variante occidentale);;2004-05-01
Syrn;136;Syriac (Eastern variant);syriaque (variante orientale);;2004-05-01
Tagb;373;Tagbanwa;tagbanoua;Tagbanwa;2004-05-01
Takr;321;Takri, Ṭākrī, Ṭāṅkrī;tâkrî;Takri;2012-02-06
Tale;353;Tai Le;taï-le;Tai_Le;2004-10-25
Talu;354;New Tai Lue;nouveau taï-lue;New_Tai_Lue;2006-06-21
Taml;346;Tamil;tamoul;Tamil;2004-05-01
Tang;520;Tangut;tangoute;;2010-12-21
Tavt;359;Tai Viet;taï viêt;Tai_Viet;2009-06-01
Telu;340;Telugu;télougou;Telugu;2004-05-01
Teng;290;Tengwar;tengwar;;2004-05-01
Tfng;120;Tifinagh (Berber);tifinagh (berbère);Tifinagh;2006-06-21
Tglg;370;Tagalog (Baybayin, Alibata);tagal (baybayin, alibata);Tagalog;2009-02-23
Thaa;170;Thaana;thâna;Thaana;2004-05-01
Thai;352;Thai;thaï;Thai;2004-05-01
Tibt;330;Tibetan;tibétain;Tibetan;2004-05-01
Tirh;326;Tirhuta;tirhouta;;2011-12-09
Ugar;040;Ugaritic;ougaritique;Ugaritic;2004-05-01
Vaii;470;Vai;vaï;Vai;2007-07-02
Visp;280;Visible Speech;parole visible;;2004-05-01
Wara;262;Warang Citi (Varang Kshiti);warang citi;;2009-11-11
Wole;480;Woleai;woléaï;;2010-12-21
Xpeo;030;Old Persian;cunéiforme persépolitain;Old_Persian;2006-06-21
Xsux;020;Cuneiform, Sumero-Akkadian;cunéiforme suméro-akkadien;Cuneiform;2006-10-10
Yiii;460;Yi;yi;Yi;2004-05-01
Zinh;994;Code for inherited script;codet pour écriture héritée;Inherited;2009-02-23
Zmth;995;Mathematical notation;notation mathématique;;2007-11-26
Zsym;996;Symbols;symboles;;2007-11-26
Zxxx;997;Code for unwritten documents;codet pour les documents non écrits;;2011-06-21
Zyyy;998;Code for undetermined script;codet pour écriture indéterminée;Common;2004-05-29
Zzzz;999;Code for uncoded script;codet pour écriture non codée;Unknown;2006-10-10

View file

@ -0,0 +1,474 @@
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
aar aa Afar, afar 0 0
abk ab Abkhazian 0 0
ace Achinese 0 0
ach Acoli 0 0
ada Adangme 0 0
ady adyghé 0 0
afa Afro-Asiatic (Other) 0 0
afh Afrihili 0 0
afr af Afrikaans 1 0
ain Ainu 0 0
aka ak Akan 0 0
akk Akkadian 0 0
alb sq Albanian 1 1
ale Aleut 0 0
alg Algonquian languages 0 0
alt Southern Altai 0 0
amh am Amharic 0 0
ang English, Old (ca.450-1100) 0 0
apa Apache languages 0 0
ara ar Arabic 1 1
arc Aramaic 0 0
arg an Aragonese 0 0
arm hy Armenian 1 0
arn Araucanian 0 0
arp Arapaho 0 0
art Artificial (Other) 0 0
arw Arawak 0 0
asm as Assamese 0 0
ast Asturian, Bable 0 0
ath Athapascan languages 0 0
aus Australian languages 0 0
ava av Avaric 0 0
ave ae Avestan 0 0
awa Awadhi 0 0
aym ay Aymara 0 0
aze az Azerbaijani 0 0
bad Banda 0 0
bai Bamileke languages 0 0
bak ba Bashkir 0 0
bal Baluchi 0 0
bam bm Bambara 0 0
ban Balinese 0 0
baq eu Basque 1 1
bas Basa 0 0
bat Baltic (Other) 0 0
bej Beja 0 0
bel be Belarusian 0 0
bem Bemba 0 0
ben bn Bengali 1 0
ber Berber (Other) 0 0
bho Bhojpuri 0 0
bih bh Bihari 0 0
bik Bikol 0 0
bin Bini 0 0
bis bi Bislama 0 0
bla Siksika 0 0
bnt Bantu (Other) 0 0
bos bs Bosnian 1 0
bra Braj 0 0
bre br Breton 1 0
btk Batak (Indonesia) 0 0
bua Buriat 0 0
bug Buginese 0 0
bul bg Bulgarian 1 1
bur my Burmese 1 0
byn Blin 0 0
cad Caddo 0 0
cai Central American Indian (Other) 0 0
car Carib 0 0
cat ca Catalan 1 1
cau Caucasian (Other) 0 0
ceb Cebuano 0 0
cel Celtic (Other) 0 0
cha ch Chamorro 0 0
chb Chibcha 0 0
che ce Chechen 0 0
chg Chagatai 0 0
chi zh Chinese 1 1
chk Chuukese 0 0
chm Mari 0 0
chn Chinook jargon 0 0
cho Choctaw 0 0
chp Chipewyan 0 0
chr Cherokee 0 0
chu cu Church Slavic 0 0
chv cv Chuvash 0 0
chy Cheyenne 0 0
cmc Chamic languages 0 0
cop Coptic 0 0
cor kw Cornish 0 0
cos co Corsican 0 0
cpe Creoles and pidgins, English based (Other) 0 0
cpf Creoles and pidgins, French-based (Other) 0 0
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
cre cr Cree 0 0
crh Crimean Tatar 0 0
crp Creoles and pidgins (Other) 0 0
csb Kashubian 0 0
cus Cushitic (Other)' couchitiques, autres langues 0 0
cze cs Czech 1 1
dak Dakota 0 0
dan da Danish 1 1
dar Dargwa 0 0
day Dayak 0 0
del Delaware 0 0
den Slave (Athapascan) 0 0
dgr Dogrib 0 0
din Dinka 0 0
div dv Divehi 0 0
doi Dogri 0 0
dra Dravidian (Other) 0 0
dua Duala 0 0
dum Dutch, Middle (ca.1050-1350) 0 0
dut nl Dutch 1 1
dyu Dyula 0 0
dzo dz Dzongkha 0 0
efi Efik 0 0
egy Egyptian (Ancient) 0 0
eka Ekajuk 0 0
elx Elamite 0 0
eng en English 1 1
enm English, Middle (1100-1500) 0 0
epo eo Esperanto 1 0
est et Estonian 1 1
ewe ee Ewe 0 0
ewo Ewondo 0 0
fan Fang 0 0
fao fo Faroese 0 0
fat Fanti 0 0
fij fj Fijian 0 0
fil Filipino 0 0
fin fi Finnish 1 1
fiu Finno-Ugrian (Other) 0 0
fon Fon 0 0
fre fr French 1 1
frm French, Middle (ca.1400-1600) 0 0
fro French, Old (842-ca.1400) 0 0
fry fy Frisian 0 0
ful ff Fulah 0 0
fur Friulian 0 0
gaa Ga 0 0
gay Gayo 0 0
gba Gbaya 0 0
gem Germanic (Other) 0 0
geo ka Georgian 1 1
ger de German 1 1
gez Geez 0 0
gil Gilbertese 0 0
gla gd Gaelic 0 0
gle ga Irish 0 0
glg gl Galician 1 1
glv gv Manx 0 0
gmh German, Middle High (ca.1050-1500) 0 0
goh German, Old High (ca.750-1050) 0 0
gon Gondi 0 0
gor Gorontalo 0 0
got Gothic 0 0
grb Grebo 0 0
grc Greek, Ancient (to 1453) 0 0
ell el Greek 1 1
grn gn Guarani 0 0
guj gu Gujarati 0 0
gwi Gwich´in 0 0
hai Haida 0 0
hat ht Haitian 0 0
hau ha Hausa 0 0
haw Hawaiian 0 0
heb he Hebrew 1 1
her hz Herero 0 0
hil Hiligaynon 0 0
him Himachali 0 0
hin hi Hindi 1 1
hit Hittite 0 0
hmn Hmong 0 0
hmo ho Hiri Motu 0 0
hrv hr Croatian 1 1
hun hu Hungarian 1 1
hup Hupa 0 0
iba Iban 0 0
ibo ig Igbo 0 0
ice is Icelandic 1 1
ido io Ido 0 0
iii ii Sichuan Yi 0 0
ijo Ijo 0 0
iku iu Inuktitut 0 0
ile ie Interlingue 0 0
ilo Iloko 0 0
ina ia Interlingua (International Auxiliary Language Asso 0 0
inc Indic (Other) 0 0
ind id Indonesian 1 1
ine Indo-European (Other) 0 0
inh Ingush 0 0
ipk ik Inupiaq 0 0
ira Iranian (Other) 0 0
iro Iroquoian languages 0 0
ita it Italian 1 1
jav jv Javanese 0 0
jpn ja Japanese 1 1
jpr Judeo-Persian 0 0
jrb Judeo-Arabic 0 0
kaa Kara-Kalpak 0 0
kab Kabyle 0 0
kac Kachin 0 0
kal kl Kalaallisut 0 0
kam Kamba 0 0
kan kn Kannada 0 0
kar Karen 0 0
kas ks Kashmiri 0 0
kau kr Kanuri 0 0
kaw Kawi 0 0
kaz kk Kazakh 1 0
kbd Kabardian 0 0
kha Khasi 0 0
khi Khoisan (Other) 0 0
khm km Khmer 1 1
kho Khotanese 0 0
kik ki Kikuyu 0 0
kin rw Kinyarwanda 0 0
kir ky Kirghiz 0 0
kmb Kimbundu 0 0
kok Konkani 0 0
kom kv Komi 0 0
kon kg Kongo 0 0
kor ko Korean 1 1
kos Kosraean 0 0
kpe Kpelle 0 0
krc Karachay-Balkar 0 0
kro Kru 0 0
kru Kurukh 0 0
kua kj Kuanyama 0 0
kum Kumyk 0 0
kur ku Kurdish 0 0
kut Kutenai 0 0
lad Ladino 0 0
lah Lahnda 0 0
lam Lamba 0 0
lao lo Lao 0 0
lat la Latin 0 0
lav lv Latvian 1 0
lez Lezghian 0 0
lim li Limburgan 0 0
lin ln Lingala 0 0
lit lt Lithuanian 1 0
lol Mongo 0 0
loz Lozi 0 0
ltz lb Luxembourgish 1 0
lua Luba-Lulua 0 0
lub lu Luba-Katanga 0 0
lug lg Ganda 0 0
lui Luiseno 0 0
lun Lunda 0 0
luo Luo (Kenya and Tanzania) 0 0
lus lushai 0 0
mac mk Macedonian 1 1
mad Madurese 0 0
mag Magahi 0 0
mah mh Marshallese 0 0
mai Maithili 0 0
mak Makasar 0 0
mal ml Malayalam 1 0
man Mandingo 0 0
mao mi Maori 0 0
map Austronesian (Other) 0 0
mar mr Marathi 0 0
mas Masai 0 0
may ms Malay 1 1
mdf Moksha 0 0
mdr Mandar 0 0
men Mende 0 0
mga Irish, Middle (900-1200) 0 0
mic Mi'kmaq 0 0
min Minangkabau 0 0
mis Miscellaneous languages 0 0
mkh Mon-Khmer (Other) 0 0
mlg mg Malagasy 0 0
mlt mt Maltese 0 0
mnc Manchu 0 0
mni Manipuri 0 0
mno Manobo languages 0 0
moh Mohawk 0 0
mol mo Moldavian 0 0
mon mn Mongolian 1 0
mos Mossi 0 0
mwl Mirandese 0 0
mul Multiple languages 0 0
mun Munda languages 0 0
mus Creek 0 0
mwr Marwari 0 0
myn Mayan languages 0 0
myv Erzya 0 0
nah Nahuatl 0 0
nai North American Indian 0 0
nap Neapolitan 0 0
nau na Nauru 0 0
nav nv Navajo 0 0
nbl nr Ndebele, South 0 0
nde nd Ndebele, North 0 0
ndo ng Ndonga 0 0
nds Low German 0 0
nep ne Nepali 0 0
new Nepal Bhasa 0 0
nia Nias 0 0
nic Niger-Kordofanian (Other) 0 0
niu Niuean 0 0
nno nn Norwegian Nynorsk 0 0
nob nb Norwegian Bokmal 0 0
nog Nogai 0 0
non Norse, Old 0 0
nor no Norwegian 1 1
nso Northern Sotho 0 0
nub Nubian languages 0 0
nwc Classical Newari 0 0
nya ny Chichewa 0 0
nym Nyamwezi 0 0
nyn Nyankole 0 0
nyo Nyoro 0 0
nzi Nzima 0 0
oci oc Occitan 1 1
oji oj Ojibwa 0 0
ori or Oriya 0 0
orm om Oromo 0 0
osa Osage 0 0
oss os Ossetian 0 0
ota Turkish, Ottoman (1500-1928) 0 0
oto Otomian languages 0 0
paa Papuan (Other) 0 0
pag Pangasinan 0 0
pal Pahlavi 0 0
pam Pampanga 0 0
pan pa Panjabi 0 0
pap Papiamento 0 0
pau Palauan 0 0
peo Persian, Old (ca.600-400 B.C.) 0 0
per fa Persian 1 1
phi Philippine (Other) 0 0
phn Phoenician 0 0
pli pi Pali 0 0
pol pl Polish 1 1
pon Pohnpeian 0 0
por pt Portuguese 1 1
pra Prakrit languages 0 0
pro Provençal, Old (to 1500) 0 0
pus ps Pushto 0 0
que qu Quechua 0 0
raj Rajasthani 0 0
rap Rapanui 0 0
rar Rarotongan 0 0
roa Romance (Other) 0 0
roh rm Raeto-Romance 0 0
rom Romany 0 0
run rn Rundi 0 0
rup Aromanian 0 0
rus ru Russian 1 1
sad Sandawe 0 0
sag sg Sango 0 0
sah Yakut 0 0
sai South American Indian (Other) 0 0
sal Salishan languages 0 0
sam Samaritan Aramaic 0 0
san sa Sanskrit 0 0
sas Sasak 0 0
sat Santali 0 0
scc sr Serbian 1 1
scn Sicilian 0 0
sco Scots 0 0
sel Selkup 0 0
sem Semitic (Other) 0 0
sga Irish, Old (to 900) 0 0
sgn Sign Languages 0 0
shn Shan 0 0
sid Sidamo 0 0
sin si Sinhalese 1 1
sio Siouan languages 0 0
sit Sino-Tibetan (Other) 0 0
sla Slavic (Other) 0 0
slo sk Slovak 1 1
slv sl Slovenian 1 1
sma Southern Sami 0 0
sme se Northern Sami 0 0
smi Sami languages (Other) 0 0
smj Lule Sami 0 0
smn Inari Sami 0 0
smo sm Samoan 0 0
sms Skolt Sami 0 0
sna sn Shona 0 0
snd sd Sindhi 0 0
snk Soninke 0 0
sog Sogdian 0 0
som so Somali 0 0
son Songhai 0 0
sot st Sotho, Southern 0 0
spa es Spanish 1 1
srd sc Sardinian 0 0
srr Serer 0 0
ssa Nilo-Saharan (Other) 0 0
ssw ss Swati 0 0
suk Sukuma 0 0
sun su Sundanese 0 0
sus Susu 0 0
sux Sumerian 0 0
swa sw Swahili 1 0
swe sv Swedish 1 1
syr Syriac 1 0
tah ty Tahitian 0 0
tai Tai (Other) 0 0
tam ta Tamil 1 0
tat tt Tatar 0 0
tel te Telugu 1 0
tem Timne 0 0
ter Tereno 0 0
tet Tetum 0 0
tgk tg Tajik 0 0
tgl tl Tagalog 1 1
tha th Thai 1 1
tib bo Tibetan 0 0
tig Tigre 0 0
tir ti Tigrinya 0 0
tiv Tiv 0 0
tkl Tokelau 0 0
tlh Klingon 0 0
tli Tlingit 0 0
tmh Tamashek 0 0
tog Tonga (Nyasa) 0 0
ton to Tonga (Tonga Islands) 0 0
tpi Tok Pisin 0 0
tsi Tsimshian 0 0
tsn tn Tswana 0 0
tso ts Tsonga 0 0
tuk tk Turkmen 0 0
tum Tumbuka 0 0
tup Tupi languages 0 0
tur tr Turkish 1 1
tut Altaic (Other) 0 0
tvl Tuvalu 0 0
twi tw Twi 0 0
tyv Tuvinian 0 0
udm Udmurt 0 0
uga Ugaritic 0 0
uig ug Uighur 0 0
ukr uk Ukrainian 1 1
umb Umbundu 0 0
und Undetermined 0 0
urd ur Urdu 1 0
uzb uz Uzbek 0 0
vai Vai 0 0
ven ve Venda 0 0
vie vi Vietnamese 1 1
vol vo Volapük 0 0
vot Votic 0 0
wak Wakashan languages 0 0
wal Walamo 0 0
war Waray 0 0
was Washo 0 0
wel cy Welsh 0 0
wen Sorbian languages 0 0
wln wa Walloon 0 0
wol wo Wolof 0 0
xal Kalmyk 0 0
xho xh Xhosa 0 0
yao Yao 0 0
yap Yapese 0 0
yid yi Yiddish 0 0
yor yo Yoruba 0 0
ypk Yupik languages 0 0
zap Zapotec 0 0
zen Zenaga 0 0
zha za Zhuang 0 0
znd Zande 0 0
zul zu Zulu 0 0
zun Zuni 0 0
rum ro Romanian 1 1
pob pb Brazilian 1 1
mne Montenegrin 1 0

View file

@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
class Error(Exception):
"""Base class for all exceptions in babelfish"""
pass
class LanguageError(Error, AttributeError):
"""Base class for all language exceptions in babelfish"""
pass
class LanguageConvertError(LanguageError):
"""Exception raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails
:param string alpha3: alpha3 code that failed conversion
:param country: country code that failed conversion, if any
:type country: string or None
:param script: script code that failed conversion, if any
:type script: string or None
"""
def __init__(self, alpha3, country=None, script=None):
self.alpha3 = alpha3
self.country = country
self.script = script
def __str__(self):
s = self.alpha3
if self.country is not None:
s += '-' + self.country
if self.script is not None:
s += '-' + self.script
return s
class LanguageReverseError(LanguageError):
"""Exception raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails
:param string code: code that failed reverse conversion
"""
def __init__(self, code):
self.code = code
def __str__(self):
return repr(self.code)
class CountryError(Error, AttributeError):
"""Base class for all country exceptions in babelfish"""
pass
class CountryConvertError(CountryError):
"""Exception raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails
:param string alpha2: alpha2 code that failed conversion
"""
def __init__(self, alpha2):
self.alpha2 = alpha2
def __str__(self):
return self.alpha2
class CountryReverseError(CountryError):
"""Exception raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails
:param string code: code that failed reverse conversion
"""
def __init__(self, code):
self.code = code
def __str__(self):
return repr(self.code)

View file

@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from .country import Country
from .exceptions import LanguageConvertError
from .script import Script
from . import basestr
LANGUAGES = set()
LANGUAGE_MATRIX = []
#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])
f = resource_stream('babelfish', 'data/iso-639-3.tab')
f.readline()
for l in f:
iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
LANGUAGES.add(iso_language.alpha3)
LANGUAGE_MATRIX.append(iso_language)
f.close()
class LanguageConverterManager(ConverterManager):
""":class:`~babelfish.converters.ConverterManager` for language converters"""
entry_point = 'babelfish.language_converters'
internal_converters = ['alpha2 = babelfish.converters.alpha2:Alpha2Converter',
'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
'name = babelfish.converters.name:NameConverter',
'scope = babelfish.converters.scope:ScopeConverter',
'type = babelfish.converters.type:LanguageTypeConverter',
'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter']
language_converters = LanguageConverterManager()
class LanguageMeta(type):
"""The :class:`Language` metaclass
Dynamically redirect :meth:`Language.frommycode` to :meth:`Language.fromcode` with the ``mycode`` `converter`
"""
def __getattr__(cls, name):
if name.startswith('from'):
return partial(cls.fromcode, converter=name[4:])
return type.__getattribute__(cls, name)
class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
"""A human language
A human language is composed of a language part following the ISO-639
standard and can be country-specific when a :class:`~babelfish.country.Country`
is specified.
The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)
:param string language: the language as a 3-letter ISO-639-3 code
:param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
:type country: string or :class:`~babelfish.country.Country` or None
:param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
:type script: string or :class:`~babelfish.script.Script` or None
:param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
:type unknown: string or None
:raise: ValueError if the language could not be recognized and `unknown` is ``None``
"""
def __init__(self, language, country=None, script=None, unknown=None):
if unknown is not None and language not in LANGUAGES:
language = unknown
if language not in LANGUAGES:
raise ValueError('%r is not a valid language' % language)
self.alpha3 = language
self.country = None
if isinstance(country, Country):
self.country = country
elif country is None:
self.country = None
else:
self.country = Country(country)
self.script = None
if isinstance(script, Script):
self.script = script
elif script is None:
self.script = None
else:
self.script = Script(script)
@classmethod
def fromcode(cls, code, converter):
"""Create a :class:`Language` by its `code` using `converter` to
:meth:`~babelfish.converters.LanguageReverseConverter.reverse` it
:param string code: the code to reverse
:param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
:return: the corresponding :class:`Language` instance
:rtype: :class:`Language`
"""
return cls(*language_converters[converter].reverse(code))
@classmethod
def fromietf(cls, ietf):
"""Create a :class:`Language` by from an IETF language code
:param string ietf: the ietf code
:return: the corresponding :class:`Language` instance
:rtype: :class:`Language`
"""
subtags = ietf.split('-')
language_subtag = subtags.pop(0).lower()
if len(language_subtag) == 2:
language = cls.fromalpha2(language_subtag)
else:
language = cls(language_subtag)
while subtags:
subtag = subtags.pop(0)
if len(subtag) == 2:
language.country = Country(subtag.upper())
else:
language.script = Script(subtag.capitalize())
if language.script is not None:
if subtags:
raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
break
return language
def __getstate__(self):
return self.alpha3, self.country, self.script
def __setstate__(self, state):
self.alpha3, self.country, self.script = state
def __getattr__(self, name):
alpha3 = self.alpha3
country = self.country.alpha2 if self.country is not None else None
script = self.script.code if self.script is not None else None
try:
return language_converters[name].convert(alpha3, country, script)
except KeyError:
raise AttributeError(name)
def __hash__(self):
return hash(str(self))
def __eq__(self, other):
if isinstance(other, basestr):
return str(self) == other
if not isinstance(other, Language):
return False
return (self.alpha3 == other.alpha3 and
self.country == other.country and
self.script == other.script)
def __ne__(self, other):
return not self == other
def __bool__(self):
return self.alpha3 != 'und'
__nonzero__ = __bool__
def __repr__(self):
return '<Language [%s]>' % self
def __str__(self):
try:
s = self.alpha2
except LanguageConvertError:
s = self.alpha3
if self.country is not None:
s += '-' + str(self.country)
if self.script is not None:
s += '-' + str(self.script)
return s

View file

@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from pkg_resources import resource_stream # @UnresolvedImport
from . import basestr
#: Script code to script name mapping
SCRIPTS = {}
#: List of countries in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []
#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])
f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
f.readline()
for l in f:
l = l.decode('utf-8').strip()
if not l or l.startswith('#'):
continue
script = IsoScript._make(l.split(';'))
SCRIPT_MATRIX.append(script)
SCRIPTS[script.code] = script.name
f.close()
class Script(object):
"""A human writing system
A script is represented by a 4-letter code from the ISO-15924 standard
:param string script: 4-letter ISO-15924 script code
"""
def __init__(self, script):
if script not in SCRIPTS:
raise ValueError('%r is not a valid script' % script)
#: ISO-15924 4-letter script code
self.code = script
@property
def name(self):
"""English name of the script"""
return SCRIPTS[self.code]
def __getstate__(self):
return self.code
def __setstate__(self, state):
self.code = state
def __hash__(self):
return hash(self.code)
def __eq__(self, other):
if isinstance(other, basestr):
return self.code == other
if not isinstance(other, Script):
return False
return self.code == other.code
def __ne__(self, other):
return not self == other
def __repr__(self):
return '<Script [%s]>' % self
def __str__(self):
return self.code

View file

@ -0,0 +1,373 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
import re
import sys
import pickle
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
from pkg_resources import resource_stream # @UnresolvedImport
from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
if sys.version_info[:2] <= (2, 6):
_MAX_LENGTH = 80
def safe_repr(obj, short=False):
try:
result = repr(obj)
except Exception:
result = object.__repr__(obj)
if not short or len(result) < _MAX_LENGTH:
return result
return result[:_MAX_LENGTH] + ' [truncated]...'
class _AssertRaisesContext(object):
"""A context manager used to implement TestCase.assertRaises* methods."""
def __init__(self, expected, test_case, expected_regexp=None):
self.expected = expected
self.failureException = test_case.failureException
self.expected_regexp = expected_regexp
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, tb):
if exc_type is None:
try:
exc_name = self.expected.__name__
except AttributeError:
exc_name = str(self.expected)
raise self.failureException(
"{0} not raised".format(exc_name))
if not issubclass(exc_type, self.expected):
# let unexpected exceptions pass through
return False
self.exception = exc_value # store for later retrieval
if self.expected_regexp is None:
return True
expected_regexp = self.expected_regexp
if isinstance(expected_regexp, basestring):
expected_regexp = re.compile(expected_regexp)
if not expected_regexp.search(str(exc_value)):
raise self.failureException('"%s" does not match "%s"' %
(expected_regexp.pattern, str(exc_value)))
return True
class _Py26FixTestCase(object):
def assertIsNone(self, obj, msg=None):
"""Same as self.assertTrue(obj is None), with a nicer default message."""
if obj is not None:
standardMsg = '%s is not None' % (safe_repr(obj),)
self.fail(self._formatMessage(msg, standardMsg))
def assertIsNotNone(self, obj, msg=None):
"""Included for symmetry with assertIsNone."""
if obj is None:
standardMsg = 'unexpectedly None'
self.fail(self._formatMessage(msg, standardMsg))
def assertIn(self, member, container, msg=None):
"""Just like self.assertTrue(a in b), but with a nicer default message."""
if member not in container:
standardMsg = '%s not found in %s' % (safe_repr(member),
safe_repr(container))
self.fail(self._formatMessage(msg, standardMsg))
def assertNotIn(self, member, container, msg=None):
"""Just like self.assertTrue(a not in b), but with a nicer default message."""
if member in container:
standardMsg = '%s unexpectedly found in %s' % (safe_repr(member),
safe_repr(container))
self.fail(self._formatMessage(msg, standardMsg))
def assertIs(self, expr1, expr2, msg=None):
"""Just like self.assertTrue(a is b), but with a nicer default message."""
if expr1 is not expr2:
standardMsg = '%s is not %s' % (safe_repr(expr1),
safe_repr(expr2))
self.fail(self._formatMessage(msg, standardMsg))
def assertIsNot(self, expr1, expr2, msg=None):
"""Just like self.assertTrue(a is not b), but with a nicer default message."""
if expr1 is expr2:
standardMsg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
self.fail(self._formatMessage(msg, standardMsg))
else:
class _Py26FixTestCase(object):
pass
class TestScript(TestCase, _Py26FixTestCase):
def test_wrong_script(self):
self.assertRaises(ValueError, lambda: Script('Azer'))
def test_eq(self):
self.assertEqual(Script('Latn'), Script('Latn'))
def test_ne(self):
self.assertNotEqual(Script('Cyrl'), Script('Latn'))
def test_hash(self):
self.assertEqual(hash(Script('Hira')), hash('Hira'))
def test_pickle(self):
self.assertEqual(pickle.loads(pickle.dumps(Script('Latn'))), Script('Latn'))
class TestCountry(TestCase, _Py26FixTestCase):
def test_wrong_country(self):
self.assertRaises(ValueError, lambda: Country('ZZ'))
def test_eq(self):
self.assertEqual(Country('US'), Country('US'))
def test_ne(self):
self.assertNotEqual(Country('GB'), Country('US'))
self.assertIsNotNone(Country('US'))
def test_hash(self):
self.assertEqual(hash(Country('US')), hash('US'))
def test_pickle(self):
for country in [Country('GB'), Country('US')]:
self.assertEqual(pickle.loads(pickle.dumps(country)), country)
def test_converter_name(self):
self.assertEqual(Country('US').name, 'UNITED STATES')
self.assertEqual(Country.fromname('UNITED STATES'), Country('US'))
self.assertEqual(Country.fromcode('UNITED STATES', 'name'), Country('US'))
self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
self.assertEqual(len(country_converters['name'].codes), 249)
class TestLanguage(TestCase, _Py26FixTestCase):
def test_languages(self):
self.assertEqual(len(LANGUAGES), 7874)
def test_wrong_language(self):
self.assertRaises(ValueError, lambda: Language('zzz'))
def test_unknown_language(self):
self.assertEqual(Language('zzzz', unknown='und'), Language('und'))
def test_converter_alpha2(self):
self.assertEqual(Language('eng').alpha2, 'en')
self.assertEqual(Language.fromalpha2('en'), Language('eng'))
self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
self.assertEqual(len(language_converters['alpha2'].codes), 184)
def test_converter_alpha3b(self):
self.assertEqual(Language('fra').alpha3b, 'fre')
self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
self.assertEqual(len(language_converters['alpha3b'].codes), 418)
def test_converter_alpha3t(self):
self.assertEqual(Language('fra').alpha3t, 'fra')
self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
self.assertEqual(len(language_converters['alpha3t'].codes), 418)
def test_converter_name(self):
self.assertEqual(Language('eng').name, 'English')
self.assertEqual(Language.fromname('English'), Language('eng'))
self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
self.assertEqual(len(language_converters['name'].codes), 7874)
def test_converter_scope(self):
self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
self.assertEqual(Language('eng').scope, 'individual')
self.assertEqual(Language('und').scope, 'special')
def test_converter_type(self):
self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
self.assertEqual(Language('eng').type, 'living')
self.assertEqual(Language('und').type, 'special')
def test_converter_opensubtitles(self):
self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
# Montenegrin is not recognized as an ISO language (yet?) but for now it is
# unofficially accepted as Serbian from Montenegro
self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
self.assertEqual(len(language_converters['opensubtitles'].codes), 606)
# test with all the LANGUAGES from the opensubtitles api
# downloaded from: http://www.opensubtitles.org/addons/export_languages.php
f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
f.readline()
for l in f:
idlang, alpha2, _, upload_enabled, web_enabled = l.decode('utf-8').strip().split('\t')
if not int(upload_enabled) and not int(web_enabled):
# do not test LANGUAGES that are too esoteric / not widely available
continue
self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
if alpha2:
self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
f.close()
def test_fromietf_country_script(self):
language = Language.fromietf('fra-FR-Latn')
self.assertEqual(language.alpha3, 'fra')
self.assertEqual(language.country, Country('FR'))
self.assertEqual(language.script, Script('Latn'))
def test_fromietf_country_no_script(self):
language = Language.fromietf('fra-FR')
self.assertEqual(language.alpha3, 'fra')
self.assertEqual(language.country, Country('FR'))
self.assertIsNone(language.script)
def test_fromietf_no_country_no_script(self):
language = Language.fromietf('fra-FR')
self.assertEqual(language.alpha3, 'fra')
self.assertEqual(language.country, Country('FR'))
self.assertIsNone(language.script)
def test_fromietf_no_country_script(self):
language = Language.fromietf('fra-Latn')
self.assertEqual(language.alpha3, 'fra')
self.assertIsNone(language.country)
self.assertEqual(language.script, Script('Latn'))
def test_fromietf_alpha2_language(self):
language = Language.fromietf('fr-Latn')
self.assertEqual(language.alpha3, 'fra')
self.assertIsNone(language.country)
self.assertEqual(language.script, Script('Latn'))
def test_fromietf_wrong_language(self):
self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))
def test_fromietf_wrong_country(self):
self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))
def test_fromietf_wrong_script(self):
self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))
def test_eq(self):
self.assertEqual(Language('eng'), Language('eng'))
def test_ne(self):
self.assertNotEqual(Language('fra'), Language('eng'))
self.assertIsNotNone(Language('fra'))
def test_nonzero(self):
self.assertFalse(bool(Language('und')))
self.assertTrue(bool(Language('eng')))
def test_language_hasattr(self):
self.assertTrue(hasattr(Language('fra'), 'alpha3'))
self.assertTrue(hasattr(Language('fra'), 'alpha2'))
self.assertFalse(hasattr(Language('bej'), 'alpha2'))
def test_country_hasattr(self):
self.assertTrue(hasattr(Country('US'), 'name'))
self.assertTrue(hasattr(Country('FR'), 'alpha2'))
self.assertFalse(hasattr(Country('BE'), 'none'))
def test_country(self):
self.assertEqual(Language('por', 'BR').country, Country('BR'))
self.assertEqual(Language('eng', Country('US')).country, Country('US'))
def test_eq_with_country(self):
self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))
def test_ne_with_country(self):
self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))
def test_script(self):
self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))
def test_eq_with_script(self):
self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))
def test_ne_with_script(self):
self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))
def test_eq_with_country_and_script(self):
self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))
def test_ne_with_country_and_script(self):
self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))
def test_hash(self):
self.assertEqual(hash(Language('fra')), hash('fr'))
self.assertEqual(hash(Language('ace')), hash('ace'))
self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))
def test_pickle(self):
for lang in [Language('fra'),
Language('eng', 'US'),
Language('srp', script='Latn'),
Language('eng', 'US', 'Latn')]:
self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)
def test_str(self):
self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))
def test_register_converter(self):
class TestConverter(LanguageReverseConverter):
def __init__(self):
self.to_test = {'fra': 'test1', 'eng': 'test2'}
self.from_test = {'test1': 'fra', 'test2': 'eng'}
def convert(self, alpha3, country=None, script=None):
if alpha3 not in self.to_test:
raise LanguageConvertError(alpha3, country, script)
return self.to_test[alpha3]
def reverse(self, test):
if test not in self.from_test:
raise LanguageReverseError(test)
return (self.from_test[test], None)
language = Language('fra')
self.assertFalse(hasattr(language, 'test'))
language_converters['test'] = TestConverter()
self.assertTrue(hasattr(language, 'test'))
self.assertIn('test', language_converters)
self.assertEqual(Language('fra').test, 'test1')
self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
del language_converters['test']
self.assertNotIn('test', language_converters)
self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
self.assertRaises(AttributeError, lambda: Language('fra').test)
def suite():
suite = TestSuite()
suite.addTest(TestLoader().loadTestsFromTestCase(TestScript))
suite.addTest(TestLoader().loadTestsFromTestCase(TestCountry))
suite.addTest(TestLoader().loadTestsFromTestCase(TestLanguage))
return suite
if __name__ == '__main__':
TextTestRunner().run(suite())