1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
|
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from .country import Country
from .exceptions import LanguageConvertError
from .script import Script
from . import basestr
LANGUAGES = set()
LANGUAGE_MATRIX = []
#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])
f = resource_stream('babelfish', 'data/iso-639-3.tab')
f.readline()
for l in f:
iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
LANGUAGES.add(iso_language.alpha3)
LANGUAGE_MATRIX.append(iso_language)
f.close()
class LanguageConverterManager(ConverterManager):
""":class:`~babelfish.converters.ConverterManager` for language converters"""
entry_point = 'babelfish.language_converters'
internal_converters = ['alpha2 = babelfish.converters.alpha2:Alpha2Converter',
'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
'name = babelfish.converters.name:NameConverter',
'scope = babelfish.converters.scope:ScopeConverter',
'type = babelfish.converters.type:LanguageTypeConverter',
'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter']
language_converters = LanguageConverterManager()
class LanguageMeta(type):
"""The :class:`Language` metaclass
Dynamically redirect :meth:`Language.frommycode` to :meth:`Language.fromcode` with the ``mycode`` `converter`
"""
def __getattr__(cls, name):
if name.startswith('from'):
return partial(cls.fromcode, converter=name[4:])
return type.__getattribute__(cls, name)
class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
"""A human language
A human language is composed of a language part following the ISO-639
standard and can be country-specific when a :class:`~babelfish.country.Country`
is specified.
The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)
:param string language: the language as a 3-letter ISO-639-3 code
:param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
:type country: string or :class:`~babelfish.country.Country` or None
:param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
:type script: string or :class:`~babelfish.script.Script` or None
:param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
:type unknown: string or None
:raise: ValueError if the language could not be recognized and `unknown` is ``None``
"""
def __init__(self, language, country=None, script=None, unknown=None):
if unknown is not None and language not in LANGUAGES:
language = unknown
if language not in LANGUAGES:
raise ValueError('%r is not a valid language' % language)
self.alpha3 = language
self.country = None
if isinstance(country, Country):
self.country = country
elif country is None:
self.country = None
else:
self.country = Country(country)
self.script = None
if isinstance(script, Script):
self.script = script
elif script is None:
self.script = None
else:
self.script = Script(script)
@classmethod
def fromcode(cls, code, converter):
"""Create a :class:`Language` by its `code` using `converter` to
:meth:`~babelfish.converters.LanguageReverseConverter.reverse` it
:param string code: the code to reverse
:param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
:return: the corresponding :class:`Language` instance
:rtype: :class:`Language`
"""
return cls(*language_converters[converter].reverse(code))
@classmethod
def fromietf(cls, ietf):
"""Create a :class:`Language` by from an IETF language code
:param string ietf: the ietf code
:return: the corresponding :class:`Language` instance
:rtype: :class:`Language`
"""
subtags = ietf.split('-')
language_subtag = subtags.pop(0).lower()
if len(language_subtag) == 2:
language = cls.fromalpha2(language_subtag)
else:
language = cls(language_subtag)
while subtags:
subtag = subtags.pop(0)
if len(subtag) == 2:
language.country = Country(subtag.upper())
else:
language.script = Script(subtag.capitalize())
if language.script is not None:
if subtags:
raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
break
return language
def __getstate__(self):
return self.alpha3, self.country, self.script
def __setstate__(self, state):
self.alpha3, self.country, self.script = state
def __getattr__(self, name):
alpha3 = self.alpha3
country = self.country.alpha2 if self.country is not None else None
script = self.script.code if self.script is not None else None
try:
return language_converters[name].convert(alpha3, country, script)
except KeyError:
raise AttributeError(name)
def __hash__(self):
return hash(str(self))
def __eq__(self, other):
if isinstance(other, basestr):
return str(self) == other
if not isinstance(other, Language):
return False
return (self.alpha3 == other.alpha3 and
self.country == other.country and
self.script == other.script)
def __ne__(self, other):
return not self == other
def __bool__(self):
return self.alpha3 != 'und'
__nonzero__ = __bool__
def __repr__(self):
return '<Language [%s]>' % self
def __str__(self):
try:
s = self.alpha2
except LanguageConvertError:
s = self.alpha3
if self.country is not None:
s += '-' + str(self.country)
if self.script is not None:
s += '-' + str(self.script)
return s
|