summaryrefslogtreecommitdiffhomepage
path: root/libs/subzero/language.py
blob: 199856704857489b822de659fc0e733a5a37162e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# coding=utf-8
from __future__ import absolute_import
import types
import re

from babelfish.exceptions import LanguageError
from babelfish import Language as Language_, basestr, LANGUAGE_MATRIX
from six.moves import zip

repl_map = {
    "dk": "da",
    "nld": "nl",
    "english": "en",
    "alb": "sq",
    "arm": "hy",
    "baq": "eu",
    "bur": "my",
    "chi": "zh",
    "cze": "cs",
    "dut": "nl",
    "fre": "fr",
    "geo": "ka",
    "ger": "de",
    "gre": "el",
    "ice": "is",
    "mac": "mk",
    "mao": "mi",
    "may": "ms",
    "per": "fa",
    "rum": "ro",
    "slo": "sk",
    "tib": "bo",
}

CUSTOM_LIST = ["chs", "sc", "zhs", "hans", "gb", u"简", u"双语",
               "cht", "tc", "zht", "hant", "big5", u"繁", u"雙語",
               "spl", "ea", "pob", "pb"]

ALPHA2_LIST = list(set(filter(lambda x: x, map(lambda x: x.alpha2, LANGUAGE_MATRIX)))) + list(repl_map.values())
ALPHA3b_LIST = list(set(filter(lambda x: x, map(lambda x: x.alpha3, LANGUAGE_MATRIX)))) + \
               list(set(filter(lambda x: len(x) == 3, list(repl_map.keys()))))
FULL_LANGUAGE_LIST = ALPHA2_LIST + ALPHA3b_LIST
FULL_LANGUAGE_LIST.extend(CUSTOM_LIST)


def language_from_stream(l):
    if not l:
        raise LanguageError()
    for method in ("fromietf", "fromalpha3t", "fromalpha3b"):
        try:
            return getattr(Language, method)(l)
        except (LanguageError, ValueError):
            pass
    raise LanguageError()


def wrap_forced(f):
    def inner(*args, **kwargs):
        """
        classmethod wrapper
        :param args: args[0] = cls
        :param kwargs:
        :return:
        """
        args = list(args)
        cls = args[0]
        args = args[1:]
        s = args.pop(0)
        forced = None
        hi = None
        if isinstance(s, (str,)):
            base, forced = s.split(":") if ":" in s else (s, False)
        else:
            base = s

        instance = f(cls, base, *args, **kwargs)
        if isinstance(instance, Language):
            instance.forced = forced == "forced"
            instance.hi = hi == "hi"
        return instance

    return inner


class Language(Language_):
    forced = False
    hi = False

    def __init__(self, language, country=None, script=None, unknown=None, forced=False, hi=False):
        self.forced = forced
        self.hi = hi
        super(Language, self).__init__(language, country=country, script=script, unknown=unknown)

    def __getstate__(self):
        return self.alpha3, self.country, self.script, self.hi, self.forced

    def __setstate__(self, forced):
        self.alpha3, self.country, self.script, self.hi, self.forced = forced

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        if isinstance(other, basestr):
            return str(self) == other
        if not isinstance(other, Language):
            return False
        return (self.alpha3 == other.alpha3 and
                self.country == other.country and
                self.script == other.script and
                bool(self.forced) == bool(other.forced) and
                bool(self.hi) == bool(other.hi))

    def __str__(self):
        return super(Language, self).__str__() + (":forced" if self.forced else "")

    @property
    def basename(self):
        return super(Language, self).__str__()

    def __getattr__(self, name):
        ret = super(Language, self).__getattr__(name)
        if isinstance(ret, Language):
            ret.forced = self.forced
            ret.hi = self.hi
        return ret

    @classmethod
    def rebuild(cls, instance, **replkw):
        state = instance.__getstate__()
        attrs = ("country", "script", "hi", "forced")
        language = state[0]
        kwa = dict(list(zip(attrs, state[1:])))
        kwa.update(replkw)
        return cls(language, **kwa)

    @classmethod
    @wrap_forced
    def fromcode(cls, code, converter):
        return Language(*Language_.fromcode(code, converter).__getstate__())

    @classmethod
    @wrap_forced
    def fromietf(cls, ietf):
        ietf_lower = ietf.lower()
        if ietf_lower in repl_map:
            ietf = repl_map[ietf_lower]

        return Language(*Language_.fromietf(ietf).__getstate__())

    @classmethod
    @wrap_forced
    def fromalpha3b(cls, s):
        if s in repl_map:
            s = repl_map[s]
            return Language(*Language_.fromietf(s).__getstate__())

        return Language(*Language_.fromalpha3b(s).__getstate__())


IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")


def match_ietf_language(s, ietf=False):
    language_match = re.match(".+\.([^\.]+)$" if not ietf
                              else IETF_MATCH, s)
    if language_match and len(language_match.groups()) == 1:
        language = language_match.groups()[0]
        return language
    return s