summaryrefslogtreecommitdiffhomepage
path: root/libs/chardet
diff options
context:
space:
mode:
authormorpheus65535 <[email protected]>2022-11-07 13:06:49 -0500
committermorpheus65535 <[email protected]>2022-11-07 13:08:27 -0500
commitbbe2483e21c2c1549ceeed16f021f9581b899f70 (patch)
treebcc2bef2f55789ec6e6c64809c07fb4f4d3d9c86 /libs/chardet
parent708fbfcd8ec0620647975be39a1f6acbbf08f767 (diff)
downloadbazarr-bbe2483e21c2c1549ceeed16f021f9581b899f70.tar.gz
bazarr-bbe2483e21c2c1549ceeed16f021f9581b899f70.zip
Updated vendored dependencies.
Diffstat (limited to 'libs/chardet')
-rw-r--r--libs/chardet/__init__.py72
-rw-r--r--libs/chardet/big5freq.py6
-rw-r--r--libs/chardet/big5prober.py6
-rw-r--r--libs/chardet/chardistribution.py102
-rw-r--r--libs/chardet/charsetgroupprober.py16
-rw-r--r--libs/chardet/charsetprober.py47
-rw-r--r--libs/chardet/cli/__init__.py1
-rw-r--r--libs/chardet/cli/chardetect.py52
-rw-r--r--libs/chardet/codingstatemachine.py16
-rw-r--r--libs/chardet/cp949prober.py2
-rw-r--r--libs/chardet/enums.py18
-rw-r--r--libs/chardet/escprober.py19
-rw-r--r--libs/chardet/escsm.py384
-rw-r--r--libs/chardet/eucjpprober.py41
-rw-r--r--libs/chardet/euckrfreq.py3
-rw-r--r--libs/chardet/euckrprober.py6
-rw-r--r--libs/chardet/euctwfreq.py677
-rw-r--r--libs/chardet/euctwprober.py7
-rw-r--r--libs/chardet/gb2312freq.py3
-rw-r--r--libs/chardet/gb2312prober.py7
-rw-r--r--libs/chardet/hebrewprober.py68
-rw-r--r--libs/chardet/jisfreq.py4
-rw-r--r--libs/chardet/johabfreq.py2382
-rw-r--r--libs/chardet/johabprober.py (renamed from libs/chardet/compat.py)41
-rw-r--r--libs/chardet/jpcntx.py190
-rw-r--r--libs/chardet/langbulgarianmodel.py1061
-rw-r--r--libs/chardet/langgreekmodel.py1061
-rw-r--r--libs/chardet/langhebrewmodel.py533
-rw-r--r--libs/chardet/langhungarianmodel.py1061
-rw-r--r--libs/chardet/langrussianmodel.py3173
-rw-r--r--libs/chardet/langthaimodel.py533
-rw-r--r--libs/chardet/langturkishmodel.py533
-rw-r--r--libs/chardet/latin1prober.py26
-rw-r--r--libs/chardet/mbcharsetprober.py32
-rw-r--r--libs/chardet/mbcsgroupprober.py16
-rw-r--r--libs/chardet/mbcssm.py812
-rw-r--r--libs/chardet/metadata/languages.py571
-rw-r--r--libs/chardet/sbcharsetprober.py73
-rw-r--r--libs/chardet/sbcsgroupprober.py31
-rw-r--r--libs/chardet/sjisprober.py46
-rw-r--r--libs/chardet/universaldetector.py174
-rw-r--r--libs/chardet/utf1632prober.py223
-rw-r--r--libs/chardet/utf8prober.py16
-rw-r--r--libs/chardet/version.py4
44 files changed, 8518 insertions, 5631 deletions
diff --git a/libs/chardet/__init__.py b/libs/chardet/__init__.py
index 80ad2546d..e91ad6182 100644
--- a/libs/chardet/__init__.py
+++ b/libs/chardet/__init__.py
@@ -15,13 +15,11 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-
-from .universaldetector import UniversalDetector
from .enums import InputState
-from .version import __version__, VERSION
-
+from .universaldetector import UniversalDetector
+from .version import VERSION, __version__
-__all__ = ['UniversalDetector', 'detect', 'detect_all', '__version__', 'VERSION']
+__all__ = ["UniversalDetector", "detect", "detect_all", "__version__", "VERSION"]
def detect(byte_str):
@@ -33,51 +31,63 @@ def detect(byte_str):
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
- raise TypeError('Expected object of type bytes or bytearray, got: '
- '{}'.format(type(byte_str)))
- else:
- byte_str = bytearray(byte_str)
+ raise TypeError(
+ f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
+ )
+ byte_str = bytearray(byte_str)
detector = UniversalDetector()
detector.feed(byte_str)
return detector.close()
-def detect_all(byte_str):
+def detect_all(byte_str, ignore_threshold=False):
"""
Detect all the possible encodings of the given byte string.
- :param byte_str: The byte sequence to examine.
- :type byte_str: ``bytes`` or ``bytearray``
+ :param byte_str: The byte sequence to examine.
+ :type byte_str: ``bytes`` or ``bytearray``
+ :param ignore_threshold: Include encodings that are below
+ ``UniversalDetector.MINIMUM_THRESHOLD``
+ in results.
+ :type ignore_threshold: ``bool``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
- raise TypeError('Expected object of type bytes or bytearray, got: '
- '{}'.format(type(byte_str)))
- else:
- byte_str = bytearray(byte_str)
+ raise TypeError(
+ f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
+ )
+ byte_str = bytearray(byte_str)
detector = UniversalDetector()
detector.feed(byte_str)
detector.close()
- if detector._input_state == InputState.HIGH_BYTE:
+ if detector.input_state == InputState.HIGH_BYTE:
results = []
- for prober in detector._charset_probers:
- if prober.get_confidence() > detector.MINIMUM_THRESHOLD:
- charset_name = prober.charset_name
- lower_charset_name = prober.charset_name.lower()
+ probers = []
+ for prober in detector.charset_probers:
+ if hasattr(prober, "probers"):
+ probers.extend(p for p in prober.probers)
+ else:
+ probers.append(prober)
+ for prober in probers:
+ if ignore_threshold or prober.get_confidence() > detector.MINIMUM_THRESHOLD:
+ charset_name = prober.charset_name or ""
+ lower_charset_name = charset_name.lower()
# Use Windows encoding name instead of ISO-8859 if we saw any
# extra Windows-specific bytes
- if lower_charset_name.startswith('iso-8859'):
- if detector._has_win_bytes:
- charset_name = detector.ISO_WIN_MAP.get(lower_charset_name,
- charset_name)
- results.append({
- 'encoding': charset_name,
- 'confidence': prober.get_confidence(),
- 'language': prober.language,
- })
+ if lower_charset_name.startswith("iso-8859") and detector.has_win_bytes:
+ charset_name = detector.ISO_WIN_MAP.get(
+ lower_charset_name, charset_name
+ )
+ results.append(
+ {
+ "encoding": charset_name,
+ "confidence": prober.get_confidence(),
+ "language": prober.language,
+ }
+ )
if len(results) > 0:
- return sorted(results, key=lambda result: -result['confidence'])
+ return sorted(results, key=lambda result: -result["confidence"])
return [detector.result]
diff --git a/libs/chardet/big5freq.py b/libs/chardet/big5freq.py
index 38f32517a..87d9f972e 100644
--- a/libs/chardet/big5freq.py
+++ b/libs/chardet/big5freq.py
@@ -42,9 +42,9 @@
BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
-#Char to FreqOrder table
+# Char to FreqOrder table
BIG5_TABLE_SIZE = 5376
-
+# fmt: off
BIG5_CHAR_TO_FREQ_ORDER = (
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
@@ -383,4 +383,4 @@ BIG5_CHAR_TO_FREQ_ORDER = (
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376
)
-
+# fmt: on
diff --git a/libs/chardet/big5prober.py b/libs/chardet/big5prober.py
index 98f997012..e4dfa7aa0 100644
--- a/libs/chardet/big5prober.py
+++ b/libs/chardet/big5prober.py
@@ -25,15 +25,15 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
from .chardistribution import Big5DistributionAnalysis
+from .codingstatemachine import CodingStateMachine
+from .mbcharsetprober import MultiByteCharSetProber
from .mbcssm import BIG5_SM_MODEL
class Big5Prober(MultiByteCharSetProber):
def __init__(self):
- super(Big5Prober, self).__init__()
+ super().__init__()
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
self.distribution_analyzer = Big5DistributionAnalysis()
self.reset()
diff --git a/libs/chardet/chardistribution.py b/libs/chardet/chardistribution.py
index c0395f4a4..27b4a2939 100644
--- a/libs/chardet/chardistribution.py
+++ b/libs/chardet/chardistribution.py
@@ -25,19 +25,35 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
- EUCTW_TYPICAL_DISTRIBUTION_RATIO)
-from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
- EUCKR_TYPICAL_DISTRIBUTION_RATIO)
-from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
- GB2312_TYPICAL_DISTRIBUTION_RATIO)
-from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
- BIG5_TYPICAL_DISTRIBUTION_RATIO)
-from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
- JIS_TYPICAL_DISTRIBUTION_RATIO)
-
-
-class CharDistributionAnalysis(object):
+from .big5freq import (
+ BIG5_CHAR_TO_FREQ_ORDER,
+ BIG5_TABLE_SIZE,
+ BIG5_TYPICAL_DISTRIBUTION_RATIO,
+)
+from .euckrfreq import (
+ EUCKR_CHAR_TO_FREQ_ORDER,
+ EUCKR_TABLE_SIZE,
+ EUCKR_TYPICAL_DISTRIBUTION_RATIO,
+)
+from .euctwfreq import (
+ EUCTW_CHAR_TO_FREQ_ORDER,
+ EUCTW_TABLE_SIZE,
+ EUCTW_TYPICAL_DISTRIBUTION_RATIO,
+)
+from .gb2312freq import (
+ GB2312_CHAR_TO_FREQ_ORDER,
+ GB2312_TABLE_SIZE,
+ GB2312_TYPICAL_DISTRIBUTION_RATIO,
+)
+from .jisfreq import (
+ JIS_CHAR_TO_FREQ_ORDER,
+ JIS_TABLE_SIZE,
+ JIS_TYPICAL_DISTRIBUTION_RATIO,
+)
+from .johabfreq import JOHAB_TO_EUCKR_ORDER_TABLE
+
+
+class CharDistributionAnalysis:
ENOUGH_DATA_THRESHOLD = 1024
SURE_YES = 0.99
SURE_NO = 0.01
@@ -46,7 +62,7 @@ class CharDistributionAnalysis(object):
def __init__(self):
# Mapping table to get frequency order from char order (get from
# GetOrder())
- self._char_to_freq_order = None
+ self._char_to_freq_order = tuple()
self._table_size = None # Size of above table
# This is a constant value which varies from language to language,
# used in calculating confidence. See
@@ -89,8 +105,9 @@ class CharDistributionAnalysis(object):
return self.SURE_NO
if self._total_chars != self._freq_chars:
- r = (self._freq_chars / ((self._total_chars - self._freq_chars)
- * self.typical_distribution_ratio))
+ r = self._freq_chars / (
+ (self._total_chars - self._freq_chars) * self.typical_distribution_ratio
+ )
if r < self.SURE_YES:
return r
@@ -102,7 +119,7 @@ class CharDistributionAnalysis(object):
# For charset detection, certain amount of data is enough
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
- def get_order(self, byte_str):
+ def get_order(self, _):
# We do not handle characters based on the original encoding string,
# but convert this encoding string to a number, here called order.
# This allows multiple encodings of a language to share one frequency
@@ -112,7 +129,7 @@ class CharDistributionAnalysis(object):
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
- super(EUCTWDistributionAnalysis, self).__init__()
+ super().__init__()
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
self._table_size = EUCTW_TABLE_SIZE
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
@@ -125,13 +142,12 @@ class EUCTWDistributionAnalysis(CharDistributionAnalysis):
first_char = byte_str[0]
if first_char >= 0xC4:
return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1
- else:
- return -1
+ return -1
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
- super(EUCKRDistributionAnalysis, self).__init__()
+ super().__init__()
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
self._table_size = EUCKR_TABLE_SIZE
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
@@ -144,13 +160,27 @@ class EUCKRDistributionAnalysis(CharDistributionAnalysis):
first_char = byte_str[0]
if first_char >= 0xB0:
return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1
- else:
- return -1
+ return -1
+
+
+class JOHABDistributionAnalysis(CharDistributionAnalysis):
+ def __init__(self):
+ super().__init__()
+ self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
+ self._table_size = EUCKR_TABLE_SIZE
+ self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
+
+ def get_order(self, byte_str):
+ first_char = byte_str[0]
+ if 0x88 <= first_char < 0xD4:
+ code = first_char * 256 + byte_str[1]
+ return JOHAB_TO_EUCKR_ORDER_TABLE.get(code, -1)
+ return -1
class GB2312DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
- super(GB2312DistributionAnalysis, self).__init__()
+ super().__init__()
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
self._table_size = GB2312_TABLE_SIZE
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
@@ -163,13 +193,12 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis):
first_char, second_char = byte_str[0], byte_str[1]
if (first_char >= 0xB0) and (second_char >= 0xA1):
return 94 * (first_char - 0xB0) + second_char - 0xA1
- else:
- return -1
+ return -1
class Big5DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
- super(Big5DistributionAnalysis, self).__init__()
+ super().__init__()
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
self._table_size = BIG5_TABLE_SIZE
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
@@ -183,15 +212,13 @@ class Big5DistributionAnalysis(CharDistributionAnalysis):
if first_char >= 0xA4:
if second_char >= 0xA1:
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
- else:
- return 157 * (first_char - 0xA4) + second_char - 0x40
- else:
- return -1
+ return 157 * (first_char - 0xA4) + second_char - 0x40
+ return -1
class SJISDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
- super(SJISDistributionAnalysis, self).__init__()
+ super().__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
@@ -202,9 +229,9 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
# no validation needed here. State machine has done that
first_char, second_char = byte_str[0], byte_str[1]
- if (first_char >= 0x81) and (first_char <= 0x9F):
+ if 0x81 <= first_char <= 0x9F:
order = 188 * (first_char - 0x81)
- elif (first_char >= 0xE0) and (first_char <= 0xEF):
+ elif 0xE0 <= first_char <= 0xEF:
order = 188 * (first_char - 0xE0 + 31)
else:
return -1
@@ -216,7 +243,7 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
- super(EUCJPDistributionAnalysis, self).__init__()
+ super().__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
@@ -228,6 +255,5 @@ class EUCJPDistributionAnalysis(CharDistributionAnalysis):
# no validation needed here. State machine has done that
char = byte_str[0]
if char >= 0xA0:
- return 94 * (char - 0xA1) + byte_str[1] - 0xa1
- else:
- return -1
+ return 94 * (char - 0xA1) + byte_str[1] - 0xA1
+ return -1
diff --git a/libs/chardet/charsetgroupprober.py b/libs/chardet/charsetgroupprober.py
index 5812cef0b..778ff332b 100644
--- a/libs/chardet/charsetgroupprober.py
+++ b/libs/chardet/charsetgroupprober.py
@@ -25,19 +25,19 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-from .enums import ProbingState
from .charsetprober import CharSetProber
+from .enums import ProbingState
class CharSetGroupProber(CharSetProber):
def __init__(self, lang_filter=None):
- super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
+ super().__init__(lang_filter=lang_filter)
self._active_num = 0
self.probers = []
self._best_guess_prober = None
def reset(self):
- super(CharSetGroupProber, self).reset()
+ super().reset()
self._active_num = 0
for prober in self.probers:
if prober:
@@ -75,7 +75,7 @@ class CharSetGroupProber(CharSetProber):
self._best_guess_prober = prober
self._state = ProbingState.FOUND_IT
return self.state
- elif state == ProbingState.NOT_ME:
+ if state == ProbingState.NOT_ME:
prober.active = False
self._active_num -= 1
if self._active_num <= 0:
@@ -87,7 +87,7 @@ class CharSetGroupProber(CharSetProber):
state = self.state
if state == ProbingState.FOUND_IT:
return 0.99
- elif state == ProbingState.NOT_ME:
+ if state == ProbingState.NOT_ME:
return 0.01
best_conf = 0.0
self._best_guess_prober = None
@@ -95,10 +95,12 @@ class CharSetGroupProber(CharSetProber):
if not prober:
continue
if not prober.active:
- self.logger.debug('%s not active', prober.charset_name)
+ self.logger.debug("%s not active", prober.charset_name)
continue
conf = prober.get_confidence()
- self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
+ self.logger.debug(
+ "%s %s confidence = %s", prober.charset_name, prober.language, conf
+ )
if best_conf < conf:
best_conf = conf
self._best_guess_prober = prober
diff --git a/libs/chardet/charsetprober.py b/libs/chardet/charsetprober.py
index eac4e5986..9f1afd999 100644
--- a/libs/chardet/charsetprober.py
+++ b/libs/chardet/charsetprober.py
@@ -31,8 +31,12 @@ import re
from .enums import ProbingState
+INTERNATIONAL_WORDS_PATTERN = re.compile(
+ b"[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?"
+)
-class CharSetProber(object):
+
+class CharSetProber:
SHORTCUT_THRESHOLD = 0.95
@@ -48,8 +52,8 @@ class CharSetProber(object):
def charset_name(self):
return None
- def feed(self, buf):
- pass
+ def feed(self, byte_str):
+ raise NotImplementedError
@property
def state(self):
@@ -60,7 +64,7 @@ class CharSetProber(object):
@staticmethod
def filter_high_byte_only(buf):
- buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
+ buf = re.sub(b"([\x00-\x7F])+", b" ", buf)
return buf
@staticmethod
@@ -70,12 +74,10 @@ class CharSetProber(object):
alphabet: english alphabets [a-zA-Z]
international: international characters [\x80-\xFF]
marker: everything else [^a-zA-Z\x80-\xFF]
-
The input buffer can be thought to contain a series of words delimited
by markers. This function works to filter all words that contain at
least one international character. All contiguous sequences of markers
are replaced by a single space ascii character.
-
This filter applies to all scripts which do not use English characters.
"""
filtered = bytearray()
@@ -83,8 +85,7 @@ class CharSetProber(object):
# This regex expression filters out only words that have at-least one
# international character. The word may include one marker character at
# the end.
- words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
- buf)
+ words = INTERNATIONAL_WORDS_PATTERN.findall(buf)
for word in words:
filtered.extend(word[:-1])
@@ -94,20 +95,17 @@ class CharSetProber(object):
# similarly across all languages and may thus have similar
# frequencies).
last_char = word[-1:]
- if not last_char.isalpha() and last_char < b'\x80':
- last_char = b' '
+ if not last_char.isalpha() and last_char < b"\x80":
+ last_char = b" "
filtered.extend(last_char)
return filtered
@staticmethod
- def filter_with_english_letters(buf):
+ def remove_xml_tags(buf):
"""
Returns a copy of ``buf`` that retains only the sequences of English
alphabet and high byte characters that are not between <> characters.
- Also retains English alphabet and high byte characters immediately
- before occurrences of >.
-
This filter can be applied to all scripts which contain both English
characters and extended ASCII characters, but is currently only used by
``Latin1Prober``.
@@ -115,26 +113,21 @@ class CharSetProber(object):
filtered = bytearray()
in_tag = False
prev = 0
+ buf = memoryview(buf).cast("c")
- for curr in range(len(buf)):
- # Slice here to get bytes instead of an int with Python 3
- buf_char = buf[curr:curr + 1]
- # Check if we're coming out of or entering an HTML tag
- if buf_char == b'>':
+ for curr, buf_char in enumerate(buf):
+ # Check if we're coming out of or entering an XML tag
+ if buf_char == b">":
+ prev = curr + 1
in_tag = False
- elif buf_char == b'<':
- in_tag = True
-
- # If current character is not extended-ASCII and not alphabetic...
- if buf_char < b'\x80' and not buf_char.isalpha():
- # ...and we're not in a tag
+ elif buf_char == b"<":
if curr > prev and not in_tag:
# Keep everything after last non-extended-ASCII,
# non-alphabetic character
filtered.extend(buf[prev:curr])
# Output a space to delimit stretch we kept
- filtered.extend(b' ')
- prev = curr + 1
+ filtered.extend(b" ")
+ in_tag = True
# If we're not in a tag...
if not in_tag:
diff --git a/libs/chardet/cli/__init__.py b/libs/chardet/cli/__init__.py
index 8b1378917..e69de29bb 100644
--- a/libs/chardet/cli/__init__.py
+++ b/libs/chardet/cli/__init__.py
@@ -1 +0,0 @@
-
diff --git a/libs/chardet/cli/chardetect.py b/libs/chardet/cli/chardetect.py
index e1d8cd69a..7926fa37e 100644
--- a/libs/chardet/cli/chardetect.py
+++ b/libs/chardet/cli/chardetect.py
@@ -12,17 +12,15 @@ If no paths are provided, it takes its input from stdin.
"""
-from __future__ import absolute_import, print_function, unicode_literals
import argparse
import sys
-from chardet import __version__
-from chardet.compat import PY2
-from chardet.universaldetector import UniversalDetector
+from .. import __version__
+from ..universaldetector import UniversalDetector
-def description_of(lines, name='stdin'):
+def description_of(lines, name="stdin"):
"""
Return a string describing the probable encoding of a file or
list of strings.
@@ -41,13 +39,9 @@ def description_of(lines, name='stdin'):
break
u.close()
result = u.result
- if PY2:
- name = name.decode(sys.getfilesystemencoding(), 'ignore')
- if result['encoding']:
- return '{}: {} with confidence {}'.format(name, result['encoding'],
- result['confidence'])
- else:
- return '{}: no result'.format(name)
+ if result["encoding"]:
+ return f'{name}: {result["encoding"]} with confidence {result["confidence"]}'
+ return f"{name}: no result"
def main(argv=None):
@@ -61,24 +55,32 @@ def main(argv=None):
# Get command line arguments
parser = argparse.ArgumentParser(
description="Takes one or more file paths and reports their detected \
- encodings")
- parser.add_argument('input',
- help='File whose encoding we would like to determine. \
- (default: stdin)',
- type=argparse.FileType('rb'), nargs='*',
- default=[sys.stdin if PY2 else sys.stdin.buffer])
- parser.add_argument('--version', action='version',
- version='%(prog)s {}'.format(__version__))
+ encodings"
+ )
+ parser.add_argument(
+ "input",
+ help="File whose encoding we would like to determine. \
+ (default: stdin)",
+ type=argparse.FileType("rb"),
+ nargs="*",
+ default=[sys.stdin.buffer],
+ )
+ parser.add_argument(
+ "--version", action="version", version=f"%(prog)s {__version__}"
+ )
args = parser.parse_args(argv)
for f in args.input:
if f.isatty():
- print("You are running chardetect interactively. Press " +
- "CTRL-D twice at the start of a blank line to signal the " +
- "end of your input. If you want help, run chardetect " +
- "--help\n", file=sys.stderr)
+ print(
+ "You are running chardetect interactively. Press "
+ "CTRL-D twice at the start of a blank line to signal the "
+ "end of your input. If you want help, run chardetect "
+ "--help\n",
+ file=sys.stderr,
+ )
print(description_of(f, f.name))
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/libs/chardet/codingstatemachine.py b/libs/chardet/codingstatemachine.py
index 68fba44f1..d3e3e825d 100644
--- a/libs/chardet/codingstatemachine.py
+++ b/libs/chardet/codingstatemachine.py
@@ -30,7 +30,7 @@ import logging
from .enums import MachineState
-class CodingStateMachine(object):
+class CodingStateMachine:
"""
A state machine to verify a byte sequence for a particular encoding. For
each byte the detector receives, it will feed that byte to every active
@@ -52,6 +52,7 @@ class CodingStateMachine(object):
negative answer for this encoding. Detector will exclude this
encoding from consideration from here on.
"""
+
def __init__(self, sm):
self._model = sm
self._curr_byte_pos = 0
@@ -66,14 +67,13 @@ class CodingStateMachine(object):
def next_state(self, c):
# for each byte we get its class
# if it is first byte, we also get byte length
- byte_class = self._model['class_table'][c]
+ byte_class = self._model["class_table"][c]
if self._curr_state == MachineState.START:
self._curr_byte_pos = 0
- self._curr_char_len = self._model['char_len_table'][byte_class]
+ self._curr_char_len = self._model["char_len_table"][byte_class]
# from byte's class and state_table, we get its next state
- curr_state = (self._curr_state * self._model['class_factor']
- + byte_class)
- self._curr_state = self._model['state_table'][curr_state]
+ curr_state = self._curr_state * self._model["class_factor"] + byte_class
+ self._curr_state = self._model["state_table"][curr_state]
self._curr_byte_pos += 1
return self._curr_state
@@ -81,8 +81,8 @@ class CodingStateMachine(object):
return self._curr_char_len
def get_coding_state_machine(self):
- return self._model['name']
+ return self._model["name"]
@property
def language(self):
- return self._model['language']
+ return self._model["language"]
diff --git a/libs/chardet/cp949prober.py b/libs/chardet/cp949prober.py
index efd793abc..28a1f3dbb 100644
--- a/libs/chardet/cp949prober.py
+++ b/libs/chardet/cp949prober.py
@@ -33,7 +33,7 @@ from .mbcssm import CP949_SM_MODEL
class CP949Prober(MultiByteCharSetProber):
def __init__(self):
- super(CP949Prober, self).__init__()
+ super().__init__()
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
# not different.
diff --git a/libs/chardet/enums.py b/libs/chardet/enums.py
index 045120722..32a77e76c 100644
--- a/libs/chardet/enums.py
+++ b/libs/chardet/enums.py
@@ -5,20 +5,22 @@ All of the Enums that are used throughout the chardet package.
"""
-class InputState(object):
+class InputState:
"""
This enum represents the different states a universal detector can be in.
"""
+
PURE_ASCII = 0
ESC_ASCII = 1
HIGH_BYTE = 2
-class LanguageFilter(object):
+class LanguageFilter:
"""
This enum represents the different language filters we can apply to a
``UniversalDetector``.
"""
+
CHINESE_SIMPLIFIED = 0x01
CHINESE_TRADITIONAL = 0x02
JAPANESE = 0x04
@@ -29,28 +31,31 @@ class LanguageFilter(object):
CJK = CHINESE | JAPANESE | KOREAN
-class ProbingState(object):
+class ProbingState:
"""
This enum represents the different states a prober can be in.
"""
+
DETECTING = 0
FOUND_IT = 1
NOT_ME = 2
-class MachineState(object):
+class MachineState:
"""
This enum represents the different states a state machine can be in.
"""
+
START = 0
ERROR = 1
ITS_ME = 2
-class SequenceLikelihood(object):
+class SequenceLikelihood:
"""
This enum represents the likelihood of a character following the previous one.
"""
+
NEGATIVE = 0
UNLIKELY = 1
LIKELY = 2
@@ -62,13 +67,14 @@ class SequenceLikelihood(object):
return 4
-class CharacterCategory(object):
+class CharacterCategory:
"""
This enum represents the different categories language models for
``SingleByteCharsetProber`` put characters into.
Anything less than CONTROL is considered a letter.
"""
+
UNDEFINED = 255
LINE_BREAK = 254
SYMBOL = 253
diff --git a/libs/chardet/escprober.py b/libs/chardet/escprober.py
index c70493f2b..d9926115d 100644
--- a/libs/chardet/escprober.py
+++ b/libs/chardet/escprober.py
@@ -27,9 +27,13 @@
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
-from .enums import LanguageFilter, ProbingState, MachineState
-from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
- ISO2022KR_SM_MODEL)
+from .enums import LanguageFilter, MachineState, ProbingState
+from .escsm import (
+ HZ_SM_MODEL,
+ ISO2022CN_SM_MODEL,
+ ISO2022JP_SM_MODEL,
+ ISO2022KR_SM_MODEL,
+)
class EscCharSetProber(CharSetProber):
@@ -40,7 +44,7 @@ class EscCharSetProber(CharSetProber):
"""
def __init__(self, lang_filter=None):
- super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
+ super().__init__(lang_filter=lang_filter)
self.coding_sm = []
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
@@ -56,7 +60,7 @@ class EscCharSetProber(CharSetProber):
self.reset()
def reset(self):
- super(EscCharSetProber, self).reset()
+ super().reset()
for coding_sm in self.coding_sm:
if not coding_sm:
continue
@@ -75,10 +79,7 @@ class EscCharSetProber(CharSetProber):
return self._detected_language
def get_confidence(self):
- if self._detected_charset:
- return 0.99
- else:
- return 0.00
+ return 0.99 if self._detected_charset else 0.00
def feed(self, byte_str):
for c in byte_str:
diff --git a/libs/chardet/escsm.py b/libs/chardet/escsm.py
index 0069523a0..3aa0f4d96 100644
--- a/libs/chardet/escsm.py
+++ b/libs/chardet/escsm.py
@@ -12,7 +12,7 @@
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
+# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -20,227 +20,241 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .enums import MachineState
+# fmt: off
HZ_CLS = (
-1,0,0,0,0,0,0,0, # 00 - 07
-0,0,0,0,0,0,0,0, # 08 - 0f
-0,0,0,0,0,0,0,0, # 10 - 17
-0,0,0,1,0,0,0,0, # 18 - 1f
-0,0,0,0,0,0,0,0, # 20 - 27
-0,0,0,0,0,0,0,0, # 28 - 2f
-0,0,0,0,0,0,0,0, # 30 - 37
-0,0,0,0,0,0,0,0, # 38 - 3f
-0,0,0,0,0,0,0,0, # 40 - 47
-0,0,0,0,0,0,0,0, # 48 - 4f
-0,0,0,0,0,0,0,0, # 50 - 57
-0,0,0,0,0,0,0,0, # 58 - 5f
-0,0,0,0,0,0,0,0, # 60 - 67
-0,0,0,0,0,0,0,0, # 68 - 6f
-0,0,0,0,0,0,0,0, # 70 - 77
-0,0,0,4,0,5,2,0, # 78 - 7f
-1,1,1,1,1,1,1,1, # 80 - 87
-1,1,1,1,1,1,1,1, # 88 - 8f
-1,1,1,1,1,1,1,1, # 90 - 97
-1,1,1,1,1,1,1,1, # 98 - 9f
-1,1,1,1,1,1,1,1, # a0 - a7
-1,1,1,1,1,1,1,1, # a8 - af
-1,1,1,1,1,1,1,1, # b0 - b7
-1,1,1,1,1,1,1,1, # b8 - bf
-1,1,1,1,1,1,1,1, # c0 - c7
-1,1,1,1,1,1,1,1, # c8 - cf
-1,1,1,1,1,1,1,1, # d0 - d7
-1,1,1,1,1,1,1,1, # d8 - df
-1,1,1,1,1,1,1,1, # e0 - e7
-1,1,1,1,1,1,1,1, # e8 - ef
-1,1,1,1,1,1,1,1, # f0 - f7
-1,1,1,1,1,1,1,1, # f8 - ff
+ 1, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
+ 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
+ 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27
+ 0, 0, 0, 0, 0, 0, 0, 0, # 28 - 2f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
+ 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47
+ 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
+ 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
+ 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
+ 0, 0, 0, 4, 0, 5, 2, 0, # 78 - 7f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 80 - 87
+ 1, 1, 1, 1, 1, 1, 1, 1, # 88 - 8f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 90 - 97
+ 1, 1, 1, 1, 1, 1, 1, 1, # 98 - 9f
+ 1, 1, 1, 1, 1, 1, 1, 1, # a0 - a7
+ 1, 1, 1, 1, 1, 1, 1, 1, # a8 - af
+ 1, 1, 1, 1, 1, 1, 1, 1, # b0 - b7
+ 1, 1, 1, 1, 1, 1, 1, 1, # b8 - bf
+ 1, 1, 1, 1, 1, 1, 1, 1, # c0 - c7
+ 1, 1, 1, 1, 1, 1, 1, 1, # c8 - cf
+ 1, 1, 1, 1, 1, 1, 1, 1, # d0 - d7
+ 1, 1, 1, 1, 1, 1, 1, 1, # d8 - df
+ 1, 1, 1, 1, 1, 1, 1, 1, # e0 - e7
+ 1, 1, 1, 1, 1, 1, 1, 1, # e8 - ef
+ 1, 1, 1, 1, 1, 1, 1, 1, # f0 - f7
+ 1, 1, 1, 1, 1, 1, 1, 1, # f8 - ff
)
HZ_ST = (
-MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
-MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
- 5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
- 4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
- 4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
+MachineState.START, MachineState.ERROR, 3, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, # 00-07
+MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 08-0f
+MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, 4, MachineState.ERROR, # 10-17
+ 5, MachineState.ERROR, 6, MachineState.ERROR, 5, 5, 4, MachineState.ERROR, # 18-1f
+ 4, MachineState.ERROR, 4, 4, 4, MachineState.ERROR, 4, MachineState.ERROR, # 20-27
+ 4, MachineState.ITS_ME, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 28-2f
)
+# fmt: on
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
-HZ_SM_MODEL = {'class_table': HZ_CLS,
- 'class_factor': 6,
- 'state_table': HZ_ST,
- 'char_len_table': HZ_CHAR_LEN_TABLE,
- 'name': "HZ-GB-2312",
- 'language': 'Chinese'}
+HZ_SM_MODEL = {
+ "class_table": HZ_CLS,
+ "class_factor": 6,
+ "state_table": HZ_ST,
+ "char_len_table": HZ_CHAR_LEN_TABLE,
+ "name": "HZ-GB-2312",
+ "language": "Chinese",
+}
+# fmt: off
ISO2022CN_CLS = (
-2,0,0,0,0,0,0,0, # 00 - 07
-0,0,0,0,0,0,0,0, # 08 - 0f
-0,0,0,0,0,0,0,0, # 10 - 17
-0,0,0,1,0,0,0,0, # 18 - 1f
-0,0,0,0,0,0,0,0, # 20 - 27
-0,3,0,0,0,0,0,0, # 28 - 2f
-0,0,0,0,0,0,0,0, # 30 - 37
-0,0,0,0,0,0,0,0, # 38 - 3f
-0,0,0,4,0,0,0,0, # 40 - 47
-0,0,0,0,0,0,0,0, # 48 - 4f
-0,0,0,0,0,0,0,0, # 50 - 57
-0,0,0,0,0,0,0,0, # 58 - 5f
-0,0,0,0,0,0,0,0, # 60 - 67
-0,0,0,0,0,0,0,0, # 68 - 6f
-0,0,0,0,0,0,0,0, # 70 - 77
-0,0,0,0,0,0,0,0, # 78 - 7f
-2,2,2,2,2,2,2,2, # 80 - 87
-2,2,2,2,2,2,2,2, # 88 - 8f
-2,2,2,2,2,2,2,2, # 90 - 97
-2,2,2,2,2,2,2,2, # 98 - 9f
-2,2,2,2,2,2,2,2, # a0 - a7
-2,2,2,2,2,2,2,2, # a8 - af
-2,2,2,2,2,2,2,2, # b0 - b7
-2,2,2,2,2,2,2,2, # b8 - bf
-2,2,2,2,2,2,2,2, # c0 - c7
-2,2,2,2,2,2,2,2, # c8 - cf
-2,2,2,2,2,2,2,2, # d0 - d7
-2,2,2,2,2,2,2,2, # d8 - df
-2,2,2,2,2,2,2,2, # e0 - e7
-2,2,2,2,2,2,2,2, # e8 - ef
-2,2,2,2,2,2,2,2, # f0 - f7
-2,2,2,2,2,2,2,2, # f8 - ff
+ 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
+ 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
+ 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27
+ 0, 3, 0, 0, 0, 0, 0, 0, # 28 - 2f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
+ 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
+ 0, 0, 0, 4, 0, 0, 0, 0, # 40 - 47
+ 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
+ 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
+ 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
+ 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87
+ 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97
+ 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f
+ 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
+ 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
+ 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
+ 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
+ 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
+ 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
+ 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
+ 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
+ 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
+ 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
+ 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
+ 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff
)
ISO2022CN_ST = (
-MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
-MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
-MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
-MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
- 5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
+ MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 00-07
+ MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 08-0f
+ MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 10-17
+ MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, # 18-1f
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 20-27
+ 5, 6, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 28-2f
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 30-37
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, # 38-3f
)
+# fmt: on
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
-ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS,
- 'class_factor': 9,
- 'state_table': ISO2022CN_ST,
- 'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
- 'name': "ISO-2022-CN",
- 'language': 'Chinese'}
+ISO2022CN_SM_MODEL = {
+ "class_table": ISO2022CN_CLS,
+ "class_factor": 9,
+ "state_table": ISO2022CN_ST,
+ "char_len_table": ISO2022CN_CHAR_LEN_TABLE,
+ "name": "ISO-2022-CN",
+ "language": "Chinese",
+}
+# fmt: off
ISO2022JP_CLS = (
-2,0,0,0,0,0,0,0, # 00 - 07
-0,0,0,0,0,0,2,2, # 08 - 0f
-0,0,0,0,0,0,0,0, # 10 - 17
-0,0,0,1,0,0,0,0, # 18 - 1f
-0,0,0,0,7,0,0,0, # 20 - 27
-3,0,0,0,0,0,0,0, # 28 - 2f
-0,0,0,0,0,0,0,0, # 30 - 37
-0,0,0,0,0,0,0,0, # 38 - 3f
-6,0,4,0,8,0,0,0, # 40 - 47
-0,9,5,0,0,0,0,0, # 48 - 4f
-0,0,0,0,0,0,0,0, # 50 - 57
-0,0,0,0,0,0,0,0, # 58 - 5f
-0,0,0,0,0,0,0,0, # 60 - 67
-0,0,0,0,0,0,0,0, # 68 - 6f
-0,0,0,0,0,0,0,0, # 70 - 77
-0,0,0,0,0,0,0,0, # 78 - 7f
-2,2,2,2,2,2,2,2, # 80 - 87
-2,2,2,2,2,2,2,2, # 88 - 8f
-2,2,2,2,2,2,2,2, # 90 - 97
-2,2,2,2,2,2,2,2, # 98 - 9f
-2,2,2,2,2,2,2,2, # a0 - a7
-2,2,2,2,2,2,2,2, # a8 - af
-2,2,2,2,2,2,2,2, # b0 - b7
-2,2,2,2,2,2,2,2, # b8 - bf
-2,2,2,2,2,2,2,2, # c0 - c7
-2,2,2,2,2,2,2,2, # c8 - cf
-2,2,2,2,2,2,2,2, # d0 - d7
-2,2,2,2,2,2,2,2, # d8 - df
-2,2,2,2,2,2,2,2, # e0 - e7
-2,2,2,2,2,2,2,2, # e8 - ef
-2,2,2,2,2,2,2,2, # f0 - f7
-2,2,2,2,2,2,2,2, # f8 - ff
+ 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
+ 0, 0, 0, 0, 0, 0, 2, 2, # 08 - 0f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
+ 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
+ 0, 0, 0, 0, 7, 0, 0, 0, # 20 - 27
+ 3, 0, 0, 0, 0, 0, 0, 0, # 28 - 2f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
+ 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
+ 6, 0, 4, 0, 8, 0, 0, 0, # 40 - 47
+ 0, 9, 5, 0, 0, 0, 0, 0, # 48 - 4f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
+ 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
+ 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
+ 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87
+ 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97
+ 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f
+ 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
+ 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
+ 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
+ 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
+ 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
+ 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
+ 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
+ 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
+ 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
+ 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
+ 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
+ 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff
)
ISO2022JP_ST = (
-MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
-MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
-MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
-MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
+ MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 00-07
+ MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 08-0f
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 10-17
+ MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, # 18-1f
+ MachineState.ERROR, 5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, MachineState.ERROR, # 20-27
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 6, MachineState.ITS_ME, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, # 28-2f
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, # 30-37
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 38-3f
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, MachineState.START, # 40-47
)
+# fmt: on
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS,
- 'class_factor': 10,
- 'state_table': ISO2022JP_ST,
- 'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
- 'name': "ISO-2022-JP",
- 'language': 'Japanese'}
+ISO2022JP_SM_MODEL = {
+ "class_table": ISO2022JP_CLS,
+ "class_factor": 10,
+ "state_table": ISO2022JP_ST,
+ "char_len_table": ISO2022JP_CHAR_LEN_TABLE,
+ "name": "ISO-2022-JP",
+ "language": "Japanese",
+}
+# fmt: off
ISO2022KR_CLS = (
-2,0,0,0,0,0,0,0, # 00 - 07
-0,0,0,0,0,0,0,0, # 08 - 0f
-0,0,0,0,0,0,0,0, # 10 - 17
-0,0,0,1,0,0,0,0, # 18 - 1f
-0,0,0,0,3,0,0,0, # 20 - 27
-0,4,0,0,0,0,0,0, # 28 - 2f
-0,0,0,0,0,0,0,0, # 30 - 37
-0,0,0,0,0,0,0,0, # 38 - 3f
-0,0,0,5,0,0,0,0, # 40 - 47
-0,0,0,0,0,0,0,0, # 48 - 4f
-0,0,0,0,0,0,0,0, # 50 - 57
-0,0,0,0,0,0,0,0, # 58 - 5f
-0,0,0,0,0,0,0,0, # 60 - 67
-0,0,0,0,0,0,0,0, # 68 - 6f
-0,0,0,0,0,0,0,0, # 70 - 77
-0,0,0,0,0,0,0,0, # 78 - 7f
-2,2,2,2,2,2,2,2, # 80 - 87
-2,2,2,2,2,2,2,2, # 88 - 8f
-2,2,2,2,2,2,2,2, # 90 - 97
-2,2,2,2,2,2,2,2, # 98 - 9f
-2,2,2,2,2,2,2,2, # a0 - a7
-2,2,2,2,2,2,2,2, # a8 - af
-2,2,2,2,2,2,2,2, # b0 - b7
-2,2,2,2,2,2,2,2, # b8 - bf
-2,2,2,2,2,2,2,2, # c0 - c7
-2,2,2,2,2,2,2,2, # c8 - cf
-2,2,2,2,2,2,2,2, # d0 - d7
-2,2,2,2,2,2,2,2, # d8 - df
-2,2,2,2,2,2,2,2, # e0 - e7
-2,2,2,2,2,2,2,2, # e8 - ef
-2,2,2,2,2,2,2,2, # f0 - f7
-2,2,2,2,2,2,2,2, # f8 - ff
+ 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
+ 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
+ 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
+ 0, 0, 0, 0, 3, 0, 0, 0, # 20 - 27
+ 0, 4, 0, 0, 0, 0, 0, 0, # 28 - 2f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
+ 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
+ 0, 0, 0, 5, 0, 0, 0, 0, # 40 - 47
+ 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
+ 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
+ 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
+ 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87
+ 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97
+ 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f
+ 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
+ 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
+ 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
+ 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
+ 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
+ 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
+ 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
+ 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
+ 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
+ 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
+ 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
+ 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff
)
ISO2022KR_ST = (
-MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
-MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
-MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
+ MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, # 00-07
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 08-0f
+ MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, MachineState.ERROR, # 10-17
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 18-1f
+ MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 20-27
)
+# fmt: on
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
-ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS,
- 'class_factor': 6,
- 'state_table': ISO2022KR_ST,
- 'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
- 'name': "ISO-2022-KR",
- 'language': 'Korean'}
-
-
+ISO2022KR_SM_MODEL = {
+ "class_table": ISO2022KR_CLS,
+ "class_factor": 6,
+ "state_table": ISO2022KR_ST,
+ "char_len_table": ISO2022KR_CHAR_LEN_TABLE,
+ "name": "ISO-2022-KR",
+ "language": "Korean",
+}
diff --git a/libs/chardet/eucjpprober.py b/libs/chardet/eucjpprober.py
index 20ce8f7d1..abf2e66e2 100644
--- a/libs/chardet/eucjpprober.py
+++ b/libs/chardet/eucjpprober.py
@@ -25,24 +25,24 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-from .enums import ProbingState, MachineState
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCJPDistributionAnalysis
+from .codingstatemachine import CodingStateMachine
+from .enums import MachineState, ProbingState
from .jpcntx import EUCJPContextAnalysis
+from .mbcharsetprober import MultiByteCharSetProber
from .mbcssm import EUCJP_SM_MODEL
class EUCJPProber(MultiByteCharSetProber):
def __init__(self):
- super(EUCJPProber, self).__init__()
+ super().__init__()
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
self.distribution_analyzer = EUCJPDistributionAnalysis()
self.context_analyzer = EUCJPContextAnalysis()
self.reset()
def reset(self):
- super(EUCJPProber, self).reset()
+ super().reset()
self.context_analyzer.reset()
@property
@@ -54,34 +54,37 @@ class EUCJPProber(MultiByteCharSetProber):
return "Japanese"
def feed(self, byte_str):
- for i in range(len(byte_str)):
- # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
- coding_state = self.coding_sm.next_state(byte_str[i])
+ for i, byte in enumerate(byte_str):
+ # PY3K: byte_str is a byte array, so byte is an int, not a byte
+ coding_state = self.coding_sm.next_state(byte)
if coding_state == MachineState.ERROR:
- self.logger.debug('%s %s prober hit error at byte %s',
- self.charset_name, self.language, i)
+ self.logger.debug(
+ "%s %s prober hit error at byte %s",
+ self.charset_name,
+ self.language,
+ i,
+ )
self._state = ProbingState.NOT_ME
break
- elif coding_state == MachineState.ITS_ME:
+ if coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
- elif coding_state == MachineState.START:
+ if coding_state == MachineState.START:
char_len = self.coding_sm.get_current_charlen()
if i == 0:
- self._last_char[1] = byte_str[0]
+ self._last_char[1] = byte
self.context_analyzer.feed(self._last_char, char_len)
self.distribution_analyzer.feed(self._last_char, char_len)
else:
- self.context_analyzer.feed(byte_str[i - 1:i + 1],
- char_len)
- self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
- char_len)
+ self.context_analyzer.feed(byte_str[i - 1 : i + 1], char_len)
+ self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len)
self._last_char[0] = byte_str[-1]
if self.state == ProbingState.DETECTING:
- if (self.context_analyzer.got_enough_data() and
- (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
+ if self.context_analyzer.got_enough_data() and (
+ self.get_confidence() > self.SHORTCUT_THRESHOLD
+ ):
self._state = ProbingState.FOUND_IT
return self.state
diff --git a/libs/chardet/euckrfreq.py b/libs/chardet/euckrfreq.py
index b68078cb9..7dc3b1038 100644
--- a/libs/chardet/euckrfreq.py
+++ b/libs/chardet/euckrfreq.py
@@ -43,6 +43,7 @@ EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
EUCKR_TABLE_SIZE = 2352
# Char to FreqOrder table ,
+# fmt: off
EUCKR_CHAR_TO_FREQ_ORDER = (
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
@@ -192,4 +193,4 @@ EUCKR_CHAR_TO_FREQ_ORDER = (
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256
)
-
+# fmt: on
diff --git a/libs/chardet/euckrprober.py b/libs/chardet/euckrprober.py
index 345a060d0..154a6d216 100644
--- a/libs/chardet/euckrprober.py
+++ b/libs/chardet/euckrprober.py
@@ -25,15 +25,15 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCKRDistributionAnalysis
+from .codingstatemachine import CodingStateMachine
+from .mbcharsetprober import MultiByteCharSetProber
from .mbcssm import EUCKR_SM_MODEL
class EUCKRProber(MultiByteCharSetProber):
def __init__(self):
- super(EUCKRProber, self).__init__()
+ super().__init__()
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
self.distribution_analyzer = EUCKRDistributionAnalysis()
self.reset()
diff --git a/libs/chardet/euctwfreq.py b/libs/chardet/euctwfreq.py
index ed7a995a3..4900ccc16 100644
--- a/libs/chardet/euctwfreq.py
+++ b/libs/chardet/euctwfreq.py
@@ -43,345 +43,346 @@
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
-# Char to FreqOrder table ,
+# Char to FreqOrder table
EUCTW_TABLE_SIZE = 5376
+# fmt: off
EUCTW_CHAR_TO_FREQ_ORDER = (
- 1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
-3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
-1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
- 63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
-3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
-4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
-7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
- 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
- 179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
- 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
-2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
-1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
-3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
- 706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
-1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
-3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
-2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
- 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
-3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
-1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
-7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
- 266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
-7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
-1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
- 32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
- 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
-3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
-3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
- 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
-2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
-2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
- 314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
- 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
-3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
-1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
-1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
-1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
-2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
- 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
-4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
-1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
-7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
-2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
- 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
- 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
- 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
- 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
-7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
- 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
-1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
- 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
- 690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
-7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
-1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
- 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
-3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
-4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
-3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
- 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
- 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
-1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
-4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
-3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
-3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
-2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
-7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
-3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
-7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
-1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
-2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
-1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
- 78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
-1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
-4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
-3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
- 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
- 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
- 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
-2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
-7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
-1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
-2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
-1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
-1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
-7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
-7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
-7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
-3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
-4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
-1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
-7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
-2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
-7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
-3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
-3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
-7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
-2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
-7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
- 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
-4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
-2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
-7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
-3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
-2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
-2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
- 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
-2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
-1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
-1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
-2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
-1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
-7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
-7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
-2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
-4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
-1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
-7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
- 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
-4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
- 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
-2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
- 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
-1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
-1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
- 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
-3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
-3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
-1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
-3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
-7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
-7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
-1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
-2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
-1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
-3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
-2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
-3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
-2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
-4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
-4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
-3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
- 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
-3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
- 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
-3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
-3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
-3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
-1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
-7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
- 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
-7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
-1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
- 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
-4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
-3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
- 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
-2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
-2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
-3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
-1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
-4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
-2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
-1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
-1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
-2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
-3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
-1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
-7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
-1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
-4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
-1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
- 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
-1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
-3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
-3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
-2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
-1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
-4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
- 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
-7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
-2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
-3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
-4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
- 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
-7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
-7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
-1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
-4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
-3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
-2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
-3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
-3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
-2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
-1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
-4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
-3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
-3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
-2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
-4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
-7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
-3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
-2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
-3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
-1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
-2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
-3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
-4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
-2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
-2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
-7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
-1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
-2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
-1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
-3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
-4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
-2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
-3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
-3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
-2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
-4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
-2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
-3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
-4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
-7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
-3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
- 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
-1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
-4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
-1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
-4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
-7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
- 510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
-7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
-2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
-1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
-1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
-3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
- 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
- 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
- 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
-3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
-2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
- 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
-7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
-1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
-3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
-7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
-1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
-7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
-4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
-1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
-2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
-2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
-4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
- 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
- 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
-3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
-3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
-1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
-2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
-7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
-1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
-1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
-3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
- 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
-1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
-4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
-7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
-2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
-3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
- 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
-1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
-2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
-2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
-7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
-7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
-7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
-2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
-2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
-1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
-4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
-3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
-3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
-4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
-4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
-2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
-2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
-7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
-4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
-7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
-2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
-1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
-3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
-4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
-2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
- 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
-2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
-1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
-2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
-2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
-4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
-7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
-1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
-3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
-7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
-1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
-8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
-2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
-8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
-2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
-2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
-8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
-8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
-8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
- 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
-8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
-4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
-3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
-8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
-1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
-8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
- 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
-1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
- 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
-4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
-1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
-4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
-1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
- 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
-3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
-4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
-8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
- 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
-3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
- 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
-2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
+ 1, 1800, 1506, 255, 1431, 198, 9, 82, 6, 7310, 177, 202, 3615, 1256, 2808, 110, # 2742
+ 3735, 33, 3241, 261, 76, 44, 2113, 16, 2931, 2184, 1176, 659, 3868, 26, 3404, 2643, # 2758
+ 1198, 3869, 3313, 4060, 410, 2211, 302, 590, 361, 1963, 8, 204, 58, 4296, 7311, 1931, # 2774
+ 63, 7312, 7313, 317, 1614, 75, 222, 159, 4061, 2412, 1480, 7314, 3500, 3068, 224, 2809, # 2790
+ 3616, 3, 10, 3870, 1471, 29, 2774, 1135, 2852, 1939, 873, 130, 3242, 1123, 312, 7315, # 2806
+ 4297, 2051, 507, 252, 682, 7316, 142, 1914, 124, 206, 2932, 34, 3501, 3173, 64, 604, # 2822
+ 7317, 2494, 1976, 1977, 155, 1990, 645, 641, 1606, 7318, 3405, 337, 72, 406, 7319, 80, # 2838
+ 630, 238, 3174, 1509, 263, 939, 1092, 2644, 756, 1440, 1094, 3406, 449, 69, 2969, 591, # 2854
+ 179, 2095, 471, 115, 2034, 1843, 60, 50, 2970, 134, 806, 1868, 734, 2035, 3407, 180, # 2870
+ 995, 1607, 156, 537, 2893, 688, 7320, 319, 1305, 779, 2144, 514, 2374, 298, 4298, 359, # 2886
+ 2495, 90, 2707, 1338, 663, 11, 906, 1099, 2545, 20, 2436, 182, 532, 1716, 7321, 732, # 2902
+ 1376, 4062, 1311, 1420, 3175, 25, 2312, 1056, 113, 399, 382, 1949, 242, 3408, 2467, 529, # 2918
+ 3243, 475, 1447, 3617, 7322, 117, 21, 656, 810, 1297, 2295, 2329, 3502, 7323, 126, 4063, # 2934
+ 706, 456, 150, 613, 4299, 71, 1118, 2036, 4064, 145, 3069, 85, 835, 486, 2114, 1246, # 2950
+ 1426, 428, 727, 1285, 1015, 800, 106, 623, 303, 1281, 7324, 2127, 2354, 347, 3736, 221, # 2966
+ 3503, 3110, 7325, 1955, 1153, 4065, 83, 296, 1199, 3070, 192, 624, 93, 7326, 822, 1897, # 2982
+ 2810, 3111, 795, 2064, 991, 1554, 1542, 1592, 27, 43, 2853, 859, 139, 1456, 860, 4300, # 2998
+ 437, 712, 3871, 164, 2392, 3112, 695, 211, 3017, 2096, 195, 3872, 1608, 3504, 3505, 3618, # 3014
+ 3873, 234, 811, 2971, 2097, 3874, 2229, 1441, 3506, 1615, 2375, 668, 2076, 1638, 305, 228, # 3030
+ 1664, 4301, 467, 415, 7327, 262, 2098, 1593, 239, 108, 300, 200, 1033, 512, 1247, 2077, # 3046
+ 7328, 7329, 2173, 3176, 3619, 2673, 593, 845, 1062, 3244, 88, 1723, 2037, 3875, 1950, 212, # 3062
+ 266, 152, 149, 468, 1898, 4066, 4302, 77, 187, 7330, 3018, 37, 5, 2972, 7331, 3876, # 3078
+ 7332, 7333, 39, 2517, 4303, 2894, 3177, 2078, 55, 148, 74, 4304, 545, 483, 1474, 1029, # 3094
+ 1665, 217, 1869, 1531, 3113, 1104, 2645, 4067, 24, 172, 3507, 900, 3877, 3508, 3509, 4305, # 3110
+ 32, 1408, 2811, 1312, 329, 487, 2355, 2247, 2708, 784, 2674, 4, 3019, 3314, 1427, 1788, # 3126
+ 188, 109, 499, 7334, 3620, 1717, 1789, 888, 1217, 3020, 4306, 7335, 3510, 7336, 3315, 1520, # 3142
+ 3621, 3878, 196, 1034, 775, 7337, 7338, 929, 1815, 249, 439, 38, 7339, 1063, 7340, 794, # 3158
+ 3879, 1435, 2296, 46, 178, 3245, 2065, 7341, 2376, 7342, 214, 1709, 4307, 804, 35, 707, # 3174
+ 324, 3622, 1601, 2546, 140, 459, 4068, 7343, 7344, 1365, 839, 272, 978, 2257, 2572, 3409, # 3190
+ 2128, 1363, 3623, 1423, 697, 100, 3071, 48, 70, 1231, 495, 3114, 2193, 7345, 1294, 7346, # 3206
+ 2079, 462, 586, 1042, 3246, 853, 256, 988, 185, 2377, 3410, 1698, 434, 1084, 7347, 3411, # 3222
+ 314, 2615, 2775, 4308, 2330, 2331, 569, 2280, 637, 1816, 2518, 757, 1162, 1878, 1616, 3412, # 3238
+ 287, 1577, 2115, 768, 4309, 1671, 2854, 3511, 2519, 1321, 3737, 909, 2413, 7348, 4069, 933, # 3254
+ 3738, 7349, 2052, 2356, 1222, 4310, 765, 2414, 1322, 786, 4311, 7350, 1919, 1462, 1677, 2895, # 3270
+ 1699, 7351, 4312, 1424, 2437, 3115, 3624, 2590, 3316, 1774, 1940, 3413, 3880, 4070, 309, 1369, # 3286
+ 1130, 2812, 364, 2230, 1653, 1299, 3881, 3512, 3882, 3883, 2646, 525, 1085, 3021, 902, 2000, # 3302
+ 1475, 964, 4313, 421, 1844, 1415, 1057, 2281, 940, 1364, 3116, 376, 4314, 4315, 1381, 7, # 3318
+ 2520, 983, 2378, 336, 1710, 2675, 1845, 321, 3414, 559, 1131, 3022, 2742, 1808, 1132, 1313, # 3334
+ 265, 1481, 1857, 7352, 352, 1203, 2813, 3247, 167, 1089, 420, 2814, 776, 792, 1724, 3513, # 3350
+ 4071, 2438, 3248, 7353, 4072, 7354, 446, 229, 333, 2743, 901, 3739, 1200, 1557, 4316, 2647, # 3366
+ 1920, 395, 2744, 2676, 3740, 4073, 1835, 125, 916, 3178, 2616, 4317, 7355, 7356, 3741, 7357, # 3382
+ 7358, 7359, 4318, 3117, 3625, 1133, 2547, 1757, 3415, 1510, 2313, 1409, 3514, 7360, 2145, 438, # 3398
+ 2591, 2896, 2379, 3317, 1068, 958, 3023, 461, 311, 2855, 2677, 4074, 1915, 3179, 4075, 1978, # 3414
+ 383, 750, 2745, 2617, 4076, 274, 539, 385, 1278, 1442, 7361, 1154, 1964, 384, 561, 210, # 3430
+ 98, 1295, 2548, 3515, 7362, 1711, 2415, 1482, 3416, 3884, 2897, 1257, 129, 7363, 3742, 642, # 3446
+ 523, 2776, 2777, 2648, 7364, 141, 2231, 1333, 68, 176, 441, 876, 907, 4077, 603, 2592, # 3462
+ 710, 171, 3417, 404, 549, 18, 3118, 2393, 1410, 3626, 1666, 7365, 3516, 4319, 2898, 4320, # 3478
+ 7366, 2973, 368, 7367, 146, 366, 99, 871, 3627, 1543, 748, 807, 1586, 1185, 22, 2258, # 3494
+ 379, 3743, 3180, 7368, 3181, 505, 1941, 2618, 1991, 1382, 2314, 7369, 380, 2357, 218, 702, # 3510
+ 1817, 1248, 3418, 3024, 3517, 3318, 3249, 7370, 2974, 3628, 930, 3250, 3744, 7371, 59, 7372, # 3526
+ 585, 601, 4078, 497, 3419, 1112, 1314, 4321, 1801, 7373, 1223, 1472, 2174, 7374, 749, 1836, # 3542
+ 690, 1899, 3745, 1772, 3885, 1476, 429, 1043, 1790, 2232, 2116, 917, 4079, 447, 1086, 1629, # 3558
+ 7375, 556, 7376, 7377, 2020, 1654, 844, 1090, 105, 550, 966, 1758, 2815, 1008, 1782, 686, # 3574
+ 1095, 7378, 2282, 793, 1602, 7379, 3518, 2593, 4322, 4080, 2933, 2297, 4323, 3746, 980, 2496, # 3590
+ 544, 353, 527, 4324, 908, 2678, 2899, 7380, 381, 2619, 1942, 1348, 7381, 1341, 1252, 560, # 3606
+ 3072, 7382, 3420, 2856, 7383, 2053, 973, 886, 2080, 143, 4325, 7384, 7385, 157, 3886, 496, # 3622
+ 4081, 57, 840, 540, 2038, 4326, 4327, 3421, 2117, 1445, 970, 2259, 1748, 1965, 2081, 4082, # 3638
+ 3119, 1234, 1775, 3251, 2816, 3629, 773, 1206, 2129, 1066, 2039, 1326, 3887, 1738, 1725, 4083, # 3654
+ 279, 3120, 51, 1544, 2594, 423, 1578, 2130, 2066, 173, 4328, 1879, 7386, 7387, 1583, 264, # 3670
+ 610, 3630, 4329, 2439, 280, 154, 7388, 7389, 7390, 1739, 338, 1282, 3073, 693, 2857, 1411, # 3686
+ 1074, 3747, 2440, 7391, 4330, 7392, 7393, 1240, 952, 2394, 7394, 2900, 1538, 2679, 685, 1483, # 3702
+ 4084, 2468, 1436, 953, 4085, 2054, 4331, 671, 2395, 79, 4086, 2441, 3252, 608, 567, 2680, # 3718
+ 3422, 4087, 4088, 1691, 393, 1261, 1791, 2396, 7395, 4332, 7396, 7397, 7398, 7399, 1383, 1672, # 3734
+ 3748, 3182, 1464, 522, 1119, 661, 1150, 216, 675, 4333, 3888, 1432, 3519, 609, 4334, 2681, # 3750
+ 2397, 7400, 7401, 7402, 4089, 3025, 0, 7403, 2469, 315, 231, 2442, 301, 3319, 4335, 2380, # 3766
+ 7404, 233, 4090, 3631, 1818, 4336, 4337, 7405, 96, 1776, 1315, 2082, 7406, 257, 7407, 1809, # 3782
+ 3632, 2709, 1139, 1819, 4091, 2021, 1124, 2163, 2778, 1777, 2649, 7408, 3074, 363, 1655, 3183, # 3798
+ 7409, 2975, 7410, 7411, 7412, 3889, 1567, 3890, 718, 103, 3184, 849, 1443, 341, 3320, 2934, # 3814
+ 1484, 7413, 1712, 127, 67, 339, 4092, 2398, 679, 1412, 821, 7414, 7415, 834, 738, 351, # 3830
+ 2976, 2146, 846, 235, 1497, 1880, 418, 1992, 3749, 2710, 186, 1100, 2147, 2746, 3520, 1545, # 3846
+ 1355, 2935, 2858, 1377, 583, 3891, 4093, 2573, 2977, 7416, 1298, 3633, 1078, 2549, 3634, 2358, # 3862
+ 78, 3750, 3751, 267, 1289, 2099, 2001, 1594, 4094, 348, 369, 1274, 2194, 2175, 1837, 4338, # 3878
+ 1820, 2817, 3635, 2747, 2283, 2002, 4339, 2936, 2748, 144, 3321, 882, 4340, 3892, 2749, 3423, # 3894
+ 4341, 2901, 7417, 4095, 1726, 320, 7418, 3893, 3026, 788, 2978, 7419, 2818, 1773, 1327, 2859, # 3910
+ 3894, 2819, 7420, 1306, 4342, 2003, 1700, 3752, 3521, 2359, 2650, 787, 2022, 506, 824, 3636, # 3926
+ 534, 323, 4343, 1044, 3322, 2023, 1900, 946, 3424, 7421, 1778, 1500, 1678, 7422, 1881, 4344, # 3942
+ 165, 243, 4345, 3637, 2521, 123, 683, 4096, 764, 4346, 36, 3895, 1792, 589, 2902, 816, # 3958
+ 626, 1667, 3027, 2233, 1639, 1555, 1622, 3753, 3896, 7423, 3897, 2860, 1370, 1228, 1932, 891, # 3974
+ 2083, 2903, 304, 4097, 7424, 292, 2979, 2711, 3522, 691, 2100, 4098, 1115, 4347, 118, 662, # 3990
+ 7425, 611, 1156, 854, 2381, 1316, 2861, 2, 386, 515, 2904, 7426, 7427, 3253, 868, 2234, # 4006
+ 1486, 855, 2651, 785, 2212, 3028, 7428, 1040, 3185, 3523, 7429, 3121, 448, 7430, 1525, 7431, # 4022
+ 2164, 4348, 7432, 3754, 7433, 4099, 2820, 3524, 3122, 503, 818, 3898, 3123, 1568, 814, 676, # 4038
+ 1444, 306, 1749, 7434, 3755, 1416, 1030, 197, 1428, 805, 2821, 1501, 4349, 7435, 7436, 7437, # 4054
+ 1993, 7438, 4350, 7439, 7440, 2195, 13, 2779, 3638, 2980, 3124, 1229, 1916, 7441, 3756, 2131, # 4070
+ 7442, 4100, 4351, 2399, 3525, 7443, 2213, 1511, 1727, 1120, 7444, 7445, 646, 3757, 2443, 307, # 4086
+ 7446, 7447, 1595, 3186, 7448, 7449, 7450, 3639, 1113, 1356, 3899, 1465, 2522, 2523, 7451, 519, # 4102
+ 7452, 128, 2132, 92, 2284, 1979, 7453, 3900, 1512, 342, 3125, 2196, 7454, 2780, 2214, 1980, # 4118
+ 3323, 7455, 290, 1656, 1317, 789, 827, 2360, 7456, 3758, 4352, 562, 581, 3901, 7457, 401, # 4134
+ 4353, 2248, 94, 4354, 1399, 2781, 7458, 1463, 2024, 4355, 3187, 1943, 7459, 828, 1105, 4101, # 4150
+ 1262, 1394, 7460, 4102, 605, 4356, 7461, 1783, 2862, 7462, 2822, 819, 2101, 578, 2197, 2937, # 4166
+ 7463, 1502, 436, 3254, 4103, 3255, 2823, 3902, 2905, 3425, 3426, 7464, 2712, 2315, 7465, 7466, # 4182
+ 2332, 2067, 23, 4357, 193, 826, 3759, 2102, 699, 1630, 4104, 3075, 390, 1793, 1064, 3526, # 4198
+ 7467, 1579, 3076, 3077, 1400, 7468, 4105, 1838, 1640, 2863, 7469, 4358, 4359, 137, 4106, 598, # 4214
+ 3078, 1966, 780, 104, 974, 2938, 7470, 278, 899, 253, 402, 572, 504, 493, 1339, 7471, # 4230
+ 3903, 1275, 4360, 2574, 2550, 7472, 3640, 3029, 3079, 2249, 565, 1334, 2713, 863, 41, 7473, # 4246
+ 7474, 4361, 7475, 1657, 2333, 19, 463, 2750, 4107, 606, 7476, 2981, 3256, 1087, 2084, 1323, # 4262
+ 2652, 2982, 7477, 1631, 1623, 1750, 4108, 2682, 7478, 2864, 791, 2714, 2653, 2334, 232, 2416, # 4278
+ 7479, 2983, 1498, 7480, 2654, 2620, 755, 1366, 3641, 3257, 3126, 2025, 1609, 119, 1917, 3427, # 4294
+ 862, 1026, 4109, 7481, 3904, 3760, 4362, 3905, 4363, 2260, 1951, 2470, 7482, 1125, 817, 4110, # 4310
+ 4111, 3906, 1513, 1766, 2040, 1487, 4112, 3030, 3258, 2824, 3761, 3127, 7483, 7484, 1507, 7485, # 4326
+ 2683, 733, 40, 1632, 1106, 2865, 345, 4113, 841, 2524, 230, 4364, 2984, 1846, 3259, 3428, # 4342
+ 7486, 1263, 986, 3429, 7487, 735, 879, 254, 1137, 857, 622, 1300, 1180, 1388, 1562, 3907, # 4358
+ 3908, 2939, 967, 2751, 2655, 1349, 592, 2133, 1692, 3324, 2985, 1994, 4114, 1679, 3909, 1901, # 4374
+ 2185, 7488, 739, 3642, 2715, 1296, 1290, 7489, 4115, 2198, 2199, 1921, 1563, 2595, 2551, 1870, # 4390
+ 2752, 2986, 7490, 435, 7491, 343, 1108, 596, 17, 1751, 4365, 2235, 3430, 3643, 7492, 4366, # 4406
+ 294, 3527, 2940, 1693, 477, 979, 281, 2041, 3528, 643, 2042, 3644, 2621, 2782, 2261, 1031, # 4422
+ 2335, 2134, 2298, 3529, 4367, 367, 1249, 2552, 7493, 3530, 7494, 4368, 1283, 3325, 2004, 240, # 4438
+ 1762, 3326, 4369, 4370, 836, 1069, 3128, 474, 7495, 2148, 2525, 268, 3531, 7496, 3188, 1521, # 4454
+ 1284, 7497, 1658, 1546, 4116, 7498, 3532, 3533, 7499, 4117, 3327, 2684, 1685, 4118, 961, 1673, # 4470
+ 2622, 190, 2005, 2200, 3762, 4371, 4372, 7500, 570, 2497, 3645, 1490, 7501, 4373, 2623, 3260, # 4486
+ 1956, 4374, 584, 1514, 396, 1045, 1944, 7502, 4375, 1967, 2444, 7503, 7504, 4376, 3910, 619, # 4502
+ 7505, 3129, 3261, 215, 2006, 2783, 2553, 3189, 4377, 3190, 4378, 763, 4119, 3763, 4379, 7506, # 4518
+ 7507, 1957, 1767, 2941, 3328, 3646, 1174, 452, 1477, 4380, 3329, 3130, 7508, 2825, 1253, 2382, # 4534
+ 2186, 1091, 2285, 4120, 492, 7509, 638, 1169, 1824, 2135, 1752, 3911, 648, 926, 1021, 1324, # 4550
+ 4381, 520, 4382, 997, 847, 1007, 892, 4383, 3764, 2262, 1871, 3647, 7510, 2400, 1784, 4384, # 4566
+ 1952, 2942, 3080, 3191, 1728, 4121, 2043, 3648, 4385, 2007, 1701, 3131, 1551, 30, 2263, 4122, # 4582
+ 7511, 2026, 4386, 3534, 7512, 501, 7513, 4123, 594, 3431, 2165, 1821, 3535, 3432, 3536, 3192, # 4598
+ 829, 2826, 4124, 7514, 1680, 3132, 1225, 4125, 7515, 3262, 4387, 4126, 3133, 2336, 7516, 4388, # 4614
+ 4127, 7517, 3912, 3913, 7518, 1847, 2383, 2596, 3330, 7519, 4389, 374, 3914, 652, 4128, 4129, # 4630
+ 375, 1140, 798, 7520, 7521, 7522, 2361, 4390, 2264, 546, 1659, 138, 3031, 2445, 4391, 7523, # 4646
+ 2250, 612, 1848, 910, 796, 3765, 1740, 1371, 825, 3766, 3767, 7524, 2906, 2554, 7525, 692, # 4662
+ 444, 3032, 2624, 801, 4392, 4130, 7526, 1491, 244, 1053, 3033, 4131, 4132, 340, 7527, 3915, # 4678
+ 1041, 2987, 293, 1168, 87, 1357, 7528, 1539, 959, 7529, 2236, 721, 694, 4133, 3768, 219, # 4694
+ 1478, 644, 1417, 3331, 2656, 1413, 1401, 1335, 1389, 3916, 7530, 7531, 2988, 2362, 3134, 1825, # 4710
+ 730, 1515, 184, 2827, 66, 4393, 7532, 1660, 2943, 246, 3332, 378, 1457, 226, 3433, 975, # 4726
+ 3917, 2944, 1264, 3537, 674, 696, 7533, 163, 7534, 1141, 2417, 2166, 713, 3538, 3333, 4394, # 4742
+ 3918, 7535, 7536, 1186, 15, 7537, 1079, 1070, 7538, 1522, 3193, 3539, 276, 1050, 2716, 758, # 4758
+ 1126, 653, 2945, 3263, 7539, 2337, 889, 3540, 3919, 3081, 2989, 903, 1250, 4395, 3920, 3434, # 4774
+ 3541, 1342, 1681, 1718, 766, 3264, 286, 89, 2946, 3649, 7540, 1713, 7541, 2597, 3334, 2990, # 4790
+ 7542, 2947, 2215, 3194, 2866, 7543, 4396, 2498, 2526, 181, 387, 1075, 3921, 731, 2187, 3335, # 4806
+ 7544, 3265, 310, 313, 3435, 2299, 770, 4134, 54, 3034, 189, 4397, 3082, 3769, 3922, 7545, # 4822
+ 1230, 1617, 1849, 355, 3542, 4135, 4398, 3336, 111, 4136, 3650, 1350, 3135, 3436, 3035, 4137, # 4838
+ 2149, 3266, 3543, 7546, 2784, 3923, 3924, 2991, 722, 2008, 7547, 1071, 247, 1207, 2338, 2471, # 4854
+ 1378, 4399, 2009, 864, 1437, 1214, 4400, 373, 3770, 1142, 2216, 667, 4401, 442, 2753, 2555, # 4870
+ 3771, 3925, 1968, 4138, 3267, 1839, 837, 170, 1107, 934, 1336, 1882, 7548, 7549, 2118, 4139, # 4886
+ 2828, 743, 1569, 7550, 4402, 4140, 582, 2384, 1418, 3437, 7551, 1802, 7552, 357, 1395, 1729, # 4902
+ 3651, 3268, 2418, 1564, 2237, 7553, 3083, 3772, 1633, 4403, 1114, 2085, 4141, 1532, 7554, 482, # 4918
+ 2446, 4404, 7555, 7556, 1492, 833, 1466, 7557, 2717, 3544, 1641, 2829, 7558, 1526, 1272, 3652, # 4934
+ 4142, 1686, 1794, 416, 2556, 1902, 1953, 1803, 7559, 3773, 2785, 3774, 1159, 2316, 7560, 2867, # 4950
+ 4405, 1610, 1584, 3036, 2419, 2754, 443, 3269, 1163, 3136, 7561, 7562, 3926, 7563, 4143, 2499, # 4966
+ 3037, 4406, 3927, 3137, 2103, 1647, 3545, 2010, 1872, 4144, 7564, 4145, 431, 3438, 7565, 250, # 4982
+ 97, 81, 4146, 7566, 1648, 1850, 1558, 160, 848, 7567, 866, 740, 1694, 7568, 2201, 2830, # 4998
+ 3195, 4147, 4407, 3653, 1687, 950, 2472, 426, 469, 3196, 3654, 3655, 3928, 7569, 7570, 1188, # 5014
+ 424, 1995, 861, 3546, 4148, 3775, 2202, 2685, 168, 1235, 3547, 4149, 7571, 2086, 1674, 4408, # 5030
+ 3337, 3270, 220, 2557, 1009, 7572, 3776, 670, 2992, 332, 1208, 717, 7573, 7574, 3548, 2447, # 5046
+ 3929, 3338, 7575, 513, 7576, 1209, 2868, 3339, 3138, 4409, 1080, 7577, 7578, 7579, 7580, 2527, # 5062
+ 3656, 3549, 815, 1587, 3930, 3931, 7581, 3550, 3439, 3777, 1254, 4410, 1328, 3038, 1390, 3932, # 5078
+ 1741, 3933, 3778, 3934, 7582, 236, 3779, 2448, 3271, 7583, 7584, 3657, 3780, 1273, 3781, 4411, # 5094
+ 7585, 308, 7586, 4412, 245, 4413, 1851, 2473, 1307, 2575, 430, 715, 2136, 2449, 7587, 270, # 5110
+ 199, 2869, 3935, 7588, 3551, 2718, 1753, 761, 1754, 725, 1661, 1840, 4414, 3440, 3658, 7589, # 5126
+ 7590, 587, 14, 3272, 227, 2598, 326, 480, 2265, 943, 2755, 3552, 291, 650, 1883, 7591, # 5142
+ 1702, 1226, 102, 1547, 62, 3441, 904, 4415, 3442, 1164, 4150, 7592, 7593, 1224, 1548, 2756, # 5158
+ 391, 498, 1493, 7594, 1386, 1419, 7595, 2055, 1177, 4416, 813, 880, 1081, 2363, 566, 1145, # 5174
+ 4417, 2286, 1001, 1035, 2558, 2599, 2238, 394, 1286, 7596, 7597, 2068, 7598, 86, 1494, 1730, # 5190
+ 3936, 491, 1588, 745, 897, 2948, 843, 3340, 3937, 2757, 2870, 3273, 1768, 998, 2217, 2069, # 5206
+ 397, 1826, 1195, 1969, 3659, 2993, 3341, 284, 7599, 3782, 2500, 2137, 2119, 1903, 7600, 3938, # 5222
+ 2150, 3939, 4151, 1036, 3443, 1904, 114, 2559, 4152, 209, 1527, 7601, 7602, 2949, 2831, 2625, # 5238
+ 2385, 2719, 3139, 812, 2560, 7603, 3274, 7604, 1559, 737, 1884, 3660, 1210, 885, 28, 2686, # 5254
+ 3553, 3783, 7605, 4153, 1004, 1779, 4418, 7606, 346, 1981, 2218, 2687, 4419, 3784, 1742, 797, # 5270
+ 1642, 3940, 1933, 1072, 1384, 2151, 896, 3941, 3275, 3661, 3197, 2871, 3554, 7607, 2561, 1958, # 5286
+ 4420, 2450, 1785, 7608, 7609, 7610, 3942, 4154, 1005, 1308, 3662, 4155, 2720, 4421, 4422, 1528, # 5302
+ 2600, 161, 1178, 4156, 1982, 987, 4423, 1101, 4157, 631, 3943, 1157, 3198, 2420, 1343, 1241, # 5318
+ 1016, 2239, 2562, 372, 877, 2339, 2501, 1160, 555, 1934, 911, 3944, 7611, 466, 1170, 169, # 5334
+ 1051, 2907, 2688, 3663, 2474, 2994, 1182, 2011, 2563, 1251, 2626, 7612, 992, 2340, 3444, 1540, # 5350
+ 2721, 1201, 2070, 2401, 1996, 2475, 7613, 4424, 528, 1922, 2188, 1503, 1873, 1570, 2364, 3342, # 5366
+ 3276, 7614, 557, 1073, 7615, 1827, 3445, 2087, 2266, 3140, 3039, 3084, 767, 3085, 2786, 4425, # 5382
+ 1006, 4158, 4426, 2341, 1267, 2176, 3664, 3199, 778, 3945, 3200, 2722, 1597, 2657, 7616, 4427, # 5398
+ 7617, 3446, 7618, 7619, 7620, 3277, 2689, 1433, 3278, 131, 95, 1504, 3946, 723, 4159, 3141, # 5414
+ 1841, 3555, 2758, 2189, 3947, 2027, 2104, 3665, 7621, 2995, 3948, 1218, 7622, 3343, 3201, 3949, # 5430
+ 4160, 2576, 248, 1634, 3785, 912, 7623, 2832, 3666, 3040, 3786, 654, 53, 7624, 2996, 7625, # 5446
+ 1688, 4428, 777, 3447, 1032, 3950, 1425, 7626, 191, 820, 2120, 2833, 971, 4429, 931, 3202, # 5462
+ 135, 664, 783, 3787, 1997, 772, 2908, 1935, 3951, 3788, 4430, 2909, 3203, 282, 2723, 640, # 5478
+ 1372, 3448, 1127, 922, 325, 3344, 7627, 7628, 711, 2044, 7629, 7630, 3952, 2219, 2787, 1936, # 5494
+ 3953, 3345, 2220, 2251, 3789, 2300, 7631, 4431, 3790, 1258, 3279, 3954, 3204, 2138, 2950, 3955, # 5510
+ 3956, 7632, 2221, 258, 3205, 4432, 101, 1227, 7633, 3280, 1755, 7634, 1391, 3281, 7635, 2910, # 5526
+ 2056, 893, 7636, 7637, 7638, 1402, 4161, 2342, 7639, 7640, 3206, 3556, 7641, 7642, 878, 1325, # 5542
+ 1780, 2788, 4433, 259, 1385, 2577, 744, 1183, 2267, 4434, 7643, 3957, 2502, 7644, 684, 1024, # 5558
+ 4162, 7645, 472, 3557, 3449, 1165, 3282, 3958, 3959, 322, 2152, 881, 455, 1695, 1152, 1340, # 5574
+ 660, 554, 2153, 4435, 1058, 4436, 4163, 830, 1065, 3346, 3960, 4437, 1923, 7646, 1703, 1918, # 5590
+ 7647, 932, 2268, 122, 7648, 4438, 947, 677, 7649, 3791, 2627, 297, 1905, 1924, 2269, 4439, # 5606
+ 2317, 3283, 7650, 7651, 4164, 7652, 4165, 84, 4166, 112, 989, 7653, 547, 1059, 3961, 701, # 5622
+ 3558, 1019, 7654, 4167, 7655, 3450, 942, 639, 457, 2301, 2451, 993, 2951, 407, 851, 494, # 5638
+ 4440, 3347, 927, 7656, 1237, 7657, 2421, 3348, 573, 4168, 680, 921, 2911, 1279, 1874, 285, # 5654
+ 790, 1448, 1983, 719, 2167, 7658, 7659, 4441, 3962, 3963, 1649, 7660, 1541, 563, 7661, 1077, # 5670
+ 7662, 3349, 3041, 3451, 511, 2997, 3964, 3965, 3667, 3966, 1268, 2564, 3350, 3207, 4442, 4443, # 5686
+ 7663, 535, 1048, 1276, 1189, 2912, 2028, 3142, 1438, 1373, 2834, 2952, 1134, 2012, 7664, 4169, # 5702
+ 1238, 2578, 3086, 1259, 7665, 700, 7666, 2953, 3143, 3668, 4170, 7667, 4171, 1146, 1875, 1906, # 5718
+ 4444, 2601, 3967, 781, 2422, 132, 1589, 203, 147, 273, 2789, 2402, 898, 1786, 2154, 3968, # 5734
+ 3969, 7668, 3792, 2790, 7669, 7670, 4445, 4446, 7671, 3208, 7672, 1635, 3793, 965, 7673, 1804, # 5750
+ 2690, 1516, 3559, 1121, 1082, 1329, 3284, 3970, 1449, 3794, 65, 1128, 2835, 2913, 2759, 1590, # 5766
+ 3795, 7674, 7675, 12, 2658, 45, 976, 2579, 3144, 4447, 517, 2528, 1013, 1037, 3209, 7676, # 5782
+ 3796, 2836, 7677, 3797, 7678, 3452, 7679, 2602, 614, 1998, 2318, 3798, 3087, 2724, 2628, 7680, # 5798
+ 2580, 4172, 599, 1269, 7681, 1810, 3669, 7682, 2691, 3088, 759, 1060, 489, 1805, 3351, 3285, # 5814
+ 1358, 7683, 7684, 2386, 1387, 1215, 2629, 2252, 490, 7685, 7686, 4173, 1759, 2387, 2343, 7687, # 5830
+ 4448, 3799, 1907, 3971, 2630, 1806, 3210, 4449, 3453, 3286, 2760, 2344, 874, 7688, 7689, 3454, # 5846
+ 3670, 1858, 91, 2914, 3671, 3042, 3800, 4450, 7690, 3145, 3972, 2659, 7691, 3455, 1202, 1403, # 5862
+ 3801, 2954, 2529, 1517, 2503, 4451, 3456, 2504, 7692, 4452, 7693, 2692, 1885, 1495, 1731, 3973, # 5878
+ 2365, 4453, 7694, 2029, 7695, 7696, 3974, 2693, 1216, 237, 2581, 4174, 2319, 3975, 3802, 4454, # 5894
+ 4455, 2694, 3560, 3457, 445, 4456, 7697, 7698, 7699, 7700, 2761, 61, 3976, 3672, 1822, 3977, # 5910
+ 7701, 687, 2045, 935, 925, 405, 2660, 703, 1096, 1859, 2725, 4457, 3978, 1876, 1367, 2695, # 5926
+ 3352, 918, 2105, 1781, 2476, 334, 3287, 1611, 1093, 4458, 564, 3146, 3458, 3673, 3353, 945, # 5942
+ 2631, 2057, 4459, 7702, 1925, 872, 4175, 7703, 3459, 2696, 3089, 349, 4176, 3674, 3979, 4460, # 5958
+ 3803, 4177, 3675, 2155, 3980, 4461, 4462, 4178, 4463, 2403, 2046, 782, 3981, 400, 251, 4179, # 5974
+ 1624, 7704, 7705, 277, 3676, 299, 1265, 476, 1191, 3804, 2121, 4180, 4181, 1109, 205, 7706, # 5990
+ 2582, 1000, 2156, 3561, 1860, 7707, 7708, 7709, 4464, 7710, 4465, 2565, 107, 2477, 2157, 3982, # 6006
+ 3460, 3147, 7711, 1533, 541, 1301, 158, 753, 4182, 2872, 3562, 7712, 1696, 370, 1088, 4183, # 6022
+ 4466, 3563, 579, 327, 440, 162, 2240, 269, 1937, 1374, 3461, 968, 3043, 56, 1396, 3090, # 6038
+ 2106, 3288, 3354, 7713, 1926, 2158, 4467, 2998, 7714, 3564, 7715, 7716, 3677, 4468, 2478, 7717, # 6054
+ 2791, 7718, 1650, 4469, 7719, 2603, 7720, 7721, 3983, 2661, 3355, 1149, 3356, 3984, 3805, 3985, # 6070
+ 7722, 1076, 49, 7723, 951, 3211, 3289, 3290, 450, 2837, 920, 7724, 1811, 2792, 2366, 4184, # 6086
+ 1908, 1138, 2367, 3806, 3462, 7725, 3212, 4470, 1909, 1147, 1518, 2423, 4471, 3807, 7726, 4472, # 6102
+ 2388, 2604, 260, 1795, 3213, 7727, 7728, 3808, 3291, 708, 7729, 3565, 1704, 7730, 3566, 1351, # 6118
+ 1618, 3357, 2999, 1886, 944, 4185, 3358, 4186, 3044, 3359, 4187, 7731, 3678, 422, 413, 1714, # 6134
+ 3292, 500, 2058, 2345, 4188, 2479, 7732, 1344, 1910, 954, 7733, 1668, 7734, 7735, 3986, 2404, # 6150
+ 4189, 3567, 3809, 4190, 7736, 2302, 1318, 2505, 3091, 133, 3092, 2873, 4473, 629, 31, 2838, # 6166
+ 2697, 3810, 4474, 850, 949, 4475, 3987, 2955, 1732, 2088, 4191, 1496, 1852, 7737, 3988, 620, # 6182
+ 3214, 981, 1242, 3679, 3360, 1619, 3680, 1643, 3293, 2139, 2452, 1970, 1719, 3463, 2168, 7738, # 6198
+ 3215, 7739, 7740, 3361, 1828, 7741, 1277, 4476, 1565, 2047, 7742, 1636, 3568, 3093, 7743, 869, # 6214
+ 2839, 655, 3811, 3812, 3094, 3989, 3000, 3813, 1310, 3569, 4477, 7744, 7745, 7746, 1733, 558, # 6230
+ 4478, 3681, 335, 1549, 3045, 1756, 4192, 3682, 1945, 3464, 1829, 1291, 1192, 470, 2726, 2107, # 6246
+ 2793, 913, 1054, 3990, 7747, 1027, 7748, 3046, 3991, 4479, 982, 2662, 3362, 3148, 3465, 3216, # 6262
+ 3217, 1946, 2794, 7749, 571, 4480, 7750, 1830, 7751, 3570, 2583, 1523, 2424, 7752, 2089, 984, # 6278
+ 4481, 3683, 1959, 7753, 3684, 852, 923, 2795, 3466, 3685, 969, 1519, 999, 2048, 2320, 1705, # 6294
+ 7754, 3095, 615, 1662, 151, 597, 3992, 2405, 2321, 1049, 275, 4482, 3686, 4193, 568, 3687, # 6310
+ 3571, 2480, 4194, 3688, 7755, 2425, 2270, 409, 3218, 7756, 1566, 2874, 3467, 1002, 769, 2840, # 6326
+ 194, 2090, 3149, 3689, 2222, 3294, 4195, 628, 1505, 7757, 7758, 1763, 2177, 3001, 3993, 521, # 6342
+ 1161, 2584, 1787, 2203, 2406, 4483, 3994, 1625, 4196, 4197, 412, 42, 3096, 464, 7759, 2632, # 6358
+ 4484, 3363, 1760, 1571, 2875, 3468, 2530, 1219, 2204, 3814, 2633, 2140, 2368, 4485, 4486, 3295, # 6374
+ 1651, 3364, 3572, 7760, 7761, 3573, 2481, 3469, 7762, 3690, 7763, 7764, 2271, 2091, 460, 7765, # 6390
+ 4487, 7766, 3002, 962, 588, 3574, 289, 3219, 2634, 1116, 52, 7767, 3047, 1796, 7768, 7769, # 6406
+ 7770, 1467, 7771, 1598, 1143, 3691, 4198, 1984, 1734, 1067, 4488, 1280, 3365, 465, 4489, 1572, # 6422
+ 510, 7772, 1927, 2241, 1812, 1644, 3575, 7773, 4490, 3692, 7774, 7775, 2663, 1573, 1534, 7776, # 6438
+ 7777, 4199, 536, 1807, 1761, 3470, 3815, 3150, 2635, 7778, 7779, 7780, 4491, 3471, 2915, 1911, # 6454
+ 2796, 7781, 3296, 1122, 377, 3220, 7782, 360, 7783, 7784, 4200, 1529, 551, 7785, 2059, 3693, # 6470
+ 1769, 2426, 7786, 2916, 4201, 3297, 3097, 2322, 2108, 2030, 4492, 1404, 136, 1468, 1479, 672, # 6486
+ 1171, 3221, 2303, 271, 3151, 7787, 2762, 7788, 2049, 678, 2727, 865, 1947, 4493, 7789, 2013, # 6502
+ 3995, 2956, 7790, 2728, 2223, 1397, 3048, 3694, 4494, 4495, 1735, 2917, 3366, 3576, 7791, 3816, # 6518
+ 509, 2841, 2453, 2876, 3817, 7792, 7793, 3152, 3153, 4496, 4202, 2531, 4497, 2304, 1166, 1010, # 6534
+ 552, 681, 1887, 7794, 7795, 2957, 2958, 3996, 1287, 1596, 1861, 3154, 358, 453, 736, 175, # 6550
+ 478, 1117, 905, 1167, 1097, 7796, 1853, 1530, 7797, 1706, 7798, 2178, 3472, 2287, 3695, 3473, # 6566
+ 3577, 4203, 2092, 4204, 7799, 3367, 1193, 2482, 4205, 1458, 2190, 2205, 1862, 1888, 1421, 3298, # 6582
+ 2918, 3049, 2179, 3474, 595, 2122, 7800, 3997, 7801, 7802, 4206, 1707, 2636, 223, 3696, 1359, # 6598
+ 751, 3098, 183, 3475, 7803, 2797, 3003, 419, 2369, 633, 704, 3818, 2389, 241, 7804, 7805, # 6614
+ 7806, 838, 3004, 3697, 2272, 2763, 2454, 3819, 1938, 2050, 3998, 1309, 3099, 2242, 1181, 7807, # 6630
+ 1136, 2206, 3820, 2370, 1446, 4207, 2305, 4498, 7808, 7809, 4208, 1055, 2605, 484, 3698, 7810, # 6646
+ 3999, 625, 4209, 2273, 3368, 1499, 4210, 4000, 7811, 4001, 4211, 3222, 2274, 2275, 3476, 7812, # 6662
+ 7813, 2764, 808, 2606, 3699, 3369, 4002, 4212, 3100, 2532, 526, 3370, 3821, 4213, 955, 7814, # 6678
+ 1620, 4214, 2637, 2427, 7815, 1429, 3700, 1669, 1831, 994, 928, 7816, 3578, 1260, 7817, 7818, # 6694
+ 7819, 1948, 2288, 741, 2919, 1626, 4215, 2729, 2455, 867, 1184, 362, 3371, 1392, 7820, 7821, # 6710
+ 4003, 4216, 1770, 1736, 3223, 2920, 4499, 4500, 1928, 2698, 1459, 1158, 7822, 3050, 3372, 2877, # 6726
+ 1292, 1929, 2506, 2842, 3701, 1985, 1187, 2071, 2014, 2607, 4217, 7823, 2566, 2507, 2169, 3702, # 6742
+ 2483, 3299, 7824, 3703, 4501, 7825, 7826, 666, 1003, 3005, 1022, 3579, 4218, 7827, 4502, 1813, # 6758
+ 2253, 574, 3822, 1603, 295, 1535, 705, 3823, 4219, 283, 858, 417, 7828, 7829, 3224, 4503, # 6774
+ 4504, 3051, 1220, 1889, 1046, 2276, 2456, 4004, 1393, 1599, 689, 2567, 388, 4220, 7830, 2484, # 6790
+ 802, 7831, 2798, 3824, 2060, 1405, 2254, 7832, 4505, 3825, 2109, 1052, 1345, 3225, 1585, 7833, # 6806
+ 809, 7834, 7835, 7836, 575, 2730, 3477, 956, 1552, 1469, 1144, 2323, 7837, 2324, 1560, 2457, # 6822
+ 3580, 3226, 4005, 616, 2207, 3155, 2180, 2289, 7838, 1832, 7839, 3478, 4506, 7840, 1319, 3704, # 6838
+ 3705, 1211, 3581, 1023, 3227, 1293, 2799, 7841, 7842, 7843, 3826, 607, 2306, 3827, 762, 2878, # 6854
+ 1439, 4221, 1360, 7844, 1485, 3052, 7845, 4507, 1038, 4222, 1450, 2061, 2638, 4223, 1379, 4508, # 6870
+ 2585, 7846, 7847, 4224, 1352, 1414, 2325, 2921, 1172, 7848, 7849, 3828, 3829, 7850, 1797, 1451, # 6886
+ 7851, 7852, 7853, 7854, 2922, 4006, 4007, 2485, 2346, 411, 4008, 4009, 3582, 3300, 3101, 4509, # 6902
+ 1561, 2664, 1452, 4010, 1375, 7855, 7856, 47, 2959, 316, 7857, 1406, 1591, 2923, 3156, 7858, # 6918
+ 1025, 2141, 3102, 3157, 354, 2731, 884, 2224, 4225, 2407, 508, 3706, 726, 3583, 996, 2428, # 6934
+ 3584, 729, 7859, 392, 2191, 1453, 4011, 4510, 3707, 7860, 7861, 2458, 3585, 2608, 1675, 2800, # 6950
+ 919, 2347, 2960, 2348, 1270, 4511, 4012, 73, 7862, 7863, 647, 7864, 3228, 2843, 2255, 1550, # 6966
+ 1346, 3006, 7865, 1332, 883, 3479, 7866, 7867, 7868, 7869, 3301, 2765, 7870, 1212, 831, 1347, # 6982
+ 4226, 4512, 2326, 3830, 1863, 3053, 720, 3831, 4513, 4514, 3832, 7871, 4227, 7872, 7873, 4515, # 6998
+ 7874, 7875, 1798, 4516, 3708, 2609, 4517, 3586, 1645, 2371, 7876, 7877, 2924, 669, 2208, 2665, # 7014
+ 2429, 7878, 2879, 7879, 7880, 1028, 3229, 7881, 4228, 2408, 7882, 2256, 1353, 7883, 7884, 4518, # 7030
+ 3158, 518, 7885, 4013, 7886, 4229, 1960, 7887, 2142, 4230, 7888, 7889, 3007, 2349, 2350, 3833, # 7046
+ 516, 1833, 1454, 4014, 2699, 4231, 4519, 2225, 2610, 1971, 1129, 3587, 7890, 2766, 7891, 2961, # 7062
+ 1422, 577, 1470, 3008, 1524, 3373, 7892, 7893, 432, 4232, 3054, 3480, 7894, 2586, 1455, 2508, # 7078
+ 2226, 1972, 1175, 7895, 1020, 2732, 4015, 3481, 4520, 7896, 2733, 7897, 1743, 1361, 3055, 3482, # 7094
+ 2639, 4016, 4233, 4521, 2290, 895, 924, 4234, 2170, 331, 2243, 3056, 166, 1627, 3057, 1098, # 7110
+ 7898, 1232, 2880, 2227, 3374, 4522, 657, 403, 1196, 2372, 542, 3709, 3375, 1600, 4235, 3483, # 7126
+ 7899, 4523, 2767, 3230, 576, 530, 1362, 7900, 4524, 2533, 2666, 3710, 4017, 7901, 842, 3834, # 7142
+ 7902, 2801, 2031, 1014, 4018, 213, 2700, 3376, 665, 621, 4236, 7903, 3711, 2925, 2430, 7904, # 7158
+ 2431, 3302, 3588, 3377, 7905, 4237, 2534, 4238, 4525, 3589, 1682, 4239, 3484, 1380, 7906, 724, # 7174
+ 2277, 600, 1670, 7907, 1337, 1233, 4526, 3103, 2244, 7908, 1621, 4527, 7909, 651, 4240, 7910, # 7190
+ 1612, 4241, 2611, 7911, 2844, 7912, 2734, 2307, 3058, 7913, 716, 2459, 3059, 174, 1255, 2701, # 7206
+ 4019, 3590, 548, 1320, 1398, 728, 4020, 1574, 7914, 1890, 1197, 3060, 4021, 7915, 3061, 3062, # 7222
+ 3712, 3591, 3713, 747, 7916, 635, 4242, 4528, 7917, 7918, 7919, 4243, 7920, 7921, 4529, 7922, # 7238
+ 3378, 4530, 2432, 451, 7923, 3714, 2535, 2072, 4244, 2735, 4245, 4022, 7924, 1764, 4531, 7925, # 7254
+ 4246, 350, 7926, 2278, 2390, 2486, 7927, 4247, 4023, 2245, 1434, 4024, 488, 4532, 458, 4248, # 7270
+ 4025, 3715, 771, 1330, 2391, 3835, 2568, 3159, 2159, 2409, 1553, 2667, 3160, 4249, 7928, 2487, # 7286
+ 2881, 2612, 1720, 2702, 4250, 3379, 4533, 7929, 2536, 4251, 7930, 3231, 4252, 2768, 7931, 2015, # 7302
+ 2736, 7932, 1155, 1017, 3716, 3836, 7933, 3303, 2308, 201, 1864, 4253, 1430, 7934, 4026, 7935, # 7318
+ 7936, 7937, 7938, 7939, 4254, 1604, 7940, 414, 1865, 371, 2587, 4534, 4535, 3485, 2016, 3104, # 7334
+ 4536, 1708, 960, 4255, 887, 389, 2171, 1536, 1663, 1721, 7941, 2228, 4027, 2351, 2926, 1580, # 7350
+ 7942, 7943, 7944, 1744, 7945, 2537, 4537, 4538, 7946, 4539, 7947, 2073, 7948, 7949, 3592, 3380, # 7366
+ 2882, 4256, 7950, 4257, 2640, 3381, 2802, 673, 2703, 2460, 709, 3486, 4028, 3593, 4258, 7951, # 7382
+ 1148, 502, 634, 7952, 7953, 1204, 4540, 3594, 1575, 4541, 2613, 3717, 7954, 3718, 3105, 948, # 7398
+ 3232, 121, 1745, 3837, 1110, 7955, 4259, 3063, 2509, 3009, 4029, 3719, 1151, 1771, 3838, 1488, # 7414
+ 4030, 1986, 7956, 2433, 3487, 7957, 7958, 2093, 7959, 4260, 3839, 1213, 1407, 2803, 531, 2737, # 7430
+ 2538, 3233, 1011, 1537, 7960, 2769, 4261, 3106, 1061, 7961, 3720, 3721, 1866, 2883, 7962, 2017, # 7446
+ 120, 4262, 4263, 2062, 3595, 3234, 2309, 3840, 2668, 3382, 1954, 4542, 7963, 7964, 3488, 1047, # 7462
+ 2704, 1266, 7965, 1368, 4543, 2845, 649, 3383, 3841, 2539, 2738, 1102, 2846, 2669, 7966, 7967, # 7478
+ 1999, 7968, 1111, 3596, 2962, 7969, 2488, 3842, 3597, 2804, 1854, 3384, 3722, 7970, 7971, 3385, # 7494
+ 2410, 2884, 3304, 3235, 3598, 7972, 2569, 7973, 3599, 2805, 4031, 1460, 856, 7974, 3600, 7975, # 7510
+ 2885, 2963, 7976, 2886, 3843, 7977, 4264, 632, 2510, 875, 3844, 1697, 3845, 2291, 7978, 7979, # 7526
+ 4544, 3010, 1239, 580, 4545, 4265, 7980, 914, 936, 2074, 1190, 4032, 1039, 2123, 7981, 7982, # 7542
+ 7983, 3386, 1473, 7984, 1354, 4266, 3846, 7985, 2172, 3064, 4033, 915, 3305, 4267, 4268, 3306, # 7558
+ 1605, 1834, 7986, 2739, 398, 3601, 4269, 3847, 4034, 328, 1912, 2847, 4035, 3848, 1331, 4270, # 7574
+ 3011, 937, 4271, 7987, 3602, 4036, 4037, 3387, 2160, 4546, 3388, 524, 742, 538, 3065, 1012, # 7590
+ 7988, 7989, 3849, 2461, 7990, 658, 1103, 225, 3850, 7991, 7992, 4547, 7993, 4548, 7994, 3236, # 7606
+ 1243, 7995, 4038, 963, 2246, 4549, 7996, 2705, 3603, 3161, 7997, 7998, 2588, 2327, 7999, 4550, # 7622
+ 8000, 8001, 8002, 3489, 3307, 957, 3389, 2540, 2032, 1930, 2927, 2462, 870, 2018, 3604, 1746, # 7638
+ 2770, 2771, 2434, 2463, 8003, 3851, 8004, 3723, 3107, 3724, 3490, 3390, 3725, 8005, 1179, 3066, # 7654
+ 8006, 3162, 2373, 4272, 3726, 2541, 3163, 3108, 2740, 4039, 8007, 3391, 1556, 2542, 2292, 977, # 7670
+ 2887, 2033, 4040, 1205, 3392, 8008, 1765, 3393, 3164, 2124, 1271, 1689, 714, 4551, 3491, 8009, # 7686
+ 2328, 3852, 533, 4273, 3605, 2181, 617, 8010, 2464, 3308, 3492, 2310, 8011, 8012, 3165, 8013, # 7702
+ 8014, 3853, 1987, 618, 427, 2641, 3493, 3394, 8015, 8016, 1244, 1690, 8017, 2806, 4274, 4552, # 7718
+ 8018, 3494, 8019, 8020, 2279, 1576, 473, 3606, 4275, 3395, 972, 8021, 3607, 8022, 3067, 8023, # 7734
+ 8024, 4553, 4554, 8025, 3727, 4041, 4042, 8026, 153, 4555, 356, 8027, 1891, 2888, 4276, 2143, # 7750
+ 408, 803, 2352, 8028, 3854, 8029, 4277, 1646, 2570, 2511, 4556, 4557, 3855, 8030, 3856, 4278, # 7766
+ 8031, 2411, 3396, 752, 8032, 8033, 1961, 2964, 8034, 746, 3012, 2465, 8035, 4279, 3728, 698, # 7782
+ 4558, 1892, 4280, 3608, 2543, 4559, 3609, 3857, 8036, 3166, 3397, 8037, 1823, 1302, 4043, 2706, # 7798
+ 3858, 1973, 4281, 8038, 4282, 3167, 823, 1303, 1288, 1236, 2848, 3495, 4044, 3398, 774, 3859, # 7814
+ 8039, 1581, 4560, 1304, 2849, 3860, 4561, 8040, 2435, 2161, 1083, 3237, 4283, 4045, 4284, 344, # 7830
+ 1173, 288, 2311, 454, 1683, 8041, 8042, 1461, 4562, 4046, 2589, 8043, 8044, 4563, 985, 894, # 7846
+ 8045, 3399, 3168, 8046, 1913, 2928, 3729, 1988, 8047, 2110, 1974, 8048, 4047, 8049, 2571, 1194, # 7862
+ 425, 8050, 4564, 3169, 1245, 3730, 4285, 8051, 8052, 2850, 8053, 636, 4565, 1855, 3861, 760, # 7878
+ 1799, 8054, 4286, 2209, 1508, 4566, 4048, 1893, 1684, 2293, 8055, 8056, 8057, 4287, 4288, 2210, # 7894
+ 479, 8058, 8059, 832, 8060, 4049, 2489, 8061, 2965, 2490, 3731, 990, 3109, 627, 1814, 2642, # 7910
+ 4289, 1582, 4290, 2125, 2111, 3496, 4567, 8062, 799, 4291, 3170, 8063, 4568, 2112, 1737, 3013, # 7926
+ 1018, 543, 754, 4292, 3309, 1676, 4569, 4570, 4050, 8064, 1489, 8065, 3497, 8066, 2614, 2889, # 7942
+ 4051, 8067, 8068, 2966, 8069, 8070, 8071, 8072, 3171, 4571, 4572, 2182, 1722, 8073, 3238, 3239, # 7958
+ 1842, 3610, 1715, 481, 365, 1975, 1856, 8074, 8075, 1962, 2491, 4573, 8076, 2126, 3611, 3240, # 7974
+ 433, 1894, 2063, 2075, 8077, 602, 2741, 8078, 8079, 8080, 8081, 8082, 3014, 1628, 3400, 8083, # 7990
+ 3172, 4574, 4052, 2890, 4575, 2512, 8084, 2544, 2772, 8085, 8086, 8087, 3310, 4576, 2891, 8088, # 8006
+ 4577, 8089, 2851, 4578, 4579, 1221, 2967, 4053, 2513, 8090, 8091, 8092, 1867, 1989, 8093, 8094, # 8022
+ 8095, 1895, 8096, 8097, 4580, 1896, 4054, 318, 8098, 2094, 4055, 4293, 8099, 8100, 485, 8101, # 8038
+ 938, 3862, 553, 2670, 116, 8102, 3863, 3612, 8103, 3498, 2671, 2773, 3401, 3311, 2807, 8104, # 8054
+ 3613, 2929, 4056, 1747, 2930, 2968, 8105, 8106, 207, 8107, 8108, 2672, 4581, 2514, 8109, 3015, # 8070
+ 890, 3614, 3864, 8110, 1877, 3732, 3402, 8111, 2183, 2353, 3403, 1652, 8112, 8113, 8114, 941, # 8086
+ 2294, 208, 3499, 4057, 2019, 330, 4294, 3865, 2892, 2492, 3733, 4295, 8115, 8116, 8117, 8118, # 8102
)
-
+# fmt: on
diff --git a/libs/chardet/euctwprober.py b/libs/chardet/euctwprober.py
index 35669cc4d..ca10a23ca 100644
--- a/libs/chardet/euctwprober.py
+++ b/libs/chardet/euctwprober.py
@@ -25,14 +25,15 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCTWDistributionAnalysis
+from .codingstatemachine import CodingStateMachine
+from .mbcharsetprober import MultiByteCharSetProber
from .mbcssm import EUCTW_SM_MODEL
+
class EUCTWProber(MultiByteCharSetProber):
def __init__(self):
- super(EUCTWProber, self).__init__()
+ super().__init__()
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
self.distribution_analyzer = EUCTWDistributionAnalysis()
self.reset()
diff --git a/libs/chardet/gb2312freq.py b/libs/chardet/gb2312freq.py
index 697837bd9..b32bfc742 100644
--- a/libs/chardet/gb2312freq.py
+++ b/libs/chardet/gb2312freq.py
@@ -43,6 +43,7 @@ GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
GB2312_TABLE_SIZE = 3760
+# fmt: off
GB2312_CHAR_TO_FREQ_ORDER = (
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
@@ -280,4 +281,4 @@ GB2312_CHAR_TO_FREQ_ORDER = (
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512
)
-
+# fmt: on
diff --git a/libs/chardet/gb2312prober.py b/libs/chardet/gb2312prober.py
index 8446d2dd9..251c04295 100644
--- a/libs/chardet/gb2312prober.py
+++ b/libs/chardet/gb2312prober.py
@@ -25,14 +25,15 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
from .chardistribution import GB2312DistributionAnalysis
+from .codingstatemachine import CodingStateMachine
+from .mbcharsetprober import MultiByteCharSetProber
from .mbcssm import GB2312_SM_MODEL
+
class GB2312Prober(MultiByteCharSetProber):
def __init__(self):
- super(GB2312Prober, self).__init__()
+ super().__init__()
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
self.distribution_analyzer = GB2312DistributionAnalysis()
self.reset()
diff --git a/libs/chardet/hebrewprober.py b/libs/chardet/hebrewprober.py
index b0e1bf492..3ca634bf3 100644
--- a/libs/chardet/hebrewprober.py
+++ b/libs/chardet/hebrewprober.py
@@ -125,18 +125,19 @@ from .enums import ProbingState
# model probers scores. The answer is returned in the form of the name of the
# charset identified, either "windows-1255" or "ISO-8859-8".
+
class HebrewProber(CharSetProber):
# windows-1255 / ISO-8859-8 code points of interest
- FINAL_KAF = 0xea
- NORMAL_KAF = 0xeb
- FINAL_MEM = 0xed
- NORMAL_MEM = 0xee
- FINAL_NUN = 0xef
- NORMAL_NUN = 0xf0
- FINAL_PE = 0xf3
- NORMAL_PE = 0xf4
- FINAL_TSADI = 0xf5
- NORMAL_TSADI = 0xf6
+ FINAL_KAF = 0xEA
+ NORMAL_KAF = 0xEB
+ FINAL_MEM = 0xED
+ NORMAL_MEM = 0xEE
+ FINAL_NUN = 0xEF
+ NORMAL_NUN = 0xF0
+ FINAL_PE = 0xF3
+ NORMAL_PE = 0xF4
+ FINAL_TSADI = 0xF5
+ NORMAL_TSADI = 0xF6
# Minimum Visual vs Logical final letter score difference.
# If the difference is below this, don't rely solely on the final letter score
@@ -152,7 +153,7 @@ class HebrewProber(CharSetProber):
LOGICAL_HEBREW_NAME = "windows-1255"
def __init__(self):
- super(HebrewProber, self).__init__()
+ super().__init__()
self._final_char_logical_score = None
self._final_char_visual_score = None
self._prev = None
@@ -167,17 +168,22 @@ class HebrewProber(CharSetProber):
# The two last characters seen in the previous buffer,
# mPrev and mBeforePrev are initialized to space in order to simulate
# a word delimiter at the beginning of the data
- self._prev = ' '
- self._before_prev = ' '
+ self._prev = " "
+ self._before_prev = " "
# These probers are owned by the group prober.
- def set_model_probers(self, logicalProber, visualProber):
- self._logical_prober = logicalProber
- self._visual_prober = visualProber
+ def set_model_probers(self, logical_prober, visual_prober):
+ self._logical_prober = logical_prober
+ self._visual_prober = visual_prober
def is_final(self, c):
- return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
- self.FINAL_PE, self.FINAL_TSADI]
+ return c in [
+ self.FINAL_KAF,
+ self.FINAL_MEM,
+ self.FINAL_NUN,
+ self.FINAL_PE,
+ self.FINAL_TSADI,
+ ]
def is_non_final(self, c):
# The normal Tsadi is not a good Non-Final letter due to words like
@@ -190,8 +196,7 @@ class HebrewProber(CharSetProber):
# for example legally end with a Non-Final Pe or Kaf. However, the
# benefit of these letters as Non-Final letters outweighs the damage
# since these words are quite rare.
- return c in [self.NORMAL_KAF, self.NORMAL_MEM,
- self.NORMAL_NUN, self.NORMAL_PE]
+ return c in [self.NORMAL_KAF, self.NORMAL_MEM, self.NORMAL_NUN, self.NORMAL_PE]
def feed(self, byte_str):
# Final letter analysis for logical-visual decision.
@@ -227,9 +232,9 @@ class HebrewProber(CharSetProber):
byte_str = self.filter_high_byte_only(byte_str)
for cur in byte_str:
- if cur == ' ':
+ if cur == " ":
# We stand on a space - a word just ended
- if self._before_prev != ' ':
+ if self._before_prev != " ":
# next-to-last char was not a space so self._prev is not a
# 1 letter word
if self.is_final(self._prev):
@@ -241,8 +246,11 @@ class HebrewProber(CharSetProber):
self._final_char_visual_score += 1
else:
# Not standing on a space
- if ((self._before_prev == ' ') and
- (self.is_final(self._prev)) and (cur != ' ')):
+ if (
+ (self._before_prev == " ")
+ and (self.is_final(self._prev))
+ and (cur != " ")
+ ):
# case (3) [-2:space][-1:final letter][cur:not space]
self._final_char_visual_score += 1
self._before_prev = self._prev
@@ -263,8 +271,9 @@ class HebrewProber(CharSetProber):
return self.VISUAL_HEBREW_NAME
# It's not dominant enough, try to rely on the model scores instead.
- modelsub = (self._logical_prober.get_confidence()
- - self._visual_prober.get_confidence())
+ modelsub = (
+ self._logical_prober.get_confidence() - self._visual_prober.get_confidence()
+ )
if modelsub > self.MIN_MODEL_DISTANCE:
return self.LOGICAL_HEBREW_NAME
if modelsub < -self.MIN_MODEL_DISTANCE:
@@ -281,12 +290,13 @@ class HebrewProber(CharSetProber):
@property
def language(self):
- return 'Hebrew'
+ return "Hebrew"
@property
def state(self):
# Remain active as long as any of the model probers are active.
- if (self._logical_prober.state == ProbingState.NOT_ME) and \
- (self._visual_prober.state == ProbingState.NOT_ME):
+ if (self._logical_prober.state == ProbingState.NOT_ME) and (
+ self._visual_prober.state == ProbingState.NOT_ME
+ ):
return ProbingState.NOT_ME
return ProbingState.DETECTING
diff --git a/libs/chardet/jisfreq.py b/libs/chardet/jisfreq.py
index 83fc082b5..3293576e0 100644
--- a/libs/chardet/jisfreq.py
+++ b/libs/chardet/jisfreq.py
@@ -46,6 +46,7 @@ JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
# Char to FreqOrder table ,
JIS_TABLE_SIZE = 4368
+# fmt: off
JIS_CHAR_TO_FREQ_ORDER = (
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
@@ -321,5 +322,4 @@ JIS_CHAR_TO_FREQ_ORDER = (
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512
)
-
-
+# fmt: on
diff --git a/libs/chardet/johabfreq.py b/libs/chardet/johabfreq.py
new file mode 100644
index 000000000..c12969990
--- /dev/null
+++ b/libs/chardet/johabfreq.py
@@ -0,0 +1,2382 @@
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# The frequency data itself is the same as euc-kr.
+# This is just a mapping table to euc-kr.
+
+JOHAB_TO_EUCKR_ORDER_TABLE = {
+ 0x8861: 0,
+ 0x8862: 1,
+ 0x8865: 2,
+ 0x8868: 3,
+ 0x8869: 4,
+ 0x886A: 5,
+ 0x886B: 6,
+ 0x8871: 7,
+ 0x8873: 8,
+ 0x8874: 9,
+ 0x8875: 10,
+ 0x8876: 11,
+ 0x8877: 12,
+ 0x8878: 13,
+ 0x8879: 14,
+ 0x887B: 15,
+ 0x887C: 16,
+ 0x887D: 17,
+ 0x8881: 18,
+ 0x8882: 19,
+ 0x8885: 20,
+ 0x8889: 21,
+ 0x8891: 22,
+ 0x8893: 23,
+ 0x8895: 24,
+ 0x8896: 25,
+ 0x8897: 26,
+ 0x88A1: 27,
+ 0x88A2: 28,
+ 0x88A5: 29,
+ 0x88A9: 30,
+ 0x88B5: 31,
+ 0x88B7: 32,
+ 0x88C1: 33,
+ 0x88C5: 34,
+ 0x88C9: 35,
+ 0x88E1: 36,
+ 0x88E2: 37,
+ 0x88E5: 38,
+ 0x88E8: 39,
+ 0x88E9: 40,
+ 0x88EB: 41,
+ 0x88F1: 42,
+ 0x88F3: 43,
+ 0x88F5: 44,
+ 0x88F6: 45,
+ 0x88F7: 46,
+ 0x88F8: 47,
+ 0x88FB: 48,
+ 0x88FC: 49,
+ 0x88FD: 50,
+ 0x8941: 51,
+ 0x8945: 52,
+ 0x8949: 53,
+ 0x8951: 54,
+ 0x8953: 55,
+ 0x8955: 56,
+ 0x8956: 57,
+ 0x8957: 58,
+ 0x8961: 59,
+ 0x8962: 60,
+ 0x8963: 61,
+ 0x8965: 62,
+ 0x8968: 63,
+ 0x8969: 64,
+ 0x8971: 65,
+ 0x8973: 66,
+ 0x8975: 67,
+ 0x8976: 68,
+ 0x8977: 69,
+ 0x897B: 70,
+ 0x8981: 71,
+ 0x8985: 72,
+ 0x8989: 73,
+ 0x8993: 74,
+ 0x8995: 75,
+ 0x89A1: 76,
+ 0x89A2: 77,
+ 0x89A5: 78,
+ 0x89A8: 79,
+ 0x89A9: 80,
+ 0x89AB: 81,
+ 0x89AD: 82,
+ 0x89B0: 83,
+ 0x89B1: 84,
+ 0x89B3: 85,
+ 0x89B5: 86,
+ 0x89B7: 87,
+ 0x89B8: 88,
+ 0x89C1: 89,
+ 0x89C2: 90,
+ 0x89C5: 91,
+ 0x89C9: 92,
+ 0x89CB: 93,
+ 0x89D1: 94,
+ 0x89D3: 95,
+ 0x89D5: 96,
+ 0x89D7: 97,
+ 0x89E1: 98,
+ 0x89E5: 99,
+ 0x89E9: 100,
+ 0x89F3: 101,
+ 0x89F6: 102,
+ 0x89F7: 103,
+ 0x8A41: 104,
+ 0x8A42: 105,
+ 0x8A45: 106,
+ 0x8A49: 107,
+ 0x8A51: 108,
+ 0x8A53: 109,
+ 0x8A55: 110,
+ 0x8A57: 111,
+ 0x8A61: 112,
+ 0x8A65: 113,
+ 0x8A69: 114,
+ 0x8A73: 115,
+ 0x8A75: 116,
+ 0x8A81: 117,
+ 0x8A82: 118,
+ 0x8A85: 119,
+ 0x8A88: 120,
+ 0x8A89: 121,
+ 0x8A8A: 122,
+ 0x8A8B: 123,
+ 0x8A90: 124,
+ 0x8A91: 125,
+ 0x8A93: 126,
+ 0x8A95: 127,
+ 0x8A97: 128,
+ 0x8A98: 129,
+ 0x8AA1: 130,
+ 0x8AA2: 131,
+ 0x8AA5: 132,
+ 0x8AA9: 133,
+ 0x8AB6: 134,
+ 0x8AB7: 135,
+ 0x8AC1: 136,
+ 0x8AD5: 137,
+ 0x8AE1: 138,
+ 0x8AE2: 139,
+ 0x8AE5: 140,
+ 0x8AE9: 141,
+ 0x8AF1: 142,
+ 0x8AF3: 143,
+ 0x8AF5: 144,
+ 0x8B41: 145,
+ 0x8B45: 146,
+ 0x8B49: 147,
+ 0x8B61: 148,
+ 0x8B62: 149,
+ 0x8B65: 150,
+ 0x8B68: 151,
+ 0x8B69: 152,
+ 0x8B6A: 153,
+ 0x8B71: 154,
+ 0x8B73: 155,
+ 0x8B75: 156,
+ 0x8B77: 157,
+ 0x8B81: 158,
+ 0x8BA1: 159,
+ 0x8BA2: 160,
+ 0x8BA5: 161,
+ 0x8BA8: 162,
+ 0x8BA9: 163,
+ 0x8BAB: 164,
+ 0x8BB1: 165,
+ 0x8BB3: 166,
+ 0x8BB5: 167,
+ 0x8BB7: 168,
+ 0x8BB8: 169,
+ 0x8BBC: 170,
+ 0x8C61: 171,
+ 0x8C62: 172,
+ 0x8C63: 173,
+ 0x8C65: 174,
+ 0x8C69: 175,
+ 0x8C6B: 176,
+ 0x8C71: 177,
+ 0x8C73: 178,
+ 0x8C75: 179,
+ 0x8C76: 180,
+ 0x8C77: 181,
+ 0x8C7B: 182,
+ 0x8C81: 183,
+ 0x8C82: 184,
+ 0x8C85: 185,
+ 0x8C89: 186,
+ 0x8C91: 187,
+ 0x8C93: 188,
+ 0x8C95: 189,
+ 0x8C96: 190,
+ 0x8C97: 191,
+ 0x8CA1: 192,
+ 0x8CA2: 193,
+ 0x8CA9: 194,
+ 0x8CE1: 195,
+ 0x8CE2: 196,
+ 0x8CE3: 197,
+ 0x8CE5: 198,
+ 0x8CE9: 199,
+ 0x8CF1: 200,
+ 0x8CF3: 201,
+ 0x8CF5: 202,
+ 0x8CF6: 203,
+ 0x8CF7: 204,
+ 0x8D41: 205,
+ 0x8D42: 206,
+ 0x8D45: 207,
+ 0x8D51: 208,
+ 0x8D55: 209,
+ 0x8D57: 210,
+ 0x8D61: 211,
+ 0x8D65: 212,
+ 0x8D69: 213,
+ 0x8D75: 214,
+ 0x8D76: 215,
+ 0x8D7B: 216,
+ 0x8D81: 217,
+ 0x8DA1: 218,
+ 0x8DA2: 219,
+ 0x8DA5: 220,
+ 0x8DA7: 221,
+ 0x8DA9: 222,
+ 0x8DB1: 223,
+ 0x8DB3: 224,
+ 0x8DB5: 225,
+ 0x8DB7: 226,
+ 0x8DB8: 227,
+ 0x8DB9: 228,
+ 0x8DC1: 229,
+ 0x8DC2: 230,
+ 0x8DC9: 231,
+ 0x8DD6: 232,
+ 0x8DD7: 233,
+ 0x8DE1: 234,
+ 0x8DE2: 235,
+ 0x8DF7: 236,
+ 0x8E41: 237,
+ 0x8E45: 238,
+ 0x8E49: 239,
+ 0x8E51: 240,
+ 0x8E53: 241,
+ 0x8E57: 242,
+ 0x8E61: 243,
+ 0x8E81: 244,
+ 0x8E82: 245,
+ 0x8E85: 246,
+ 0x8E89: 247,
+ 0x8E90: 248,
+ 0x8E91: 249,
+ 0x8E93: 250,
+ 0x8E95: 251,
+ 0x8E97: 252,
+ 0x8E98: 253,
+ 0x8EA1: 254,
+ 0x8EA9: 255,
+ 0x8EB6: 256,
+ 0x8EB7: 257,
+ 0x8EC1: 258,
+ 0x8EC2: 259,
+ 0x8EC5: 260,
+ 0x8EC9: 261,
+ 0x8ED1: 262,
+ 0x8ED3: 263,
+ 0x8ED6: 264,
+ 0x8EE1: 265,
+ 0x8EE5: 266,
+ 0x8EE9: 267,
+ 0x8EF1: 268,
+ 0x8EF3: 269,
+ 0x8F41: 270,
+ 0x8F61: 271,
+ 0x8F62: 272,
+ 0x8F65: 273,
+ 0x8F67: 274,
+ 0x8F69: 275,
+ 0x8F6B: 276,
+ 0x8F70: 277,
+ 0x8F71: 278,
+ 0x8F73: 279,
+ 0x8F75: 280,
+ 0x8F77: 281,
+ 0x8F7B: 282,
+ 0x8FA1: 283,
+ 0x8FA2: 284,
+ 0x8FA5: 285,
+ 0x8FA9: 286,
+ 0x8FB1: 287,
+ 0x8FB3: 288,
+ 0x8FB5: 289,
+ 0x8FB7: 290,
+ 0x9061: 291,
+ 0x9062: 292,
+ 0x9063: 293,
+ 0x9065: 294,
+ 0x9068: 295,
+ 0x9069: 296,
+ 0x906A: 297,
+ 0x906B: 298,
+ 0x9071: 299,
+ 0x9073: 300,
+ 0x9075: 301,
+ 0x9076: 302,
+ 0x9077: 303,
+ 0x9078: 304,
+ 0x9079: 305,
+ 0x907B: 306,
+ 0x907D: 307,
+ 0x9081: 308,
+ 0x9082: 309,
+ 0x9085: 310,
+ 0x9089: 311,
+ 0x9091: 312,
+ 0x9093: 313,
+ 0x9095: 314,
+ 0x9096: 315,
+ 0x9097: 316,
+ 0x90A1: 317,
+ 0x90A2: 318,
+ 0x90A5: 319,
+ 0x90A9: 320,
+ 0x90B1: 321,
+ 0x90B7: 322,
+ 0x90E1: 323,
+ 0x90E2: 324,
+ 0x90E4: 325,
+ 0x90E5: 326,
+ 0x90E9: 327,
+ 0x90EB: 328,
+ 0x90EC: 329,
+ 0x90F1: 330,
+ 0x90F3: 331,
+ 0x90F5: 332,
+ 0x90F6: 333,
+ 0x90F7: 334,
+ 0x90FD: 335,
+ 0x9141: 336,
+ 0x9142: 337,
+ 0x9145: 338,
+ 0x9149: 339,
+ 0x9151: 340,
+ 0x9153: 341,
+ 0x9155: 342,
+ 0x9156: 343,
+ 0x9157: 344,
+ 0x9161: 345,
+ 0x9162: 346,
+ 0x9165: 347,
+ 0x9169: 348,
+ 0x9171: 349,
+ 0x9173: 350,
+ 0x9176: 351,
+ 0x9177: 352,
+ 0x917A: 353,
+ 0x9181: 354,
+ 0x9185: 355,
+ 0x91A1: 356,
+ 0x91A2: 357,
+ 0x91A5: 358,
+ 0x91A9: 359,
+ 0x91AB: 360,
+ 0x91B1: 361,
+ 0x91B3: 362,
+ 0x91B5: 363,
+ 0x91B7: 364,
+ 0x91BC: 365,
+ 0x91BD: 366,
+ 0x91C1: 367,
+ 0x91C5: 368,
+ 0x91C9: 369,
+ 0x91D6: 370,
+ 0x9241: 371,
+ 0x9245: 372,
+ 0x9249: 373,
+ 0x9251: 374,
+ 0x9253: 375,
+ 0x9255: 376,
+ 0x9261: 377,
+ 0x9262: 378,
+ 0x9265: 379,
+ 0x9269: 380,
+ 0x9273: 381,
+ 0x9275: 382,
+ 0x9277: 383,
+ 0x9281: 384,
+ 0x9282: 385,
+ 0x9285: 386,
+ 0x9288: 387,
+ 0x9289: 388,
+ 0x9291: 389,
+ 0x9293: 390,
+ 0x9295: 391,
+ 0x9297: 392,
+ 0x92A1: 393,
+ 0x92B6: 394,
+ 0x92C1: 395,
+ 0x92E1: 396,
+ 0x92E5: 397,
+ 0x92E9: 398,
+ 0x92F1: 399,
+ 0x92F3: 400,
+ 0x9341: 401,
+ 0x9342: 402,
+ 0x9349: 403,
+ 0x9351: 404,
+ 0x9353: 405,
+ 0x9357: 406,
+ 0x9361: 407,
+ 0x9362: 408,
+ 0x9365: 409,
+ 0x9369: 410,
+ 0x936A: 411,
+ 0x936B: 412,
+ 0x9371: 413,
+ 0x9373: 414,
+ 0x9375: 415,
+ 0x9377: 416,
+ 0x9378: 417,
+ 0x937C: 418,
+ 0x9381: 419,
+ 0x9385: 420,
+ 0x9389: 421,
+ 0x93A1: 422,
+ 0x93A2: 423,
+ 0x93A5: 424,
+ 0x93A9: 425,
+ 0x93AB: 426,
+ 0x93B1: 427,
+ 0x93B3: 428,
+ 0x93B5: 429,
+ 0x93B7: 430,
+ 0x93BC: 431,
+ 0x9461: 432,
+ 0x9462: 433,
+ 0x9463: 434,
+ 0x9465: 435,
+ 0x9468: 436,
+ 0x9469: 437,
+ 0x946A: 438,
+ 0x946B: 439,
+ 0x946C: 440,
+ 0x9470: 441,
+ 0x9471: 442,
+ 0x9473: 443,
+ 0x9475: 444,
+ 0x9476: 445,
+ 0x9477: 446,
+ 0x9478: 447,
+ 0x9479: 448,
+ 0x947D: 449,
+ 0x9481: 450,
+ 0x9482: 451,
+ 0x9485: 452,
+ 0x9489: 453,
+ 0x9491: 454,
+ 0x9493: 455,
+ 0x9495: 456,
+ 0x9496: 457,
+ 0x9497: 458,
+ 0x94A1: 459,
+ 0x94E1: 460,
+ 0x94E2: 461,
+ 0x94E3: 462,
+ 0x94E5: 463,
+ 0x94E8: 464,
+ 0x94E9: 465,
+ 0x94EB: 466,
+ 0x94EC: 467,
+ 0x94F1: 468,
+ 0x94F3: 469,
+ 0x94F5: 470,
+ 0x94F7: 471,
+ 0x94F9: 472,
+ 0x94FC: 473,
+ 0x9541: 474,
+ 0x9542: 475,
+ 0x9545: 476,
+ 0x9549: 477,
+ 0x9551: 478,
+ 0x9553: 479,
+ 0x9555: 480,
+ 0x9556: 481,
+ 0x9557: 482,
+ 0x9561: 483,
+ 0x9565: 484,
+ 0x9569: 485,
+ 0x9576: 486,
+ 0x9577: 487,
+ 0x9581: 488,
+ 0x9585: 489,
+ 0x95A1: 490,
+ 0x95A2: 491,
+ 0x95A5: 492,
+ 0x95A8: 493,
+ 0x95A9: 494,
+ 0x95AB: 495,
+ 0x95AD: 496,
+ 0x95B1: 497,
+ 0x95B3: 498,
+ 0x95B5: 499,
+ 0x95B7: 500,
+ 0x95B9: 501,
+ 0x95BB: 502,
+ 0x95C1: 503,
+ 0x95C5: 504,
+ 0x95C9: 505,
+ 0x95E1: 506,
+ 0x95F6: 507,
+ 0x9641: 508,
+ 0x9645: 509,
+ 0x9649: 510,
+ 0x9651: 511,
+ 0x9653: 512,
+ 0x9655: 513,
+ 0x9661: 514,
+ 0x9681: 515,
+ 0x9682: 516,
+ 0x9685: 517,
+ 0x9689: 518,
+ 0x9691: 519,
+ 0x9693: 520,
+ 0x9695: 521,
+ 0x9697: 522,
+ 0x96A1: 523,
+ 0x96B6: 524,
+ 0x96C1: 525,
+ 0x96D7: 526,
+ 0x96E1: 527,
+ 0x96E5: 528,
+ 0x96E9: 529,
+ 0x96F3: 530,
+ 0x96F5: 531,
+ 0x96F7: 532,
+ 0x9741: 533,
+ 0x9745: 534,
+ 0x9749: 535,
+ 0x9751: 536,
+ 0x9757: 537,
+ 0x9761: 538,
+ 0x9762: 539,
+ 0x9765: 540,
+ 0x9768: 541,
+ 0x9769: 542,
+ 0x976B: 543,
+ 0x9771: 544,
+ 0x9773: 545,
+ 0x9775: 546,
+ 0x9777: 547,
+ 0x9781: 548,
+ 0x97A1: 549,
+ 0x97A2: 550,
+ 0x97A5: 551,
+ 0x97A8: 552,
+ 0x97A9: 553,
+ 0x97B1: 554,
+ 0x97B3: 555,
+ 0x97B5: 556,
+ 0x97B6: 557,
+ 0x97B7: 558,
+ 0x97B8: 559,
+ 0x9861: 560,
+ 0x9862: 561,
+ 0x9865: 562,
+ 0x9869: 563,
+ 0x9871: 564,
+ 0x9873: 565,
+ 0x9875: 566,
+ 0x9876: 567,
+ 0x9877: 568,
+ 0x987D: 569,
+ 0x9881: 570,
+ 0x9882: 571,
+ 0x9885: 572,
+ 0x9889: 573,
+ 0x9891: 574,
+ 0x9893: 575,
+ 0x9895: 576,
+ 0x9896: 577,
+ 0x9897: 578,
+ 0x98E1: 579,
+ 0x98E2: 580,
+ 0x98E5: 581,
+ 0x98E9: 582,
+ 0x98EB: 583,
+ 0x98EC: 584,
+ 0x98F1: 585,
+ 0x98F3: 586,
+ 0x98F5: 587,
+ 0x98F6: 588,
+ 0x98F7: 589,
+ 0x98FD: 590,
+ 0x9941: 591,
+ 0x9942: 592,
+ 0x9945: 593,
+ 0x9949: 594,
+ 0x9951: 595,
+ 0x9953: 596,
+ 0x9955: 597,
+ 0x9956: 598,
+ 0x9957: 599,
+ 0x9961: 600,
+ 0x9976: 601,
+ 0x99A1: 602,
+ 0x99A2: 603,
+ 0x99A5: 604,
+ 0x99A9: 605,
+ 0x99B7: 606,
+ 0x99C1: 607,
+ 0x99C9: 608,
+ 0x99E1: 609,
+ 0x9A41: 610,
+ 0x9A45: 611,
+ 0x9A81: 612,
+ 0x9A82: 613,
+ 0x9A85: 614,
+ 0x9A89: 615,
+ 0x9A90: 616,
+ 0x9A91: 617,
+ 0x9A97: 618,
+ 0x9AC1: 619,
+ 0x9AE1: 620,
+ 0x9AE5: 621,
+ 0x9AE9: 622,
+ 0x9AF1: 623,
+ 0x9AF3: 624,
+ 0x9AF7: 625,
+ 0x9B61: 626,
+ 0x9B62: 627,
+ 0x9B65: 628,
+ 0x9B68: 629,
+ 0x9B69: 630,
+ 0x9B71: 631,
+ 0x9B73: 632,
+ 0x9B75: 633,
+ 0x9B81: 634,
+ 0x9B85: 635,
+ 0x9B89: 636,
+ 0x9B91: 637,
+ 0x9B93: 638,
+ 0x9BA1: 639,
+ 0x9BA5: 640,
+ 0x9BA9: 641,
+ 0x9BB1: 642,
+ 0x9BB3: 643,
+ 0x9BB5: 644,
+ 0x9BB7: 645,
+ 0x9C61: 646,
+ 0x9C62: 647,
+ 0x9C65: 648,
+ 0x9C69: 649,
+ 0x9C71: 650,
+ 0x9C73: 651,
+ 0x9C75: 652,
+ 0x9C76: 653,
+ 0x9C77: 654,
+ 0x9C78: 655,
+ 0x9C7C: 656,
+ 0x9C7D: 657,
+ 0x9C81: 658,
+ 0x9C82: 659,
+ 0x9C85: 660,
+ 0x9C89: 661,
+ 0x9C91: 662,
+ 0x9C93: 663,
+ 0x9C95: 664,
+ 0x9C96: 665,
+ 0x9C97: 666,
+ 0x9CA1: 667,
+ 0x9CA2: 668,
+ 0x9CA5: 669,
+ 0x9CB5: 670,
+ 0x9CB7: 671,
+ 0x9CE1: 672,
+ 0x9CE2: 673,
+ 0x9CE5: 674,
+ 0x9CE9: 675,
+ 0x9CF1: 676,
+ 0x9CF3: 677,
+ 0x9CF5: 678,
+ 0x9CF6: 679,
+ 0x9CF7: 680,
+ 0x9CFD: 681,
+ 0x9D41: 682,
+ 0x9D42: 683,
+ 0x9D45: 684,
+ 0x9D49: 685,
+ 0x9D51: 686,
+ 0x9D53: 687,
+ 0x9D55: 688,
+ 0x9D57: 689,
+ 0x9D61: 690,
+ 0x9D62: 691,
+ 0x9D65: 692,
+ 0x9D69: 693,
+ 0x9D71: 694,
+ 0x9D73: 695,
+ 0x9D75: 696,
+ 0x9D76: 697,
+ 0x9D77: 698,
+ 0x9D81: 699,
+ 0x9D85: 700,
+ 0x9D93: 701,
+ 0x9D95: 702,
+ 0x9DA1: 703,
+ 0x9DA2: 704,
+ 0x9DA5: 705,
+ 0x9DA9: 706,
+ 0x9DB1: 707,
+ 0x9DB3: 708,
+ 0x9DB5: 709,
+ 0x9DB7: 710,
+ 0x9DC1: 711,
+ 0x9DC5: 712,
+ 0x9DD7: 713,
+ 0x9DF6: 714,
+ 0x9E41: 715,
+ 0x9E45: 716,
+ 0x9E49: 717,
+ 0x9E51: 718,
+ 0x9E53: 719,
+ 0x9E55: 720,
+ 0x9E57: 721,
+ 0x9E61: 722,
+ 0x9E65: 723,
+ 0x9E69: 724,
+ 0x9E73: 725,
+ 0x9E75: 726,
+ 0x9E77: 727,
+ 0x9E81: 728,
+ 0x9E82: 729,
+ 0x9E85: 730,
+ 0x9E89: 731,
+ 0x9E91: 732,
+ 0x9E93: 733,
+ 0x9E95: 734,
+ 0x9E97: 735,
+ 0x9EA1: 736,
+ 0x9EB6: 737,
+ 0x9EC1: 738,
+ 0x9EE1: 739,
+ 0x9EE2: 740,
+ 0x9EE5: 741,
+ 0x9EE9: 742,
+ 0x9EF1: 743,
+ 0x9EF5: 744,
+ 0x9EF7: 745,
+ 0x9F41: 746,
+ 0x9F42: 747,
+ 0x9F45: 748,
+ 0x9F49: 749,
+ 0x9F51: 750,
+ 0x9F53: 751,
+ 0x9F55: 752,
+ 0x9F57: 753,
+ 0x9F61: 754,
+ 0x9F62: 755,
+ 0x9F65: 756,
+ 0x9F69: 757,
+ 0x9F71: 758,
+ 0x9F73: 759,
+ 0x9F75: 760,
+ 0x9F77: 761,
+ 0x9F78: 762,
+ 0x9F7B: 763,
+ 0x9F7C: 764,
+ 0x9FA1: 765,
+ 0x9FA2: 766,
+ 0x9FA5: 767,
+ 0x9FA9: 768,
+ 0x9FB1: 769,
+ 0x9FB3: 770,
+ 0x9FB5: 771,
+ 0x9FB7: 772,
+ 0xA061: 773,
+ 0xA062: 774,
+ 0xA065: 775,
+ 0xA067: 776,
+ 0xA068: 777,
+ 0xA069: 778,
+ 0xA06A: 779,
+ 0xA06B: 780,
+ 0xA071: 781,
+ 0xA073: 782,
+ 0xA075: 783,
+ 0xA077: 784,
+ 0xA078: 785,
+ 0xA07B: 786,
+ 0xA07D: 787,
+ 0xA081: 788,
+ 0xA082: 789,
+ 0xA085: 790,
+ 0xA089: 791,
+ 0xA091: 792,
+ 0xA093: 793,
+ 0xA095: 794,
+ 0xA096: 795,
+ 0xA097: 796,
+ 0xA098: 797,
+ 0xA0A1: 798,
+ 0xA0A2: 799,
+ 0xA0A9: 800,
+ 0xA0B7: 801,
+ 0xA0E1: 802,
+ 0xA0E2: 803,
+ 0xA0E5: 804,
+ 0xA0E9: 805,
+ 0xA0EB: 806,
+ 0xA0F1: 807,
+ 0xA0F3: 808,
+ 0xA0F5: 809,
+ 0xA0F7: 810,
+ 0xA0F8: 811,
+ 0xA0FD: 812,
+ 0xA141: 813,
+ 0xA142: 814,
+ 0xA145: 815,
+ 0xA149: 816,
+ 0xA151: 817,
+ 0xA153: 818,
+ 0xA155: 819,
+ 0xA156: 820,
+ 0xA157: 821,
+ 0xA161: 822,
+ 0xA162: 823,
+ 0xA165: 824,
+ 0xA169: 825,
+ 0xA175: 826,
+ 0xA176: 827,
+ 0xA177: 828,
+ 0xA179: 829,
+ 0xA181: 830,
+ 0xA1A1: 831,
+ 0xA1A2: 832,
+ 0xA1A4: 833,
+ 0xA1A5: 834,
+ 0xA1A9: 835,
+ 0xA1AB: 836,
+ 0xA1B1: 837,
+ 0xA1B3: 838,
+ 0xA1B5: 839,
+ 0xA1B7: 840,
+ 0xA1C1: 841,
+ 0xA1C5: 842,
+ 0xA1D6: 843,
+ 0xA1D7: 844,
+ 0xA241: 845,
+ 0xA245: 846,
+ 0xA249: 847,
+ 0xA253: 848,
+ 0xA255: 849,
+ 0xA257: 850,
+ 0xA261: 851,
+ 0xA265: 852,
+ 0xA269: 853,
+ 0xA273: 854,
+ 0xA275: 855,
+ 0xA281: 856,
+ 0xA282: 857,
+ 0xA283: 858,
+ 0xA285: 859,
+ 0xA288: 860,
+ 0xA289: 861,
+ 0xA28A: 862,
+ 0xA28B: 863,
+ 0xA291: 864,
+ 0xA293: 865,
+ 0xA295: 866,
+ 0xA297: 867,
+ 0xA29B: 868,
+ 0xA29D: 869,
+ 0xA2A1: 870,
+ 0xA2A5: 871,
+ 0xA2A9: 872,
+ 0xA2B3: 873,
+ 0xA2B5: 874,
+ 0xA2C1: 875,
+ 0xA2E1: 876,
+ 0xA2E5: 877,
+ 0xA2E9: 878,
+ 0xA341: 879,
+ 0xA345: 880,
+ 0xA349: 881,
+ 0xA351: 882,
+ 0xA355: 883,
+ 0xA361: 884,
+ 0xA365: 885,
+ 0xA369: 886,
+ 0xA371: 887,
+ 0xA375: 888,
+ 0xA3A1: 889,
+ 0xA3A2: 890,
+ 0xA3A5: 891,
+ 0xA3A8: 892,
+ 0xA3A9: 893,
+ 0xA3AB: 894,
+ 0xA3B1: 895,
+ 0xA3B3: 896,
+ 0xA3B5: 897,
+ 0xA3B6: 898,
+ 0xA3B7: 899,
+ 0xA3B9: 900,
+ 0xA3BB: 901,
+ 0xA461: 902,
+ 0xA462: 903,
+ 0xA463: 904,
+ 0xA464: 905,
+ 0xA465: 906,
+ 0xA468: 907,
+ 0xA469: 908,
+ 0xA46A: 909,
+ 0xA46B: 910,
+ 0xA46C: 911,
+ 0xA471: 912,
+ 0xA473: 913,
+ 0xA475: 914,
+ 0xA477: 915,
+ 0xA47B: 916,
+ 0xA481: 917,
+ 0xA482: 918,
+ 0xA485: 919,
+ 0xA489: 920,
+ 0xA491: 921,
+ 0xA493: 922,
+ 0xA495: 923,
+ 0xA496: 924,
+ 0xA497: 925,
+ 0xA49B: 926,
+ 0xA4A1: 927,
+ 0xA4A2: 928,
+ 0xA4A5: 929,
+ 0xA4B3: 930,
+ 0xA4E1: 931,
+ 0xA4E2: 932,
+ 0xA4E5: 933,
+ 0xA4E8: 934,
+ 0xA4E9: 935,
+ 0xA4EB: 936,
+ 0xA4F1: 937,
+ 0xA4F3: 938,
+ 0xA4F5: 939,
+ 0xA4F7: 940,
+ 0xA4F8: 941,
+ 0xA541: 942,
+ 0xA542: 943,
+ 0xA545: 944,
+ 0xA548: 945,
+ 0xA549: 946,
+ 0xA551: 947,
+ 0xA553: 948,
+ 0xA555: 949,
+ 0xA556: 950,
+ 0xA557: 951,
+ 0xA561: 952,
+ 0xA562: 953,
+ 0xA565: 954,
+ 0xA569: 955,
+ 0xA573: 956,
+ 0xA575: 957,
+ 0xA576: 958,
+ 0xA577: 959,
+ 0xA57B: 960,
+ 0xA581: 961,
+ 0xA585: 962,
+ 0xA5A1: 963,
+ 0xA5A2: 964,
+ 0xA5A3: 965,
+ 0xA5A5: 966,
+ 0xA5A9: 967,
+ 0xA5B1: 968,
+ 0xA5B3: 969,
+ 0xA5B5: 970,
+ 0xA5B7: 971,
+ 0xA5C1: 972,
+ 0xA5C5: 973,
+ 0xA5D6: 974,
+ 0xA5E1: 975,
+ 0xA5F6: 976,
+ 0xA641: 977,
+ 0xA642: 978,
+ 0xA645: 979,
+ 0xA649: 980,
+ 0xA651: 981,
+ 0xA653: 982,
+ 0xA661: 983,
+ 0xA665: 984,
+ 0xA681: 985,
+ 0xA682: 986,
+ 0xA685: 987,
+ 0xA688: 988,
+ 0xA689: 989,
+ 0xA68A: 990,
+ 0xA68B: 991,
+ 0xA691: 992,
+ 0xA693: 993,
+ 0xA695: 994,
+ 0xA697: 995,
+ 0xA69B: 996,
+ 0xA69C: 997,
+ 0xA6A1: 998,
+ 0xA6A9: 999,
+ 0xA6B6: 1000,
+ 0xA6C1: 1001,
+ 0xA6E1: 1002,
+ 0xA6E2: 1003,
+ 0xA6E5: 1004,
+ 0xA6E9: 1005,
+ 0xA6F7: 1006,
+ 0xA741: 1007,
+ 0xA745: 1008,
+ 0xA749: 1009,
+ 0xA751: 1010,
+ 0xA755: 1011,
+ 0xA757: 1012,
+ 0xA761: 1013,
+ 0xA762: 1014,
+ 0xA765: 1015,
+ 0xA769: 1016,
+ 0xA771: 1017,
+ 0xA773: 1018,
+ 0xA775: 1019,
+ 0xA7A1: 1020,
+ 0xA7A2: 1021,
+ 0xA7A5: 1022,
+ 0xA7A9: 1023,
+ 0xA7AB: 1024,
+ 0xA7B1: 1025,
+ 0xA7B3: 1026,
+ 0xA7B5: 1027,
+ 0xA7B7: 1028,
+ 0xA7B8: 1029,
+ 0xA7B9: 1030,
+ 0xA861: 1031,
+ 0xA862: 1032,
+ 0xA865: 1033,
+ 0xA869: 1034,
+ 0xA86B: 1035,
+ 0xA871: 1036,
+ 0xA873: 1037,
+ 0xA875: 1038,
+ 0xA876: 1039,
+ 0xA877: 1040,
+ 0xA87D: 1041,
+ 0xA881: 1042,
+ 0xA882: 1043,
+ 0xA885: 1044,
+ 0xA889: 1045,
+ 0xA891: 1046,
+ 0xA893: 1047,
+ 0xA895: 1048,
+ 0xA896: 1049,
+ 0xA897: 1050,
+ 0xA8A1: 1051,
+ 0xA8A2: 1052,
+ 0xA8B1: 1053,
+ 0xA8E1: 1054,
+ 0xA8E2: 1055,
+ 0xA8E5: 1056,
+ 0xA8E8: 1057,
+ 0xA8E9: 1058,
+ 0xA8F1: 1059,
+ 0xA8F5: 1060,
+ 0xA8F6: 1061,
+ 0xA8F7: 1062,
+ 0xA941: 1063,
+ 0xA957: 1064,
+ 0xA961: 1065,
+ 0xA962: 1066,
+ 0xA971: 1067,
+ 0xA973: 1068,
+ 0xA975: 1069,
+ 0xA976: 1070,
+ 0xA977: 1071,
+ 0xA9A1: 1072,
+ 0xA9A2: 1073,
+ 0xA9A5: 1074,
+ 0xA9A9: 1075,
+ 0xA9B1: 1076,
+ 0xA9B3: 1077,
+ 0xA9B7: 1078,
+ 0xAA41: 1079,
+ 0xAA61: 1080,
+ 0xAA77: 1081,
+ 0xAA81: 1082,
+ 0xAA82: 1083,
+ 0xAA85: 1084,
+ 0xAA89: 1085,
+ 0xAA91: 1086,
+ 0xAA95: 1087,
+ 0xAA97: 1088,
+ 0xAB41: 1089,
+ 0xAB57: 1090,
+ 0xAB61: 1091,
+ 0xAB65: 1092,
+ 0xAB69: 1093,
+ 0xAB71: 1094,
+ 0xAB73: 1095,
+ 0xABA1: 1096,
+ 0xABA2: 1097,
+ 0xABA5: 1098,
+ 0xABA9: 1099,
+ 0xABB1: 1100,
+ 0xABB3: 1101,
+ 0xABB5: 1102,
+ 0xABB7: 1103,
+ 0xAC61: 1104,
+ 0xAC62: 1105,
+ 0xAC64: 1106,
+ 0xAC65: 1107,
+ 0xAC68: 1108,
+ 0xAC69: 1109,
+ 0xAC6A: 1110,
+ 0xAC6B: 1111,
+ 0xAC71: 1112,
+ 0xAC73: 1113,
+ 0xAC75: 1114,
+ 0xAC76: 1115,
+ 0xAC77: 1116,
+ 0xAC7B: 1117,
+ 0xAC81: 1118,
+ 0xAC82: 1119,
+ 0xAC85: 1120,
+ 0xAC89: 1121,
+ 0xAC91: 1122,
+ 0xAC93: 1123,
+ 0xAC95: 1124,
+ 0xAC96: 1125,
+ 0xAC97: 1126,
+ 0xACA1: 1127,
+ 0xACA2: 1128,
+ 0xACA5: 1129,
+ 0xACA9: 1130,
+ 0xACB1: 1131,
+ 0xACB3: 1132,
+ 0xACB5: 1133,
+ 0xACB7: 1134,
+ 0xACC1: 1135,
+ 0xACC5: 1136,
+ 0xACC9: 1137,
+ 0xACD1: 1138,
+ 0xACD7: 1139,
+ 0xACE1: 1140,
+ 0xACE2: 1141,
+ 0xACE3: 1142,
+ 0xACE4: 1143,
+ 0xACE5: 1144,
+ 0xACE8: 1145,
+ 0xACE9: 1146,
+ 0xACEB: 1147,
+ 0xACEC: 1148,
+ 0xACF1: 1149,
+ 0xACF3: 1150,
+ 0xACF5: 1151,
+ 0xACF6: 1152,
+ 0xACF7: 1153,
+ 0xACFC: 1154,
+ 0xAD41: 1155,
+ 0xAD42: 1156,
+ 0xAD45: 1157,
+ 0xAD49: 1158,
+ 0xAD51: 1159,
+ 0xAD53: 1160,
+ 0xAD55: 1161,
+ 0xAD56: 1162,
+ 0xAD57: 1163,
+ 0xAD61: 1164,
+ 0xAD62: 1165,
+ 0xAD65: 1166,
+ 0xAD69: 1167,
+ 0xAD71: 1168,
+ 0xAD73: 1169,
+ 0xAD75: 1170,
+ 0xAD76: 1171,
+ 0xAD77: 1172,
+ 0xAD81: 1173,
+ 0xAD85: 1174,
+ 0xAD89: 1175,
+ 0xAD97: 1176,
+ 0xADA1: 1177,
+ 0xADA2: 1178,
+ 0xADA3: 1179,
+ 0xADA5: 1180,
+ 0xADA9: 1181,
+ 0xADAB: 1182,
+ 0xADB1: 1183,
+ 0xADB3: 1184,
+ 0xADB5: 1185,
+ 0xADB7: 1186,
+ 0xADBB: 1187,
+ 0xADC1: 1188,
+ 0xADC2: 1189,
+ 0xADC5: 1190,
+ 0xADC9: 1191,
+ 0xADD7: 1192,
+ 0xADE1: 1193,
+ 0xADE5: 1194,
+ 0xADE9: 1195,
+ 0xADF1: 1196,
+ 0xADF5: 1197,
+ 0xADF6: 1198,
+ 0xAE41: 1199,
+ 0xAE45: 1200,
+ 0xAE49: 1201,
+ 0xAE51: 1202,
+ 0xAE53: 1203,
+ 0xAE55: 1204,
+ 0xAE61: 1205,
+ 0xAE62: 1206,
+ 0xAE65: 1207,
+ 0xAE69: 1208,
+ 0xAE71: 1209,
+ 0xAE73: 1210,
+ 0xAE75: 1211,
+ 0xAE77: 1212,
+ 0xAE81: 1213,
+ 0xAE82: 1214,
+ 0xAE85: 1215,
+ 0xAE88: 1216,
+ 0xAE89: 1217,
+ 0xAE91: 1218,
+ 0xAE93: 1219,
+ 0xAE95: 1220,
+ 0xAE97: 1221,
+ 0xAE99: 1222,
+ 0xAE9B: 1223,
+ 0xAE9C: 1224,
+ 0xAEA1: 1225,
+ 0xAEB6: 1226,
+ 0xAEC1: 1227,
+ 0xAEC2: 1228,
+ 0xAEC5: 1229,
+ 0xAEC9: 1230,
+ 0xAED1: 1231,
+ 0xAED7: 1232,
+ 0xAEE1: 1233,
+ 0xAEE2: 1234,
+ 0xAEE5: 1235,
+ 0xAEE9: 1236,
+ 0xAEF1: 1237,
+ 0xAEF3: 1238,
+ 0xAEF5: 1239,
+ 0xAEF7: 1240,
+ 0xAF41: 1241,
+ 0xAF42: 1242,
+ 0xAF49: 1243,
+ 0xAF51: 1244,
+ 0xAF55: 1245,
+ 0xAF57: 1246,
+ 0xAF61: 1247,
+ 0xAF62: 1248,
+ 0xAF65: 1249,
+ 0xAF69: 1250,
+ 0xAF6A: 1251,
+ 0xAF71: 1252,
+ 0xAF73: 1253,
+ 0xAF75: 1254,
+ 0xAF77: 1255,
+ 0xAFA1: 1256,
+ 0xAFA2: 1257,
+ 0xAFA5: 1258,
+ 0xAFA8: 1259,
+ 0xAFA9: 1260,
+ 0xAFB0: 1261,
+ 0xAFB1: 1262,
+ 0xAFB3: 1263,
+ 0xAFB5: 1264,
+ 0xAFB7: 1265,
+ 0xAFBC: 1266,
+ 0xB061: 1267,
+ 0xB062: 1268,
+ 0xB064: 1269,
+ 0xB065: 1270,
+ 0xB069: 1271,
+ 0xB071: 1272,
+ 0xB073: 1273,
+ 0xB076: 1274,
+ 0xB077: 1275,
+ 0xB07D: 1276,
+ 0xB081: 1277,
+ 0xB082: 1278,
+ 0xB085: 1279,
+ 0xB089: 1280,
+ 0xB091: 1281,
+ 0xB093: 1282,
+ 0xB096: 1283,
+ 0xB097: 1284,
+ 0xB0B7: 1285,
+ 0xB0E1: 1286,
+ 0xB0E2: 1287,
+ 0xB0E5: 1288,
+ 0xB0E9: 1289,
+ 0xB0EB: 1290,
+ 0xB0F1: 1291,
+ 0xB0F3: 1292,
+ 0xB0F6: 1293,
+ 0xB0F7: 1294,
+ 0xB141: 1295,
+ 0xB145: 1296,
+ 0xB149: 1297,
+ 0xB185: 1298,
+ 0xB1A1: 1299,
+ 0xB1A2: 1300,
+ 0xB1A5: 1301,
+ 0xB1A8: 1302,
+ 0xB1A9: 1303,
+ 0xB1AB: 1304,
+ 0xB1B1: 1305,
+ 0xB1B3: 1306,
+ 0xB1B7: 1307,
+ 0xB1C1: 1308,
+ 0xB1C2: 1309,
+ 0xB1C5: 1310,
+ 0xB1D6: 1311,
+ 0xB1E1: 1312,
+ 0xB1F6: 1313,
+ 0xB241: 1314,
+ 0xB245: 1315,
+ 0xB249: 1316,
+ 0xB251: 1317,
+ 0xB253: 1318,
+ 0xB261: 1319,
+ 0xB281: 1320,
+ 0xB282: 1321,
+ 0xB285: 1322,
+ 0xB289: 1323,
+ 0xB291: 1324,
+ 0xB293: 1325,
+ 0xB297: 1326,
+ 0xB2A1: 1327,
+ 0xB2B6: 1328,
+ 0xB2C1: 1329,
+ 0xB2E1: 1330,
+ 0xB2E5: 1331,
+ 0xB357: 1332,
+ 0xB361: 1333,
+ 0xB362: 1334,
+ 0xB365: 1335,
+ 0xB369: 1336,
+ 0xB36B: 1337,
+ 0xB370: 1338,
+ 0xB371: 1339,
+ 0xB373: 1340,
+ 0xB381: 1341,
+ 0xB385: 1342,
+ 0xB389: 1343,
+ 0xB391: 1344,
+ 0xB3A1: 1345,
+ 0xB3A2: 1346,
+ 0xB3A5: 1347,
+ 0xB3A9: 1348,
+ 0xB3B1: 1349,
+ 0xB3B3: 1350,
+ 0xB3B5: 1351,
+ 0xB3B7: 1352,
+ 0xB461: 1353,
+ 0xB462: 1354,
+ 0xB465: 1355,
+ 0xB466: 1356,
+ 0xB467: 1357,
+ 0xB469: 1358,
+ 0xB46A: 1359,
+ 0xB46B: 1360,
+ 0xB470: 1361,
+ 0xB471: 1362,
+ 0xB473: 1363,
+ 0xB475: 1364,
+ 0xB476: 1365,
+ 0xB477: 1366,
+ 0xB47B: 1367,
+ 0xB47C: 1368,
+ 0xB481: 1369,
+ 0xB482: 1370,
+ 0xB485: 1371,
+ 0xB489: 1372,
+ 0xB491: 1373,
+ 0xB493: 1374,
+ 0xB495: 1375,
+ 0xB496: 1376,
+ 0xB497: 1377,
+ 0xB4A1: 1378,
+ 0xB4A2: 1379,
+ 0xB4A5: 1380,
+ 0xB4A9: 1381,
+ 0xB4AC: 1382,
+ 0xB4B1: 1383,
+ 0xB4B3: 1384,
+ 0xB4B5: 1385,
+ 0xB4B7: 1386,
+ 0xB4BB: 1387,
+ 0xB4BD: 1388,
+ 0xB4C1: 1389,
+ 0xB4C5: 1390,
+ 0xB4C9: 1391,
+ 0xB4D3: 1392,
+ 0xB4E1: 1393,
+ 0xB4E2: 1394,
+ 0xB4E5: 1395,
+ 0xB4E6: 1396,
+ 0xB4E8: 1397,
+ 0xB4E9: 1398,
+ 0xB4EA: 1399,
+ 0xB4EB: 1400,
+ 0xB4F1: 1401,
+ 0xB4F3: 1402,
+ 0xB4F4: 1403,
+ 0xB4F5: 1404,
+ 0xB4F6: 1405,
+ 0xB4F7: 1406,
+ 0xB4F8: 1407,
+ 0xB4FA: 1408,
+ 0xB4FC: 1409,
+ 0xB541: 1410,
+ 0xB542: 1411,
+ 0xB545: 1412,
+ 0xB549: 1413,
+ 0xB551: 1414,
+ 0xB553: 1415,
+ 0xB555: 1416,
+ 0xB557: 1417,
+ 0xB561: 1418,
+ 0xB562: 1419,
+ 0xB563: 1420,
+ 0xB565: 1421,
+ 0xB569: 1422,
+ 0xB56B: 1423,
+ 0xB56C: 1424,
+ 0xB571: 1425,
+ 0xB573: 1426,
+ 0xB574: 1427,
+ 0xB575: 1428,
+ 0xB576: 1429,
+ 0xB577: 1430,
+ 0xB57B: 1431,
+ 0xB57C: 1432,
+ 0xB57D: 1433,
+ 0xB581: 1434,
+ 0xB585: 1435,
+ 0xB589: 1436,
+ 0xB591: 1437,
+ 0xB593: 1438,
+ 0xB595: 1439,
+ 0xB596: 1440,
+ 0xB5A1: 1441,
+ 0xB5A2: 1442,
+ 0xB5A5: 1443,
+ 0xB5A9: 1444,
+ 0xB5AA: 1445,
+ 0xB5AB: 1446,
+ 0xB5AD: 1447,
+ 0xB5B0: 1448,
+ 0xB5B1: 1449,
+ 0xB5B3: 1450,
+ 0xB5B5: 1451,
+ 0xB5B7: 1452,
+ 0xB5B9: 1453,
+ 0xB5C1: 1454,
+ 0xB5C2: 1455,
+ 0xB5C5: 1456,
+ 0xB5C9: 1457,
+ 0xB5D1: 1458,
+ 0xB5D3: 1459,
+ 0xB5D5: 1460,
+ 0xB5D6: 1461,
+ 0xB5D7: 1462,
+ 0xB5E1: 1463,
+ 0xB5E2: 1464,
+ 0xB5E5: 1465,
+ 0xB5F1: 1466,
+ 0xB5F5: 1467,
+ 0xB5F7: 1468,
+ 0xB641: 1469,
+ 0xB642: 1470,
+ 0xB645: 1471,
+ 0xB649: 1472,
+ 0xB651: 1473,
+ 0xB653: 1474,
+ 0xB655: 1475,
+ 0xB657: 1476,
+ 0xB661: 1477,
+ 0xB662: 1478,
+ 0xB665: 1479,
+ 0xB669: 1480,
+ 0xB671: 1481,
+ 0xB673: 1482,
+ 0xB675: 1483,
+ 0xB677: 1484,
+ 0xB681: 1485,
+ 0xB682: 1486,
+ 0xB685: 1487,
+ 0xB689: 1488,
+ 0xB68A: 1489,
+ 0xB68B: 1490,
+ 0xB691: 1491,
+ 0xB693: 1492,
+ 0xB695: 1493,
+ 0xB697: 1494,
+ 0xB6A1: 1495,
+ 0xB6A2: 1496,
+ 0xB6A5: 1497,
+ 0xB6A9: 1498,
+ 0xB6B1: 1499,
+ 0xB6B3: 1500,
+ 0xB6B6: 1501,
+ 0xB6B7: 1502,
+ 0xB6C1: 1503,
+ 0xB6C2: 1504,
+ 0xB6C5: 1505,
+ 0xB6C9: 1506,
+ 0xB6D1: 1507,
+ 0xB6D3: 1508,
+ 0xB6D7: 1509,
+ 0xB6E1: 1510,
+ 0xB6E2: 1511,
+ 0xB6E5: 1512,
+ 0xB6E9: 1513,
+ 0xB6F1: 1514,
+ 0xB6F3: 1515,
+ 0xB6F5: 1516,
+ 0xB6F7: 1517,
+ 0xB741: 1518,
+ 0xB742: 1519,
+ 0xB745: 1520,
+ 0xB749: 1521,
+ 0xB751: 1522,
+ 0xB753: 1523,
+ 0xB755: 1524,
+ 0xB757: 1525,
+ 0xB759: 1526,
+ 0xB761: 1527,
+ 0xB762: 1528,
+ 0xB765: 1529,
+ 0xB769: 1530,
+ 0xB76F: 1531,
+ 0xB771: 1532,
+ 0xB773: 1533,
+ 0xB775: 1534,
+ 0xB777: 1535,
+ 0xB778: 1536,
+ 0xB779: 1537,
+ 0xB77A: 1538,
+ 0xB77B: 1539,
+ 0xB77C: 1540,
+ 0xB77D: 1541,
+ 0xB781: 1542,
+ 0xB785: 1543,
+ 0xB789: 1544,
+ 0xB791: 1545,
+ 0xB795: 1546,
+ 0xB7A1: 1547,
+ 0xB7A2: 1548,
+ 0xB7A5: 1549,
+ 0xB7A9: 1550,
+ 0xB7AA: 1551,
+ 0xB7AB: 1552,
+ 0xB7B0: 1553,
+ 0xB7B1: 1554,
+ 0xB7B3: 1555,
+ 0xB7B5: 1556,
+ 0xB7B6: 1557,
+ 0xB7B7: 1558,
+ 0xB7B8: 1559,
+ 0xB7BC: 1560,
+ 0xB861: 1561,
+ 0xB862: 1562,
+ 0xB865: 1563,
+ 0xB867: 1564,
+ 0xB868: 1565,
+ 0xB869: 1566,
+ 0xB86B: 1567,
+ 0xB871: 1568,
+ 0xB873: 1569,
+ 0xB875: 1570,
+ 0xB876: 1571,
+ 0xB877: 1572,
+ 0xB878: 1573,
+ 0xB881: 1574,
+ 0xB882: 1575,
+ 0xB885: 1576,
+ 0xB889: 1577,
+ 0xB891: 1578,
+ 0xB893: 1579,
+ 0xB895: 1580,
+ 0xB896: 1581,
+ 0xB897: 1582,
+ 0xB8A1: 1583,
+ 0xB8A2: 1584,
+ 0xB8A5: 1585,
+ 0xB8A7: 1586,
+ 0xB8A9: 1587,
+ 0xB8B1: 1588,
+ 0xB8B7: 1589,
+ 0xB8C1: 1590,
+ 0xB8C5: 1591,
+ 0xB8C9: 1592,
+ 0xB8E1: 1593,
+ 0xB8E2: 1594,
+ 0xB8E5: 1595,
+ 0xB8E9: 1596,
+ 0xB8EB: 1597,
+ 0xB8F1: 1598,
+ 0xB8F3: 1599,
+ 0xB8F5: 1600,
+ 0xB8F7: 1601,
+ 0xB8F8: 1602,
+ 0xB941: 1603,
+ 0xB942: 1604,
+ 0xB945: 1605,
+ 0xB949: 1606,
+ 0xB951: 1607,
+ 0xB953: 1608,
+ 0xB955: 1609,
+ 0xB957: 1610,
+ 0xB961: 1611,
+ 0xB965: 1612,
+ 0xB969: 1613,
+ 0xB971: 1614,
+ 0xB973: 1615,
+ 0xB976: 1616,
+ 0xB977: 1617,
+ 0xB981: 1618,
+ 0xB9A1: 1619,
+ 0xB9A2: 1620,
+ 0xB9A5: 1621,
+ 0xB9A9: 1622,
+ 0xB9AB: 1623,
+ 0xB9B1: 1624,
+ 0xB9B3: 1625,
+ 0xB9B5: 1626,
+ 0xB9B7: 1627,
+ 0xB9B8: 1628,
+ 0xB9B9: 1629,
+ 0xB9BD: 1630,
+ 0xB9C1: 1631,
+ 0xB9C2: 1632,
+ 0xB9C9: 1633,
+ 0xB9D3: 1634,
+ 0xB9D5: 1635,
+ 0xB9D7: 1636,
+ 0xB9E1: 1637,
+ 0xB9F6: 1638,
+ 0xB9F7: 1639,
+ 0xBA41: 1640,
+ 0xBA45: 1641,
+ 0xBA49: 1642,
+ 0xBA51: 1643,
+ 0xBA53: 1644,
+ 0xBA55: 1645,
+ 0xBA57: 1646,
+ 0xBA61: 1647,
+ 0xBA62: 1648,
+ 0xBA65: 1649,
+ 0xBA77: 1650,
+ 0xBA81: 1651,
+ 0xBA82: 1652,
+ 0xBA85: 1653,
+ 0xBA89: 1654,
+ 0xBA8A: 1655,
+ 0xBA8B: 1656,
+ 0xBA91: 1657,
+ 0xBA93: 1658,
+ 0xBA95: 1659,
+ 0xBA97: 1660,
+ 0xBAA1: 1661,
+ 0xBAB6: 1662,
+ 0xBAC1: 1663,
+ 0xBAE1: 1664,
+ 0xBAE2: 1665,
+ 0xBAE5: 1666,
+ 0xBAE9: 1667,
+ 0xBAF1: 1668,
+ 0xBAF3: 1669,
+ 0xBAF5: 1670,
+ 0xBB41: 1671,
+ 0xBB45: 1672,
+ 0xBB49: 1673,
+ 0xBB51: 1674,
+ 0xBB61: 1675,
+ 0xBB62: 1676,
+ 0xBB65: 1677,
+ 0xBB69: 1678,
+ 0xBB71: 1679,
+ 0xBB73: 1680,
+ 0xBB75: 1681,
+ 0xBB77: 1682,
+ 0xBBA1: 1683,
+ 0xBBA2: 1684,
+ 0xBBA5: 1685,
+ 0xBBA8: 1686,
+ 0xBBA9: 1687,
+ 0xBBAB: 1688,
+ 0xBBB1: 1689,
+ 0xBBB3: 1690,
+ 0xBBB5: 1691,
+ 0xBBB7: 1692,
+ 0xBBB8: 1693,
+ 0xBBBB: 1694,
+ 0xBBBC: 1695,
+ 0xBC61: 1696,
+ 0xBC62: 1697,
+ 0xBC65: 1698,
+ 0xBC67: 1699,
+ 0xBC69: 1700,
+ 0xBC6C: 1701,
+ 0xBC71: 1702,
+ 0xBC73: 1703,
+ 0xBC75: 1704,
+ 0xBC76: 1705,
+ 0xBC77: 1706,
+ 0xBC81: 1707,
+ 0xBC82: 1708,
+ 0xBC85: 1709,
+ 0xBC89: 1710,
+ 0xBC91: 1711,
+ 0xBC93: 1712,
+ 0xBC95: 1713,
+ 0xBC96: 1714,
+ 0xBC97: 1715,
+ 0xBCA1: 1716,
+ 0xBCA5: 1717,
+ 0xBCB7: 1718,
+ 0xBCE1: 1719,
+ 0xBCE2: 1720,
+ 0xBCE5: 1721,
+ 0xBCE9: 1722,
+ 0xBCF1: 1723,
+ 0xBCF3: 1724,
+ 0xBCF5: 1725,
+ 0xBCF6: 1726,
+ 0xBCF7: 1727,
+ 0xBD41: 1728,
+ 0xBD57: 1729,
+ 0xBD61: 1730,
+ 0xBD76: 1731,
+ 0xBDA1: 1732,
+ 0xBDA2: 1733,
+ 0xBDA5: 1734,
+ 0xBDA9: 1735,
+ 0xBDB1: 1736,
+ 0xBDB3: 1737,
+ 0xBDB5: 1738,
+ 0xBDB7: 1739,
+ 0xBDB9: 1740,
+ 0xBDC1: 1741,
+ 0xBDC2: 1742,
+ 0xBDC9: 1743,
+ 0xBDD6: 1744,
+ 0xBDE1: 1745,
+ 0xBDF6: 1746,
+ 0xBE41: 1747,
+ 0xBE45: 1748,
+ 0xBE49: 1749,
+ 0xBE51: 1750,
+ 0xBE53: 1751,
+ 0xBE77: 1752,
+ 0xBE81: 1753,
+ 0xBE82: 1754,
+ 0xBE85: 1755,
+ 0xBE89: 1756,
+ 0xBE91: 1757,
+ 0xBE93: 1758,
+ 0xBE97: 1759,
+ 0xBEA1: 1760,
+ 0xBEB6: 1761,
+ 0xBEB7: 1762,
+ 0xBEE1: 1763,
+ 0xBF41: 1764,
+ 0xBF61: 1765,
+ 0xBF71: 1766,
+ 0xBF75: 1767,
+ 0xBF77: 1768,
+ 0xBFA1: 1769,
+ 0xBFA2: 1770,
+ 0xBFA5: 1771,
+ 0xBFA9: 1772,
+ 0xBFB1: 1773,
+ 0xBFB3: 1774,
+ 0xBFB7: 1775,
+ 0xBFB8: 1776,
+ 0xBFBD: 1777,
+ 0xC061: 1778,
+ 0xC062: 1779,
+ 0xC065: 1780,
+ 0xC067: 1781,
+ 0xC069: 1782,
+ 0xC071: 1783,
+ 0xC073: 1784,
+ 0xC075: 1785,
+ 0xC076: 1786,
+ 0xC077: 1787,
+ 0xC078: 1788,
+ 0xC081: 1789,
+ 0xC082: 1790,
+ 0xC085: 1791,
+ 0xC089: 1792,
+ 0xC091: 1793,
+ 0xC093: 1794,
+ 0xC095: 1795,
+ 0xC096: 1796,
+ 0xC097: 1797,
+ 0xC0A1: 1798,
+ 0xC0A5: 1799,
+ 0xC0A7: 1800,
+ 0xC0A9: 1801,
+ 0xC0B1: 1802,
+ 0xC0B7: 1803,
+ 0xC0E1: 1804,
+ 0xC0E2: 1805,
+ 0xC0E5: 1806,
+ 0xC0E9: 1807,
+ 0xC0F1: 1808,
+ 0xC0F3: 1809,
+ 0xC0F5: 1810,
+ 0xC0F6: 1811,
+ 0xC0F7: 1812,
+ 0xC141: 1813,
+ 0xC142: 1814,
+ 0xC145: 1815,
+ 0xC149: 1816,
+ 0xC151: 1817,
+ 0xC153: 1818,
+ 0xC155: 1819,
+ 0xC157: 1820,
+ 0xC161: 1821,
+ 0xC165: 1822,
+ 0xC176: 1823,
+ 0xC181: 1824,
+ 0xC185: 1825,
+ 0xC197: 1826,
+ 0xC1A1: 1827,
+ 0xC1A2: 1828,
+ 0xC1A5: 1829,
+ 0xC1A9: 1830,
+ 0xC1B1: 1831,
+ 0xC1B3: 1832,
+ 0xC1B5: 1833,
+ 0xC1B7: 1834,
+ 0xC1C1: 1835,
+ 0xC1C5: 1836,
+ 0xC1C9: 1837,
+ 0xC1D7: 1838,
+ 0xC241: 1839,
+ 0xC245: 1840,
+ 0xC249: 1841,
+ 0xC251: 1842,
+ 0xC253: 1843,
+ 0xC255: 1844,
+ 0xC257: 1845,
+ 0xC261: 1846,
+ 0xC271: 1847,
+ 0xC281: 1848,
+ 0xC282: 1849,
+ 0xC285: 1850,
+ 0xC289: 1851,
+ 0xC291: 1852,
+ 0xC293: 1853,
+ 0xC295: 1854,
+ 0xC297: 1855,
+ 0xC2A1: 1856,
+ 0xC2B6: 1857,
+ 0xC2C1: 1858,
+ 0xC2C5: 1859,
+ 0xC2E1: 1860,
+ 0xC2E5: 1861,
+ 0xC2E9: 1862,
+ 0xC2F1: 1863,
+ 0xC2F3: 1864,
+ 0xC2F5: 1865,
+ 0xC2F7: 1866,
+ 0xC341: 1867,
+ 0xC345: 1868,
+ 0xC349: 1869,
+ 0xC351: 1870,
+ 0xC357: 1871,
+ 0xC361: 1872,
+ 0xC362: 1873,
+ 0xC365: 1874,
+ 0xC369: 1875,
+ 0xC371: 1876,
+ 0xC373: 1877,
+ 0xC375: 1878,
+ 0xC377: 1879,
+ 0xC3A1: 1880,
+ 0xC3A2: 1881,
+ 0xC3A5: 1882,
+ 0xC3A8: 1883,
+ 0xC3A9: 1884,
+ 0xC3AA: 1885,
+ 0xC3B1: 1886,
+ 0xC3B3: 1887,
+ 0xC3B5: 1888,
+ 0xC3B7: 1889,
+ 0xC461: 1890,
+ 0xC462: 1891,
+ 0xC465: 1892,
+ 0xC469: 1893,
+ 0xC471: 1894,
+ 0xC473: 1895,
+ 0xC475: 1896,
+ 0xC477: 1897,
+ 0xC481: 1898,
+ 0xC482: 1899,
+ 0xC485: 1900,
+ 0xC489: 1901,
+ 0xC491: 1902,
+ 0xC493: 1903,
+ 0xC495: 1904,
+ 0xC496: 1905,
+ 0xC497: 1906,
+ 0xC4A1: 1907,
+ 0xC4A2: 1908,
+ 0xC4B7: 1909,
+ 0xC4E1: 1910,
+ 0xC4E2: 1911,
+ 0xC4E5: 1912,
+ 0xC4E8: 1913,
+ 0xC4E9: 1914,
+ 0xC4F1: 1915,
+ 0xC4F3: 1916,
+ 0xC4F5: 1917,
+ 0xC4F6: 1918,
+ 0xC4F7: 1919,
+ 0xC541: 1920,
+ 0xC542: 1921,
+ 0xC545: 1922,
+ 0xC549: 1923,
+ 0xC551: 1924,
+ 0xC553: 1925,
+ 0xC555: 1926,
+ 0xC557: 1927,
+ 0xC561: 1928,
+ 0xC565: 1929,
+ 0xC569: 1930,
+ 0xC571: 1931,
+ 0xC573: 1932,
+ 0xC575: 1933,
+ 0xC576: 1934,
+ 0xC577: 1935,
+ 0xC581: 1936,
+ 0xC5A1: 1937,
+ 0xC5A2: 1938,
+ 0xC5A5: 1939,
+ 0xC5A9: 1940,
+ 0xC5B1: 1941,
+ 0xC5B3: 1942,
+ 0xC5B5: 1943,
+ 0xC5B7: 1944,
+ 0xC5C1: 1945,
+ 0xC5C2: 1946,
+ 0xC5C5: 1947,
+ 0xC5C9: 1948,
+ 0xC5D1: 1949,
+ 0xC5D7: 1950,
+ 0xC5E1: 1951,
+ 0xC5F7: 1952,
+ 0xC641: 1953,
+ 0xC649: 1954,
+ 0xC661: 1955,
+ 0xC681: 1956,
+ 0xC682: 1957,
+ 0xC685: 1958,
+ 0xC689: 1959,
+ 0xC691: 1960,
+ 0xC693: 1961,
+ 0xC695: 1962,
+ 0xC697: 1963,
+ 0xC6A1: 1964,
+ 0xC6A5: 1965,
+ 0xC6A9: 1966,
+ 0xC6B7: 1967,
+ 0xC6C1: 1968,
+ 0xC6D7: 1969,
+ 0xC6E1: 1970,
+ 0xC6E2: 1971,
+ 0xC6E5: 1972,
+ 0xC6E9: 1973,
+ 0xC6F1: 1974,
+ 0xC6F3: 1975,
+ 0xC6F5: 1976,
+ 0xC6F7: 1977,
+ 0xC741: 1978,
+ 0xC745: 1979,
+ 0xC749: 1980,
+ 0xC751: 1981,
+ 0xC761: 1982,
+ 0xC762: 1983,
+ 0xC765: 1984,
+ 0xC769: 1985,
+ 0xC771: 1986,
+ 0xC773: 1987,
+ 0xC777: 1988,
+ 0xC7A1: 1989,
+ 0xC7A2: 1990,
+ 0xC7A5: 1991,
+ 0xC7A9: 1992,
+ 0xC7B1: 1993,
+ 0xC7B3: 1994,
+ 0xC7B5: 1995,
+ 0xC7B7: 1996,
+ 0xC861: 1997,
+ 0xC862: 1998,
+ 0xC865: 1999,
+ 0xC869: 2000,
+ 0xC86A: 2001,
+ 0xC871: 2002,
+ 0xC873: 2003,
+ 0xC875: 2004,
+ 0xC876: 2005,
+ 0xC877: 2006,
+ 0xC881: 2007,
+ 0xC882: 2008,
+ 0xC885: 2009,
+ 0xC889: 2010,
+ 0xC891: 2011,
+ 0xC893: 2012,
+ 0xC895: 2013,
+ 0xC896: 2014,
+ 0xC897: 2015,
+ 0xC8A1: 2016,
+ 0xC8B7: 2017,
+ 0xC8E1: 2018,
+ 0xC8E2: 2019,
+ 0xC8E5: 2020,
+ 0xC8E9: 2021,
+ 0xC8EB: 2022,
+ 0xC8F1: 2023,
+ 0xC8F3: 2024,
+ 0xC8F5: 2025,
+ 0xC8F6: 2026,
+ 0xC8F7: 2027,
+ 0xC941: 2028,
+ 0xC942: 2029,
+ 0xC945: 2030,
+ 0xC949: 2031,
+ 0xC951: 2032,
+ 0xC953: 2033,
+ 0xC955: 2034,
+ 0xC957: 2035,
+ 0xC961: 2036,
+ 0xC965: 2037,
+ 0xC976: 2038,
+ 0xC981: 2039,
+ 0xC985: 2040,
+ 0xC9A1: 2041,
+ 0xC9A2: 2042,
+ 0xC9A5: 2043,
+ 0xC9A9: 2044,
+ 0xC9B1: 2045,
+ 0xC9B3: 2046,
+ 0xC9B5: 2047,
+ 0xC9B7: 2048,
+ 0xC9BC: 2049,
+ 0xC9C1: 2050,
+ 0xC9C5: 2051,
+ 0xC9E1: 2052,
+ 0xCA41: 2053,
+ 0xCA45: 2054,
+ 0xCA55: 2055,
+ 0xCA57: 2056,
+ 0xCA61: 2057,
+ 0xCA81: 2058,
+ 0xCA82: 2059,
+ 0xCA85: 2060,
+ 0xCA89: 2061,
+ 0xCA91: 2062,
+ 0xCA93: 2063,
+ 0xCA95: 2064,
+ 0xCA97: 2065,
+ 0xCAA1: 2066,
+ 0xCAB6: 2067,
+ 0xCAC1: 2068,
+ 0xCAE1: 2069,
+ 0xCAE2: 2070,
+ 0xCAE5: 2071,
+ 0xCAE9: 2072,
+ 0xCAF1: 2073,
+ 0xCAF3: 2074,
+ 0xCAF7: 2075,
+ 0xCB41: 2076,
+ 0xCB45: 2077,
+ 0xCB49: 2078,
+ 0xCB51: 2079,
+ 0xCB57: 2080,
+ 0xCB61: 2081,
+ 0xCB62: 2082,
+ 0xCB65: 2083,
+ 0xCB68: 2084,
+ 0xCB69: 2085,
+ 0xCB6B: 2086,
+ 0xCB71: 2087,
+ 0xCB73: 2088,
+ 0xCB75: 2089,
+ 0xCB81: 2090,
+ 0xCB85: 2091,
+ 0xCB89: 2092,
+ 0xCB91: 2093,
+ 0xCB93: 2094,
+ 0xCBA1: 2095,
+ 0xCBA2: 2096,
+ 0xCBA5: 2097,
+ 0xCBA9: 2098,
+ 0xCBB1: 2099,
+ 0xCBB3: 2100,
+ 0xCBB5: 2101,
+ 0xCBB7: 2102,
+ 0xCC61: 2103,
+ 0xCC62: 2104,
+ 0xCC63: 2105,
+ 0xCC65: 2106,
+ 0xCC69: 2107,
+ 0xCC6B: 2108,
+ 0xCC71: 2109,
+ 0xCC73: 2110,
+ 0xCC75: 2111,
+ 0xCC76: 2112,
+ 0xCC77: 2113,
+ 0xCC7B: 2114,
+ 0xCC81: 2115,
+ 0xCC82: 2116,
+ 0xCC85: 2117,
+ 0xCC89: 2118,
+ 0xCC91: 2119,
+ 0xCC93: 2120,
+ 0xCC95: 2121,
+ 0xCC96: 2122,
+ 0xCC97: 2123,
+ 0xCCA1: 2124,
+ 0xCCA2: 2125,
+ 0xCCE1: 2126,
+ 0xCCE2: 2127,
+ 0xCCE5: 2128,
+ 0xCCE9: 2129,
+ 0xCCF1: 2130,
+ 0xCCF3: 2131,
+ 0xCCF5: 2132,
+ 0xCCF6: 2133,
+ 0xCCF7: 2134,
+ 0xCD41: 2135,
+ 0xCD42: 2136,
+ 0xCD45: 2137,
+ 0xCD49: 2138,
+ 0xCD51: 2139,
+ 0xCD53: 2140,
+ 0xCD55: 2141,
+ 0xCD57: 2142,
+ 0xCD61: 2143,
+ 0xCD65: 2144,
+ 0xCD69: 2145,
+ 0xCD71: 2146,
+ 0xCD73: 2147,
+ 0xCD76: 2148,
+ 0xCD77: 2149,
+ 0xCD81: 2150,
+ 0xCD89: 2151,
+ 0xCD93: 2152,
+ 0xCD95: 2153,
+ 0xCDA1: 2154,
+ 0xCDA2: 2155,
+ 0xCDA5: 2156,
+ 0xCDA9: 2157,
+ 0xCDB1: 2158,
+ 0xCDB3: 2159,
+ 0xCDB5: 2160,
+ 0xCDB7: 2161,
+ 0xCDC1: 2162,
+ 0xCDD7: 2163,
+ 0xCE41: 2164,
+ 0xCE45: 2165,
+ 0xCE61: 2166,
+ 0xCE65: 2167,
+ 0xCE69: 2168,
+ 0xCE73: 2169,
+ 0xCE75: 2170,
+ 0xCE81: 2171,
+ 0xCE82: 2172,
+ 0xCE85: 2173,
+ 0xCE88: 2174,
+ 0xCE89: 2175,
+ 0xCE8B: 2176,
+ 0xCE91: 2177,
+ 0xCE93: 2178,
+ 0xCE95: 2179,
+ 0xCE97: 2180,
+ 0xCEA1: 2181,
+ 0xCEB7: 2182,
+ 0xCEE1: 2183,
+ 0xCEE5: 2184,
+ 0xCEE9: 2185,
+ 0xCEF1: 2186,
+ 0xCEF5: 2187,
+ 0xCF41: 2188,
+ 0xCF45: 2189,
+ 0xCF49: 2190,
+ 0xCF51: 2191,
+ 0xCF55: 2192,
+ 0xCF57: 2193,
+ 0xCF61: 2194,
+ 0xCF65: 2195,
+ 0xCF69: 2196,
+ 0xCF71: 2197,
+ 0xCF73: 2198,
+ 0xCF75: 2199,
+ 0xCFA1: 2200,
+ 0xCFA2: 2201,
+ 0xCFA5: 2202,
+ 0xCFA9: 2203,
+ 0xCFB1: 2204,
+ 0xCFB3: 2205,
+ 0xCFB5: 2206,
+ 0xCFB7: 2207,
+ 0xD061: 2208,
+ 0xD062: 2209,
+ 0xD065: 2210,
+ 0xD069: 2211,
+ 0xD06E: 2212,
+ 0xD071: 2213,
+ 0xD073: 2214,
+ 0xD075: 2215,
+ 0xD077: 2216,
+ 0xD081: 2217,
+ 0xD082: 2218,
+ 0xD085: 2219,
+ 0xD089: 2220,
+ 0xD091: 2221,
+ 0xD093: 2222,
+ 0xD095: 2223,
+ 0xD096: 2224,
+ 0xD097: 2225,
+ 0xD0A1: 2226,
+ 0xD0B7: 2227,
+ 0xD0E1: 2228,
+ 0xD0E2: 2229,
+ 0xD0E5: 2230,
+ 0xD0E9: 2231,
+ 0xD0EB: 2232,
+ 0xD0F1: 2233,
+ 0xD0F3: 2234,
+ 0xD0F5: 2235,
+ 0xD0F7: 2236,
+ 0xD141: 2237,
+ 0xD142: 2238,
+ 0xD145: 2239,
+ 0xD149: 2240,
+ 0xD151: 2241,
+ 0xD153: 2242,
+ 0xD155: 2243,
+ 0xD157: 2244,
+ 0xD161: 2245,
+ 0xD162: 2246,
+ 0xD165: 2247,
+ 0xD169: 2248,
+ 0xD171: 2249,
+ 0xD173: 2250,
+ 0xD175: 2251,
+ 0xD176: 2252,
+ 0xD177: 2253,
+ 0xD181: 2254,
+ 0xD185: 2255,
+ 0xD189: 2256,
+ 0xD193: 2257,
+ 0xD1A1: 2258,
+ 0xD1A2: 2259,
+ 0xD1A5: 2260,
+ 0xD1A9: 2261,
+ 0xD1AE: 2262,
+ 0xD1B1: 2263,
+ 0xD1B3: 2264,
+ 0xD1B5: 2265,
+ 0xD1B7: 2266,
+ 0xD1BB: 2267,
+ 0xD1C1: 2268,
+ 0xD1C2: 2269,
+ 0xD1C5: 2270,
+ 0xD1C9: 2271,
+ 0xD1D5: 2272,
+ 0xD1D7: 2273,
+ 0xD1E1: 2274,
+ 0xD1E2: 2275,
+ 0xD1E5: 2276,
+ 0xD1F5: 2277,
+ 0xD1F7: 2278,
+ 0xD241: 2279,
+ 0xD242: 2280,
+ 0xD245: 2281,
+ 0xD249: 2282,
+ 0xD253: 2283,
+ 0xD255: 2284,
+ 0xD257: 2285,
+ 0xD261: 2286,
+ 0xD265: 2287,
+ 0xD269: 2288,
+ 0xD273: 2289,
+ 0xD275: 2290,
+ 0xD281: 2291,
+ 0xD282: 2292,
+ 0xD285: 2293,
+ 0xD289: 2294,
+ 0xD28E: 2295,
+ 0xD291: 2296,
+ 0xD295: 2297,
+ 0xD297: 2298,
+ 0xD2A1: 2299,
+ 0xD2A5: 2300,
+ 0xD2A9: 2301,
+ 0xD2B1: 2302,
+ 0xD2B7: 2303,
+ 0xD2C1: 2304,
+ 0xD2C2: 2305,
+ 0xD2C5: 2306,
+ 0xD2C9: 2307,
+ 0xD2D7: 2308,
+ 0xD2E1: 2309,
+ 0xD2E2: 2310,
+ 0xD2E5: 2311,
+ 0xD2E9: 2312,
+ 0xD2F1: 2313,
+ 0xD2F3: 2314,
+ 0xD2F5: 2315,
+ 0xD2F7: 2316,
+ 0xD341: 2317,
+ 0xD342: 2318,
+ 0xD345: 2319,
+ 0xD349: 2320,
+ 0xD351: 2321,
+ 0xD355: 2322,
+ 0xD357: 2323,
+ 0xD361: 2324,
+ 0xD362: 2325,
+ 0xD365: 2326,
+ 0xD367: 2327,
+ 0xD368: 2328,
+ 0xD369: 2329,
+ 0xD36A: 2330,
+ 0xD371: 2331,
+ 0xD373: 2332,
+ 0xD375: 2333,
+ 0xD377: 2334,
+ 0xD37B: 2335,
+ 0xD381: 2336,
+ 0xD385: 2337,
+ 0xD389: 2338,
+ 0xD391: 2339,
+ 0xD393: 2340,
+ 0xD397: 2341,
+ 0xD3A1: 2342,
+ 0xD3A2: 2343,
+ 0xD3A5: 2344,
+ 0xD3A9: 2345,
+ 0xD3B1: 2346,
+ 0xD3B3: 2347,
+ 0xD3B5: 2348,
+ 0xD3B7: 2349,
+}
diff --git a/libs/chardet/compat.py b/libs/chardet/johabprober.py
index 8941572b3..6f359d193 100644
--- a/libs/chardet/compat.py
+++ b/libs/chardet/johabprober.py
@@ -1,7 +1,13 @@
######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
# Contributor(s):
-# Dan Blanchard
-# Ian Cordasco
+# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -19,18 +25,23 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-import sys
+from .chardistribution import JOHABDistributionAnalysis
+from .codingstatemachine import CodingStateMachine
+from .mbcharsetprober import MultiByteCharSetProber
+from .mbcssm import JOHAB_SM_MODEL
+
+
+class JOHABProber(MultiByteCharSetProber):
+ def __init__(self):
+ super().__init__()
+ self.coding_sm = CodingStateMachine(JOHAB_SM_MODEL)
+ self.distribution_analyzer = JOHABDistributionAnalysis()
+ self.reset()
+ @property
+ def charset_name(self):
+ return "Johab"
-if sys.version_info < (3, 0):
- PY2 = True
- PY3 = False
- string_types = (str, unicode)
- text_type = unicode
- iteritems = dict.iteritems
-else:
- PY2 = False
- PY3 = True
- string_types = (bytes, str)
- text_type = str
- iteritems = dict.items
+ @property
+ def language(self):
+ return "Korean"
diff --git a/libs/chardet/jpcntx.py b/libs/chardet/jpcntx.py
index 20044e4bc..7a8e5be06 100644
--- a/libs/chardet/jpcntx.py
+++ b/libs/chardet/jpcntx.py
@@ -27,93 +27,96 @@
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
-jp2CharContext = (
-(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
-(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
-(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
-(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
-(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
-(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
-(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
-(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
-(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
-(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
-(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
-(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
-(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
-(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
-(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
-(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
-(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
-(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
-(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
-(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
-(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
-(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
-(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
-(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
-(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
-(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
-(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
-(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
-(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
-(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
-(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
-(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
-(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
-(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
-(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
-(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
-(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
-(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
-(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
-(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
-(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
-(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
-(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
-(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
-(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
-(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
-(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
-(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
-(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
-(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
-(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
-(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
-(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
-(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
-(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
-(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
-(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
-(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
-(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
-(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
-(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
-(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
-(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
-(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
-(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
-(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
-(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
-(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
-(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
-(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
-(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
-(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
-(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
-(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
-(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
-(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
-(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
-(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
+# fmt: off
+jp2_char_context = (
+ (0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
+ (2, 4, 0, 4, 0, 3, 0, 4, 0, 3, 4, 4, 4, 2, 4, 3, 3, 4, 3, 2, 3, 3, 4, 2, 3, 3, 3, 2, 4, 1, 4, 3, 3, 1, 5, 4, 3, 4, 3, 4, 3, 5, 3, 0, 3, 5, 4, 2, 0, 3, 1, 0, 3, 3, 0, 3, 3, 0, 1, 1, 0, 4, 3, 0, 3, 3, 0, 4, 0, 2, 0, 3, 5, 5, 5, 5, 4, 0, 4, 1, 0, 3, 4),
+ (0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2),
+ (0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 4, 4, 3, 5, 3, 5, 1, 5, 3, 4, 3, 4, 4, 3, 4, 3, 3, 4, 3, 5, 4, 4, 3, 5, 5, 3, 5, 5, 5, 3, 5, 5, 3, 4, 5, 5, 3, 1, 3, 2, 0, 3, 4, 0, 4, 2, 0, 4, 2, 1, 5, 3, 2, 3, 5, 0, 4, 0, 2, 0, 5, 4, 4, 5, 4, 5, 0, 4, 0, 0, 4, 4),
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ (0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 5, 4, 3, 3, 3, 3, 4, 3, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 4, 4, 4, 4, 5, 3, 4, 4, 3, 4, 5, 5, 4, 5, 5, 1, 4, 5, 4, 3, 0, 3, 3, 1, 3, 3, 0, 4, 4, 0, 3, 3, 1, 5, 3, 3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 0, 4, 1, 1, 3, 4),
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ (0, 4, 0, 3, 0, 3, 0, 4, 0, 3, 4, 4, 3, 2, 2, 1, 2, 1, 3, 1, 3, 3, 3, 3, 3, 4, 3, 1, 3, 3, 5, 3, 3, 0, 4, 3, 0, 5, 4, 3, 3, 5, 4, 4, 3, 4, 4, 5, 0, 1, 2, 0, 1, 2, 0, 2, 2, 0, 1, 0, 0, 5, 2, 2, 1, 4, 0, 3, 0, 1, 0, 4, 4, 3, 5, 4, 3, 0, 2, 1, 0, 4, 3),
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ (0, 3, 0, 5, 0, 4, 0, 2, 1, 4, 4, 2, 4, 1, 4, 2, 4, 2, 4, 3, 3, 3, 4, 3, 3, 3, 3, 1, 4, 2, 3, 3, 3, 1, 4, 4, 1, 1, 1, 4, 3, 3, 2, 0, 2, 4, 3, 2, 0, 3, 3, 0, 3, 1, 1, 0, 0, 0, 3, 3, 0, 4, 2, 2, 3, 4, 0, 4, 0, 3, 0, 4, 4, 5, 3, 4, 4, 0, 3, 0, 0, 1, 4),
+ (1, 4, 0, 4, 0, 4, 0, 4, 0, 3, 5, 4, 4, 3, 4, 3, 5, 4, 3, 3, 4, 3, 5, 4, 4, 4, 4, 3, 4, 2, 4, 3, 3, 1, 5, 4, 3, 2, 4, 5, 4, 5, 5, 4, 4, 5, 4, 4, 0, 3, 2, 2, 3, 3, 0, 4, 3, 1, 3, 2, 1, 4, 3, 3, 4, 5, 0, 3, 0, 2, 0, 4, 5, 5, 4, 5, 4, 0, 4, 0, 0, 5, 4),
+ (0, 5, 0, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 3, 4, 0, 4, 4, 4, 3, 4, 3, 4, 3, 3, 1, 4, 2, 4, 3, 4, 0, 5, 4, 1, 4, 5, 4, 4, 5, 3, 2, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 2, 3, 2, 0, 4, 3, 3, 4, 3, 3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 4, 3, 0, 4, 1, 0, 1, 3),
+ (0, 3, 1, 4, 0, 3, 0, 2, 0, 3, 4, 4, 3, 1, 4, 2, 3, 3, 4, 3, 4, 3, 4, 3, 4, 4, 3, 2, 3, 1, 5, 4, 4, 1, 4, 4, 3, 5, 4, 4, 3, 5, 5, 4, 3, 4, 4, 3, 1, 2, 3, 1, 2, 2, 0, 3, 2, 0, 3, 1, 0, 5, 3, 3, 3, 4, 3, 3, 3, 3, 4, 4, 4, 4, 5, 4, 2, 0, 3, 3, 2, 4, 3),
+ (0, 2, 0, 3, 0, 1, 0, 1, 0, 0, 3, 2, 0, 0, 2, 0, 1, 0, 2, 1, 3, 3, 3, 1, 2, 3, 1, 0, 1, 0, 4, 2, 1, 1, 3, 3, 0, 4, 3, 3, 1, 4, 3, 3, 0, 3, 3, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 4, 1, 0, 2, 3, 2, 2, 2, 1, 3, 3, 3, 4, 4, 3, 2, 0, 3, 1, 0, 3, 3),
+ (0, 4, 0, 4, 0, 3, 0, 3, 0, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 3, 4, 2, 4, 3, 4, 3, 3, 2, 4, 3, 4, 5, 4, 1, 4, 5, 3, 5, 4, 5, 3, 5, 4, 0, 3, 5, 5, 3, 1, 3, 3, 2, 2, 3, 0, 3, 4, 1, 3, 3, 2, 4, 3, 3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 5, 3, 0, 4, 1, 0, 3, 4),
+ (0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 2, 2, 1, 0, 1, 0, 0, 0, 3, 0, 3, 0, 3, 0, 1, 3, 1, 0, 3, 1, 3, 3, 3, 1, 3, 3, 3, 0, 1, 3, 1, 3, 4, 0, 0, 3, 1, 1, 0, 3, 2, 0, 0, 0, 0, 1, 3, 0, 1, 0, 0, 3, 3, 2, 0, 3, 0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 3, 0, 3, 0, 0, 2, 3),
+ (2, 3, 0, 3, 0, 2, 0, 1, 0, 3, 3, 4, 3, 1, 3, 1, 1, 1, 3, 1, 4, 3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 4, 3, 1, 4, 3, 2, 5, 5, 4, 4, 4, 4, 3, 3, 4, 4, 4, 0, 2, 1, 1, 3, 2, 0, 1, 2, 0, 0, 1, 0, 4, 1, 3, 3, 3, 0, 3, 0, 1, 0, 4, 4, 4, 5, 5, 3, 0, 2, 0, 0, 4, 4),
+ (0, 2, 0, 1, 0, 3, 1, 3, 0, 2, 3, 3, 3, 0, 3, 1, 0, 0, 3, 0, 3, 2, 3, 1, 3, 2, 1, 1, 0, 0, 4, 2, 1, 0, 2, 3, 1, 4, 3, 2, 0, 4, 4, 3, 1, 3, 1, 3, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1, 1, 1, 2, 0, 3, 0, 0, 0, 3, 4, 2, 4, 3, 2, 0, 1, 0, 0, 3, 3),
+ (0, 1, 0, 4, 0, 5, 0, 4, 0, 2, 4, 4, 2, 3, 3, 2, 3, 3, 5, 3, 3, 3, 4, 3, 4, 2, 3, 0, 4, 3, 3, 3, 4, 1, 4, 3, 2, 1, 5, 5, 3, 4, 5, 1, 3, 5, 4, 2, 0, 3, 3, 0, 1, 3, 0, 4, 2, 0, 1, 3, 1, 4, 3, 3, 3, 3, 0, 3, 0, 1, 0, 3, 4, 4, 4, 5, 5, 0, 3, 0, 1, 4, 5),
+ (0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 3, 1, 3, 0, 4, 0, 1, 1, 3, 0, 3, 4, 3, 2, 3, 1, 0, 3, 3, 2, 3, 1, 3, 0, 2, 3, 0, 2, 1, 4, 1, 2, 2, 0, 0, 3, 3, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 3, 2, 1, 3, 3, 0, 2, 0, 2, 0, 0, 3, 3, 1, 2, 4, 0, 3, 0, 2, 2, 3),
+ (2, 4, 0, 5, 0, 4, 0, 4, 0, 2, 4, 4, 4, 3, 4, 3, 3, 3, 1, 2, 4, 3, 4, 3, 4, 4, 5, 0, 3, 3, 3, 3, 2, 0, 4, 3, 1, 4, 3, 4, 1, 4, 4, 3, 3, 4, 4, 3, 1, 2, 3, 0, 4, 2, 0, 4, 1, 0, 3, 3, 0, 4, 3, 3, 3, 4, 0, 4, 0, 2, 0, 3, 5, 3, 4, 5, 2, 0, 3, 0, 0, 4, 5),
+ (0, 3, 0, 4, 0, 1, 0, 1, 0, 1, 3, 2, 2, 1, 3, 0, 3, 0, 2, 0, 2, 0, 3, 0, 2, 0, 0, 0, 1, 0, 1, 1, 0, 0, 3, 1, 0, 0, 0, 4, 0, 3, 1, 0, 2, 1, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 3, 1, 0, 3, 0, 0, 0, 1, 4, 4, 4, 3, 0, 0, 4, 0, 0, 1, 4),
+ (1, 4, 1, 5, 0, 3, 0, 3, 0, 4, 5, 4, 4, 3, 5, 3, 3, 4, 4, 3, 4, 1, 3, 3, 3, 3, 2, 1, 4, 1, 5, 4, 3, 1, 4, 4, 3, 5, 4, 4, 3, 5, 4, 3, 3, 4, 4, 4, 0, 3, 3, 1, 2, 3, 0, 3, 1, 0, 3, 3, 0, 5, 4, 4, 4, 4, 4, 4, 3, 3, 5, 4, 4, 3, 3, 5, 4, 0, 3, 2, 0, 4, 4),
+ (0, 2, 0, 3, 0, 1, 0, 0, 0, 1, 3, 3, 3, 2, 4, 1, 3, 0, 3, 1, 3, 0, 2, 2, 1, 1, 0, 0, 2, 0, 4, 3, 1, 0, 4, 3, 0, 4, 4, 4, 1, 4, 3, 1, 1, 3, 3, 1, 0, 2, 0, 0, 1, 3, 0, 0, 0, 0, 2, 0, 0, 4, 3, 2, 4, 3, 5, 4, 3, 3, 3, 4, 3, 3, 4, 3, 3, 0, 2, 1, 0, 3, 3),
+ (0, 2, 0, 4, 0, 3, 0, 2, 0, 2, 5, 5, 3, 4, 4, 4, 4, 1, 4, 3, 3, 0, 4, 3, 4, 3, 1, 3, 3, 2, 4, 3, 0, 3, 4, 3, 0, 3, 4, 4, 2, 4, 4, 0, 4, 5, 3, 3, 2, 2, 1, 1, 1, 2, 0, 1, 5, 0, 3, 3, 2, 4, 3, 3, 3, 4, 0, 3, 0, 2, 0, 4, 4, 3, 5, 5, 0, 0, 3, 0, 2, 3, 3),
+ (0, 3, 0, 4, 0, 3, 0, 1, 0, 3, 4, 3, 3, 1, 3, 3, 3, 0, 3, 1, 3, 0, 4, 3, 3, 1, 1, 0, 3, 0, 3, 3, 0, 0, 4, 4, 0, 1, 5, 4, 3, 3, 5, 0, 3, 3, 4, 3, 0, 2, 0, 1, 1, 1, 0, 1, 3, 0, 1, 2, 1, 3, 3, 2, 3, 3, 0, 3, 0, 1, 0, 1, 3, 3, 4, 4, 1, 0, 1, 2, 2, 1, 3),
+ (0, 1, 0, 4, 0, 4, 0, 3, 0, 1, 3, 3, 3, 2, 3, 1, 1, 0, 3, 0, 3, 3, 4, 3, 2, 4, 2, 0, 1, 0, 4, 3, 2, 0, 4, 3, 0, 5, 3, 3, 2, 4, 4, 4, 3, 3, 3, 4, 0, 1, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 4, 2, 3, 3, 3, 0, 3, 0, 0, 0, 4, 4, 4, 5, 3, 2, 0, 3, 3, 0, 3, 5),
+ (0, 2, 0, 3, 0, 0, 0, 3, 0, 1, 3, 0, 2, 0, 0, 0, 1, 0, 3, 1, 1, 3, 3, 0, 0, 3, 0, 0, 3, 0, 2, 3, 1, 0, 3, 1, 0, 3, 3, 2, 0, 4, 2, 2, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 0, 1, 0, 1, 0, 0, 0, 1, 3, 1, 2, 0, 0, 0, 1, 0, 0, 1, 4),
+ (0, 3, 0, 3, 0, 5, 0, 1, 0, 2, 4, 3, 1, 3, 3, 2, 1, 1, 5, 2, 1, 0, 5, 1, 2, 0, 0, 0, 3, 3, 2, 2, 3, 2, 4, 3, 0, 0, 3, 3, 1, 3, 3, 0, 2, 5, 3, 4, 0, 3, 3, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0, 2, 2, 3, 3, 3, 0, 2, 0, 1, 0, 3, 4, 4, 2, 5, 4, 0, 3, 0, 0, 3, 5),
+ (0, 3, 0, 3, 0, 3, 0, 1, 0, 3, 3, 3, 3, 0, 3, 0, 2, 0, 2, 1, 1, 0, 2, 0, 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 3, 2, 0, 0, 3, 3, 1, 2, 3, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 3, 1, 2, 3, 0, 3, 0, 1, 0, 3, 2, 1, 0, 4, 3, 0, 1, 1, 0, 3, 3),
+ (0, 4, 0, 5, 0, 3, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 4, 3, 5, 3, 3, 2, 5, 3, 4, 4, 4, 3, 4, 3, 4, 5, 5, 3, 4, 4, 3, 4, 4, 5, 4, 4, 4, 3, 4, 5, 5, 4, 2, 3, 4, 2, 3, 4, 0, 3, 3, 1, 4, 3, 2, 4, 3, 3, 5, 5, 0, 3, 0, 3, 0, 5, 5, 5, 5, 4, 4, 0, 4, 0, 1, 4, 4),
+ (0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 5, 4, 4, 2, 3, 2, 5, 1, 3, 2, 5, 1, 4, 2, 3, 2, 3, 3, 4, 3, 3, 3, 3, 2, 5, 4, 1, 3, 3, 5, 3, 4, 4, 0, 4, 4, 3, 1, 1, 3, 1, 0, 2, 3, 0, 2, 3, 0, 3, 0, 0, 4, 3, 1, 3, 4, 0, 3, 0, 2, 0, 4, 4, 4, 3, 4, 5, 0, 4, 0, 0, 3, 4),
+ (0, 3, 0, 3, 0, 3, 1, 2, 0, 3, 4, 4, 3, 3, 3, 0, 2, 2, 4, 3, 3, 1, 3, 3, 3, 1, 1, 0, 3, 1, 4, 3, 2, 3, 4, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 4, 4, 3, 1, 3, 3, 1, 3, 3, 0, 4, 1, 0, 2, 2, 1, 4, 3, 2, 3, 3, 5, 4, 3, 3, 5, 4, 4, 3, 3, 0, 4, 0, 3, 2, 2, 4, 4),
+ (0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 2, 1, 3, 0, 0, 0, 0, 0, 2, 0, 1, 2, 1, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 1, 3, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 3, 4, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1),
+ (0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 4, 1, 4, 0, 3, 0, 4, 0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 1, 5, 1, 4, 0, 0, 3, 0, 5, 0, 5, 2, 0, 1, 0, 0, 0, 2, 1, 4, 0, 1, 3, 0, 0, 3, 0, 0, 3, 1, 1, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
+ (1, 4, 0, 5, 0, 3, 0, 2, 0, 3, 5, 4, 4, 3, 4, 3, 5, 3, 4, 3, 3, 0, 4, 3, 3, 3, 3, 3, 3, 2, 4, 4, 3, 1, 3, 4, 4, 5, 4, 4, 3, 4, 4, 1, 3, 5, 4, 3, 3, 3, 1, 2, 2, 3, 3, 1, 3, 1, 3, 3, 3, 5, 3, 3, 4, 5, 0, 3, 0, 3, 0, 3, 4, 3, 4, 4, 3, 0, 3, 0, 2, 4, 3),
+ (0, 1, 0, 4, 0, 0, 0, 0, 0, 1, 4, 0, 4, 1, 4, 2, 4, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 3, 1, 1, 1, 0, 3, 0, 0, 0, 1, 2, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 3, 2, 0, 2, 2, 0, 1, 0, 0, 0, 2, 3, 2, 3, 3, 0, 0, 0, 0, 2, 1, 0),
+ (0, 5, 1, 5, 0, 3, 0, 3, 0, 5, 4, 4, 5, 1, 5, 3, 3, 0, 4, 3, 4, 3, 5, 3, 4, 3, 3, 2, 4, 3, 4, 3, 3, 0, 3, 3, 1, 4, 4, 3, 4, 4, 4, 3, 4, 5, 5, 3, 2, 3, 1, 1, 3, 3, 1, 3, 1, 1, 3, 3, 2, 4, 5, 3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 5, 3, 3, 0, 3, 4, 0, 4, 3),
+ (0, 5, 0, 5, 0, 3, 0, 2, 0, 4, 4, 3, 5, 2, 4, 3, 3, 3, 4, 4, 4, 3, 5, 3, 5, 3, 3, 1, 4, 0, 4, 3, 3, 0, 3, 3, 0, 4, 4, 4, 4, 5, 4, 3, 3, 5, 5, 3, 2, 3, 1, 2, 3, 2, 0, 1, 0, 0, 3, 2, 2, 4, 4, 3, 1, 5, 0, 4, 0, 3, 0, 4, 3, 1, 3, 2, 1, 0, 3, 3, 0, 3, 3),
+ (0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 5, 5, 3, 4, 3, 3, 2, 5, 4, 4, 3, 5, 3, 5, 3, 4, 0, 4, 3, 4, 4, 3, 2, 4, 4, 3, 4, 5, 4, 4, 5, 5, 0, 3, 5, 5, 4, 1, 3, 3, 2, 3, 3, 1, 3, 1, 0, 4, 3, 1, 4, 4, 3, 4, 5, 0, 4, 0, 2, 0, 4, 3, 4, 4, 3, 3, 0, 4, 0, 0, 5, 5),
+ (0, 4, 0, 4, 0, 5, 0, 1, 1, 3, 3, 4, 4, 3, 4, 1, 3, 0, 5, 1, 3, 0, 3, 1, 3, 1, 1, 0, 3, 0, 3, 3, 4, 0, 4, 3, 0, 4, 4, 4, 3, 4, 4, 0, 3, 5, 4, 1, 0, 3, 0, 0, 2, 3, 0, 3, 1, 0, 3, 1, 0, 3, 2, 1, 3, 5, 0, 3, 0, 1, 0, 3, 2, 3, 3, 4, 4, 0, 2, 2, 0, 4, 4),
+ (2, 4, 0, 5, 0, 4, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 5, 3, 5, 3, 5, 2, 5, 3, 4, 3, 3, 4, 3, 4, 5, 3, 2, 1, 5, 4, 3, 2, 3, 4, 5, 3, 4, 1, 2, 5, 4, 3, 0, 3, 3, 0, 3, 2, 0, 2, 3, 0, 4, 1, 0, 3, 4, 3, 3, 5, 0, 3, 0, 1, 0, 4, 5, 5, 5, 4, 3, 0, 4, 2, 0, 3, 5),
+ (0, 5, 0, 4, 0, 4, 0, 2, 0, 5, 4, 3, 4, 3, 4, 3, 3, 3, 4, 3, 4, 2, 5, 3, 5, 3, 4, 1, 4, 3, 4, 4, 4, 0, 3, 5, 0, 4, 4, 4, 4, 5, 3, 1, 3, 4, 5, 3, 3, 3, 3, 3, 3, 3, 0, 2, 2, 0, 3, 3, 2, 4, 3, 3, 3, 5, 3, 4, 1, 3, 3, 5, 3, 2, 0, 0, 0, 0, 4, 3, 1, 3, 3),
+ (0, 1, 0, 3, 0, 3, 0, 1, 0, 1, 3, 3, 3, 2, 3, 3, 3, 0, 3, 0, 0, 0, 3, 1, 3, 0, 0, 0, 2, 2, 2, 3, 0, 0, 3, 2, 0, 1, 2, 4, 1, 3, 3, 0, 0, 3, 3, 3, 0, 1, 0, 0, 2, 1, 0, 0, 3, 0, 3, 1, 0, 3, 0, 0, 1, 3, 0, 2, 0, 1, 0, 3, 3, 1, 3, 3, 0, 0, 1, 1, 0, 3, 3),
+ (0, 2, 0, 3, 0, 2, 1, 4, 0, 2, 2, 3, 1, 1, 3, 1, 1, 0, 2, 0, 3, 1, 2, 3, 1, 3, 0, 0, 1, 0, 4, 3, 2, 3, 3, 3, 1, 4, 2, 3, 3, 3, 3, 1, 0, 3, 1, 4, 0, 1, 1, 0, 1, 2, 0, 1, 1, 0, 1, 1, 0, 3, 1, 3, 2, 2, 0, 1, 0, 0, 0, 2, 3, 3, 3, 1, 0, 0, 0, 0, 0, 2, 3),
+ (0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 5, 5, 3, 3, 4, 3, 3, 1, 5, 4, 4, 2, 4, 4, 4, 3, 4, 2, 4, 3, 5, 5, 4, 3, 3, 4, 3, 3, 5, 5, 4, 5, 5, 1, 3, 4, 5, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 1, 4, 5, 3, 3, 5, 0, 4, 0, 3, 0, 5, 3, 3, 1, 4, 3, 0, 4, 0, 1, 5, 3),
+ (0, 5, 0, 5, 0, 4, 0, 2, 0, 4, 4, 3, 4, 3, 3, 3, 3, 3, 5, 4, 4, 4, 4, 4, 4, 5, 3, 3, 5, 2, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4, 5, 5, 3, 3, 4, 3, 4, 3, 3, 4, 3, 3, 3, 3, 1, 2, 2, 1, 4, 3, 3, 5, 4, 4, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 4, 4, 1, 0, 4, 2, 0, 2, 4),
+ (0, 4, 0, 4, 0, 3, 0, 1, 0, 3, 5, 2, 3, 0, 3, 0, 2, 1, 4, 2, 3, 3, 4, 1, 4, 3, 3, 2, 4, 1, 3, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 5, 3, 3, 3, 3, 3, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 0, 0, 3, 1, 2, 2, 3, 0, 3, 0, 2, 0, 4, 4, 3, 3, 4, 1, 0, 3, 0, 0, 2, 4),
+ (0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0, 0, 3, 1, 3, 0, 3, 2, 0, 0, 0, 1, 0, 3, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 2, 0, 0, 0, 0, 0, 0, 2),
+ (0, 2, 1, 3, 0, 2, 0, 2, 0, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 3, 3, 4, 2, 2, 1, 2, 1, 4, 0, 4, 3, 1, 3, 3, 3, 2, 4, 3, 5, 4, 3, 3, 3, 3, 3, 3, 3, 0, 1, 3, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 4, 2, 0, 2, 3, 0, 3, 3, 0, 3, 3, 4, 2, 3, 1, 4, 0, 1, 2, 0, 2, 3),
+ (0, 3, 0, 3, 0, 1, 0, 3, 0, 2, 3, 3, 3, 0, 3, 1, 2, 0, 3, 3, 2, 3, 3, 2, 3, 2, 3, 1, 3, 0, 4, 3, 2, 0, 3, 3, 1, 4, 3, 3, 2, 3, 4, 3, 1, 3, 3, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 4, 1, 1, 0, 3, 0, 3, 1, 0, 2, 3, 3, 3, 3, 3, 1, 0, 0, 2, 0, 3, 3),
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3),
+ (0, 2, 0, 3, 1, 3, 0, 3, 0, 2, 3, 3, 3, 1, 3, 1, 3, 1, 3, 1, 3, 3, 3, 1, 3, 0, 2, 3, 1, 1, 4, 3, 3, 2, 3, 3, 1, 2, 2, 4, 1, 3, 3, 0, 1, 4, 2, 3, 0, 1, 3, 0, 3, 0, 0, 1, 3, 0, 2, 0, 0, 3, 3, 2, 1, 3, 0, 3, 0, 2, 0, 3, 4, 4, 4, 3, 1, 0, 3, 0, 0, 3, 3),
+ (0, 2, 0, 1, 0, 2, 0, 0, 0, 1, 3, 2, 2, 1, 3, 0, 1, 1, 3, 0, 3, 2, 3, 1, 2, 0, 2, 0, 1, 1, 3, 3, 3, 0, 3, 3, 1, 1, 2, 3, 2, 3, 3, 1, 2, 3, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 2, 1, 2, 1, 3, 0, 3, 0, 0, 0, 3, 4, 4, 4, 3, 2, 0, 2, 0, 0, 2, 4),
+ (0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 3),
+ (0, 3, 0, 3, 0, 2, 0, 3, 0, 3, 3, 3, 2, 3, 2, 2, 2, 0, 3, 1, 3, 3, 3, 2, 3, 3, 0, 0, 3, 0, 3, 2, 2, 0, 2, 3, 1, 4, 3, 4, 3, 3, 2, 3, 1, 5, 4, 4, 0, 3, 1, 2, 1, 3, 0, 3, 1, 1, 2, 0, 2, 3, 1, 3, 1, 3, 0, 3, 0, 1, 0, 3, 3, 4, 4, 2, 1, 0, 2, 1, 0, 2, 4),
+ (0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 4, 2, 5, 1, 4, 0, 2, 0, 2, 1, 3, 1, 4, 0, 2, 1, 0, 0, 2, 1, 4, 1, 1, 0, 3, 3, 0, 5, 1, 3, 2, 3, 3, 1, 0, 3, 2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 1, 0, 3, 0, 2, 0, 1, 0, 3, 3, 3, 4, 3, 3, 0, 0, 0, 0, 2, 3),
+ (0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 3),
+ (0, 1, 0, 3, 0, 4, 0, 3, 0, 2, 4, 3, 1, 0, 3, 2, 2, 1, 3, 1, 2, 2, 3, 1, 1, 1, 2, 1, 3, 0, 1, 2, 0, 1, 3, 2, 1, 3, 0, 5, 5, 1, 0, 0, 1, 3, 2, 1, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 3, 4, 0, 1, 1, 1, 3, 2, 0, 2, 0, 1, 0, 2, 3, 3, 1, 2, 3, 0, 1, 0, 1, 0, 4),
+ (0, 0, 0, 1, 0, 3, 0, 3, 0, 2, 2, 1, 0, 0, 4, 0, 3, 0, 3, 1, 3, 0, 3, 0, 3, 0, 1, 0, 3, 0, 3, 1, 3, 0, 3, 3, 0, 0, 1, 2, 1, 1, 1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 2, 0, 0, 2, 0, 0, 0, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 4),
+ (0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 2, 0, 2, 3, 0, 0, 2, 2, 3, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 2, 0, 0, 0, 0, 2, 3),
+ (2, 4, 0, 5, 0, 5, 0, 4, 0, 3, 4, 3, 3, 3, 4, 3, 3, 3, 4, 3, 4, 4, 5, 4, 5, 5, 5, 2, 3, 0, 5, 5, 4, 1, 5, 4, 3, 1, 5, 4, 3, 4, 4, 3, 3, 4, 3, 3, 0, 3, 2, 0, 2, 3, 0, 3, 0, 0, 3, 3, 0, 5, 3, 2, 3, 3, 0, 3, 0, 3, 0, 3, 4, 5, 4, 5, 3, 0, 4, 3, 0, 3, 4),
+ (0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 3, 4, 3, 2, 3, 2, 3, 0, 4, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 2, 4, 3, 3, 1, 3, 4, 3, 4, 4, 4, 3, 4, 4, 3, 2, 4, 4, 1, 0, 2, 0, 0, 1, 1, 0, 2, 0, 0, 3, 1, 0, 5, 3, 2, 1, 3, 0, 3, 0, 1, 2, 4, 3, 2, 4, 3, 3, 0, 3, 2, 0, 4, 4),
+ (0, 3, 0, 3, 0, 1, 0, 0, 0, 1, 4, 3, 3, 2, 3, 1, 3, 1, 4, 2, 3, 2, 4, 2, 3, 4, 3, 0, 2, 2, 3, 3, 3, 0, 3, 3, 3, 0, 3, 4, 1, 3, 3, 0, 3, 4, 3, 3, 0, 1, 1, 0, 1, 0, 0, 0, 4, 0, 3, 0, 0, 3, 1, 2, 1, 3, 0, 4, 0, 1, 0, 4, 3, 3, 4, 3, 3, 0, 2, 0, 0, 3, 3),
+ (0, 3, 0, 4, 0, 1, 0, 3, 0, 3, 4, 3, 3, 0, 3, 3, 3, 1, 3, 1, 3, 3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 3, 3, 1, 3, 3, 2, 5, 4, 3, 3, 4, 5, 3, 2, 5, 3, 4, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 4, 2, 2, 1, 3, 0, 3, 0, 2, 0, 4, 4, 3, 5, 3, 2, 0, 1, 1, 0, 3, 4),
+ (0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 4, 3, 3, 2, 3, 3, 3, 1, 4, 3, 4, 1, 5, 3, 4, 3, 4, 0, 4, 2, 4, 3, 4, 1, 5, 4, 0, 4, 4, 4, 4, 5, 4, 1, 3, 5, 4, 2, 1, 4, 1, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 4, 3, 3, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 3, 3, 3, 0, 4, 2, 0, 3, 4),
+ (1, 4, 0, 4, 0, 3, 0, 1, 0, 3, 3, 3, 1, 1, 3, 3, 2, 2, 3, 3, 1, 0, 3, 2, 2, 1, 2, 0, 3, 1, 2, 1, 2, 0, 3, 2, 0, 2, 2, 3, 3, 4, 3, 0, 3, 3, 1, 2, 0, 1, 1, 3, 1, 2, 0, 0, 3, 0, 1, 1, 0, 3, 2, 2, 3, 3, 0, 3, 0, 0, 0, 2, 3, 3, 4, 3, 3, 0, 1, 0, 0, 1, 4),
+ (0, 4, 0, 4, 0, 4, 0, 0, 0, 3, 4, 4, 3, 1, 4, 2, 3, 2, 3, 3, 3, 1, 4, 3, 4, 0, 3, 0, 4, 2, 3, 3, 2, 2, 5, 4, 2, 1, 3, 4, 3, 4, 3, 1, 3, 3, 4, 2, 0, 2, 1, 0, 3, 3, 0, 0, 2, 0, 3, 1, 0, 4, 4, 3, 4, 3, 0, 4, 0, 1, 0, 2, 4, 4, 4, 4, 4, 0, 3, 2, 0, 3, 3),
+ (0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2),
+ (0, 2, 0, 3, 0, 4, 0, 4, 0, 1, 3, 3, 3, 0, 4, 0, 2, 1, 2, 1, 1, 1, 2, 0, 3, 1, 1, 0, 1, 0, 3, 1, 0, 0, 3, 3, 2, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 2, 2, 0, 3, 1, 0, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 0, 0, 0, 0, 1, 0, 0, 3, 3, 4, 3, 1, 0, 1, 0, 3, 0, 2),
+ (0, 0, 0, 3, 0, 5, 0, 0, 0, 0, 1, 0, 2, 0, 3, 1, 0, 1, 3, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 4, 0, 0, 0, 2, 3, 0, 1, 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 3),
+ (0, 2, 0, 5, 0, 5, 0, 1, 0, 2, 4, 3, 3, 2, 5, 1, 3, 2, 3, 3, 3, 0, 4, 1, 2, 0, 3, 0, 4, 0, 2, 2, 1, 1, 5, 3, 0, 0, 1, 4, 2, 3, 2, 0, 3, 3, 3, 2, 0, 2, 4, 1, 1, 2, 0, 1, 1, 0, 3, 1, 0, 1, 3, 1, 2, 3, 0, 2, 0, 0, 0, 1, 3, 5, 4, 4, 4, 0, 3, 0, 0, 1, 3),
+ (0, 4, 0, 5, 0, 4, 0, 4, 0, 4, 5, 4, 3, 3, 4, 3, 3, 3, 4, 3, 4, 4, 5, 3, 4, 5, 4, 2, 4, 2, 3, 4, 3, 1, 4, 4, 1, 3, 5, 4, 4, 5, 5, 4, 4, 5, 5, 5, 2, 3, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 4, 4, 4, 0, 3, 0, 4, 0, 3, 3, 4, 4, 5, 0, 0, 4, 3, 0, 4, 5),
+ (0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 4, 4, 4, 3, 3, 2, 4, 3, 4, 3, 4, 3, 5, 3, 4, 3, 2, 1, 4, 2, 4, 4, 3, 1, 3, 4, 2, 4, 5, 5, 3, 4, 5, 4, 1, 5, 4, 3, 0, 3, 2, 2, 3, 2, 1, 3, 1, 0, 3, 3, 3, 5, 3, 3, 3, 5, 4, 4, 2, 3, 3, 4, 3, 3, 3, 2, 1, 0, 3, 2, 1, 4, 3),
+ (0, 4, 0, 5, 0, 4, 0, 3, 0, 3, 5, 5, 3, 2, 4, 3, 4, 0, 5, 4, 4, 1, 4, 4, 4, 3, 3, 3, 4, 3, 5, 5, 2, 3, 3, 4, 1, 2, 5, 5, 3, 5, 5, 2, 3, 5, 5, 4, 0, 3, 2, 0, 3, 3, 1, 1, 5, 1, 4, 1, 0, 4, 3, 2, 3, 5, 0, 4, 0, 3, 0, 5, 4, 3, 4, 3, 0, 0, 4, 1, 0, 4, 4),
+ (1, 3, 0, 4, 0, 2, 0, 2, 0, 2, 5, 5, 3, 3, 3, 3, 3, 0, 4, 2, 3, 4, 4, 4, 3, 4, 0, 0, 3, 4, 5, 4, 3, 3, 3, 3, 2, 5, 5, 4, 5, 5, 5, 4, 3, 5, 5, 5, 1, 3, 1, 0, 1, 0, 0, 3, 2, 0, 4, 2, 0, 5, 2, 3, 2, 4, 1, 3, 0, 3, 0, 4, 5, 4, 5, 4, 3, 0, 4, 2, 0, 5, 4),
+ (0, 3, 0, 4, 0, 5, 0, 3, 0, 3, 4, 4, 3, 2, 3, 2, 3, 3, 3, 3, 3, 2, 4, 3, 3, 2, 2, 0, 3, 3, 3, 3, 3, 1, 3, 3, 3, 0, 4, 4, 3, 4, 4, 1, 1, 4, 4, 2, 0, 3, 1, 0, 1, 1, 0, 4, 1, 0, 2, 3, 1, 3, 3, 1, 3, 4, 0, 3, 0, 1, 0, 3, 1, 3, 0, 0, 1, 0, 2, 0, 0, 4, 4),
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+ (0, 3, 0, 3, 0, 2, 0, 3, 0, 1, 5, 4, 3, 3, 3, 1, 4, 2, 1, 2, 3, 4, 4, 2, 4, 4, 5, 0, 3, 1, 4, 3, 4, 0, 4, 3, 3, 3, 2, 3, 2, 5, 3, 4, 3, 2, 2, 3, 0, 0, 3, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1, 1, 3, 1, 0, 2, 0, 4, 0, 3, 4, 4, 4, 5, 2, 0, 2, 0, 0, 1, 3),
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 4, 2, 1, 1, 0, 1, 0, 3, 2, 0, 0, 3, 1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1, 4, 0, 4, 2, 1, 0, 0, 0, 0, 0, 1),
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 1, 2, 1, 0, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2),
+ (0, 4, 0, 4, 0, 4, 0, 3, 0, 4, 4, 3, 4, 2, 4, 3, 2, 0, 4, 4, 4, 3, 5, 3, 5, 3, 3, 2, 4, 2, 4, 3, 4, 3, 1, 4, 0, 2, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4, 3, 4, 1, 3, 4, 3, 2, 1, 2, 1, 3, 3, 3, 4, 4, 3, 3, 5, 0, 4, 0, 3, 0, 4, 3, 3, 3, 2, 1, 0, 3, 0, 0, 3, 3),
+ (0, 4, 0, 3, 0, 3, 0, 3, 0, 3, 5, 5, 3, 3, 3, 3, 4, 3, 4, 3, 3, 3, 4, 4, 4, 3, 3, 3, 3, 4, 3, 5, 3, 3, 1, 3, 2, 4, 5, 5, 5, 5, 4, 3, 4, 5, 5, 3, 2, 2, 3, 3, 3, 3, 2, 3, 3, 1, 2, 3, 2, 4, 3, 3, 3, 4, 0, 4, 0, 2, 0, 4, 3, 2, 2, 1, 2, 0, 3, 0, 0, 4, 1),
)
+# fmt: on
-class JapaneseContextAnalysis(object):
+
+class JapaneseContextAnalysis:
NUM_OF_CATEGORY = 6
DONT_KNOW = -1
ENOUGH_REL_THRESHOLD = 100
@@ -153,7 +156,7 @@ class JapaneseContextAnalysis(object):
# this character will simply our logic and improve performance.
i = self._need_to_skip_char_num
while i < num_bytes:
- order, char_len = self.get_order(byte_str[i:i + 2])
+ order, char_len = self.get_order(byte_str[i : i + 2])
i += char_len
if i > num_bytes:
self._need_to_skip_char_num = i - num_bytes
@@ -164,7 +167,9 @@ class JapaneseContextAnalysis(object):
if self._total_rel > self.MAX_REL_THRESHOLD:
self._done = True
break
- self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
+ self._rel_sample[
+ jp2_char_context[self._last_char_order][order]
+ ] += 1
self._last_char_order = order
def got_enough_data(self):
@@ -174,15 +179,15 @@ class JapaneseContextAnalysis(object):
# This is just one way to calculate confidence. It works well for me.
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
return (self._total_rel - self._rel_sample[0]) / self._total_rel
- else:
- return self.DONT_KNOW
+ return self.DONT_KNOW
- def get_order(self, byte_str):
+ def get_order(self, _):
return -1, 1
+
class SJISContextAnalysis(JapaneseContextAnalysis):
def __init__(self):
- super(SJISContextAnalysis, self).__init__()
+ super().__init__()
self._charset_name = "SHIFT_JIS"
@property
@@ -209,6 +214,7 @@ class SJISContextAnalysis(JapaneseContextAnalysis):
return -1, char_len
+
class EUCJPContextAnalysis(JapaneseContextAnalysis):
def get_order(self, byte_str):
if not byte_str:
@@ -229,5 +235,3 @@ class EUCJPContextAnalysis(JapaneseContextAnalysis):
return second_char - 0xA1, char_len
return -1, char_len
-
-
diff --git a/libs/chardet/langbulgarianmodel.py b/libs/chardet/langbulgarianmodel.py
index 561bfd905..2f771bb81 100644
--- a/libs/chardet/langbulgarianmodel.py
+++ b/libs/chardet/langbulgarianmodel.py
@@ -1,9 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
from chardet.sbcharsetprober import SingleByteCharSetModel
-
# 3: Positive
# 2: Likely
# 1: Unlikely
@@ -4115,536 +4111,539 @@ BULGARIAN_LANG_MODEL = {
# Character Mapping Table(s):
ISO_8859_5_BULGARIAN_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 77, # 'A'
- 66: 90, # 'B'
- 67: 99, # 'C'
- 68: 100, # 'D'
- 69: 72, # 'E'
- 70: 109, # 'F'
- 71: 107, # 'G'
- 72: 101, # 'H'
- 73: 79, # 'I'
- 74: 185, # 'J'
- 75: 81, # 'K'
- 76: 102, # 'L'
- 77: 76, # 'M'
- 78: 94, # 'N'
- 79: 82, # 'O'
- 80: 110, # 'P'
- 81: 186, # 'Q'
- 82: 108, # 'R'
- 83: 91, # 'S'
- 84: 74, # 'T'
- 85: 119, # 'U'
- 86: 84, # 'V'
- 87: 96, # 'W'
- 88: 111, # 'X'
- 89: 187, # 'Y'
- 90: 115, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 65, # 'a'
- 98: 69, # 'b'
- 99: 70, # 'c'
- 100: 66, # 'd'
- 101: 63, # 'e'
- 102: 68, # 'f'
- 103: 112, # 'g'
- 104: 103, # 'h'
- 105: 92, # 'i'
- 106: 194, # 'j'
- 107: 104, # 'k'
- 108: 95, # 'l'
- 109: 86, # 'm'
- 110: 87, # 'n'
- 111: 71, # 'o'
- 112: 116, # 'p'
- 113: 195, # 'q'
- 114: 85, # 'r'
- 115: 93, # 's'
- 116: 97, # 't'
- 117: 113, # 'u'
- 118: 196, # 'v'
- 119: 197, # 'w'
- 120: 198, # 'x'
- 121: 199, # 'y'
- 122: 200, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 194, # '\x80'
- 129: 195, # '\x81'
- 130: 196, # '\x82'
- 131: 197, # '\x83'
- 132: 198, # '\x84'
- 133: 199, # '\x85'
- 134: 200, # '\x86'
- 135: 201, # '\x87'
- 136: 202, # '\x88'
- 137: 203, # '\x89'
- 138: 204, # '\x8a'
- 139: 205, # '\x8b'
- 140: 206, # '\x8c'
- 141: 207, # '\x8d'
- 142: 208, # '\x8e'
- 143: 209, # '\x8f'
- 144: 210, # '\x90'
- 145: 211, # '\x91'
- 146: 212, # '\x92'
- 147: 213, # '\x93'
- 148: 214, # '\x94'
- 149: 215, # '\x95'
- 150: 216, # '\x96'
- 151: 217, # '\x97'
- 152: 218, # '\x98'
- 153: 219, # '\x99'
- 154: 220, # '\x9a'
- 155: 221, # '\x9b'
- 156: 222, # '\x9c'
- 157: 223, # '\x9d'
- 158: 224, # '\x9e'
- 159: 225, # '\x9f'
- 160: 81, # '\xa0'
- 161: 226, # 'Ё'
- 162: 227, # 'Ђ'
- 163: 228, # 'Ѓ'
- 164: 229, # 'Є'
- 165: 230, # 'Ѕ'
- 166: 105, # 'І'
- 167: 231, # 'Ї'
- 168: 232, # 'Ј'
- 169: 233, # 'Љ'
- 170: 234, # 'Њ'
- 171: 235, # 'Ћ'
- 172: 236, # 'Ќ'
- 173: 45, # '\xad'
- 174: 237, # 'Ў'
- 175: 238, # 'Џ'
- 176: 31, # 'А'
- 177: 32, # 'Б'
- 178: 35, # 'В'
- 179: 43, # 'Г'
- 180: 37, # 'Д'
- 181: 44, # 'Е'
- 182: 55, # 'Ж'
- 183: 47, # 'З'
- 184: 40, # 'И'
- 185: 59, # 'Й'
- 186: 33, # 'К'
- 187: 46, # 'Л'
- 188: 38, # 'М'
- 189: 36, # 'Н'
- 190: 41, # 'О'
- 191: 30, # 'П'
- 192: 39, # 'Р'
- 193: 28, # 'С'
- 194: 34, # 'Т'
- 195: 51, # 'У'
- 196: 48, # 'Ф'
- 197: 49, # 'Х'
- 198: 53, # 'Ц'
- 199: 50, # 'Ч'
- 200: 54, # 'Ш'
- 201: 57, # 'Щ'
- 202: 61, # 'Ъ'
- 203: 239, # 'Ы'
- 204: 67, # 'Ь'
- 205: 240, # 'Э'
- 206: 60, # 'Ю'
- 207: 56, # 'Я'
- 208: 1, # 'а'
- 209: 18, # 'б'
- 210: 9, # 'в'
- 211: 20, # 'г'
- 212: 11, # 'д'
- 213: 3, # 'е'
- 214: 23, # 'ж'
- 215: 15, # 'з'
- 216: 2, # 'и'
- 217: 26, # 'й'
- 218: 12, # 'к'
- 219: 10, # 'л'
- 220: 14, # 'м'
- 221: 6, # 'н'
- 222: 4, # 'о'
- 223: 13, # 'п'
- 224: 7, # 'р'
- 225: 8, # 'с'
- 226: 5, # 'т'
- 227: 19, # 'у'
- 228: 29, # 'ф'
- 229: 25, # 'х'
- 230: 22, # 'ц'
- 231: 21, # 'ч'
- 232: 27, # 'ш'
- 233: 24, # 'щ'
- 234: 17, # 'ъ'
- 235: 75, # 'ы'
- 236: 52, # 'ь'
- 237: 241, # 'э'
- 238: 42, # 'ю'
- 239: 16, # 'я'
- 240: 62, # '№'
- 241: 242, # 'ё'
- 242: 243, # 'ђ'
- 243: 244, # 'ѓ'
- 244: 58, # 'є'
- 245: 245, # 'ѕ'
- 246: 98, # 'і'
- 247: 246, # 'ї'
- 248: 247, # 'ј'
- 249: 248, # 'љ'
- 250: 249, # 'њ'
- 251: 250, # 'ћ'
- 252: 251, # 'ќ'
- 253: 91, # '§'
- 254: 252, # 'ў'
- 255: 253, # 'џ'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 77, # 'A'
+ 66: 90, # 'B'
+ 67: 99, # 'C'
+ 68: 100, # 'D'
+ 69: 72, # 'E'
+ 70: 109, # 'F'
+ 71: 107, # 'G'
+ 72: 101, # 'H'
+ 73: 79, # 'I'
+ 74: 185, # 'J'
+ 75: 81, # 'K'
+ 76: 102, # 'L'
+ 77: 76, # 'M'
+ 78: 94, # 'N'
+ 79: 82, # 'O'
+ 80: 110, # 'P'
+ 81: 186, # 'Q'
+ 82: 108, # 'R'
+ 83: 91, # 'S'
+ 84: 74, # 'T'
+ 85: 119, # 'U'
+ 86: 84, # 'V'
+ 87: 96, # 'W'
+ 88: 111, # 'X'
+ 89: 187, # 'Y'
+ 90: 115, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 65, # 'a'
+ 98: 69, # 'b'
+ 99: 70, # 'c'
+ 100: 66, # 'd'
+ 101: 63, # 'e'
+ 102: 68, # 'f'
+ 103: 112, # 'g'
+ 104: 103, # 'h'
+ 105: 92, # 'i'
+ 106: 194, # 'j'
+ 107: 104, # 'k'
+ 108: 95, # 'l'
+ 109: 86, # 'm'
+ 110: 87, # 'n'
+ 111: 71, # 'o'
+ 112: 116, # 'p'
+ 113: 195, # 'q'
+ 114: 85, # 'r'
+ 115: 93, # 's'
+ 116: 97, # 't'
+ 117: 113, # 'u'
+ 118: 196, # 'v'
+ 119: 197, # 'w'
+ 120: 198, # 'x'
+ 121: 199, # 'y'
+ 122: 200, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 194, # '\x80'
+ 129: 195, # '\x81'
+ 130: 196, # '\x82'
+ 131: 197, # '\x83'
+ 132: 198, # '\x84'
+ 133: 199, # '\x85'
+ 134: 200, # '\x86'
+ 135: 201, # '\x87'
+ 136: 202, # '\x88'
+ 137: 203, # '\x89'
+ 138: 204, # '\x8a'
+ 139: 205, # '\x8b'
+ 140: 206, # '\x8c'
+ 141: 207, # '\x8d'
+ 142: 208, # '\x8e'
+ 143: 209, # '\x8f'
+ 144: 210, # '\x90'
+ 145: 211, # '\x91'
+ 146: 212, # '\x92'
+ 147: 213, # '\x93'
+ 148: 214, # '\x94'
+ 149: 215, # '\x95'
+ 150: 216, # '\x96'
+ 151: 217, # '\x97'
+ 152: 218, # '\x98'
+ 153: 219, # '\x99'
+ 154: 220, # '\x9a'
+ 155: 221, # '\x9b'
+ 156: 222, # '\x9c'
+ 157: 223, # '\x9d'
+ 158: 224, # '\x9e'
+ 159: 225, # '\x9f'
+ 160: 81, # '\xa0'
+ 161: 226, # 'Ё'
+ 162: 227, # 'Ђ'
+ 163: 228, # 'Ѓ'
+ 164: 229, # 'Є'
+ 165: 230, # 'Ѕ'
+ 166: 105, # 'І'
+ 167: 231, # 'Ї'
+ 168: 232, # 'Ј'
+ 169: 233, # 'Љ'
+ 170: 234, # 'Њ'
+ 171: 235, # 'Ћ'
+ 172: 236, # 'Ќ'
+ 173: 45, # '\xad'
+ 174: 237, # 'Ў'
+ 175: 238, # 'Џ'
+ 176: 31, # 'А'
+ 177: 32, # 'Б'
+ 178: 35, # 'В'
+ 179: 43, # 'Г'
+ 180: 37, # 'Д'
+ 181: 44, # 'Е'
+ 182: 55, # 'Ж'
+ 183: 47, # 'З'
+ 184: 40, # 'И'
+ 185: 59, # 'Й'
+ 186: 33, # 'К'
+ 187: 46, # 'Л'
+ 188: 38, # 'М'
+ 189: 36, # 'Н'
+ 190: 41, # 'О'
+ 191: 30, # 'П'
+ 192: 39, # 'Р'
+ 193: 28, # 'С'
+ 194: 34, # 'Т'
+ 195: 51, # 'У'
+ 196: 48, # 'Ф'
+ 197: 49, # 'Х'
+ 198: 53, # 'Ц'
+ 199: 50, # 'Ч'
+ 200: 54, # 'Ш'
+ 201: 57, # 'Щ'
+ 202: 61, # 'Ъ'
+ 203: 239, # 'Ы'
+ 204: 67, # 'Ь'
+ 205: 240, # 'Э'
+ 206: 60, # 'Ю'
+ 207: 56, # 'Я'
+ 208: 1, # 'а'
+ 209: 18, # 'б'
+ 210: 9, # 'в'
+ 211: 20, # 'г'
+ 212: 11, # 'д'
+ 213: 3, # 'е'
+ 214: 23, # 'ж'
+ 215: 15, # 'з'
+ 216: 2, # 'и'
+ 217: 26, # 'й'
+ 218: 12, # 'к'
+ 219: 10, # 'л'
+ 220: 14, # 'м'
+ 221: 6, # 'н'
+ 222: 4, # 'о'
+ 223: 13, # 'п'
+ 224: 7, # 'р'
+ 225: 8, # 'с'
+ 226: 5, # 'т'
+ 227: 19, # 'у'
+ 228: 29, # 'ф'
+ 229: 25, # 'х'
+ 230: 22, # 'ц'
+ 231: 21, # 'ч'
+ 232: 27, # 'ш'
+ 233: 24, # 'щ'
+ 234: 17, # 'ъ'
+ 235: 75, # 'ы'
+ 236: 52, # 'ь'
+ 237: 241, # 'э'
+ 238: 42, # 'ю'
+ 239: 16, # 'я'
+ 240: 62, # '№'
+ 241: 242, # 'ё'
+ 242: 243, # 'ђ'
+ 243: 244, # 'ѓ'
+ 244: 58, # 'є'
+ 245: 245, # 'ѕ'
+ 246: 98, # 'і'
+ 247: 246, # 'ї'
+ 248: 247, # 'ј'
+ 249: 248, # 'љ'
+ 250: 249, # 'њ'
+ 251: 250, # 'ћ'
+ 252: 251, # 'ќ'
+ 253: 91, # '§'
+ 254: 252, # 'ў'
+ 255: 253, # 'џ'
}
-ISO_8859_5_BULGARIAN_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-5',
- language='Bulgarian',
- char_to_order_map=ISO_8859_5_BULGARIAN_CHAR_TO_ORDER,
- language_model=BULGARIAN_LANG_MODEL,
- typical_positive_ratio=0.969392,
- keep_ascii_letters=False,
- alphabet='АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя')
+ISO_8859_5_BULGARIAN_MODEL = SingleByteCharSetModel(
+ charset_name="ISO-8859-5",
+ language="Bulgarian",
+ char_to_order_map=ISO_8859_5_BULGARIAN_CHAR_TO_ORDER,
+ language_model=BULGARIAN_LANG_MODEL,
+ typical_positive_ratio=0.969392,
+ keep_ascii_letters=False,
+ alphabet="АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя",
+)
WINDOWS_1251_BULGARIAN_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 77, # 'A'
- 66: 90, # 'B'
- 67: 99, # 'C'
- 68: 100, # 'D'
- 69: 72, # 'E'
- 70: 109, # 'F'
- 71: 107, # 'G'
- 72: 101, # 'H'
- 73: 79, # 'I'
- 74: 185, # 'J'
- 75: 81, # 'K'
- 76: 102, # 'L'
- 77: 76, # 'M'
- 78: 94, # 'N'
- 79: 82, # 'O'
- 80: 110, # 'P'
- 81: 186, # 'Q'
- 82: 108, # 'R'
- 83: 91, # 'S'
- 84: 74, # 'T'
- 85: 119, # 'U'
- 86: 84, # 'V'
- 87: 96, # 'W'
- 88: 111, # 'X'
- 89: 187, # 'Y'
- 90: 115, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 65, # 'a'
- 98: 69, # 'b'
- 99: 70, # 'c'
- 100: 66, # 'd'
- 101: 63, # 'e'
- 102: 68, # 'f'
- 103: 112, # 'g'
- 104: 103, # 'h'
- 105: 92, # 'i'
- 106: 194, # 'j'
- 107: 104, # 'k'
- 108: 95, # 'l'
- 109: 86, # 'm'
- 110: 87, # 'n'
- 111: 71, # 'o'
- 112: 116, # 'p'
- 113: 195, # 'q'
- 114: 85, # 'r'
- 115: 93, # 's'
- 116: 97, # 't'
- 117: 113, # 'u'
- 118: 196, # 'v'
- 119: 197, # 'w'
- 120: 198, # 'x'
- 121: 199, # 'y'
- 122: 200, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 206, # 'Ђ'
- 129: 207, # 'Ѓ'
- 130: 208, # '‚'
- 131: 209, # 'ѓ'
- 132: 210, # '„'
- 133: 211, # '…'
- 134: 212, # '†'
- 135: 213, # '‡'
- 136: 120, # '€'
- 137: 214, # '‰'
- 138: 215, # 'Љ'
- 139: 216, # '‹'
- 140: 217, # 'Њ'
- 141: 218, # 'Ќ'
- 142: 219, # 'Ћ'
- 143: 220, # 'Џ'
- 144: 221, # 'ђ'
- 145: 78, # '‘'
- 146: 64, # '’'
- 147: 83, # '“'
- 148: 121, # '”'
- 149: 98, # '•'
- 150: 117, # '–'
- 151: 105, # '—'
- 152: 222, # None
- 153: 223, # '™'
- 154: 224, # 'љ'
- 155: 225, # '›'
- 156: 226, # 'њ'
- 157: 227, # 'ќ'
- 158: 228, # 'ћ'
- 159: 229, # 'џ'
- 160: 88, # '\xa0'
- 161: 230, # 'Ў'
- 162: 231, # 'ў'
- 163: 232, # 'Ј'
- 164: 233, # '¤'
- 165: 122, # 'Ґ'
- 166: 89, # '¦'
- 167: 106, # '§'
- 168: 234, # 'Ё'
- 169: 235, # '©'
- 170: 236, # 'Є'
- 171: 237, # '«'
- 172: 238, # '¬'
- 173: 45, # '\xad'
- 174: 239, # '®'
- 175: 240, # 'Ї'
- 176: 73, # '°'
- 177: 80, # '±'
- 178: 118, # 'І'
- 179: 114, # 'і'
- 180: 241, # 'ґ'
- 181: 242, # 'µ'
- 182: 243, # '¶'
- 183: 244, # '·'
- 184: 245, # 'ё'
- 185: 62, # '№'
- 186: 58, # 'є'
- 187: 246, # '»'
- 188: 247, # 'ј'
- 189: 248, # 'Ѕ'
- 190: 249, # 'ѕ'
- 191: 250, # 'ї'
- 192: 31, # 'А'
- 193: 32, # 'Б'
- 194: 35, # 'В'
- 195: 43, # 'Г'
- 196: 37, # 'Д'
- 197: 44, # 'Е'
- 198: 55, # 'Ж'
- 199: 47, # 'З'
- 200: 40, # 'И'
- 201: 59, # 'Й'
- 202: 33, # 'К'
- 203: 46, # 'Л'
- 204: 38, # 'М'
- 205: 36, # 'Н'
- 206: 41, # 'О'
- 207: 30, # 'П'
- 208: 39, # 'Р'
- 209: 28, # 'С'
- 210: 34, # 'Т'
- 211: 51, # 'У'
- 212: 48, # 'Ф'
- 213: 49, # 'Х'
- 214: 53, # 'Ц'
- 215: 50, # 'Ч'
- 216: 54, # 'Ш'
- 217: 57, # 'Щ'
- 218: 61, # 'Ъ'
- 219: 251, # 'Ы'
- 220: 67, # 'Ь'
- 221: 252, # 'Э'
- 222: 60, # 'Ю'
- 223: 56, # 'Я'
- 224: 1, # 'а'
- 225: 18, # 'б'
- 226: 9, # 'в'
- 227: 20, # 'г'
- 228: 11, # 'д'
- 229: 3, # 'е'
- 230: 23, # 'ж'
- 231: 15, # 'з'
- 232: 2, # 'и'
- 233: 26, # 'й'
- 234: 12, # 'к'
- 235: 10, # 'л'
- 236: 14, # 'м'
- 237: 6, # 'н'
- 238: 4, # 'о'
- 239: 13, # 'п'
- 240: 7, # 'р'
- 241: 8, # 'с'
- 242: 5, # 'т'
- 243: 19, # 'у'
- 244: 29, # 'ф'
- 245: 25, # 'х'
- 246: 22, # 'ц'
- 247: 21, # 'ч'
- 248: 27, # 'ш'
- 249: 24, # 'щ'
- 250: 17, # 'ъ'
- 251: 75, # 'ы'
- 252: 52, # 'ь'
- 253: 253, # 'э'
- 254: 42, # 'ю'
- 255: 16, # 'я'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 77, # 'A'
+ 66: 90, # 'B'
+ 67: 99, # 'C'
+ 68: 100, # 'D'
+ 69: 72, # 'E'
+ 70: 109, # 'F'
+ 71: 107, # 'G'
+ 72: 101, # 'H'
+ 73: 79, # 'I'
+ 74: 185, # 'J'
+ 75: 81, # 'K'
+ 76: 102, # 'L'
+ 77: 76, # 'M'
+ 78: 94, # 'N'
+ 79: 82, # 'O'
+ 80: 110, # 'P'
+ 81: 186, # 'Q'
+ 82: 108, # 'R'
+ 83: 91, # 'S'
+ 84: 74, # 'T'
+ 85: 119, # 'U'
+ 86: 84, # 'V'
+ 87: 96, # 'W'
+ 88: 111, # 'X'
+ 89: 187, # 'Y'
+ 90: 115, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 65, # 'a'
+ 98: 69, # 'b'
+ 99: 70, # 'c'
+ 100: 66, # 'd'
+ 101: 63, # 'e'
+ 102: 68, # 'f'
+ 103: 112, # 'g'
+ 104: 103, # 'h'
+ 105: 92, # 'i'
+ 106: 194, # 'j'
+ 107: 104, # 'k'
+ 108: 95, # 'l'
+ 109: 86, # 'm'
+ 110: 87, # 'n'
+ 111: 71, # 'o'
+ 112: 116, # 'p'
+ 113: 195, # 'q'
+ 114: 85, # 'r'
+ 115: 93, # 's'
+ 116: 97, # 't'
+ 117: 113, # 'u'
+ 118: 196, # 'v'
+ 119: 197, # 'w'
+ 120: 198, # 'x'
+ 121: 199, # 'y'
+ 122: 200, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 206, # 'Ђ'
+ 129: 207, # 'Ѓ'
+ 130: 208, # '‚'
+ 131: 209, # 'ѓ'
+ 132: 210, # '„'
+ 133: 211, # '…'
+ 134: 212, # '†'
+ 135: 213, # '‡'
+ 136: 120, # '€'
+ 137: 214, # '‰'
+ 138: 215, # 'Љ'
+ 139: 216, # '‹'
+ 140: 217, # 'Њ'
+ 141: 218, # 'Ќ'
+ 142: 219, # 'Ћ'
+ 143: 220, # 'Џ'
+ 144: 221, # 'ђ'
+ 145: 78, # '‘'
+ 146: 64, # '’'
+ 147: 83, # '“'
+ 148: 121, # '”'
+ 149: 98, # '•'
+ 150: 117, # '–'
+ 151: 105, # '—'
+ 152: 222, # None
+ 153: 223, # '™'
+ 154: 224, # 'љ'
+ 155: 225, # '›'
+ 156: 226, # 'њ'
+ 157: 227, # 'ќ'
+ 158: 228, # 'ћ'
+ 159: 229, # 'џ'
+ 160: 88, # '\xa0'
+ 161: 230, # 'Ў'
+ 162: 231, # 'ў'
+ 163: 232, # 'Ј'
+ 164: 233, # '¤'
+ 165: 122, # 'Ґ'
+ 166: 89, # '¦'
+ 167: 106, # '§'
+ 168: 234, # 'Ё'
+ 169: 235, # '©'
+ 170: 236, # 'Є'
+ 171: 237, # '«'
+ 172: 238, # '¬'
+ 173: 45, # '\xad'
+ 174: 239, # '®'
+ 175: 240, # 'Ї'
+ 176: 73, # '°'
+ 177: 80, # '±'
+ 178: 118, # 'І'
+ 179: 114, # 'і'
+ 180: 241, # 'ґ'
+ 181: 242, # 'µ'
+ 182: 243, # '¶'
+ 183: 244, # '·'
+ 184: 245, # 'ё'
+ 185: 62, # '№'
+ 186: 58, # 'є'
+ 187: 246, # '»'
+ 188: 247, # 'ј'
+ 189: 248, # 'Ѕ'
+ 190: 249, # 'ѕ'
+ 191: 250, # 'ї'
+ 192: 31, # 'А'
+ 193: 32, # 'Б'
+ 194: 35, # 'В'
+ 195: 43, # 'Г'
+ 196: 37, # 'Д'
+ 197: 44, # 'Е'
+ 198: 55, # 'Ж'
+ 199: 47, # 'З'
+ 200: 40, # 'И'
+ 201: 59, # 'Й'
+ 202: 33, # 'К'
+ 203: 46, # 'Л'
+ 204: 38, # 'М'
+ 205: 36, # 'Н'
+ 206: 41, # 'О'
+ 207: 30, # 'П'
+ 208: 39, # 'Р'
+ 209: 28, # 'С'
+ 210: 34, # 'Т'
+ 211: 51, # 'У'
+ 212: 48, # 'Ф'
+ 213: 49, # 'Х'
+ 214: 53, # 'Ц'
+ 215: 50, # 'Ч'
+ 216: 54, # 'Ш'
+ 217: 57, # 'Щ'
+ 218: 61, # 'Ъ'
+ 219: 251, # 'Ы'
+ 220: 67, # 'Ь'
+ 221: 252, # 'Э'
+ 222: 60, # 'Ю'
+ 223: 56, # 'Я'
+ 224: 1, # 'а'
+ 225: 18, # 'б'
+ 226: 9, # 'в'
+ 227: 20, # 'г'
+ 228: 11, # 'д'
+ 229: 3, # 'е'
+ 230: 23, # 'ж'
+ 231: 15, # 'з'
+ 232: 2, # 'и'
+ 233: 26, # 'й'
+ 234: 12, # 'к'
+ 235: 10, # 'л'
+ 236: 14, # 'м'
+ 237: 6, # 'н'
+ 238: 4, # 'о'
+ 239: 13, # 'п'
+ 240: 7, # 'р'
+ 241: 8, # 'с'
+ 242: 5, # 'т'
+ 243: 19, # 'у'
+ 244: 29, # 'ф'
+ 245: 25, # 'х'
+ 246: 22, # 'ц'
+ 247: 21, # 'ч'
+ 248: 27, # 'ш'
+ 249: 24, # 'щ'
+ 250: 17, # 'ъ'
+ 251: 75, # 'ы'
+ 252: 52, # 'ь'
+ 253: 253, # 'э'
+ 254: 42, # 'ю'
+ 255: 16, # 'я'
}
-WINDOWS_1251_BULGARIAN_MODEL = SingleByteCharSetModel(charset_name='windows-1251',
- language='Bulgarian',
- char_to_order_map=WINDOWS_1251_BULGARIAN_CHAR_TO_ORDER,
- language_model=BULGARIAN_LANG_MODEL,
- typical_positive_ratio=0.969392,
- keep_ascii_letters=False,
- alphabet='АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя')
-
+WINDOWS_1251_BULGARIAN_MODEL = SingleByteCharSetModel(
+ charset_name="windows-1251",
+ language="Bulgarian",
+ char_to_order_map=WINDOWS_1251_BULGARIAN_CHAR_TO_ORDER,
+ language_model=BULGARIAN_LANG_MODEL,
+ typical_positive_ratio=0.969392,
+ keep_ascii_letters=False,
+ alphabet="АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя",
+)
diff --git a/libs/chardet/langgreekmodel.py b/libs/chardet/langgreekmodel.py
index 02b94de65..0471d8bb1 100644
--- a/libs/chardet/langgreekmodel.py
+++ b/libs/chardet/langgreekmodel.py
@@ -1,9 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
from chardet.sbcharsetprober import SingleByteCharSetModel
-
# 3: Positive
# 2: Likely
# 1: Unlikely
@@ -3863,536 +3859,539 @@ GREEK_LANG_MODEL = {
# Character Mapping Table(s):
WINDOWS_1253_GREEK_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 82, # 'A'
- 66: 100, # 'B'
- 67: 104, # 'C'
- 68: 94, # 'D'
- 69: 98, # 'E'
- 70: 101, # 'F'
- 71: 116, # 'G'
- 72: 102, # 'H'
- 73: 111, # 'I'
- 74: 187, # 'J'
- 75: 117, # 'K'
- 76: 92, # 'L'
- 77: 88, # 'M'
- 78: 113, # 'N'
- 79: 85, # 'O'
- 80: 79, # 'P'
- 81: 118, # 'Q'
- 82: 105, # 'R'
- 83: 83, # 'S'
- 84: 67, # 'T'
- 85: 114, # 'U'
- 86: 119, # 'V'
- 87: 95, # 'W'
- 88: 99, # 'X'
- 89: 109, # 'Y'
- 90: 188, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 72, # 'a'
- 98: 70, # 'b'
- 99: 80, # 'c'
- 100: 81, # 'd'
- 101: 60, # 'e'
- 102: 96, # 'f'
- 103: 93, # 'g'
- 104: 89, # 'h'
- 105: 68, # 'i'
- 106: 120, # 'j'
- 107: 97, # 'k'
- 108: 77, # 'l'
- 109: 86, # 'm'
- 110: 69, # 'n'
- 111: 55, # 'o'
- 112: 78, # 'p'
- 113: 115, # 'q'
- 114: 65, # 'r'
- 115: 66, # 's'
- 116: 58, # 't'
- 117: 76, # 'u'
- 118: 106, # 'v'
- 119: 103, # 'w'
- 120: 87, # 'x'
- 121: 107, # 'y'
- 122: 112, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 255, # '€'
- 129: 255, # None
- 130: 255, # '‚'
- 131: 255, # 'ƒ'
- 132: 255, # '„'
- 133: 255, # '…'
- 134: 255, # '†'
- 135: 255, # '‡'
- 136: 255, # None
- 137: 255, # '‰'
- 138: 255, # None
- 139: 255, # '‹'
- 140: 255, # None
- 141: 255, # None
- 142: 255, # None
- 143: 255, # None
- 144: 255, # None
- 145: 255, # '‘'
- 146: 255, # '’'
- 147: 255, # '“'
- 148: 255, # '”'
- 149: 255, # '•'
- 150: 255, # '–'
- 151: 255, # '—'
- 152: 255, # None
- 153: 255, # '™'
- 154: 255, # None
- 155: 255, # '›'
- 156: 255, # None
- 157: 255, # None
- 158: 255, # None
- 159: 255, # None
- 160: 253, # '\xa0'
- 161: 233, # '΅'
- 162: 61, # 'Ά'
- 163: 253, # '£'
- 164: 253, # '¤'
- 165: 253, # '¥'
- 166: 253, # '¦'
- 167: 253, # '§'
- 168: 253, # '¨'
- 169: 253, # '©'
- 170: 253, # None
- 171: 253, # '«'
- 172: 253, # '¬'
- 173: 74, # '\xad'
- 174: 253, # '®'
- 175: 253, # '―'
- 176: 253, # '°'
- 177: 253, # '±'
- 178: 253, # '²'
- 179: 253, # '³'
- 180: 247, # '΄'
- 181: 253, # 'µ'
- 182: 253, # '¶'
- 183: 36, # '·'
- 184: 46, # 'Έ'
- 185: 71, # 'Ή'
- 186: 73, # 'Ί'
- 187: 253, # '»'
- 188: 54, # 'Ό'
- 189: 253, # '½'
- 190: 108, # 'Ύ'
- 191: 123, # 'Ώ'
- 192: 110, # 'ΐ'
- 193: 31, # 'Α'
- 194: 51, # 'Β'
- 195: 43, # 'Γ'
- 196: 41, # 'Δ'
- 197: 34, # 'Ε'
- 198: 91, # 'Ζ'
- 199: 40, # 'Η'
- 200: 52, # 'Θ'
- 201: 47, # 'Ι'
- 202: 44, # 'Κ'
- 203: 53, # 'Λ'
- 204: 38, # 'Μ'
- 205: 49, # 'Ν'
- 206: 59, # 'Ξ'
- 207: 39, # 'Ο'
- 208: 35, # 'Π'
- 209: 48, # 'Ρ'
- 210: 250, # None
- 211: 37, # 'Σ'
- 212: 33, # 'Τ'
- 213: 45, # 'Υ'
- 214: 56, # 'Φ'
- 215: 50, # 'Χ'
- 216: 84, # 'Ψ'
- 217: 57, # 'Ω'
- 218: 120, # 'Ϊ'
- 219: 121, # 'Ϋ'
- 220: 17, # 'ά'
- 221: 18, # 'έ'
- 222: 22, # 'ή'
- 223: 15, # 'ί'
- 224: 124, # 'ΰ'
- 225: 1, # 'α'
- 226: 29, # 'β'
- 227: 20, # 'γ'
- 228: 21, # 'δ'
- 229: 3, # 'ε'
- 230: 32, # 'ζ'
- 231: 13, # 'η'
- 232: 25, # 'θ'
- 233: 5, # 'ι'
- 234: 11, # 'κ'
- 235: 16, # 'λ'
- 236: 10, # 'μ'
- 237: 6, # 'ν'
- 238: 30, # 'ξ'
- 239: 4, # 'ο'
- 240: 9, # 'π'
- 241: 8, # 'ρ'
- 242: 14, # 'ς'
- 243: 7, # 'σ'
- 244: 2, # 'τ'
- 245: 12, # 'υ'
- 246: 28, # 'φ'
- 247: 23, # 'χ'
- 248: 42, # 'ψ'
- 249: 24, # 'ω'
- 250: 64, # 'ϊ'
- 251: 75, # 'ϋ'
- 252: 19, # 'ό'
- 253: 26, # 'ύ'
- 254: 27, # 'ώ'
- 255: 253, # None
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 82, # 'A'
+ 66: 100, # 'B'
+ 67: 104, # 'C'
+ 68: 94, # 'D'
+ 69: 98, # 'E'
+ 70: 101, # 'F'
+ 71: 116, # 'G'
+ 72: 102, # 'H'
+ 73: 111, # 'I'
+ 74: 187, # 'J'
+ 75: 117, # 'K'
+ 76: 92, # 'L'
+ 77: 88, # 'M'
+ 78: 113, # 'N'
+ 79: 85, # 'O'
+ 80: 79, # 'P'
+ 81: 118, # 'Q'
+ 82: 105, # 'R'
+ 83: 83, # 'S'
+ 84: 67, # 'T'
+ 85: 114, # 'U'
+ 86: 119, # 'V'
+ 87: 95, # 'W'
+ 88: 99, # 'X'
+ 89: 109, # 'Y'
+ 90: 188, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 72, # 'a'
+ 98: 70, # 'b'
+ 99: 80, # 'c'
+ 100: 81, # 'd'
+ 101: 60, # 'e'
+ 102: 96, # 'f'
+ 103: 93, # 'g'
+ 104: 89, # 'h'
+ 105: 68, # 'i'
+ 106: 120, # 'j'
+ 107: 97, # 'k'
+ 108: 77, # 'l'
+ 109: 86, # 'm'
+ 110: 69, # 'n'
+ 111: 55, # 'o'
+ 112: 78, # 'p'
+ 113: 115, # 'q'
+ 114: 65, # 'r'
+ 115: 66, # 's'
+ 116: 58, # 't'
+ 117: 76, # 'u'
+ 118: 106, # 'v'
+ 119: 103, # 'w'
+ 120: 87, # 'x'
+ 121: 107, # 'y'
+ 122: 112, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 255, # '€'
+ 129: 255, # None
+ 130: 255, # '‚'
+ 131: 255, # 'ƒ'
+ 132: 255, # '„'
+ 133: 255, # '…'
+ 134: 255, # '†'
+ 135: 255, # '‡'
+ 136: 255, # None
+ 137: 255, # '‰'
+ 138: 255, # None
+ 139: 255, # '‹'
+ 140: 255, # None
+ 141: 255, # None
+ 142: 255, # None
+ 143: 255, # None
+ 144: 255, # None
+ 145: 255, # '‘'
+ 146: 255, # '’'
+ 147: 255, # '“'
+ 148: 255, # '”'
+ 149: 255, # '•'
+ 150: 255, # '–'
+ 151: 255, # '—'
+ 152: 255, # None
+ 153: 255, # '™'
+ 154: 255, # None
+ 155: 255, # '›'
+ 156: 255, # None
+ 157: 255, # None
+ 158: 255, # None
+ 159: 255, # None
+ 160: 253, # '\xa0'
+ 161: 233, # '΅'
+ 162: 61, # 'Ά'
+ 163: 253, # '£'
+ 164: 253, # '¤'
+ 165: 253, # '¥'
+ 166: 253, # '¦'
+ 167: 253, # '§'
+ 168: 253, # '¨'
+ 169: 253, # '©'
+ 170: 253, # None
+ 171: 253, # '«'
+ 172: 253, # '¬'
+ 173: 74, # '\xad'
+ 174: 253, # '®'
+ 175: 253, # '―'
+ 176: 253, # '°'
+ 177: 253, # '±'
+ 178: 253, # '²'
+ 179: 253, # '³'
+ 180: 247, # '΄'
+ 181: 253, # 'µ'
+ 182: 253, # '¶'
+ 183: 36, # '·'
+ 184: 46, # 'Έ'
+ 185: 71, # 'Ή'
+ 186: 73, # 'Ί'
+ 187: 253, # '»'
+ 188: 54, # 'Ό'
+ 189: 253, # '½'
+ 190: 108, # 'Ύ'
+ 191: 123, # 'Ώ'
+ 192: 110, # 'ΐ'
+ 193: 31, # 'Α'
+ 194: 51, # 'Β'
+ 195: 43, # 'Γ'
+ 196: 41, # 'Δ'
+ 197: 34, # 'Ε'
+ 198: 91, # 'Ζ'
+ 199: 40, # 'Η'
+ 200: 52, # 'Θ'
+ 201: 47, # 'Ι'
+ 202: 44, # 'Κ'
+ 203: 53, # 'Λ'
+ 204: 38, # 'Μ'
+ 205: 49, # 'Ν'
+ 206: 59, # 'Ξ'
+ 207: 39, # 'Ο'
+ 208: 35, # 'Π'
+ 209: 48, # 'Ρ'
+ 210: 250, # None
+ 211: 37, # 'Σ'
+ 212: 33, # 'Τ'
+ 213: 45, # 'Υ'
+ 214: 56, # 'Φ'
+ 215: 50, # 'Χ'
+ 216: 84, # 'Ψ'
+ 217: 57, # 'Ω'
+ 218: 120, # 'Ϊ'
+ 219: 121, # 'Ϋ'
+ 220: 17, # 'ά'
+ 221: 18, # 'έ'
+ 222: 22, # 'ή'
+ 223: 15, # 'ί'
+ 224: 124, # 'ΰ'
+ 225: 1, # 'α'
+ 226: 29, # 'β'
+ 227: 20, # 'γ'
+ 228: 21, # 'δ'
+ 229: 3, # 'ε'
+ 230: 32, # 'ζ'
+ 231: 13, # 'η'
+ 232: 25, # 'θ'
+ 233: 5, # 'ι'
+ 234: 11, # 'κ'
+ 235: 16, # 'λ'
+ 236: 10, # 'μ'
+ 237: 6, # 'ν'
+ 238: 30, # 'ξ'
+ 239: 4, # 'ο'
+ 240: 9, # 'π'
+ 241: 8, # 'ρ'
+ 242: 14, # 'ς'
+ 243: 7, # 'σ'
+ 244: 2, # 'τ'
+ 245: 12, # 'υ'
+ 246: 28, # 'φ'
+ 247: 23, # 'χ'
+ 248: 42, # 'ψ'
+ 249: 24, # 'ω'
+ 250: 64, # 'ϊ'
+ 251: 75, # 'ϋ'
+ 252: 19, # 'ό'
+ 253: 26, # 'ύ'
+ 254: 27, # 'ώ'
+ 255: 253, # None
}
-WINDOWS_1253_GREEK_MODEL = SingleByteCharSetModel(charset_name='windows-1253',
- language='Greek',
- char_to_order_map=WINDOWS_1253_GREEK_CHAR_TO_ORDER,
- language_model=GREEK_LANG_MODEL,
- typical_positive_ratio=0.982851,
- keep_ascii_letters=False,
- alphabet='ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ')
+WINDOWS_1253_GREEK_MODEL = SingleByteCharSetModel(
+ charset_name="windows-1253",
+ language="Greek",
+ char_to_order_map=WINDOWS_1253_GREEK_CHAR_TO_ORDER,
+ language_model=GREEK_LANG_MODEL,
+ typical_positive_ratio=0.982851,
+ keep_ascii_letters=False,
+ alphabet="ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ",
+)
ISO_8859_7_GREEK_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 82, # 'A'
- 66: 100, # 'B'
- 67: 104, # 'C'
- 68: 94, # 'D'
- 69: 98, # 'E'
- 70: 101, # 'F'
- 71: 116, # 'G'
- 72: 102, # 'H'
- 73: 111, # 'I'
- 74: 187, # 'J'
- 75: 117, # 'K'
- 76: 92, # 'L'
- 77: 88, # 'M'
- 78: 113, # 'N'
- 79: 85, # 'O'
- 80: 79, # 'P'
- 81: 118, # 'Q'
- 82: 105, # 'R'
- 83: 83, # 'S'
- 84: 67, # 'T'
- 85: 114, # 'U'
- 86: 119, # 'V'
- 87: 95, # 'W'
- 88: 99, # 'X'
- 89: 109, # 'Y'
- 90: 188, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 72, # 'a'
- 98: 70, # 'b'
- 99: 80, # 'c'
- 100: 81, # 'd'
- 101: 60, # 'e'
- 102: 96, # 'f'
- 103: 93, # 'g'
- 104: 89, # 'h'
- 105: 68, # 'i'
- 106: 120, # 'j'
- 107: 97, # 'k'
- 108: 77, # 'l'
- 109: 86, # 'm'
- 110: 69, # 'n'
- 111: 55, # 'o'
- 112: 78, # 'p'
- 113: 115, # 'q'
- 114: 65, # 'r'
- 115: 66, # 's'
- 116: 58, # 't'
- 117: 76, # 'u'
- 118: 106, # 'v'
- 119: 103, # 'w'
- 120: 87, # 'x'
- 121: 107, # 'y'
- 122: 112, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 255, # '\x80'
- 129: 255, # '\x81'
- 130: 255, # '\x82'
- 131: 255, # '\x83'
- 132: 255, # '\x84'
- 133: 255, # '\x85'
- 134: 255, # '\x86'
- 135: 255, # '\x87'
- 136: 255, # '\x88'
- 137: 255, # '\x89'
- 138: 255, # '\x8a'
- 139: 255, # '\x8b'
- 140: 255, # '\x8c'
- 141: 255, # '\x8d'
- 142: 255, # '\x8e'
- 143: 255, # '\x8f'
- 144: 255, # '\x90'
- 145: 255, # '\x91'
- 146: 255, # '\x92'
- 147: 255, # '\x93'
- 148: 255, # '\x94'
- 149: 255, # '\x95'
- 150: 255, # '\x96'
- 151: 255, # '\x97'
- 152: 255, # '\x98'
- 153: 255, # '\x99'
- 154: 255, # '\x9a'
- 155: 255, # '\x9b'
- 156: 255, # '\x9c'
- 157: 255, # '\x9d'
- 158: 255, # '\x9e'
- 159: 255, # '\x9f'
- 160: 253, # '\xa0'
- 161: 233, # '‘'
- 162: 90, # '’'
- 163: 253, # '£'
- 164: 253, # '€'
- 165: 253, # '₯'
- 166: 253, # '¦'
- 167: 253, # '§'
- 168: 253, # '¨'
- 169: 253, # '©'
- 170: 253, # 'ͺ'
- 171: 253, # '«'
- 172: 253, # '¬'
- 173: 74, # '\xad'
- 174: 253, # None
- 175: 253, # '―'
- 176: 253, # '°'
- 177: 253, # '±'
- 178: 253, # '²'
- 179: 253, # '³'
- 180: 247, # '΄'
- 181: 248, # '΅'
- 182: 61, # 'Ά'
- 183: 36, # '·'
- 184: 46, # 'Έ'
- 185: 71, # 'Ή'
- 186: 73, # 'Ί'
- 187: 253, # '»'
- 188: 54, # 'Ό'
- 189: 253, # '½'
- 190: 108, # 'Ύ'
- 191: 123, # 'Ώ'
- 192: 110, # 'ΐ'
- 193: 31, # 'Α'
- 194: 51, # 'Β'
- 195: 43, # 'Γ'
- 196: 41, # 'Δ'
- 197: 34, # 'Ε'
- 198: 91, # 'Ζ'
- 199: 40, # 'Η'
- 200: 52, # 'Θ'
- 201: 47, # 'Ι'
- 202: 44, # 'Κ'
- 203: 53, # 'Λ'
- 204: 38, # 'Μ'
- 205: 49, # 'Ν'
- 206: 59, # 'Ξ'
- 207: 39, # 'Ο'
- 208: 35, # 'Π'
- 209: 48, # 'Ρ'
- 210: 250, # None
- 211: 37, # 'Σ'
- 212: 33, # 'Τ'
- 213: 45, # 'Υ'
- 214: 56, # 'Φ'
- 215: 50, # 'Χ'
- 216: 84, # 'Ψ'
- 217: 57, # 'Ω'
- 218: 120, # 'Ϊ'
- 219: 121, # 'Ϋ'
- 220: 17, # 'ά'
- 221: 18, # 'έ'
- 222: 22, # 'ή'
- 223: 15, # 'ί'
- 224: 124, # 'ΰ'
- 225: 1, # 'α'
- 226: 29, # 'β'
- 227: 20, # 'γ'
- 228: 21, # 'δ'
- 229: 3, # 'ε'
- 230: 32, # 'ζ'
- 231: 13, # 'η'
- 232: 25, # 'θ'
- 233: 5, # 'ι'
- 234: 11, # 'κ'
- 235: 16, # 'λ'
- 236: 10, # 'μ'
- 237: 6, # 'ν'
- 238: 30, # 'ξ'
- 239: 4, # 'ο'
- 240: 9, # 'π'
- 241: 8, # 'ρ'
- 242: 14, # 'ς'
- 243: 7, # 'σ'
- 244: 2, # 'τ'
- 245: 12, # 'υ'
- 246: 28, # 'φ'
- 247: 23, # 'χ'
- 248: 42, # 'ψ'
- 249: 24, # 'ω'
- 250: 64, # 'ϊ'
- 251: 75, # 'ϋ'
- 252: 19, # 'ό'
- 253: 26, # 'ύ'
- 254: 27, # 'ώ'
- 255: 253, # None
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 82, # 'A'
+ 66: 100, # 'B'
+ 67: 104, # 'C'
+ 68: 94, # 'D'
+ 69: 98, # 'E'
+ 70: 101, # 'F'
+ 71: 116, # 'G'
+ 72: 102, # 'H'
+ 73: 111, # 'I'
+ 74: 187, # 'J'
+ 75: 117, # 'K'
+ 76: 92, # 'L'
+ 77: 88, # 'M'
+ 78: 113, # 'N'
+ 79: 85, # 'O'
+ 80: 79, # 'P'
+ 81: 118, # 'Q'
+ 82: 105, # 'R'
+ 83: 83, # 'S'
+ 84: 67, # 'T'
+ 85: 114, # 'U'
+ 86: 119, # 'V'
+ 87: 95, # 'W'
+ 88: 99, # 'X'
+ 89: 109, # 'Y'
+ 90: 188, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 72, # 'a'
+ 98: 70, # 'b'
+ 99: 80, # 'c'
+ 100: 81, # 'd'
+ 101: 60, # 'e'
+ 102: 96, # 'f'
+ 103: 93, # 'g'
+ 104: 89, # 'h'
+ 105: 68, # 'i'
+ 106: 120, # 'j'
+ 107: 97, # 'k'
+ 108: 77, # 'l'
+ 109: 86, # 'm'
+ 110: 69, # 'n'
+ 111: 55, # 'o'
+ 112: 78, # 'p'
+ 113: 115, # 'q'
+ 114: 65, # 'r'
+ 115: 66, # 's'
+ 116: 58, # 't'
+ 117: 76, # 'u'
+ 118: 106, # 'v'
+ 119: 103, # 'w'
+ 120: 87, # 'x'
+ 121: 107, # 'y'
+ 122: 112, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 255, # '\x80'
+ 129: 255, # '\x81'
+ 130: 255, # '\x82'
+ 131: 255, # '\x83'
+ 132: 255, # '\x84'
+ 133: 255, # '\x85'
+ 134: 255, # '\x86'
+ 135: 255, # '\x87'
+ 136: 255, # '\x88'
+ 137: 255, # '\x89'
+ 138: 255, # '\x8a'
+ 139: 255, # '\x8b'
+ 140: 255, # '\x8c'
+ 141: 255, # '\x8d'
+ 142: 255, # '\x8e'
+ 143: 255, # '\x8f'
+ 144: 255, # '\x90'
+ 145: 255, # '\x91'
+ 146: 255, # '\x92'
+ 147: 255, # '\x93'
+ 148: 255, # '\x94'
+ 149: 255, # '\x95'
+ 150: 255, # '\x96'
+ 151: 255, # '\x97'
+ 152: 255, # '\x98'
+ 153: 255, # '\x99'
+ 154: 255, # '\x9a'
+ 155: 255, # '\x9b'
+ 156: 255, # '\x9c'
+ 157: 255, # '\x9d'
+ 158: 255, # '\x9e'
+ 159: 255, # '\x9f'
+ 160: 253, # '\xa0'
+ 161: 233, # '‘'
+ 162: 90, # '’'
+ 163: 253, # '£'
+ 164: 253, # '€'
+ 165: 253, # '₯'
+ 166: 253, # '¦'
+ 167: 253, # '§'
+ 168: 253, # '¨'
+ 169: 253, # '©'
+ 170: 253, # 'ͺ'
+ 171: 253, # '«'
+ 172: 253, # '¬'
+ 173: 74, # '\xad'
+ 174: 253, # None
+ 175: 253, # '―'
+ 176: 253, # '°'
+ 177: 253, # '±'
+ 178: 253, # '²'
+ 179: 253, # '³'
+ 180: 247, # '΄'
+ 181: 248, # '΅'
+ 182: 61, # 'Ά'
+ 183: 36, # '·'
+ 184: 46, # 'Έ'
+ 185: 71, # 'Ή'
+ 186: 73, # 'Ί'
+ 187: 253, # '»'
+ 188: 54, # 'Ό'
+ 189: 253, # '½'
+ 190: 108, # 'Ύ'
+ 191: 123, # 'Ώ'
+ 192: 110, # 'ΐ'
+ 193: 31, # 'Α'
+ 194: 51, # 'Β'
+ 195: 43, # 'Γ'
+ 196: 41, # 'Δ'
+ 197: 34, # 'Ε'
+ 198: 91, # 'Ζ'
+ 199: 40, # 'Η'
+ 200: 52, # 'Θ'
+ 201: 47, # 'Ι'
+ 202: 44, # 'Κ'
+ 203: 53, # 'Λ'
+ 204: 38, # 'Μ'
+ 205: 49, # 'Ν'
+ 206: 59, # 'Ξ'
+ 207: 39, # 'Ο'
+ 208: 35, # 'Π'
+ 209: 48, # 'Ρ'
+ 210: 250, # None
+ 211: 37, # 'Σ'
+ 212: 33, # 'Τ'
+ 213: 45, # 'Υ'
+ 214: 56, # 'Φ'
+ 215: 50, # 'Χ'
+ 216: 84, # 'Ψ'
+ 217: 57, # 'Ω'
+ 218: 120, # 'Ϊ'
+ 219: 121, # 'Ϋ'
+ 220: 17, # 'ά'
+ 221: 18, # 'έ'
+ 222: 22, # 'ή'
+ 223: 15, # 'ί'
+ 224: 124, # 'ΰ'
+ 225: 1, # 'α'
+ 226: 29, # 'β'
+ 227: 20, # 'γ'
+ 228: 21, # 'δ'
+ 229: 3, # 'ε'
+ 230: 32, # 'ζ'
+ 231: 13, # 'η'
+ 232: 25, # 'θ'
+ 233: 5, # 'ι'
+ 234: 11, # 'κ'
+ 235: 16, # 'λ'
+ 236: 10, # 'μ'
+ 237: 6, # 'ν'
+ 238: 30, # 'ξ'
+ 239: 4, # 'ο'
+ 240: 9, # 'π'
+ 241: 8, # 'ρ'
+ 242: 14, # 'ς'
+ 243: 7, # 'σ'
+ 244: 2, # 'τ'
+ 245: 12, # 'υ'
+ 246: 28, # 'φ'
+ 247: 23, # 'χ'
+ 248: 42, # 'ψ'
+ 249: 24, # 'ω'
+ 250: 64, # 'ϊ'
+ 251: 75, # 'ϋ'
+ 252: 19, # 'ό'
+ 253: 26, # 'ύ'
+ 254: 27, # 'ώ'
+ 255: 253, # None
}
-ISO_8859_7_GREEK_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-7',
- language='Greek',
- char_to_order_map=ISO_8859_7_GREEK_CHAR_TO_ORDER,
- language_model=GREEK_LANG_MODEL,
- typical_positive_ratio=0.982851,
- keep_ascii_letters=False,
- alphabet='ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ')
-
+ISO_8859_7_GREEK_MODEL = SingleByteCharSetModel(
+ charset_name="ISO-8859-7",
+ language="Greek",
+ char_to_order_map=ISO_8859_7_GREEK_CHAR_TO_ORDER,
+ language_model=GREEK_LANG_MODEL,
+ typical_positive_ratio=0.982851,
+ keep_ascii_letters=False,
+ alphabet="ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ",
+)
diff --git a/libs/chardet/langhebrewmodel.py b/libs/chardet/langhebrewmodel.py
index 40fd674c4..86b3c5e64 100644
--- a/libs/chardet/langhebrewmodel.py
+++ b/libs/chardet/langhebrewmodel.py
@@ -1,9 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
from chardet.sbcharsetprober import SingleByteCharSetModel
-
# 3: Positive
# 2: Likely
# 1: Unlikely
@@ -4115,269 +4111,270 @@ HEBREW_LANG_MODEL = {
# Character Mapping Table(s):
WINDOWS_1255_HEBREW_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 69, # 'A'
- 66: 91, # 'B'
- 67: 79, # 'C'
- 68: 80, # 'D'
- 69: 92, # 'E'
- 70: 89, # 'F'
- 71: 97, # 'G'
- 72: 90, # 'H'
- 73: 68, # 'I'
- 74: 111, # 'J'
- 75: 112, # 'K'
- 76: 82, # 'L'
- 77: 73, # 'M'
- 78: 95, # 'N'
- 79: 85, # 'O'
- 80: 78, # 'P'
- 81: 121, # 'Q'
- 82: 86, # 'R'
- 83: 71, # 'S'
- 84: 67, # 'T'
- 85: 102, # 'U'
- 86: 107, # 'V'
- 87: 84, # 'W'
- 88: 114, # 'X'
- 89: 103, # 'Y'
- 90: 115, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 50, # 'a'
- 98: 74, # 'b'
- 99: 60, # 'c'
- 100: 61, # 'd'
- 101: 42, # 'e'
- 102: 76, # 'f'
- 103: 70, # 'g'
- 104: 64, # 'h'
- 105: 53, # 'i'
- 106: 105, # 'j'
- 107: 93, # 'k'
- 108: 56, # 'l'
- 109: 65, # 'm'
- 110: 54, # 'n'
- 111: 49, # 'o'
- 112: 66, # 'p'
- 113: 110, # 'q'
- 114: 51, # 'r'
- 115: 43, # 's'
- 116: 44, # 't'
- 117: 63, # 'u'
- 118: 81, # 'v'
- 119: 77, # 'w'
- 120: 98, # 'x'
- 121: 75, # 'y'
- 122: 108, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 124, # '€'
- 129: 202, # None
- 130: 203, # '‚'
- 131: 204, # 'ƒ'
- 132: 205, # '„'
- 133: 40, # '…'
- 134: 58, # '†'
- 135: 206, # '‡'
- 136: 207, # 'ˆ'
- 137: 208, # '‰'
- 138: 209, # None
- 139: 210, # '‹'
- 140: 211, # None
- 141: 212, # None
- 142: 213, # None
- 143: 214, # None
- 144: 215, # None
- 145: 83, # '‘'
- 146: 52, # '’'
- 147: 47, # '“'
- 148: 46, # '”'
- 149: 72, # '•'
- 150: 32, # '–'
- 151: 94, # '—'
- 152: 216, # '˜'
- 153: 113, # '™'
- 154: 217, # None
- 155: 109, # '›'
- 156: 218, # None
- 157: 219, # None
- 158: 220, # None
- 159: 221, # None
- 160: 34, # '\xa0'
- 161: 116, # '¡'
- 162: 222, # '¢'
- 163: 118, # '£'
- 164: 100, # '₪'
- 165: 223, # '¥'
- 166: 224, # '¦'
- 167: 117, # '§'
- 168: 119, # '¨'
- 169: 104, # '©'
- 170: 125, # '×'
- 171: 225, # '«'
- 172: 226, # '¬'
- 173: 87, # '\xad'
- 174: 99, # '®'
- 175: 227, # '¯'
- 176: 106, # '°'
- 177: 122, # '±'
- 178: 123, # '²'
- 179: 228, # '³'
- 180: 55, # '´'
- 181: 229, # 'µ'
- 182: 230, # '¶'
- 183: 101, # '·'
- 184: 231, # '¸'
- 185: 232, # '¹'
- 186: 120, # '÷'
- 187: 233, # '»'
- 188: 48, # '¼'
- 189: 39, # '½'
- 190: 57, # '¾'
- 191: 234, # '¿'
- 192: 30, # 'ְ'
- 193: 59, # 'ֱ'
- 194: 41, # 'ֲ'
- 195: 88, # 'ֳ'
- 196: 33, # 'ִ'
- 197: 37, # 'ֵ'
- 198: 36, # 'ֶ'
- 199: 31, # 'ַ'
- 200: 29, # 'ָ'
- 201: 35, # 'ֹ'
- 202: 235, # None
- 203: 62, # 'ֻ'
- 204: 28, # 'ּ'
- 205: 236, # 'ֽ'
- 206: 126, # '־'
- 207: 237, # 'ֿ'
- 208: 238, # '׀'
- 209: 38, # 'ׁ'
- 210: 45, # 'ׂ'
- 211: 239, # '׃'
- 212: 240, # 'װ'
- 213: 241, # 'ױ'
- 214: 242, # 'ײ'
- 215: 243, # '׳'
- 216: 127, # '״'
- 217: 244, # None
- 218: 245, # None
- 219: 246, # None
- 220: 247, # None
- 221: 248, # None
- 222: 249, # None
- 223: 250, # None
- 224: 9, # 'א'
- 225: 8, # 'ב'
- 226: 20, # 'ג'
- 227: 16, # 'ד'
- 228: 3, # 'ה'
- 229: 2, # 'ו'
- 230: 24, # 'ז'
- 231: 14, # 'ח'
- 232: 22, # 'ט'
- 233: 1, # 'י'
- 234: 25, # 'ך'
- 235: 15, # 'כ'
- 236: 4, # 'ל'
- 237: 11, # 'ם'
- 238: 6, # 'מ'
- 239: 23, # 'ן'
- 240: 12, # 'נ'
- 241: 19, # 'ס'
- 242: 13, # 'ע'
- 243: 26, # 'ף'
- 244: 18, # 'פ'
- 245: 27, # 'ץ'
- 246: 21, # 'צ'
- 247: 17, # 'ק'
- 248: 7, # 'ר'
- 249: 10, # 'ש'
- 250: 5, # 'ת'
- 251: 251, # None
- 252: 252, # None
- 253: 128, # '\u200e'
- 254: 96, # '\u200f'
- 255: 253, # None
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 69, # 'A'
+ 66: 91, # 'B'
+ 67: 79, # 'C'
+ 68: 80, # 'D'
+ 69: 92, # 'E'
+ 70: 89, # 'F'
+ 71: 97, # 'G'
+ 72: 90, # 'H'
+ 73: 68, # 'I'
+ 74: 111, # 'J'
+ 75: 112, # 'K'
+ 76: 82, # 'L'
+ 77: 73, # 'M'
+ 78: 95, # 'N'
+ 79: 85, # 'O'
+ 80: 78, # 'P'
+ 81: 121, # 'Q'
+ 82: 86, # 'R'
+ 83: 71, # 'S'
+ 84: 67, # 'T'
+ 85: 102, # 'U'
+ 86: 107, # 'V'
+ 87: 84, # 'W'
+ 88: 114, # 'X'
+ 89: 103, # 'Y'
+ 90: 115, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 50, # 'a'
+ 98: 74, # 'b'
+ 99: 60, # 'c'
+ 100: 61, # 'd'
+ 101: 42, # 'e'
+ 102: 76, # 'f'
+ 103: 70, # 'g'
+ 104: 64, # 'h'
+ 105: 53, # 'i'
+ 106: 105, # 'j'
+ 107: 93, # 'k'
+ 108: 56, # 'l'
+ 109: 65, # 'm'
+ 110: 54, # 'n'
+ 111: 49, # 'o'
+ 112: 66, # 'p'
+ 113: 110, # 'q'
+ 114: 51, # 'r'
+ 115: 43, # 's'
+ 116: 44, # 't'
+ 117: 63, # 'u'
+ 118: 81, # 'v'
+ 119: 77, # 'w'
+ 120: 98, # 'x'
+ 121: 75, # 'y'
+ 122: 108, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 124, # '€'
+ 129: 202, # None
+ 130: 203, # '‚'
+ 131: 204, # 'ƒ'
+ 132: 205, # '„'
+ 133: 40, # '…'
+ 134: 58, # '†'
+ 135: 206, # '‡'
+ 136: 207, # 'ˆ'
+ 137: 208, # '‰'
+ 138: 209, # None
+ 139: 210, # '‹'
+ 140: 211, # None
+ 141: 212, # None
+ 142: 213, # None
+ 143: 214, # None
+ 144: 215, # None
+ 145: 83, # '‘'
+ 146: 52, # '’'
+ 147: 47, # '“'
+ 148: 46, # '”'
+ 149: 72, # '•'
+ 150: 32, # '–'
+ 151: 94, # '—'
+ 152: 216, # '˜'
+ 153: 113, # '™'
+ 154: 217, # None
+ 155: 109, # '›'
+ 156: 218, # None
+ 157: 219, # None
+ 158: 220, # None
+ 159: 221, # None
+ 160: 34, # '\xa0'
+ 161: 116, # '¡'
+ 162: 222, # '¢'
+ 163: 118, # '£'
+ 164: 100, # '₪'
+ 165: 223, # '¥'
+ 166: 224, # '¦'
+ 167: 117, # '§'
+ 168: 119, # '¨'
+ 169: 104, # '©'
+ 170: 125, # '×'
+ 171: 225, # '«'
+ 172: 226, # '¬'
+ 173: 87, # '\xad'
+ 174: 99, # '®'
+ 175: 227, # '¯'
+ 176: 106, # '°'
+ 177: 122, # '±'
+ 178: 123, # '²'
+ 179: 228, # '³'
+ 180: 55, # '´'
+ 181: 229, # 'µ'
+ 182: 230, # '¶'
+ 183: 101, # '·'
+ 184: 231, # '¸'
+ 185: 232, # '¹'
+ 186: 120, # '÷'
+ 187: 233, # '»'
+ 188: 48, # '¼'
+ 189: 39, # '½'
+ 190: 57, # '¾'
+ 191: 234, # '¿'
+ 192: 30, # 'ְ'
+ 193: 59, # 'ֱ'
+ 194: 41, # 'ֲ'
+ 195: 88, # 'ֳ'
+ 196: 33, # 'ִ'
+ 197: 37, # 'ֵ'
+ 198: 36, # 'ֶ'
+ 199: 31, # 'ַ'
+ 200: 29, # 'ָ'
+ 201: 35, # 'ֹ'
+ 202: 235, # None
+ 203: 62, # 'ֻ'
+ 204: 28, # 'ּ'
+ 205: 236, # 'ֽ'
+ 206: 126, # '־'
+ 207: 237, # 'ֿ'
+ 208: 238, # '׀'
+ 209: 38, # 'ׁ'
+ 210: 45, # 'ׂ'
+ 211: 239, # '׃'
+ 212: 240, # 'װ'
+ 213: 241, # 'ױ'
+ 214: 242, # 'ײ'
+ 215: 243, # '׳'
+ 216: 127, # '״'
+ 217: 244, # None
+ 218: 245, # None
+ 219: 246, # None
+ 220: 247, # None
+ 221: 248, # None
+ 222: 249, # None
+ 223: 250, # None
+ 224: 9, # 'א'
+ 225: 8, # 'ב'
+ 226: 20, # 'ג'
+ 227: 16, # 'ד'
+ 228: 3, # 'ה'
+ 229: 2, # 'ו'
+ 230: 24, # 'ז'
+ 231: 14, # 'ח'
+ 232: 22, # 'ט'
+ 233: 1, # 'י'
+ 234: 25, # 'ך'
+ 235: 15, # 'כ'
+ 236: 4, # 'ל'
+ 237: 11, # 'ם'
+ 238: 6, # 'מ'
+ 239: 23, # 'ן'
+ 240: 12, # 'נ'
+ 241: 19, # 'ס'
+ 242: 13, # 'ע'
+ 243: 26, # 'ף'
+ 244: 18, # 'פ'
+ 245: 27, # 'ץ'
+ 246: 21, # 'צ'
+ 247: 17, # 'ק'
+ 248: 7, # 'ר'
+ 249: 10, # 'ש'
+ 250: 5, # 'ת'
+ 251: 251, # None
+ 252: 252, # None
+ 253: 128, # '\u200e'
+ 254: 96, # '\u200f'
+ 255: 253, # None
}
-WINDOWS_1255_HEBREW_MODEL = SingleByteCharSetModel(charset_name='windows-1255',
- language='Hebrew',
- char_to_order_map=WINDOWS_1255_HEBREW_CHAR_TO_ORDER,
- language_model=HEBREW_LANG_MODEL,
- typical_positive_ratio=0.984004,
- keep_ascii_letters=False,
- alphabet='אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ')
-
+WINDOWS_1255_HEBREW_MODEL = SingleByteCharSetModel(
+ charset_name="windows-1255",
+ language="Hebrew",
+ char_to_order_map=WINDOWS_1255_HEBREW_CHAR_TO_ORDER,
+ language_model=HEBREW_LANG_MODEL,
+ typical_positive_ratio=0.984004,
+ keep_ascii_letters=False,
+ alphabet="אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ",
+)
diff --git a/libs/chardet/langhungarianmodel.py b/libs/chardet/langhungarianmodel.py
index 24a097f52..bd6630a05 100644
--- a/libs/chardet/langhungarianmodel.py
+++ b/libs/chardet/langhungarianmodel.py
@@ -1,9 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
from chardet.sbcharsetprober import SingleByteCharSetModel
-
# 3: Positive
# 2: Likely
# 1: Unlikely
@@ -4115,536 +4111,539 @@ HUNGARIAN_LANG_MODEL = {
# Character Mapping Table(s):
WINDOWS_1250_HUNGARIAN_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 28, # 'A'
- 66: 40, # 'B'
- 67: 54, # 'C'
- 68: 45, # 'D'
- 69: 32, # 'E'
- 70: 50, # 'F'
- 71: 49, # 'G'
- 72: 38, # 'H'
- 73: 39, # 'I'
- 74: 53, # 'J'
- 75: 36, # 'K'
- 76: 41, # 'L'
- 77: 34, # 'M'
- 78: 35, # 'N'
- 79: 47, # 'O'
- 80: 46, # 'P'
- 81: 72, # 'Q'
- 82: 43, # 'R'
- 83: 33, # 'S'
- 84: 37, # 'T'
- 85: 57, # 'U'
- 86: 48, # 'V'
- 87: 64, # 'W'
- 88: 68, # 'X'
- 89: 55, # 'Y'
- 90: 52, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 2, # 'a'
- 98: 18, # 'b'
- 99: 26, # 'c'
- 100: 17, # 'd'
- 101: 1, # 'e'
- 102: 27, # 'f'
- 103: 12, # 'g'
- 104: 20, # 'h'
- 105: 9, # 'i'
- 106: 22, # 'j'
- 107: 7, # 'k'
- 108: 6, # 'l'
- 109: 13, # 'm'
- 110: 4, # 'n'
- 111: 8, # 'o'
- 112: 23, # 'p'
- 113: 67, # 'q'
- 114: 10, # 'r'
- 115: 5, # 's'
- 116: 3, # 't'
- 117: 21, # 'u'
- 118: 19, # 'v'
- 119: 65, # 'w'
- 120: 62, # 'x'
- 121: 16, # 'y'
- 122: 11, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 161, # '€'
- 129: 162, # None
- 130: 163, # '‚'
- 131: 164, # None
- 132: 165, # '„'
- 133: 166, # '…'
- 134: 167, # '†'
- 135: 168, # '‡'
- 136: 169, # None
- 137: 170, # '‰'
- 138: 171, # 'Š'
- 139: 172, # '‹'
- 140: 173, # 'Ś'
- 141: 174, # 'Ť'
- 142: 175, # 'Ž'
- 143: 176, # 'Ź'
- 144: 177, # None
- 145: 178, # '‘'
- 146: 179, # '’'
- 147: 180, # '“'
- 148: 78, # '”'
- 149: 181, # '•'
- 150: 69, # '–'
- 151: 182, # '—'
- 152: 183, # None
- 153: 184, # '™'
- 154: 185, # 'š'
- 155: 186, # '›'
- 156: 187, # 'ś'
- 157: 188, # 'ť'
- 158: 189, # 'ž'
- 159: 190, # 'ź'
- 160: 191, # '\xa0'
- 161: 192, # 'ˇ'
- 162: 193, # '˘'
- 163: 194, # 'Ł'
- 164: 195, # '¤'
- 165: 196, # 'Ą'
- 166: 197, # '¦'
- 167: 76, # '§'
- 168: 198, # '¨'
- 169: 199, # '©'
- 170: 200, # 'Ş'
- 171: 201, # '«'
- 172: 202, # '¬'
- 173: 203, # '\xad'
- 174: 204, # '®'
- 175: 205, # 'Ż'
- 176: 81, # '°'
- 177: 206, # '±'
- 178: 207, # '˛'
- 179: 208, # 'ł'
- 180: 209, # '´'
- 181: 210, # 'µ'
- 182: 211, # '¶'
- 183: 212, # '·'
- 184: 213, # '¸'
- 185: 214, # 'ą'
- 186: 215, # 'ş'
- 187: 216, # '»'
- 188: 217, # 'Ľ'
- 189: 218, # '˝'
- 190: 219, # 'ľ'
- 191: 220, # 'ż'
- 192: 221, # 'Ŕ'
- 193: 51, # 'Á'
- 194: 83, # 'Â'
- 195: 222, # 'Ă'
- 196: 80, # 'Ä'
- 197: 223, # 'Ĺ'
- 198: 224, # 'Ć'
- 199: 225, # 'Ç'
- 200: 226, # 'Č'
- 201: 44, # 'É'
- 202: 227, # 'Ę'
- 203: 228, # 'Ë'
- 204: 229, # 'Ě'
- 205: 61, # 'Í'
- 206: 230, # 'Î'
- 207: 231, # 'Ď'
- 208: 232, # 'Đ'
- 209: 233, # 'Ń'
- 210: 234, # 'Ň'
- 211: 58, # 'Ó'
- 212: 235, # 'Ô'
- 213: 66, # 'Ő'
- 214: 59, # 'Ö'
- 215: 236, # '×'
- 216: 237, # 'Ř'
- 217: 238, # 'Ů'
- 218: 60, # 'Ú'
- 219: 70, # 'Ű'
- 220: 63, # 'Ü'
- 221: 239, # 'Ý'
- 222: 240, # 'Ţ'
- 223: 241, # 'ß'
- 224: 84, # 'ŕ'
- 225: 14, # 'á'
- 226: 75, # 'â'
- 227: 242, # 'ă'
- 228: 71, # 'ä'
- 229: 82, # 'ĺ'
- 230: 243, # 'ć'
- 231: 73, # 'ç'
- 232: 244, # 'č'
- 233: 15, # 'é'
- 234: 85, # 'ę'
- 235: 79, # 'ë'
- 236: 86, # 'ě'
- 237: 30, # 'í'
- 238: 77, # 'î'
- 239: 87, # 'ď'
- 240: 245, # 'đ'
- 241: 246, # 'ń'
- 242: 247, # 'ň'
- 243: 25, # 'ó'
- 244: 74, # 'ô'
- 245: 42, # 'ő'
- 246: 24, # 'ö'
- 247: 248, # '÷'
- 248: 249, # 'ř'
- 249: 250, # 'ů'
- 250: 31, # 'ú'
- 251: 56, # 'ű'
- 252: 29, # 'ü'
- 253: 251, # 'ý'
- 254: 252, # 'ţ'
- 255: 253, # '˙'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 28, # 'A'
+ 66: 40, # 'B'
+ 67: 54, # 'C'
+ 68: 45, # 'D'
+ 69: 32, # 'E'
+ 70: 50, # 'F'
+ 71: 49, # 'G'
+ 72: 38, # 'H'
+ 73: 39, # 'I'
+ 74: 53, # 'J'
+ 75: 36, # 'K'
+ 76: 41, # 'L'
+ 77: 34, # 'M'
+ 78: 35, # 'N'
+ 79: 47, # 'O'
+ 80: 46, # 'P'
+ 81: 72, # 'Q'
+ 82: 43, # 'R'
+ 83: 33, # 'S'
+ 84: 37, # 'T'
+ 85: 57, # 'U'
+ 86: 48, # 'V'
+ 87: 64, # 'W'
+ 88: 68, # 'X'
+ 89: 55, # 'Y'
+ 90: 52, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 2, # 'a'
+ 98: 18, # 'b'
+ 99: 26, # 'c'
+ 100: 17, # 'd'
+ 101: 1, # 'e'
+ 102: 27, # 'f'
+ 103: 12, # 'g'
+ 104: 20, # 'h'
+ 105: 9, # 'i'
+ 106: 22, # 'j'
+ 107: 7, # 'k'
+ 108: 6, # 'l'
+ 109: 13, # 'm'
+ 110: 4, # 'n'
+ 111: 8, # 'o'
+ 112: 23, # 'p'
+ 113: 67, # 'q'
+ 114: 10, # 'r'
+ 115: 5, # 's'
+ 116: 3, # 't'
+ 117: 21, # 'u'
+ 118: 19, # 'v'
+ 119: 65, # 'w'
+ 120: 62, # 'x'
+ 121: 16, # 'y'
+ 122: 11, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 161, # '€'
+ 129: 162, # None
+ 130: 163, # '‚'
+ 131: 164, # None
+ 132: 165, # '„'
+ 133: 166, # '…'
+ 134: 167, # '†'
+ 135: 168, # '‡'
+ 136: 169, # None
+ 137: 170, # '‰'
+ 138: 171, # 'Š'
+ 139: 172, # '‹'
+ 140: 173, # 'Ś'
+ 141: 174, # 'Ť'
+ 142: 175, # 'Ž'
+ 143: 176, # 'Ź'
+ 144: 177, # None
+ 145: 178, # '‘'
+ 146: 179, # '’'
+ 147: 180, # '“'
+ 148: 78, # '”'
+ 149: 181, # '•'
+ 150: 69, # '–'
+ 151: 182, # '—'
+ 152: 183, # None
+ 153: 184, # '™'
+ 154: 185, # 'š'
+ 155: 186, # '›'
+ 156: 187, # 'ś'
+ 157: 188, # 'ť'
+ 158: 189, # 'ž'
+ 159: 190, # 'ź'
+ 160: 191, # '\xa0'
+ 161: 192, # 'ˇ'
+ 162: 193, # '˘'
+ 163: 194, # 'Ł'
+ 164: 195, # '¤'
+ 165: 196, # 'Ą'
+ 166: 197, # '¦'
+ 167: 76, # '§'
+ 168: 198, # '¨'
+ 169: 199, # '©'
+ 170: 200, # 'Ş'
+ 171: 201, # '«'
+ 172: 202, # '¬'
+ 173: 203, # '\xad'
+ 174: 204, # '®'
+ 175: 205, # 'Ż'
+ 176: 81, # '°'
+ 177: 206, # '±'
+ 178: 207, # '˛'
+ 179: 208, # 'ł'
+ 180: 209, # '´'
+ 181: 210, # 'µ'
+ 182: 211, # '¶'
+ 183: 212, # '·'
+ 184: 213, # '¸'
+ 185: 214, # 'ą'
+ 186: 215, # 'ş'
+ 187: 216, # '»'
+ 188: 217, # 'Ľ'
+ 189: 218, # '˝'
+ 190: 219, # 'ľ'
+ 191: 220, # 'ż'
+ 192: 221, # 'Ŕ'
+ 193: 51, # 'Á'
+ 194: 83, # 'Â'
+ 195: 222, # 'Ă'
+ 196: 80, # 'Ä'
+ 197: 223, # 'Ĺ'
+ 198: 224, # 'Ć'
+ 199: 225, # 'Ç'
+ 200: 226, # 'Č'
+ 201: 44, # 'É'
+ 202: 227, # 'Ę'
+ 203: 228, # 'Ë'
+ 204: 229, # 'Ě'
+ 205: 61, # 'Í'
+ 206: 230, # 'Î'
+ 207: 231, # 'Ď'
+ 208: 232, # 'Đ'
+ 209: 233, # 'Ń'
+ 210: 234, # 'Ň'
+ 211: 58, # 'Ó'
+ 212: 235, # 'Ô'
+ 213: 66, # 'Ő'
+ 214: 59, # 'Ö'
+ 215: 236, # '×'
+ 216: 237, # 'Ř'
+ 217: 238, # 'Ů'
+ 218: 60, # 'Ú'
+ 219: 70, # 'Ű'
+ 220: 63, # 'Ü'
+ 221: 239, # 'Ý'
+ 222: 240, # 'Ţ'
+ 223: 241, # 'ß'
+ 224: 84, # 'ŕ'
+ 225: 14, # 'á'
+ 226: 75, # 'â'
+ 227: 242, # 'ă'
+ 228: 71, # 'ä'
+ 229: 82, # 'ĺ'
+ 230: 243, # 'ć'
+ 231: 73, # 'ç'
+ 232: 244, # 'č'
+ 233: 15, # 'é'
+ 234: 85, # 'ę'
+ 235: 79, # 'ë'
+ 236: 86, # 'ě'
+ 237: 30, # 'í'
+ 238: 77, # 'î'
+ 239: 87, # 'ď'
+ 240: 245, # 'đ'
+ 241: 246, # 'ń'
+ 242: 247, # 'ň'
+ 243: 25, # 'ó'
+ 244: 74, # 'ô'
+ 245: 42, # 'ő'
+ 246: 24, # 'ö'
+ 247: 248, # '÷'
+ 248: 249, # 'ř'
+ 249: 250, # 'ů'
+ 250: 31, # 'ú'
+ 251: 56, # 'ű'
+ 252: 29, # 'ü'
+ 253: 251, # 'ý'
+ 254: 252, # 'ţ'
+ 255: 253, # '˙'
}
-WINDOWS_1250_HUNGARIAN_MODEL = SingleByteCharSetModel(charset_name='windows-1250',
- language='Hungarian',
- char_to_order_map=WINDOWS_1250_HUNGARIAN_CHAR_TO_ORDER,
- language_model=HUNGARIAN_LANG_MODEL,
- typical_positive_ratio=0.947368,
- keep_ascii_letters=True,
- alphabet='ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű')
+WINDOWS_1250_HUNGARIAN_MODEL = SingleByteCharSetModel(
+ charset_name="windows-1250",
+ language="Hungarian",
+ char_to_order_map=WINDOWS_1250_HUNGARIAN_CHAR_TO_ORDER,
+ language_model=HUNGARIAN_LANG_MODEL,
+ typical_positive_ratio=0.947368,
+ keep_ascii_letters=True,
+ alphabet="ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű",
+)
ISO_8859_2_HUNGARIAN_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 28, # 'A'
- 66: 40, # 'B'
- 67: 54, # 'C'
- 68: 45, # 'D'
- 69: 32, # 'E'
- 70: 50, # 'F'
- 71: 49, # 'G'
- 72: 38, # 'H'
- 73: 39, # 'I'
- 74: 53, # 'J'
- 75: 36, # 'K'
- 76: 41, # 'L'
- 77: 34, # 'M'
- 78: 35, # 'N'
- 79: 47, # 'O'
- 80: 46, # 'P'
- 81: 71, # 'Q'
- 82: 43, # 'R'
- 83: 33, # 'S'
- 84: 37, # 'T'
- 85: 57, # 'U'
- 86: 48, # 'V'
- 87: 64, # 'W'
- 88: 68, # 'X'
- 89: 55, # 'Y'
- 90: 52, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 2, # 'a'
- 98: 18, # 'b'
- 99: 26, # 'c'
- 100: 17, # 'd'
- 101: 1, # 'e'
- 102: 27, # 'f'
- 103: 12, # 'g'
- 104: 20, # 'h'
- 105: 9, # 'i'
- 106: 22, # 'j'
- 107: 7, # 'k'
- 108: 6, # 'l'
- 109: 13, # 'm'
- 110: 4, # 'n'
- 111: 8, # 'o'
- 112: 23, # 'p'
- 113: 67, # 'q'
- 114: 10, # 'r'
- 115: 5, # 's'
- 116: 3, # 't'
- 117: 21, # 'u'
- 118: 19, # 'v'
- 119: 65, # 'w'
- 120: 62, # 'x'
- 121: 16, # 'y'
- 122: 11, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 159, # '\x80'
- 129: 160, # '\x81'
- 130: 161, # '\x82'
- 131: 162, # '\x83'
- 132: 163, # '\x84'
- 133: 164, # '\x85'
- 134: 165, # '\x86'
- 135: 166, # '\x87'
- 136: 167, # '\x88'
- 137: 168, # '\x89'
- 138: 169, # '\x8a'
- 139: 170, # '\x8b'
- 140: 171, # '\x8c'
- 141: 172, # '\x8d'
- 142: 173, # '\x8e'
- 143: 174, # '\x8f'
- 144: 175, # '\x90'
- 145: 176, # '\x91'
- 146: 177, # '\x92'
- 147: 178, # '\x93'
- 148: 179, # '\x94'
- 149: 180, # '\x95'
- 150: 181, # '\x96'
- 151: 182, # '\x97'
- 152: 183, # '\x98'
- 153: 184, # '\x99'
- 154: 185, # '\x9a'
- 155: 186, # '\x9b'
- 156: 187, # '\x9c'
- 157: 188, # '\x9d'
- 158: 189, # '\x9e'
- 159: 190, # '\x9f'
- 160: 191, # '\xa0'
- 161: 192, # 'Ą'
- 162: 193, # '˘'
- 163: 194, # 'Ł'
- 164: 195, # '¤'
- 165: 196, # 'Ľ'
- 166: 197, # 'Ś'
- 167: 75, # '§'
- 168: 198, # '¨'
- 169: 199, # 'Š'
- 170: 200, # 'Ş'
- 171: 201, # 'Ť'
- 172: 202, # 'Ź'
- 173: 203, # '\xad'
- 174: 204, # 'Ž'
- 175: 205, # 'Ż'
- 176: 79, # '°'
- 177: 206, # 'ą'
- 178: 207, # '˛'
- 179: 208, # 'ł'
- 180: 209, # '´'
- 181: 210, # 'ľ'
- 182: 211, # 'ś'
- 183: 212, # 'ˇ'
- 184: 213, # '¸'
- 185: 214, # 'š'
- 186: 215, # 'ş'
- 187: 216, # 'ť'
- 188: 217, # 'ź'
- 189: 218, # '˝'
- 190: 219, # 'ž'
- 191: 220, # 'ż'
- 192: 221, # 'Ŕ'
- 193: 51, # 'Á'
- 194: 81, # 'Â'
- 195: 222, # 'Ă'
- 196: 78, # 'Ä'
- 197: 223, # 'Ĺ'
- 198: 224, # 'Ć'
- 199: 225, # 'Ç'
- 200: 226, # 'Č'
- 201: 44, # 'É'
- 202: 227, # 'Ę'
- 203: 228, # 'Ë'
- 204: 229, # 'Ě'
- 205: 61, # 'Í'
- 206: 230, # 'Î'
- 207: 231, # 'Ď'
- 208: 232, # 'Đ'
- 209: 233, # 'Ń'
- 210: 234, # 'Ň'
- 211: 58, # 'Ó'
- 212: 235, # 'Ô'
- 213: 66, # 'Ő'
- 214: 59, # 'Ö'
- 215: 236, # '×'
- 216: 237, # 'Ř'
- 217: 238, # 'Ů'
- 218: 60, # 'Ú'
- 219: 69, # 'Ű'
- 220: 63, # 'Ü'
- 221: 239, # 'Ý'
- 222: 240, # 'Ţ'
- 223: 241, # 'ß'
- 224: 82, # 'ŕ'
- 225: 14, # 'á'
- 226: 74, # 'â'
- 227: 242, # 'ă'
- 228: 70, # 'ä'
- 229: 80, # 'ĺ'
- 230: 243, # 'ć'
- 231: 72, # 'ç'
- 232: 244, # 'č'
- 233: 15, # 'é'
- 234: 83, # 'ę'
- 235: 77, # 'ë'
- 236: 84, # 'ě'
- 237: 30, # 'í'
- 238: 76, # 'î'
- 239: 85, # 'ď'
- 240: 245, # 'đ'
- 241: 246, # 'ń'
- 242: 247, # 'ň'
- 243: 25, # 'ó'
- 244: 73, # 'ô'
- 245: 42, # 'ő'
- 246: 24, # 'ö'
- 247: 248, # '÷'
- 248: 249, # 'ř'
- 249: 250, # 'ů'
- 250: 31, # 'ú'
- 251: 56, # 'ű'
- 252: 29, # 'ü'
- 253: 251, # 'ý'
- 254: 252, # 'ţ'
- 255: 253, # '˙'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 28, # 'A'
+ 66: 40, # 'B'
+ 67: 54, # 'C'
+ 68: 45, # 'D'
+ 69: 32, # 'E'
+ 70: 50, # 'F'
+ 71: 49, # 'G'
+ 72: 38, # 'H'
+ 73: 39, # 'I'
+ 74: 53, # 'J'
+ 75: 36, # 'K'
+ 76: 41, # 'L'
+ 77: 34, # 'M'
+ 78: 35, # 'N'
+ 79: 47, # 'O'
+ 80: 46, # 'P'
+ 81: 71, # 'Q'
+ 82: 43, # 'R'
+ 83: 33, # 'S'
+ 84: 37, # 'T'
+ 85: 57, # 'U'
+ 86: 48, # 'V'
+ 87: 64, # 'W'
+ 88: 68, # 'X'
+ 89: 55, # 'Y'
+ 90: 52, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 2, # 'a'
+ 98: 18, # 'b'
+ 99: 26, # 'c'
+ 100: 17, # 'd'
+ 101: 1, # 'e'
+ 102: 27, # 'f'
+ 103: 12, # 'g'
+ 104: 20, # 'h'
+ 105: 9, # 'i'
+ 106: 22, # 'j'
+ 107: 7, # 'k'
+ 108: 6, # 'l'
+ 109: 13, # 'm'
+ 110: 4, # 'n'
+ 111: 8, # 'o'
+ 112: 23, # 'p'
+ 113: 67, # 'q'
+ 114: 10, # 'r'
+ 115: 5, # 's'
+ 116: 3, # 't'
+ 117: 21, # 'u'
+ 118: 19, # 'v'
+ 119: 65, # 'w'
+ 120: 62, # 'x'
+ 121: 16, # 'y'
+ 122: 11, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 159, # '\x80'
+ 129: 160, # '\x81'
+ 130: 161, # '\x82'
+ 131: 162, # '\x83'
+ 132: 163, # '\x84'
+ 133: 164, # '\x85'
+ 134: 165, # '\x86'
+ 135: 166, # '\x87'
+ 136: 167, # '\x88'
+ 137: 168, # '\x89'
+ 138: 169, # '\x8a'
+ 139: 170, # '\x8b'
+ 140: 171, # '\x8c'
+ 141: 172, # '\x8d'
+ 142: 173, # '\x8e'
+ 143: 174, # '\x8f'
+ 144: 175, # '\x90'
+ 145: 176, # '\x91'
+ 146: 177, # '\x92'
+ 147: 178, # '\x93'
+ 148: 179, # '\x94'
+ 149: 180, # '\x95'
+ 150: 181, # '\x96'
+ 151: 182, # '\x97'
+ 152: 183, # '\x98'
+ 153: 184, # '\x99'
+ 154: 185, # '\x9a'
+ 155: 186, # '\x9b'
+ 156: 187, # '\x9c'
+ 157: 188, # '\x9d'
+ 158: 189, # '\x9e'
+ 159: 190, # '\x9f'
+ 160: 191, # '\xa0'
+ 161: 192, # 'Ą'
+ 162: 193, # '˘'
+ 163: 194, # 'Ł'
+ 164: 195, # '¤'
+ 165: 196, # 'Ľ'
+ 166: 197, # 'Ś'
+ 167: 75, # '§'
+ 168: 198, # '¨'
+ 169: 199, # 'Š'
+ 170: 200, # 'Ş'
+ 171: 201, # 'Ť'
+ 172: 202, # 'Ź'
+ 173: 203, # '\xad'
+ 174: 204, # 'Ž'
+ 175: 205, # 'Ż'
+ 176: 79, # '°'
+ 177: 206, # 'ą'
+ 178: 207, # '˛'
+ 179: 208, # 'ł'
+ 180: 209, # '´'
+ 181: 210, # 'ľ'
+ 182: 211, # 'ś'
+ 183: 212, # 'ˇ'
+ 184: 213, # '¸'
+ 185: 214, # 'š'
+ 186: 215, # 'ş'
+ 187: 216, # 'ť'
+ 188: 217, # 'ź'
+ 189: 218, # '˝'
+ 190: 219, # 'ž'
+ 191: 220, # 'ż'
+ 192: 221, # 'Ŕ'
+ 193: 51, # 'Á'
+ 194: 81, # 'Â'
+ 195: 222, # 'Ă'
+ 196: 78, # 'Ä'
+ 197: 223, # 'Ĺ'
+ 198: 224, # 'Ć'
+ 199: 225, # 'Ç'
+ 200: 226, # 'Č'
+ 201: 44, # 'É'
+ 202: 227, # 'Ę'
+ 203: 228, # 'Ë'
+ 204: 229, # 'Ě'
+ 205: 61, # 'Í'
+ 206: 230, # 'Î'
+ 207: 231, # 'Ď'
+ 208: 232, # 'Đ'
+ 209: 233, # 'Ń'
+ 210: 234, # 'Ň'
+ 211: 58, # 'Ó'
+ 212: 235, # 'Ô'
+ 213: 66, # 'Ő'
+ 214: 59, # 'Ö'
+ 215: 236, # '×'
+ 216: 237, # 'Ř'
+ 217: 238, # 'Ů'
+ 218: 60, # 'Ú'
+ 219: 69, # 'Ű'
+ 220: 63, # 'Ü'
+ 221: 239, # 'Ý'
+ 222: 240, # 'Ţ'
+ 223: 241, # 'ß'
+ 224: 82, # 'ŕ'
+ 225: 14, # 'á'
+ 226: 74, # 'â'
+ 227: 242, # 'ă'
+ 228: 70, # 'ä'
+ 229: 80, # 'ĺ'
+ 230: 243, # 'ć'
+ 231: 72, # 'ç'
+ 232: 244, # 'č'
+ 233: 15, # 'é'
+ 234: 83, # 'ę'
+ 235: 77, # 'ë'
+ 236: 84, # 'ě'
+ 237: 30, # 'í'
+ 238: 76, # 'î'
+ 239: 85, # 'ď'
+ 240: 245, # 'đ'
+ 241: 246, # 'ń'
+ 242: 247, # 'ň'
+ 243: 25, # 'ó'
+ 244: 73, # 'ô'
+ 245: 42, # 'ő'
+ 246: 24, # 'ö'
+ 247: 248, # '÷'
+ 248: 249, # 'ř'
+ 249: 250, # 'ů'
+ 250: 31, # 'ú'
+ 251: 56, # 'ű'
+ 252: 29, # 'ü'
+ 253: 251, # 'ý'
+ 254: 252, # 'ţ'
+ 255: 253, # '˙'
}
-ISO_8859_2_HUNGARIAN_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-2',
- language='Hungarian',
- char_to_order_map=ISO_8859_2_HUNGARIAN_CHAR_TO_ORDER,
- language_model=HUNGARIAN_LANG_MODEL,
- typical_positive_ratio=0.947368,
- keep_ascii_letters=True,
- alphabet='ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű')
-
+ISO_8859_2_HUNGARIAN_MODEL = SingleByteCharSetModel(
+ charset_name="ISO-8859-2",
+ language="Hungarian",
+ char_to_order_map=ISO_8859_2_HUNGARIAN_CHAR_TO_ORDER,
+ language_model=HUNGARIAN_LANG_MODEL,
+ typical_positive_ratio=0.947368,
+ keep_ascii_letters=True,
+ alphabet="ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű",
+)
diff --git a/libs/chardet/langrussianmodel.py b/libs/chardet/langrussianmodel.py
index 569689d0f..0d5b17844 100644
--- a/libs/chardet/langrussianmodel.py
+++ b/libs/chardet/langrussianmodel.py
@@ -1,9 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
from chardet.sbcharsetprober import SingleByteCharSetModel
-
# 3: Positive
# 2: Likely
# 1: Unlikely
@@ -4115,1604 +4111,1615 @@ RUSSIAN_LANG_MODEL = {
# Character Mapping Table(s):
IBM866_RUSSIAN_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 142, # 'A'
- 66: 143, # 'B'
- 67: 144, # 'C'
- 68: 145, # 'D'
- 69: 146, # 'E'
- 70: 147, # 'F'
- 71: 148, # 'G'
- 72: 149, # 'H'
- 73: 150, # 'I'
- 74: 151, # 'J'
- 75: 152, # 'K'
- 76: 74, # 'L'
- 77: 153, # 'M'
- 78: 75, # 'N'
- 79: 154, # 'O'
- 80: 155, # 'P'
- 81: 156, # 'Q'
- 82: 157, # 'R'
- 83: 158, # 'S'
- 84: 159, # 'T'
- 85: 160, # 'U'
- 86: 161, # 'V'
- 87: 162, # 'W'
- 88: 163, # 'X'
- 89: 164, # 'Y'
- 90: 165, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 71, # 'a'
- 98: 172, # 'b'
- 99: 66, # 'c'
- 100: 173, # 'd'
- 101: 65, # 'e'
- 102: 174, # 'f'
- 103: 76, # 'g'
- 104: 175, # 'h'
- 105: 64, # 'i'
- 106: 176, # 'j'
- 107: 177, # 'k'
- 108: 77, # 'l'
- 109: 72, # 'm'
- 110: 178, # 'n'
- 111: 69, # 'o'
- 112: 67, # 'p'
- 113: 179, # 'q'
- 114: 78, # 'r'
- 115: 73, # 's'
- 116: 180, # 't'
- 117: 181, # 'u'
- 118: 79, # 'v'
- 119: 182, # 'w'
- 120: 183, # 'x'
- 121: 184, # 'y'
- 122: 185, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 37, # 'А'
- 129: 44, # 'Б'
- 130: 33, # 'В'
- 131: 46, # 'Г'
- 132: 41, # 'Д'
- 133: 48, # 'Е'
- 134: 56, # 'Ж'
- 135: 51, # 'З'
- 136: 42, # 'И'
- 137: 60, # 'Й'
- 138: 36, # 'К'
- 139: 49, # 'Л'
- 140: 38, # 'М'
- 141: 31, # 'Н'
- 142: 34, # 'О'
- 143: 35, # 'П'
- 144: 45, # 'Р'
- 145: 32, # 'С'
- 146: 40, # 'Т'
- 147: 52, # 'У'
- 148: 53, # 'Ф'
- 149: 55, # 'Х'
- 150: 58, # 'Ц'
- 151: 50, # 'Ч'
- 152: 57, # 'Ш'
- 153: 63, # 'Щ'
- 154: 70, # 'Ъ'
- 155: 62, # 'Ы'
- 156: 61, # 'Ь'
- 157: 47, # 'Э'
- 158: 59, # 'Ю'
- 159: 43, # 'Я'
- 160: 3, # 'а'
- 161: 21, # 'б'
- 162: 10, # 'в'
- 163: 19, # 'г'
- 164: 13, # 'д'
- 165: 2, # 'е'
- 166: 24, # 'ж'
- 167: 20, # 'з'
- 168: 4, # 'и'
- 169: 23, # 'й'
- 170: 11, # 'к'
- 171: 8, # 'л'
- 172: 12, # 'м'
- 173: 5, # 'н'
- 174: 1, # 'о'
- 175: 15, # 'п'
- 176: 191, # '░'
- 177: 192, # '▒'
- 178: 193, # '▓'
- 179: 194, # '│'
- 180: 195, # '┤'
- 181: 196, # '╡'
- 182: 197, # '╢'
- 183: 198, # '╖'
- 184: 199, # '╕'
- 185: 200, # '╣'
- 186: 201, # '║'
- 187: 202, # '╗'
- 188: 203, # '╝'
- 189: 204, # '╜'
- 190: 205, # '╛'
- 191: 206, # '┐'
- 192: 207, # '└'
- 193: 208, # '┴'
- 194: 209, # '┬'
- 195: 210, # '├'
- 196: 211, # '─'
- 197: 212, # '┼'
- 198: 213, # '╞'
- 199: 214, # '╟'
- 200: 215, # '╚'
- 201: 216, # '╔'
- 202: 217, # '╩'
- 203: 218, # '╦'
- 204: 219, # '╠'
- 205: 220, # '═'
- 206: 221, # '╬'
- 207: 222, # '╧'
- 208: 223, # '╨'
- 209: 224, # '╤'
- 210: 225, # '╥'
- 211: 226, # '╙'
- 212: 227, # '╘'
- 213: 228, # '╒'
- 214: 229, # '╓'
- 215: 230, # '╫'
- 216: 231, # '╪'
- 217: 232, # '┘'
- 218: 233, # '┌'
- 219: 234, # '█'
- 220: 235, # '▄'
- 221: 236, # '▌'
- 222: 237, # '▐'
- 223: 238, # '▀'
- 224: 9, # 'р'
- 225: 7, # 'с'
- 226: 6, # 'т'
- 227: 14, # 'у'
- 228: 39, # 'ф'
- 229: 26, # 'х'
- 230: 28, # 'ц'
- 231: 22, # 'ч'
- 232: 25, # 'ш'
- 233: 29, # 'щ'
- 234: 54, # 'ъ'
- 235: 18, # 'ы'
- 236: 17, # 'ь'
- 237: 30, # 'э'
- 238: 27, # 'ю'
- 239: 16, # 'я'
- 240: 239, # 'Ё'
- 241: 68, # 'ё'
- 242: 240, # 'Є'
- 243: 241, # 'є'
- 244: 242, # 'Ї'
- 245: 243, # 'ї'
- 246: 244, # 'Ў'
- 247: 245, # 'ў'
- 248: 246, # '°'
- 249: 247, # '∙'
- 250: 248, # '·'
- 251: 249, # '√'
- 252: 250, # '№'
- 253: 251, # '¤'
- 254: 252, # '■'
- 255: 255, # '\xa0'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 142, # 'A'
+ 66: 143, # 'B'
+ 67: 144, # 'C'
+ 68: 145, # 'D'
+ 69: 146, # 'E'
+ 70: 147, # 'F'
+ 71: 148, # 'G'
+ 72: 149, # 'H'
+ 73: 150, # 'I'
+ 74: 151, # 'J'
+ 75: 152, # 'K'
+ 76: 74, # 'L'
+ 77: 153, # 'M'
+ 78: 75, # 'N'
+ 79: 154, # 'O'
+ 80: 155, # 'P'
+ 81: 156, # 'Q'
+ 82: 157, # 'R'
+ 83: 158, # 'S'
+ 84: 159, # 'T'
+ 85: 160, # 'U'
+ 86: 161, # 'V'
+ 87: 162, # 'W'
+ 88: 163, # 'X'
+ 89: 164, # 'Y'
+ 90: 165, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 71, # 'a'
+ 98: 172, # 'b'
+ 99: 66, # 'c'
+ 100: 173, # 'd'
+ 101: 65, # 'e'
+ 102: 174, # 'f'
+ 103: 76, # 'g'
+ 104: 175, # 'h'
+ 105: 64, # 'i'
+ 106: 176, # 'j'
+ 107: 177, # 'k'
+ 108: 77, # 'l'
+ 109: 72, # 'm'
+ 110: 178, # 'n'
+ 111: 69, # 'o'
+ 112: 67, # 'p'
+ 113: 179, # 'q'
+ 114: 78, # 'r'
+ 115: 73, # 's'
+ 116: 180, # 't'
+ 117: 181, # 'u'
+ 118: 79, # 'v'
+ 119: 182, # 'w'
+ 120: 183, # 'x'
+ 121: 184, # 'y'
+ 122: 185, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 37, # 'А'
+ 129: 44, # 'Б'
+ 130: 33, # 'В'
+ 131: 46, # 'Г'
+ 132: 41, # 'Д'
+ 133: 48, # 'Е'
+ 134: 56, # 'Ж'
+ 135: 51, # 'З'
+ 136: 42, # 'И'
+ 137: 60, # 'Й'
+ 138: 36, # 'К'
+ 139: 49, # 'Л'
+ 140: 38, # 'М'
+ 141: 31, # 'Н'
+ 142: 34, # 'О'
+ 143: 35, # 'П'
+ 144: 45, # 'Р'
+ 145: 32, # 'С'
+ 146: 40, # 'Т'
+ 147: 52, # 'У'
+ 148: 53, # 'Ф'
+ 149: 55, # 'Х'
+ 150: 58, # 'Ц'
+ 151: 50, # 'Ч'
+ 152: 57, # 'Ш'
+ 153: 63, # 'Щ'
+ 154: 70, # 'Ъ'
+ 155: 62, # 'Ы'
+ 156: 61, # 'Ь'
+ 157: 47, # 'Э'
+ 158: 59, # 'Ю'
+ 159: 43, # 'Я'
+ 160: 3, # 'а'
+ 161: 21, # 'б'
+ 162: 10, # 'в'
+ 163: 19, # 'г'
+ 164: 13, # 'д'
+ 165: 2, # 'е'
+ 166: 24, # 'ж'
+ 167: 20, # 'з'
+ 168: 4, # 'и'
+ 169: 23, # 'й'
+ 170: 11, # 'к'
+ 171: 8, # 'л'
+ 172: 12, # 'м'
+ 173: 5, # 'н'
+ 174: 1, # 'о'
+ 175: 15, # 'п'
+ 176: 191, # '░'
+ 177: 192, # '▒'
+ 178: 193, # '▓'
+ 179: 194, # '│'
+ 180: 195, # '┤'
+ 181: 196, # '╡'
+ 182: 197, # '╢'
+ 183: 198, # '╖'
+ 184: 199, # '╕'
+ 185: 200, # '╣'
+ 186: 201, # '║'
+ 187: 202, # '╗'
+ 188: 203, # '╝'
+ 189: 204, # '╜'
+ 190: 205, # '╛'
+ 191: 206, # '┐'
+ 192: 207, # '└'
+ 193: 208, # '┴'
+ 194: 209, # '┬'
+ 195: 210, # '├'
+ 196: 211, # '─'
+ 197: 212, # '┼'
+ 198: 213, # '╞'
+ 199: 214, # '╟'
+ 200: 215, # '╚'
+ 201: 216, # '╔'
+ 202: 217, # '╩'
+ 203: 218, # '╦'
+ 204: 219, # '╠'
+ 205: 220, # '═'
+ 206: 221, # '╬'
+ 207: 222, # '╧'
+ 208: 223, # '╨'
+ 209: 224, # '╤'
+ 210: 225, # '╥'
+ 211: 226, # '╙'
+ 212: 227, # '╘'
+ 213: 228, # '╒'
+ 214: 229, # '╓'
+ 215: 230, # '╫'
+ 216: 231, # '╪'
+ 217: 232, # '┘'
+ 218: 233, # '┌'
+ 219: 234, # '█'
+ 220: 235, # '▄'
+ 221: 236, # '▌'
+ 222: 237, # '▐'
+ 223: 238, # '▀'
+ 224: 9, # 'р'
+ 225: 7, # 'с'
+ 226: 6, # 'т'
+ 227: 14, # 'у'
+ 228: 39, # 'ф'
+ 229: 26, # 'х'
+ 230: 28, # 'ц'
+ 231: 22, # 'ч'
+ 232: 25, # 'ш'
+ 233: 29, # 'щ'
+ 234: 54, # 'ъ'
+ 235: 18, # 'ы'
+ 236: 17, # 'ь'
+ 237: 30, # 'э'
+ 238: 27, # 'ю'
+ 239: 16, # 'я'
+ 240: 239, # 'Ё'
+ 241: 68, # 'ё'
+ 242: 240, # 'Є'
+ 243: 241, # 'є'
+ 244: 242, # 'Ї'
+ 245: 243, # 'ї'
+ 246: 244, # 'Ў'
+ 247: 245, # 'ў'
+ 248: 246, # '°'
+ 249: 247, # '∙'
+ 250: 248, # '·'
+ 251: 249, # '√'
+ 252: 250, # '№'
+ 253: 251, # '¤'
+ 254: 252, # '■'
+ 255: 255, # '\xa0'
}
-IBM866_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='IBM866',
- language='Russian',
- char_to_order_map=IBM866_RUSSIAN_CHAR_TO_ORDER,
- language_model=RUSSIAN_LANG_MODEL,
- typical_positive_ratio=0.976601,
- keep_ascii_letters=False,
- alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё')
+IBM866_RUSSIAN_MODEL = SingleByteCharSetModel(
+ charset_name="IBM866",
+ language="Russian",
+ char_to_order_map=IBM866_RUSSIAN_CHAR_TO_ORDER,
+ language_model=RUSSIAN_LANG_MODEL,
+ typical_positive_ratio=0.976601,
+ keep_ascii_letters=False,
+ alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё",
+)
WINDOWS_1251_RUSSIAN_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 142, # 'A'
- 66: 143, # 'B'
- 67: 144, # 'C'
- 68: 145, # 'D'
- 69: 146, # 'E'
- 70: 147, # 'F'
- 71: 148, # 'G'
- 72: 149, # 'H'
- 73: 150, # 'I'
- 74: 151, # 'J'
- 75: 152, # 'K'
- 76: 74, # 'L'
- 77: 153, # 'M'
- 78: 75, # 'N'
- 79: 154, # 'O'
- 80: 155, # 'P'
- 81: 156, # 'Q'
- 82: 157, # 'R'
- 83: 158, # 'S'
- 84: 159, # 'T'
- 85: 160, # 'U'
- 86: 161, # 'V'
- 87: 162, # 'W'
- 88: 163, # 'X'
- 89: 164, # 'Y'
- 90: 165, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 71, # 'a'
- 98: 172, # 'b'
- 99: 66, # 'c'
- 100: 173, # 'd'
- 101: 65, # 'e'
- 102: 174, # 'f'
- 103: 76, # 'g'
- 104: 175, # 'h'
- 105: 64, # 'i'
- 106: 176, # 'j'
- 107: 177, # 'k'
- 108: 77, # 'l'
- 109: 72, # 'm'
- 110: 178, # 'n'
- 111: 69, # 'o'
- 112: 67, # 'p'
- 113: 179, # 'q'
- 114: 78, # 'r'
- 115: 73, # 's'
- 116: 180, # 't'
- 117: 181, # 'u'
- 118: 79, # 'v'
- 119: 182, # 'w'
- 120: 183, # 'x'
- 121: 184, # 'y'
- 122: 185, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 191, # 'Ђ'
- 129: 192, # 'Ѓ'
- 130: 193, # '‚'
- 131: 194, # 'ѓ'
- 132: 195, # '„'
- 133: 196, # '…'
- 134: 197, # '†'
- 135: 198, # '‡'
- 136: 199, # '€'
- 137: 200, # '‰'
- 138: 201, # 'Љ'
- 139: 202, # '‹'
- 140: 203, # 'Њ'
- 141: 204, # 'Ќ'
- 142: 205, # 'Ћ'
- 143: 206, # 'Џ'
- 144: 207, # 'ђ'
- 145: 208, # '‘'
- 146: 209, # '’'
- 147: 210, # '“'
- 148: 211, # '”'
- 149: 212, # '•'
- 150: 213, # '–'
- 151: 214, # '—'
- 152: 215, # None
- 153: 216, # '™'
- 154: 217, # 'љ'
- 155: 218, # '›'
- 156: 219, # 'њ'
- 157: 220, # 'ќ'
- 158: 221, # 'ћ'
- 159: 222, # 'џ'
- 160: 223, # '\xa0'
- 161: 224, # 'Ў'
- 162: 225, # 'ў'
- 163: 226, # 'Ј'
- 164: 227, # '¤'
- 165: 228, # 'Ґ'
- 166: 229, # '¦'
- 167: 230, # '§'
- 168: 231, # 'Ё'
- 169: 232, # '©'
- 170: 233, # 'Є'
- 171: 234, # '«'
- 172: 235, # '¬'
- 173: 236, # '\xad'
- 174: 237, # '®'
- 175: 238, # 'Ї'
- 176: 239, # '°'
- 177: 240, # '±'
- 178: 241, # 'І'
- 179: 242, # 'і'
- 180: 243, # 'ґ'
- 181: 244, # 'µ'
- 182: 245, # '¶'
- 183: 246, # '·'
- 184: 68, # 'ё'
- 185: 247, # '№'
- 186: 248, # 'є'
- 187: 249, # '»'
- 188: 250, # 'ј'
- 189: 251, # 'Ѕ'
- 190: 252, # 'ѕ'
- 191: 253, # 'ї'
- 192: 37, # 'А'
- 193: 44, # 'Б'
- 194: 33, # 'В'
- 195: 46, # 'Г'
- 196: 41, # 'Д'
- 197: 48, # 'Е'
- 198: 56, # 'Ж'
- 199: 51, # 'З'
- 200: 42, # 'И'
- 201: 60, # 'Й'
- 202: 36, # 'К'
- 203: 49, # 'Л'
- 204: 38, # 'М'
- 205: 31, # 'Н'
- 206: 34, # 'О'
- 207: 35, # 'П'
- 208: 45, # 'Р'
- 209: 32, # 'С'
- 210: 40, # 'Т'
- 211: 52, # 'У'
- 212: 53, # 'Ф'
- 213: 55, # 'Х'
- 214: 58, # 'Ц'
- 215: 50, # 'Ч'
- 216: 57, # 'Ш'
- 217: 63, # 'Щ'
- 218: 70, # 'Ъ'
- 219: 62, # 'Ы'
- 220: 61, # 'Ь'
- 221: 47, # 'Э'
- 222: 59, # 'Ю'
- 223: 43, # 'Я'
- 224: 3, # 'а'
- 225: 21, # 'б'
- 226: 10, # 'в'
- 227: 19, # 'г'
- 228: 13, # 'д'
- 229: 2, # 'е'
- 230: 24, # 'ж'
- 231: 20, # 'з'
- 232: 4, # 'и'
- 233: 23, # 'й'
- 234: 11, # 'к'
- 235: 8, # 'л'
- 236: 12, # 'м'
- 237: 5, # 'н'
- 238: 1, # 'о'
- 239: 15, # 'п'
- 240: 9, # 'р'
- 241: 7, # 'с'
- 242: 6, # 'т'
- 243: 14, # 'у'
- 244: 39, # 'ф'
- 245: 26, # 'х'
- 246: 28, # 'ц'
- 247: 22, # 'ч'
- 248: 25, # 'ш'
- 249: 29, # 'щ'
- 250: 54, # 'ъ'
- 251: 18, # 'ы'
- 252: 17, # 'ь'
- 253: 30, # 'э'
- 254: 27, # 'ю'
- 255: 16, # 'я'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 142, # 'A'
+ 66: 143, # 'B'
+ 67: 144, # 'C'
+ 68: 145, # 'D'
+ 69: 146, # 'E'
+ 70: 147, # 'F'
+ 71: 148, # 'G'
+ 72: 149, # 'H'
+ 73: 150, # 'I'
+ 74: 151, # 'J'
+ 75: 152, # 'K'
+ 76: 74, # 'L'
+ 77: 153, # 'M'
+ 78: 75, # 'N'
+ 79: 154, # 'O'
+ 80: 155, # 'P'
+ 81: 156, # 'Q'
+ 82: 157, # 'R'
+ 83: 158, # 'S'
+ 84: 159, # 'T'
+ 85: 160, # 'U'
+ 86: 161, # 'V'
+ 87: 162, # 'W'
+ 88: 163, # 'X'
+ 89: 164, # 'Y'
+ 90: 165, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 71, # 'a'
+ 98: 172, # 'b'
+ 99: 66, # 'c'
+ 100: 173, # 'd'
+ 101: 65, # 'e'
+ 102: 174, # 'f'
+ 103: 76, # 'g'
+ 104: 175, # 'h'
+ 105: 64, # 'i'
+ 106: 176, # 'j'
+ 107: 177, # 'k'
+ 108: 77, # 'l'
+ 109: 72, # 'm'
+ 110: 178, # 'n'
+ 111: 69, # 'o'
+ 112: 67, # 'p'
+ 113: 179, # 'q'
+ 114: 78, # 'r'
+ 115: 73, # 's'
+ 116: 180, # 't'
+ 117: 181, # 'u'
+ 118: 79, # 'v'
+ 119: 182, # 'w'
+ 120: 183, # 'x'
+ 121: 184, # 'y'
+ 122: 185, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 191, # 'Ђ'
+ 129: 192, # 'Ѓ'
+ 130: 193, # '‚'
+ 131: 194, # 'ѓ'
+ 132: 195, # '„'
+ 133: 196, # '…'
+ 134: 197, # '†'
+ 135: 198, # '‡'
+ 136: 199, # '€'
+ 137: 200, # '‰'
+ 138: 201, # 'Љ'
+ 139: 202, # '‹'
+ 140: 203, # 'Њ'
+ 141: 204, # 'Ќ'
+ 142: 205, # 'Ћ'
+ 143: 206, # 'Џ'
+ 144: 207, # 'ђ'
+ 145: 208, # '‘'
+ 146: 209, # '’'
+ 147: 210, # '“'
+ 148: 211, # '”'
+ 149: 212, # '•'
+ 150: 213, # '–'
+ 151: 214, # '—'
+ 152: 215, # None
+ 153: 216, # '™'
+ 154: 217, # 'љ'
+ 155: 218, # '›'
+ 156: 219, # 'њ'
+ 157: 220, # 'ќ'
+ 158: 221, # 'ћ'
+ 159: 222, # 'џ'
+ 160: 223, # '\xa0'
+ 161: 224, # 'Ў'
+ 162: 225, # 'ў'
+ 163: 226, # 'Ј'
+ 164: 227, # '¤'
+ 165: 228, # 'Ґ'
+ 166: 229, # '¦'
+ 167: 230, # '§'
+ 168: 231, # 'Ё'
+ 169: 232, # '©'
+ 170: 233, # 'Є'
+ 171: 234, # '«'
+ 172: 235, # '¬'
+ 173: 236, # '\xad'
+ 174: 237, # '®'
+ 175: 238, # 'Ї'
+ 176: 239, # '°'
+ 177: 240, # '±'
+ 178: 241, # 'І'
+ 179: 242, # 'і'
+ 180: 243, # 'ґ'
+ 181: 244, # 'µ'
+ 182: 245, # '¶'
+ 183: 246, # '·'
+ 184: 68, # 'ё'
+ 185: 247, # '№'
+ 186: 248, # 'є'
+ 187: 249, # '»'
+ 188: 250, # 'ј'
+ 189: 251, # 'Ѕ'
+ 190: 252, # 'ѕ'
+ 191: 253, # 'ї'
+ 192: 37, # 'А'
+ 193: 44, # 'Б'
+ 194: 33, # 'В'
+ 195: 46, # 'Г'
+ 196: 41, # 'Д'
+ 197: 48, # 'Е'
+ 198: 56, # 'Ж'
+ 199: 51, # 'З'
+ 200: 42, # 'И'
+ 201: 60, # 'Й'
+ 202: 36, # 'К'
+ 203: 49, # 'Л'
+ 204: 38, # 'М'
+ 205: 31, # 'Н'
+ 206: 34, # 'О'
+ 207: 35, # 'П'
+ 208: 45, # 'Р'
+ 209: 32, # 'С'
+ 210: 40, # 'Т'
+ 211: 52, # 'У'
+ 212: 53, # 'Ф'
+ 213: 55, # 'Х'
+ 214: 58, # 'Ц'
+ 215: 50, # 'Ч'
+ 216: 57, # 'Ш'
+ 217: 63, # 'Щ'
+ 218: 70, # 'Ъ'
+ 219: 62, # 'Ы'
+ 220: 61, # 'Ь'
+ 221: 47, # 'Э'
+ 222: 59, # 'Ю'
+ 223: 43, # 'Я'
+ 224: 3, # 'а'
+ 225: 21, # 'б'
+ 226: 10, # 'в'
+ 227: 19, # 'г'
+ 228: 13, # 'д'
+ 229: 2, # 'е'
+ 230: 24, # 'ж'
+ 231: 20, # 'з'
+ 232: 4, # 'и'
+ 233: 23, # 'й'
+ 234: 11, # 'к'
+ 235: 8, # 'л'
+ 236: 12, # 'м'
+ 237: 5, # 'н'
+ 238: 1, # 'о'
+ 239: 15, # 'п'
+ 240: 9, # 'р'
+ 241: 7, # 'с'
+ 242: 6, # 'т'
+ 243: 14, # 'у'
+ 244: 39, # 'ф'
+ 245: 26, # 'х'
+ 246: 28, # 'ц'
+ 247: 22, # 'ч'
+ 248: 25, # 'ш'
+ 249: 29, # 'щ'
+ 250: 54, # 'ъ'
+ 251: 18, # 'ы'
+ 252: 17, # 'ь'
+ 253: 30, # 'э'
+ 254: 27, # 'ю'
+ 255: 16, # 'я'
}
-WINDOWS_1251_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='windows-1251',
- language='Russian',
- char_to_order_map=WINDOWS_1251_RUSSIAN_CHAR_TO_ORDER,
- language_model=RUSSIAN_LANG_MODEL,
- typical_positive_ratio=0.976601,
- keep_ascii_letters=False,
- alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё')
+WINDOWS_1251_RUSSIAN_MODEL = SingleByteCharSetModel(
+ charset_name="windows-1251",
+ language="Russian",
+ char_to_order_map=WINDOWS_1251_RUSSIAN_CHAR_TO_ORDER,
+ language_model=RUSSIAN_LANG_MODEL,
+ typical_positive_ratio=0.976601,
+ keep_ascii_letters=False,
+ alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё",
+)
IBM855_RUSSIAN_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 142, # 'A'
- 66: 143, # 'B'
- 67: 144, # 'C'
- 68: 145, # 'D'
- 69: 146, # 'E'
- 70: 147, # 'F'
- 71: 148, # 'G'
- 72: 149, # 'H'
- 73: 150, # 'I'
- 74: 151, # 'J'
- 75: 152, # 'K'
- 76: 74, # 'L'
- 77: 153, # 'M'
- 78: 75, # 'N'
- 79: 154, # 'O'
- 80: 155, # 'P'
- 81: 156, # 'Q'
- 82: 157, # 'R'
- 83: 158, # 'S'
- 84: 159, # 'T'
- 85: 160, # 'U'
- 86: 161, # 'V'
- 87: 162, # 'W'
- 88: 163, # 'X'
- 89: 164, # 'Y'
- 90: 165, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 71, # 'a'
- 98: 172, # 'b'
- 99: 66, # 'c'
- 100: 173, # 'd'
- 101: 65, # 'e'
- 102: 174, # 'f'
- 103: 76, # 'g'
- 104: 175, # 'h'
- 105: 64, # 'i'
- 106: 176, # 'j'
- 107: 177, # 'k'
- 108: 77, # 'l'
- 109: 72, # 'm'
- 110: 178, # 'n'
- 111: 69, # 'o'
- 112: 67, # 'p'
- 113: 179, # 'q'
- 114: 78, # 'r'
- 115: 73, # 's'
- 116: 180, # 't'
- 117: 181, # 'u'
- 118: 79, # 'v'
- 119: 182, # 'w'
- 120: 183, # 'x'
- 121: 184, # 'y'
- 122: 185, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 191, # 'ђ'
- 129: 192, # 'Ђ'
- 130: 193, # 'ѓ'
- 131: 194, # 'Ѓ'
- 132: 68, # 'ё'
- 133: 195, # 'Ё'
- 134: 196, # 'є'
- 135: 197, # 'Є'
- 136: 198, # 'ѕ'
- 137: 199, # 'Ѕ'
- 138: 200, # 'і'
- 139: 201, # 'І'
- 140: 202, # 'ї'
- 141: 203, # 'Ї'
- 142: 204, # 'ј'
- 143: 205, # 'Ј'
- 144: 206, # 'љ'
- 145: 207, # 'Љ'
- 146: 208, # 'њ'
- 147: 209, # 'Њ'
- 148: 210, # 'ћ'
- 149: 211, # 'Ћ'
- 150: 212, # 'ќ'
- 151: 213, # 'Ќ'
- 152: 214, # 'ў'
- 153: 215, # 'Ў'
- 154: 216, # 'џ'
- 155: 217, # 'Џ'
- 156: 27, # 'ю'
- 157: 59, # 'Ю'
- 158: 54, # 'ъ'
- 159: 70, # 'Ъ'
- 160: 3, # 'а'
- 161: 37, # 'А'
- 162: 21, # 'б'
- 163: 44, # 'Б'
- 164: 28, # 'ц'
- 165: 58, # 'Ц'
- 166: 13, # 'д'
- 167: 41, # 'Д'
- 168: 2, # 'е'
- 169: 48, # 'Е'
- 170: 39, # 'ф'
- 171: 53, # 'Ф'
- 172: 19, # 'г'
- 173: 46, # 'Г'
- 174: 218, # '«'
- 175: 219, # '»'
- 176: 220, # '░'
- 177: 221, # '▒'
- 178: 222, # '▓'
- 179: 223, # '│'
- 180: 224, # '┤'
- 181: 26, # 'х'
- 182: 55, # 'Х'
- 183: 4, # 'и'
- 184: 42, # 'И'
- 185: 225, # '╣'
- 186: 226, # '║'
- 187: 227, # '╗'
- 188: 228, # '╝'
- 189: 23, # 'й'
- 190: 60, # 'Й'
- 191: 229, # '┐'
- 192: 230, # '└'
- 193: 231, # '┴'
- 194: 232, # '┬'
- 195: 233, # '├'
- 196: 234, # '─'
- 197: 235, # '┼'
- 198: 11, # 'к'
- 199: 36, # 'К'
- 200: 236, # '╚'
- 201: 237, # '╔'
- 202: 238, # '╩'
- 203: 239, # '╦'
- 204: 240, # '╠'
- 205: 241, # '═'
- 206: 242, # '╬'
- 207: 243, # '¤'
- 208: 8, # 'л'
- 209: 49, # 'Л'
- 210: 12, # 'м'
- 211: 38, # 'М'
- 212: 5, # 'н'
- 213: 31, # 'Н'
- 214: 1, # 'о'
- 215: 34, # 'О'
- 216: 15, # 'п'
- 217: 244, # '┘'
- 218: 245, # '┌'
- 219: 246, # '█'
- 220: 247, # '▄'
- 221: 35, # 'П'
- 222: 16, # 'я'
- 223: 248, # '▀'
- 224: 43, # 'Я'
- 225: 9, # 'р'
- 226: 45, # 'Р'
- 227: 7, # 'с'
- 228: 32, # 'С'
- 229: 6, # 'т'
- 230: 40, # 'Т'
- 231: 14, # 'у'
- 232: 52, # 'У'
- 233: 24, # 'ж'
- 234: 56, # 'Ж'
- 235: 10, # 'в'
- 236: 33, # 'В'
- 237: 17, # 'ь'
- 238: 61, # 'Ь'
- 239: 249, # '№'
- 240: 250, # '\xad'
- 241: 18, # 'ы'
- 242: 62, # 'Ы'
- 243: 20, # 'з'
- 244: 51, # 'З'
- 245: 25, # 'ш'
- 246: 57, # 'Ш'
- 247: 30, # 'э'
- 248: 47, # 'Э'
- 249: 29, # 'щ'
- 250: 63, # 'Щ'
- 251: 22, # 'ч'
- 252: 50, # 'Ч'
- 253: 251, # '§'
- 254: 252, # '■'
- 255: 255, # '\xa0'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 142, # 'A'
+ 66: 143, # 'B'
+ 67: 144, # 'C'
+ 68: 145, # 'D'
+ 69: 146, # 'E'
+ 70: 147, # 'F'
+ 71: 148, # 'G'
+ 72: 149, # 'H'
+ 73: 150, # 'I'
+ 74: 151, # 'J'
+ 75: 152, # 'K'
+ 76: 74, # 'L'
+ 77: 153, # 'M'
+ 78: 75, # 'N'
+ 79: 154, # 'O'
+ 80: 155, # 'P'
+ 81: 156, # 'Q'
+ 82: 157, # 'R'
+ 83: 158, # 'S'
+ 84: 159, # 'T'
+ 85: 160, # 'U'
+ 86: 161, # 'V'
+ 87: 162, # 'W'
+ 88: 163, # 'X'
+ 89: 164, # 'Y'
+ 90: 165, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 71, # 'a'
+ 98: 172, # 'b'
+ 99: 66, # 'c'
+ 100: 173, # 'd'
+ 101: 65, # 'e'
+ 102: 174, # 'f'
+ 103: 76, # 'g'
+ 104: 175, # 'h'
+ 105: 64, # 'i'
+ 106: 176, # 'j'
+ 107: 177, # 'k'
+ 108: 77, # 'l'
+ 109: 72, # 'm'
+ 110: 178, # 'n'
+ 111: 69, # 'o'
+ 112: 67, # 'p'
+ 113: 179, # 'q'
+ 114: 78, # 'r'
+ 115: 73, # 's'
+ 116: 180, # 't'
+ 117: 181, # 'u'
+ 118: 79, # 'v'
+ 119: 182, # 'w'
+ 120: 183, # 'x'
+ 121: 184, # 'y'
+ 122: 185, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 191, # 'ђ'
+ 129: 192, # 'Ђ'
+ 130: 193, # 'ѓ'
+ 131: 194, # 'Ѓ'
+ 132: 68, # 'ё'
+ 133: 195, # 'Ё'
+ 134: 196, # 'є'
+ 135: 197, # 'Є'
+ 136: 198, # 'ѕ'
+ 137: 199, # 'Ѕ'
+ 138: 200, # 'і'
+ 139: 201, # 'І'
+ 140: 202, # 'ї'
+ 141: 203, # 'Ї'
+ 142: 204, # 'ј'
+ 143: 205, # 'Ј'
+ 144: 206, # 'љ'
+ 145: 207, # 'Љ'
+ 146: 208, # 'њ'
+ 147: 209, # 'Њ'
+ 148: 210, # 'ћ'
+ 149: 211, # 'Ћ'
+ 150: 212, # 'ќ'
+ 151: 213, # 'Ќ'
+ 152: 214, # 'ў'
+ 153: 215, # 'Ў'
+ 154: 216, # 'џ'
+ 155: 217, # 'Џ'
+ 156: 27, # 'ю'
+ 157: 59, # 'Ю'
+ 158: 54, # 'ъ'
+ 159: 70, # 'Ъ'
+ 160: 3, # 'а'
+ 161: 37, # 'А'
+ 162: 21, # 'б'
+ 163: 44, # 'Б'
+ 164: 28, # 'ц'
+ 165: 58, # 'Ц'
+ 166: 13, # 'д'
+ 167: 41, # 'Д'
+ 168: 2, # 'е'
+ 169: 48, # 'Е'
+ 170: 39, # 'ф'
+ 171: 53, # 'Ф'
+ 172: 19, # 'г'
+ 173: 46, # 'Г'
+ 174: 218, # '«'
+ 175: 219, # '»'
+ 176: 220, # '░'
+ 177: 221, # '▒'
+ 178: 222, # '▓'
+ 179: 223, # '│'
+ 180: 224, # '┤'
+ 181: 26, # 'х'
+ 182: 55, # 'Х'
+ 183: 4, # 'и'
+ 184: 42, # 'И'
+ 185: 225, # '╣'
+ 186: 226, # '║'
+ 187: 227, # '╗'
+ 188: 228, # '╝'
+ 189: 23, # 'й'
+ 190: 60, # 'Й'
+ 191: 229, # '┐'
+ 192: 230, # '└'
+ 193: 231, # '┴'
+ 194: 232, # '┬'
+ 195: 233, # '├'
+ 196: 234, # '─'
+ 197: 235, # '┼'
+ 198: 11, # 'к'
+ 199: 36, # 'К'
+ 200: 236, # '╚'
+ 201: 237, # '╔'
+ 202: 238, # '╩'
+ 203: 239, # '╦'
+ 204: 240, # '╠'
+ 205: 241, # '═'
+ 206: 242, # '╬'
+ 207: 243, # '¤'
+ 208: 8, # 'л'
+ 209: 49, # 'Л'
+ 210: 12, # 'м'
+ 211: 38, # 'М'
+ 212: 5, # 'н'
+ 213: 31, # 'Н'
+ 214: 1, # 'о'
+ 215: 34, # 'О'
+ 216: 15, # 'п'
+ 217: 244, # '┘'
+ 218: 245, # '┌'
+ 219: 246, # '█'
+ 220: 247, # '▄'
+ 221: 35, # 'П'
+ 222: 16, # 'я'
+ 223: 248, # '▀'
+ 224: 43, # 'Я'
+ 225: 9, # 'р'
+ 226: 45, # 'Р'
+ 227: 7, # 'с'
+ 228: 32, # 'С'
+ 229: 6, # 'т'
+ 230: 40, # 'Т'
+ 231: 14, # 'у'
+ 232: 52, # 'У'
+ 233: 24, # 'ж'
+ 234: 56, # 'Ж'
+ 235: 10, # 'в'
+ 236: 33, # 'В'
+ 237: 17, # 'ь'
+ 238: 61, # 'Ь'
+ 239: 249, # '№'
+ 240: 250, # '\xad'
+ 241: 18, # 'ы'
+ 242: 62, # 'Ы'
+ 243: 20, # 'з'
+ 244: 51, # 'З'
+ 245: 25, # 'ш'
+ 246: 57, # 'Ш'
+ 247: 30, # 'э'
+ 248: 47, # 'Э'
+ 249: 29, # 'щ'
+ 250: 63, # 'Щ'
+ 251: 22, # 'ч'
+ 252: 50, # 'Ч'
+ 253: 251, # '§'
+ 254: 252, # '■'
+ 255: 255, # '\xa0'
}
-IBM855_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='IBM855',
- language='Russian',
- char_to_order_map=IBM855_RUSSIAN_CHAR_TO_ORDER,
- language_model=RUSSIAN_LANG_MODEL,
- typical_positive_ratio=0.976601,
- keep_ascii_letters=False,
- alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё')
+IBM855_RUSSIAN_MODEL = SingleByteCharSetModel(
+ charset_name="IBM855",
+ language="Russian",
+ char_to_order_map=IBM855_RUSSIAN_CHAR_TO_ORDER,
+ language_model=RUSSIAN_LANG_MODEL,
+ typical_positive_ratio=0.976601,
+ keep_ascii_letters=False,
+ alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё",
+)
KOI8_R_RUSSIAN_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 142, # 'A'
- 66: 143, # 'B'
- 67: 144, # 'C'
- 68: 145, # 'D'
- 69: 146, # 'E'
- 70: 147, # 'F'
- 71: 148, # 'G'
- 72: 149, # 'H'
- 73: 150, # 'I'
- 74: 151, # 'J'
- 75: 152, # 'K'
- 76: 74, # 'L'
- 77: 153, # 'M'
- 78: 75, # 'N'
- 79: 154, # 'O'
- 80: 155, # 'P'
- 81: 156, # 'Q'
- 82: 157, # 'R'
- 83: 158, # 'S'
- 84: 159, # 'T'
- 85: 160, # 'U'
- 86: 161, # 'V'
- 87: 162, # 'W'
- 88: 163, # 'X'
- 89: 164, # 'Y'
- 90: 165, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 71, # 'a'
- 98: 172, # 'b'
- 99: 66, # 'c'
- 100: 173, # 'd'
- 101: 65, # 'e'
- 102: 174, # 'f'
- 103: 76, # 'g'
- 104: 175, # 'h'
- 105: 64, # 'i'
- 106: 176, # 'j'
- 107: 177, # 'k'
- 108: 77, # 'l'
- 109: 72, # 'm'
- 110: 178, # 'n'
- 111: 69, # 'o'
- 112: 67, # 'p'
- 113: 179, # 'q'
- 114: 78, # 'r'
- 115: 73, # 's'
- 116: 180, # 't'
- 117: 181, # 'u'
- 118: 79, # 'v'
- 119: 182, # 'w'
- 120: 183, # 'x'
- 121: 184, # 'y'
- 122: 185, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 191, # '─'
- 129: 192, # '│'
- 130: 193, # '┌'
- 131: 194, # '┐'
- 132: 195, # '└'
- 133: 196, # '┘'
- 134: 197, # '├'
- 135: 198, # '┤'
- 136: 199, # '┬'
- 137: 200, # '┴'
- 138: 201, # '┼'
- 139: 202, # '▀'
- 140: 203, # '▄'
- 141: 204, # '█'
- 142: 205, # '▌'
- 143: 206, # '▐'
- 144: 207, # '░'
- 145: 208, # '▒'
- 146: 209, # '▓'
- 147: 210, # '⌠'
- 148: 211, # '■'
- 149: 212, # '∙'
- 150: 213, # '√'
- 151: 214, # '≈'
- 152: 215, # '≤'
- 153: 216, # '≥'
- 154: 217, # '\xa0'
- 155: 218, # '⌡'
- 156: 219, # '°'
- 157: 220, # '²'
- 158: 221, # '·'
- 159: 222, # '÷'
- 160: 223, # '═'
- 161: 224, # '║'
- 162: 225, # '╒'
- 163: 68, # 'ё'
- 164: 226, # '╓'
- 165: 227, # '╔'
- 166: 228, # '╕'
- 167: 229, # '╖'
- 168: 230, # '╗'
- 169: 231, # '╘'
- 170: 232, # '╙'
- 171: 233, # '╚'
- 172: 234, # '╛'
- 173: 235, # '╜'
- 174: 236, # '╝'
- 175: 237, # '╞'
- 176: 238, # '╟'
- 177: 239, # '╠'
- 178: 240, # '╡'
- 179: 241, # 'Ё'
- 180: 242, # '╢'
- 181: 243, # '╣'
- 182: 244, # '╤'
- 183: 245, # '╥'
- 184: 246, # '╦'
- 185: 247, # '╧'
- 186: 248, # '╨'
- 187: 249, # '╩'
- 188: 250, # '╪'
- 189: 251, # '╫'
- 190: 252, # '╬'
- 191: 253, # '©'
- 192: 27, # 'ю'
- 193: 3, # 'а'
- 194: 21, # 'б'
- 195: 28, # 'ц'
- 196: 13, # 'д'
- 197: 2, # 'е'
- 198: 39, # 'ф'
- 199: 19, # 'г'
- 200: 26, # 'х'
- 201: 4, # 'и'
- 202: 23, # 'й'
- 203: 11, # 'к'
- 204: 8, # 'л'
- 205: 12, # 'м'
- 206: 5, # 'н'
- 207: 1, # 'о'
- 208: 15, # 'п'
- 209: 16, # 'я'
- 210: 9, # 'р'
- 211: 7, # 'с'
- 212: 6, # 'т'
- 213: 14, # 'у'
- 214: 24, # 'ж'
- 215: 10, # 'в'
- 216: 17, # 'ь'
- 217: 18, # 'ы'
- 218: 20, # 'з'
- 219: 25, # 'ш'
- 220: 30, # 'э'
- 221: 29, # 'щ'
- 222: 22, # 'ч'
- 223: 54, # 'ъ'
- 224: 59, # 'Ю'
- 225: 37, # 'А'
- 226: 44, # 'Б'
- 227: 58, # 'Ц'
- 228: 41, # 'Д'
- 229: 48, # 'Е'
- 230: 53, # 'Ф'
- 231: 46, # 'Г'
- 232: 55, # 'Х'
- 233: 42, # 'И'
- 234: 60, # 'Й'
- 235: 36, # 'К'
- 236: 49, # 'Л'
- 237: 38, # 'М'
- 238: 31, # 'Н'
- 239: 34, # 'О'
- 240: 35, # 'П'
- 241: 43, # 'Я'
- 242: 45, # 'Р'
- 243: 32, # 'С'
- 244: 40, # 'Т'
- 245: 52, # 'У'
- 246: 56, # 'Ж'
- 247: 33, # 'В'
- 248: 61, # 'Ь'
- 249: 62, # 'Ы'
- 250: 51, # 'З'
- 251: 57, # 'Ш'
- 252: 47, # 'Э'
- 253: 63, # 'Щ'
- 254: 50, # 'Ч'
- 255: 70, # 'Ъ'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 142, # 'A'
+ 66: 143, # 'B'
+ 67: 144, # 'C'
+ 68: 145, # 'D'
+ 69: 146, # 'E'
+ 70: 147, # 'F'
+ 71: 148, # 'G'
+ 72: 149, # 'H'
+ 73: 150, # 'I'
+ 74: 151, # 'J'
+ 75: 152, # 'K'
+ 76: 74, # 'L'
+ 77: 153, # 'M'
+ 78: 75, # 'N'
+ 79: 154, # 'O'
+ 80: 155, # 'P'
+ 81: 156, # 'Q'
+ 82: 157, # 'R'
+ 83: 158, # 'S'
+ 84: 159, # 'T'
+ 85: 160, # 'U'
+ 86: 161, # 'V'
+ 87: 162, # 'W'
+ 88: 163, # 'X'
+ 89: 164, # 'Y'
+ 90: 165, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 71, # 'a'
+ 98: 172, # 'b'
+ 99: 66, # 'c'
+ 100: 173, # 'd'
+ 101: 65, # 'e'
+ 102: 174, # 'f'
+ 103: 76, # 'g'
+ 104: 175, # 'h'
+ 105: 64, # 'i'
+ 106: 176, # 'j'
+ 107: 177, # 'k'
+ 108: 77, # 'l'
+ 109: 72, # 'm'
+ 110: 178, # 'n'
+ 111: 69, # 'o'
+ 112: 67, # 'p'
+ 113: 179, # 'q'
+ 114: 78, # 'r'
+ 115: 73, # 's'
+ 116: 180, # 't'
+ 117: 181, # 'u'
+ 118: 79, # 'v'
+ 119: 182, # 'w'
+ 120: 183, # 'x'
+ 121: 184, # 'y'
+ 122: 185, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 191, # '─'
+ 129: 192, # '│'
+ 130: 193, # '┌'
+ 131: 194, # '┐'
+ 132: 195, # '└'
+ 133: 196, # '┘'
+ 134: 197, # '├'
+ 135: 198, # '┤'
+ 136: 199, # '┬'
+ 137: 200, # '┴'
+ 138: 201, # '┼'
+ 139: 202, # '▀'
+ 140: 203, # '▄'
+ 141: 204, # '█'
+ 142: 205, # '▌'
+ 143: 206, # '▐'
+ 144: 207, # '░'
+ 145: 208, # '▒'
+ 146: 209, # '▓'
+ 147: 210, # '⌠'
+ 148: 211, # '■'
+ 149: 212, # '∙'
+ 150: 213, # '√'
+ 151: 214, # '≈'
+ 152: 215, # '≤'
+ 153: 216, # '≥'
+ 154: 217, # '\xa0'
+ 155: 218, # '⌡'
+ 156: 219, # '°'
+ 157: 220, # '²'
+ 158: 221, # '·'
+ 159: 222, # '÷'
+ 160: 223, # '═'
+ 161: 224, # '║'
+ 162: 225, # '╒'
+ 163: 68, # 'ё'
+ 164: 226, # '╓'
+ 165: 227, # '╔'
+ 166: 228, # '╕'
+ 167: 229, # '╖'
+ 168: 230, # '╗'
+ 169: 231, # '╘'
+ 170: 232, # '╙'
+ 171: 233, # '╚'
+ 172: 234, # '╛'
+ 173: 235, # '╜'
+ 174: 236, # '╝'
+ 175: 237, # '╞'
+ 176: 238, # '╟'
+ 177: 239, # '╠'
+ 178: 240, # '╡'
+ 179: 241, # 'Ё'
+ 180: 242, # '╢'
+ 181: 243, # '╣'
+ 182: 244, # '╤'
+ 183: 245, # '╥'
+ 184: 246, # '╦'
+ 185: 247, # '╧'
+ 186: 248, # '╨'
+ 187: 249, # '╩'
+ 188: 250, # '╪'
+ 189: 251, # '╫'
+ 190: 252, # '╬'
+ 191: 253, # '©'
+ 192: 27, # 'ю'
+ 193: 3, # 'а'
+ 194: 21, # 'б'
+ 195: 28, # 'ц'
+ 196: 13, # 'д'
+ 197: 2, # 'е'
+ 198: 39, # 'ф'
+ 199: 19, # 'г'
+ 200: 26, # 'х'
+ 201: 4, # 'и'
+ 202: 23, # 'й'
+ 203: 11, # 'к'
+ 204: 8, # 'л'
+ 205: 12, # 'м'
+ 206: 5, # 'н'
+ 207: 1, # 'о'
+ 208: 15, # 'п'
+ 209: 16, # 'я'
+ 210: 9, # 'р'
+ 211: 7, # 'с'
+ 212: 6, # 'т'
+ 213: 14, # 'у'
+ 214: 24, # 'ж'
+ 215: 10, # 'в'
+ 216: 17, # 'ь'
+ 217: 18, # 'ы'
+ 218: 20, # 'з'
+ 219: 25, # 'ш'
+ 220: 30, # 'э'
+ 221: 29, # 'щ'
+ 222: 22, # 'ч'
+ 223: 54, # 'ъ'
+ 224: 59, # 'Ю'
+ 225: 37, # 'А'
+ 226: 44, # 'Б'
+ 227: 58, # 'Ц'
+ 228: 41, # 'Д'
+ 229: 48, # 'Е'
+ 230: 53, # 'Ф'
+ 231: 46, # 'Г'
+ 232: 55, # 'Х'
+ 233: 42, # 'И'
+ 234: 60, # 'Й'
+ 235: 36, # 'К'
+ 236: 49, # 'Л'
+ 237: 38, # 'М'
+ 238: 31, # 'Н'
+ 239: 34, # 'О'
+ 240: 35, # 'П'
+ 241: 43, # 'Я'
+ 242: 45, # 'Р'
+ 243: 32, # 'С'
+ 244: 40, # 'Т'
+ 245: 52, # 'У'
+ 246: 56, # 'Ж'
+ 247: 33, # 'В'
+ 248: 61, # 'Ь'
+ 249: 62, # 'Ы'
+ 250: 51, # 'З'
+ 251: 57, # 'Ш'
+ 252: 47, # 'Э'
+ 253: 63, # 'Щ'
+ 254: 50, # 'Ч'
+ 255: 70, # 'Ъ'
}
-KOI8_R_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='KOI8-R',
- language='Russian',
- char_to_order_map=KOI8_R_RUSSIAN_CHAR_TO_ORDER,
- language_model=RUSSIAN_LANG_MODEL,
- typical_positive_ratio=0.976601,
- keep_ascii_letters=False,
- alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё')
+KOI8_R_RUSSIAN_MODEL = SingleByteCharSetModel(
+ charset_name="KOI8-R",
+ language="Russian",
+ char_to_order_map=KOI8_R_RUSSIAN_CHAR_TO_ORDER,
+ language_model=RUSSIAN_LANG_MODEL,
+ typical_positive_ratio=0.976601,
+ keep_ascii_letters=False,
+ alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё",
+)
MACCYRILLIC_RUSSIAN_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 142, # 'A'
- 66: 143, # 'B'
- 67: 144, # 'C'
- 68: 145, # 'D'
- 69: 146, # 'E'
- 70: 147, # 'F'
- 71: 148, # 'G'
- 72: 149, # 'H'
- 73: 150, # 'I'
- 74: 151, # 'J'
- 75: 152, # 'K'
- 76: 74, # 'L'
- 77: 153, # 'M'
- 78: 75, # 'N'
- 79: 154, # 'O'
- 80: 155, # 'P'
- 81: 156, # 'Q'
- 82: 157, # 'R'
- 83: 158, # 'S'
- 84: 159, # 'T'
- 85: 160, # 'U'
- 86: 161, # 'V'
- 87: 162, # 'W'
- 88: 163, # 'X'
- 89: 164, # 'Y'
- 90: 165, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 71, # 'a'
- 98: 172, # 'b'
- 99: 66, # 'c'
- 100: 173, # 'd'
- 101: 65, # 'e'
- 102: 174, # 'f'
- 103: 76, # 'g'
- 104: 175, # 'h'
- 105: 64, # 'i'
- 106: 176, # 'j'
- 107: 177, # 'k'
- 108: 77, # 'l'
- 109: 72, # 'm'
- 110: 178, # 'n'
- 111: 69, # 'o'
- 112: 67, # 'p'
- 113: 179, # 'q'
- 114: 78, # 'r'
- 115: 73, # 's'
- 116: 180, # 't'
- 117: 181, # 'u'
- 118: 79, # 'v'
- 119: 182, # 'w'
- 120: 183, # 'x'
- 121: 184, # 'y'
- 122: 185, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 37, # 'А'
- 129: 44, # 'Б'
- 130: 33, # 'В'
- 131: 46, # 'Г'
- 132: 41, # 'Д'
- 133: 48, # 'Е'
- 134: 56, # 'Ж'
- 135: 51, # 'З'
- 136: 42, # 'И'
- 137: 60, # 'Й'
- 138: 36, # 'К'
- 139: 49, # 'Л'
- 140: 38, # 'М'
- 141: 31, # 'Н'
- 142: 34, # 'О'
- 143: 35, # 'П'
- 144: 45, # 'Р'
- 145: 32, # 'С'
- 146: 40, # 'Т'
- 147: 52, # 'У'
- 148: 53, # 'Ф'
- 149: 55, # 'Х'
- 150: 58, # 'Ц'
- 151: 50, # 'Ч'
- 152: 57, # 'Ш'
- 153: 63, # 'Щ'
- 154: 70, # 'Ъ'
- 155: 62, # 'Ы'
- 156: 61, # 'Ь'
- 157: 47, # 'Э'
- 158: 59, # 'Ю'
- 159: 43, # 'Я'
- 160: 191, # '†'
- 161: 192, # '°'
- 162: 193, # 'Ґ'
- 163: 194, # '£'
- 164: 195, # '§'
- 165: 196, # '•'
- 166: 197, # '¶'
- 167: 198, # 'І'
- 168: 199, # '®'
- 169: 200, # '©'
- 170: 201, # '™'
- 171: 202, # 'Ђ'
- 172: 203, # 'ђ'
- 173: 204, # '≠'
- 174: 205, # 'Ѓ'
- 175: 206, # 'ѓ'
- 176: 207, # '∞'
- 177: 208, # '±'
- 178: 209, # '≤'
- 179: 210, # '≥'
- 180: 211, # 'і'
- 181: 212, # 'µ'
- 182: 213, # 'ґ'
- 183: 214, # 'Ј'
- 184: 215, # 'Є'
- 185: 216, # 'є'
- 186: 217, # 'Ї'
- 187: 218, # 'ї'
- 188: 219, # 'Љ'
- 189: 220, # 'љ'
- 190: 221, # 'Њ'
- 191: 222, # 'њ'
- 192: 223, # 'ј'
- 193: 224, # 'Ѕ'
- 194: 225, # '¬'
- 195: 226, # '√'
- 196: 227, # 'ƒ'
- 197: 228, # '≈'
- 198: 229, # '∆'
- 199: 230, # '«'
- 200: 231, # '»'
- 201: 232, # '…'
- 202: 233, # '\xa0'
- 203: 234, # 'Ћ'
- 204: 235, # 'ћ'
- 205: 236, # 'Ќ'
- 206: 237, # 'ќ'
- 207: 238, # 'ѕ'
- 208: 239, # '–'
- 209: 240, # '—'
- 210: 241, # '“'
- 211: 242, # '”'
- 212: 243, # '‘'
- 213: 244, # '’'
- 214: 245, # '÷'
- 215: 246, # '„'
- 216: 247, # 'Ў'
- 217: 248, # 'ў'
- 218: 249, # 'Џ'
- 219: 250, # 'џ'
- 220: 251, # '№'
- 221: 252, # 'Ё'
- 222: 68, # 'ё'
- 223: 16, # 'я'
- 224: 3, # 'а'
- 225: 21, # 'б'
- 226: 10, # 'в'
- 227: 19, # 'г'
- 228: 13, # 'д'
- 229: 2, # 'е'
- 230: 24, # 'ж'
- 231: 20, # 'з'
- 232: 4, # 'и'
- 233: 23, # 'й'
- 234: 11, # 'к'
- 235: 8, # 'л'
- 236: 12, # 'м'
- 237: 5, # 'н'
- 238: 1, # 'о'
- 239: 15, # 'п'
- 240: 9, # 'р'
- 241: 7, # 'с'
- 242: 6, # 'т'
- 243: 14, # 'у'
- 244: 39, # 'ф'
- 245: 26, # 'х'
- 246: 28, # 'ц'
- 247: 22, # 'ч'
- 248: 25, # 'ш'
- 249: 29, # 'щ'
- 250: 54, # 'ъ'
- 251: 18, # 'ы'
- 252: 17, # 'ь'
- 253: 30, # 'э'
- 254: 27, # 'ю'
- 255: 255, # '€'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 142, # 'A'
+ 66: 143, # 'B'
+ 67: 144, # 'C'
+ 68: 145, # 'D'
+ 69: 146, # 'E'
+ 70: 147, # 'F'
+ 71: 148, # 'G'
+ 72: 149, # 'H'
+ 73: 150, # 'I'
+ 74: 151, # 'J'
+ 75: 152, # 'K'
+ 76: 74, # 'L'
+ 77: 153, # 'M'
+ 78: 75, # 'N'
+ 79: 154, # 'O'
+ 80: 155, # 'P'
+ 81: 156, # 'Q'
+ 82: 157, # 'R'
+ 83: 158, # 'S'
+ 84: 159, # 'T'
+ 85: 160, # 'U'
+ 86: 161, # 'V'
+ 87: 162, # 'W'
+ 88: 163, # 'X'
+ 89: 164, # 'Y'
+ 90: 165, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 71, # 'a'
+ 98: 172, # 'b'
+ 99: 66, # 'c'
+ 100: 173, # 'd'
+ 101: 65, # 'e'
+ 102: 174, # 'f'
+ 103: 76, # 'g'
+ 104: 175, # 'h'
+ 105: 64, # 'i'
+ 106: 176, # 'j'
+ 107: 177, # 'k'
+ 108: 77, # 'l'
+ 109: 72, # 'm'
+ 110: 178, # 'n'
+ 111: 69, # 'o'
+ 112: 67, # 'p'
+ 113: 179, # 'q'
+ 114: 78, # 'r'
+ 115: 73, # 's'
+ 116: 180, # 't'
+ 117: 181, # 'u'
+ 118: 79, # 'v'
+ 119: 182, # 'w'
+ 120: 183, # 'x'
+ 121: 184, # 'y'
+ 122: 185, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 37, # 'А'
+ 129: 44, # 'Б'
+ 130: 33, # 'В'
+ 131: 46, # 'Г'
+ 132: 41, # 'Д'
+ 133: 48, # 'Е'
+ 134: 56, # 'Ж'
+ 135: 51, # 'З'
+ 136: 42, # 'И'
+ 137: 60, # 'Й'
+ 138: 36, # 'К'
+ 139: 49, # 'Л'
+ 140: 38, # 'М'
+ 141: 31, # 'Н'
+ 142: 34, # 'О'
+ 143: 35, # 'П'
+ 144: 45, # 'Р'
+ 145: 32, # 'С'
+ 146: 40, # 'Т'
+ 147: 52, # 'У'
+ 148: 53, # 'Ф'
+ 149: 55, # 'Х'
+ 150: 58, # 'Ц'
+ 151: 50, # 'Ч'
+ 152: 57, # 'Ш'
+ 153: 63, # 'Щ'
+ 154: 70, # 'Ъ'
+ 155: 62, # 'Ы'
+ 156: 61, # 'Ь'
+ 157: 47, # 'Э'
+ 158: 59, # 'Ю'
+ 159: 43, # 'Я'
+ 160: 191, # '†'
+ 161: 192, # '°'
+ 162: 193, # 'Ґ'
+ 163: 194, # '£'
+ 164: 195, # '§'
+ 165: 196, # '•'
+ 166: 197, # '¶'
+ 167: 198, # 'І'
+ 168: 199, # '®'
+ 169: 200, # '©'
+ 170: 201, # '™'
+ 171: 202, # 'Ђ'
+ 172: 203, # 'ђ'
+ 173: 204, # '≠'
+ 174: 205, # 'Ѓ'
+ 175: 206, # 'ѓ'
+ 176: 207, # '∞'
+ 177: 208, # '±'
+ 178: 209, # '≤'
+ 179: 210, # '≥'
+ 180: 211, # 'і'
+ 181: 212, # 'µ'
+ 182: 213, # 'ґ'
+ 183: 214, # 'Ј'
+ 184: 215, # 'Є'
+ 185: 216, # 'є'
+ 186: 217, # 'Ї'
+ 187: 218, # 'ї'
+ 188: 219, # 'Љ'
+ 189: 220, # 'љ'
+ 190: 221, # 'Њ'
+ 191: 222, # 'њ'
+ 192: 223, # 'ј'
+ 193: 224, # 'Ѕ'
+ 194: 225, # '¬'
+ 195: 226, # '√'
+ 196: 227, # 'ƒ'
+ 197: 228, # '≈'
+ 198: 229, # '∆'
+ 199: 230, # '«'
+ 200: 231, # '»'
+ 201: 232, # '…'
+ 202: 233, # '\xa0'
+ 203: 234, # 'Ћ'
+ 204: 235, # 'ћ'
+ 205: 236, # 'Ќ'
+ 206: 237, # 'ќ'
+ 207: 238, # 'ѕ'
+ 208: 239, # '–'
+ 209: 240, # '—'
+ 210: 241, # '“'
+ 211: 242, # '”'
+ 212: 243, # '‘'
+ 213: 244, # '’'
+ 214: 245, # '÷'
+ 215: 246, # '„'
+ 216: 247, # 'Ў'
+ 217: 248, # 'ў'
+ 218: 249, # 'Џ'
+ 219: 250, # 'џ'
+ 220: 251, # '№'
+ 221: 252, # 'Ё'
+ 222: 68, # 'ё'
+ 223: 16, # 'я'
+ 224: 3, # 'а'
+ 225: 21, # 'б'
+ 226: 10, # 'в'
+ 227: 19, # 'г'
+ 228: 13, # 'д'
+ 229: 2, # 'е'
+ 230: 24, # 'ж'
+ 231: 20, # 'з'
+ 232: 4, # 'и'
+ 233: 23, # 'й'
+ 234: 11, # 'к'
+ 235: 8, # 'л'
+ 236: 12, # 'м'
+ 237: 5, # 'н'
+ 238: 1, # 'о'
+ 239: 15, # 'п'
+ 240: 9, # 'р'
+ 241: 7, # 'с'
+ 242: 6, # 'т'
+ 243: 14, # 'у'
+ 244: 39, # 'ф'
+ 245: 26, # 'х'
+ 246: 28, # 'ц'
+ 247: 22, # 'ч'
+ 248: 25, # 'ш'
+ 249: 29, # 'щ'
+ 250: 54, # 'ъ'
+ 251: 18, # 'ы'
+ 252: 17, # 'ь'
+ 253: 30, # 'э'
+ 254: 27, # 'ю'
+ 255: 255, # '€'
}
-MACCYRILLIC_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='MacCyrillic',
- language='Russian',
- char_to_order_map=MACCYRILLIC_RUSSIAN_CHAR_TO_ORDER,
- language_model=RUSSIAN_LANG_MODEL,
- typical_positive_ratio=0.976601,
- keep_ascii_letters=False,
- alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё')
+MACCYRILLIC_RUSSIAN_MODEL = SingleByteCharSetModel(
+ charset_name="MacCyrillic",
+ language="Russian",
+ char_to_order_map=MACCYRILLIC_RUSSIAN_CHAR_TO_ORDER,
+ language_model=RUSSIAN_LANG_MODEL,
+ typical_positive_ratio=0.976601,
+ keep_ascii_letters=False,
+ alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё",
+)
ISO_8859_5_RUSSIAN_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 142, # 'A'
- 66: 143, # 'B'
- 67: 144, # 'C'
- 68: 145, # 'D'
- 69: 146, # 'E'
- 70: 147, # 'F'
- 71: 148, # 'G'
- 72: 149, # 'H'
- 73: 150, # 'I'
- 74: 151, # 'J'
- 75: 152, # 'K'
- 76: 74, # 'L'
- 77: 153, # 'M'
- 78: 75, # 'N'
- 79: 154, # 'O'
- 80: 155, # 'P'
- 81: 156, # 'Q'
- 82: 157, # 'R'
- 83: 158, # 'S'
- 84: 159, # 'T'
- 85: 160, # 'U'
- 86: 161, # 'V'
- 87: 162, # 'W'
- 88: 163, # 'X'
- 89: 164, # 'Y'
- 90: 165, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 71, # 'a'
- 98: 172, # 'b'
- 99: 66, # 'c'
- 100: 173, # 'd'
- 101: 65, # 'e'
- 102: 174, # 'f'
- 103: 76, # 'g'
- 104: 175, # 'h'
- 105: 64, # 'i'
- 106: 176, # 'j'
- 107: 177, # 'k'
- 108: 77, # 'l'
- 109: 72, # 'm'
- 110: 178, # 'n'
- 111: 69, # 'o'
- 112: 67, # 'p'
- 113: 179, # 'q'
- 114: 78, # 'r'
- 115: 73, # 's'
- 116: 180, # 't'
- 117: 181, # 'u'
- 118: 79, # 'v'
- 119: 182, # 'w'
- 120: 183, # 'x'
- 121: 184, # 'y'
- 122: 185, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 191, # '\x80'
- 129: 192, # '\x81'
- 130: 193, # '\x82'
- 131: 194, # '\x83'
- 132: 195, # '\x84'
- 133: 196, # '\x85'
- 134: 197, # '\x86'
- 135: 198, # '\x87'
- 136: 199, # '\x88'
- 137: 200, # '\x89'
- 138: 201, # '\x8a'
- 139: 202, # '\x8b'
- 140: 203, # '\x8c'
- 141: 204, # '\x8d'
- 142: 205, # '\x8e'
- 143: 206, # '\x8f'
- 144: 207, # '\x90'
- 145: 208, # '\x91'
- 146: 209, # '\x92'
- 147: 210, # '\x93'
- 148: 211, # '\x94'
- 149: 212, # '\x95'
- 150: 213, # '\x96'
- 151: 214, # '\x97'
- 152: 215, # '\x98'
- 153: 216, # '\x99'
- 154: 217, # '\x9a'
- 155: 218, # '\x9b'
- 156: 219, # '\x9c'
- 157: 220, # '\x9d'
- 158: 221, # '\x9e'
- 159: 222, # '\x9f'
- 160: 223, # '\xa0'
- 161: 224, # 'Ё'
- 162: 225, # 'Ђ'
- 163: 226, # 'Ѓ'
- 164: 227, # 'Є'
- 165: 228, # 'Ѕ'
- 166: 229, # 'І'
- 167: 230, # 'Ї'
- 168: 231, # 'Ј'
- 169: 232, # 'Љ'
- 170: 233, # 'Њ'
- 171: 234, # 'Ћ'
- 172: 235, # 'Ќ'
- 173: 236, # '\xad'
- 174: 237, # 'Ў'
- 175: 238, # 'Џ'
- 176: 37, # 'А'
- 177: 44, # 'Б'
- 178: 33, # 'В'
- 179: 46, # 'Г'
- 180: 41, # 'Д'
- 181: 48, # 'Е'
- 182: 56, # 'Ж'
- 183: 51, # 'З'
- 184: 42, # 'И'
- 185: 60, # 'Й'
- 186: 36, # 'К'
- 187: 49, # 'Л'
- 188: 38, # 'М'
- 189: 31, # 'Н'
- 190: 34, # 'О'
- 191: 35, # 'П'
- 192: 45, # 'Р'
- 193: 32, # 'С'
- 194: 40, # 'Т'
- 195: 52, # 'У'
- 196: 53, # 'Ф'
- 197: 55, # 'Х'
- 198: 58, # 'Ц'
- 199: 50, # 'Ч'
- 200: 57, # 'Ш'
- 201: 63, # 'Щ'
- 202: 70, # 'Ъ'
- 203: 62, # 'Ы'
- 204: 61, # 'Ь'
- 205: 47, # 'Э'
- 206: 59, # 'Ю'
- 207: 43, # 'Я'
- 208: 3, # 'а'
- 209: 21, # 'б'
- 210: 10, # 'в'
- 211: 19, # 'г'
- 212: 13, # 'д'
- 213: 2, # 'е'
- 214: 24, # 'ж'
- 215: 20, # 'з'
- 216: 4, # 'и'
- 217: 23, # 'й'
- 218: 11, # 'к'
- 219: 8, # 'л'
- 220: 12, # 'м'
- 221: 5, # 'н'
- 222: 1, # 'о'
- 223: 15, # 'п'
- 224: 9, # 'р'
- 225: 7, # 'с'
- 226: 6, # 'т'
- 227: 14, # 'у'
- 228: 39, # 'ф'
- 229: 26, # 'х'
- 230: 28, # 'ц'
- 231: 22, # 'ч'
- 232: 25, # 'ш'
- 233: 29, # 'щ'
- 234: 54, # 'ъ'
- 235: 18, # 'ы'
- 236: 17, # 'ь'
- 237: 30, # 'э'
- 238: 27, # 'ю'
- 239: 16, # 'я'
- 240: 239, # '№'
- 241: 68, # 'ё'
- 242: 240, # 'ђ'
- 243: 241, # 'ѓ'
- 244: 242, # 'є'
- 245: 243, # 'ѕ'
- 246: 244, # 'і'
- 247: 245, # 'ї'
- 248: 246, # 'ј'
- 249: 247, # 'љ'
- 250: 248, # 'њ'
- 251: 249, # 'ћ'
- 252: 250, # 'ќ'
- 253: 251, # '§'
- 254: 252, # 'ў'
- 255: 255, # 'џ'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 142, # 'A'
+ 66: 143, # 'B'
+ 67: 144, # 'C'
+ 68: 145, # 'D'
+ 69: 146, # 'E'
+ 70: 147, # 'F'
+ 71: 148, # 'G'
+ 72: 149, # 'H'
+ 73: 150, # 'I'
+ 74: 151, # 'J'
+ 75: 152, # 'K'
+ 76: 74, # 'L'
+ 77: 153, # 'M'
+ 78: 75, # 'N'
+ 79: 154, # 'O'
+ 80: 155, # 'P'
+ 81: 156, # 'Q'
+ 82: 157, # 'R'
+ 83: 158, # 'S'
+ 84: 159, # 'T'
+ 85: 160, # 'U'
+ 86: 161, # 'V'
+ 87: 162, # 'W'
+ 88: 163, # 'X'
+ 89: 164, # 'Y'
+ 90: 165, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 71, # 'a'
+ 98: 172, # 'b'
+ 99: 66, # 'c'
+ 100: 173, # 'd'
+ 101: 65, # 'e'
+ 102: 174, # 'f'
+ 103: 76, # 'g'
+ 104: 175, # 'h'
+ 105: 64, # 'i'
+ 106: 176, # 'j'
+ 107: 177, # 'k'
+ 108: 77, # 'l'
+ 109: 72, # 'm'
+ 110: 178, # 'n'
+ 111: 69, # 'o'
+ 112: 67, # 'p'
+ 113: 179, # 'q'
+ 114: 78, # 'r'
+ 115: 73, # 's'
+ 116: 180, # 't'
+ 117: 181, # 'u'
+ 118: 79, # 'v'
+ 119: 182, # 'w'
+ 120: 183, # 'x'
+ 121: 184, # 'y'
+ 122: 185, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 191, # '\x80'
+ 129: 192, # '\x81'
+ 130: 193, # '\x82'
+ 131: 194, # '\x83'
+ 132: 195, # '\x84'
+ 133: 196, # '\x85'
+ 134: 197, # '\x86'
+ 135: 198, # '\x87'
+ 136: 199, # '\x88'
+ 137: 200, # '\x89'
+ 138: 201, # '\x8a'
+ 139: 202, # '\x8b'
+ 140: 203, # '\x8c'
+ 141: 204, # '\x8d'
+ 142: 205, # '\x8e'
+ 143: 206, # '\x8f'
+ 144: 207, # '\x90'
+ 145: 208, # '\x91'
+ 146: 209, # '\x92'
+ 147: 210, # '\x93'
+ 148: 211, # '\x94'
+ 149: 212, # '\x95'
+ 150: 213, # '\x96'
+ 151: 214, # '\x97'
+ 152: 215, # '\x98'
+ 153: 216, # '\x99'
+ 154: 217, # '\x9a'
+ 155: 218, # '\x9b'
+ 156: 219, # '\x9c'
+ 157: 220, # '\x9d'
+ 158: 221, # '\x9e'
+ 159: 222, # '\x9f'
+ 160: 223, # '\xa0'
+ 161: 224, # 'Ё'
+ 162: 225, # 'Ђ'
+ 163: 226, # 'Ѓ'
+ 164: 227, # 'Є'
+ 165: 228, # 'Ѕ'
+ 166: 229, # 'І'
+ 167: 230, # 'Ї'
+ 168: 231, # 'Ј'
+ 169: 232, # 'Љ'
+ 170: 233, # 'Њ'
+ 171: 234, # 'Ћ'
+ 172: 235, # 'Ќ'
+ 173: 236, # '\xad'
+ 174: 237, # 'Ў'
+ 175: 238, # 'Џ'
+ 176: 37, # 'А'
+ 177: 44, # 'Б'
+ 178: 33, # 'В'
+ 179: 46, # 'Г'
+ 180: 41, # 'Д'
+ 181: 48, # 'Е'
+ 182: 56, # 'Ж'
+ 183: 51, # 'З'
+ 184: 42, # 'И'
+ 185: 60, # 'Й'
+ 186: 36, # 'К'
+ 187: 49, # 'Л'
+ 188: 38, # 'М'
+ 189: 31, # 'Н'
+ 190: 34, # 'О'
+ 191: 35, # 'П'
+ 192: 45, # 'Р'
+ 193: 32, # 'С'
+ 194: 40, # 'Т'
+ 195: 52, # 'У'
+ 196: 53, # 'Ф'
+ 197: 55, # 'Х'
+ 198: 58, # 'Ц'
+ 199: 50, # 'Ч'
+ 200: 57, # 'Ш'
+ 201: 63, # 'Щ'
+ 202: 70, # 'Ъ'
+ 203: 62, # 'Ы'
+ 204: 61, # 'Ь'
+ 205: 47, # 'Э'
+ 206: 59, # 'Ю'
+ 207: 43, # 'Я'
+ 208: 3, # 'а'
+ 209: 21, # 'б'
+ 210: 10, # 'в'
+ 211: 19, # 'г'
+ 212: 13, # 'д'
+ 213: 2, # 'е'
+ 214: 24, # 'ж'
+ 215: 20, # 'з'
+ 216: 4, # 'и'
+ 217: 23, # 'й'
+ 218: 11, # 'к'
+ 219: 8, # 'л'
+ 220: 12, # 'м'
+ 221: 5, # 'н'
+ 222: 1, # 'о'
+ 223: 15, # 'п'
+ 224: 9, # 'р'
+ 225: 7, # 'с'
+ 226: 6, # 'т'
+ 227: 14, # 'у'
+ 228: 39, # 'ф'
+ 229: 26, # 'х'
+ 230: 28, # 'ц'
+ 231: 22, # 'ч'
+ 232: 25, # 'ш'
+ 233: 29, # 'щ'
+ 234: 54, # 'ъ'
+ 235: 18, # 'ы'
+ 236: 17, # 'ь'
+ 237: 30, # 'э'
+ 238: 27, # 'ю'
+ 239: 16, # 'я'
+ 240: 239, # '№'
+ 241: 68, # 'ё'
+ 242: 240, # 'ђ'
+ 243: 241, # 'ѓ'
+ 244: 242, # 'є'
+ 245: 243, # 'ѕ'
+ 246: 244, # 'і'
+ 247: 245, # 'ї'
+ 248: 246, # 'ј'
+ 249: 247, # 'љ'
+ 250: 248, # 'њ'
+ 251: 249, # 'ћ'
+ 252: 250, # 'ќ'
+ 253: 251, # '§'
+ 254: 252, # 'ў'
+ 255: 255, # 'џ'
}
-ISO_8859_5_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-5',
- language='Russian',
- char_to_order_map=ISO_8859_5_RUSSIAN_CHAR_TO_ORDER,
- language_model=RUSSIAN_LANG_MODEL,
- typical_positive_ratio=0.976601,
- keep_ascii_letters=False,
- alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё')
-
+ISO_8859_5_RUSSIAN_MODEL = SingleByteCharSetModel(
+ charset_name="ISO-8859-5",
+ language="Russian",
+ char_to_order_map=ISO_8859_5_RUSSIAN_CHAR_TO_ORDER,
+ language_model=RUSSIAN_LANG_MODEL,
+ typical_positive_ratio=0.976601,
+ keep_ascii_letters=False,
+ alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё",
+)
diff --git a/libs/chardet/langthaimodel.py b/libs/chardet/langthaimodel.py
index d0191f241..883fdb1ea 100644
--- a/libs/chardet/langthaimodel.py
+++ b/libs/chardet/langthaimodel.py
@@ -1,9 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
from chardet.sbcharsetprober import SingleByteCharSetModel
-
# 3: Positive
# 2: Likely
# 1: Unlikely
@@ -4115,269 +4111,270 @@ THAI_LANG_MODEL = {
# Character Mapping Table(s):
TIS_620_THAI_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 254, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 254, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 253, # ' '
- 33: 253, # '!'
- 34: 253, # '"'
- 35: 253, # '#'
- 36: 253, # '$'
- 37: 253, # '%'
- 38: 253, # '&'
- 39: 253, # "'"
- 40: 253, # '('
- 41: 253, # ')'
- 42: 253, # '*'
- 43: 253, # '+'
- 44: 253, # ','
- 45: 253, # '-'
- 46: 253, # '.'
- 47: 253, # '/'
- 48: 252, # '0'
- 49: 252, # '1'
- 50: 252, # '2'
- 51: 252, # '3'
- 52: 252, # '4'
- 53: 252, # '5'
- 54: 252, # '6'
- 55: 252, # '7'
- 56: 252, # '8'
- 57: 252, # '9'
- 58: 253, # ':'
- 59: 253, # ';'
- 60: 253, # '<'
- 61: 253, # '='
- 62: 253, # '>'
- 63: 253, # '?'
- 64: 253, # '@'
- 65: 182, # 'A'
- 66: 106, # 'B'
- 67: 107, # 'C'
- 68: 100, # 'D'
- 69: 183, # 'E'
- 70: 184, # 'F'
- 71: 185, # 'G'
- 72: 101, # 'H'
- 73: 94, # 'I'
- 74: 186, # 'J'
- 75: 187, # 'K'
- 76: 108, # 'L'
- 77: 109, # 'M'
- 78: 110, # 'N'
- 79: 111, # 'O'
- 80: 188, # 'P'
- 81: 189, # 'Q'
- 82: 190, # 'R'
- 83: 89, # 'S'
- 84: 95, # 'T'
- 85: 112, # 'U'
- 86: 113, # 'V'
- 87: 191, # 'W'
- 88: 192, # 'X'
- 89: 193, # 'Y'
- 90: 194, # 'Z'
- 91: 253, # '['
- 92: 253, # '\\'
- 93: 253, # ']'
- 94: 253, # '^'
- 95: 253, # '_'
- 96: 253, # '`'
- 97: 64, # 'a'
- 98: 72, # 'b'
- 99: 73, # 'c'
- 100: 114, # 'd'
- 101: 74, # 'e'
- 102: 115, # 'f'
- 103: 116, # 'g'
- 104: 102, # 'h'
- 105: 81, # 'i'
- 106: 201, # 'j'
- 107: 117, # 'k'
- 108: 90, # 'l'
- 109: 103, # 'm'
- 110: 78, # 'n'
- 111: 82, # 'o'
- 112: 96, # 'p'
- 113: 202, # 'q'
- 114: 91, # 'r'
- 115: 79, # 's'
- 116: 84, # 't'
- 117: 104, # 'u'
- 118: 105, # 'v'
- 119: 97, # 'w'
- 120: 98, # 'x'
- 121: 92, # 'y'
- 122: 203, # 'z'
- 123: 253, # '{'
- 124: 253, # '|'
- 125: 253, # '}'
- 126: 253, # '~'
- 127: 253, # '\x7f'
- 128: 209, # '\x80'
- 129: 210, # '\x81'
- 130: 211, # '\x82'
- 131: 212, # '\x83'
- 132: 213, # '\x84'
- 133: 88, # '\x85'
- 134: 214, # '\x86'
- 135: 215, # '\x87'
- 136: 216, # '\x88'
- 137: 217, # '\x89'
- 138: 218, # '\x8a'
- 139: 219, # '\x8b'
- 140: 220, # '\x8c'
- 141: 118, # '\x8d'
- 142: 221, # '\x8e'
- 143: 222, # '\x8f'
- 144: 223, # '\x90'
- 145: 224, # '\x91'
- 146: 99, # '\x92'
- 147: 85, # '\x93'
- 148: 83, # '\x94'
- 149: 225, # '\x95'
- 150: 226, # '\x96'
- 151: 227, # '\x97'
- 152: 228, # '\x98'
- 153: 229, # '\x99'
- 154: 230, # '\x9a'
- 155: 231, # '\x9b'
- 156: 232, # '\x9c'
- 157: 233, # '\x9d'
- 158: 234, # '\x9e'
- 159: 235, # '\x9f'
- 160: 236, # None
- 161: 5, # 'ก'
- 162: 30, # 'ข'
- 163: 237, # 'ฃ'
- 164: 24, # 'ค'
- 165: 238, # 'ฅ'
- 166: 75, # 'ฆ'
- 167: 8, # 'ง'
- 168: 26, # 'จ'
- 169: 52, # 'ฉ'
- 170: 34, # 'ช'
- 171: 51, # 'ซ'
- 172: 119, # 'ฌ'
- 173: 47, # 'ญ'
- 174: 58, # 'ฎ'
- 175: 57, # 'ฏ'
- 176: 49, # 'ฐ'
- 177: 53, # 'ฑ'
- 178: 55, # 'ฒ'
- 179: 43, # 'ณ'
- 180: 20, # 'ด'
- 181: 19, # 'ต'
- 182: 44, # 'ถ'
- 183: 14, # 'ท'
- 184: 48, # 'ธ'
- 185: 3, # 'น'
- 186: 17, # 'บ'
- 187: 25, # 'ป'
- 188: 39, # 'ผ'
- 189: 62, # 'ฝ'
- 190: 31, # 'พ'
- 191: 54, # 'ฟ'
- 192: 45, # 'ภ'
- 193: 9, # 'ม'
- 194: 16, # 'ย'
- 195: 2, # 'ร'
- 196: 61, # 'ฤ'
- 197: 15, # 'ล'
- 198: 239, # 'ฦ'
- 199: 12, # 'ว'
- 200: 42, # 'ศ'
- 201: 46, # 'ษ'
- 202: 18, # 'ส'
- 203: 21, # 'ห'
- 204: 76, # 'ฬ'
- 205: 4, # 'อ'
- 206: 66, # 'ฮ'
- 207: 63, # 'ฯ'
- 208: 22, # 'ะ'
- 209: 10, # 'ั'
- 210: 1, # 'า'
- 211: 36, # 'ำ'
- 212: 23, # 'ิ'
- 213: 13, # 'ี'
- 214: 40, # 'ึ'
- 215: 27, # 'ื'
- 216: 32, # 'ุ'
- 217: 35, # 'ู'
- 218: 86, # 'ฺ'
- 219: 240, # None
- 220: 241, # None
- 221: 242, # None
- 222: 243, # None
- 223: 244, # '฿'
- 224: 11, # 'เ'
- 225: 28, # 'แ'
- 226: 41, # 'โ'
- 227: 29, # 'ใ'
- 228: 33, # 'ไ'
- 229: 245, # 'ๅ'
- 230: 50, # 'ๆ'
- 231: 37, # '็'
- 232: 6, # '่'
- 233: 7, # '้'
- 234: 67, # '๊'
- 235: 77, # '๋'
- 236: 38, # '์'
- 237: 93, # 'ํ'
- 238: 246, # '๎'
- 239: 247, # '๏'
- 240: 68, # '๐'
- 241: 56, # '๑'
- 242: 59, # '๒'
- 243: 65, # '๓'
- 244: 69, # '๔'
- 245: 60, # '๕'
- 246: 70, # '๖'
- 247: 80, # '๗'
- 248: 71, # '๘'
- 249: 87, # '๙'
- 250: 248, # '๚'
- 251: 249, # '๛'
- 252: 250, # None
- 253: 251, # None
- 254: 252, # None
- 255: 253, # None
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 254, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 254, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 253, # ' '
+ 33: 253, # '!'
+ 34: 253, # '"'
+ 35: 253, # '#'
+ 36: 253, # '$'
+ 37: 253, # '%'
+ 38: 253, # '&'
+ 39: 253, # "'"
+ 40: 253, # '('
+ 41: 253, # ')'
+ 42: 253, # '*'
+ 43: 253, # '+'
+ 44: 253, # ','
+ 45: 253, # '-'
+ 46: 253, # '.'
+ 47: 253, # '/'
+ 48: 252, # '0'
+ 49: 252, # '1'
+ 50: 252, # '2'
+ 51: 252, # '3'
+ 52: 252, # '4'
+ 53: 252, # '5'
+ 54: 252, # '6'
+ 55: 252, # '7'
+ 56: 252, # '8'
+ 57: 252, # '9'
+ 58: 253, # ':'
+ 59: 253, # ';'
+ 60: 253, # '<'
+ 61: 253, # '='
+ 62: 253, # '>'
+ 63: 253, # '?'
+ 64: 253, # '@'
+ 65: 182, # 'A'
+ 66: 106, # 'B'
+ 67: 107, # 'C'
+ 68: 100, # 'D'
+ 69: 183, # 'E'
+ 70: 184, # 'F'
+ 71: 185, # 'G'
+ 72: 101, # 'H'
+ 73: 94, # 'I'
+ 74: 186, # 'J'
+ 75: 187, # 'K'
+ 76: 108, # 'L'
+ 77: 109, # 'M'
+ 78: 110, # 'N'
+ 79: 111, # 'O'
+ 80: 188, # 'P'
+ 81: 189, # 'Q'
+ 82: 190, # 'R'
+ 83: 89, # 'S'
+ 84: 95, # 'T'
+ 85: 112, # 'U'
+ 86: 113, # 'V'
+ 87: 191, # 'W'
+ 88: 192, # 'X'
+ 89: 193, # 'Y'
+ 90: 194, # 'Z'
+ 91: 253, # '['
+ 92: 253, # '\\'
+ 93: 253, # ']'
+ 94: 253, # '^'
+ 95: 253, # '_'
+ 96: 253, # '`'
+ 97: 64, # 'a'
+ 98: 72, # 'b'
+ 99: 73, # 'c'
+ 100: 114, # 'd'
+ 101: 74, # 'e'
+ 102: 115, # 'f'
+ 103: 116, # 'g'
+ 104: 102, # 'h'
+ 105: 81, # 'i'
+ 106: 201, # 'j'
+ 107: 117, # 'k'
+ 108: 90, # 'l'
+ 109: 103, # 'm'
+ 110: 78, # 'n'
+ 111: 82, # 'o'
+ 112: 96, # 'p'
+ 113: 202, # 'q'
+ 114: 91, # 'r'
+ 115: 79, # 's'
+ 116: 84, # 't'
+ 117: 104, # 'u'
+ 118: 105, # 'v'
+ 119: 97, # 'w'
+ 120: 98, # 'x'
+ 121: 92, # 'y'
+ 122: 203, # 'z'
+ 123: 253, # '{'
+ 124: 253, # '|'
+ 125: 253, # '}'
+ 126: 253, # '~'
+ 127: 253, # '\x7f'
+ 128: 209, # '\x80'
+ 129: 210, # '\x81'
+ 130: 211, # '\x82'
+ 131: 212, # '\x83'
+ 132: 213, # '\x84'
+ 133: 88, # '\x85'
+ 134: 214, # '\x86'
+ 135: 215, # '\x87'
+ 136: 216, # '\x88'
+ 137: 217, # '\x89'
+ 138: 218, # '\x8a'
+ 139: 219, # '\x8b'
+ 140: 220, # '\x8c'
+ 141: 118, # '\x8d'
+ 142: 221, # '\x8e'
+ 143: 222, # '\x8f'
+ 144: 223, # '\x90'
+ 145: 224, # '\x91'
+ 146: 99, # '\x92'
+ 147: 85, # '\x93'
+ 148: 83, # '\x94'
+ 149: 225, # '\x95'
+ 150: 226, # '\x96'
+ 151: 227, # '\x97'
+ 152: 228, # '\x98'
+ 153: 229, # '\x99'
+ 154: 230, # '\x9a'
+ 155: 231, # '\x9b'
+ 156: 232, # '\x9c'
+ 157: 233, # '\x9d'
+ 158: 234, # '\x9e'
+ 159: 235, # '\x9f'
+ 160: 236, # None
+ 161: 5, # 'ก'
+ 162: 30, # 'ข'
+ 163: 237, # 'ฃ'
+ 164: 24, # 'ค'
+ 165: 238, # 'ฅ'
+ 166: 75, # 'ฆ'
+ 167: 8, # 'ง'
+ 168: 26, # 'จ'
+ 169: 52, # 'ฉ'
+ 170: 34, # 'ช'
+ 171: 51, # 'ซ'
+ 172: 119, # 'ฌ'
+ 173: 47, # 'ญ'
+ 174: 58, # 'ฎ'
+ 175: 57, # 'ฏ'
+ 176: 49, # 'ฐ'
+ 177: 53, # 'ฑ'
+ 178: 55, # 'ฒ'
+ 179: 43, # 'ณ'
+ 180: 20, # 'ด'
+ 181: 19, # 'ต'
+ 182: 44, # 'ถ'
+ 183: 14, # 'ท'
+ 184: 48, # 'ธ'
+ 185: 3, # 'น'
+ 186: 17, # 'บ'
+ 187: 25, # 'ป'
+ 188: 39, # 'ผ'
+ 189: 62, # 'ฝ'
+ 190: 31, # 'พ'
+ 191: 54, # 'ฟ'
+ 192: 45, # 'ภ'
+ 193: 9, # 'ม'
+ 194: 16, # 'ย'
+ 195: 2, # 'ร'
+ 196: 61, # 'ฤ'
+ 197: 15, # 'ล'
+ 198: 239, # 'ฦ'
+ 199: 12, # 'ว'
+ 200: 42, # 'ศ'
+ 201: 46, # 'ษ'
+ 202: 18, # 'ส'
+ 203: 21, # 'ห'
+ 204: 76, # 'ฬ'
+ 205: 4, # 'อ'
+ 206: 66, # 'ฮ'
+ 207: 63, # 'ฯ'
+ 208: 22, # 'ะ'
+ 209: 10, # 'ั'
+ 210: 1, # 'า'
+ 211: 36, # 'ำ'
+ 212: 23, # 'ิ'
+ 213: 13, # 'ี'
+ 214: 40, # 'ึ'
+ 215: 27, # 'ื'
+ 216: 32, # 'ุ'
+ 217: 35, # 'ู'
+ 218: 86, # 'ฺ'
+ 219: 240, # None
+ 220: 241, # None
+ 221: 242, # None
+ 222: 243, # None
+ 223: 244, # '฿'
+ 224: 11, # 'เ'
+ 225: 28, # 'แ'
+ 226: 41, # 'โ'
+ 227: 29, # 'ใ'
+ 228: 33, # 'ไ'
+ 229: 245, # 'ๅ'
+ 230: 50, # 'ๆ'
+ 231: 37, # '็'
+ 232: 6, # '่'
+ 233: 7, # '้'
+ 234: 67, # '๊'
+ 235: 77, # '๋'
+ 236: 38, # '์'
+ 237: 93, # 'ํ'
+ 238: 246, # '๎'
+ 239: 247, # '๏'
+ 240: 68, # '๐'
+ 241: 56, # '๑'
+ 242: 59, # '๒'
+ 243: 65, # '๓'
+ 244: 69, # '๔'
+ 245: 60, # '๕'
+ 246: 70, # '๖'
+ 247: 80, # '๗'
+ 248: 71, # '๘'
+ 249: 87, # '๙'
+ 250: 248, # '๚'
+ 251: 249, # '๛'
+ 252: 250, # None
+ 253: 251, # None
+ 254: 252, # None
+ 255: 253, # None
}
-TIS_620_THAI_MODEL = SingleByteCharSetModel(charset_name='TIS-620',
- language='Thai',
- char_to_order_map=TIS_620_THAI_CHAR_TO_ORDER,
- language_model=THAI_LANG_MODEL,
- typical_positive_ratio=0.926386,
- keep_ascii_letters=False,
- alphabet='กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛')
-
+TIS_620_THAI_MODEL = SingleByteCharSetModel(
+ charset_name="TIS-620",
+ language="Thai",
+ char_to_order_map=TIS_620_THAI_CHAR_TO_ORDER,
+ language_model=THAI_LANG_MODEL,
+ typical_positive_ratio=0.926386,
+ keep_ascii_letters=False,
+ alphabet="กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛",
+)
diff --git a/libs/chardet/langturkishmodel.py b/libs/chardet/langturkishmodel.py
index 8ba93224d..64c94336c 100644
--- a/libs/chardet/langturkishmodel.py
+++ b/libs/chardet/langturkishmodel.py
@@ -1,9 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
from chardet.sbcharsetprober import SingleByteCharSetModel
-
# 3: Positive
# 2: Likely
# 1: Unlikely
@@ -4115,269 +4111,270 @@ TURKISH_LANG_MODEL = {
# Character Mapping Table(s):
ISO_8859_9_TURKISH_CHAR_TO_ORDER = {
- 0: 255, # '\x00'
- 1: 255, # '\x01'
- 2: 255, # '\x02'
- 3: 255, # '\x03'
- 4: 255, # '\x04'
- 5: 255, # '\x05'
- 6: 255, # '\x06'
- 7: 255, # '\x07'
- 8: 255, # '\x08'
- 9: 255, # '\t'
- 10: 255, # '\n'
- 11: 255, # '\x0b'
- 12: 255, # '\x0c'
- 13: 255, # '\r'
- 14: 255, # '\x0e'
- 15: 255, # '\x0f'
- 16: 255, # '\x10'
- 17: 255, # '\x11'
- 18: 255, # '\x12'
- 19: 255, # '\x13'
- 20: 255, # '\x14'
- 21: 255, # '\x15'
- 22: 255, # '\x16'
- 23: 255, # '\x17'
- 24: 255, # '\x18'
- 25: 255, # '\x19'
- 26: 255, # '\x1a'
- 27: 255, # '\x1b'
- 28: 255, # '\x1c'
- 29: 255, # '\x1d'
- 30: 255, # '\x1e'
- 31: 255, # '\x1f'
- 32: 255, # ' '
- 33: 255, # '!'
- 34: 255, # '"'
- 35: 255, # '#'
- 36: 255, # '$'
- 37: 255, # '%'
- 38: 255, # '&'
- 39: 255, # "'"
- 40: 255, # '('
- 41: 255, # ')'
- 42: 255, # '*'
- 43: 255, # '+'
- 44: 255, # ','
- 45: 255, # '-'
- 46: 255, # '.'
- 47: 255, # '/'
- 48: 255, # '0'
- 49: 255, # '1'
- 50: 255, # '2'
- 51: 255, # '3'
- 52: 255, # '4'
- 53: 255, # '5'
- 54: 255, # '6'
- 55: 255, # '7'
- 56: 255, # '8'
- 57: 255, # '9'
- 58: 255, # ':'
- 59: 255, # ';'
- 60: 255, # '<'
- 61: 255, # '='
- 62: 255, # '>'
- 63: 255, # '?'
- 64: 255, # '@'
- 65: 23, # 'A'
- 66: 37, # 'B'
- 67: 47, # 'C'
- 68: 39, # 'D'
- 69: 29, # 'E'
- 70: 52, # 'F'
- 71: 36, # 'G'
- 72: 45, # 'H'
- 73: 53, # 'I'
- 74: 60, # 'J'
- 75: 16, # 'K'
- 76: 49, # 'L'
- 77: 20, # 'M'
- 78: 46, # 'N'
- 79: 42, # 'O'
- 80: 48, # 'P'
- 81: 69, # 'Q'
- 82: 44, # 'R'
- 83: 35, # 'S'
- 84: 31, # 'T'
- 85: 51, # 'U'
- 86: 38, # 'V'
- 87: 62, # 'W'
- 88: 65, # 'X'
- 89: 43, # 'Y'
- 90: 56, # 'Z'
- 91: 255, # '['
- 92: 255, # '\\'
- 93: 255, # ']'
- 94: 255, # '^'
- 95: 255, # '_'
- 96: 255, # '`'
- 97: 1, # 'a'
- 98: 21, # 'b'
- 99: 28, # 'c'
- 100: 12, # 'd'
- 101: 2, # 'e'
- 102: 18, # 'f'
- 103: 27, # 'g'
- 104: 25, # 'h'
- 105: 3, # 'i'
- 106: 24, # 'j'
- 107: 10, # 'k'
- 108: 5, # 'l'
- 109: 13, # 'm'
- 110: 4, # 'n'
- 111: 15, # 'o'
- 112: 26, # 'p'
- 113: 64, # 'q'
- 114: 7, # 'r'
- 115: 8, # 's'
- 116: 9, # 't'
- 117: 14, # 'u'
- 118: 32, # 'v'
- 119: 57, # 'w'
- 120: 58, # 'x'
- 121: 11, # 'y'
- 122: 22, # 'z'
- 123: 255, # '{'
- 124: 255, # '|'
- 125: 255, # '}'
- 126: 255, # '~'
- 127: 255, # '\x7f'
- 128: 180, # '\x80'
- 129: 179, # '\x81'
- 130: 178, # '\x82'
- 131: 177, # '\x83'
- 132: 176, # '\x84'
- 133: 175, # '\x85'
- 134: 174, # '\x86'
- 135: 173, # '\x87'
- 136: 172, # '\x88'
- 137: 171, # '\x89'
- 138: 170, # '\x8a'
- 139: 169, # '\x8b'
- 140: 168, # '\x8c'
- 141: 167, # '\x8d'
- 142: 166, # '\x8e'
- 143: 165, # '\x8f'
- 144: 164, # '\x90'
- 145: 163, # '\x91'
- 146: 162, # '\x92'
- 147: 161, # '\x93'
- 148: 160, # '\x94'
- 149: 159, # '\x95'
- 150: 101, # '\x96'
- 151: 158, # '\x97'
- 152: 157, # '\x98'
- 153: 156, # '\x99'
- 154: 155, # '\x9a'
- 155: 154, # '\x9b'
- 156: 153, # '\x9c'
- 157: 152, # '\x9d'
- 158: 151, # '\x9e'
- 159: 106, # '\x9f'
- 160: 150, # '\xa0'
- 161: 149, # '¡'
- 162: 148, # '¢'
- 163: 147, # '£'
- 164: 146, # '¤'
- 165: 145, # '¥'
- 166: 144, # '¦'
- 167: 100, # '§'
- 168: 143, # '¨'
- 169: 142, # '©'
- 170: 141, # 'ª'
- 171: 140, # '«'
- 172: 139, # '¬'
- 173: 138, # '\xad'
- 174: 137, # '®'
- 175: 136, # '¯'
- 176: 94, # '°'
- 177: 80, # '±'
- 178: 93, # '²'
- 179: 135, # '³'
- 180: 105, # '´'
- 181: 134, # 'µ'
- 182: 133, # '¶'
- 183: 63, # '·'
- 184: 132, # '¸'
- 185: 131, # '¹'
- 186: 130, # 'º'
- 187: 129, # '»'
- 188: 128, # '¼'
- 189: 127, # '½'
- 190: 126, # '¾'
- 191: 125, # '¿'
- 192: 124, # 'À'
- 193: 104, # 'Á'
- 194: 73, # 'Â'
- 195: 99, # 'Ã'
- 196: 79, # 'Ä'
- 197: 85, # 'Å'
- 198: 123, # 'Æ'
- 199: 54, # 'Ç'
- 200: 122, # 'È'
- 201: 98, # 'É'
- 202: 92, # 'Ê'
- 203: 121, # 'Ë'
- 204: 120, # 'Ì'
- 205: 91, # 'Í'
- 206: 103, # 'Î'
- 207: 119, # 'Ï'
- 208: 68, # 'Ğ'
- 209: 118, # 'Ñ'
- 210: 117, # 'Ò'
- 211: 97, # 'Ó'
- 212: 116, # 'Ô'
- 213: 115, # 'Õ'
- 214: 50, # 'Ö'
- 215: 90, # '×'
- 216: 114, # 'Ø'
- 217: 113, # 'Ù'
- 218: 112, # 'Ú'
- 219: 111, # 'Û'
- 220: 55, # 'Ü'
- 221: 41, # 'İ'
- 222: 40, # 'Ş'
- 223: 86, # 'ß'
- 224: 89, # 'à'
- 225: 70, # 'á'
- 226: 59, # 'â'
- 227: 78, # 'ã'
- 228: 71, # 'ä'
- 229: 82, # 'å'
- 230: 88, # 'æ'
- 231: 33, # 'ç'
- 232: 77, # 'è'
- 233: 66, # 'é'
- 234: 84, # 'ê'
- 235: 83, # 'ë'
- 236: 110, # 'ì'
- 237: 75, # 'í'
- 238: 61, # 'î'
- 239: 96, # 'ï'
- 240: 30, # 'ğ'
- 241: 67, # 'ñ'
- 242: 109, # 'ò'
- 243: 74, # 'ó'
- 244: 87, # 'ô'
- 245: 102, # 'õ'
- 246: 34, # 'ö'
- 247: 95, # '÷'
- 248: 81, # 'ø'
- 249: 108, # 'ù'
- 250: 76, # 'ú'
- 251: 72, # 'û'
- 252: 17, # 'ü'
- 253: 6, # 'ı'
- 254: 19, # 'ş'
- 255: 107, # 'ÿ'
+ 0: 255, # '\x00'
+ 1: 255, # '\x01'
+ 2: 255, # '\x02'
+ 3: 255, # '\x03'
+ 4: 255, # '\x04'
+ 5: 255, # '\x05'
+ 6: 255, # '\x06'
+ 7: 255, # '\x07'
+ 8: 255, # '\x08'
+ 9: 255, # '\t'
+ 10: 255, # '\n'
+ 11: 255, # '\x0b'
+ 12: 255, # '\x0c'
+ 13: 255, # '\r'
+ 14: 255, # '\x0e'
+ 15: 255, # '\x0f'
+ 16: 255, # '\x10'
+ 17: 255, # '\x11'
+ 18: 255, # '\x12'
+ 19: 255, # '\x13'
+ 20: 255, # '\x14'
+ 21: 255, # '\x15'
+ 22: 255, # '\x16'
+ 23: 255, # '\x17'
+ 24: 255, # '\x18'
+ 25: 255, # '\x19'
+ 26: 255, # '\x1a'
+ 27: 255, # '\x1b'
+ 28: 255, # '\x1c'
+ 29: 255, # '\x1d'
+ 30: 255, # '\x1e'
+ 31: 255, # '\x1f'
+ 32: 255, # ' '
+ 33: 255, # '!'
+ 34: 255, # '"'
+ 35: 255, # '#'
+ 36: 255, # '$'
+ 37: 255, # '%'
+ 38: 255, # '&'
+ 39: 255, # "'"
+ 40: 255, # '('
+ 41: 255, # ')'
+ 42: 255, # '*'
+ 43: 255, # '+'
+ 44: 255, # ','
+ 45: 255, # '-'
+ 46: 255, # '.'
+ 47: 255, # '/'
+ 48: 255, # '0'
+ 49: 255, # '1'
+ 50: 255, # '2'
+ 51: 255, # '3'
+ 52: 255, # '4'
+ 53: 255, # '5'
+ 54: 255, # '6'
+ 55: 255, # '7'
+ 56: 255, # '8'
+ 57: 255, # '9'
+ 58: 255, # ':'
+ 59: 255, # ';'
+ 60: 255, # '<'
+ 61: 255, # '='
+ 62: 255, # '>'
+ 63: 255, # '?'
+ 64: 255, # '@'
+ 65: 23, # 'A'
+ 66: 37, # 'B'
+ 67: 47, # 'C'
+ 68: 39, # 'D'
+ 69: 29, # 'E'
+ 70: 52, # 'F'
+ 71: 36, # 'G'
+ 72: 45, # 'H'
+ 73: 53, # 'I'
+ 74: 60, # 'J'
+ 75: 16, # 'K'
+ 76: 49, # 'L'
+ 77: 20, # 'M'
+ 78: 46, # 'N'
+ 79: 42, # 'O'
+ 80: 48, # 'P'
+ 81: 69, # 'Q'
+ 82: 44, # 'R'
+ 83: 35, # 'S'
+ 84: 31, # 'T'
+ 85: 51, # 'U'
+ 86: 38, # 'V'
+ 87: 62, # 'W'
+ 88: 65, # 'X'
+ 89: 43, # 'Y'
+ 90: 56, # 'Z'
+ 91: 255, # '['
+ 92: 255, # '\\'
+ 93: 255, # ']'
+ 94: 255, # '^'
+ 95: 255, # '_'
+ 96: 255, # '`'
+ 97: 1, # 'a'
+ 98: 21, # 'b'
+ 99: 28, # 'c'
+ 100: 12, # 'd'
+ 101: 2, # 'e'
+ 102: 18, # 'f'
+ 103: 27, # 'g'
+ 104: 25, # 'h'
+ 105: 3, # 'i'
+ 106: 24, # 'j'
+ 107: 10, # 'k'
+ 108: 5, # 'l'
+ 109: 13, # 'm'
+ 110: 4, # 'n'
+ 111: 15, # 'o'
+ 112: 26, # 'p'
+ 113: 64, # 'q'
+ 114: 7, # 'r'
+ 115: 8, # 's'
+ 116: 9, # 't'
+ 117: 14, # 'u'
+ 118: 32, # 'v'
+ 119: 57, # 'w'
+ 120: 58, # 'x'
+ 121: 11, # 'y'
+ 122: 22, # 'z'
+ 123: 255, # '{'
+ 124: 255, # '|'
+ 125: 255, # '}'
+ 126: 255, # '~'
+ 127: 255, # '\x7f'
+ 128: 180, # '\x80'
+ 129: 179, # '\x81'
+ 130: 178, # '\x82'
+ 131: 177, # '\x83'
+ 132: 176, # '\x84'
+ 133: 175, # '\x85'
+ 134: 174, # '\x86'
+ 135: 173, # '\x87'
+ 136: 172, # '\x88'
+ 137: 171, # '\x89'
+ 138: 170, # '\x8a'
+ 139: 169, # '\x8b'
+ 140: 168, # '\x8c'
+ 141: 167, # '\x8d'
+ 142: 166, # '\x8e'
+ 143: 165, # '\x8f'
+ 144: 164, # '\x90'
+ 145: 163, # '\x91'
+ 146: 162, # '\x92'
+ 147: 161, # '\x93'
+ 148: 160, # '\x94'
+ 149: 159, # '\x95'
+ 150: 101, # '\x96'
+ 151: 158, # '\x97'
+ 152: 157, # '\x98'
+ 153: 156, # '\x99'
+ 154: 155, # '\x9a'
+ 155: 154, # '\x9b'
+ 156: 153, # '\x9c'
+ 157: 152, # '\x9d'
+ 158: 151, # '\x9e'
+ 159: 106, # '\x9f'
+ 160: 150, # '\xa0'
+ 161: 149, # '¡'
+ 162: 148, # '¢'
+ 163: 147, # '£'
+ 164: 146, # '¤'
+ 165: 145, # '¥'
+ 166: 144, # '¦'
+ 167: 100, # '§'
+ 168: 143, # '¨'
+ 169: 142, # '©'
+ 170: 141, # 'ª'
+ 171: 140, # '«'
+ 172: 139, # '¬'
+ 173: 138, # '\xad'
+ 174: 137, # '®'
+ 175: 136, # '¯'
+ 176: 94, # '°'
+ 177: 80, # '±'
+ 178: 93, # '²'
+ 179: 135, # '³'
+ 180: 105, # '´'
+ 181: 134, # 'µ'
+ 182: 133, # '¶'
+ 183: 63, # '·'
+ 184: 132, # '¸'
+ 185: 131, # '¹'
+ 186: 130, # 'º'
+ 187: 129, # '»'
+ 188: 128, # '¼'
+ 189: 127, # '½'
+ 190: 126, # '¾'
+ 191: 125, # '¿'
+ 192: 124, # 'À'
+ 193: 104, # 'Á'
+ 194: 73, # 'Â'
+ 195: 99, # 'Ã'
+ 196: 79, # 'Ä'
+ 197: 85, # 'Å'
+ 198: 123, # 'Æ'
+ 199: 54, # 'Ç'
+ 200: 122, # 'È'
+ 201: 98, # 'É'
+ 202: 92, # 'Ê'
+ 203: 121, # 'Ë'
+ 204: 120, # 'Ì'
+ 205: 91, # 'Í'
+ 206: 103, # 'Î'
+ 207: 119, # 'Ï'
+ 208: 68, # 'Ğ'
+ 209: 118, # 'Ñ'
+ 210: 117, # 'Ò'
+ 211: 97, # 'Ó'
+ 212: 116, # 'Ô'
+ 213: 115, # 'Õ'
+ 214: 50, # 'Ö'
+ 215: 90, # '×'
+ 216: 114, # 'Ø'
+ 217: 113, # 'Ù'
+ 218: 112, # 'Ú'
+ 219: 111, # 'Û'
+ 220: 55, # 'Ü'
+ 221: 41, # 'İ'
+ 222: 40, # 'Ş'
+ 223: 86, # 'ß'
+ 224: 89, # 'à'
+ 225: 70, # 'á'
+ 226: 59, # 'â'
+ 227: 78, # 'ã'
+ 228: 71, # 'ä'
+ 229: 82, # 'å'
+ 230: 88, # 'æ'
+ 231: 33, # 'ç'
+ 232: 77, # 'è'
+ 233: 66, # 'é'
+ 234: 84, # 'ê'
+ 235: 83, # 'ë'
+ 236: 110, # 'ì'
+ 237: 75, # 'í'
+ 238: 61, # 'î'
+ 239: 96, # 'ï'
+ 240: 30, # 'ğ'
+ 241: 67, # 'ñ'
+ 242: 109, # 'ò'
+ 243: 74, # 'ó'
+ 244: 87, # 'ô'
+ 245: 102, # 'õ'
+ 246: 34, # 'ö'
+ 247: 95, # '÷'
+ 248: 81, # 'ø'
+ 249: 108, # 'ù'
+ 250: 76, # 'ú'
+ 251: 72, # 'û'
+ 252: 17, # 'ü'
+ 253: 6, # 'ı'
+ 254: 19, # 'ş'
+ 255: 107, # 'ÿ'
}
-ISO_8859_9_TURKISH_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-9',
- language='Turkish',
- char_to_order_map=ISO_8859_9_TURKISH_CHAR_TO_ORDER,
- language_model=TURKISH_LANG_MODEL,
- typical_positive_ratio=0.97029,
- keep_ascii_letters=True,
- alphabet='ABCDEFGHIJKLMNOPRSTUVYZabcdefghijklmnoprstuvyzÂÇÎÖÛÜâçîöûüĞğİıŞş')
-
+ISO_8859_9_TURKISH_MODEL = SingleByteCharSetModel(
+ charset_name="ISO-8859-9",
+ language="Turkish",
+ char_to_order_map=ISO_8859_9_TURKISH_CHAR_TO_ORDER,
+ language_model=TURKISH_LANG_MODEL,
+ typical_positive_ratio=0.97029,
+ keep_ascii_letters=True,
+ alphabet="ABCDEFGHIJKLMNOPRSTUVYZabcdefghijklmnoprstuvyzÂÇÎÖÛÜâçîöûüĞğİıŞş",
+)
diff --git a/libs/chardet/latin1prober.py b/libs/chardet/latin1prober.py
index 7d1e8c20f..241f14ab9 100644
--- a/libs/chardet/latin1prober.py
+++ b/libs/chardet/latin1prober.py
@@ -41,6 +41,7 @@ ASV = 6 # accent small vowel
ASO = 7 # accent small other
CLASS_NUM = 8 # total classes
+# fmt: off
Latin1_CharToClass = (
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
@@ -91,11 +92,12 @@ Latin1ClassModel = (
0, 3, 1, 3, 1, 1, 1, 3, # ASV
0, 3, 1, 3, 1, 1, 3, 3, # ASO
)
+# fmt: on
class Latin1Prober(CharSetProber):
def __init__(self):
- super(Latin1Prober, self).__init__()
+ super().__init__()
self._last_char_class = None
self._freq_counter = None
self.reset()
@@ -103,7 +105,7 @@ class Latin1Prober(CharSetProber):
def reset(self):
self._last_char_class = OTH
self._freq_counter = [0] * FREQ_CAT_NUM
- CharSetProber.reset(self)
+ super().reset()
@property
def charset_name(self):
@@ -114,11 +116,10 @@ class Latin1Prober(CharSetProber):
return ""
def feed(self, byte_str):
- byte_str = self.filter_with_english_letters(byte_str)
+ byte_str = self.remove_xml_tags(byte_str)
for c in byte_str:
char_class = Latin1_CharToClass[c]
- freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
- + char_class]
+ freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) + char_class]
if freq == 0:
self._state = ProbingState.NOT_ME
break
@@ -132,14 +133,13 @@ class Latin1Prober(CharSetProber):
return 0.01
total = sum(self._freq_counter)
- if total < 0.01:
- confidence = 0.0
- else:
- confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
- / total)
- if confidence < 0.0:
- confidence = 0.0
+ confidence = (
+ 0.0
+ if total < 0.01
+ else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total
+ )
+ confidence = max(confidence, 0.0)
# lower the confidence of latin1 so that other more accurate
# detector can take priority.
- confidence = confidence * 0.73
+ confidence *= 0.73
return confidence
diff --git a/libs/chardet/mbcharsetprober.py b/libs/chardet/mbcharsetprober.py
index 6256ecfd1..bf96ad5d4 100644
--- a/libs/chardet/mbcharsetprober.py
+++ b/libs/chardet/mbcharsetprober.py
@@ -28,7 +28,7 @@
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
-from .enums import ProbingState, MachineState
+from .enums import MachineState, ProbingState
class MultiByteCharSetProber(CharSetProber):
@@ -37,13 +37,13 @@ class MultiByteCharSetProber(CharSetProber):
"""
def __init__(self, lang_filter=None):
- super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
+ super().__init__(lang_filter=lang_filter)
self.distribution_analyzer = None
self.coding_sm = None
self._last_char = [0, 0]
def reset(self):
- super(MultiByteCharSetProber, self).reset()
+ super().reset()
if self.coding_sm:
self.coding_sm.reset()
if self.distribution_analyzer:
@@ -59,30 +59,34 @@ class MultiByteCharSetProber(CharSetProber):
raise NotImplementedError
def feed(self, byte_str):
- for i in range(len(byte_str)):
- coding_state = self.coding_sm.next_state(byte_str[i])
+ for i, byte in enumerate(byte_str):
+ coding_state = self.coding_sm.next_state(byte)
if coding_state == MachineState.ERROR:
- self.logger.debug('%s %s prober hit error at byte %s',
- self.charset_name, self.language, i)
+ self.logger.debug(
+ "%s %s prober hit error at byte %s",
+ self.charset_name,
+ self.language,
+ i,
+ )
self._state = ProbingState.NOT_ME
break
- elif coding_state == MachineState.ITS_ME:
+ if coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
- elif coding_state == MachineState.START:
+ if coding_state == MachineState.START:
char_len = self.coding_sm.get_current_charlen()
if i == 0:
- self._last_char[1] = byte_str[0]
+ self._last_char[1] = byte
self.distribution_analyzer.feed(self._last_char, char_len)
else:
- self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
- char_len)
+ self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len)
self._last_char[0] = byte_str[-1]
if self.state == ProbingState.DETECTING:
- if (self.distribution_analyzer.got_enough_data() and
- (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
+ if self.distribution_analyzer.got_enough_data() and (
+ self.get_confidence() > self.SHORTCUT_THRESHOLD
+ ):
self._state = ProbingState.FOUND_IT
return self.state
diff --git a/libs/chardet/mbcsgroupprober.py b/libs/chardet/mbcsgroupprober.py
index 530abe75e..94488360c 100644
--- a/libs/chardet/mbcsgroupprober.py
+++ b/libs/chardet/mbcsgroupprober.py
@@ -27,20 +27,21 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from .big5prober import Big5Prober
from .charsetgroupprober import CharSetGroupProber
-from .utf8prober import UTF8Prober
-from .sjisprober import SJISProber
+from .cp949prober import CP949Prober
from .eucjpprober import EUCJPProber
-from .gb2312prober import GB2312Prober
from .euckrprober import EUCKRProber
-from .cp949prober import CP949Prober
-from .big5prober import Big5Prober
from .euctwprober import EUCTWProber
+from .gb2312prober import GB2312Prober
+from .johabprober import JOHABProber
+from .sjisprober import SJISProber
+from .utf8prober import UTF8Prober
class MBCSGroupProber(CharSetGroupProber):
def __init__(self, lang_filter=None):
- super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
+ super().__init__(lang_filter=lang_filter)
self.probers = [
UTF8Prober(),
SJISProber(),
@@ -49,6 +50,7 @@ class MBCSGroupProber(CharSetGroupProber):
EUCKRProber(),
CP949Prober(),
Big5Prober(),
- EUCTWProber()
+ EUCTWProber(),
+ JOHABProber(),
]
self.reset()
diff --git a/libs/chardet/mbcssm.py b/libs/chardet/mbcssm.py
index 8360d0f28..d3b9c4b75 100644
--- a/libs/chardet/mbcssm.py
+++ b/libs/chardet/mbcssm.py
@@ -29,39 +29,40 @@ from .enums import MachineState
# BIG5
+# fmt: off
BIG5_CLS = (
- 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
- 1,1,1,1,1,1,0,0, # 08 - 0f
- 1,1,1,1,1,1,1,1, # 10 - 17
- 1,1,1,0,1,1,1,1, # 18 - 1f
- 1,1,1,1,1,1,1,1, # 20 - 27
- 1,1,1,1,1,1,1,1, # 28 - 2f
- 1,1,1,1,1,1,1,1, # 30 - 37
- 1,1,1,1,1,1,1,1, # 38 - 3f
- 2,2,2,2,2,2,2,2, # 40 - 47
- 2,2,2,2,2,2,2,2, # 48 - 4f
- 2,2,2,2,2,2,2,2, # 50 - 57
- 2,2,2,2,2,2,2,2, # 58 - 5f
- 2,2,2,2,2,2,2,2, # 60 - 67
- 2,2,2,2,2,2,2,2, # 68 - 6f
- 2,2,2,2,2,2,2,2, # 70 - 77
- 2,2,2,2,2,2,2,1, # 78 - 7f
- 4,4,4,4,4,4,4,4, # 80 - 87
- 4,4,4,4,4,4,4,4, # 88 - 8f
- 4,4,4,4,4,4,4,4, # 90 - 97
- 4,4,4,4,4,4,4,4, # 98 - 9f
- 4,3,3,3,3,3,3,3, # a0 - a7
- 3,3,3,3,3,3,3,3, # a8 - af
- 3,3,3,3,3,3,3,3, # b0 - b7
- 3,3,3,3,3,3,3,3, # b8 - bf
- 3,3,3,3,3,3,3,3, # c0 - c7
- 3,3,3,3,3,3,3,3, # c8 - cf
- 3,3,3,3,3,3,3,3, # d0 - d7
- 3,3,3,3,3,3,3,3, # d8 - df
- 3,3,3,3,3,3,3,3, # e0 - e7
- 3,3,3,3,3,3,3,3, # e8 - ef
- 3,3,3,3,3,3,3,3, # f0 - f7
- 3,3,3,3,3,3,3,0 # f8 - ff
+ 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 #allow 0x00 as legal value
+ 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17
+ 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27
+ 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37
+ 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47
+ 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57
+ 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67
+ 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77
+ 2, 2, 2, 2, 2, 2, 2, 1, # 78 - 7f
+ 4, 4, 4, 4, 4, 4, 4, 4, # 80 - 87
+ 4, 4, 4, 4, 4, 4, 4, 4, # 88 - 8f
+ 4, 4, 4, 4, 4, 4, 4, 4, # 90 - 97
+ 4, 4, 4, 4, 4, 4, 4, 4, # 98 - 9f
+ 4, 3, 3, 3, 3, 3, 3, 3, # a0 - a7
+ 3, 3, 3, 3, 3, 3, 3, 3, # a8 - af
+ 3, 3, 3, 3, 3, 3, 3, 3, # b0 - b7
+ 3, 3, 3, 3, 3, 3, 3, 3, # b8 - bf
+ 3, 3, 3, 3, 3, 3, 3, 3, # c0 - c7
+ 3, 3, 3, 3, 3, 3, 3, 3, # c8 - cf
+ 3, 3, 3, 3, 3, 3, 3, 3, # d0 - d7
+ 3, 3, 3, 3, 3, 3, 3, 3, # d8 - df
+ 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7
+ 3, 3, 3, 3, 3, 3, 3, 3, # e8 - ef
+ 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7
+ 3, 3, 3, 3, 3, 3, 3, 0 # f8 - ff
)
BIG5_ST = (
@@ -69,34 +70,37 @@ BIG5_ST = (
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
)
+# fmt: on
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
-BIG5_SM_MODEL = {'class_table': BIG5_CLS,
- 'class_factor': 5,
- 'state_table': BIG5_ST,
- 'char_len_table': BIG5_CHAR_LEN_TABLE,
- 'name': 'Big5'}
+BIG5_SM_MODEL = {
+ "class_table": BIG5_CLS,
+ "class_factor": 5,
+ "state_table": BIG5_ST,
+ "char_len_table": BIG5_CHAR_LEN_TABLE,
+ "name": "Big5",
+}
# CP949
-
+# fmt: off
CP949_CLS = (
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
- 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
- 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
- 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
- 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
- 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
- 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
- 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
- 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
- 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
- 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
- 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
- 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
- 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, # 00 - 0f
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, # 10 - 1f
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 2f
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 3f
+ 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, # 40 - 4f
+ 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, # 50 - 5f
+ 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, # 60 - 6f
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, # 70 - 7f
+ 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, # 80 - 8f
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, # 90 - 9f
+ 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, # a0 - af
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, # b0 - bf
+ 7, 7, 7, 7, 7, 7, 9, 2, 2, 3, 2, 2, 2, 2, 2, 2, # c0 - cf
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # d0 - df
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # e0 - ef
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, # f0 - ff
)
CP949_ST = (
@@ -109,50 +113,53 @@ CP949_ST = (
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
)
+# fmt: on
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
-CP949_SM_MODEL = {'class_table': CP949_CLS,
- 'class_factor': 10,
- 'state_table': CP949_ST,
- 'char_len_table': CP949_CHAR_LEN_TABLE,
- 'name': 'CP949'}
+CP949_SM_MODEL = {
+ "class_table": CP949_CLS,
+ "class_factor": 10,
+ "state_table": CP949_ST,
+ "char_len_table": CP949_CHAR_LEN_TABLE,
+ "name": "CP949",
+}
# EUC-JP
-
+# fmt: off
EUCJP_CLS = (
- 4,4,4,4,4,4,4,4, # 00 - 07
- 4,4,4,4,4,4,5,5, # 08 - 0f
- 4,4,4,4,4,4,4,4, # 10 - 17
- 4,4,4,5,4,4,4,4, # 18 - 1f
- 4,4,4,4,4,4,4,4, # 20 - 27
- 4,4,4,4,4,4,4,4, # 28 - 2f
- 4,4,4,4,4,4,4,4, # 30 - 37
- 4,4,4,4,4,4,4,4, # 38 - 3f
- 4,4,4,4,4,4,4,4, # 40 - 47
- 4,4,4,4,4,4,4,4, # 48 - 4f
- 4,4,4,4,4,4,4,4, # 50 - 57
- 4,4,4,4,4,4,4,4, # 58 - 5f
- 4,4,4,4,4,4,4,4, # 60 - 67
- 4,4,4,4,4,4,4,4, # 68 - 6f
- 4,4,4,4,4,4,4,4, # 70 - 77
- 4,4,4,4,4,4,4,4, # 78 - 7f
- 5,5,5,5,5,5,5,5, # 80 - 87
- 5,5,5,5,5,5,1,3, # 88 - 8f
- 5,5,5,5,5,5,5,5, # 90 - 97
- 5,5,5,5,5,5,5,5, # 98 - 9f
- 5,2,2,2,2,2,2,2, # a0 - a7
- 2,2,2,2,2,2,2,2, # a8 - af
- 2,2,2,2,2,2,2,2, # b0 - b7
- 2,2,2,2,2,2,2,2, # b8 - bf
- 2,2,2,2,2,2,2,2, # c0 - c7
- 2,2,2,2,2,2,2,2, # c8 - cf
- 2,2,2,2,2,2,2,2, # d0 - d7
- 2,2,2,2,2,2,2,2, # d8 - df
- 0,0,0,0,0,0,0,0, # e0 - e7
- 0,0,0,0,0,0,0,0, # e8 - ef
- 0,0,0,0,0,0,0,0, # f0 - f7
- 0,0,0,0,0,0,0,5 # f8 - ff
+ 4, 4, 4, 4, 4, 4, 4, 4, # 00 - 07
+ 4, 4, 4, 4, 4, 4, 5, 5, # 08 - 0f
+ 4, 4, 4, 4, 4, 4, 4, 4, # 10 - 17
+ 4, 4, 4, 5, 4, 4, 4, 4, # 18 - 1f
+ 4, 4, 4, 4, 4, 4, 4, 4, # 20 - 27
+ 4, 4, 4, 4, 4, 4, 4, 4, # 28 - 2f
+ 4, 4, 4, 4, 4, 4, 4, 4, # 30 - 37
+ 4, 4, 4, 4, 4, 4, 4, 4, # 38 - 3f
+ 4, 4, 4, 4, 4, 4, 4, 4, # 40 - 47
+ 4, 4, 4, 4, 4, 4, 4, 4, # 48 - 4f
+ 4, 4, 4, 4, 4, 4, 4, 4, # 50 - 57
+ 4, 4, 4, 4, 4, 4, 4, 4, # 58 - 5f
+ 4, 4, 4, 4, 4, 4, 4, 4, # 60 - 67
+ 4, 4, 4, 4, 4, 4, 4, 4, # 68 - 6f
+ 4, 4, 4, 4, 4, 4, 4, 4, # 70 - 77
+ 4, 4, 4, 4, 4, 4, 4, 4, # 78 - 7f
+ 5, 5, 5, 5, 5, 5, 5, 5, # 80 - 87
+ 5, 5, 5, 5, 5, 5, 1, 3, # 88 - 8f
+ 5, 5, 5, 5, 5, 5, 5, 5, # 90 - 97
+ 5, 5, 5, 5, 5, 5, 5, 5, # 98 - 9f
+ 5, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
+ 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
+ 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
+ 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
+ 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
+ 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
+ 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
+ 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
+ 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7
+ 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef
+ 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7
+ 0, 0, 0, 0, 0, 0, 0, 5 # f8 - ff
)
EUCJP_ST = (
@@ -162,100 +169,163 @@ EUCJP_ST = (
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
)
+# fmt: on
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
-EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
- 'class_factor': 6,
- 'state_table': EUCJP_ST,
- 'char_len_table': EUCJP_CHAR_LEN_TABLE,
- 'name': 'EUC-JP'}
+EUCJP_SM_MODEL = {
+ "class_table": EUCJP_CLS,
+ "class_factor": 6,
+ "state_table": EUCJP_ST,
+ "char_len_table": EUCJP_CHAR_LEN_TABLE,
+ "name": "EUC-JP",
+}
# EUC-KR
-
+# fmt: off
EUCKR_CLS = (
- 1,1,1,1,1,1,1,1, # 00 - 07
- 1,1,1,1,1,1,0,0, # 08 - 0f
- 1,1,1,1,1,1,1,1, # 10 - 17
- 1,1,1,0,1,1,1,1, # 18 - 1f
- 1,1,1,1,1,1,1,1, # 20 - 27
- 1,1,1,1,1,1,1,1, # 28 - 2f
- 1,1,1,1,1,1,1,1, # 30 - 37
- 1,1,1,1,1,1,1,1, # 38 - 3f
- 1,1,1,1,1,1,1,1, # 40 - 47
+ 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07
+ 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17
+ 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27
+ 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37
+ 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 40 - 47
+ 1, 1, 1, 1, 1, 1, 1, 1, # 48 - 4f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 50 - 57
+ 1, 1, 1, 1, 1, 1, 1, 1, # 58 - 5f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 60 - 67
+ 1, 1, 1, 1, 1, 1, 1, 1, # 68 - 6f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 70 - 77
+ 1, 1, 1, 1, 1, 1, 1, 1, # 78 - 7f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87
+ 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97
+ 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f
+ 0, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
+ 2, 2, 2, 2, 2, 3, 3, 3, # a8 - af
+ 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
+ 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
+ 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
+ 2, 3, 2, 2, 2, 2, 2, 2, # c8 - cf
+ 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
+ 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
+ 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
+ 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
+ 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
+ 2, 2, 2, 2, 2, 2, 2, 0 # f8 - ff
+)
+
+EUCKR_ST = (
+ MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
+ MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
+)
+# fmt: on
+
+EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
+
+EUCKR_SM_MODEL = {
+ "class_table": EUCKR_CLS,
+ "class_factor": 4,
+ "state_table": EUCKR_ST,
+ "char_len_table": EUCKR_CHAR_LEN_TABLE,
+ "name": "EUC-KR",
+}
+
+# JOHAB
+# fmt: off
+JOHAB_CLS = (
+ 4,4,4,4,4,4,4,4, # 00 - 07
+ 4,4,4,4,4,4,0,0, # 08 - 0f
+ 4,4,4,4,4,4,4,4, # 10 - 17
+ 4,4,4,0,4,4,4,4, # 18 - 1f
+ 4,4,4,4,4,4,4,4, # 20 - 27
+ 4,4,4,4,4,4,4,4, # 28 - 2f
+ 4,3,3,3,3,3,3,3, # 30 - 37
+ 3,3,3,3,3,3,3,3, # 38 - 3f
+ 3,1,1,1,1,1,1,1, # 40 - 47
1,1,1,1,1,1,1,1, # 48 - 4f
1,1,1,1,1,1,1,1, # 50 - 57
1,1,1,1,1,1,1,1, # 58 - 5f
1,1,1,1,1,1,1,1, # 60 - 67
1,1,1,1,1,1,1,1, # 68 - 6f
1,1,1,1,1,1,1,1, # 70 - 77
- 1,1,1,1,1,1,1,1, # 78 - 7f
- 0,0,0,0,0,0,0,0, # 80 - 87
- 0,0,0,0,0,0,0,0, # 88 - 8f
- 0,0,0,0,0,0,0,0, # 90 - 97
- 0,0,0,0,0,0,0,0, # 98 - 9f
- 0,2,2,2,2,2,2,2, # a0 - a7
- 2,2,2,2,2,3,3,3, # a8 - af
- 2,2,2,2,2,2,2,2, # b0 - b7
- 2,2,2,2,2,2,2,2, # b8 - bf
- 2,2,2,2,2,2,2,2, # c0 - c7
- 2,3,2,2,2,2,2,2, # c8 - cf
- 2,2,2,2,2,2,2,2, # d0 - d7
- 2,2,2,2,2,2,2,2, # d8 - df
- 2,2,2,2,2,2,2,2, # e0 - e7
- 2,2,2,2,2,2,2,2, # e8 - ef
- 2,2,2,2,2,2,2,2, # f0 - f7
- 2,2,2,2,2,2,2,0 # f8 - ff
+ 1,1,1,1,1,1,1,2, # 78 - 7f
+ 6,6,6,6,8,8,8,8, # 80 - 87
+ 8,8,8,8,8,8,8,8, # 88 - 8f
+ 8,7,7,7,7,7,7,7, # 90 - 97
+ 7,7,7,7,7,7,7,7, # 98 - 9f
+ 7,7,7,7,7,7,7,7, # a0 - a7
+ 7,7,7,7,7,7,7,7, # a8 - af
+ 7,7,7,7,7,7,7,7, # b0 - b7
+ 7,7,7,7,7,7,7,7, # b8 - bf
+ 7,7,7,7,7,7,7,7, # c0 - c7
+ 7,7,7,7,7,7,7,7, # c8 - cf
+ 7,7,7,7,5,5,5,5, # d0 - d7
+ 5,9,9,9,9,9,9,5, # d8 - df
+ 9,9,9,9,9,9,9,9, # e0 - e7
+ 9,9,9,9,9,9,9,9, # e8 - ef
+ 9,9,9,9,9,9,9,9, # f0 - f7
+ 9,9,5,5,5,5,5,0 # f8 - ff
)
-EUCKR_ST = (
- MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
- MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
+JOHAB_ST = (
+# cls = 0 1 2 3 4 5 6 7 8 9
+ MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.ERROR ,MachineState.ERROR ,3 ,3 ,4 , # MachineState.START
+ MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
+ MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR , # MachineState.ERROR
+ MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.ERROR ,MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START , # 3
+ MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START , # 4
)
+# fmt: on
-EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
+JOHAB_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 0, 0, 2, 2, 2)
-EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
- 'class_factor': 4,
- 'state_table': EUCKR_ST,
- 'char_len_table': EUCKR_CHAR_LEN_TABLE,
- 'name': 'EUC-KR'}
+JOHAB_SM_MODEL = {
+ "class_table": JOHAB_CLS,
+ "class_factor": 10,
+ "state_table": JOHAB_ST,
+ "char_len_table": JOHAB_CHAR_LEN_TABLE,
+ "name": "Johab",
+}
# EUC-TW
-
+# fmt: off
EUCTW_CLS = (
- 2,2,2,2,2,2,2,2, # 00 - 07
- 2,2,2,2,2,2,0,0, # 08 - 0f
- 2,2,2,2,2,2,2,2, # 10 - 17
- 2,2,2,0,2,2,2,2, # 18 - 1f
- 2,2,2,2,2,2,2,2, # 20 - 27
- 2,2,2,2,2,2,2,2, # 28 - 2f
- 2,2,2,2,2,2,2,2, # 30 - 37
- 2,2,2,2,2,2,2,2, # 38 - 3f
- 2,2,2,2,2,2,2,2, # 40 - 47
- 2,2,2,2,2,2,2,2, # 48 - 4f
- 2,2,2,2,2,2,2,2, # 50 - 57
- 2,2,2,2,2,2,2,2, # 58 - 5f
- 2,2,2,2,2,2,2,2, # 60 - 67
- 2,2,2,2,2,2,2,2, # 68 - 6f
- 2,2,2,2,2,2,2,2, # 70 - 77
- 2,2,2,2,2,2,2,2, # 78 - 7f
- 0,0,0,0,0,0,0,0, # 80 - 87
- 0,0,0,0,0,0,6,0, # 88 - 8f
- 0,0,0,0,0,0,0,0, # 90 - 97
- 0,0,0,0,0,0,0,0, # 98 - 9f
- 0,3,4,4,4,4,4,4, # a0 - a7
- 5,5,1,1,1,1,1,1, # a8 - af
- 1,1,1,1,1,1,1,1, # b0 - b7
- 1,1,1,1,1,1,1,1, # b8 - bf
- 1,1,3,1,3,3,3,3, # c0 - c7
- 3,3,3,3,3,3,3,3, # c8 - cf
- 3,3,3,3,3,3,3,3, # d0 - d7
- 3,3,3,3,3,3,3,3, # d8 - df
- 3,3,3,3,3,3,3,3, # e0 - e7
- 3,3,3,3,3,3,3,3, # e8 - ef
- 3,3,3,3,3,3,3,3, # f0 - f7
- 3,3,3,3,3,3,3,0 # f8 - ff
+ 2, 2, 2, 2, 2, 2, 2, 2, # 00 - 07
+ 2, 2, 2, 2, 2, 2, 0, 0, # 08 - 0f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 10 - 17
+ 2, 2, 2, 0, 2, 2, 2, 2, # 18 - 1f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 20 - 27
+ 2, 2, 2, 2, 2, 2, 2, 2, # 28 - 2f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 30 - 37
+ 2, 2, 2, 2, 2, 2, 2, 2, # 38 - 3f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47
+ 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57
+ 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67
+ 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77
+ 2, 2, 2, 2, 2, 2, 2, 2, # 78 - 7f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87
+ 0, 0, 0, 0, 0, 0, 6, 0, # 88 - 8f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97
+ 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f
+ 0, 3, 4, 4, 4, 4, 4, 4, # a0 - a7
+ 5, 5, 1, 1, 1, 1, 1, 1, # a8 - af
+ 1, 1, 1, 1, 1, 1, 1, 1, # b0 - b7
+ 1, 1, 1, 1, 1, 1, 1, 1, # b8 - bf
+ 1, 1, 3, 1, 3, 3, 3, 3, # c0 - c7
+ 3, 3, 3, 3, 3, 3, 3, 3, # c8 - cf
+ 3, 3, 3, 3, 3, 3, 3, 3, # d0 - d7
+ 3, 3, 3, 3, 3, 3, 3, 3, # d8 - df
+ 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7
+ 3, 3, 3, 3, 3, 3, 3, 3, # e8 - ef
+ 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7
+ 3, 3, 3, 3, 3, 3, 3, 0 # f8 - ff
)
EUCTW_ST = (
@@ -266,50 +336,53 @@ EUCTW_ST = (
5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)
+# fmt: on
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
-EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
- 'class_factor': 7,
- 'state_table': EUCTW_ST,
- 'char_len_table': EUCTW_CHAR_LEN_TABLE,
- 'name': 'x-euc-tw'}
+EUCTW_SM_MODEL = {
+ "class_table": EUCTW_CLS,
+ "class_factor": 7,
+ "state_table": EUCTW_ST,
+ "char_len_table": EUCTW_CHAR_LEN_TABLE,
+ "name": "x-euc-tw",
+}
# GB2312
-
+# fmt: off
GB2312_CLS = (
- 1,1,1,1,1,1,1,1, # 00 - 07
- 1,1,1,1,1,1,0,0, # 08 - 0f
- 1,1,1,1,1,1,1,1, # 10 - 17
- 1,1,1,0,1,1,1,1, # 18 - 1f
- 1,1,1,1,1,1,1,1, # 20 - 27
- 1,1,1,1,1,1,1,1, # 28 - 2f
- 3,3,3,3,3,3,3,3, # 30 - 37
- 3,3,1,1,1,1,1,1, # 38 - 3f
- 2,2,2,2,2,2,2,2, # 40 - 47
- 2,2,2,2,2,2,2,2, # 48 - 4f
- 2,2,2,2,2,2,2,2, # 50 - 57
- 2,2,2,2,2,2,2,2, # 58 - 5f
- 2,2,2,2,2,2,2,2, # 60 - 67
- 2,2,2,2,2,2,2,2, # 68 - 6f
- 2,2,2,2,2,2,2,2, # 70 - 77
- 2,2,2,2,2,2,2,4, # 78 - 7f
- 5,6,6,6,6,6,6,6, # 80 - 87
- 6,6,6,6,6,6,6,6, # 88 - 8f
- 6,6,6,6,6,6,6,6, # 90 - 97
- 6,6,6,6,6,6,6,6, # 98 - 9f
- 6,6,6,6,6,6,6,6, # a0 - a7
- 6,6,6,6,6,6,6,6, # a8 - af
- 6,6,6,6,6,6,6,6, # b0 - b7
- 6,6,6,6,6,6,6,6, # b8 - bf
- 6,6,6,6,6,6,6,6, # c0 - c7
- 6,6,6,6,6,6,6,6, # c8 - cf
- 6,6,6,6,6,6,6,6, # d0 - d7
- 6,6,6,6,6,6,6,6, # d8 - df
- 6,6,6,6,6,6,6,6, # e0 - e7
- 6,6,6,6,6,6,6,6, # e8 - ef
- 6,6,6,6,6,6,6,6, # f0 - f7
- 6,6,6,6,6,6,6,0 # f8 - ff
+ 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07
+ 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17
+ 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27
+ 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f
+ 3, 3, 3, 3, 3, 3, 3, 3, # 30 - 37
+ 3, 3, 1, 1, 1, 1, 1, 1, # 38 - 3f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47
+ 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57
+ 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67
+ 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77
+ 2, 2, 2, 2, 2, 2, 2, 4, # 78 - 7f
+ 5, 6, 6, 6, 6, 6, 6, 6, # 80 - 87
+ 6, 6, 6, 6, 6, 6, 6, 6, # 88 - 8f
+ 6, 6, 6, 6, 6, 6, 6, 6, # 90 - 97
+ 6, 6, 6, 6, 6, 6, 6, 6, # 98 - 9f
+ 6, 6, 6, 6, 6, 6, 6, 6, # a0 - a7
+ 6, 6, 6, 6, 6, 6, 6, 6, # a8 - af
+ 6, 6, 6, 6, 6, 6, 6, 6, # b0 - b7
+ 6, 6, 6, 6, 6, 6, 6, 6, # b8 - bf
+ 6, 6, 6, 6, 6, 6, 6, 6, # c0 - c7
+ 6, 6, 6, 6, 6, 6, 6, 6, # c8 - cf
+ 6, 6, 6, 6, 6, 6, 6, 6, # d0 - d7
+ 6, 6, 6, 6, 6, 6, 6, 6, # d8 - df
+ 6, 6, 6, 6, 6, 6, 6, 6, # e0 - e7
+ 6, 6, 6, 6, 6, 6, 6, 6, # e8 - ef
+ 6, 6, 6, 6, 6, 6, 6, 6, # f0 - f7
+ 6, 6, 6, 6, 6, 6, 6, 0 # f8 - ff
)
GB2312_ST = (
@@ -320,6 +393,7 @@ GB2312_ST = (
MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)
+# fmt: on
# To be accurate, the length of class 6 can be either 2 or 4.
# But it is not necessary to discriminate between the two since
@@ -328,100 +402,105 @@ GB2312_ST = (
# 2 here.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
-GB2312_SM_MODEL = {'class_table': GB2312_CLS,
- 'class_factor': 7,
- 'state_table': GB2312_ST,
- 'char_len_table': GB2312_CHAR_LEN_TABLE,
- 'name': 'GB2312'}
+GB2312_SM_MODEL = {
+ "class_table": GB2312_CLS,
+ "class_factor": 7,
+ "state_table": GB2312_ST,
+ "char_len_table": GB2312_CHAR_LEN_TABLE,
+ "name": "GB2312",
+}
# Shift_JIS
-
+# fmt: off
SJIS_CLS = (
- 1,1,1,1,1,1,1,1, # 00 - 07
- 1,1,1,1,1,1,0,0, # 08 - 0f
- 1,1,1,1,1,1,1,1, # 10 - 17
- 1,1,1,0,1,1,1,1, # 18 - 1f
- 1,1,1,1,1,1,1,1, # 20 - 27
- 1,1,1,1,1,1,1,1, # 28 - 2f
- 1,1,1,1,1,1,1,1, # 30 - 37
- 1,1,1,1,1,1,1,1, # 38 - 3f
- 2,2,2,2,2,2,2,2, # 40 - 47
- 2,2,2,2,2,2,2,2, # 48 - 4f
- 2,2,2,2,2,2,2,2, # 50 - 57
- 2,2,2,2,2,2,2,2, # 58 - 5f
- 2,2,2,2,2,2,2,2, # 60 - 67
- 2,2,2,2,2,2,2,2, # 68 - 6f
- 2,2,2,2,2,2,2,2, # 70 - 77
- 2,2,2,2,2,2,2,1, # 78 - 7f
- 3,3,3,3,3,2,2,3, # 80 - 87
- 3,3,3,3,3,3,3,3, # 88 - 8f
- 3,3,3,3,3,3,3,3, # 90 - 97
- 3,3,3,3,3,3,3,3, # 98 - 9f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07
+ 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17
+ 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27
+ 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37
+ 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47
+ 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57
+ 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67
+ 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f
+ 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77
+ 2, 2, 2, 2, 2, 2, 2, 1, # 78 - 7f
+ 3, 3, 3, 3, 3, 2, 2, 3, # 80 - 87
+ 3, 3, 3, 3, 3, 3, 3, 3, # 88 - 8f
+ 3, 3, 3, 3, 3, 3, 3, 3, # 90 - 97
+ 3, 3, 3, 3, 3, 3, 3, 3, # 98 - 9f
#0xa0 is illegal in sjis encoding, but some pages does
#contain such byte. We need to be more error forgiven.
- 2,2,2,2,2,2,2,2, # a0 - a7
- 2,2,2,2,2,2,2,2, # a8 - af
- 2,2,2,2,2,2,2,2, # b0 - b7
- 2,2,2,2,2,2,2,2, # b8 - bf
- 2,2,2,2,2,2,2,2, # c0 - c7
- 2,2,2,2,2,2,2,2, # c8 - cf
- 2,2,2,2,2,2,2,2, # d0 - d7
- 2,2,2,2,2,2,2,2, # d8 - df
- 3,3,3,3,3,3,3,3, # e0 - e7
- 3,3,3,3,3,4,4,4, # e8 - ef
- 3,3,3,3,3,3,3,3, # f0 - f7
- 3,3,3,3,3,0,0,0) # f8 - ff
-
+ 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
+ 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
+ 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
+ 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
+ 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
+ 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
+ 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
+ 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
+ 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7
+ 3, 3, 3, 3, 3, 4, 4, 4, # e8 - ef
+ 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7
+ 3, 3, 3, 3, 3, 0, 0, 0, # f8 - ff
+)
SJIS_ST = (
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
)
+# fmt: on
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
-SJIS_SM_MODEL = {'class_table': SJIS_CLS,
- 'class_factor': 6,
- 'state_table': SJIS_ST,
- 'char_len_table': SJIS_CHAR_LEN_TABLE,
- 'name': 'Shift_JIS'}
+SJIS_SM_MODEL = {
+ "class_table": SJIS_CLS,
+ "class_factor": 6,
+ "state_table": SJIS_ST,
+ "char_len_table": SJIS_CHAR_LEN_TABLE,
+ "name": "Shift_JIS",
+}
# UCS2-BE
-
+# fmt: off
UCS2BE_CLS = (
- 0,0,0,0,0,0,0,0, # 00 - 07
- 0,0,1,0,0,2,0,0, # 08 - 0f
- 0,0,0,0,0,0,0,0, # 10 - 17
- 0,0,0,3,0,0,0,0, # 18 - 1f
- 0,0,0,0,0,0,0,0, # 20 - 27
- 0,3,3,3,3,3,0,0, # 28 - 2f
- 0,0,0,0,0,0,0,0, # 30 - 37
- 0,0,0,0,0,0,0,0, # 38 - 3f
- 0,0,0,0,0,0,0,0, # 40 - 47
- 0,0,0,0,0,0,0,0, # 48 - 4f
- 0,0,0,0,0,0,0,0, # 50 - 57
- 0,0,0,0,0,0,0,0, # 58 - 5f
- 0,0,0,0,0,0,0,0, # 60 - 67
- 0,0,0,0,0,0,0,0, # 68 - 6f
- 0,0,0,0,0,0,0,0, # 70 - 77
- 0,0,0,0,0,0,0,0, # 78 - 7f
- 0,0,0,0,0,0,0,0, # 80 - 87
- 0,0,0,0,0,0,0,0, # 88 - 8f
- 0,0,0,0,0,0,0,0, # 90 - 97
- 0,0,0,0,0,0,0,0, # 98 - 9f
- 0,0,0,0,0,0,0,0, # a0 - a7
- 0,0,0,0,0,0,0,0, # a8 - af
- 0,0,0,0,0,0,0,0, # b0 - b7
- 0,0,0,0,0,0,0,0, # b8 - bf
- 0,0,0,0,0,0,0,0, # c0 - c7
- 0,0,0,0,0,0,0,0, # c8 - cf
- 0,0,0,0,0,0,0,0, # d0 - d7
- 0,0,0,0,0,0,0,0, # d8 - df
- 0,0,0,0,0,0,0,0, # e0 - e7
- 0,0,0,0,0,0,0,0, # e8 - ef
- 0,0,0,0,0,0,0,0, # f0 - f7
- 0,0,0,0,0,0,4,5 # f8 - ff
+ 0, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
+ 0, 0, 1, 0, 0, 2, 0, 0, # 08 - 0f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
+ 0, 0, 0, 3, 0, 0, 0, 0, # 18 - 1f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27
+ 0, 3, 3, 3, 3, 3, 0, 0, # 28 - 2f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
+ 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47
+ 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
+ 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
+ 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
+ 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87
+ 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97
+ 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f
+ 0, 0, 0, 0, 0, 0, 0, 0, # a0 - a7
+ 0, 0, 0, 0, 0, 0, 0, 0, # a8 - af
+ 0, 0, 0, 0, 0, 0, 0, 0, # b0 - b7
+ 0, 0, 0, 0, 0, 0, 0, 0, # b8 - bf
+ 0, 0, 0, 0, 0, 0, 0, 0, # c0 - c7
+ 0, 0, 0, 0, 0, 0, 0, 0, # c8 - cf
+ 0, 0, 0, 0, 0, 0, 0, 0, # d0 - d7
+ 0, 0, 0, 0, 0, 0, 0, 0, # d8 - df
+ 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7
+ 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef
+ 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7
+ 0, 0, 0, 0, 0, 0, 4, 5 # f8 - ff
)
UCS2BE_ST = (
@@ -433,50 +512,53 @@ UCS2BE_ST = (
5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)
+# fmt: on
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
-UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
- 'class_factor': 6,
- 'state_table': UCS2BE_ST,
- 'char_len_table': UCS2BE_CHAR_LEN_TABLE,
- 'name': 'UTF-16BE'}
+UCS2BE_SM_MODEL = {
+ "class_table": UCS2BE_CLS,
+ "class_factor": 6,
+ "state_table": UCS2BE_ST,
+ "char_len_table": UCS2BE_CHAR_LEN_TABLE,
+ "name": "UTF-16BE",
+}
# UCS2-LE
-
+# fmt: off
UCS2LE_CLS = (
- 0,0,0,0,0,0,0,0, # 00 - 07
- 0,0,1,0,0,2,0,0, # 08 - 0f
- 0,0,0,0,0,0,0,0, # 10 - 17
- 0,0,0,3,0,0,0,0, # 18 - 1f
- 0,0,0,0,0,0,0,0, # 20 - 27
- 0,3,3,3,3,3,0,0, # 28 - 2f
- 0,0,0,0,0,0,0,0, # 30 - 37
- 0,0,0,0,0,0,0,0, # 38 - 3f
- 0,0,0,0,0,0,0,0, # 40 - 47
- 0,0,0,0,0,0,0,0, # 48 - 4f
- 0,0,0,0,0,0,0,0, # 50 - 57
- 0,0,0,0,0,0,0,0, # 58 - 5f
- 0,0,0,0,0,0,0,0, # 60 - 67
- 0,0,0,0,0,0,0,0, # 68 - 6f
- 0,0,0,0,0,0,0,0, # 70 - 77
- 0,0,0,0,0,0,0,0, # 78 - 7f
- 0,0,0,0,0,0,0,0, # 80 - 87
- 0,0,0,0,0,0,0,0, # 88 - 8f
- 0,0,0,0,0,0,0,0, # 90 - 97
- 0,0,0,0,0,0,0,0, # 98 - 9f
- 0,0,0,0,0,0,0,0, # a0 - a7
- 0,0,0,0,0,0,0,0, # a8 - af
- 0,0,0,0,0,0,0,0, # b0 - b7
- 0,0,0,0,0,0,0,0, # b8 - bf
- 0,0,0,0,0,0,0,0, # c0 - c7
- 0,0,0,0,0,0,0,0, # c8 - cf
- 0,0,0,0,0,0,0,0, # d0 - d7
- 0,0,0,0,0,0,0,0, # d8 - df
- 0,0,0,0,0,0,0,0, # e0 - e7
- 0,0,0,0,0,0,0,0, # e8 - ef
- 0,0,0,0,0,0,0,0, # f0 - f7
- 0,0,0,0,0,0,4,5 # f8 - ff
+ 0, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
+ 0, 0, 1, 0, 0, 2, 0, 0, # 08 - 0f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
+ 0, 0, 0, 3, 0, 0, 0, 0, # 18 - 1f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27
+ 0, 3, 3, 3, 3, 3, 0, 0, # 28 - 2f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
+ 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47
+ 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
+ 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
+ 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
+ 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87
+ 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f
+ 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97
+ 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f
+ 0, 0, 0, 0, 0, 0, 0, 0, # a0 - a7
+ 0, 0, 0, 0, 0, 0, 0, 0, # a8 - af
+ 0, 0, 0, 0, 0, 0, 0, 0, # b0 - b7
+ 0, 0, 0, 0, 0, 0, 0, 0, # b8 - bf
+ 0, 0, 0, 0, 0, 0, 0, 0, # c0 - c7
+ 0, 0, 0, 0, 0, 0, 0, 0, # c8 - cf
+ 0, 0, 0, 0, 0, 0, 0, 0, # d0 - d7
+ 0, 0, 0, 0, 0, 0, 0, 0, # d8 - df
+ 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7
+ 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef
+ 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7
+ 0, 0, 0, 0, 0, 0, 4, 5 # f8 - ff
)
UCS2LE_ST = (
@@ -488,50 +570,53 @@ UCS2LE_ST = (
5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)
+# fmt: on
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
-UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
- 'class_factor': 6,
- 'state_table': UCS2LE_ST,
- 'char_len_table': UCS2LE_CHAR_LEN_TABLE,
- 'name': 'UTF-16LE'}
+UCS2LE_SM_MODEL = {
+ "class_table": UCS2LE_CLS,
+ "class_factor": 6,
+ "state_table": UCS2LE_ST,
+ "char_len_table": UCS2LE_CHAR_LEN_TABLE,
+ "name": "UTF-16LE",
+}
# UTF-8
-
+# fmt: off
UTF8_CLS = (
- 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
- 1,1,1,1,1,1,0,0, # 08 - 0f
- 1,1,1,1,1,1,1,1, # 10 - 17
- 1,1,1,0,1,1,1,1, # 18 - 1f
- 1,1,1,1,1,1,1,1, # 20 - 27
- 1,1,1,1,1,1,1,1, # 28 - 2f
- 1,1,1,1,1,1,1,1, # 30 - 37
- 1,1,1,1,1,1,1,1, # 38 - 3f
- 1,1,1,1,1,1,1,1, # 40 - 47
- 1,1,1,1,1,1,1,1, # 48 - 4f
- 1,1,1,1,1,1,1,1, # 50 - 57
- 1,1,1,1,1,1,1,1, # 58 - 5f
- 1,1,1,1,1,1,1,1, # 60 - 67
- 1,1,1,1,1,1,1,1, # 68 - 6f
- 1,1,1,1,1,1,1,1, # 70 - 77
- 1,1,1,1,1,1,1,1, # 78 - 7f
- 2,2,2,2,3,3,3,3, # 80 - 87
- 4,4,4,4,4,4,4,4, # 88 - 8f
- 4,4,4,4,4,4,4,4, # 90 - 97
- 4,4,4,4,4,4,4,4, # 98 - 9f
- 5,5,5,5,5,5,5,5, # a0 - a7
- 5,5,5,5,5,5,5,5, # a8 - af
- 5,5,5,5,5,5,5,5, # b0 - b7
- 5,5,5,5,5,5,5,5, # b8 - bf
- 0,0,6,6,6,6,6,6, # c0 - c7
- 6,6,6,6,6,6,6,6, # c8 - cf
- 6,6,6,6,6,6,6,6, # d0 - d7
- 6,6,6,6,6,6,6,6, # d8 - df
- 7,8,8,8,8,8,8,8, # e0 - e7
- 8,8,8,8,8,9,8,8, # e8 - ef
- 10,11,11,11,11,11,11,11, # f0 - f7
- 12,13,13,13,14,15,0,0 # f8 - ff
+ 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 #allow 0x00 as a legal value
+ 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17
+ 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27
+ 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37
+ 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 40 - 47
+ 1, 1, 1, 1, 1, 1, 1, 1, # 48 - 4f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 50 - 57
+ 1, 1, 1, 1, 1, 1, 1, 1, # 58 - 5f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 60 - 67
+ 1, 1, 1, 1, 1, 1, 1, 1, # 68 - 6f
+ 1, 1, 1, 1, 1, 1, 1, 1, # 70 - 77
+ 1, 1, 1, 1, 1, 1, 1, 1, # 78 - 7f
+ 2, 2, 2, 2, 3, 3, 3, 3, # 80 - 87
+ 4, 4, 4, 4, 4, 4, 4, 4, # 88 - 8f
+ 4, 4, 4, 4, 4, 4, 4, 4, # 90 - 97
+ 4, 4, 4, 4, 4, 4, 4, 4, # 98 - 9f
+ 5, 5, 5, 5, 5, 5, 5, 5, # a0 - a7
+ 5, 5, 5, 5, 5, 5, 5, 5, # a8 - af
+ 5, 5, 5, 5, 5, 5, 5, 5, # b0 - b7
+ 5, 5, 5, 5, 5, 5, 5, 5, # b8 - bf
+ 0, 0, 6, 6, 6, 6, 6, 6, # c0 - c7
+ 6, 6, 6, 6, 6, 6, 6, 6, # c8 - cf
+ 6, 6, 6, 6, 6, 6, 6, 6, # d0 - d7
+ 6, 6, 6, 6, 6, 6, 6, 6, # d8 - df
+ 7, 8, 8, 8, 8, 8, 8, 8, # e0 - e7
+ 8, 8, 8, 8, 8, 9, 8, 8, # e8 - ef
+ 10, 11, 11, 11, 11, 11, 11, 11, # f0 - f7
+ 12, 13, 13, 13, 14, 15, 0, 0 # f8 - ff
)
UTF8_ST = (
@@ -562,11 +647,14 @@ UTF8_ST = (
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
)
+# fmt: on
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
-UTF8_SM_MODEL = {'class_table': UTF8_CLS,
- 'class_factor': 16,
- 'state_table': UTF8_ST,
- 'char_len_table': UTF8_CHAR_LEN_TABLE,
- 'name': 'UTF-8'}
+UTF8_SM_MODEL = {
+ "class_table": UTF8_CLS,
+ "class_factor": 16,
+ "state_table": UTF8_ST,
+ "char_len_table": UTF8_CHAR_LEN_TABLE,
+ "name": "UTF-8",
+}
diff --git a/libs/chardet/metadata/languages.py b/libs/chardet/metadata/languages.py
index 3237d5abf..1d37884c3 100644
--- a/libs/chardet/metadata/languages.py
+++ b/libs/chardet/metadata/languages.py
@@ -1,19 +1,16 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
"""
Metadata about languages used by our model training code for our
SingleByteCharSetProbers. Could be used for other things in the future.
This code is based on the language metadata from the uchardet project.
"""
-from __future__ import absolute_import, print_function
from string import ascii_letters
+# TODO: Add Ukrainian (KOI8-U)
-# TODO: Add Ukranian (KOI8-U)
-class Language(object):
+class Language:
"""Metadata about a language useful for training models
:ivar name: The human name for the language, in English.
@@ -33,9 +30,17 @@ class Language(object):
Wikipedia for training data.
:type wiki_start_pages: list of str
"""
- def __init__(self, name=None, iso_code=None, use_ascii=True, charsets=None,
- alphabet=None, wiki_start_pages=None):
- super(Language, self).__init__()
+
+ def __init__(
+ self,
+ name=None,
+ iso_code=None,
+ use_ascii=True,
+ charsets=None,
+ alphabet=None,
+ wiki_start_pages=None,
+ ):
+ super().__init__()
self.name = name
self.iso_code = iso_code
self.use_ascii = use_ascii
@@ -46,265 +51,301 @@ class Language(object):
else:
alphabet = ascii_letters
elif not alphabet:
- raise ValueError('Must supply alphabet if use_ascii is False')
- self.alphabet = ''.join(sorted(set(alphabet))) if alphabet else None
+ raise ValueError("Must supply alphabet if use_ascii is False")
+ self.alphabet = "".join(sorted(set(alphabet))) if alphabet else None
self.wiki_start_pages = wiki_start_pages
def __repr__(self):
- return '{}({})'.format(self.__class__.__name__,
- ', '.join('{}={!r}'.format(k, v)
- for k, v in self.__dict__.items()
- if not k.startswith('_')))
+ param_str = ", ".join(
+ f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_")
+ )
+ return f"{self.__class__.__name__}({param_str})"
-LANGUAGES = {'Arabic': Language(name='Arabic',
- iso_code='ar',
- use_ascii=False,
- # We only support encodings that use isolated
- # forms, because the current recommendation is
- # that the rendering system handles presentation
- # forms. This means we purposefully skip IBM864.
- charsets=['ISO-8859-6', 'WINDOWS-1256',
- 'CP720', 'CP864'],
- alphabet=u'ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ',
- wiki_start_pages=[u'الصفحة_الرئيسية']),
- 'Belarusian': Language(name='Belarusian',
- iso_code='be',
- use_ascii=False,
- charsets=['ISO-8859-5', 'WINDOWS-1251',
- 'IBM866', 'MacCyrillic'],
- alphabet=(u'АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯ'
- u'абвгдеёжзійклмнопрстуўфхцчшыьэюяʼ'),
- wiki_start_pages=[u'Галоўная_старонка']),
- 'Bulgarian': Language(name='Bulgarian',
- iso_code='bg',
- use_ascii=False,
- charsets=['ISO-8859-5', 'WINDOWS-1251',
- 'IBM855'],
- alphabet=(u'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯ'
- u'абвгдежзийклмнопрстуфхцчшщъьюя'),
- wiki_start_pages=[u'Начална_страница']),
- 'Czech': Language(name='Czech',
- iso_code='cz',
- use_ascii=True,
- charsets=['ISO-8859-2', 'WINDOWS-1250'],
- alphabet=u'áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ',
- wiki_start_pages=[u'Hlavní_strana']),
- 'Danish': Language(name='Danish',
- iso_code='da',
- use_ascii=True,
- charsets=['ISO-8859-1', 'ISO-8859-15',
- 'WINDOWS-1252'],
- alphabet=u'æøåÆØÅ',
- wiki_start_pages=[u'Forside']),
- 'German': Language(name='German',
- iso_code='de',
- use_ascii=True,
- charsets=['ISO-8859-1', 'WINDOWS-1252'],
- alphabet=u'äöüßÄÖÜ',
- wiki_start_pages=[u'Wikipedia:Hauptseite']),
- 'Greek': Language(name='Greek',
- iso_code='el',
- use_ascii=False,
- charsets=['ISO-8859-7', 'WINDOWS-1253'],
- alphabet=(u'αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώ'
- u'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ'),
- wiki_start_pages=[u'Πύλη:Κύρια']),
- 'English': Language(name='English',
- iso_code='en',
- use_ascii=True,
- charsets=['ISO-8859-1', 'WINDOWS-1252'],
- wiki_start_pages=[u'Main_Page']),
- 'Esperanto': Language(name='Esperanto',
- iso_code='eo',
- # Q, W, X, and Y not used at all
- use_ascii=False,
- charsets=['ISO-8859-3'],
- alphabet=(u'abcĉdefgĝhĥijĵklmnoprsŝtuŭvz'
- u'ABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ'),
- wiki_start_pages=[u'Vikipedio:Ĉefpaĝo']),
- 'Spanish': Language(name='Spanish',
- iso_code='es',
- use_ascii=True,
- charsets=['ISO-8859-1', 'ISO-8859-15',
- 'WINDOWS-1252'],
- alphabet=u'ñáéíóúüÑÁÉÍÓÚÜ',
- wiki_start_pages=[u'Wikipedia:Portada']),
- 'Estonian': Language(name='Estonian',
- iso_code='et',
- use_ascii=False,
- charsets=['ISO-8859-4', 'ISO-8859-13',
- 'WINDOWS-1257'],
- # C, F, Š, Q, W, X, Y, Z, Ž are only for
- # loanwords
- alphabet=(u'ABDEGHIJKLMNOPRSTUVÕÄÖÜ'
- u'abdeghijklmnoprstuvõäöü'),
- wiki_start_pages=[u'Esileht']),
- 'Finnish': Language(name='Finnish',
- iso_code='fi',
- use_ascii=True,
- charsets=['ISO-8859-1', 'ISO-8859-15',
- 'WINDOWS-1252'],
- alphabet=u'ÅÄÖŠŽåäöšž',
- wiki_start_pages=[u'Wikipedia:Etusivu']),
- 'French': Language(name='French',
- iso_code='fr',
- use_ascii=True,
- charsets=['ISO-8859-1', 'ISO-8859-15',
- 'WINDOWS-1252'],
- alphabet=u'œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ',
- wiki_start_pages=[u'Wikipédia:Accueil_principal',
- u'Bœuf (animal)']),
- 'Hebrew': Language(name='Hebrew',
- iso_code='he',
- use_ascii=False,
- charsets=['ISO-8859-8', 'WINDOWS-1255'],
- alphabet=u'אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ',
- wiki_start_pages=[u'עמוד_ראשי']),
- 'Croatian': Language(name='Croatian',
- iso_code='hr',
- # Q, W, X, Y are only used for foreign words.
- use_ascii=False,
- charsets=['ISO-8859-2', 'WINDOWS-1250'],
- alphabet=(u'abcčćdđefghijklmnoprsštuvzž'
- u'ABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ'),
- wiki_start_pages=[u'Glavna_stranica']),
- 'Hungarian': Language(name='Hungarian',
- iso_code='hu',
- # Q, W, X, Y are only used for foreign words.
- use_ascii=False,
- charsets=['ISO-8859-2', 'WINDOWS-1250'],
- alphabet=(u'abcdefghijklmnoprstuvzáéíóöőúüű'
- u'ABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ'),
- wiki_start_pages=[u'Kezdőlap']),
- 'Italian': Language(name='Italian',
- iso_code='it',
- use_ascii=True,
- charsets=['ISO-8859-1', 'ISO-8859-15',
- 'WINDOWS-1252'],
- alphabet=u'ÀÈÉÌÒÓÙàèéìòóù',
- wiki_start_pages=[u'Pagina_principale']),
- 'Lithuanian': Language(name='Lithuanian',
- iso_code='lt',
- use_ascii=False,
- charsets=['ISO-8859-13', 'WINDOWS-1257',
- 'ISO-8859-4'],
- # Q, W, and X not used at all
- alphabet=(u'AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽ'
- u'aąbcčdeęėfghiįyjklmnoprsštuųūvzž'),
- wiki_start_pages=[u'Pagrindinis_puslapis']),
- 'Latvian': Language(name='Latvian',
- iso_code='lv',
- use_ascii=False,
- charsets=['ISO-8859-13', 'WINDOWS-1257',
- 'ISO-8859-4'],
- # Q, W, X, Y are only for loanwords
- alphabet=(u'AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽ'
- u'aābcčdeēfgģhiījkķlļmnņoprsštuūvzž'),
- wiki_start_pages=[u'Sākumlapa']),
- 'Macedonian': Language(name='Macedonian',
- iso_code='mk',
- use_ascii=False,
- charsets=['ISO-8859-5', 'WINDOWS-1251',
- 'MacCyrillic', 'IBM855'],
- alphabet=(u'АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ'
- u'абвгдѓежзѕијклљмнњопрстќуфхцчџш'),
- wiki_start_pages=[u'Главна_страница']),
- 'Dutch': Language(name='Dutch',
- iso_code='nl',
- use_ascii=True,
- charsets=['ISO-8859-1', 'WINDOWS-1252'],
- wiki_start_pages=[u'Hoofdpagina']),
- 'Polish': Language(name='Polish',
- iso_code='pl',
- # Q and X are only used for foreign words.
- use_ascii=False,
- charsets=['ISO-8859-2', 'WINDOWS-1250'],
- alphabet=(u'AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ'
- u'aąbcćdeęfghijklłmnńoóprsśtuwyzźż'),
- wiki_start_pages=[u'Wikipedia:Strona_główna']),
- 'Portuguese': Language(name='Portuguese',
- iso_code='pt',
- use_ascii=True,
- charsets=['ISO-8859-1', 'ISO-8859-15',
- 'WINDOWS-1252'],
- alphabet=u'ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú',
- wiki_start_pages=[u'Wikipédia:Página_principal']),
- 'Romanian': Language(name='Romanian',
- iso_code='ro',
- use_ascii=True,
- charsets=['ISO-8859-2', 'WINDOWS-1250'],
- alphabet=u'ăâîșțĂÂÎȘȚ',
- wiki_start_pages=[u'Pagina_principală']),
- 'Russian': Language(name='Russian',
- iso_code='ru',
- use_ascii=False,
- charsets=['ISO-8859-5', 'WINDOWS-1251',
- 'KOI8-R', 'MacCyrillic', 'IBM866',
- 'IBM855'],
- alphabet=(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'
- u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'),
- wiki_start_pages=[u'Заглавная_страница']),
- 'Slovak': Language(name='Slovak',
- iso_code='sk',
- use_ascii=True,
- charsets=['ISO-8859-2', 'WINDOWS-1250'],
- alphabet=u'áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ',
- wiki_start_pages=[u'Hlavná_stránka']),
- 'Slovene': Language(name='Slovene',
- iso_code='sl',
- # Q, W, X, Y are only used for foreign words.
- use_ascii=False,
- charsets=['ISO-8859-2', 'WINDOWS-1250'],
- alphabet=(u'abcčdefghijklmnoprsštuvzž'
- u'ABCČDEFGHIJKLMNOPRSŠTUVZŽ'),
- wiki_start_pages=[u'Glavna_stran']),
- # Serbian can be written in both Latin and Cyrillic, but there's no
- # simple way to get the Latin alphabet pages from Wikipedia through
- # the API, so for now we just support Cyrillic.
- 'Serbian': Language(name='Serbian',
- iso_code='sr',
- alphabet=(u'АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ'
- u'абвгдђежзијклљмнњопрстћуфхцчџш'),
- charsets=['ISO-8859-5', 'WINDOWS-1251',
- 'MacCyrillic', 'IBM855'],
- wiki_start_pages=[u'Главна_страна']),
- 'Thai': Language(name='Thai',
- iso_code='th',
- use_ascii=False,
- charsets=['ISO-8859-11', 'TIS-620', 'CP874'],
- alphabet=u'กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛',
- wiki_start_pages=[u'หน้าหลัก']),
- 'Turkish': Language(name='Turkish',
- iso_code='tr',
- # Q, W, and X are not used by Turkish
- use_ascii=False,
- charsets=['ISO-8859-3', 'ISO-8859-9',
- 'WINDOWS-1254'],
- alphabet=(u'abcçdefgğhıijklmnoöprsştuüvyzâîû'
- u'ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ'),
- wiki_start_pages=[u'Ana_Sayfa']),
- 'Vietnamese': Language(name='Vietnamese',
- iso_code='vi',
- use_ascii=False,
- # Windows-1258 is the only common 8-bit
- # Vietnamese encoding supported by Python.
- # From Wikipedia:
- # For systems that lack support for Unicode,
- # dozens of 8-bit Vietnamese code pages are
- # available.[1] The most common are VISCII
- # (TCVN 5712:1993), VPS, and Windows-1258.[3]
- # Where ASCII is required, such as when
- # ensuring readability in plain text e-mail,
- # Vietnamese letters are often encoded
- # according to Vietnamese Quoted-Readable
- # (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4]
- # though usage of either variable-width
- # scheme has declined dramatically following
- # the adoption of Unicode on the World Wide
- # Web.
- charsets=['WINDOWS-1258'],
- alphabet=(u'aăâbcdđeêghiklmnoôơpqrstuưvxy'
- u'AĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY'),
- wiki_start_pages=[u'Chữ_Quốc_ngữ']),
- }
+LANGUAGES = {
+ "Arabic": Language(
+ name="Arabic",
+ iso_code="ar",
+ use_ascii=False,
+ # We only support encodings that use isolated
+ # forms, because the current recommendation is
+ # that the rendering system handles presentation
+ # forms. This means we purposefully skip IBM864.
+ charsets=["ISO-8859-6", "WINDOWS-1256", "CP720", "CP864"],
+ alphabet="ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ",
+ wiki_start_pages=["الصفحة_الرئيسية"],
+ ),
+ "Belarusian": Language(
+ name="Belarusian",
+ iso_code="be",
+ use_ascii=False,
+ charsets=["ISO-8859-5", "WINDOWS-1251", "IBM866", "MacCyrillic"],
+ alphabet="АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯабвгдеёжзійклмнопрстуўфхцчшыьэюяʼ",
+ wiki_start_pages=["Галоўная_старонка"],
+ ),
+ "Bulgarian": Language(
+ name="Bulgarian",
+ iso_code="bg",
+ use_ascii=False,
+ charsets=["ISO-8859-5", "WINDOWS-1251", "IBM855"],
+ alphabet="АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя",
+ wiki_start_pages=["Начална_страница"],
+ ),
+ "Czech": Language(
+ name="Czech",
+ iso_code="cz",
+ use_ascii=True,
+ charsets=["ISO-8859-2", "WINDOWS-1250"],
+ alphabet="áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ",
+ wiki_start_pages=["Hlavní_strana"],
+ ),
+ "Danish": Language(
+ name="Danish",
+ iso_code="da",
+ use_ascii=True,
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ alphabet="æøåÆØÅ",
+ wiki_start_pages=["Forside"],
+ ),
+ "German": Language(
+ name="German",
+ iso_code="de",
+ use_ascii=True,
+ charsets=["ISO-8859-1", "WINDOWS-1252"],
+ alphabet="äöüßÄÖÜ",
+ wiki_start_pages=["Wikipedia:Hauptseite"],
+ ),
+ "Greek": Language(
+ name="Greek",
+ iso_code="el",
+ use_ascii=False,
+ charsets=["ISO-8859-7", "WINDOWS-1253"],
+ alphabet="αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ",
+ wiki_start_pages=["Πύλη:Κύρια"],
+ ),
+ "English": Language(
+ name="English",
+ iso_code="en",
+ use_ascii=True,
+ charsets=["ISO-8859-1", "WINDOWS-1252"],
+ wiki_start_pages=["Main_Page"],
+ ),
+ "Esperanto": Language(
+ name="Esperanto",
+ iso_code="eo",
+ # Q, W, X, and Y not used at all
+ use_ascii=False,
+ charsets=["ISO-8859-3"],
+ alphabet="abcĉdefgĝhĥijĵklmnoprsŝtuŭvzABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ",
+ wiki_start_pages=["Vikipedio:Ĉefpaĝo"],
+ ),
+ "Spanish": Language(
+ name="Spanish",
+ iso_code="es",
+ use_ascii=True,
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ alphabet="ñáéíóúüÑÁÉÍÓÚÜ",
+ wiki_start_pages=["Wikipedia:Portada"],
+ ),
+ "Estonian": Language(
+ name="Estonian",
+ iso_code="et",
+ use_ascii=False,
+ charsets=["ISO-8859-4", "ISO-8859-13", "WINDOWS-1257"],
+ # C, F, Š, Q, W, X, Y, Z, Ž are only for
+ # loanwords
+ alphabet="ABDEGHIJKLMNOPRSTUVÕÄÖÜabdeghijklmnoprstuvõäöü",
+ wiki_start_pages=["Esileht"],
+ ),
+ "Finnish": Language(
+ name="Finnish",
+ iso_code="fi",
+ use_ascii=True,
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ alphabet="ÅÄÖŠŽåäöšž",
+ wiki_start_pages=["Wikipedia:Etusivu"],
+ ),
+ "French": Language(
+ name="French",
+ iso_code="fr",
+ use_ascii=True,
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ alphabet="œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ",
+ wiki_start_pages=["Wikipédia:Accueil_principal", "Bœuf (animal)"],
+ ),
+ "Hebrew": Language(
+ name="Hebrew",
+ iso_code="he",
+ use_ascii=False,
+ charsets=["ISO-8859-8", "WINDOWS-1255"],
+ alphabet="אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ",
+ wiki_start_pages=["עמוד_ראשי"],
+ ),
+ "Croatian": Language(
+ name="Croatian",
+ iso_code="hr",
+ # Q, W, X, Y are only used for foreign words.
+ use_ascii=False,
+ charsets=["ISO-8859-2", "WINDOWS-1250"],
+ alphabet="abcčćdđefghijklmnoprsštuvzžABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ",
+ wiki_start_pages=["Glavna_stranica"],
+ ),
+ "Hungarian": Language(
+ name="Hungarian",
+ iso_code="hu",
+ # Q, W, X, Y are only used for foreign words.
+ use_ascii=False,
+ charsets=["ISO-8859-2", "WINDOWS-1250"],
+ alphabet="abcdefghijklmnoprstuvzáéíóöőúüűABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ",
+ wiki_start_pages=["Kezdőlap"],
+ ),
+ "Italian": Language(
+ name="Italian",
+ iso_code="it",
+ use_ascii=True,
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ alphabet="ÀÈÉÌÒÓÙàèéìòóù",
+ wiki_start_pages=["Pagina_principale"],
+ ),
+ "Lithuanian": Language(
+ name="Lithuanian",
+ iso_code="lt",
+ use_ascii=False,
+ charsets=["ISO-8859-13", "WINDOWS-1257", "ISO-8859-4"],
+ # Q, W, and X not used at all
+ alphabet="AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽaąbcčdeęėfghiįyjklmnoprsštuųūvzž",
+ wiki_start_pages=["Pagrindinis_puslapis"],
+ ),
+ "Latvian": Language(
+ name="Latvian",
+ iso_code="lv",
+ use_ascii=False,
+ charsets=["ISO-8859-13", "WINDOWS-1257", "ISO-8859-4"],
+ # Q, W, X, Y are only for loanwords
+ alphabet="AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽaābcčdeēfgģhiījkķlļmnņoprsštuūvzž",
+ wiki_start_pages=["Sākumlapa"],
+ ),
+ "Macedonian": Language(
+ name="Macedonian",
+ iso_code="mk",
+ use_ascii=False,
+ charsets=["ISO-8859-5", "WINDOWS-1251", "MacCyrillic", "IBM855"],
+ alphabet="АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШабвгдѓежзѕијклљмнњопрстќуфхцчџш",
+ wiki_start_pages=["Главна_страница"],
+ ),
+ "Dutch": Language(
+ name="Dutch",
+ iso_code="nl",
+ use_ascii=True,
+ charsets=["ISO-8859-1", "WINDOWS-1252"],
+ wiki_start_pages=["Hoofdpagina"],
+ ),
+ "Polish": Language(
+ name="Polish",
+ iso_code="pl",
+ # Q and X are only used for foreign words.
+ use_ascii=False,
+ charsets=["ISO-8859-2", "WINDOWS-1250"],
+ alphabet="AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻaąbcćdeęfghijklłmnńoóprsśtuwyzźż",
+ wiki_start_pages=["Wikipedia:Strona_główna"],
+ ),
+ "Portuguese": Language(
+ name="Portuguese",
+ iso_code="pt",
+ use_ascii=True,
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ alphabet="ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú",
+ wiki_start_pages=["Wikipédia:Página_principal"],
+ ),
+ "Romanian": Language(
+ name="Romanian",
+ iso_code="ro",
+ use_ascii=True,
+ charsets=["ISO-8859-2", "WINDOWS-1250"],
+ alphabet="ăâîșțĂÂÎȘȚ",
+ wiki_start_pages=["Pagina_principală"],
+ ),
+ "Russian": Language(
+ name="Russian",
+ iso_code="ru",
+ use_ascii=False,
+ charsets=[
+ "ISO-8859-5",
+ "WINDOWS-1251",
+ "KOI8-R",
+ "MacCyrillic",
+ "IBM866",
+ "IBM855",
+ ],
+ alphabet="абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ",
+ wiki_start_pages=["Заглавная_страница"],
+ ),
+ "Slovak": Language(
+ name="Slovak",
+ iso_code="sk",
+ use_ascii=True,
+ charsets=["ISO-8859-2", "WINDOWS-1250"],
+ alphabet="áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ",
+ wiki_start_pages=["Hlavná_stránka"],
+ ),
+ "Slovene": Language(
+ name="Slovene",
+ iso_code="sl",
+ # Q, W, X, Y are only used for foreign words.
+ use_ascii=False,
+ charsets=["ISO-8859-2", "WINDOWS-1250"],
+ alphabet="abcčdefghijklmnoprsštuvzžABCČDEFGHIJKLMNOPRSŠTUVZŽ",
+ wiki_start_pages=["Glavna_stran"],
+ ),
+ # Serbian can be written in both Latin and Cyrillic, but there's no
+ # simple way to get the Latin alphabet pages from Wikipedia through
+ # the API, so for now we just support Cyrillic.
+ "Serbian": Language(
+ name="Serbian",
+ iso_code="sr",
+ alphabet="АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШабвгдђежзијклљмнњопрстћуфхцчџш",
+ charsets=["ISO-8859-5", "WINDOWS-1251", "MacCyrillic", "IBM855"],
+ wiki_start_pages=["Главна_страна"],
+ ),
+ "Thai": Language(
+ name="Thai",
+ iso_code="th",
+ use_ascii=False,
+ charsets=["ISO-8859-11", "TIS-620", "CP874"],
+ alphabet="กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛",
+ wiki_start_pages=["หน้าหลัก"],
+ ),
+ "Turkish": Language(
+ name="Turkish",
+ iso_code="tr",
+ # Q, W, and X are not used by Turkish
+ use_ascii=False,
+ charsets=["ISO-8859-3", "ISO-8859-9", "WINDOWS-1254"],
+ alphabet="abcçdefgğhıijklmnoöprsştuüvyzâîûABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ",
+ wiki_start_pages=["Ana_Sayfa"],
+ ),
+ "Vietnamese": Language(
+ name="Vietnamese",
+ iso_code="vi",
+ use_ascii=False,
+ # Windows-1258 is the only common 8-bit
+ # Vietnamese encoding supported by Python.
+ # From Wikipedia:
+ # For systems that lack support for Unicode,
+ # dozens of 8-bit Vietnamese code pages are
+ # available.[1] The most common are VISCII
+ # (TCVN 5712:1993), VPS, and Windows-1258.[3]
+ # Where ASCII is required, such as when
+ # ensuring readability in plain text e-mail,
+ # Vietnamese letters are often encoded
+ # according to Vietnamese Quoted-Readable
+ # (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4]
+ # though usage of either variable-width
+ # scheme has declined dramatically following
+ # the adoption of Unicode on the World Wide
+ # Web.
+ charsets=["WINDOWS-1258"],
+ alphabet="aăâbcdđeêghiklmnoôơpqrstuưvxyAĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY",
+ wiki_start_pages=["Chữ_Quốc_ngữ"],
+ ),
+}
diff --git a/libs/chardet/sbcharsetprober.py b/libs/chardet/sbcharsetprober.py
index 46ba835c6..31d70e154 100644
--- a/libs/chardet/sbcharsetprober.py
+++ b/libs/chardet/sbcharsetprober.py
@@ -31,44 +31,49 @@ from collections import namedtuple
from .charsetprober import CharSetProber
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
-
-SingleByteCharSetModel = namedtuple('SingleByteCharSetModel',
- ['charset_name',
- 'language',
- 'char_to_order_map',
- 'language_model',
- 'typical_positive_ratio',
- 'keep_ascii_letters',
- 'alphabet'])
+SingleByteCharSetModel = namedtuple(
+ "SingleByteCharSetModel",
+ [
+ "charset_name",
+ "language",
+ "char_to_order_map",
+ "language_model",
+ "typical_positive_ratio",
+ "keep_ascii_letters",
+ "alphabet",
+ ],
+)
class SingleByteCharSetProber(CharSetProber):
SAMPLE_SIZE = 64
- SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2
+ SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2
POSITIVE_SHORTCUT_THRESHOLD = 0.95
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
- def __init__(self, model, reversed=False, name_prober=None):
- super(SingleByteCharSetProber, self).__init__()
+ def __init__(self, model, is_reversed=False, name_prober=None):
+ super().__init__()
self._model = model
# TRUE if we need to reverse every pair in the model lookup
- self._reversed = reversed
+ self._reversed = is_reversed
# Optional auxiliary prober for name decision
self._name_prober = name_prober
self._last_order = None
self._seq_counters = None
self._total_seqs = None
self._total_char = None
+ self._control_char = None
self._freq_char = None
self.reset()
def reset(self):
- super(SingleByteCharSetProber, self).reset()
+ super().reset()
# char order of last character
self._last_order = 255
self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
self._total_seqs = 0
self._total_char = 0
+ self._control_char = 0
# characters that fall in our sampling range
self._freq_char = 0
@@ -76,20 +81,20 @@ class SingleByteCharSetProber(CharSetProber):
def charset_name(self):
if self._name_prober:
return self._name_prober.charset_name
- else:
- return self._model.charset_name
+ return self._model.charset_name
@property
def language(self):
if self._name_prober:
return self._name_prober.language
- else:
- return self._model.language
+ return self._model.language
def feed(self, byte_str):
# TODO: Make filter_international_words keep things in self.alphabet
if not self._model.keep_ascii_letters:
byte_str = self.filter_international_words(byte_str)
+ else:
+ byte_str = self.remove_xml_tags(byte_str)
if not byte_str:
return self.state
char_to_order_map = self._model.char_to_order_map
@@ -103,9 +108,6 @@ class SingleByteCharSetProber(CharSetProber):
# _total_char purposes.
if order < CharacterCategory.CONTROL:
self._total_char += 1
- # TODO: Follow uchardet's lead and discount confidence for frequent
- # control characters.
- # See https://github.com/BYVoid/uchardet/commit/55b4f23971db61
if order < self.SAMPLE_SIZE:
self._freq_char += 1
if self._last_order < self.SAMPLE_SIZE:
@@ -122,14 +124,17 @@ class SingleByteCharSetProber(CharSetProber):
if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
confidence = self.get_confidence()
if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
- self.logger.debug('%s confidence = %s, we have a winner',
- charset_name, confidence)
+ self.logger.debug(
+ "%s confidence = %s, we have a winner", charset_name, confidence
+ )
self._state = ProbingState.FOUND_IT
elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
- self.logger.debug('%s confidence = %s, below negative '
- 'shortcut threshhold %s', charset_name,
- confidence,
- self.NEGATIVE_SHORTCUT_THRESHOLD)
+ self.logger.debug(
+ "%s confidence = %s, below negative shortcut threshold %s",
+ charset_name,
+ confidence,
+ self.NEGATIVE_SHORTCUT_THRESHOLD,
+ )
self._state = ProbingState.NOT_ME
return self.state
@@ -137,8 +142,18 @@ class SingleByteCharSetProber(CharSetProber):
def get_confidence(self):
r = 0.01
if self._total_seqs > 0:
- r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
- self._total_seqs / self._model.typical_positive_ratio)
+ r = (
+ (
+ self._seq_counters[SequenceLikelihood.POSITIVE]
+ + 0.25 * self._seq_counters[SequenceLikelihood.LIKELY]
+ )
+ / self._total_seqs
+ / self._model.typical_positive_ratio
+ )
+ # The more control characters (proportionnaly to the size
+ # of the text), the less confident we become in the current
+ # charset.
+ r = r * (self._total_char - self._control_char) / self._total_char
r = r * self._freq_char / self._total_char
if r >= 1.0:
r = 0.99
diff --git a/libs/chardet/sbcsgroupprober.py b/libs/chardet/sbcsgroupprober.py
index bdeef4e15..cad001cb1 100644
--- a/libs/chardet/sbcsgroupprober.py
+++ b/libs/chardet/sbcsgroupprober.py
@@ -28,16 +28,20 @@
from .charsetgroupprober import CharSetGroupProber
from .hebrewprober import HebrewProber
-from .langbulgarianmodel import (ISO_8859_5_BULGARIAN_MODEL,
- WINDOWS_1251_BULGARIAN_MODEL)
+from .langbulgarianmodel import ISO_8859_5_BULGARIAN_MODEL, WINDOWS_1251_BULGARIAN_MODEL
from .langgreekmodel import ISO_8859_7_GREEK_MODEL, WINDOWS_1253_GREEK_MODEL
from .langhebrewmodel import WINDOWS_1255_HEBREW_MODEL
+
# from .langhungarianmodel import (ISO_8859_2_HUNGARIAN_MODEL,
# WINDOWS_1250_HUNGARIAN_MODEL)
-from .langrussianmodel import (IBM855_RUSSIAN_MODEL, IBM866_RUSSIAN_MODEL,
- ISO_8859_5_RUSSIAN_MODEL, KOI8_R_RUSSIAN_MODEL,
- MACCYRILLIC_RUSSIAN_MODEL,
- WINDOWS_1251_RUSSIAN_MODEL)
+from .langrussianmodel import (
+ IBM855_RUSSIAN_MODEL,
+ IBM866_RUSSIAN_MODEL,
+ ISO_8859_5_RUSSIAN_MODEL,
+ KOI8_R_RUSSIAN_MODEL,
+ MACCYRILLIC_RUSSIAN_MODEL,
+ WINDOWS_1251_RUSSIAN_MODEL,
+)
from .langthaimodel import TIS_620_THAI_MODEL
from .langturkishmodel import ISO_8859_9_TURKISH_MODEL
from .sbcharsetprober import SingleByteCharSetProber
@@ -45,16 +49,17 @@ from .sbcharsetprober import SingleByteCharSetProber
class SBCSGroupProber(CharSetGroupProber):
def __init__(self):
- super(SBCSGroupProber, self).__init__()
+ super().__init__()
hebrew_prober = HebrewProber()
- logical_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL,
- False, hebrew_prober)
+ logical_hebrew_prober = SingleByteCharSetProber(
+ WINDOWS_1255_HEBREW_MODEL, is_reversed=False, name_prober=hebrew_prober
+ )
# TODO: See if using ISO-8859-8 Hebrew model works better here, since
# it's actually the visual one
- visual_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL,
- True, hebrew_prober)
- hebrew_prober.set_model_probers(logical_hebrew_prober,
- visual_hebrew_prober)
+ visual_hebrew_prober = SingleByteCharSetProber(
+ WINDOWS_1255_HEBREW_MODEL, is_reversed=True, name_prober=hebrew_prober
+ )
+ hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
# TODO: ORDER MATTERS HERE. I changed the order vs what was in master
# and several tests failed that did not before. Some thought
# should be put into the ordering, and we should consider making
diff --git a/libs/chardet/sjisprober.py b/libs/chardet/sjisprober.py
index 9e29623bd..3bcbdb71d 100644
--- a/libs/chardet/sjisprober.py
+++ b/libs/chardet/sjisprober.py
@@ -25,24 +25,24 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
from .chardistribution import SJISDistributionAnalysis
+from .codingstatemachine import CodingStateMachine
+from .enums import MachineState, ProbingState
from .jpcntx import SJISContextAnalysis
+from .mbcharsetprober import MultiByteCharSetProber
from .mbcssm import SJIS_SM_MODEL
-from .enums import ProbingState, MachineState
class SJISProber(MultiByteCharSetProber):
def __init__(self):
- super(SJISProber, self).__init__()
+ super().__init__()
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
self.distribution_analyzer = SJISDistributionAnalysis()
self.context_analyzer = SJISContextAnalysis()
self.reset()
def reset(self):
- super(SJISProber, self).reset()
+ super().reset()
self.context_analyzer.reset()
@property
@@ -54,34 +54,40 @@ class SJISProber(MultiByteCharSetProber):
return "Japanese"
def feed(self, byte_str):
- for i in range(len(byte_str)):
- coding_state = self.coding_sm.next_state(byte_str[i])
+ for i, byte in enumerate(byte_str):
+ coding_state = self.coding_sm.next_state(byte)
if coding_state == MachineState.ERROR:
- self.logger.debug('%s %s prober hit error at byte %s',
- self.charset_name, self.language, i)
+ self.logger.debug(
+ "%s %s prober hit error at byte %s",
+ self.charset_name,
+ self.language,
+ i,
+ )
self._state = ProbingState.NOT_ME
break
- elif coding_state == MachineState.ITS_ME:
+ if coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
- elif coding_state == MachineState.START:
+ if coding_state == MachineState.START:
char_len = self.coding_sm.get_current_charlen()
if i == 0:
- self._last_char[1] = byte_str[0]
- self.context_analyzer.feed(self._last_char[2 - char_len:],
- char_len)
+ self._last_char[1] = byte
+ self.context_analyzer.feed(
+ self._last_char[2 - char_len :], char_len
+ )
self.distribution_analyzer.feed(self._last_char, char_len)
else:
- self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
- - char_len], char_len)
- self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
- char_len)
+ self.context_analyzer.feed(
+ byte_str[i + 1 - char_len : i + 3 - char_len], char_len
+ )
+ self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len)
self._last_char[0] = byte_str[-1]
if self.state == ProbingState.DETECTING:
- if (self.context_analyzer.got_enough_data() and
- (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
+ if self.context_analyzer.got_enough_data() and (
+ self.get_confidence() > self.SHORTCUT_THRESHOLD
+ ):
self._state = ProbingState.FOUND_IT
return self.state
diff --git a/libs/chardet/universaldetector.py b/libs/chardet/universaldetector.py
index 055a8ac1b..22fcf8290 100644
--- a/libs/chardet/universaldetector.py
+++ b/libs/chardet/universaldetector.py
@@ -46,9 +46,10 @@ from .escprober import EscCharSetProber
from .latin1prober import Latin1Prober
from .mbcsgroupprober import MBCSGroupProber
from .sbcsgroupprober import SBCSGroupProber
+from .utf1632prober import UTF1632Prober
-class UniversalDetector(object):
+class UniversalDetector:
"""
The ``UniversalDetector`` class underlies the ``chardet.detect`` function
and coordinates all of the different charset probers.
@@ -66,20 +67,23 @@ class UniversalDetector(object):
"""
MINIMUM_THRESHOLD = 0.20
- HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]')
- ESC_DETECTOR = re.compile(b'(\033|~{)')
- WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]')
- ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252',
- 'iso-8859-2': 'Windows-1250',
- 'iso-8859-5': 'Windows-1251',
- 'iso-8859-6': 'Windows-1256',
- 'iso-8859-7': 'Windows-1253',
- 'iso-8859-8': 'Windows-1255',
- 'iso-8859-9': 'Windows-1254',
- 'iso-8859-13': 'Windows-1257'}
+ HIGH_BYTE_DETECTOR = re.compile(b"[\x80-\xFF]")
+ ESC_DETECTOR = re.compile(b"(\033|~{)")
+ WIN_BYTE_DETECTOR = re.compile(b"[\x80-\x9F]")
+ ISO_WIN_MAP = {
+ "iso-8859-1": "Windows-1252",
+ "iso-8859-2": "Windows-1250",
+ "iso-8859-5": "Windows-1251",
+ "iso-8859-6": "Windows-1256",
+ "iso-8859-7": "Windows-1253",
+ "iso-8859-8": "Windows-1255",
+ "iso-8859-9": "Windows-1254",
+ "iso-8859-13": "Windows-1257",
+ }
def __init__(self, lang_filter=LanguageFilter.ALL):
self._esc_charset_prober = None
+ self._utf1632_prober = None
self._charset_probers = []
self.result = None
self.done = None
@@ -91,20 +95,34 @@ class UniversalDetector(object):
self._has_win_bytes = None
self.reset()
+ @property
+ def input_state(self):
+ return self._input_state
+
+ @property
+ def has_win_bytes(self):
+ return self._has_win_bytes
+
+ @property
+ def charset_probers(self):
+ return self._charset_probers
+
def reset(self):
"""
Reset the UniversalDetector and all of its probers back to their
initial states. This is called by ``__init__``, so you only need to
call this directly in between analyses of different documents.
"""
- self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
+ self.result = {"encoding": None, "confidence": 0.0, "language": None}
self.done = False
self._got_data = False
self._has_win_bytes = False
self._input_state = InputState.PURE_ASCII
- self._last_char = b''
+ self._last_char = b""
if self._esc_charset_prober:
self._esc_charset_prober.reset()
+ if self._utf1632_prober:
+ self._utf1632_prober.reset()
for prober in self._charset_probers:
prober.reset()
@@ -125,7 +143,7 @@ class UniversalDetector(object):
if self.done:
return
- if not len(byte_str):
+ if not byte_str:
return
if not isinstance(byte_str, bytearray):
@@ -136,35 +154,36 @@ class UniversalDetector(object):
# If the data starts with BOM, we know it is UTF
if byte_str.startswith(codecs.BOM_UTF8):
# EF BB BF UTF-8 with BOM
- self.result = {'encoding': "UTF-8-SIG",
- 'confidence': 1.0,
- 'language': ''}
- elif byte_str.startswith((codecs.BOM_UTF32_LE,
- codecs.BOM_UTF32_BE)):
+ self.result = {
+ "encoding": "UTF-8-SIG",
+ "confidence": 1.0,
+ "language": "",
+ }
+ elif byte_str.startswith((codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)):
# FF FE 00 00 UTF-32, little-endian BOM
# 00 00 FE FF UTF-32, big-endian BOM
- self.result = {'encoding': "UTF-32",
- 'confidence': 1.0,
- 'language': ''}
- elif byte_str.startswith(b'\xFE\xFF\x00\x00'):
+ self.result = {"encoding": "UTF-32", "confidence": 1.0, "language": ""}
+ elif byte_str.startswith(b"\xFE\xFF\x00\x00"):
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
- self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
- 'confidence': 1.0,
- 'language': ''}
- elif byte_str.startswith(b'\x00\x00\xFF\xFE'):
+ self.result = {
+ "encoding": "X-ISO-10646-UCS-4-3412",
+ "confidence": 1.0,
+ "language": "",
+ }
+ elif byte_str.startswith(b"\x00\x00\xFF\xFE"):
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
- self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
- 'confidence': 1.0,
- 'language': ''}
+ self.result = {
+ "encoding": "X-ISO-10646-UCS-4-2143",
+ "confidence": 1.0,
+ "language": "",
+ }
elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)):
# FF FE UTF-16, little endian BOM
# FE FF UTF-16, big endian BOM
- self.result = {'encoding': "UTF-16",
- 'confidence': 1.0,
- 'language': ''}
+ self.result = {"encoding": "UTF-16", "confidence": 1.0, "language": ""}
self._got_data = True
- if self.result['encoding'] is not None:
+ if self.result["encoding"] is not None:
self.done = True
return
@@ -173,12 +192,29 @@ class UniversalDetector(object):
if self._input_state == InputState.PURE_ASCII:
if self.HIGH_BYTE_DETECTOR.search(byte_str):
self._input_state = InputState.HIGH_BYTE
- elif self._input_state == InputState.PURE_ASCII and \
- self.ESC_DETECTOR.search(self._last_char + byte_str):
+ elif (
+ self._input_state == InputState.PURE_ASCII
+ and self.ESC_DETECTOR.search(self._last_char + byte_str)
+ ):
self._input_state = InputState.ESC_ASCII
self._last_char = byte_str[-1:]
+ # next we will look to see if it is appears to be either a UTF-16 or
+ # UTF-32 encoding
+ if not self._utf1632_prober:
+ self._utf1632_prober = UTF1632Prober()
+
+ if self._utf1632_prober.state == ProbingState.DETECTING:
+ if self._utf1632_prober.feed(byte_str) == ProbingState.FOUND_IT:
+ self.result = {
+ "encoding": self._utf1632_prober.charset_name,
+ "confidence": self._utf1632_prober.get_confidence(),
+ "language": "",
+ }
+ self.done = True
+ return
+
# If we've seen escape sequences, use the EscCharSetProber, which
# uses a simple state machine to check for known escape sequences in
# HZ and ISO-2022 encodings, since those are the only encodings that
@@ -187,12 +223,11 @@ class UniversalDetector(object):
if not self._esc_charset_prober:
self._esc_charset_prober = EscCharSetProber(self.lang_filter)
if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
- self.result = {'encoding':
- self._esc_charset_prober.charset_name,
- 'confidence':
- self._esc_charset_prober.get_confidence(),
- 'language':
- self._esc_charset_prober.language}
+ self.result = {
+ "encoding": self._esc_charset_prober.charset_name,
+ "confidence": self._esc_charset_prober.get_confidence(),
+ "language": self._esc_charset_prober.language,
+ }
self.done = True
# If we've seen high bytes (i.e., those with values greater than 127),
# we need to do more complicated checks using all our multi-byte and
@@ -209,9 +244,11 @@ class UniversalDetector(object):
self._charset_probers.append(Latin1Prober())
for prober in self._charset_probers:
if prober.feed(byte_str) == ProbingState.FOUND_IT:
- self.result = {'encoding': prober.charset_name,
- 'confidence': prober.get_confidence(),
- 'language': prober.language}
+ self.result = {
+ "encoding": prober.charset_name,
+ "confidence": prober.get_confidence(),
+ "language": prober.language,
+ }
self.done = True
break
if self.WIN_BYTE_DETECTOR.search(byte_str):
@@ -231,13 +268,11 @@ class UniversalDetector(object):
self.done = True
if not self._got_data:
- self.logger.debug('no data received!')
+ self.logger.debug("no data received!")
# Default to ASCII if it is all we've seen so far
elif self._input_state == InputState.PURE_ASCII:
- self.result = {'encoding': 'ascii',
- 'confidence': 1.0,
- 'language': ''}
+ self.result = {"encoding": "ascii", "confidence": 1.0, "language": ""}
# If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD
elif self._input_state == InputState.HIGH_BYTE:
@@ -257,30 +292,37 @@ class UniversalDetector(object):
confidence = max_prober.get_confidence()
# Use Windows encoding name instead of ISO-8859 if we saw any
# extra Windows-specific bytes
- if lower_charset_name.startswith('iso-8859'):
+ if lower_charset_name.startswith("iso-8859"):
if self._has_win_bytes:
- charset_name = self.ISO_WIN_MAP.get(lower_charset_name,
- charset_name)
- self.result = {'encoding': charset_name,
- 'confidence': confidence,
- 'language': max_prober.language}
+ charset_name = self.ISO_WIN_MAP.get(
+ lower_charset_name, charset_name
+ )
+ self.result = {
+ "encoding": charset_name,
+ "confidence": confidence,
+ "language": max_prober.language,
+ }
# Log all prober confidences if none met MINIMUM_THRESHOLD
if self.logger.getEffectiveLevel() <= logging.DEBUG:
- if self.result['encoding'] is None:
- self.logger.debug('no probers hit minimum threshold')
+ if self.result["encoding"] is None:
+ self.logger.debug("no probers hit minimum threshold")
for group_prober in self._charset_probers:
if not group_prober:
continue
if isinstance(group_prober, CharSetGroupProber):
for prober in group_prober.probers:
- self.logger.debug('%s %s confidence = %s',
- prober.charset_name,
- prober.language,
- prober.get_confidence())
+ self.logger.debug(
+ "%s %s confidence = %s",
+ prober.charset_name,
+ prober.language,
+ prober.get_confidence(),
+ )
else:
- self.logger.debug('%s %s confidence = %s',
- group_prober.charset_name,
- group_prober.language,
- group_prober.get_confidence())
+ self.logger.debug(
+ "%s %s confidence = %s",
+ group_prober.charset_name,
+ group_prober.language,
+ group_prober.get_confidence(),
+ )
return self.result
diff --git a/libs/chardet/utf1632prober.py b/libs/chardet/utf1632prober.py
new file mode 100644
index 000000000..9fd1580b8
--- /dev/null
+++ b/libs/chardet/utf1632prober.py
@@ -0,0 +1,223 @@
+######################## BEGIN LICENSE BLOCK ########################
+#
+# Contributor(s):
+# Jason Zavaglia
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+from .charsetprober import CharSetProber
+from .enums import ProbingState
+
+
+class UTF1632Prober(CharSetProber):
+ """
+ This class simply looks for occurrences of zero bytes, and infers
+ whether the file is UTF16 or UTF32 (low-endian or big-endian)
+ For instance, files looking like ( \0 \0 \0 [nonzero] )+
+ have a good probability to be UTF32BE. Files looking like ( \0 [nonzero] )+
+ may be guessed to be UTF16BE, and inversely for little-endian varieties.
+ """
+
+ # how many logical characters to scan before feeling confident of prediction
+ MIN_CHARS_FOR_DETECTION = 20
+ # a fixed constant ratio of expected zeros or non-zeros in modulo-position.
+ EXPECTED_RATIO = 0.94
+
+ def __init__(self):
+ super().__init__()
+ self.position = 0
+ self.zeros_at_mod = [0] * 4
+ self.nonzeros_at_mod = [0] * 4
+ self._state = ProbingState.DETECTING
+ self.quad = [0, 0, 0, 0]
+ self.invalid_utf16be = False
+ self.invalid_utf16le = False
+ self.invalid_utf32be = False
+ self.invalid_utf32le = False
+ self.first_half_surrogate_pair_detected_16be = False
+ self.first_half_surrogate_pair_detected_16le = False
+ self.reset()
+
+ def reset(self):
+ super().reset()
+ self.position = 0
+ self.zeros_at_mod = [0] * 4
+ self.nonzeros_at_mod = [0] * 4
+ self._state = ProbingState.DETECTING
+ self.invalid_utf16be = False
+ self.invalid_utf16le = False
+ self.invalid_utf32be = False
+ self.invalid_utf32le = False
+ self.first_half_surrogate_pair_detected_16be = False
+ self.first_half_surrogate_pair_detected_16le = False
+ self.quad = [0, 0, 0, 0]
+
+ @property
+ def charset_name(self):
+ if self.is_likely_utf32be():
+ return "utf-32be"
+ if self.is_likely_utf32le():
+ return "utf-32le"
+ if self.is_likely_utf16be():
+ return "utf-16be"
+ if self.is_likely_utf16le():
+ return "utf-16le"
+ # default to something valid
+ return "utf-16"
+
+ @property
+ def language(self):
+ return ""
+
+ def approx_32bit_chars(self):
+ return max(1.0, self.position / 4.0)
+
+ def approx_16bit_chars(self):
+ return max(1.0, self.position / 2.0)
+
+ def is_likely_utf32be(self):
+ approx_chars = self.approx_32bit_chars()
+ return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
+ self.zeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO
+ and self.zeros_at_mod[1] / approx_chars > self.EXPECTED_RATIO
+ and self.zeros_at_mod[2] / approx_chars > self.EXPECTED_RATIO
+ and self.nonzeros_at_mod[3] / approx_chars > self.EXPECTED_RATIO
+ and not self.invalid_utf32be
+ )
+
+ def is_likely_utf32le(self):
+ approx_chars = self.approx_32bit_chars()
+ return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
+ self.nonzeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO
+ and self.zeros_at_mod[1] / approx_chars > self.EXPECTED_RATIO
+ and self.zeros_at_mod[2] / approx_chars > self.EXPECTED_RATIO
+ and self.zeros_at_mod[3] / approx_chars > self.EXPECTED_RATIO
+ and not self.invalid_utf32le
+ )
+
+ def is_likely_utf16be(self):
+ approx_chars = self.approx_16bit_chars()
+ return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
+ (self.nonzeros_at_mod[1] + self.nonzeros_at_mod[3]) / approx_chars
+ > self.EXPECTED_RATIO
+ and (self.zeros_at_mod[0] + self.zeros_at_mod[2]) / approx_chars
+ > self.EXPECTED_RATIO
+ and not self.invalid_utf16be
+ )
+
+ def is_likely_utf16le(self):
+ approx_chars = self.approx_16bit_chars()
+ return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
+ (self.nonzeros_at_mod[0] + self.nonzeros_at_mod[2]) / approx_chars
+ > self.EXPECTED_RATIO
+ and (self.zeros_at_mod[1] + self.zeros_at_mod[3]) / approx_chars
+ > self.EXPECTED_RATIO
+ and not self.invalid_utf16le
+ )
+
+ def validate_utf32_characters(self, quad):
+ """
+ Validate if the quad of bytes is valid UTF-32.
+
+ UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
+ excluding 0x0000D800 - 0x0000DFFF
+
+ https://en.wikipedia.org/wiki/UTF-32
+ """
+ if (
+ quad[0] != 0
+ or quad[1] > 0x10
+ or (quad[0] == 0 and quad[1] == 0 and 0xD8 <= quad[2] <= 0xDF)
+ ):
+ self.invalid_utf32be = True
+ if (
+ quad[3] != 0
+ or quad[2] > 0x10
+ or (quad[3] == 0 and quad[2] == 0 and 0xD8 <= quad[1] <= 0xDF)
+ ):
+ self.invalid_utf32le = True
+
+ def validate_utf16_characters(self, pair):
+ """
+ Validate if the pair of bytes is valid UTF-16.
+
+ UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
+ with an exception for surrogate pairs, which must be in the range
+ 0xD800-0xDBFF followed by 0xDC00-0xDFFF
+
+ https://en.wikipedia.org/wiki/UTF-16
+ """
+ if not self.first_half_surrogate_pair_detected_16be:
+ if 0xD8 <= pair[0] <= 0xDB:
+ self.first_half_surrogate_pair_detected_16be = True
+ elif 0xDC <= pair[0] <= 0xDF:
+ self.invalid_utf16be = True
+ else:
+ if 0xDC <= pair[0] <= 0xDF:
+ self.first_half_surrogate_pair_detected_16be = False
+ else:
+ self.invalid_utf16be = True
+
+ if not self.first_half_surrogate_pair_detected_16le:
+ if 0xD8 <= pair[1] <= 0xDB:
+ self.first_half_surrogate_pair_detected_16le = True
+ elif 0xDC <= pair[1] <= 0xDF:
+ self.invalid_utf16le = True
+ else:
+ if 0xDC <= pair[1] <= 0xDF:
+ self.first_half_surrogate_pair_detected_16le = False
+ else:
+ self.invalid_utf16le = True
+
+ def feed(self, byte_str):
+ for c in byte_str:
+ mod4 = self.position % 4
+ self.quad[mod4] = c
+ if mod4 == 3:
+ self.validate_utf32_characters(self.quad)
+ self.validate_utf16_characters(self.quad[0:2])
+ self.validate_utf16_characters(self.quad[2:4])
+ if c == 0:
+ self.zeros_at_mod[mod4] += 1
+ else:
+ self.nonzeros_at_mod[mod4] += 1
+ self.position += 1
+ return self.state
+
+ @property
+ def state(self):
+ if self._state in {ProbingState.NOT_ME, ProbingState.FOUND_IT}:
+ # terminal, decided states
+ return self._state
+ if self.get_confidence() > 0.80:
+ self._state = ProbingState.FOUND_IT
+ elif self.position > 4 * 1024:
+ # if we get to 4kb into the file, and we can't conclude it's UTF,
+ # let's give up
+ self._state = ProbingState.NOT_ME
+ return self._state
+
+ def get_confidence(self):
+ return (
+ 0.85
+ if (
+ self.is_likely_utf16le()
+ or self.is_likely_utf16be()
+ or self.is_likely_utf32le()
+ or self.is_likely_utf32be()
+ )
+ else 0.00
+ )
diff --git a/libs/chardet/utf8prober.py b/libs/chardet/utf8prober.py
index 6c3196cc2..3aae09e86 100644
--- a/libs/chardet/utf8prober.py
+++ b/libs/chardet/utf8prober.py
@@ -26,23 +26,22 @@
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
-from .enums import ProbingState, MachineState
from .codingstatemachine import CodingStateMachine
+from .enums import MachineState, ProbingState
from .mbcssm import UTF8_SM_MODEL
-
class UTF8Prober(CharSetProber):
ONE_CHAR_PROB = 0.5
def __init__(self):
- super(UTF8Prober, self).__init__()
+ super().__init__()
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
self._num_mb_chars = None
self.reset()
def reset(self):
- super(UTF8Prober, self).reset()
+ super().reset()
self.coding_sm.reset()
self._num_mb_chars = 0
@@ -60,10 +59,10 @@ class UTF8Prober(CharSetProber):
if coding_state == MachineState.ERROR:
self._state = ProbingState.NOT_ME
break
- elif coding_state == MachineState.ITS_ME:
+ if coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
- elif coding_state == MachineState.START:
+ if coding_state == MachineState.START:
if self.coding_sm.get_current_charlen() >= 2:
self._num_mb_chars += 1
@@ -76,7 +75,6 @@ class UTF8Prober(CharSetProber):
def get_confidence(self):
unlike = 0.99
if self._num_mb_chars < 6:
- unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
+ unlike *= self.ONE_CHAR_PROB**self._num_mb_chars
return 1.0 - unlike
- else:
- return unlike
+ return unlike
diff --git a/libs/chardet/version.py b/libs/chardet/version.py
index 70369b9d6..a08a06b9a 100644
--- a/libs/chardet/version.py
+++ b/libs/chardet/version.py
@@ -5,5 +5,5 @@ from within setup.py and from chardet subpackages.
:author: Dan Blanchard ([email protected])
"""
-__version__ = "4.0.0"
-VERSION = __version__.split('.')
+__version__ = "5.0.0"
+VERSION = __version__.split(".")