diff options
author | morpheus65535 <[email protected]> | 2023-06-23 10:06:46 -0400 |
---|---|---|
committer | morpheus65535 <[email protected]> | 2023-06-23 10:06:46 -0400 |
commit | f371d0585be39bc528eafa714397c601f13a062d (patch) | |
tree | 01a9909b45ec190a812bb382c1b67326452ef7fb | |
parent | edfbb1a5ca0edb43eb9e88ae83740c10649a8a9b (diff) | |
download | bazarr-f371d0585be39bc528eafa714397c601f13a062d.tar.gz bazarr-f371d0585be39bc528eafa714397c601f13a062d.zip |
no log: added failsafe to encoding detection (tag: v1.2.2-beta.25)
-rw-r--r-- | bazarr/subtitles/indexer/utils.py | 40 |
1 files changed, 20 insertions, 20 deletions
diff --git a/bazarr/subtitles/indexer/utils.py b/bazarr/subtitles/indexer/utils.py index f34a26fa6..9ca2405f7 100644 --- a/bazarr/subtitles/indexer/utils.py +++ b/bazarr/subtitles/indexer/utils.py @@ -76,25 +76,26 @@ def guess_external_subtitles(dest_folder, subtitles, media_type, previously_inde with open(subtitle_path, 'rb') as f: text = f.read() - encoding = detect(text)['encoding'] - if not encoding: + encoding = detect(text) + if encoding and 'encoding' in encoding: + encoding = detect(text)['encoding'] + else: logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. " "It's probably a binary file: " + subtitle_path) continue - if 'UTF' in encoding: - text = text.decode('utf-8') - detected_language = guess_language(text) - # add simplified and traditional chinese detection - if detected_language == 'zh': - traditional_chinese_fuzzy = [u"繁", u"雙語"] - traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant", - ".hant", ".big5", ".traditional"] - if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy: - detected_language == 'zt' - else: - text = text.decode(encoding) + text = text.decode(encoding) detected_language = guess_language(text) + + # add simplified and traditional chinese detection + if detected_language == 'zh': + traditional_chinese_fuzzy = [u"繁", u"雙語"] + traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant", + ".hant", ".big5", ".traditional"] + if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or \ + (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy: + detected_language = 'zt' + if detected_language: logging.debug("BAZARR external subtitles detected and guessed this language: " + str( detected_language)) @@ -127,15 +128,14 @@ def guess_external_subtitles(dest_folder, subtitles, media_type, previously_inde with 
open(subtitle_path, 'rb') as f: text = f.read() - encoding = detect(text)['encoding'] - if not encoding: + encoding = detect(text) + if encoding and 'encoding' in encoding: + encoding = detect(text)['encoding'] + else: logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. " "It's probably a binary file: " + subtitle_path) continue - if 'UTF' in encoding: - text = text.decode('utf-8') - else: - text = text.decode(encoding) + text = text.decode(encoding) if bool(re.search(hi_regex, text)): subtitles[subtitle] = Language.rebuild(subtitles[subtitle], forced=False, hi=True) |