summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: morpheus65535 <[email protected]> 2023-06-23 10:06:46 -0400
committer: morpheus65535 <[email protected]> 2023-06-23 10:06:46 -0400
commit: f371d0585be39bc528eafa714397c601f13a062d (patch)
tree: 01a9909b45ec190a812bb382c1b67326452ef7fb
parent: edfbb1a5ca0edb43eb9e88ae83740c10649a8a9b (diff)
download: bazarr-f371d0585be39bc528eafa714397c601f13a062d.tar.gz
bazarr-f371d0585be39bc528eafa714397c601f13a062d.zip
no log: added failsafe to encoding detection (v1.2.2-beta.25)
-rw-r--r-- bazarr/subtitles/indexer/utils.py 40
1 files changed, 20 insertions, 20 deletions
diff --git a/bazarr/subtitles/indexer/utils.py b/bazarr/subtitles/indexer/utils.py
index f34a26fa6..9ca2405f7 100644
--- a/bazarr/subtitles/indexer/utils.py
+++ b/bazarr/subtitles/indexer/utils.py
@@ -76,25 +76,26 @@ def guess_external_subtitles(dest_folder, subtitles, media_type, previously_inde
with open(subtitle_path, 'rb') as f:
text = f.read()
- encoding = detect(text)['encoding']
- if not encoding:
+ encoding = detect(text)
+ if encoding and 'encoding' in encoding:
+ encoding = detect(text)['encoding']
+ else:
logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
"It's probably a binary file: " + subtitle_path)
continue
- if 'UTF' in encoding:
- text = text.decode('utf-8')
- detected_language = guess_language(text)
- # add simplified and traditional chinese detection
- if detected_language == 'zh':
- traditional_chinese_fuzzy = [u"繁", u"雙語"]
- traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant",
- ".hant", ".big5", ".traditional"]
- if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
- detected_language == 'zt'
- else:
- text = text.decode(encoding)
+ text = text.decode(encoding)
detected_language = guess_language(text)
+
+ # add simplified and traditional chinese detection
+ if detected_language == 'zh':
+ traditional_chinese_fuzzy = [u"繁", u"雙語"]
+ traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant",
+ ".hant", ".big5", ".traditional"]
+ if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or \
+ (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
+ detected_language = 'zt'
+
if detected_language:
logging.debug("BAZARR external subtitles detected and guessed this language: " + str(
detected_language))
@@ -127,15 +128,14 @@ def guess_external_subtitles(dest_folder, subtitles, media_type, previously_inde
with open(subtitle_path, 'rb') as f:
text = f.read()
- encoding = detect(text)['encoding']
- if not encoding:
+ encoding = detect(text)
+ if encoding and 'encoding' in encoding:
+ encoding = detect(text)['encoding']
+ else:
logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
"It's probably a binary file: " + subtitle_path)
continue
- if 'UTF' in encoding:
- text = text.decode('utf-8')
- else:
- text = text.decode(encoding)
+ text = text.decode(encoding)
if bool(re.search(hi_regex, text)):
subtitles[subtitle] = Language.rebuild(subtitles[subtitle], forced=False, hi=True)