no log: added failsafe to encoding detection (v1.2.2-beta.25)

author: morpheus65535 <[email protected]> 2023-06-23 10:06:46 -0400
committer: morpheus65535 <[email protected]> 2023-06-23 10:06:46 -0400
commit: f371d0585be39bc528eafa714397c601f13a062d (patch)
tree: 01a9909b45ec190a812bb382c1b67326452ef7fb
parent: edfbb1a5ca0edb43eb9e88ae83740c10649a8a9b (diff)
download: bazarr-f371d0585be39bc528eafa714397c601f13a062d.tar.gz bazarr-f371d0585be39bc528eafa714397c601f13a062d.zip
1 file changed, 20 insertions, 20 deletions
diff --git a/bazarr/subtitles/indexer/utils.py b/bazarr/subtitles/indexer/utils.py
index f34a26fa6..9ca2405f7 100644
--- a/bazarr/subtitles/indexer/utils.py
+++ b/bazarr/subtitles/indexer/utils.py
@@ -76,25 +76,26 @@ def guess_external_subtitles(dest_folder, subtitles, media_type, previously_inde
                 with open(subtitle_path, 'rb') as f:
                     text = f.read()
 
-                encoding = detect(text)['encoding']
-                if not encoding:
+                encoding = detect(text)
+                if encoding and 'encoding' in encoding:
+                    encoding = detect(text)['encoding']
+                else:
                     logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
                                   "It's probably a binary file: " + subtitle_path)
                     continue
-                if 'UTF' in encoding:
-                    text = text.decode('utf-8')
-                    detected_language = guess_language(text)
-                    # add simplified and traditional chinese detection
-                    if detected_language == 'zh':
-                        traditional_chinese_fuzzy = [u"繁", u"雙語"]
-                        traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant",
-                                               ".hant", ".big5", ".traditional"]
-                        if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
-                            detected_language == 'zt'
-                else:
-                    text = text.decode(encoding)
+                text = text.decode(encoding)
 
                 detected_language = guess_language(text)
+
+                # add simplified and traditional chinese detection
+                if detected_language == 'zh':
+                    traditional_chinese_fuzzy = [u"繁", u"雙語"]
+                    traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant",
+                                           ".hant", ".big5", ".traditional"]
+                    if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or \
+                            (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
+                        detected_language = 'zt'
+
                 if detected_language:
                     logging.debug("BAZARR external subtitles detected and guessed this language: " + str(
                         detected_language))
@@ -127,15 +128,14 @@ def guess_external_subtitles(dest_folder, subtitles, media_type, previously_inde
                 with open(subtitle_path, 'rb') as f:
                     text = f.read()
 
-                encoding = detect(text)['encoding']
-                if not encoding:
+                encoding = detect(text)
+                if encoding and 'encoding' in encoding:
+                    encoding = detect(text)['encoding']
+                else:
                     logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
                                   "It's probably a binary file: " + subtitle_path)
                     continue
-                if 'UTF' in encoding:
-                    text = text.decode('utf-8')
-                else:
-                    text = text.decode(encoding)
+                text = text.decode(encoding)
 
                 if bool(re.search(hi_regex, text)):
                     subtitles[subtitle] = Language.rebuild(subtitles[subtitle], forced=False, hi=True)
author	morpheus65535 <[email protected]>	2023-06-23 10:06:46 -0400
committer	morpheus65535 <[email protected]>	2023-06-23 10:06:46 -0400
commit	f371d0585be39bc528eafa714397c601f13a062d (patch)
tree	01a9909b45ec190a812bb382c1b67326452ef7fb
parent	edfbb1a5ca0edb43eb9e88ae83740c10649a8a9b (diff)
download	bazarr-f371d0585be39bc528eafa714397c601f13a062d.tar.gz bazarr-f371d0585be39bc528eafa714397c601f13a062d.zip