When multiple audio streams exist, pick the correct one for whisper to process (#2688)

author: JayZed <[email protected]> 2024-09-30 16:29:11 -0400
committer: GitHub <[email protected]> 2024-09-30 16:29:11 -0400
commit: e544e1fab1a05883e394af401c7b318e5bbabe29 (patch)
tree: d1ed7567fa03ff029d5841e6dda1fe0d91271d83 /custom_libs/subliminal_patch
parent: a1fac160fbf3f71013ee25566029aa6377097940 (diff)
download: bazarr-e544e1fab1a05883e394af401c7b318e5bbabe29.tar.gz
bazarr-e544e1fab1a05883e394af401c7b318e5bbabe29.zip
1 files changed, 17 insertions, 1 deletions
diff --git a/custom_libs/subliminal_patch/providers/whisperai.py b/custom_libs/subliminal_patch/providers/whisperai.py
index 866585cdb..0546717a2 100644
--- a/custom_libs/subliminal_patch/providers/whisperai.py
+++ b/custom_libs/subliminal_patch/providers/whisperai.py
@@ -16,6 +16,7 @@ from babelfish.exceptions import LanguageReverseError
 
 import ffmpeg
 import functools
+from pycountry import languages
 
 # These are all the languages Whisper supports.
 # from whisper.tokenizer import LANGUAGES
@@ -132,6 +133,18 @@ def set_log_level(newLevel="INFO"):
 # initialize to default above
 set_log_level()
 
+# ffmpeg uses the older ISO 639-2 (bibliographic) code when selecting audio streams by language.
+# If we give it the newer ISO 639-3 code, it can't find that audio stream because the codes differ.
+# For example, it wants 'ger' instead of 'deu' for the German language,
+#                    or 'fre' instead of 'fra' for the French language.
+def get_ISO_639_2_code(iso639_3_code):
+    # find the language using ISO 639-3 code
+    language = languages.get(alpha_3=iso639_3_code)
+    # get the ISO 639-2 code or use the original input if there isn't a match
+    iso639_2_code = language.bibliographic if language and hasattr(language, 'bibliographic') else iso639_3_code
+    logger.debug(f"ffmpeg using language code '{iso639_2_code}' (instead of '{iso639_3_code}')")
+    return iso639_2_code
+
 @functools.lru_cache(2)
 def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
     logger.debug("Encoding audio stream to WAV with ffmpeg")
@@ -140,7 +153,10 @@ def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
         # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
         inp = ffmpeg.input(path, threads=0)
         if audio_stream_language:
-            logger.debug(f"Whisper will only use the {audio_stream_language} audio stream for {path}")
+            # There is more than one audio stream, so pick the requested one by its language metadata tag.
+            # Use the ISO 639-2 code if available.
+            audio_stream_language = get_ISO_639_2_code(audio_stream_language)
+            logger.debug(f"Whisper will use the '{audio_stream_language}' audio stream for {path}")
             inp = inp[f'a:m:language:{audio_stream_language}']
 
         out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000, af="aresample=async=1") \
author	JayZed <[email protected]>	2024-09-30 16:29:11 -0400
committer	GitHub <[email protected]>	2024-09-30 16:29:11 -0400
commit	e544e1fab1a05883e394af401c7b318e5bbabe29 (patch)
tree	d1ed7567fa03ff029d5841e6dda1fe0d91271d83 /custom_libs/subliminal_patch
parent	a1fac160fbf3f71013ee25566029aa6377097940 (diff)
download	bazarr-e544e1fab1a05883e394af401c7b318e5bbabe29.tar.gz bazarr-e544e1fab1a05883e394af401c7b318e5bbabe29.zip