Improved whisper provider to not throttle when an unsupported audio language is encountered. #2474

As we have noted before, bad input data should not be a reason to throttle a provider. In this case, if the input language was not supported by whisper, we raised a ValueError that was never caught; this caused an error in the whisper provider, for which it was then throttled. Instead, we are now detecting this case and logging an error message. However, given that the input language is not one of the 99 currently known to whisper, it's probably a mislabeled audio track. If the user's desired output language is English, then we will tell whisper that the input audio is also English and ask it to transcribe it. Whisper does a very good job of transcribing almost anything to English, so it's worth a try. This should address the throttling in issue #2474.
author: JayZed <[email protected]> 2024-04-29 22:11:47 -0400
committer: GitHub <[email protected]> 2024-04-29 22:11:47 -0400
commit: 5749971d67b7fa7932a8c707f50732a22615a37f (patch)
tree: 35b63d975fc2f62a60530cbba5689f97ae9b1da0 /custom_libs
parent: c5a5dc9ddf45ba6512825e667811a90665328f43 (diff)
download: bazarr-5749971d67b7fa7932a8c707f50732a22615a37f.tar.gz
bazarr-5749971d67b7fa7932a8c707f50732a22615a37f.zip
1 files changed, 15 insertions, 3 deletions
diff --git a/custom_libs/subliminal_patch/providers/whisperai.py b/custom_libs/subliminal_patch/providers/whisperai.py
index dfd733da3..d427f8ad2 100644
--- a/custom_libs/subliminal_patch/providers/whisperai.py
+++ b/custom_libs/subliminal_patch/providers/whisperai.py
@@ -169,7 +169,7 @@ def whisper_get_language_reverse(alpha3):
         lan = whisper_get_language(wl, whisper_languages[wl])
         if lan.alpha3 == alpha3:
             return wl
-    raise ValueError
+    return None
 
 def language_from_alpha3(lang):
     name = Language(lang).name
@@ -317,7 +317,7 @@ class WhisperAIProvider(Provider):
         if out == None:
             logger.info(f"Whisper cannot process {subtitle.video.original_path} because of missing/bad audio track")
             subtitle.content = None
-            return         
+            return  
 
         logger.debug(f'Audio stream length (in WAV format) is {len(out):,} bytes')
 
@@ -326,11 +326,23 @@ class WhisperAIProvider(Provider):
         else:
             output_language = "eng"
 
+        input_language = whisper_get_language_reverse(subtitle.audio_language)
+        if input_language is None:
+            if output_language == "eng":
+                # guess that audio track is mislabelled English and let whisper try to transcribe it
+                input_language = "en"
+                subtitle.task = "transcribe"
+                logger.info(f"Whisper treating unsupported audio track language: '{subtitle.audio_language}' as English")
+            else:
+                logger.info(f"Whisper cannot process {subtitle.video.original_path} because of unsupported audio track language: '{subtitle.audio_language}'")
+                subtitle.content = None
+                return
+        
         logger.info(f'Starting WhisperAI {subtitle.task} to {language_from_alpha3(output_language)} for {subtitle.video.original_path}')
         startTime = time.time()
 
         r = self.session.post(f"{self.endpoint}/asr",
-                              params={'task': subtitle.task, 'language': whisper_get_language_reverse(subtitle.audio_language), 'output': 'srt', 'encode': 'false'},
+                              params={'task': subtitle.task, 'language': input_language, 'output': 'srt', 'encode': 'false'},
                               files={'audio_file': out},
                               timeout=(self.response, self.timeout))
author	JayZed <[email protected]>	2024-04-29 22:11:47 -0400
committer	GitHub <[email protected]>	2024-04-29 22:11:47 -0400
commit	5749971d67b7fa7932a8c707f50732a22615a37f (patch)
tree	35b63d975fc2f62a60530cbba5689f97ae9b1da0 /custom_libs
parent	c5a5dc9ddf45ba6512825e667811a90665328f43 (diff)
download	bazarr-5749971d67b7fa7932a8c707f50732a22615a37f.tar.gz bazarr-5749971d67b7fa7932a8c707f50732a22615a37f.zip