aboutsummaryrefslogtreecommitdiffhomepage
path: root/custom_libs
diff options
context:
space:
mode:
author: JayZed <[email protected]> 2024-04-29 22:11:47 -0400
committer: GitHub <[email protected]> 2024-04-29 22:11:47 -0400
commit: 5749971d67b7fa7932a8c707f50732a22615a37f (patch)
tree: 35b63d975fc2f62a60530cbba5689f97ae9b1da0 /custom_libs
parent: c5a5dc9ddf45ba6512825e667811a90665328f43 (diff)
download: bazarr-5749971d67b7fa7932a8c707f50732a22615a37f.tar.gz
bazarr-5749971d67b7fa7932a8c707f50732a22615a37f.zip
Improved whisper provider to not throttle when unsupported audio language is encountered. #2474
As we have noted before, bad input data should be no reason to throttle a provider. In this case, if the input language was not supported by whisper, we were raising a ValueError that was never caught; this surfaced as an error in the whisper provider, which was then throttled. Instead, we now detect this case and log an error message. However, given that the input language is not one of the 99 currently known to whisper, it is probably a mislabeled audio track. If the user's desired output language is English, we tell whisper that the input audio is also English and ask it to transcribe it. Whisper does a very good job of transcribing almost anything to English, so it's worth a try. This should address the throttling in issue #2474.
Diffstat (limited to 'custom_libs')
-rw-r--r-- custom_libs/subliminal_patch/providers/whisperai.py | 18
1 files changed, 15 insertions, 3 deletions
diff --git a/custom_libs/subliminal_patch/providers/whisperai.py b/custom_libs/subliminal_patch/providers/whisperai.py
index dfd733da3..d427f8ad2 100644
--- a/custom_libs/subliminal_patch/providers/whisperai.py
+++ b/custom_libs/subliminal_patch/providers/whisperai.py
@@ -169,7 +169,7 @@ def whisper_get_language_reverse(alpha3):
lan = whisper_get_language(wl, whisper_languages[wl])
if lan.alpha3 == alpha3:
return wl
- raise ValueError
+ return None
def language_from_alpha3(lang):
name = Language(lang).name
@@ -317,7 +317,7 @@ class WhisperAIProvider(Provider):
if out == None:
logger.info(f"Whisper cannot process {subtitle.video.original_path} because of missing/bad audio track")
subtitle.content = None
- return
+ return
logger.debug(f'Audio stream length (in WAV format) is {len(out):,} bytes')
@@ -326,11 +326,23 @@ class WhisperAIProvider(Provider):
else:
output_language = "eng"
+ input_language = whisper_get_language_reverse(subtitle.audio_language)
+ if input_language is None:
+ if output_language == "eng":
+ # guess that audio track is mislabelled English and let whisper try to transcribe it
+ input_language = "en"
+ subtitle.task = "transcribe"
+ logger.info(f"Whisper treating unsupported audio track language: '{subtitle.audio_language}' as English")
+ else:
+ logger.info(f"Whisper cannot process {subtitle.video.original_path} because of unsupported audio track language: '{subtitle.audio_language}'")
+ subtitle.content = None
+ return
+
logger.info(f'Starting WhisperAI {subtitle.task} to {language_from_alpha3(output_language)} for {subtitle.video.original_path}')
startTime = time.time()
r = self.session.post(f"{self.endpoint}/asr",
- params={'task': subtitle.task, 'language': whisper_get_language_reverse(subtitle.audio_language), 'output': 'srt', 'encode': 'false'},
+ params={'task': subtitle.task, 'language': input_language, 'output': 'srt', 'encode': 'false'},
files={'audio_file': out},
timeout=(self.response, self.timeout))