diff options
author | JayZed <[email protected]> | 2024-04-29 22:11:47 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2024-04-29 22:11:47 -0400 |
commit | 5749971d67b7fa7932a8c707f50732a22615a37f (patch) | |
tree | 35b63d975fc2f62a60530cbba5689f97ae9b1da0 /custom_libs | |
parent | c5a5dc9ddf45ba6512825e667811a90665328f43 (diff) | |
download | bazarr-5749971d67b7fa7932a8c707f50732a22615a37f.tar.gz bazarr-5749971d67b7fa7932a8c707f50732a22615a37f.zip |
Improved whisper provider to not throttle when unsupported audio language is encountered. #2474
As we have noted before, bad input data should be no reason to throttle a provider.
In this case, if the input language was not supported by whisper, we raised a ValueError that was never caught; this surfaced as an error in the whisper provider, which was then throttled.
Instead, we are now detecting this case and logging an error message.
However, given that the input language was not one of the 99 currently known to whisper, it's probably a mislabeled audio track. If the user's desired output language is English, then we will tell whisper that the input audio is also English and ask it to transcribe it. Whisper does a very good job of transcribing almost anything to English, so it's worth a try.
This should address the throttling in issue #2474.
Diffstat (limited to 'custom_libs')
-rw-r--r-- | custom_libs/subliminal_patch/providers/whisperai.py | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/custom_libs/subliminal_patch/providers/whisperai.py b/custom_libs/subliminal_patch/providers/whisperai.py index dfd733da3..d427f8ad2 100644 --- a/custom_libs/subliminal_patch/providers/whisperai.py +++ b/custom_libs/subliminal_patch/providers/whisperai.py @@ -169,7 +169,7 @@ def whisper_get_language_reverse(alpha3): lan = whisper_get_language(wl, whisper_languages[wl]) if lan.alpha3 == alpha3: return wl - raise ValueError + return None def language_from_alpha3(lang): name = Language(lang).name @@ -317,7 +317,7 @@ class WhisperAIProvider(Provider): if out == None: logger.info(f"Whisper cannot process {subtitle.video.original_path} because of missing/bad audio track") subtitle.content = None - return + return logger.debug(f'Audio stream length (in WAV format) is {len(out):,} bytes') @@ -326,11 +326,23 @@ class WhisperAIProvider(Provider): else: output_language = "eng" + input_language = whisper_get_language_reverse(subtitle.audio_language) + if input_language is None: + if output_language == "eng": + # guess that audio track is mislabelled English and let whisper try to transcribe it + input_language = "en" + subtitle.task = "transcribe" + logger.info(f"Whisper treating unsupported audio track language: '{subtitle.audio_language}' as English") + else: + logger.info(f"Whisper cannot process {subtitle.video.original_path} because of unsupported audio track language: '{subtitle.audio_language}'") + subtitle.content = None + return + logger.info(f'Starting WhisperAI {subtitle.task} to {language_from_alpha3(output_language)} for {subtitle.video.original_path}') startTime = time.time() r = self.session.post(f"{self.endpoint}/asr", - params={'task': subtitle.task, 'language': whisper_get_language_reverse(subtitle.audio_language), 'output': 'srt', 'encode': 'false'}, + params={'task': subtitle.task, 'language': input_language, 'output': 'srt', 'encode': 'false'}, files={'audio_file': out}, timeout=(self.response, self.timeout)) |