diff options
-rw-r--r-- | bazarr/app/get_providers.py | 3 | ||||
-rw-r--r-- | libs/subliminal_patch/providers/whisperai.py | 14 |
2 files changed, 11 insertions, 6 deletions
diff --git a/bazarr/app/get_providers.py b/bazarr/app/get_providers.py index 2ea396a1d..8b51a4e2d 100644 --- a/bazarr/app/get_providers.py +++ b/bazarr/app/get_providers.py @@ -254,7 +254,8 @@ def get_providers_auth(): }, 'whisperai': { 'endpoint': settings.whisperai.endpoint, - 'timeout': settings.whisperai.timeout + 'timeout': settings.whisperai.timeout, + 'ffmpeg_path': _FFMPEG_BINARY, } } diff --git a/libs/subliminal_patch/providers/whisperai.py b/libs/subliminal_patch/providers/whisperai.py index 0c816bb46..8aef88bc0 100644 --- a/libs/subliminal_patch/providers/whisperai.py +++ b/libs/subliminal_patch/providers/whisperai.py @@ -124,7 +124,7 @@ logger = logging.getLogger(__name__) @functools.lru_cache(2) -def encode_audio_stream(path, audio_stream_language=None): +def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None): logger.debug("Encoding audio stream to WAV with ffmpeg") try: @@ -135,7 +135,7 @@ def encode_audio_stream(path, audio_stream_language=None): inp = inp[f'a:m:language:{audio_stream_language}'] out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000) \ - .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True) + .run(cmd=[ffmpeg_path, "-nostdin"], capture_stdout=True, capture_stderr=True) except ffmpeg.Error as e: raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e @@ -203,16 +203,20 @@ class WhisperAIProvider(Provider): video_types = (Episode, Movie) - def __init__(self, endpoint=None, timeout=None): + def __init__(self, endpoint=None, timeout=None, ffmpeg_path=None): if not endpoint: raise ConfigurationError('Whisper Web Service Endpoint must be provided') if not timeout: raise ConfigurationError('Whisper Web Service Timeout must be provided') + if not ffmpeg_path: + raise ConfigurationError("ffmpeg path must be provided") + self.endpoint = endpoint self.timeout = int(timeout) self.session = None + self.ffmpeg_path = ffmpeg_path def initialize(self): self.session = Session() @@ -224,7 +228,7 @@ class WhisperAIProvider(Provider): @functools.lru_cache(2048) def detect_language(self, path) -> Language: - out = encode_audio_stream(path) + out = encode_audio_stream(path, self.ffmpeg_path) r = self.session.post(f"{self.endpoint}/detect-language", params={'encode': 'false'}, @@ -281,7 +285,7 @@ class WhisperAIProvider(Provider): # Invoke Whisper through the API. This may take a long time depending on the file. # TODO: This loads the entire file into memory, find a good way to stream the file in chunks - out = encode_audio_stream(subtitle.video.original_path, subtitle.force_audio_stream) + out = encode_audio_stream(subtitle.video.original_path, self.ffmpeg_path, subtitle.force_audio_stream) r = self.session.post(f"{self.endpoint}/asr", params={'task': subtitle.task, 'language': whisper_get_language_reverse(subtitle.audio_language), 'output': 'srt', 'encode': 'false'}, |