diff options
author | morpheus65535 <[email protected]> | 2024-08-24 15:24:15 -0400 |
---|---|---|
committer | morpheus65535 <[email protected]> | 2024-08-24 15:24:15 -0400 |
commit | 609349b4002290e771935c7e6d02263c3fdd7ce4 (patch) | |
tree | bcb81e7be1cae984ae18c23d211b017362dd63ca | |
parent | 00c7eabd8c351c32a3fd00338eaf2653c1ce382e (diff) | |
download | bazarr-609349b4002290e771935c7e6d02263c3fdd7ce4.tar.gz bazarr-609349b4002290e771935c7e6d02263c3fdd7ce4.zip |
Refactored the translation routine to prevent Google Translate from messing with the subtitle sequence by sending subtitles line by line (slower but better). #2558 v1.4.4-beta.33
-rw-r--r-- | bazarr/subtitles/tools/translate.py | 71 |
1 file changed, 43 insertions, 28 deletions
diff --git a/bazarr/subtitles/tools/translate.py b/bazarr/subtitles/tools/translate.py index eaaa69182..534200e20 100644 --- a/bazarr/subtitles/tools/translate.py +++ b/bazarr/subtitles/tools/translate.py @@ -6,12 +6,16 @@ import pysubs2 from subliminal_patch.core import get_subtitle_path from subzero.language import Language from deep_translator import GoogleTranslator +from deep_translator.exceptions import TooManyRequests, RequestError, TranslationNotFound +from time import sleep +from concurrent.futures import ThreadPoolExecutor from languages.custom_lang import CustomLanguage from languages.get_languages import alpha3_from_alpha2, language_from_alpha2, language_from_alpha3 from radarr.history import history_log_movie from sonarr.history import history_log from subtitles.processing import ProcessSubtitlesResult +from app.event_handler import show_progress, hide_progress def translate_subtitles_file(video_path, source_srt_file, from_lang, to_lang, forced, hi, media_type, sonarr_series_id, @@ -33,8 +37,6 @@ def translate_subtitles_file(video_path, source_srt_file, from_lang, to_lang, fo logging.debug(f'BAZARR is translating in {lang_obj} this subtitles {source_srt_file}') - max_characters = 5000 - dest_srt_file = get_subtitle_path(video_path, language=lang_obj if isinstance(lang_obj, Language) else lang_obj.subzero_language(), extension='.srt', @@ -44,40 +46,53 @@ def translate_subtitles_file(video_path, source_srt_file, from_lang, to_lang, fo subs = pysubs2.load(source_srt_file, encoding='utf-8') subs.remove_miscellaneous_events() lines_list = [x.plaintext for x in subs] - joined_lines_str = '\n\n'.join(lines_list) - - logging.debug(f'BAZARR splitting subtitles into {max_characters} characters blocks') - lines_block_list = [] - translated_lines_list = [] - while len(joined_lines_str): - partial_lines_str = joined_lines_str[:max_characters] + lines_list_len = len(lines_list) - if len(joined_lines_str) > max_characters: - new_partial_lines_str = 
partial_lines_str.rsplit('\n\n', 1)[0] + def translate_line(id, line, attempt): + try: + translated_text = GoogleTranslator( + source='auto', + target=language_code_convert_dict.get(lang_obj.alpha2, lang_obj.alpha2) + ).translate(text=line) + except TooManyRequests: + if attempt <= 5: + sleep(1) + super(translate_line(id, line, attempt+1)) + else: + logging.debug(f'Too many requests while translating {line}') + translated_lines.append({'id': id, 'line': line}) + except (RequestError, TranslationNotFound): + logging.debug(f'Unable to translate line {line}') + translated_lines.append({'id': id, 'line': line}) else: - new_partial_lines_str = partial_lines_str + translated_lines.append({'id': id, 'line': translated_text}) + finally: + show_progress(id=f'translate_progress_{dest_srt_file}', + header=f'Translating subtitles lines to {language_from_alpha3(to_lang)}...', + name='', + value=len(translated_lines), + count=lines_list_len) - lines_block_list.append(new_partial_lines_str) - joined_lines_str = joined_lines_str.replace(new_partial_lines_str, '') + logging.debug(f'BAZARR is sending {lines_list_len} blocks to Google Translate') - logging.debug(f'BAZARR is sending {len(lines_block_list)} blocks to Google Translate') - for block_str in lines_block_list: - try: - translated_partial_srt_text = GoogleTranslator(source='auto', - target=language_code_convert_dict.get(lang_obj.alpha2, - lang_obj.alpha2) - ).translate(text=block_str) - except Exception: - logging.exception(f'BAZARR Unable to translate subtitles {source_srt_file}') - return False - else: - translated_partial_srt_list = translated_partial_srt_text.split('\n\n') - translated_lines_list += translated_partial_srt_list + pool = ThreadPoolExecutor(max_workers=10) + + translated_lines = [] + + for i, line in enumerate(lines_list): + pool.submit(translate_line, i, line, 1) + + pool.shutdown(wait=True) + + for i, line in enumerate(translated_lines): + lines_list[line['id']] = line['line'] + + 
hide_progress(id=f'translate_progress_{dest_srt_file}') logging.debug(f'BAZARR saving translated subtitles to {dest_srt_file}') for i, line in enumerate(subs): try: - line.plaintext = translated_lines_list[i] + line.plaintext = lines_list[i] except IndexError: logging.error(f'BAZARR is unable to translate malformed subtitles: {source_srt_file}') return False |