diff options
author | morpheus65535 <[email protected]> | 2022-05-01 08:00:20 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2022-05-01 08:00:20 -0400 |
commit | 2f01ab852348669e81c3d19b3a12f5084b04fba8 (patch) | |
tree | dc2a55e4d943563bcaedba3d79fc23c54e5b9640 /libs/deep_translator | |
parent | 9be3674f3a08a64aa4d11d9a28cf79dbc14fc2e0 (diff) | |
download | bazarr-2f01ab852348669e81c3d19b3a12f5084b04fba8.tar.gz bazarr-2f01ab852348669e81c3d19b3a12f5084b04fba8.zip |
Reworked Bazarr file structure to improve support and optimization (v1.0.5-beta.0)
Diffstat (limited to 'libs/deep_translator')
-rw-r--r-- | libs/deep_translator/__init__.py | 34 | ||||
-rw-r--r-- | libs/deep_translator/__main__.py | 9 | ||||
-rw-r--r-- | libs/deep_translator/base.py | 147 | ||||
-rw-r--r-- | libs/deep_translator/cli.py | 95 | ||||
-rw-r--r-- | libs/deep_translator/constants.py | 370 | ||||
-rw-r--r-- | libs/deep_translator/deepl.py | 136 | ||||
-rw-r--r-- | libs/deep_translator/detection.py | 46 | ||||
-rw-r--r-- | libs/deep_translator/engines.py | 6 | ||||
-rw-r--r-- | libs/deep_translator/exceptions.py | 48 | ||||
-rw-r--r-- | libs/deep_translator/google.py | 123 | ||||
-rw-r--r-- | libs/deep_translator/google_trans.py | 200 | ||||
-rw-r--r-- | libs/deep_translator/libre.py | 170 | ||||
-rw-r--r-- | libs/deep_translator/linguee.py | 112 | ||||
-rw-r--r-- | libs/deep_translator/main.py | 132 | ||||
-rw-r--r-- | libs/deep_translator/microsoft.py | 176 | ||||
-rw-r--r-- | libs/deep_translator/mymemory.py | 169 | ||||
-rw-r--r-- | libs/deep_translator/papago.py | 170 | ||||
-rw-r--r-- | libs/deep_translator/parent.py | 80 | ||||
-rw-r--r-- | libs/deep_translator/pons.py | 110 | ||||
-rw-r--r-- | libs/deep_translator/qcri.py | 64 | ||||
-rw-r--r-- | libs/deep_translator/validate.py | 22 | ||||
-rw-r--r-- | libs/deep_translator/yandex.py | 121 |
22 files changed, 1149 insertions, 1391 deletions
diff --git a/libs/deep_translator/__init__.py b/libs/deep_translator/__init__.py index 5b6100786..1a6b0ab2d 100644 --- a/libs/deep_translator/__init__.py +++ b/libs/deep_translator/__init__.py @@ -1,20 +1,20 @@ """Top-level package for Deep Translator""" -from .google_trans import GoogleTranslator -from .pons import PonsTranslator -from .linguee import LingueeTranslator -from .mymemory import MyMemoryTranslator -from .yandex import YandexTranslator -from .qcri import QCRI -from .deepl import DeepL -from .detection import single_detection, batch_detection -from .microsoft import MicrosoftTranslator -from .papago import PapagoTranslator -from .libre import LibreTranslator +from deep_translator.deepl import DeeplTranslator +from deep_translator.detection import batch_detection, single_detection +from deep_translator.google import GoogleTranslator +from deep_translator.libre import LibreTranslator +from deep_translator.linguee import LingueeTranslator +from deep_translator.microsoft import MicrosoftTranslator +from deep_translator.mymemory import MyMemoryTranslator +from deep_translator.papago import PapagoTranslator +from deep_translator.pons import PonsTranslator +from deep_translator.qcri import QcriTranslator +from deep_translator.yandex import YandexTranslator __author__ = """Nidhal Baccouri""" -__email__ = '[email protected]' -__version__ = '1.6.1' +__email__ = "[email protected]" +__version__ = "1.8.0" __all__ = [ "GoogleTranslator", @@ -23,10 +23,10 @@ __all__ = [ "MyMemoryTranslator", "YandexTranslator", "MicrosoftTranslator", - "QCRI", - "DeepL", + "QcriTranslator", + "DeeplTranslator", "LibreTranslator", "PapagoTranslator", "single_detection", - "batch_detection" - ] + "batch_detection", +] diff --git a/libs/deep_translator/__main__.py b/libs/deep_translator/__main__.py new file mode 100644 index 000000000..9a35ad5e5 --- /dev/null +++ b/libs/deep_translator/__main__.py @@ -0,0 +1,9 @@ +from deep_translator.cli import CLI + + +def main(): + CLI().run() + + 
+if __name__ == "__main__": + main() diff --git a/libs/deep_translator/base.py b/libs/deep_translator/base.py new file mode 100644 index 000000000..6dd7cc119 --- /dev/null +++ b/libs/deep_translator/base.py @@ -0,0 +1,147 @@ +"""base translator class""" + +from abc import ABC, abstractmethod +from typing import List, Optional, Union + +from deep_translator.constants import GOOGLE_LANGUAGES_TO_CODES +from deep_translator.exceptions import InvalidSourceOrTargetLanguage, LanguageNotSupportedException + + +class BaseTranslator(ABC): + """ + Abstract class that serve as a base translator for other different translators + """ + + def __init__( + self, + base_url: str, + languages: dict = GOOGLE_LANGUAGES_TO_CODES, + source: str = "auto", + target: str = "en", + payload_key: Optional[str] = None, + element_tag: Optional[str] = None, + element_query: Optional[dict] = None, + **url_params + ): + """ + @param source: source language to translate from + @param target: target language to translate to + """ + self._base_url = base_url + self._languages = languages + self._supported_languages = list(self._languages.keys()) + if not source: + raise InvalidSourceOrTargetLanguage(source) + if not target: + raise InvalidSourceOrTargetLanguage(target) + + self._source, self._target = self._map_language_to_code(source, target) + self._url_params = url_params + self._element_tag = element_tag + self._element_query = element_query + self.payload_key = payload_key + super().__init__() + + @property + def source(self): + return self._source + + @source.setter + def source(self, lang): + self._source = lang + + @property + def target(self): + return self._target + + @target.setter + def target(self, lang): + self._target = lang + + def _type(self): + return self.__class__.__name__ + + def _map_language_to_code(self, *languages): + """ + map language to its corresponding code (abbreviation) if the language was passed by its full name by the user + @param languages: list of languages + 
@return: mapped value of the language or raise an exception if the language is not supported + """ + for language in languages: + if language in self._languages.values() or language == "auto": + yield language + elif language in self._languages.keys(): + yield self._languages[language] + else: + raise LanguageNotSupportedException( + language, + message=f"No support for the provided language.\n" + f"Please select on of the supported languages:\n" + f"{self._languages}") + + def _same_source_target(self) -> bool: + return self._source == self._target + + def get_supported_languages( + self, as_dict: bool = False, **kwargs + ) -> Union[list, dict]: + """ + return the supported languages by the Google translator + @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations + @return: list or dict + """ + return self._supported_languages if not as_dict else self._languages + + def is_language_supported(self, language: str, **kwargs) -> bool: + """ + check if the language is supported by the translator + @param language: a string for 1 language + @return: bool or raise an Exception + """ + if ( + language == "auto" + or language in self._languages.keys() + or language in self._languages.values() + ): + return True + else: + return False + + @abstractmethod + def translate(self, text: str, **kwargs) -> str: + """ + translate a text using a translator under the hood and return the translated text + @param text: text to translate + @param kwargs: additional arguments + @return: str + """ + return NotImplemented("You need to implement the translate method!") + + def _translate_file(self, path: str, **kwargs) -> str: + """ + translate directly from file + @param path: path to the target file + @type path: str + @param kwargs: additional args + @return: str + """ + try: + with open(path, "r", encoding="utf-8") as f: + text = f.read().strip() + return self.translate(text) + except Exception as e: + raise e + + def 
_translate_batch(self, batch: List[str], **kwargs) -> List[str]: + """ + translate a list of texts + @param batch: list of texts you want to translate + @return: list of translations + """ + if not batch: + raise Exception("Enter your text list that you want to translate") + arr = [] + for i, text in enumerate(batch): + translated = self.translate(text, **kwargs) + arr.append(translated) + return arr diff --git a/libs/deep_translator/cli.py b/libs/deep_translator/cli.py new file mode 100644 index 000000000..f3e0db9fd --- /dev/null +++ b/libs/deep_translator/cli.py @@ -0,0 +1,95 @@ +"""Console script for deep_translator.""" +import argparse +from typing import Optional + +from deep_translator.engines import __engines__ + + +class CLI(object): + translators_dict = __engines__ + translator = None + + def __init__(self, custom_args: Optional[list] = None): + self.custom_args = custom_args + self.args = self.parse_args() + translator_class = self.translators_dict.get(self.args.translator, None) + if not translator_class: + raise Exception( + f"Translator {self.args.translator} is not supported." 
+ f"Supported translators: {list(self.translators_dict.keys())}" + ) + self.translator = translator_class( + source=self.args.source, target=self.args.target + ) + + def translate(self) -> None: + """ + function used to provide translations from the parsed terminal arguments + @return: None + """ + res = self.translator.translate(self.args.text) + print(f"Translation from {self.args.source} to {self.args.target}") + print("-" * 50) + print(f"Translation result: {res}") + + def get_supported_languages(self) -> None: + """ + function used to return the languages supported by the translator service from the parsed terminal arguments + @return: None + """ + + translator_supported_languages = self.translator.get_supported_languages( + as_dict=True + ) + print(f"Languages supported by '{self.args.translator}' are :\n") + print(translator_supported_languages) + + def parse_args(self) -> argparse.Namespace: + """ + function responsible for parsing terminal arguments and provide them for further use in the translation process + """ + parser = argparse.ArgumentParser( + add_help=True, + description="Official CLI for deep-translator", + usage="dt --help", + ) + + parser.add_argument( + "--translator", + "-trans", + default="google", + type=str, + help="name of the translator you want to use", + ) + parser.add_argument( + "--source", + "-src", + default="auto", + type=str, + help="source language to translate from", + ) + parser.add_argument( + "--target", "-tg", type=str, help="target language to translate to" + ) + parser.add_argument( + "--text", "-txt", type=str, help="text you want to translate" + ) + parser.add_argument( + "--languages", + "-lang", + action="store_true", + help="all the languages available with the translator" + "Run the command deep_translator -trans <translator service> -lang", + ) + parsed_args = ( + parser.parse_args(self.custom_args) + if self.custom_args + else parser.parse_args() + ) + return parsed_args + + def run(self) -> None: + if 
self.args.languages: + self.get_supported_languages() + else: + self.translate() diff --git a/libs/deep_translator/constants.py b/libs/deep_translator/constants.py index dca6ed41a..d17541a53 100644 --- a/libs/deep_translator/constants.py +++ b/libs/deep_translator/constants.py @@ -1,5 +1,3 @@ -import requests - BASE_URLS = { "GOOGLE_TRANSLATE": "https://translate.google.com/m", "PONS": "https://en.pons.com/translate/", @@ -8,7 +6,7 @@ BASE_URLS = { "MYMEMORY": "http://api.mymemory.translated.net/get", "QCRI": "https://mt.qcri.org/api/v1/{endpoint}?", "DEEPL": "https://api.deepl.com/{version}/", - "DEEPL_FREE": "https://api-free.deepl.com/v2/", + "DEEPL_FREE": "https://api-free.deepl.com/{version}/", "MICROSOFT_TRANSLATE": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0", "PAPAGO": "https://papago.naver.com/", "PAPAGO_API": "https://openapi.naver.com/v1/papago/n2mt", @@ -16,155 +14,143 @@ BASE_URLS = { "LIBRE_FREE": "https://libretranslate.de/", } -GOOGLE_CODES_TO_LANGUAGES = { - 'af': 'afrikaans', - 'sq': 'albanian', - 'am': 'amharic', - 'ar': 'arabic', - 'hy': 'armenian', - 'az': 'azerbaijani', - 'eu': 'basque', - 'be': 'belarusian', - 'bn': 'bengali', - 'bs': 'bosnian', - 'bg': 'bulgarian', - 'ca': 'catalan', - 'ceb': 'cebuano', - 'ny': 'chichewa', - 'zh-CN': 'chinese (simplified)', - 'zh-TW': 'chinese (traditional)', - 'co': 'corsican', - 'hr': 'croatian', - 'cs': 'czech', - 'da': 'danish', - 'nl': 'dutch', - 'en': 'english', - 'eo': 'esperanto', - 'et': 'estonian', - 'tl': 'filipino', - 'fi': 'finnish', - 'fr': 'french', - 'fy': 'frisian', - 'gl': 'galician', - 'ka': 'georgian', - 'de': 'german', - 'el': 'greek', - 'gu': 'gujarati', - 'ht': 'haitian creole', - 'ha': 'hausa', - 'haw': 'hawaiian', - 'iw': 'hebrew', - 'hi': 'hindi', - 'hmn': 'hmong', - 'hu': 'hungarian', - 'is': 'icelandic', - 'ig': 'igbo', - 'id': 'indonesian', - 'ga': 'irish', - 'it': 'italian', - 'ja': 'japanese', - 'jw': 'javanese', - 'kn': 'kannada', - 'kk': 'kazakh', 
- 'km': 'khmer', - 'rw': 'kinyarwanda', - 'ko': 'korean', - 'ku': 'kurdish', - 'ky': 'kyrgyz', - 'lo': 'lao', - 'la': 'latin', - 'lv': 'latvian', - 'lt': 'lithuanian', - 'lb': 'luxembourgish', - 'mk': 'macedonian', - 'mg': 'malagasy', - 'ms': 'malay', - 'ml': 'malayalam', - 'mt': 'maltese', - 'mi': 'maori', - 'mr': 'marathi', - 'mn': 'mongolian', - 'my': 'myanmar', - 'ne': 'nepali', - 'no': 'norwegian', - 'or': 'odia', - 'ps': 'pashto', - 'fa': 'persian', - 'pl': 'polish', - 'pt': 'portuguese', - 'pa': 'punjabi', - 'ro': 'romanian', - 'ru': 'russian', - 'sm': 'samoan', - 'gd': 'scots gaelic', - 'sr': 'serbian', - 'st': 'sesotho', - 'sn': 'shona', - 'sd': 'sindhi', - 'si': 'sinhala', - 'sk': 'slovak', - 'sl': 'slovenian', - 'so': 'somali', - 'es': 'spanish', - 'su': 'sundanese', - 'sw': 'swahili', - 'sv': 'swedish', - 'tg': 'tajik', - 'ta': 'tamil', - 'tt': 'tatar', - 'te': 'telugu', - 'th': 'thai', - 'tr': 'turkish', - 'tk': 'turkmen', - 'uk': 'ukrainian', - 'ur': 'urdu', - 'ug': 'uyghur', - 'uz': 'uzbek', - 'vi': 'vietnamese', - 'cy': 'welsh', - 'xh': 'xhosa', - 'yi': 'yiddish', - 'yo': 'yoruba', - 'zu': 'zulu', -} - -GOOGLE_LANGUAGES_TO_CODES = {v: k for k, v in GOOGLE_CODES_TO_LANGUAGES.items()} - -# This dictionary maps the primary name of language to its secondary names in list manner (if any) -GOOGLE_LANGUAGES_SECONDARY_NAMES = { - 'myanmar': ['burmese'], - 'odia': ['oriya'], - 'kurdish': ['kurmanji'] +GOOGLE_LANGUAGES_TO_CODES = { + "afrikaans": "af", + "albanian": "sq", + "amharic": "am", + "arabic": "ar", + "armenian": "hy", + "azerbaijani": "az", + "basque": "eu", + "belarusian": "be", + "bengali": "bn", + "bosnian": "bs", + "bulgarian": "bg", + "catalan": "ca", + "cebuano": "ceb", + "chichewa": "ny", + "chinese (simplified)": "zh-CN", + "chinese (traditional)": "zh-TW", + "corsican": "co", + "croatian": "hr", + "czech": "cs", + "danish": "da", + "dutch": "nl", + "english": "en", + "esperanto": "eo", + "estonian": "et", + "filipino": "tl", + "finnish": 
"fi", + "french": "fr", + "frisian": "fy", + "galician": "gl", + "georgian": "ka", + "german": "de", + "greek": "el", + "gujarati": "gu", + "haitian creole": "ht", + "hausa": "ha", + "hawaiian": "haw", + "hebrew": "iw", + "hindi": "hi", + "hmong": "hmn", + "hungarian": "hu", + "icelandic": "is", + "igbo": "ig", + "indonesian": "id", + "irish": "ga", + "italian": "it", + "japanese": "ja", + "javanese": "jw", + "kannada": "kn", + "kazakh": "kk", + "khmer": "km", + "kinyarwanda": "rw", + "korean": "ko", + "kurdish": "ku", + "kyrgyz": "ky", + "lao": "lo", + "latin": "la", + "latvian": "lv", + "lithuanian": "lt", + "luxembourgish": "lb", + "macedonian": "mk", + "malagasy": "mg", + "malay": "ms", + "malayalam": "ml", + "maltese": "mt", + "maori": "mi", + "marathi": "mr", + "mongolian": "mn", + "myanmar": "my", + "nepali": "ne", + "norwegian": "no", + "odia": "or", + "pashto": "ps", + "persian": "fa", + "polish": "pl", + "portuguese": "pt", + "punjabi": "pa", + "romanian": "ro", + "russian": "ru", + "samoan": "sm", + "scots gaelic": "gd", + "serbian": "sr", + "sesotho": "st", + "shona": "sn", + "sindhi": "sd", + "sinhala": "si", + "slovak": "sk", + "slovenian": "sl", + "somali": "so", + "spanish": "es", + "sundanese": "su", + "swahili": "sw", + "swedish": "sv", + "tajik": "tg", + "tamil": "ta", + "tatar": "tt", + "telugu": "te", + "thai": "th", + "turkish": "tr", + "turkmen": "tk", + "ukrainian": "uk", + "urdu": "ur", + "uyghur": "ug", + "uzbek": "uz", + "vietnamese": "vi", + "welsh": "cy", + "xhosa": "xh", + "yiddish": "yi", + "yoruba": "yo", + "zulu": "zu", } - PONS_CODES_TO_LANGUAGES = { - 'ar': 'arabic', - 'bg': 'bulgarian', - 'zh-cn': 'chinese', - 'cs': 'czech', - 'da': 'danish', - 'nl': 'dutch', - 'en': 'english', - 'fr': 'french', - 'de': 'german', - 'el': 'greek', - 'hu': 'hungarian', - 'it': 'italian', - 'la': 'latin', - 'no': 'norwegian', - 'pl': 'polish', - 'pt': 'portuguese', - 'ru': 'russian', - 'sl': 'slovenian', - 'es': 'spanish', - 'sv': 'swedish', - 'tr': 
'turkish', - 'elv': 'elvish' + "ar": "arabic", + "bg": "bulgarian", + "zh-cn": "chinese", + "cs": "czech", + "da": "danish", + "nl": "dutch", + "en": "english", + "fr": "french", + "de": "german", + "el": "greek", + "hu": "hungarian", + "it": "italian", + "la": "latin", + "no": "norwegian", + "pl": "polish", + "pt": "portuguese", + "ru": "russian", + "sl": "slovenian", + "es": "spanish", + "sv": "swedish", + "tr": "turkish", + "elv": "elvish", } -PONS_LANGUAGES_TO_CODES = {v: k for k, v in PONS_CODES_TO_LANGUAGES.items()} - LINGUEE_LANGUAGES_TO_CODES = { "maltese": "mt", "english": "en", @@ -192,21 +178,9 @@ LINGUEE_LANGUAGES_TO_CODES = { "swedish": "sv", "latvian": "lv", "estonian": "et", - "japanese": "ja" + "japanese": "ja", } -LINGUEE_CODE_TO_LANGUAGE = {v: k for k, v in LINGUEE_LANGUAGES_TO_CODES.items()} - -# "72e9e2cc7c992db4dcbdd6fb9f91a0d1" - -# obtaining the current list of supported Microsoft languages for translation - -microsoft_languages_api_url = "https://api.cognitive.microsofttranslator.com/languages?api-version=3.0&scope=translation" -microsoft_languages_response = requests.get(microsoft_languages_api_url) -translation_dict = microsoft_languages_response.json()['translation'] - -MICROSOFT_CODES_TO_LANGUAGES = {translation_dict[k]['name'].lower(): k for k in translation_dict.keys()} - DEEPL_LANGUAGE_TO_CODE = { "bulgarian": "bg", "czech": "cs", @@ -231,56 +205,40 @@ DEEPL_LANGUAGE_TO_CODE = { "slovak": "sk", "slovenian": "sl", "swedish": "sv", - "chinese": "zh" -} - -DEEPL_CODE_TO_LANGUAGE = {v: k for k, v in DEEPL_LANGUAGE_TO_CODE.items()} - -PAPAGO_CODE_TO_LANGUAGE = { - 'ko': 'Korean', - 'en': 'English', - 'ja': 'Japanese', - 'zh-CN': 'Chinese', - 'zh-TW': 'Chinese traditional', - 'es': 'Spanish', - 'fr': 'French', - 'vi': 'Vietnamese', - 'th': 'Thai', - 'id': 'Indonesia' -} - -PAPAGO_LANGUAGE_TO_CODE = {v: k for v, k in PAPAGO_CODE_TO_LANGUAGE.items()} - -QCRI_CODE_TO_LANGUAGE = { - 'ar': 'Arabic', - 'en': 'English', - 'es': 'Spanish' + 
"chinese": "zh", } -QCRI_LANGUAGE_TO_CODE = { - v: k for k, v in QCRI_CODE_TO_LANGUAGE.items() +PAPAGO_LANGUAGE_TO_CODE = { + "ko": "Korean", + "en": "English", + "ja": "Japanese", + "zh-CN": "Chinese", + "zh-TW": "Chinese traditional", + "es": "Spanish", + "fr": "French", + "vi": "Vietnamese", + "th": "Thai", + "id": "Indonesia", } -LIBRE_CODES_TO_LANGUAGES = { - 'en': 'English', - 'ar': 'Arabic', - 'zh': 'Chinese', - 'fr': 'French', - 'de': 'German', - 'hi': 'Hindi', - 'id': 'Indonesian', - 'ga': 'Irish', - 'it': 'Italian', - 'ja': 'Japanese', - 'ko': 'Korean', - 'pl': 'Polish', - 'pt': 'Portuguese', - 'ru': 'Russian', - 'es': 'Spanish', - 'tr': 'Turkish', - 'vi': 'Vietnamese' -} +QCRI_LANGUAGE_TO_CODE = {"Arabic": "ar", "English": "en", "Spanish": "es"} LIBRE_LANGUAGES_TO_CODES = { - v: k for k, v in LIBRE_CODES_TO_LANGUAGES.items() + "English": "en", + "Arabic": "ar", + "Chinese": "zh", + "French": "fr", + "German": "de", + "Hindi": "hi", + "Indonesian": "id", + "Irish": "ga", + "Italian": "it", + "Japanese": "ja", + "Korean": "ko", + "Polish": "pl", + "Portuguese": "pt", + "Russian": "ru", + "Spanish": "es", + "Turkish": "tr", + "Vietnamese": "vi", } diff --git a/libs/deep_translator/deepl.py b/libs/deep_translator/deepl.py index 1a9f774d5..0ab7d9cd6 100644 --- a/libs/deep_translator/deepl.py +++ b/libs/deep_translator/deepl.py @@ -1,89 +1,101 @@ +from typing import List, Optional + import requests -from .constants import BASE_URLS, DEEPL_LANGUAGE_TO_CODE -from .exceptions import (ServerException, - TranslationNotFound, - LanguageNotSupportedException, - AuthorizationException) + +from deep_translator.base import BaseTranslator +from deep_translator.constants import BASE_URLS, DEEPL_LANGUAGE_TO_CODE +from deep_translator.exceptions import ( + AuthorizationException, + ServerException, + TranslationNotFound, +) +from deep_translator.validate import is_empty, is_input_valid -class DeepL(object): +class DeeplTranslator(BaseTranslator): """ - class that wraps 
functions, which use the DeepL translator under the hood to translate word(s) + class that wraps functions, which use the DeeplTranslator translator under the hood to translate word(s) """ - _languages = DEEPL_LANGUAGE_TO_CODE - def __init__(self, api_key=None, source="en", target="en", use_free_api=True, **kwargs): + def __init__( + self, + api_key: Optional[str] = None, + source: str = "de", + target: str = "en", + use_free_api: bool = True, + **kwargs + ): """ - @param api_key: your DeepL api key. + @param api_key: your DeeplTranslator api key. Get one here: https://www.deepl.com/docs-api/accessing-the-api/ @param source: source language @param target: target language """ if not api_key: raise ServerException(401) - self.version = 'v2' + self.version = "v2" self.api_key = api_key - self.source = self._map_language_to_code(source) - self.target = self._map_language_to_code(target) - if use_free_api: - self.__base_url = BASE_URLS.get("DEEPL_FREE").format(version=self.version) - else: - self.__base_url = BASE_URLS.get("DEEPL").format(version=self.version) + url = ( + BASE_URLS.get("DEEPL_FREE").format(version=self.version) + if use_free_api + else BASE_URLS.get("DEEPL").format(version=self.version) + ) + super().__init__( + base_url=url, + source=source, + target=target, + languages=DEEPL_LANGUAGE_TO_CODE, + **kwargs + ) - def translate(self, text, **kwargs): + def translate(self, text: str, **kwargs) -> str: """ @param text: text to translate @return: translated text """ - # Create the request parameters. - translate_endpoint = 'translate' - params = { - "auth_key": self.api_key, - "source_lang": self.source, - "target_lang": self.target, - "text": text - } - # Do the request and check the connection. - try: - response = requests.get(self.__base_url + translate_endpoint, params=params) - except ConnectionError: - raise ServerException(503) - # If the answer is not success, raise server exception. 
- if response.status_code == 403: - raise AuthorizationException(self.api_key) - elif response.status_code != 200: - raise ServerException(response.status_code) - # Get the response and check is not empty. - res = response.json() - if not res: - raise TranslationNotFound(text) - # Process and return the response. - return res['translations'][0]['text'] + if is_input_valid(text): + if self._same_source_target() or is_empty(text): + return text + + # Create the request parameters. + translate_endpoint = "translate" + params = { + "auth_key": self.api_key, + "source_lang": self._source, + "target_lang": self._target, + "text": text, + } + # Do the request and check the connection. + try: + response = requests.get( + self._base_url + translate_endpoint, params=params + ) + except ConnectionError: + raise ServerException(503) + # If the answer is not success, raise server exception. + if response.status_code == 403: + raise AuthorizationException(self.api_key) + elif response.status_code != 200: + raise ServerException(response.status_code) + # Get the response and check is not empty. + res = response.json() + if not res: + raise TranslationNotFound(text) + # Process and return the response. + return res["translations"][0]["text"] + + def translate_file(self, path: str, **kwargs) -> str: + return self._translate_file(path, **kwargs) - def translate_batch(self, batch, **kwargs): + def translate_batch(self, batch: List[str], **kwargs) -> List[str]: """ @param batch: list of texts to translate @return: list of translations """ - return [self.translate(text, **kwargs) for text in batch] - - @staticmethod - def get_supported_languages(as_dict=False, **kwargs): - return [*DeepL._languages.keys()] if not as_dict else DeepL._languages - - def _is_language_supported(self, lang, **kwargs): - # The language is supported when is in the dicionary. 
- return lang == 'auto' or lang in self._languages.keys() or lang in self._languages.values() - - def _map_language_to_code(self, lang, **kwargs): - if lang in self._languages.keys(): - return self._languages[lang] - elif lang in self._languages.values(): - return lang - raise LanguageNotSupportedException(lang) + return self._translate_batch(batch, **kwargs) -if __name__ == '__main__': - d = DeepL(target="de") - t = d.translate("I have no idea") +if __name__ == "__main__": + d = DeeplTranslator(target="en", api_key="some-key") + t = d.translate("Ich habe keine ahnung") print("text: ", t) diff --git a/libs/deep_translator/detection.py b/libs/deep_translator/detection.py index c8581b5cc..4759d4462 100644 --- a/libs/deep_translator/detection.py +++ b/libs/deep_translator/detection.py @@ -1,13 +1,22 @@ """ language detection API """ +from typing import List, Optional, Union + import requests from requests.exceptions import HTTPError # Module global config -config = {"url": 'https://ws.detectlanguage.com/0.2/detect',"headers": {'User-Agent': 'Detect Language API Python Client 1.4.0','Authorization': 'Bearer {}',}} +config = { + "url": "https://ws.detectlanguage.com/0.2/detect", + "headers": { + "User-Agent": "Detect Language API Python Client 1.4.0", + "Authorization": "Bearer {}", + }, +} + -def get_request_body(text, api_key, *args, **kwargs): +def get_request_body(text: Union[str, List[str]], api_key: str, *args, **kwargs): """ send a request and return the response body parsed as dictionary @@ -18,20 +27,20 @@ def get_request_body(text, api_key, *args, **kwargs): """ if not api_key: - raise Exception("you need to get an API_KEY for this to work. " - "Get one for free here: https://detectlanguage.com/documentation") + raise Exception( + "you need to get an API_KEY for this to work. 
" + "Get one for free here: https://detectlanguage.com/documentation" + ) if not text: raise Exception("Please provide an input text") else: try: - headers = config['headers'] - headers['Authorization'] = headers['Authorization'].format(api_key) - response = requests.post(config['url'], - json={'q': text}, - headers=headers) + headers = config["headers"] + headers["Authorization"] = headers["Authorization"].format(api_key) + response = requests.post(config["url"], json={"q": text}, headers=headers) - body = response.json().get('data') + body = response.json().get("data") return body except HTTPError as e: @@ -39,7 +48,9 @@ def get_request_body(text, api_key, *args, **kwargs): raise e -def single_detection(text, api_key=None, detailed=False, *args, **kwargs): +def single_detection( + text: str, api_key: Optional[str] = None, detailed: bool = False, *args, **kwargs +): """ function responsible for detecting the language from a text @@ -50,16 +61,18 @@ def single_detection(text, api_key=None, detailed=False, *args, **kwargs): @param detailed: set to True if you want to get detailed information about the detection process """ body = get_request_body(text, api_key) - detections = body.get('detections') + detections = body.get("detections") if detailed: return detections[0] - lang = detections[0].get('language', None) + lang = detections[0].get("language", None) if lang: return lang -def batch_detection(text_list, api_key, detailed=False, *args, **kwargs): +def batch_detection( + text_list: List[str], api_key: str, detailed: bool = False, *args, **kwargs +): """ function responsible for detecting the language from a text @@ -68,10 +81,9 @@ def batch_detection(text_list, api_key, detailed=False, *args, **kwargs): @param detailed: set to True if you want to get detailed information about the detection process """ body = get_request_body(text_list, api_key) - detections = body.get('detections') + detections = body.get("detections") res = [obj[0] for obj in detections] if 
detailed: return res else: - return [obj['language'] for obj in res] - + return [obj["language"] for obj in res] diff --git a/libs/deep_translator/engines.py b/libs/deep_translator/engines.py new file mode 100644 index 000000000..fe28b8b33 --- /dev/null +++ b/libs/deep_translator/engines.py @@ -0,0 +1,6 @@ +from deep_translator.base import BaseTranslator + +__engines__ = { + translator.__name__.replace("Translator", "").lower(): translator + for translator in BaseTranslator.__subclasses__() +} diff --git a/libs/deep_translator/exceptions.py b/libs/deep_translator/exceptions.py index c2e174b07..418faaa49 100644 --- a/libs/deep_translator/exceptions.py +++ b/libs/deep_translator/exceptions.py @@ -30,9 +30,11 @@ class NotValidPayload(BaseError): exception thrown if the user enters an invalid payload """ - def __init__(self, - val, - message='text must be a valid text with maximum 5000 character, otherwise it cannot be translated'): + def __init__( + self, + val, + message="text must be a valid text with maximum 5000 character, otherwise it cannot be translated", + ): super(NotValidPayload, self).__init__(val, message) @@ -41,21 +43,20 @@ class InvalidSourceOrTargetLanguage(BaseError): exception thrown if the user enters an invalid payload """ - def __init__(self, - val, - message="source and target language can't be the same"): + def __init__(self, val, message="Invalid source or target language!"): super(InvalidSourceOrTargetLanguage, self).__init__(val, message) - class TranslationNotFound(BaseError): """ exception thrown if no translation was found for the text provided by the user """ - def __init__(self, - val, - message='No translation was found using the current translator. Try another translator?'): + def __init__( + self, + val, + message="No translation was found using the current translator. 
Try another translator?", + ): super(TranslationNotFound, self).__init__(val, message) @@ -64,9 +65,9 @@ class ElementNotFoundInGetRequest(BaseError): exception thrown if the html element was not found in the body parsed by beautifulsoup """ - def __init__(self, - val, - message='Required element was not found in the API response'): + def __init__( + self, val, message="Required element was not found in the API response" + ): super(ElementNotFoundInGetRequest, self).__init__(val, message) @@ -76,7 +77,9 @@ class NotValidLength(BaseError): """ def __init__(self, val, min_chars, max_chars): - message = "Text length need to be between {} and {} characters".format(min_chars, max_chars) + message = ( + f"Text length need to be between {min_chars} and {max_chars} characters" + ) super(NotValidLength, self).__init__(val, message) @@ -85,8 +88,11 @@ class RequestError(Exception): exception thrown if an error occurred during the request call, e.g a connection problem. """ - def __init__(self, message="Request exception can happen due to an api connection error. " - "Please check your connection and try again"): + def __init__( + self, + message="Request exception can happen due to an api connection error. " + "Please check your connection and try again", + ): self.message = message def __str__(self): @@ -100,7 +106,7 @@ class MicrosoftAPIerror(Exception): def __init__(self, api_message): self.api_message = str(api_message) - self.message="Microsoft API returned the following error" + self.message = "Microsoft API returned the following error" def __str__(self): return "{}: {}".format(self.message, self.api_message) @@ -111,7 +117,10 @@ class TooManyRequests(Exception): exception thrown if an error occurred during the request call, e.g a connection problem. """ - def __init__(self, message="Server Error: You made too many requests to the server. According to google, you are allowed to make 5 requests per second and up to 200k requests per day. 
You can wait and try again later or you can try the translate_batch function"): + def __init__( + self, + message="Server Error: You made too many requests to the server. According to google, you are allowed to make 5 requests per second and up to 200k requests per day. You can wait and try again later or you can try the translate_batch function", + ): self.message = message def __str__(self): @@ -122,6 +131,7 @@ class ServerException(Exception): """ Default YandexTranslate exception from the official website """ + errors = { 400: "ERR_BAD_REQUEST", 401: "ERR_KEY_INVALID", @@ -143,5 +153,5 @@ class ServerException(Exception): class AuthorizationException(Exception): def __init__(self, api_key, *args): - msg = 'Unauthorized access with the api key ' + api_key + msg = "Unauthorized access with the api key " + api_key super().__init__(msg, *args) diff --git a/libs/deep_translator/google.py b/libs/deep_translator/google.py new file mode 100644 index 000000000..d7d6a6967 --- /dev/null +++ b/libs/deep_translator/google.py @@ -0,0 +1,123 @@ +""" +google translator API +""" + +from typing import List, Optional + +import requests +from bs4 import BeautifulSoup + +from deep_translator.base import BaseTranslator +from deep_translator.constants import BASE_URLS +from deep_translator.exceptions import ( + RequestError, + TooManyRequests, + TranslationNotFound, +) +from deep_translator.validate import is_empty, is_input_valid + + +class GoogleTranslator(BaseTranslator): + """ + class that wraps functions, which use Google Translate under the hood to translate text(s) + """ + + def __init__( + self, + source: str = "auto", + target: str = "en", + proxies: Optional[dict] = None, + **kwargs + ): + """ + @param source: source language to translate from + @param target: target language to translate to + """ + self.proxies = proxies + super().__init__( + base_url=BASE_URLS.get("GOOGLE_TRANSLATE"), + source=source, + target=target, + element_tag="div", + element_query={"class": "t0"}, 
+ payload_key="q", # key of text in the url + **kwargs + ) + + self._alt_element_query = {"class": "result-container"} + + def translate(self, text: str, **kwargs) -> str: + """ + function to translate a text + @param text: desired text to translate + @return: str: translated text + """ + if is_input_valid(text): + text = text.strip() + if self._same_source_target() or is_empty(text): + return text + self._url_params["tl"] = self._target + self._url_params["sl"] = self._source + + if self.payload_key: + self._url_params[self.payload_key] = text + + response = requests.get( + self._base_url, params=self._url_params, proxies=self.proxies + ) + if response.status_code == 429: + raise TooManyRequests() + + if response.status_code != 200: + raise RequestError() + + soup = BeautifulSoup(response.text, "html.parser") + + element = soup.find(self._element_tag, self._element_query) + + if not element: + element = soup.find(self._element_tag, self._alt_element_query) + if not element: + raise TranslationNotFound(text) + if element.get_text(strip=True) == text.strip(): + to_translate_alpha = "".join(ch for ch in text.strip() if ch.isalnum()) + translated_alpha = "".join( + ch for ch in element.get_text(strip=True) if ch.isalnum() + ) + if ( + to_translate_alpha + and translated_alpha + and to_translate_alpha == translated_alpha + ): + self._url_params["tl"] = self._target + if "hl" not in self._url_params: + return text.strip() + del self._url_params["hl"] + return self.translate(text) + + else: + return element.get_text(strip=True) + + def translate_file(self, path: str, **kwargs) -> str: + """ + translate directly from file + @param path: path to the target file + @type path: str + @param kwargs: additional args + @return: str + """ + return self._translate_file(path, **kwargs) + + def translate_batch(self, batch: List[str], **kwargs) -> List[str]: + """ + translate a list of texts + @param batch: list of texts you want to translate + @return: list of translations + """ + 
return self._translate_batch(batch, **kwargs) + + +if __name__ == "__main__": + trans = GoogleTranslator(source='auto', target='zh-CN') + res = trans.translate("good") + print("translation: ", res) diff --git a/libs/deep_translator/google_trans.py b/libs/deep_translator/google_trans.py deleted file mode 100644 index 3c8c42b14..000000000 --- a/libs/deep_translator/google_trans.py +++ /dev/null @@ -1,200 +0,0 @@ -""" -google translator API -""" - -from .constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES, GOOGLE_LANGUAGES_SECONDARY_NAMES -from .exceptions import TooManyRequests, LanguageNotSupportedException, TranslationNotFound, NotValidPayload, RequestError -from .parent import BaseTranslator -from bs4 import BeautifulSoup -import requests -from time import sleep -import warnings -import logging - - -class GoogleTranslator(BaseTranslator): - """ - class that wraps functions, which use google translate under the hood to translate text(s) - """ - _languages = GOOGLE_LANGUAGES_TO_CODES - supported_languages = list(_languages.keys()) - - def __init__(self, source="auto", target="en", proxies=None, **kwargs): - """ - @param source: source language to translate from - @param target: target language to translate to - """ - self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE") - self.proxies = proxies - - # code snipppet that converts the language into lower-case and skip lower-case conversion for abbreviations - # since abbreviations like zh-CN if converted to lower-case will result into error - ####################################### - source_lower = source - target_lower = target - if not source in self._languages.values(): - source_lower=source.lower() - if not target in self._languages.values(): - target_lower=target.lower() - ####################################### - - if self.is_language_supported(source_lower, target_lower): - self._source, self._target = self._map_language_to_code(source_lower, target_lower) - - super(GoogleTranslator, 
self).__init__(base_url=self.__base_url, - source=self._source, - target=self._target, - element_tag='div', - element_query={"class": "t0"}, - payload_key='q', # key of text in the url - tl=self._target, - sl=self._source, - **kwargs) - - self._alt_element_query = {"class": "result-container"} - - @staticmethod - def get_supported_languages(as_dict=False, **kwargs): - """ - return the supported languages by the google translator - @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations - @return: list or dict - """ - return GoogleTranslator.supported_languages if not as_dict else GoogleTranslator._languages - - def is_secondary(self, lang): - """ - Function to check if lang is a secondary name of any primary language - @param lang: language name - @return: primary name of a language if found otherwise False - """ - for primary_name, secondary_names in GOOGLE_LANGUAGES_SECONDARY_NAMES.items(): - if lang in secondary_names: - return primary_name - return False - - def _map_language_to_code(self, *languages): - """ - map language to its corresponding code (abbreviation) if the language was passed by its full name by the user - @param languages: list of languages - @return: mapped value of the language or raise an exception if the language is not supported - """ - for language in languages: - if language in self._languages.values() or language == 'auto': - yield language - elif language in self._languages.keys(): - yield self._languages[language] - else: - yield self._languages[self.is_secondary(language)] - - def is_language_supported(self, *languages): - """ - check if the language is supported by the translator - @param languages: list of languages - @return: bool or raise an Exception - """ - for lang in languages: - if lang != 'auto' and lang not in self._languages.keys(): - if lang != 'auto' and lang not in self._languages.values(): - if not self.is_secondary(lang): - raise 
LanguageNotSupportedException(lang) - return True - - def translate(self, text, **kwargs): - """ - function that uses google translate to translate a text - @param text: desired text to translate - @return: str: translated text - """ - - if self._validate_payload(text): - text = text.strip() - - if self.payload_key: - self._url_params[self.payload_key] = text - - response = requests.get(self.__base_url, - params=self._url_params, - proxies=self.proxies) - if response.status_code == 429: - raise TooManyRequests() - - if response.status_code != 200: - raise RequestError() - - soup = BeautifulSoup(response.text, 'html.parser') - - element = soup.find(self._element_tag, self._element_query) - - if not element: - element = soup.find(self._element_tag, self._alt_element_query) - if not element: - raise TranslationNotFound(text) - if element.get_text(strip=True) == text.strip(): - to_translate_alpha = ''.join(ch for ch in text.strip() if ch.isalnum()) - translated_alpha = ''.join(ch for ch in element.get_text(strip=True) if ch.isalnum()) - if to_translate_alpha and translated_alpha and to_translate_alpha == translated_alpha: - self._url_params["tl"] = self._target - if "hl" not in self._url_params: - return text.strip() - del self._url_params["hl"] - return self.translate(text) - - else: - return element.get_text(strip=True) - - def translate_file(self, path, **kwargs): - """ - translate directly from file - @param path: path to the target file - @type path: str - @param kwargs: additional args - @return: str - """ - try: - with open(path, 'r', encoding='utf-8') as f: - text = f.read().strip() - return self.translate(text) - except Exception as e: - raise e - - def translate_sentences(self, sentences=None, **kwargs): - """ - translate many sentences together. This makes sense if you have sentences with different languages - and you want to translate all to unified language. 
This is handy because it detects - automatically the language of each sentence and then translate it. - - @param sentences: list of sentences to translate - @return: list of all translated sentences - """ - warnings.warn("deprecated. Use the translate_batch function instead", DeprecationWarning, stacklevel=2) - logging.warning("deprecated. Use the translate_batch function instead") - if not sentences: - raise NotValidPayload(sentences) - - translated_sentences = [] - try: - for sentence in sentences: - translated = self.translate(text=sentence) - translated_sentences.append(translated) - - return translated_sentences - - except Exception as e: - raise e - - def translate_batch(self, batch=None, **kwargs): - """ - translate a list of texts - @param batch: list of texts you want to translate - @return: list of translations - """ - if not batch: - raise Exception("Enter your text list that you want to translate") - arr = [] - for i, text in enumerate(batch): - - translated = self.translate(text, **kwargs) - arr.append(translated) - return arr - diff --git a/libs/deep_translator/libre.py b/libs/deep_translator/libre.py index b4c25330d..308e9205f 100644 --- a/libs/deep_translator/libre.py +++ b/libs/deep_translator/libre.py @@ -2,112 +2,93 @@ LibreTranslate API """ +from typing import List, Optional + import requests -from .parent import BaseTranslator -from .constants import BASE_URLS,LIBRE_LANGUAGES_TO_CODES, LIBRE_CODES_TO_LANGUAGES -from .exceptions import (ServerException, - TranslationNotFound, - LanguageNotSupportedException, - AuthorizationException, - NotValidPayload) + +from deep_translator.base import BaseTranslator +from deep_translator.constants import BASE_URLS, LIBRE_LANGUAGES_TO_CODES +from deep_translator.exceptions import ( + AuthorizationException, + ServerException, + TranslationNotFound, +) +from deep_translator.validate import is_empty, is_input_valid class LibreTranslator(BaseTranslator): """ class that wraps functions, which use libre translator 
under the hood to translate text(s) """ - _languages = LIBRE_LANGUAGES_TO_CODES - supported_languages = list(_languages.keys()) - def __init__(self,source="auto", target="en", base_url = BASE_URLS.get("LIBRE_FREE"), api_key=None, **kwargs): + def __init__( + self, + api_key: Optional[str] = None, + source: str = "en", + target: str = "es", + use_free_api: bool = True, + custom_url: Optional[str] = None, + **kwargs + ): """ + @param api_key: your api key @param source: source language to translate from - List of LibreTranslate nedpoints can be found at : https://github.com/LibreTranslate/LibreTranslate#mirrors + List of LibreTranslate endpoint can be found at : https://github.com/LibreTranslate/LibreTranslate#mirrors Some require an API key @param target: target language to translate to + @param use_free_api: set True if you want to use the free api. This means a url that does not require and api key would be used + @param custom_url: you can use a custom endpoint """ - if base_url == BASE_URLS.get("LIBRE") and not api_key: - raise ServerException(401) - self.__base_url = base_url self.api_key = api_key - if source == "auto": - self.source = "auto" - else: - self.source = self._map_language_to_code(source) - self.target = self._map_language_to_code(target) - - - @staticmethod - def get_supported_languages(as_dict=False, **kwargs): - """ - return the supported languages by the libre translator - @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations - @return: list or dict - """ - return [*LibreTranslator._languages.keys()] if not as_dict else LibreTranslator._languages - - def _map_language_to_code(self, language, **kwargs): - """ - map language to its corresponding code (abbreviation) if the language was passed by its full name by the user - @param language: a string for 1 language - @return: mapped value of the language or raise an exception if the language is not supported - """ - if language in 
self._languages.keys(): - return self._languages[language] - elif language in self._languages.values(): - return language - raise LanguageNotSupportedException(language) + url = BASE_URLS.get("LIBRE") if not use_free_api else BASE_URLS.get('LIBRE_FREE') + super().__init__( + base_url=url if not custom_url else custom_url, + source=source, + target=target, + languages=LIBRE_LANGUAGES_TO_CODES, + ) - def _is_language_supported(self, language, **kwargs): - """ - check if the language is supported by the translator - @param language: a string for 1 language - @return: bool or raise an Exception - """ - if language == 'auto' or language in self._languages.keys() or language in self._languages.values(): - return True - else: - raise LanguageNotSupportedException(language) - - def translate(self, text, **kwargs): + def translate(self, text: str, **kwargs) -> str: """ function that uses microsoft translate to translate a text @param text: desired text to translate @return: str: translated text """ - # Create the request parameters. - if type(text) != str or text == "": - raise NotValidPayload(text) + if is_input_valid(text): + if self._same_source_target() or is_empty(text): + return text - translate_endpoint = 'translate' - params = { - "q": text, - "source": self.source, - "target": self.target, - "format": 'text' - } - # Add API Key if required - if self.api_key: - params["api_key"] = self.api_key - # Do the request and check the connection. - try: - response = requests.post(self.__base_url + translate_endpoint, params=params) - except ConnectionError: - raise ServerException(503) - # If the answer is not success, raise server exception. + translate_endpoint = "translate" + params = { + "q": text, + "source": self._source, + "target": self._target, + "format": "text", + } + # Add API Key if required + if self.api_key: + params["api_key"] = self.api_key + # Do the request and check the connection. 
+ try: + response = requests.post( + self._base_url + translate_endpoint, params=params + ) + except ConnectionError: + raise ServerException(503) + # If the answer is not success, raise server exception. - if response.status_code == 403: - raise AuthorizationException(self.api_key) - elif response.status_code != 200: - raise ServerException(response.status_code) - # Get the response and check is not empty. - res = response.json() - if not res: - raise TranslationNotFound(text) - # Process and return the response. - return res['translatedText'] + if response.status_code == 403: + raise AuthorizationException(self.api_key) + elif response.status_code != 200: + raise ServerException(response.status_code) + # Get the response and check is not empty. + res = response.json() + if not res: + raise TranslationNotFound(text) + # Process and return the response. + return res["translatedText"] - def translate_file(self, path, **kwargs): + def translate_file(self, path: str, **kwargs) -> str: """ translate directly from file @param path: path to the target file @@ -115,23 +96,18 @@ class LibreTranslator(BaseTranslator): @param kwargs: additional args @return: str """ - try: - with open(path, 'r', encoding='utf-8') as f: - text = f.read().strip() - return self.translate(text) - except Exception as e: - raise e + return self._translate_file(path, **kwargs) - def translate_batch(self, batch=None, **kwargs): + def translate_batch(self, batch: List[str], **kwargs) -> List[str]: """ translate a list of texts @param batch: list of texts you want to translate @return: list of translations """ - if not batch: - raise Exception("Enter your text list that you want to translate") - arr = [] - for i, text in enumerate(batch): - translated = self.translate(text, **kwargs) - arr.append(translated) - return arr + return self._translate_batch(batch, **kwargs) + + +if __name__ == '__main__': + l = LibreTranslator(source="en", target="de") + res = l.translate("good") + print("res: ", res) diff 
--git a/libs/deep_translator/linguee.py b/libs/deep_translator/linguee.py index 921eee32c..81a84b99e 100644 --- a/libs/deep_translator/linguee.py +++ b/libs/deep_translator/linguee.py @@ -1,82 +1,50 @@ """ linguee translator API """ +from typing import List, Optional, Union -from .constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES, LINGUEE_CODE_TO_LANGUAGE -from .exceptions import (LanguageNotSupportedException, - TranslationNotFound, - NotValidPayload, - ElementNotFoundInGetRequest, - RequestError, - TooManyRequests) -from .parent import BaseTranslator -from bs4 import BeautifulSoup import requests +from bs4 import BeautifulSoup from requests.utils import requote_uri +from deep_translator.base import BaseTranslator +from deep_translator.constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES +from deep_translator.exceptions import ( + ElementNotFoundInGetRequest, + NotValidPayload, + RequestError, + TooManyRequests, + TranslationNotFound, +) +from deep_translator.validate import is_empty, is_input_valid + class LingueeTranslator(BaseTranslator): """ class that wraps functions, which use the linguee translator under the hood to translate word(s) """ - _languages = LINGUEE_LANGUAGES_TO_CODES - supported_languages = list(_languages.keys()) - def __init__(self, source, target="en", proxies=None, **kwargs): + def __init__( + self, source: str, target: str = "en", proxies: Optional[dict] = None, **kwargs + ): """ @param source: source language to translate from @param target: target language to translate to """ - self.__base_url = BASE_URLS.get("LINGUEE") self.proxies = proxies - - if self.is_language_supported(source, target): - self._source, self._target = self._map_language_to_code(source.lower(), target.lower()) - - super().__init__(base_url=self.__base_url, - source=self._source, - target=self._target, - element_tag='a', - element_query={'class': 'dictLink featured'}, - payload_key=None, # key of text in the url - ) - - @staticmethod - def 
get_supported_languages(as_dict=False, **kwargs): - """ - return the supported languages by the linguee translator - @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations - @return: list or dict - """ - return LingueeTranslator.supported_languages if not as_dict else LingueeTranslator._languages - - def _map_language_to_code(self, *languages, **kwargs): - """ - map language to its corresponding code (abbreviation) if the language was passed by its full name by the user - @param languages: list of languages - @return: mapped value of the language or raise an exception if the language is not supported - """ - for language in languages: - if language in self._languages.values(): - yield LINGUEE_CODE_TO_LANGUAGE[language] - elif language in self._languages.keys(): - yield language - else: - raise LanguageNotSupportedException(language) - - def is_language_supported(self, *languages, **kwargs): - """ - check if the language is supported by the translator - @param languages: list of languages - @return: bool or raise an Exception - """ - for lang in languages: - if lang not in self._languages.keys(): - if lang not in self._languages.values(): - raise LanguageNotSupportedException(lang) - return True - - def translate(self, word, return_all=False, **kwargs): + super().__init__( + base_url=BASE_URLS.get("LINGUEE"), + source=source, + target=target, + languages=LINGUEE_LANGUAGES_TO_CODES, + element_tag="a", + element_query={"class": "dictLink featured"}, + payload_key=None, # key of text in the url + ) + + def translate( + self, word: str, return_all: bool = False, **kwargs + ) -> Union[str, List[str]]: """ function that uses linguee to translate a word @param word: word to translate @@ -85,9 +53,14 @@ class LingueeTranslator(BaseTranslator): @type return_all: bool @return: str: translated word """ - if self._validate_payload(word, max_chars=50): + if self._same_source_target() or is_empty(word): + return word + + 
if is_input_valid(word, max_chars=50): # %s-%s/translation/%s.html - url = "{}{}-{}/translation/{}.html".format(self.__base_url, self._source, self._target, word) + url = ( + f"{self._base_url}{self._source}-{self._target}/translation/{word}.html" + ) url = requote_uri(url) response = requests.get(url, proxies=self.proxies) @@ -96,7 +69,7 @@ class LingueeTranslator(BaseTranslator): if response.status_code != 200: raise RequestError() - soup = BeautifulSoup(response.text, 'html.parser') + soup = BeautifulSoup(response.text, "html.parser") elements = soup.find_all(self._element_tag, self._element_query) if not elements: raise ElementNotFoundInGetRequest(elements) @@ -104,17 +77,19 @@ class LingueeTranslator(BaseTranslator): filtered_elements = [] for el in elements: try: - pronoun = el.find('span', {'class': 'placeholder'}).get_text(strip=True) + pronoun = el.find("span", {"class": "placeholder"}).get_text( + strip=True + ) except AttributeError: - pronoun = '' - filtered_elements.append(el.get_text(strip=True).replace(pronoun, '')) + pronoun = "" + filtered_elements.append(el.get_text(strip=True).replace(pronoun, "")) if not filtered_elements: raise TranslationNotFound(word) return filtered_elements if return_all else filtered_elements[0] - def translate_words(self, words, **kwargs): + def translate_words(self, words: List[str], **kwargs) -> List[str]: """ translate a batch of words together by providing them in a list @param words: list of words you want to translate @@ -128,4 +103,3 @@ class LingueeTranslator(BaseTranslator): for word in words: translated_words.append(self.translate(word=word, **kwargs)) return translated_words - diff --git a/libs/deep_translator/main.py b/libs/deep_translator/main.py deleted file mode 100644 index 17f2ab805..000000000 --- a/libs/deep_translator/main.py +++ /dev/null @@ -1,132 +0,0 @@ -"""Console script for deep_translator.""" - -import click -from .google_trans import GoogleTranslator -from .mymemory import MyMemoryTranslator 
-from .deepl import DeepL -from .qcri import QCRI -from .linguee import LingueeTranslator -from .pons import PonsTranslator -from .yandex import YandexTranslator -from .microsoft import MicrosoftTranslator -from .papago import PapagoTranslator -from .libre import LibreTranslator - -CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) -def cli(): - pass - [email protected](context_settings=CONTEXT_SETTINGS, no_args_is_help=True) [email protected]('translator', required=True, default='google', type=str) [email protected]("--source", "-src", required=True, type=str, help="source language to translate from") [email protected]("--target", "-tgt", required=True, type=str, help="target language to translate to") [email protected]("--text", "-txt", type=str,required = True,prompt="Enter the text you want to translate",help="text you want to translate") [email protected]("--api-key",type=str,help="required for DeepL, QCRI, Yandex, Microsoft and Papago translators") -def translate(translator, source, target, text, api_key): - """ - Use TRANSLATOR to translate source material into another language. - \f - Directory function to send arguments to the correct translator. 
- @param translator: translator name parsed from terminal arguments - @return: None - """ - api_key_required = ["deepl", "qcri", "yandex", "microsoft", "papago"] - if translator in api_key_required and not api_key: - click.echo( - "This translator requires an api key provided through --api-key") - else: - pass - - if translator == "google": - translator = GoogleTranslator(source=source, target=target) - elif translator == "mymemory": - translator = MyMemoryTranslator(source=source, target=target) - elif translator == "deepl": - translator = DeepL(source=source, target=target, api_key=api_key) - elif translator == "qcri": - translator = QCRI(source=source, target=target, api_key=api_key) - elif translator == "linguee": - translator = LingueeTranslator(source=source, target=target) - elif translator == "pons": - translator = PonsTranslator(source=source, target=target) - elif translator == "yandex": - translator = YandexTranslator( - source=source, - target=target, - api_key=api_key) - elif translator == "microsoft": - translator = MicrosoftTranslator( - source=source, - target=target, - api_key=api_key) - elif translator == "papago": - translator = PapagoTranslator( - source=source, - target=target, - api_key=api_key) - elif translator == "libre": - translator= LibreTranslator( - source=source, - target=target - ) - else: - raise AttributeError("The given translator is not supported.") - - res = translator.translate(text) - click.echo(f" | Translation from {source} to {target} |") - click.echo(f"Translated text: \n {res}") - return 0 - [email protected](context_settings=CONTEXT_SETTINGS, no_args_is_help=True) [email protected]('translator') [email protected]('api_key', required=False) -def languages(translator, api_key): - """ - Retrieve the list of available languages from the given translator. - @param translator: Translator given by the user. - @param api_key: Optional API key given by the user. Required for some translators. 
- @return: None - """ - translator = translator.lower() - api_key_required = ["deepl", "qcri", "yandex", "microsoft", "papago"] - if translator in api_key_required and not api_key: - click.echo("This translator requires an api key provided through --api-key") - else: - pass - - if translator == "google": - translator = GoogleTranslator - elif translator == "mymemory": - translator = MyMemoryTranslator - elif translator == "qcri": - translator = QCRI(api_key=api_key) - elif translator == "linguee": - translator = LingueeTranslator - elif translator == "pons": - translator = PonsTranslator - elif translator == "yandex": - translator = YandexTranslator(api_key=api_key) - elif translator == "microsoft": - translator = MicrosoftTranslator(api_key=api_key) - elif translator == "papago": - translator = PapagoTranslator(api_key=api_key) - elif translator == "libre": - translator = LibreTranslator - else: - raise AttributeError("The given translator is not supported.") - - supported_languages = translator.get_supported_languages(as_dict=True) - click.echo(f"Languages supported by '{translator}' are :") - for k, v in supported_languages.items(): - click.echo(f"|- {k}: {v}") - return 0 - -def list(): - """Lists available translators.""" - click.echo("Available translators include: Google, MyMemory, QCRI, Linguee, Pons, Yandex, Microsoft (Bing), Papago and LibreTranslate.") - return 0 - -if __name__ == "__main__": - cli() diff --git a/libs/deep_translator/microsoft.py b/libs/deep_translator/microsoft.py index 5a0aca795..8284e760d 100644 --- a/libs/deep_translator/microsoft.py +++ b/libs/deep_translator/microsoft.py @@ -1,32 +1,41 @@ # -*- coding: utf-8 -*- -import requests import logging import sys +from typing import List, Optional + +import requests -from .constants import BASE_URLS, MICROSOFT_CODES_TO_LANGUAGES -from .exceptions import LanguageNotSupportedException, ServerException, MicrosoftAPIerror +from deep_translator.base import BaseTranslator +from 
deep_translator.constants import BASE_URLS +from deep_translator.exceptions import MicrosoftAPIerror, ServerException +from deep_translator.validate import is_input_valid -class MicrosoftTranslator: +class MicrosoftTranslator(BaseTranslator): """ the class that wraps functions, which use the Microsoft translator under the hood to translate word(s) """ - _languages = MICROSOFT_CODES_TO_LANGUAGES - supported_languages = list(_languages.values()) - - def __init__(self, api_key=None, region=None, source=None, target=None, proxies=None, **kwargs): + def __init__( + self, + api_key: Optional[str] = None, + region: Optional[str] = None, + source: str = "auto", + target: str = "en", + proxies: Optional[dict] = None, + **kwargs, + ): """ @params api_key and target are the required params @param api_key: your Microsoft API key @param region: your Microsoft Location """ + if not api_key: raise ServerException(401) - else: - self.api_key = api_key + self.api_key = api_key self.proxies = proxies self.headers = { "Ocp-Apim-Subscription-Key": self.api_key, @@ -36,67 +45,27 @@ class MicrosoftTranslator: if region: self.region = region self.headers["Ocp-Apim-Subscription-Region"] = self.region - - if not target: - raise ServerException(401) - else: - if type(target) is str: - self.target = target.lower() - else: - self.target = [i.lower() for i in target] - if self.is_language_supported(self.target): - self.target = self._map_language_to_code(self.target) - - self.url_params = {'to': self.target, **kwargs} - - if source: - self.source = source.lower() - if self.is_language_supported(self.source): - self.source = self._map_language_to_code(self.source) - self.url_params['from'] = self.source - - self.__base_url = BASE_URLS.get("MICROSOFT_TRANSLATE") - - @staticmethod - def get_supported_languages(as_dict=False, **kwargs): - """ - return the languages supported by the microsoft translator - @param as_dict: if True, the languages will be returned as a dictionary mapping languages to 
their abbreviations - @return: list or dict - """ - return MicrosoftTranslator.supported_languages if not as_dict else MicrosoftTranslator._languages - - def _map_language_to_code(self, language, **kwargs): - """ - map the language to its corresponding code (abbreviation) if the language was passed by its full name by the user - @param language: a string (if 1 lang) or a list (if multiple langs) - @return: mapped value of the language or raise an exception if the language is not supported - """ - if type(language) is str: - language = [language] - for lang in language: - if lang in self._languages.values(): - yield lang - elif lang in self._languages.keys(): - yield self._languages[lang] - else: - raise LanguageNotSupportedException(lang) - - def is_language_supported(self, language, **kwargs): - """ - check if the language is supported by the translator - @param language: a string (if 1 lang) or a list (if multiple langs) - @return: bool or raise an Exception - """ - if type(language) is str: - language = [language] - for lang in language: - if lang not in self._languages.keys(): - if lang not in self._languages.values(): - raise LanguageNotSupportedException(lang) - return True - - def translate(self, text, **kwargs): + super().__init__( + base_url=BASE_URLS.get("MICROSOFT_TRANSLATE"), + source=source, + target=target, + languages=self._get_supported_languages(), + **kwargs, + ) + + # this function get the actual supported languages of the msft translator and store them in a dict, where + # the keys are the abbreviations and the values are the languages + # a common variable used in the other translators would be: MICROSOFT_CODES_TO_LANGUAGES + def _get_supported_languages(self): + + microsoft_languages_api_url = "https://api.cognitive.microsofttranslator.com/languages?api-version=3.0&scope" \ + "=translation " + microsoft_languages_response = requests.get(microsoft_languages_api_url) + translation_dict = microsoft_languages_response.json()["translation"] + + 
return {translation_dict[k]["name"].lower(): k.lower() for k in translation_dict.keys()} + + def translate(self, text: str, **kwargs) -> str: """ function that uses microsoft translate to translate a text @param text: desired text to translate @@ -104,43 +73,48 @@ class MicrosoftTranslator: """ # a body must be a list of dicts to process multiple texts; # I have not added multiple text processing here since it is covered by the translate_batch method - valid_microsoft_json = [{'text': text}] - try: - requested = requests.post(self.__base_url, - params=self.url_params, - headers=self.headers, - json=valid_microsoft_json, - proxies=self.proxies) - except requests.exceptions.RequestException: - exc_type, value, traceback = sys.exc_info() - logging.warning(f"Returned error: {exc_type.__name__}") - - # Where Microsoft API responds with an api error, it returns a dict in response.json() - if type(requested.json()) is dict: - error_message = requested.json()['error'] - raise MicrosoftAPIerror(error_message) - # Where it responds with a translation, its response.json() is a list e.g. 
[{'translations': [{'text': 'Hello world!', 'to': 'en'}]}] - elif type(requested.json()) is list: - all_translations = [i['text'] for i in requested.json()[0]['translations']] - return "\n".join(all_translations) - - def translate_file(self, path, **kwargs): + response = None + if is_input_valid(text): + self._url_params["from"] = self._source + self._url_params["to"] = self._target + + valid_microsoft_json = [{"text": text}] + try: + response = requests.post( + self._base_url, + params=self._url_params, + headers=self.headers, + json=valid_microsoft_json, + proxies=self.proxies, + ) + except requests.exceptions.RequestException: + exc_type, value, traceback = sys.exc_info() + logging.warning(f"Returned error: {exc_type.__name__}") + + # Where Microsoft API responds with an api error, it returns a dict in response.json() + if type(response.json()) is dict: + error_message = response.json()["error"] + raise MicrosoftAPIerror(error_message) + # Where it responds with a translation, its response.json() is a list + # e.g. 
[{'translations': [{'text':'Hello world!', 'to': 'en'}]}] + elif type(response.json()) is list: + all_translations = [ + i["text"] for i in response.json()[0]["translations"] + ] + return "\n".join(all_translations) + + def translate_file(self, path: str, **kwargs) -> str: """ translate from a file @param path: path to file @return: translated text """ - try: - with open(path, 'r', encoding='utf-8') as f: - text = f.read().strip() - return self.translate(text) - except Exception as e: - raise e - - def translate_batch(self, batch, **kwargs): + return self._translate_file(path, **kwargs) + + def translate_batch(self, batch: List[str], **kwargs) -> List[str]: """ translate a batch of texts @param batch: list of texts to translate @return: list of translations """ - return [self.translate(text, **kwargs) for text in batch] + return self._translate_batch(batch, **kwargs) diff --git a/libs/deep_translator/mymemory.py b/libs/deep_translator/mymemory.py index 8575c48ed..ba16b9b81 100644 --- a/libs/deep_translator/mymemory.py +++ b/libs/deep_translator/mymemory.py @@ -1,81 +1,48 @@ """ mymemory translator API """ -import logging -import warnings +from typing import List, Optional, Union -from .constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES -from .exceptions import (NotValidPayload, - TranslationNotFound, - LanguageNotSupportedException, - RequestError, - TooManyRequests) -from .parent import BaseTranslator import requests -from time import sleep + +from deep_translator.base import BaseTranslator +from deep_translator.constants import BASE_URLS +from deep_translator.exceptions import ( + RequestError, + TooManyRequests, + TranslationNotFound, +) +from deep_translator.validate import is_empty, is_input_valid class MyMemoryTranslator(BaseTranslator): """ class that uses the mymemory translator to translate texts """ - _languages = GOOGLE_LANGUAGES_TO_CODES - supported_languages = list(_languages.keys()) - def __init__(self, source="auto", target="en", proxies=None, 
**kwargs): + def __init__( + self, + source: str = "auto", + target: str = "en", + proxies: Optional[dict] = None, + **kwargs, + ): """ @param source: source language to translate from @param target: target language to translate to """ - self.__base_url = BASE_URLS.get("MYMEMORY") self.proxies = proxies - if self.is_language_supported(source, target): - self._source, self._target = self._map_language_to_code(source.lower(), target.lower()) - self._source = self._source if self._source != 'auto' else 'Lao' - - self.email = kwargs.get('email', None) - super(MyMemoryTranslator, self).__init__(base_url=self.__base_url, - source=self._source, - target=self._target, - payload_key='q', - langpair='{}|{}'.format(self._source, self._target)) - - @staticmethod - def get_supported_languages(as_dict=False, **kwargs): - """ - return the supported languages by the mymemory translator - @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations - @return: list or dict - """ - return MyMemoryTranslator.supported_languages if not as_dict else MyMemoryTranslator._languages - - def _map_language_to_code(self, *languages): - """ - map language to its corresponding code (abbreviation) if the language was passed by its full name by the user - @param languages: list of languages - @return: mapped value of the language or raise an exception if the language is not supported - """ - for language in languages: - if language in self._languages.values() or language == 'auto': - yield language - elif language in self._languages.keys(): - yield self._languages[language] - else: - raise LanguageNotSupportedException(language) - - def is_language_supported(self, *languages): - """ - check if the language is supported by the translator - @param languages: list of languages - @return: bool or raise an Exception - """ - for lang in languages: - if lang != 'auto' and lang not in self._languages.keys(): - if lang != 'auto' and lang not in 
self._languages.values(): - raise LanguageNotSupportedException(lang) - return True - - def translate(self, text, return_all=False, **kwargs): + self.email = kwargs.get("email", None) + super().__init__( + base_url=BASE_URLS.get("MYMEMORY"), + source=source, + target=target, + payload_key="q", + ) + + def translate( + self, text: str, return_all: bool = False, **kwargs + ) -> Union[str, List[str]]: """ function that uses the mymemory translator to translate a text @param text: desired text to translate @@ -83,19 +50,20 @@ class MyMemoryTranslator(BaseTranslator): @param return_all: set to True to return all synonym/similars of the translated text @return: str or list """ - - if self._validate_payload(text, max_chars=500): + if is_input_valid(text, max_chars=500): text = text.strip() + if self._same_source_target() or is_empty(text): + return text + self._url_params["langpair"] = f"{self._source}|{self._target}" if self.payload_key: self._url_params[self.payload_key] = text if self.email: - self._url_params['de'] = self.email + self._url_params["de"] = self.email - response = requests.get(self.__base_url, - params=self._url_params, - headers=self.headers, - proxies=self.proxies) + response = requests.get( + self._base_url, params=self._url_params, proxies=self.proxies + ) if response.status_code == 429: raise TooManyRequests() @@ -106,71 +74,30 @@ class MyMemoryTranslator(BaseTranslator): if not data: TranslationNotFound(text) - translation = data.get('responseData').get('translatedText') + translation = data.get("responseData").get("translatedText") if translation: return translation elif not translation: - all_matches = data.get('matches') - matches = (match['translation'] for match in all_matches) + all_matches = data.get("matches") + matches = (match["translation"] for match in all_matches) next_match = next(matches) return next_match if not return_all else list(all_matches) - def translate_sentences(self, sentences=None, **kwargs): - """ - translate many 
sentences together. This makes sense if you have sentences with different languages - and you want to translate all to unified language. This is handy because it detects - automatically the language of each sentence and then translate it. - - @param sentences: list of sentences to translate - @return: list of all translated sentences + def translate_file(self, path: str, **kwargs) -> str: """ - warn_msg = "deprecated. Use the translate_batch function instead" - warnings.warn(warn_msg, DeprecationWarning, stacklevel=2) - logging.warning(warn_msg) - if not sentences: - raise NotValidPayload(sentences) - - translated_sentences = [] - try: - for sentence in sentences: - translated = self.translate(text=sentence, **kwargs) - translated_sentences.append(translated) - - return translated_sentences - - except Exception as e: - raise e - - def translate_file(self, path, **kwargs): + translate directly from file + @param path: path to the target file + @type path: str + @param kwargs: additional args + @return: str """ - translate directly from file - @param path: path to the target file - @type path: str - @param kwargs: additional args - @return: str - """ - try: - with open(path, 'r', encoding='utf-8') as f: - text = f.read().strip() - - return self.translate(text=text) - except Exception as e: - raise e + return self._translate_file(path, **kwargs) - def translate_batch(self, batch=None, **kwargs): + def translate_batch(self, batch: List[str], **kwargs) -> List[str]: """ translate a list of texts @param batch: list of texts you want to translate @return: list of translations """ - if not batch: - raise Exception("Enter your text list that you want to translate") - - arr = [] - for text in batch: - translated = self.translate(text, **kwargs) - arr.append(translated) - sleep(2) - - return arr + return self._translate_batch(batch, **kwargs) diff --git a/libs/deep_translator/papago.py b/libs/deep_translator/papago.py index 7cb5a8a29..814a28e83 100644 --- 
a/libs/deep_translator/papago.py +++ b/libs/deep_translator/papago.py @@ -2,101 +2,78 @@ google translator API """ import json -from .constants import BASE_URLS, PAPAGO_LANGUAGE_TO_CODE -from .exceptions import LanguageNotSupportedException, TranslationNotFound, NotValidPayload +from typing import List, Optional + import requests -import warnings -import logging + +from deep_translator.base import BaseTranslator +from deep_translator.constants import BASE_URLS, PAPAGO_LANGUAGE_TO_CODE +from deep_translator.exceptions import TranslationNotFound +from deep_translator.validate import is_input_valid -class PapagoTranslator(object): +class PapagoTranslator(BaseTranslator): """ class that wraps functions, which use google translate under the hood to translate text(s) """ - _languages = PAPAGO_LANGUAGE_TO_CODE - supported_languages = list(_languages.keys()) - def __init__(self, client_id=None, secret_key=None, source="auto", target="en", **kwargs): + def __init__( + self, + client_id: Optional[str] = None, + secret_key: Optional[str] = None, + source: str = "auto", + target: str = "en", + **kwargs, + ): """ @param source: source language to translate from @param target: target language to translate to """ if not client_id or not secret_key: - raise Exception("Please pass your client id and secret key! visit the papago website for more infos") + raise Exception( + "Please pass your client id and secret key! 
visit the papago website for more infos" + ) - self.__base_url = BASE_URLS.get("PAPAGO_API") self.client_id = client_id self.secret_key = secret_key - if self.is_language_supported(source, target): - self._source, self._target = self._map_language_to_code(source.lower(), target.lower()) - - @staticmethod - def get_supported_languages(as_dict=False, **kwargs): - """ - return the supported languages by the google translator - @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations - @return: list or dict - """ - return PapagoTranslator.supported_languages if not as_dict else PapagoTranslator._languages - - def _map_language_to_code(self, *languages): - """ - map language to its corresponding code (abbreviation) if the language was passed by its full name by the user - @param languages: list of languages - @return: mapped value of the language or raise an exception if the language is not supported - """ - for language in languages: - if language in self._languages.values() or language == 'auto': - yield language - elif language in self._languages.keys(): - yield self._languages[language] - else: - raise LanguageNotSupportedException(language) - - def is_language_supported(self, *languages): - """ - check if the language is supported by the translator - @param languages: list of languages - @return: bool or raise an Exception - """ - for lang in languages: - if lang != 'auto' and lang not in self._languages.keys(): - if lang != 'auto' and lang not in self._languages.values(): - raise LanguageNotSupportedException(lang) - return True + super().__init__( + base_url=BASE_URLS.get("PAPAGO_API"), + source=source, + target=target, + languages=PAPAGO_LANGUAGE_TO_CODE, + **kwargs, + ) - def translate(self, text, **kwargs): + def translate(self, text: str, **kwargs) -> str: """ function that uses google translate to translate a text @param text: desired text to translate @return: str: translated text """ - - payload = { - 
"source": self._source, - "target": self._target, - "text": text - } - headers = { - 'X-Naver-Client-Id': self.client_id, - 'X-Naver-Client-Secret': self.secret_key, - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' - } - response = requests.post(self.__base_url, headers=headers, data=payload) - if response.status_code != 200: - raise Exception(f'Translation error! -> status code: {response.status_code}') - res_body = json.loads(response.text) - if "message" not in res_body: - raise TranslationNotFound(text) - - msg = res_body.get("message") - result = msg.get("result", None) - if not result: - raise TranslationNotFound(text) - translated_text = result.get("translatedText") - return translated_text - - def translate_file(self, path, **kwargs): + if is_input_valid(text): + payload = {"source": self._source, "target": self._target, "text": text} + headers = { + "X-Naver-Client-Id": self.client_id, + "X-Naver-Client-Secret": self.secret_key, + "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", + } + response = requests.post(self._base_url, headers=headers, data=payload) + if response.status_code != 200: + raise Exception( + f"Translation error! 
-> status code: {response.status_code}" + ) + res_body = json.loads(response.text) + if "message" not in res_body: + raise TranslationNotFound(text) + + msg = res_body.get("message") + result = msg.get("result", None) + if not result: + raise TranslationNotFound(text) + translated_text = result.get("translatedText") + return translated_text + + def translate_file(self, path: str, **kwargs) -> str: """ translate directly from file @param path: path to the target file @@ -104,51 +81,12 @@ class PapagoTranslator(object): @param kwargs: additional args @return: str """ - try: - with open(path, 'r', encoding='utf-8') as f: - text = f.read().strip() - return self.translate(text) - except Exception as e: - raise e - - def translate_sentences(self, sentences=None, **kwargs): - """ - translate many sentences together. This makes sense if you have sentences with different languages - and you want to translate all to unified language. This is handy because it detects - automatically the language of each sentence and then translate it. - - @param sentences: list of sentences to translate - @return: list of all translated sentences - """ - warnings.warn("deprecated. Use the translate_batch function instead", DeprecationWarning, stacklevel=2) - logging.warning("deprecated. 
Use the translate_batch function instead") - if not sentences: - raise NotValidPayload(sentences) + return self._translate_file(path, **kwargs) - translated_sentences = [] - try: - for sentence in sentences: - translated = self.translate(text=sentence) - translated_sentences.append(translated) - - return translated_sentences - - except Exception as e: - raise e - - def translate_batch(self, batch=None, **kwargs): + def translate_batch(self, batch: List[str], **kwargs) -> List[str]: """ translate a list of texts @param batch: list of texts you want to translate @return: list of translations """ - if not batch: - raise Exception("Enter your text list that you want to translate") - arr = [] - for i, text in enumerate(batch): - - translated = self.translate(text, **kwargs) - arr.append(translated) - return arr - - + return self._translate_batch(batch, **kwargs) diff --git a/libs/deep_translator/parent.py b/libs/deep_translator/parent.py deleted file mode 100644 index 440492e8d..000000000 --- a/libs/deep_translator/parent.py +++ /dev/null @@ -1,80 +0,0 @@ -"""parent translator class""" - -from .exceptions import NotValidPayload, NotValidLength, InvalidSourceOrTargetLanguage -from abc import ABC, abstractmethod -import string - - -class BaseTranslator(ABC): - """ - Abstract class that serve as a parent translator for other different translators - """ - def __init__(self, - base_url=None, - source="auto", - target="en", - payload_key=None, - element_tag=None, - element_query=None, - **url_params): - """ - @param source: source language to translate from - @param target: target language to translate to - """ - if source == target: - raise InvalidSourceOrTargetLanguage(source) - - self.__base_url = base_url - self._source = source - self._target = target - self._url_params = url_params - self._element_tag = element_tag - self._element_query = element_query - self.payload_key = payload_key - self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) ' - 
'AppleWebit/535.19' - '(KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19'} - super(BaseTranslator, self).__init__() - - @staticmethod - def _validate_payload(payload, min_chars=1, max_chars=5000): - """ - validate the target text to translate - @param payload: text to translate - @return: bool - """ - - if not payload or not isinstance(payload, str) or not payload.strip() or payload.isdigit(): - raise NotValidPayload(payload) - - # check if payload contains only symbols - if all(i in string.punctuation for i in payload): - raise NotValidPayload(payload) - - if not BaseTranslator.__check_length(payload, min_chars, max_chars): - raise NotValidLength(payload, min_chars, max_chars) - return True - - @staticmethod - def __check_length(payload, min_chars, max_chars): - """ - check length of the provided target text to translate - @param payload: text to translate - @param min_chars: minimum characters allowed - @param max_chars: maximum characters allowed - @return: bool - """ - return True if min_chars <= len(payload) < max_chars else False - - @abstractmethod - def translate(self, text, **kwargs): - """ - translate a text using a translator under the hood and return the translated text - @param text: text to translate - @param kwargs: additional arguments - @return: str - """ - return NotImplemented('You need to implement the translate method!') - - - diff --git a/libs/deep_translator/pons.py b/libs/deep_translator/pons.py index 50d17f324..c7b711af7 100644 --- a/libs/deep_translator/pons.py +++ b/libs/deep_translator/pons.py @@ -1,80 +1,51 @@ """ pons translator API """ -from bs4 import BeautifulSoup +from typing import List, Optional, Union + import requests -from .constants import BASE_URLS, PONS_LANGUAGES_TO_CODES, PONS_CODES_TO_LANGUAGES -from .exceptions import (LanguageNotSupportedException, - TranslationNotFound, - NotValidPayload, - ElementNotFoundInGetRequest, - RequestError, - TooManyRequests) -from .parent import BaseTranslator +from bs4 import 
BeautifulSoup from requests.utils import requote_uri +from deep_translator.base import BaseTranslator +from deep_translator.constants import BASE_URLS, PONS_CODES_TO_LANGUAGES +from deep_translator.exceptions import ( + ElementNotFoundInGetRequest, + NotValidPayload, + RequestError, + TooManyRequests, + TranslationNotFound, +) +from deep_translator.validate import is_empty, is_input_valid + class PonsTranslator(BaseTranslator): """ class that uses PONS translator to translate words """ - _languages = PONS_LANGUAGES_TO_CODES - supported_languages = list(_languages.keys()) - def __init__(self, source, target="en", proxies=None, **kwargs): + def __init__( + self, source: str, target: str = "en", proxies: Optional[dict] = None, **kwargs + ): """ @param source: source language to translate from @param target: target language to translate to """ - self.__base_url = BASE_URLS.get("PONS") self.proxies = proxies - if self.is_language_supported(source, target): - self._source, self._target = self._map_language_to_code(source, target) - - super().__init__(base_url=self.__base_url, - source=self._source, - target=self._target, - payload_key=None, - element_tag='div', - element_query={"class": "target"} - ) - - @staticmethod - def get_supported_languages(as_dict=False, **kwargs): - """ - return the supported languages by the linguee translator - @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations - @return: list or dict - """ - return PonsTranslator.supported_languages if not as_dict else PonsTranslator._languages - - def _map_language_to_code(self, *languages, **kwargs): - """ - map language to its corresponding code (abbreviation) if the language was passed by its full name by the user - @param languages: list of languages - @return: mapped value of the language or raise an exception if the language is not supported - """ - for language in languages: - if language in self._languages.values(): - yield 
PONS_CODES_TO_LANGUAGES[language] - elif language in self._languages.keys(): - yield language - else: - raise LanguageNotSupportedException(language) - - def is_language_supported(self, *languages, **kwargs): - """ - check if the language is supported by the translator - @param languages: list of languages - @return: bool or raise an Exception - """ - for lang in languages: - if lang not in self._languages.keys(): - if lang not in self._languages.values(): - raise LanguageNotSupportedException(lang) - return True - - def translate(self, word, return_all=False, **kwargs): + super().__init__( + base_url=BASE_URLS.get("PONS"), + languages=PONS_CODES_TO_LANGUAGES, + source=source, + target=target, + payload_key=None, + element_tag="div", + element_query={"class": "target"}, + **kwargs, + ) + + def translate( + self, word: str, return_all: bool = False, **kwargs + ) -> Union[str, List[str]]: """ function that uses PONS to translate a word @param word: word to translate @@ -83,8 +54,10 @@ class PonsTranslator(BaseTranslator): @type return_all: bool @return: str: translated word """ - if self._validate_payload(word, max_chars=50): - url = "{}{}-{}/{}".format(self.__base_url, self._source, self._target, word) + if is_input_valid(word, max_chars=50): + if self._same_source_target() or is_empty(word): + return word + url = f"{self._base_url}{self._source}-{self._target}/{word}" url = requote_uri(url) response = requests.get(url, proxies=self.proxies) @@ -94,7 +67,7 @@ class PonsTranslator(BaseTranslator): if response.status_code != 200: raise RequestError() - soup = BeautifulSoup(response.text, 'html.parser') + soup = BeautifulSoup(response.text, "html.parser") elements = soup.findAll(self._element_tag, self._element_query) if not elements: @@ -102,11 +75,9 @@ class PonsTranslator(BaseTranslator): filtered_elements = [] for el in elements: - temp = '' - for e in el.findAll('a'): - if e.parent.name == 'div': - if e and "/translate/{}-{}/".format(self._target, self._source) in 
e.get('href'): - temp += e.get_text() + ' ' + temp = "" + for e in el.findAll("a"): + temp += e.get_text() + " " filtered_elements.append(temp) if not filtered_elements: @@ -119,7 +90,7 @@ class PonsTranslator(BaseTranslator): return word_list if return_all else word_list[0] - def translate_words(self, words, **kwargs): + def translate_words(self, words: List[str], **kwargs) -> List[str]: """ translate a batch of words together by providing them in a list @param words: list of words you want to translate @@ -133,4 +104,3 @@ class PonsTranslator(BaseTranslator): for word in words: translated_words.append(self.translate(word=word, **kwargs)) return translated_words - diff --git a/libs/deep_translator/qcri.py b/libs/deep_translator/qcri.py index 6f2bdf91b..a8827143e 100644 --- a/libs/deep_translator/qcri.py +++ b/libs/deep_translator/qcri.py @@ -1,24 +1,30 @@ +from typing import List, Optional import requests -from .constants import BASE_URLS, QCRI_LANGUAGE_TO_CODE -from .exceptions import (ServerException, TranslationNotFound) +from deep_translator.base import BaseTranslator +from deep_translator.constants import BASE_URLS, QCRI_LANGUAGE_TO_CODE +from deep_translator.exceptions import ServerException, TranslationNotFound -class QCRI(object): + +class QcriTranslator(BaseTranslator): """ class that wraps functions, which use the QRCI translator under the hood to translate word(s) """ - def __init__(self, api_key=None, source="en", target="en", **kwargs): + def __init__( + self, + api_key: Optional[str] = None, + source: str = "en", + target: str = "en", + **kwargs, + ): """ @param api_key: your qrci api key. 
Get one for free here https://mt.qcri.org/api/v1/ref """ if not api_key: raise ServerException(401) - self.__base_url = BASE_URLS.get("QCRI") - self.source = source - self.target = target self.api_key = api_key self.api_endpoints = { "get_languages": "getLanguagePairs", @@ -26,27 +32,29 @@ class QCRI(object): "translate": "translate", } - self.params = { - "key": self.api_key - } + self.params = {"key": self.api_key} + super().__init__( + base_url=BASE_URLS.get("QCRI"), + source=source, + target=target, + languages=QCRI_LANGUAGE_TO_CODE, + **kwargs, + ) - def _get(self, endpoint, params=None, return_text=True): + def _get( + self, endpoint: str, params: Optional[dict] = None, return_text: bool = True + ): if not params: params = self.params try: - res = requests.get(self.__base_url.format(endpoint=self.api_endpoints[endpoint]), params=params) + res = requests.get( + self._base_url.format(endpoint=self.api_endpoints[endpoint]), + params=params, + ) return res.text if return_text else res except Exception as e: raise e - @staticmethod - def get_supported_languages(as_dict=False, **kwargs): - # Have no use for this as the format is not what we need - # Save this for whenever - # pairs = self._get("get_languages") - # Using a this one instead - return [*QCRI_LANGUAGE_TO_CODE.keys()] if not as_dict else QCRI_LANGUAGE_TO_CODE - @property def languages(self): return self.get_supported_languages() @@ -59,12 +67,12 @@ class QCRI(object): def domains(self): return self.get_domains() - def translate(self, text, domain, **kwargs): + def translate(self, text: str, **kwargs) -> str: params = { "key": self.api_key, - "langpair": "{}-{}".format(self.source, self.target), - "domain": domain, - "text": text + "langpair": f"{self._source}-{self._target}", + "domain": kwargs["domain"], + "text": text, } try: response = self._get("translate", params=params, return_text=False) @@ -81,12 +89,14 @@ class QCRI(object): raise TranslationNotFound(text) return translation - def 
translate_batch(self, batch, domain, **kwargs): + def translate_file(self, path: str, **kwargs) -> str: + return self._translate_file(path, **kwargs) + + def translate_batch(self, batch: List[str], **kwargs) -> List[str]: """ translate a batch of texts @domain: domain @param batch: list of texts to translate @return: list of translations """ - return [self.translate(domain, text, **kwargs) for text in batch] - + return self._translate_batch(batch, **kwargs) diff --git a/libs/deep_translator/validate.py b/libs/deep_translator/validate.py new file mode 100644 index 000000000..f15207638 --- /dev/null +++ b/libs/deep_translator/validate.py @@ -0,0 +1,22 @@ +from deep_translator.exceptions import NotValidLength, NotValidPayload + + +def is_empty(text: str) -> bool: + return text == "" + + +def is_input_valid(text: str, min_chars: int = 0, max_chars: int = 5000) -> bool: + """ + validate the target text to translate + @param min_chars: min characters + @param max_chars: max characters + @param text: text to translate + @return: bool + """ + + if not isinstance(text, str) or text.isdigit(): + raise NotValidPayload(text) + if not min_chars <= len(text) < max_chars: + raise NotValidLength(text, min_chars, max_chars) + + return True diff --git a/libs/deep_translator/yandex.py b/libs/deep_translator/yandex.py index c6bd6ad8c..016a246d1 100644 --- a/libs/deep_translator/yandex.py +++ b/libs/deep_translator/yandex.py @@ -1,26 +1,38 @@ """ Yandex translator API """ +from typing import List, Optional + import requests -from .constants import BASE_URLS -from .exceptions import (RequestError, ServerException, TranslationNotFound, TooManyRequests) + +from deep_translator.base import BaseTranslator +from deep_translator.constants import BASE_URLS +from deep_translator.exceptions import ( + RequestError, + ServerException, + TooManyRequests, + TranslationNotFound, +) +from deep_translator.validate import is_input_valid -class YandexTranslator(object): +class 
YandexTranslator(BaseTranslator): """ class that wraps functions, which use the yandex translator under the hood to translate word(s) """ - def __init__(self, api_key=None, source="en", target="de", **kwargs): + def __init__( + self, + api_key: Optional[str] = None, + source: str = "en", + target: str = "de", + **kwargs + ): """ @param api_key: your yandex api key """ if not api_key: raise ServerException(401) - self.__base_url = BASE_URLS.get("YANDEX") - self.source = source - self.target = target - self.api_key = api_key self.api_version = "v1.5" self.api_endpoints = { @@ -28,13 +40,9 @@ class YandexTranslator(object): "detect": "detect", "translate": "translate", } - - @staticmethod - def get_supported_languages(as_dict=False, **kwargs): - """ this method is just for consistency.""" - return """ this method is just for consistency. You need to create an instance of yandex and access - supported languages using the languages property or call _get_supported_languages - """ + super().__init__( + base_url=BASE_URLS.get("YANDEX"), source=source, target=target, **kwargs + ) def _get_supported_languages(self): return set(x.split("-")[0] for x in self.dirs) @@ -44,10 +52,10 @@ class YandexTranslator(object): return self.get_supported_languages() @property - def dirs(self, proxies=None): + def dirs(self, proxies: Optional[dict] = None): try: - url = self.__base_url.format(version=self.api_version, endpoint="getLangs") + url = self._base_url.format(version=self.api_version, endpoint="getLangs") print("url: ", url) response = requests.get(url, params={"key": self.api_key}, proxies=proxies) except requests.exceptions.ConnectionError: @@ -59,7 +67,7 @@ class YandexTranslator(object): raise ServerException(response.status_code) return data.get("dirs") - def detect(self, text, proxies=None): + def detect(self, text: str, proxies: Optional[dict] = None): response = None params = { "text": text, @@ -67,7 +75,7 @@ class YandexTranslator(object): "key": self.api_key, } try: - url 
= self.__base_url.format(version=self.api_version, endpoint="detect") + url = self._base_url.format(version=self.api_version, endpoint="detect") response = requests.post(url, data=params, proxies=proxies) except RequestError: @@ -78,58 +86,57 @@ class YandexTranslator(object): raise ServerException(response.status_code) else: response = response.json() - language = response['lang'] - status_code = response['code'] + language = response["lang"] + status_code = response["code"] if status_code != 200: raise RequestError() elif not language: raise ServerException(501) return language - def translate(self, text, proxies=None, **kwargs): - params = { - "text": text, - "format": "plain", - "lang": self.target if self.source == "auto" else "{}-{}".format(self.source, self.target), - "key": self.api_key - } - try: - url = self.__base_url.format(version=self.api_version, endpoint="translate") - response = requests.post(url, data=params, proxies=proxies) - except ConnectionError: - raise ServerException(503) - else: - response = response.json() - - if response['code'] == 429: - raise TooManyRequests() - - if response['code'] != 200: - raise ServerException(response['code']) - - if not response['text']: - raise TranslationNotFound() - - return response['text'] - - def translate_file(self, path, **kwargs): + def translate(self, text: str, proxies: Optional[dict] = None, **kwargs) -> str: + if is_input_valid(text): + params = { + "text": text, + "format": "plain", + "lang": self._target + if self._source == "auto" + else "{}-{}".format(self._source, self._target), + "key": self.api_key, + } + try: + url = self._base_url.format( + version=self.api_version, endpoint="translate" + ) + response = requests.post(url, data=params, proxies=proxies) + except ConnectionError: + raise ServerException(503) + else: + response = response.json() + + if response["code"] == 429: + raise TooManyRequests() + + if response["code"] != 200: + raise ServerException(response["code"]) + + if not 
response["text"]: + raise TranslationNotFound() + + return response["text"] + + def translate_file(self, path: str, **kwargs) -> str: """ translate from a file @param path: path to file @return: translated text """ - try: - with open(path, 'r', encoding='utf-8') as f: - text = f.read() - - return self.translate(text) - except Exception as e: - raise e + return self._translate_file(path, **kwargs) - def translate_batch(self, batch, **kwargs): + def translate_batch(self, batch: List[str], **kwargs) -> List[str]: """ translate a batch of texts @param batch: list of texts to translate @return: list of translations """ - return [self.translate(text, **kwargs) for text in batch] + return self._translate_batch(batch, **kwargs) |