diff options
author | morpheus65535 <[email protected]> | 2021-02-02 16:05:09 -0500 |
---|---|---|
committer | morpheus65535 <[email protected]> | 2021-02-02 16:05:09 -0500 |
commit | 49c6e8b3fbccf0a83021ed927b90c6b480e71ce8 (patch) | |
tree | e126cad24183f00902e5f4f069c461b7cc052d9f /libs | |
parent | 6e3f4bf80490c478a9638a9cff21cf532ecc113c (diff) | |
download | bazarr-49c6e8b3fbccf0a83021ed927b90c6b480e71ce8.tar.gz bazarr-49c6e8b3fbccf0a83021ed927b90c6b480e71ce8.zip |
Implemented manual subtitles translation using Google Translate
Diffstat (limited to 'libs')
-rw-r--r-- | libs/deep_translator/__init__.py | 25 | ||||
-rw-r--r-- | libs/deep_translator/cli.py | 52 | ||||
-rw-r--r-- | libs/deep_translator/configs.py | 11 | ||||
-rw-r--r-- | libs/deep_translator/constants.py | 183 | ||||
-rw-r--r-- | libs/deep_translator/deepl.py | 59 | ||||
-rw-r--r-- | libs/deep_translator/detection.py | 76 | ||||
-rw-r--r-- | libs/deep_translator/exceptions.py | 113 | ||||
-rw-r--r-- | libs/deep_translator/google_trans.py | 173 | ||||
-rw-r--r-- | libs/deep_translator/linguee.py | 130 | ||||
-rw-r--r-- | libs/deep_translator/mymemory.py | 174 | ||||
-rw-r--r-- | libs/deep_translator/parent.py | 71 | ||||
-rw-r--r-- | libs/deep_translator/pons.py | 136 | ||||
-rw-r--r-- | libs/deep_translator/qcri.py | 91 | ||||
-rw-r--r-- | libs/deep_translator/tests/__init__.py | 1 | ||||
-rw-r--r-- | libs/deep_translator/tests/test_google_trans.py | 57 | ||||
-rw-r--r-- | libs/deep_translator/tests/test_linguee.py | 49 | ||||
-rw-r--r-- | libs/deep_translator/tests/test_mymemory.py | 48 | ||||
-rw-r--r-- | libs/deep_translator/tests/test_pons.py | 48 | ||||
-rw-r--r-- | libs/deep_translator/utils.py | 3 | ||||
-rw-r--r-- | libs/deep_translator/yandex.py | 132 |
20 files changed, 1632 insertions, 0 deletions
diff --git a/libs/deep_translator/__init__.py b/libs/deep_translator/__init__.py new file mode 100644 index 000000000..36321add6 --- /dev/null +++ b/libs/deep_translator/__init__.py @@ -0,0 +1,25 @@ +"""Top-level package for deep_translator.""" + +from .google_trans import GoogleTranslator +from .pons import PonsTranslator +from .linguee import LingueeTranslator +from .mymemory import MyMemoryTranslator +from .yandex import YandexTranslator +from .qcri import QCRI +from .deepl import DeepL +from .detection import single_detection, batch_detection + + +__author__ = """Nidhal Baccouri""" +__email__ = '[email protected]' +__version__ = '1.3.2' + +__all__ = [GoogleTranslator, + PonsTranslator, + LingueeTranslator, + MyMemoryTranslator, + YandexTranslator, + QCRI, + DeepL, + single_detection, + batch_detection] diff --git a/libs/deep_translator/cli.py b/libs/deep_translator/cli.py new file mode 100644 index 000000000..d690708fd --- /dev/null +++ b/libs/deep_translator/cli.py @@ -0,0 +1,52 @@ +"""Console script for deep_translator.""" + +import argparse +import sys +from .google_trans import GoogleTranslator +from .mymemory import MyMemoryTranslator +from .pons import PonsTranslator +from .linguee import LingueeTranslator + + +def translate(args): + """ + function used to provide translations from the parsed terminal arguments + @param args: parsed terminal arguments + @return: None + """ + translator = None + if args.translator == 'google': + translator = GoogleTranslator(source=args.source, target=args.target) + elif args.translator == 'pons': + translator = PonsTranslator(source=args.source, target=args.target) + elif args.translator == 'linguee': + translator = LingueeTranslator(source=args.source, target=args.target) + elif args.translator == 'mymemory': + translator = MyMemoryTranslator(source=args.source, target=args.target) + else: + print("given translator is not supported. Please use a supported translator from the deep_translator tool") + + res = translator.translate(args.text) + print(" | Translation from {} to {} |".format(args.source, args.target)) + print("Translated text: \n {}".format(res)) + + +def main(): + """ + function responsible for parsing terminal arguments and provide them for further use in the translation process + + """ + parser = argparse.ArgumentParser() + parser.add_argument('--translator', '-trans', + default='google', type=str, help="name of the translator you want to use") + parser.add_argument('--source', '-src', type=str, help="source language to translate from", required=True) + parser.add_argument('--target', '-tg', type=str, help="target language to translate to", required=True) + parser.add_argument('--text', '-txt', type=str, help="text you want to translate", required=True) + + args = parser.parse_args() + translate(args) + # sys.exit() + + +if __name__ == "__main__": + main() diff --git a/libs/deep_translator/configs.py b/libs/deep_translator/configs.py new file mode 100644 index 000000000..9df609808 --- /dev/null +++ b/libs/deep_translator/configs.py @@ -0,0 +1,11 @@ +""" +configuration object that holds data about the language detection api +""" + +config = { + "url": 'https://ws.detectlanguage.com/0.2/detect', + "headers": { + 'User-Agent': 'Detect Language API Python Client 1.4.0', + 'Authorization': 'Bearer {}', + } +} diff --git a/libs/deep_translator/constants.py b/libs/deep_translator/constants.py new file mode 100644 index 000000000..5f773997b --- /dev/null +++ b/libs/deep_translator/constants.py @@ -0,0 +1,183 @@ + + +BASE_URLS = { + "GOOGLE_TRANSLATE": "https://translate.google.com/m", + "PONS": "https://en.pons.com/translate/", + "YANDEX": "https://translate.yandex.net/api/{version}/tr.json/{endpoint}", + "LINGUEE": "https://www.linguee.com/", + "MYMEMORY": "http://api.mymemory.translated.net/get", + "QCRI": "https://mt.qcri.org/api/v1/{endpoint}?", + "DEEPL": "https://api.deepl.com/{version}/" +} + +GOOGLE_CODES_TO_LANGUAGES = { + 'af': 'afrikaans', + 'sq': 'albanian', + 'am': 'amharic', + 'ar': 'arabic', + 'hy': 'armenian', + 'az': 'azerbaijani', + 'eu': 'basque', + 'be': 'belarusian', + 'bn': 'bengali', + 'bs': 'bosnian', + 'bg': 'bulgarian', + 'ca': 'catalan', + 'ceb': 'cebuano', + 'ny': 'chichewa', + 'zh-cn': 'chinese (simplified)', + 'zh-tw': 'chinese (traditional)', + 'co': 'corsican', + 'hr': 'croatian', + 'cs': 'czech', + 'da': 'danish', + 'nl': 'dutch', + 'en': 'english', + 'eo': 'esperanto', + 'et': 'estonian', + 'tl': 'filipino', + 'fi': 'finnish', + 'fr': 'french', + 'fy': 'frisian', + 'gl': 'galician', + 'ka': 'georgian', + 'de': 'german', + 'el': 'greek', + 'gu': 'gujarati', + 'ht': 'haitian creole', + 'ha': 'hausa', + 'haw': 'hawaiian', + 'iw': 'hebrew', + 'hi': 'hindi', + 'hmn': 'hmong', + 'hu': 'hungarian', + 'is': 'icelandic', + 'ig': 'igbo', + 'id': 'indonesian', + 'ga': 'irish', + 'it': 'italian', + 'ja': 'japanese', + 'jw': 'javanese', + 'kn': 'kannada', + 'kk': 'kazakh', + 'km': 'khmer', + 'ko': 'korean', + 'ku': 'kurdish (kurmanji)', + 'ky': 'kyrgyz', + 'lo': 'lao', + 'la': 'latin', + 'lv': 'latvian', + 'lt': 'lithuanian', + 'lb': 'luxembourgish', + 'mk': 'macedonian', + 'mg': 'malagasy', + 'ms': 'malay', + 'ml': 'malayalam', + 'mt': 'maltese', + 'mi': 'maori', + 'mr': 'marathi', + 'mn': 'mongolian', + 'my': 'myanmar (burmese)', + 'ne': 'nepali', + 'no': 'norwegian', + 'ps': 'pashto', + 'fa': 'persian', + 'pl': 'polish', + 'pt': 'portuguese', + 'pa': 'punjabi', + 'ro': 'romanian', + 'ru': 'russian', + 'sm': 'samoan', + 'gd': 'scots gaelic', + 'sr': 'serbian', + 'st': 'sesotho', + 'sn': 'shona', + 'sd': 'sindhi', + 'si': 'sinhala', + 'sk': 'slovak', + 'sl': 'slovenian', + 'so': 'somali', + 'es': 'spanish', + 'su': 'sundanese', + 'sw': 'swahili', + 'sv': 'swedish', + 'tg': 'tajik', + 'ta': 'tamil', + 'te': 'telugu', + 'th': 'thai', + 'tr': 'turkish', + 'uk': 'ukrainian', + 'ur': 'urdu', + 'uz': 'uzbek', + 'vi': 'vietnamese', + 'cy': 'welsh', + 'xh': 'xhosa', + 'yi': 'yiddish', + 'yo': 'yoruba', + 'zu': 'zulu', + 'fil': 'Filipino', + 'he': 'Hebrew' +} + +GOOGLE_LANGUAGES_TO_CODES = {v: k for k, v in GOOGLE_CODES_TO_LANGUAGES.items()} + +PONS_CODES_TO_LANGUAGES = { + 'ar': 'arabic', + 'bg': 'bulgarian', + 'zh-cn': 'chinese', + 'cs': 'czech', + 'da': 'danish', + 'nl': 'dutch', + 'en': 'english', + 'fr': 'french', + 'de': 'german', + 'el': 'greek', + 'hu': 'hungarian', + 'it': 'italian', + 'la': 'latin', + 'no': 'norwegian', + 'pl': 'polish', + 'pt': 'portuguese', + 'ru': 'russian', + 'sl': 'slovenian', + 'es': 'spanish', + 'sv': 'swedish', + 'tr': 'turkish', + 'elv': 'elvish' +} + +PONS_LANGUAGES_TO_CODES = {v: k for k, v in PONS_CODES_TO_LANGUAGES.items()} + +LINGUEE_LANGUAGES_TO_CODES = { + "maltese": "mt", + "english": "en", + "german": "de", + "bulgarian": "bg", + "polish": "pl", + "portuguese": "pt", + "hungarian": "hu", + "romanian": "ro", + "russian": "ru", + #"serbian": "sr", + "dutch": "nl", + "slovakian": "sk", + "greek": "el", + "slovenian": "sl", + "danish": "da", + "italian": "it", + "spanish": "es", + "finnish": "fi", + "chinese": "zh", + "french": "fr", + #"croatian": "hr", + "czech": "cs", + "laotian": "lo", + "swedish": "sv", + "latvian": "lv", + "estonian": "et", + "japanese": "ja" +} + +LINGUEE_CODE_TO_LANGUAGE = {v: k for k, v in LINGUEE_LANGUAGES_TO_CODES.items()} + +# "72e9e2cc7c992db4dcbdd6fb9f91a0d1" diff --git a/libs/deep_translator/deepl.py b/libs/deep_translator/deepl.py new file mode 100644 index 000000000..b08a20d49 --- /dev/null +++ b/libs/deep_translator/deepl.py @@ -0,0 +1,59 @@ + +import requests +from requests.utils import requote_uri +from deep_translator.constants import BASE_URLS +from deep_translator.exceptions import (RequestError, + ServerException, TranslationNotFound, TooManyRequests) + + +class DeepL(object): + """ + class that wraps functions, which use the DeepL translator under the hood to translate word(s) + """ + + def __init__(self, api_key=None): + """ + @param api_key: your DeepL api key. Get one here: https://www.deepl.com/docs-api/accessing-the-api/ + """ + + if not api_key: + raise ServerException(401) + self.version = 'v2' + self.api_key = api_key + self.__base_url = BASE_URLS.get("DEEPL").format(version=self.version) + + def translate(self, source, target, text): + params = { + "auth_key": self.api_key, + "target_lang": target, + "source_lang": source, + "text": text + } + try: + response = requests.get(self.__base_url, params=params) + except ConnectionError: + raise ServerException(503) + + else: + if response.status_code != 200: + ServerException(response.status_code) + else: + res = response.json() + if not res: + raise TranslationNotFound(text) + return res + + def translate_batch(self, source, target, batch): + """ + translate a batch of texts + @param source: source language + @param target: target language + @param batch: list of texts to translate + @return: list of translations + """ + return [self.translate(source, target, text) for text in batch] + + +if __name__ == '__main__': + d = DeepL(api_key="key") + print(d) diff --git a/libs/deep_translator/detection.py b/libs/deep_translator/detection.py new file mode 100644 index 000000000..6cf8f950d --- /dev/null +++ b/libs/deep_translator/detection.py @@ -0,0 +1,76 @@ +""" +language detection API +""" +import requests +from deep_translator.configs import config +from requests.exceptions import HTTPError + + +def get_request_body(text, api_key, *args): + """ + send a request and return the response body parsed as dictionary + + @param text: target text that you want to detect its language + @type text: str + @type api_key: str + @param api_key: your private API key + + """ + if not api_key: + raise Exception("you need to get an API_KEY for this to work. " + "Get one for free here: https://detectlanguage.com/documentation") + if not text: + raise Exception("Please provide an input text") + + else: + try: + headers = config['headers'] + headers['Authorization'] = headers['Authorization'].format(api_key) + response = requests.post(config['url'], + json={'q': text}, + headers=headers) + + body = response.json().get('data') + return body + + except HTTPError as e: + print("Error occured while requesting from server: ", e.args) + raise e + + +def single_detection(text, api_key=None, detailed=False, *args, **kwargs): + """ + function responsible for detecting the language from a text + + @param text: target text that you want to detect its language + @type text: str + @type api_key: str + @param api_key: your private API key + @param detailed: set to True if you want to get detailed information about the detection process + """ + body = get_request_body(text, api_key) + detections = body.get('detections') + if detailed: + return detections[0] + + lang = detections[0].get('language', None) + if lang: + return lang + + +def batch_detection(text_list, api_key, detailed=False, *args): + """ + function responsible for detecting the language from a text + + @param text_list: target batch that you want to detect its language + @param api_key: your private API key + @param detailed: set to True if you want to get detailed information about the detection process + """ + body = get_request_body(text_list, api_key) + detections = body.get('detections') + res = [obj[0] for obj in detections] + if detailed: + return res + else: + return [obj['language'] for obj in res] + diff --git a/libs/deep_translator/exceptions.py b/libs/deep_translator/exceptions.py new file mode 100644 index 000000000..7d0ea875b --- /dev/null +++ b/libs/deep_translator/exceptions.py @@ -0,0 +1,113 @@ +class BaseError(Exception): + """ + base error structure class + """ + + def __init__(self, val, message): + """ + @param val: actual value + @param message: message shown to the user + """ + self.val = val + self.message = message + super().__init__() + + def __str__(self): + return "{} --> {}".format(self.val, self.message) + + +class LanguageNotSupportedException(BaseError): + """ + exception thrown if the user uses a language that is not supported by the deep_translator + """ + + def __init__(self, val, message="There is no support for the chosen language"): + super().__init__(val, message) + + +class NotValidPayload(BaseError): + """ + exception thrown if the user enters an invalid payload + """ + + def __init__(self, + val, + message='text must be a valid text with maximum 5000 character, otherwise it cannot be translated'): + super(NotValidPayload, self).__init__(val, message) + + +class TranslationNotFound(BaseError): + """ + exception thrown if no translation was found for the text provided by the user + """ + + def __init__(self, + val, + message='No translation was found using the current translator. Try another translator?'): + super(TranslationNotFound, self).__init__(val, message) + + +class ElementNotFoundInGetRequest(BaseError): + """ + exception thrown if the html element was not found in the body parsed by beautifulsoup + """ + + def __init__(self, + val, + message='Required element was not found in the API response'): + super(ElementNotFoundInGetRequest, self).__init__(val, message) + + +class NotValidLength(BaseError): + """ + exception thrown if the provided text exceed the length limit of the translator + """ + + def __init__(self, val, min_chars, max_chars): + message = "Text length need to be between {} and {} characters".format(min_chars, max_chars) + super(NotValidLength, self).__init__(val, message) + + +class RequestError(Exception): + """ + exception thrown if an error occured during the request call, e.g a connection problem. + """ + + def __init__(self, message="Request exception can happen due to an api connection error. " + "Please check your connection and try again"): + self.message = message + + def __str__(self): + return self.message + + +class TooManyRequests(Exception): + """ + exception thrown if an error occured during the request call, e.g a connection problem. + """ + + def __init__(self, message="Server Error: You made too many requests to the server. According to google, you are allowed to make 5 requests per second and up to 200k requests per day. You can wait and try again later or you can try the translate_batch function"): + self.message = message + + def __str__(self): + return self.message + + +class ServerException(Exception): + """ + Default YandexTranslate exception from the official website + """ + errors = { + 401: "ERR_KEY_INVALID", + 402: "ERR_KEY_BLOCKED", + 403: "ERR_DAILY_REQ_LIMIT_EXCEEDED", + 404: "ERR_DAILY_CHAR_LIMIT_EXCEEDED", + 413: "ERR_TEXT_TOO_LONG", + 422: "ERR_UNPROCESSABLE_TEXT", + 501: "ERR_LANG_NOT_SUPPORTED", + 503: "ERR_SERVICE_NOT_AVAIBLE", + } + + def __init__(self, status_code, *args): + message = self.errors.get(status_code, "API server error") + super(ServerException, self).__init__(message, *args) diff --git a/libs/deep_translator/google_trans.py b/libs/deep_translator/google_trans.py new file mode 100644 index 000000000..fc8d63470 --- /dev/null +++ b/libs/deep_translator/google_trans.py @@ -0,0 +1,173 @@ +""" +google translator API +""" + +from deep_translator.constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES +from deep_translator.exceptions import TooManyRequests, LanguageNotSupportedException, TranslationNotFound, NotValidPayload, RequestError +from deep_translator.parent import BaseTranslator +from bs4 import BeautifulSoup +import requests +from time import sleep +import warnings +import logging + + +class GoogleTranslator(BaseTranslator): + """ + class that wraps functions, which use google translate under the hood to translate text(s) + """ + _languages = GOOGLE_LANGUAGES_TO_CODES + supported_languages = list(_languages.keys()) + + def __init__(self, source="auto", target="en"): + """ + @param source: source language to translate from + @param target: target language to translate to + """ + self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE") + + if self.is_language_supported(source, target): + self._source, self._target = self._map_language_to_code(source.lower(), target.lower()) + + super(GoogleTranslator, self).__init__(base_url=self.__base_url, + source=self._source, + target=self._target, + element_tag='div', + element_query={"class": "t0"}, + payload_key='q', # key of text in the url + hl=self._target, + sl=self._source) + + self._alt_element_query = {"class": "result-container"} + + @staticmethod + def get_supported_languages(as_dict=False): + """ + return the supported languages by the google translator + @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations + @return: list or dict + """ + return GoogleTranslator.supported_languages if not as_dict else GoogleTranslator._languages + + def _map_language_to_code(self, *languages): + """ + map language to its corresponding code (abbreviation) if the language was passed by its full name by the user + @param languages: list of languages + @return: mapped value of the language or raise an exception if the language is not supported + """ + for language in languages: + if language in self._languages.values() or language == 'auto': + yield language + elif language in self._languages.keys(): + yield self._languages[language] + else: + raise LanguageNotSupportedException(language) + + def is_language_supported(self, *languages): + """ + check if the language is supported by the translator + @param languages: list of languages + @return: bool or raise an Exception + """ + for lang in languages: + if lang != 'auto' and lang not in self._languages.keys(): + if lang != 'auto' and lang not in self._languages.values(): + raise LanguageNotSupportedException(lang) + return True + + def translate(self, text, **kwargs): + """ + function that uses google translate to translate a text + @param text: desired text to translate + @return: str: translated text + """ + + if self._validate_payload(text): + text = text.strip() + + if self.payload_key: + self._url_params[self.payload_key] = text + + response = requests.get(self.__base_url, + params=self._url_params, headers ={'User-agent': 'your bot 0.1'}) + + if response.status_code == 429: + raise TooManyRequests() + + if response.status_code != 200: + # print("status code", response.status_code) + raise RequestError() + + soup = BeautifulSoup(response.text, 'html.parser') + element = soup.find(self._element_tag, self._element_query) + + if not element: + element = soup.find(self._element_tag, self._alt_element_query) + if not element: + raise TranslationNotFound(text) + + return element.get_text(strip=True) + + def translate_file(self, path, **kwargs): + """ + translate directly from file + @param path: path to the target file + @type path: str + @param kwargs: additional args + @return: str + """ + try: + with open(path) as f: + text = f.read() + + return self.translate(text=text) + except Exception as e: + raise e + + def translate_sentences(self, sentences=None, **kwargs): + """ + translate many sentences together. This makes sense if you have sentences with different languages + and you want to translate all to unified language. This is handy because it detects + automatically the language of each sentence and then translate it. + + @param sentences: list of sentences to translate + @return: list of all translated sentences + """ + warnings.warn("deprecated. Use the translate_batch function instead", DeprecationWarning, stacklevel=2) + logging.warning("deprecated. Use the translate_batch function instead") + if not sentences: + raise NotValidPayload(sentences) + + translated_sentences = [] + try: + for sentence in sentences: + translated = self.translate(text=sentence) + translated_sentences.append(translated) + + return translated_sentences + + except Exception as e: + raise e + + def translate_batch(self, batch=None): + """ + translate a list of texts + @param batch: list of texts you want to translate + @return: list of translations + """ + if not batch: + raise Exception("Enter your text list that you want to translate") + + arr = [] + for text in batch: + translated = self.translate(text) + arr.append(translated) + sleep(2) + + return arr + + +# if __name__ == '__main__': +# for _ in range(10): +# txt = GoogleTranslator(source="en", target="ar").translate("Hello how are you") +# print("text: ", txt) diff --git a/libs/deep_translator/linguee.py b/libs/deep_translator/linguee.py new file mode 100644 index 000000000..ee2013693 --- /dev/null +++ b/libs/deep_translator/linguee.py @@ -0,0 +1,130 @@ +""" +linguee translator API +""" + +from deep_translator.constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES, LINGUEE_CODE_TO_LANGUAGE +from deep_translator.exceptions import (LanguageNotSupportedException, + TranslationNotFound, + NotValidPayload, + ElementNotFoundInGetRequest, + RequestError, + TooManyRequests) +from deep_translator.parent import BaseTranslator +from bs4 import BeautifulSoup +import requests +from requests.utils import requote_uri + + +class LingueeTranslator(BaseTranslator): + """ + class that wraps functions, which use the linguee translator under the hood to translate word(s) + """ + _languages = LINGUEE_LANGUAGES_TO_CODES + supported_languages = list(_languages.keys()) + + def __init__(self, source, target="en"): + """ + @param source: source language to translate from + @param target: target language to translate to + """ + self.__base_url = BASE_URLS.get("LINGUEE") + + if self.is_language_supported(source, target): + self._source, self._target = self._map_language_to_code(source.lower(), target.lower()) + + super().__init__(base_url=self.__base_url, + source=self._source, + target=self._target, + element_tag='a', + element_query={'class': 'dictLink featured'}, + payload_key=None, # key of text in the url + ) + + @staticmethod + def get_supported_languages(as_dict=False): + """ + return the supported languages by the linguee translator + @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations + @return: list or dict + """ + return LingueeTranslator.supported_languages if not as_dict else LingueeTranslator._languages + + def _map_language_to_code(self, *languages, **kwargs): + """ + map language to its corresponding code (abbreviation) if the language was passed by its full name by the user + @param languages: list of languages + @return: mapped value of the language or raise an exception if the language is not supported + """ + for language in languages: + if language in self._languages.values(): + yield LINGUEE_CODE_TO_LANGUAGE[language] + elif language in self._languages.keys(): + yield language + else: + raise LanguageNotSupportedException(language) + + def is_language_supported(self, *languages, **kwargs): + """ + check if the language is supported by the translator + @param languages: list of languages + @return: bool or raise an Exception + """ + for lang in languages: + if lang not in self._languages.keys(): + if lang not in self._languages.values(): + raise LanguageNotSupportedException(lang) + return True + + def translate(self, word, return_all=False, **kwargs): + """ + function that uses linguee to translate a word + @param word: word to translate + @type word: str + @param return_all: set to True to return all synonym of the translated word + @type return_all: bool + @return: str: translated word + """ + if self._validate_payload(word, max_chars=50): + # %s-%s/translation/%s.html + url = "{}{}-{}/translation/{}.html".format(self.__base_url, self._source, self._target, word) + url = requote_uri(url) + response = requests.get(url) + + if response.status_code == 429: + raise TooManyRequests() + + if response.status_code != 200: + raise RequestError() + soup = BeautifulSoup(response.text, 'html.parser') + elements = soup.find_all(self._element_tag, self._element_query) + if not elements: + raise ElementNotFoundInGetRequest(elements) + + filtered_elements = [] + for el in elements: + try: + pronoun = el.find('span', {'class': 'placeholder'}).get_text(strip=True) + except AttributeError: + pronoun = '' + filtered_elements.append(el.get_text(strip=True).replace(pronoun, '')) + + if not filtered_elements: + raise TranslationNotFound(word) + + return filtered_elements if return_all else filtered_elements[0] + + def translate_words(self, words, **kwargs): + """ + translate a batch of words together by providing them in a list + @param words: list of words you want to translate + @param kwargs: additional args + @return: list of translated words + """ + if not words: + raise NotValidPayload(words) + + translated_words = [] + for word in words: + translated_words.append(self.translate(payload=word)) + return translated_words + diff --git a/libs/deep_translator/mymemory.py b/libs/deep_translator/mymemory.py new file mode 100644 index 000000000..6b9fc1ca7 --- /dev/null +++ b/libs/deep_translator/mymemory.py @@ -0,0 +1,174 @@ +""" +mymemory translator API +""" +import logging +import warnings + +from deep_translator.constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES +from deep_translator.exceptions import (NotValidPayload, + TranslationNotFound, + LanguageNotSupportedException, + RequestError, + TooManyRequests) +from deep_translator.parent import BaseTranslator +import requests +from time import sleep + + +class MyMemoryTranslator(BaseTranslator): + """ + class that uses the mymemory translator to translate texts + """ + _languages = GOOGLE_LANGUAGES_TO_CODES + supported_languages = list(_languages.keys()) + + def __init__(self, source="auto", target="en", **kwargs): + """ + @param source: source language to translate from + @param target: target language to translate to + """ + self.__base_url = BASE_URLS.get("MYMEMORY") + if self.is_language_supported(source, target): + self._source, self._target = self._map_language_to_code(source.lower(), target.lower()) + self._source = self._source if self._source != 'auto' else 'Lao' + + self.email = kwargs.get('email', None) + super(MyMemoryTranslator, self).__init__(base_url=self.__base_url, + source=self._source, + target=self._target, + payload_key='q', + langpair='{}|{}'.format(self._source, self._target)) + + @staticmethod + def get_supported_languages(as_dict=False): + """ + return the supported languages by the mymemory translator + @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations + @return: list or dict + """ + return MyMemoryTranslator.supported_languages if not as_dict else MyMemoryTranslator._languages + + def _map_language_to_code(self, *languages): + """ + map language to its corresponding code (abbreviation) if the language was passed by its full name by the user + @param languages: list of languages + @return: mapped value of the language or raise an exception if the language is not supported + """ + for language in languages: + if language in self._languages.values() or language == 'auto': + yield language + elif language in self._languages.keys(): + yield self._languages[language] + else: + raise LanguageNotSupportedException(language) + + def is_language_supported(self, *languages): + """ + check if the language is supported by the translator + @param languages: list of languages + @return: bool or raise an Exception + """ + for lang in languages: + if lang != 'auto' and lang not in self._languages.keys(): + if lang != 'auto' and lang not in self._languages.values(): + raise LanguageNotSupportedException(lang) + return True + + def translate(self, text, return_all=False, **kwargs): + """ + function that uses the mymemory translator to translate a text + @param text: desired text to translate + @type text: str + @param return_all: set to True to return all synonym/similars of the translated text + @return: str or list + """ + + if self._validate_payload(text, max_chars=500): + text = text.strip() + + if self.payload_key: + self._url_params[self.payload_key] = text + if self.email: + self._url_params['de'] = self.email + + response = requests.get(self.__base_url, + params=self._url_params, + headers=self.headers) + + if response.status_code == 429: + raise TooManyRequests() + if response.status_code != 200: + raise RequestError() + + data = response.json() + if not data: + TranslationNotFound(text) + + translation = data.get('responseData').get('translatedText') + if translation: + return translation + + elif not translation: + all_matches = data.get('matches') + matches = (match['translation'] for match in all_matches) + next_match = next(matches) + return next_match if not return_all else list(all_matches) + + def translate_sentences(self, sentences=None, **kwargs): + """ + translate many sentences together. This makes sense if you have sentences with different languages + and you want to translate all to unified language. This is handy because it detects + automatically the language of each sentence and then translate it. + + @param sentences: list of sentences to translate + @return: list of all translated sentences + """ + warn_msg = "deprecated. Use the translate_batch function instead" + warnings.warn(warn_msg, DeprecationWarning, stacklevel=2) + logging.warning(warn_msg) + if not sentences: + raise NotValidPayload(sentences) + + translated_sentences = [] + try: + for sentence in sentences: + translated = self.translate(text=sentence, **kwargs) + translated_sentences.append(translated) + + return translated_sentences + + except Exception as e: + raise e + + def translate_file(self, path, **kwargs): + """ + translate directly from file + @param path: path to the target file + @type path: str + @param kwargs: additional args + @return: str + """ + try: + with open(path) as f: + text = f.read() + + return self.translate(text=text) + except Exception as e: + raise e + + def translate_batch(self, batch=None): + """ + translate a list of texts + @param batch: list of texts you want to translate + @return: list of translations + """ + if not batch: + raise Exception("Enter your text list that you want to translate") + + arr = [] + for text in batch: + translated = self.translate(text) + arr.append(translated) + sleep(2) + + return arr diff --git a/libs/deep_translator/parent.py b/libs/deep_translator/parent.py new file mode 100644 index 000000000..674d93e48 --- /dev/null +++ b/libs/deep_translator/parent.py @@ -0,0 +1,71 @@ +"""parent translator class""" + +from deep_translator.exceptions import NotValidPayload, NotValidLength +from abc import ABC, abstractmethod + + +class BaseTranslator(ABC): + """ + Abstract class that serve as a parent translator for other different translators + """ + def __init__(self, + base_url=None, + source="auto", + target="en", + payload_key=None, + element_tag=None, + element_query=None, + **url_params): + """ + @param source: source language to translate from + @param target: target language to translate to + """ + self.__base_url = base_url + self._source = source + self._target = target + self._url_params = url_params + self._element_tag = element_tag + self._element_query = element_query + self.payload_key = payload_key + self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) ' + 'AppleWebit/535.19' + '(KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19'} + super(BaseTranslator, self).__init__() + + @staticmethod + def _validate_payload(payload, min_chars=1, max_chars=5000): + """ + validate the target text to translate + @param payload: text to translate + @return: bool + """ + + if not payload or not isinstance(payload, str): + raise NotValidPayload(payload) + if not BaseTranslator.__check_length(payload, min_chars, max_chars): + raise NotValidLength(payload, min_chars, max_chars) + return True + + @staticmethod + def __check_length(payload, min_chars, max_chars): + """ + check length of the provided target text to translate + @param payload: text to translate + @param min_chars: minimum characters allowed + @param max_chars: maximum characters allowed + @return: bool + """ + return True if min_chars < len(payload) < max_chars else False + + @abstractmethod + def translate(self, text, **kwargs): + """ + translate a text using a translator under the hood and return the translated text + @param text: text to translate + @param kwargs: additional arguments + @return: str + """ + return NotImplemented('You need to implement the translate method!') + + + diff --git a/libs/deep_translator/pons.py b/libs/deep_translator/pons.py new file mode 100644 index 000000000..e4383eb5b --- /dev/null +++ b/libs/deep_translator/pons.py @@ -0,0 +1,136 @@ +""" +pons translator API +""" +from bs4 import BeautifulSoup +import requests +from deep_translator.constants import BASE_URLS, PONS_LANGUAGES_TO_CODES, PONS_CODES_TO_LANGUAGES +from deep_translator.exceptions import (LanguageNotSupportedException, + TranslationNotFound, + NotValidPayload, + ElementNotFoundInGetRequest, + RequestError, + TooManyRequests) +from deep_translator.parent import BaseTranslator +from requests.utils import requote_uri + + +class PonsTranslator(BaseTranslator): + """ + class that uses PONS translator to translate words + """ + _languages = PONS_LANGUAGES_TO_CODES + supported_languages = list(_languages.keys()) + + def __init__(self, source, target="english"): + """ + @param source: source language to translate from + @param target: target language to translate to + """ + self.__base_url = BASE_URLS.get("PONS") + + if self.is_language_supported(source, target): + self._source, self._target = self._map_language_to_code(source, target) + + super().__init__(base_url=self.__base_url, + source=self._source, + target=self._target, + payload_key=None, + element_tag='div', + element_query={"class": "target"} + ) + + @staticmethod + def get_supported_languages(as_dict=False): + """ + return the supported languages by the linguee translator + @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations + @return: list or dict + """ + return PonsTranslator.supported_languages if not as_dict else PonsTranslator._languages + + def _map_language_to_code(self, *languages, **kwargs): + """ + map language to its corresponding code (abbreviation) if the language was passed by its full name by the user + @param languages: list of languages + @return: mapped value of the language or raise an exception if the language is not supported + """ + for language in languages: + if language in self._languages.values(): + yield PONS_CODES_TO_LANGUAGES[language] + elif language in self._languages.keys(): + yield language + else: + raise LanguageNotSupportedException(language) + + def is_language_supported(self, *languages, **kwargs): + """ + check if the language is supported by the translator + @param languages: list of languages + @return: bool or raise an Exception + """ + for lang in languages: + if lang not in self._languages.keys(): + if lang not in self._languages.values(): + raise LanguageNotSupportedException(lang) + return True + + def translate(self, word, return_all=False, **kwargs): + """ + function that uses PONS to translate a word + @param word: word to translate + @type word: str + @param return_all: set to True to return all synonym of the translated word + @type return_all: bool + @return: str: translated word + """ + if self._validate_payload(word, max_chars=50): + url = "{}{}-{}/{}".format(self.__base_url, self._source, self._target, word) + url = requote_uri(url) + response = requests.get(url) + + if response.status_code == 429: + raise TooManyRequests() + + if response.status_code != 200: + raise RequestError() + + soup = BeautifulSoup(response.text, 'html.parser') + elements = soup.findAll(self._element_tag, self._element_query) + + if not elements: + raise ElementNotFoundInGetRequest(word) + + filtered_elements = [] + for el in elements: + temp = '' + for e in el.findAll('a'): + if e.parent.name == 'div': + if e and "/translate/{}-{}/".format(self._target, self._source) in e.get('href'): + temp += e.get_text() + ' ' + filtered_elements.append(temp) + + if not filtered_elements: + raise ElementNotFoundInGetRequest(word) + + word_list = [word for word in filtered_elements if word and len(word) > 1] + + if not word_list: + raise TranslationNotFound(word) + + return word_list if return_all else word_list[0] + + def translate_words(self, words, **kwargs): + """ + translate a batch of words together by providing them in a list + @param words: list of words you want to translate + @param kwargs: additional args + @return: list of translated words + """ + if not words: + raise NotValidPayload(words) + + translated_words = [] + for word in words: + translated_words.append(self.translate(payload=word)) + return translated_words + diff --git a/libs/deep_translator/qcri.py b/libs/deep_translator/qcri.py new file mode 100644 index 000000000..318dee46e --- /dev/null +++ b/libs/deep_translator/qcri.py @@ -0,0 +1,91 @@ + +import requests +from requests.utils import requote_uri +from deep_translator.constants import BASE_URLS +from deep_translator.exceptions import (RequestError, + ServerException, TranslationNotFound, TooManyRequests) + + +class QCRI(object): + """ + class that wraps functions, which use the QRCI translator under the hood to translate word(s) + """ + + def __init__(self, api_key=None): + """ + @param api_key: your qrci api key. Get one for free here https://mt.qcri.org/api/v1/ref + """ + + if not api_key: + raise ServerException(401) + self.__base_url = BASE_URLS.get("QCRI") + + self.api_key = api_key + self.api_endpoints = { + "get_languages": "getLanguagePairs", + "get_domains": "getDomains", + "translate": "translate", + } + + self.params = { + "key": self.api_key + } + + def _get(self, endpoint, params=None, return_text=True): + if not params: + params = self.params + try: + res = requests.get(self.__base_url.format(endpoint=self.api_endpoints[endpoint]), params=params) + return res.text if return_text else res + except Exception as e: + raise e + + def get_supported_languages(self): + + pairs = self._get("get_languages") + return pairs + + @property + def languages(self): + return self.get_supported_languages() + + def get_domains(self): + domains = self._get("get_domains") + return domains + + @property + def domains(self): + return self.get_domains() + + def translate(self, source, target, domain, text): + params = { + "key": self.api_key, + "langpair": "{}-{}".format(source, target), + "domain": domain, + "text": text + } + try: + response = self._get("translate", params=params, return_text=False) + except ConnectionError: + raise ServerException(503) + + else: + if response.status_code != 200: + ServerException(response.status_code) + else: + res = response.json() + translation = res["translatedText"] + if not translation: + raise TranslationNotFound(text) + return translation + + def translate_batch(self, source, target, domain, batch): + """ + translate a batch of texts + @param source: source language + @param target: target language + @param batch: list of texts to translate + @return: list of translations + """ + return [self.translate(source, target, domain, text) for text in batch] + diff --git a/libs/deep_translator/tests/__init__.py b/libs/deep_translator/tests/__init__.py new file mode 100644 index 000000000..7bc0eaa21 --- /dev/null +++ b/libs/deep_translator/tests/__init__.py @@ -0,0 +1 @@ +"""Unit test package for deep_translator.""" diff --git a/libs/deep_translator/tests/test_google_trans.py b/libs/deep_translator/tests/test_google_trans.py new file mode 100644 index 000000000..51f51a8fc --- /dev/null +++ b/libs/deep_translator/tests/test_google_trans.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +"""Tests for `deep_translator` package.""" + +import pytest +from deep_translator import exceptions, GoogleTranslator + + +def google_translator(): + """Sample pytest fixture. + + See more at: http://doc.pytest.org/en/latest/fixture.html + """ + return GoogleTranslator(target='en') + + +def test_content(google_translator): + """Sample pytest test function with the pytest fixture as an argument.""" + # from bs4 import BeautifulSoup + # assert 'GitHub' in BeautifulSoup(response.content).title.string + assert google_translator.translate(text='좋은') == "good" + + +def test_inputs(): + with pytest.raises(exceptions.LanguageNotSupportedException): + GoogleTranslator(source="", target="") + + with pytest.raises(exceptions.LanguageNotSupportedException): + GoogleTranslator(source="auto", target="nothing") + + # test abbreviations and languages + g1 = GoogleTranslator("en", "fr") + g2 = GoogleTranslator("english", "french") + assert g1._source == g2._source + assert g1._target == g2._target + + +def test_payload(google_translator): + + with pytest.raises(exceptions.NotValidPayload): + google_translator.translate(text="") + + with pytest.raises(exceptions.NotValidPayload): + google_translator.translate(text=123) + + with pytest.raises(exceptions.NotValidPayload): + google_translator.translate(text={}) + + with pytest.raises(exceptions.NotValidPayload): + google_translator.translate(text=[]) + + with pytest.raises(exceptions.NotValidLength): + google_translator.translate("a"*5001) + + #for _ in range(1): + #assert google_translator.translate(text='좋은') == "good" diff --git a/libs/deep_translator/tests/test_linguee.py b/libs/deep_translator/tests/test_linguee.py new file mode 100644 index 000000000..57ff9cecb --- /dev/null +++ b/libs/deep_translator/tests/test_linguee.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +"""Tests for `deep_translator` package.""" + +import pytest +from deep_translator import exceptions, LingueeTranslator + + +def linguee(): + return LingueeTranslator(source="english", target='french') + + +def test_content(linguee): + """Sample pytest test function with the pytest fixture as an argument.""" + # from bs4 import BeautifulSoup + # assert 'GitHub' in BeautifulSoup(response.content).title.string + assert linguee.translate(word='good') is not None + + +def test_inputs(): + with pytest.raises(exceptions.LanguageNotSupportedException): + LingueeTranslator(source="", target="") + + with pytest.raises(exceptions.LanguageNotSupportedException): + LingueeTranslator(source="auto", target="nothing") + + l1 = LingueeTranslator("en", "fr") + l2 = LingueeTranslator("english", "french") + assert l1._source == l2._source + assert l1._target == l2._target + + +def test_payload(linguee): + + with pytest.raises(exceptions.NotValidPayload): + linguee.translate("") + + with pytest.raises(exceptions.NotValidPayload): + linguee.translate(123) + + with pytest.raises(exceptions.NotValidPayload): + linguee.translate({}) + + with pytest.raises(exceptions.NotValidPayload): + linguee.translate([]) + + with pytest.raises(exceptions.NotValidLength): + linguee.translate("a"*51) diff --git a/libs/deep_translator/tests/test_mymemory.py b/libs/deep_translator/tests/test_mymemory.py new file mode 100644 index 000000000..c6ca6ee1f --- /dev/null +++ b/libs/deep_translator/tests/test_mymemory.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +"""Tests for `deep_translator` package.""" + +import pytest +from deep_translator import exceptions, MyMemoryTranslator + + +def mymemory(): + return MyMemoryTranslator(source="en", target='fr') + + +def test_content(mymemory): + """Sample pytest test function with the pytest fixture as an argument.""" + # from bs4 import BeautifulSoup + # assert 'GitHub' in BeautifulSoup(response.content).title.string + assert mymemory.translate(text='good') is not None + + +def test_inputs(): + with pytest.raises(exceptions.LanguageNotSupportedException): + MyMemoryTranslator(source="", target="") + + with pytest.raises(exceptions.LanguageNotSupportedException): + MyMemoryTranslator(source="auto", target="nothing") + m1 = MyMemoryTranslator("en", "fr") + m2 = MyMemoryTranslator("english", "french") + assert m1._source == m2._source + assert m1._target == m2._target + + +def test_payload(mymemory): + + with pytest.raises(exceptions.NotValidPayload): + mymemory.translate(text="") + + with pytest.raises(exceptions.NotValidPayload): + mymemory.translate(text=123) + + with pytest.raises(exceptions.NotValidPayload): + mymemory.translate(text={}) + + with pytest.raises(exceptions.NotValidPayload): + mymemory.translate(text=[]) + + with pytest.raises(exceptions.NotValidLength): + mymemory.translate(text="a"*501) diff --git a/libs/deep_translator/tests/test_pons.py b/libs/deep_translator/tests/test_pons.py new file mode 100644 index 000000000..1ad561125 --- /dev/null +++ b/libs/deep_translator/tests/test_pons.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +"""Tests for `deep_translator` package.""" + +import pytest +from deep_translator import exceptions, PonsTranslator + + +def pons(): + return PonsTranslator(source="english", target='french') + + +def test_content(pons): + """Sample pytest test function with the pytest fixture as an argument.""" + # from bs4 import BeautifulSoup + # assert 'GitHub' in BeautifulSoup(response.content).title.string + assert pons.translate(word='good') is not None + + +def test_inputs(): + with pytest.raises(exceptions.LanguageNotSupportedException): + PonsTranslator(source="", target="") + + with pytest.raises(exceptions.LanguageNotSupportedException): + PonsTranslator(source="auto", target="nothing") + l1 = PonsTranslator("en", "fr") + l2 = PonsTranslator("english", "french") + assert l1._source == l2._source + assert l1._target == l2._target + + +def test_payload(pons): + + with pytest.raises(exceptions.NotValidPayload): + pons.translate("") + + with pytest.raises(exceptions.NotValidPayload): + pons.translate(123) + + with pytest.raises(exceptions.NotValidPayload): + pons.translate({}) + + with pytest.raises(exceptions.NotValidPayload): + pons.translate([]) + + with pytest.raises(exceptions.NotValidLength): + pons.translate("a" * 51) diff --git a/libs/deep_translator/utils.py b/libs/deep_translator/utils.py new file mode 100644 index 000000000..930372b7a --- /dev/null +++ b/libs/deep_translator/utils.py @@ -0,0 +1,3 @@ +""" +utilities +""" diff --git a/libs/deep_translator/yandex.py b/libs/deep_translator/yandex.py new file mode 100644 index 000000000..87e47a83e --- /dev/null +++ b/libs/deep_translator/yandex.py @@ -0,0 +1,132 @@ +""" +Yandex translator API +""" +import requests +from requests import exceptions +from deep_translator.constants import BASE_URLS +from deep_translator.exceptions import (RequestError, + ServerException, TranslationNotFound, TooManyRequests) + + +class YandexTranslator(object): + """ + class that wraps functions, which use the yandex translator under the hood to translate word(s) + """ + + def __init__(self, api_key=None): + """ + @param api_key: your yandex api key + """ + if not api_key: + raise ServerException(401) + self.__base_url = BASE_URLS.get("YANDEX") + + self.api_key = api_key + self.api_version = "v1.5" + self.api_endpoints = { + "langs": "getLangs", + "detect": "detect", + "translate": "translate", + } + + def get_supported_languages(self): + return set(x.split("-")[0] for x in self.dirs) + + @property + def languages(self): + return self.get_supported_languages() + + @property + def dirs(self, proxies=None): + + try: + url = self.__base_url.format(version=self.api_version, endpoint="getLangs") + print("url: ", url) + response = requests.get(url, params={"key": self.api_key}, proxies=proxies) + except requests.exceptions.ConnectionError: + raise ServerException(503) + else: + data = response.json() + + if response.status_code != 200: + raise ServerException(response.status_code) + return data.get("dirs") + + def detect(self, text, proxies=None): + response = None + params = { + "text": text, + "format": "plain", + "key": self.api_key, + } + try: + url = self.__base_url.format(version=self.api_version, endpoint="detect") + response = requests.post(url, data=params, proxies=proxies) + + except RequestError: + raise + except ConnectionError: + raise ServerException(503) + except ValueError: + raise ServerException(response.status_code) + else: + response = response.json() + language = response['lang'] + status_code = response['code'] + if status_code != 200: + raise RequestError() + elif not language: + raise ServerException(501) + return language + + def translate(self, source, target, text, proxies=None): + params = { + "text": text, + "format": "plain", + "lang": target if source == "auto" else "{}-{}".format(source, target), + "key": self.api_key + } + try: + url = self.__base_url.format(version=self.api_version, endpoint="translate") + response = requests.post(url, data=params, proxies=proxies) + except ConnectionError: + raise ServerException(503) + else: + response = response.json() + + if response['code'] == 429: + raise TooManyRequests() + + if response['code'] != 200: + raise ServerException(response['code']) + + if not response['text']: + raise TranslationNotFound() + + return response['text'] + + def translate_file(self, source, target, path): + """ + translate from a file + @param source: source language + @param target: target language + @param path: path to file + @return: translated text + """ + try: + with open(path) as f: + text = f.read() + + return self.translate(source, target, text) + except Exception as e: + raise e + + def translate_batch(self, source, target, batch): + """ + translate a batch of texts + @param source: source language + @param target: target language + @param batch: list of texts to translate + @return: list of translations + """ + return [self.translate(source, target, text) for text in batch] |