aboutsummaryrefslogtreecommitdiffhomepage
path: root/libs/deep_translator/linguee.py
blob: d65ac33829f814354c7cca8a6efc5c2f3f8acc15 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
linguee translator API
"""

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from typing import List, Optional, Union
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from requests.utils import requote_uri

from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES
from deep_translator.exceptions import (
    ElementNotFoundInGetRequest,
    NotValidPayload,
    RequestError,
    TooManyRequests,
    TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid, request_failed


class LingueeTranslator(BaseTranslator):
    """
    class that wraps functions, which use the linguee translator under the hood to translate word(s)

    Translations are obtained by requesting the linguee search page over
    HTTP and scraping the ``<a class="dictLink featured">`` elements out of
    the returned HTML.
    """

    def __init__(
        self,
        source: str = "en",
        target: str = "de",
        proxies: Optional[dict] = None,
        **kwargs,
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping, handed unchanged to
            ``requests.get`` for every request made by this instance
        @param kwargs: accepted for call compatibility; not used by this
            constructor
        """
        self.proxies = proxies
        super().__init__(
            base_url=BASE_URLS.get("LINGUEE"),
            source=source,
            target=target,
            languages=LINGUEE_LANGUAGES_TO_CODES,
            element_tag="a",
            element_query={"class": "dictLink featured"},
            payload_key=None,  # key of text in the url
        )

    def translate(
        self, word: str, return_all: bool = False, **kwargs
    ) -> Union[str, List[str]]:
        """
        function that uses linguee to translate a word
        @param word: word to translate
        @type word: str
        @param return_all: set to True to return all synonym of the translated word
        @type return_all: bool
        @param kwargs: accepted for call compatibility; not used by this method
        @return: the first translation found, or the list of all translations
            found when return_all is True. The input word is returned
            unchanged when it is empty or when source and target are the same.
        @raise TooManyRequests: the server answered with HTTP status 429
        @raise RequestError: request_failed() reports the status code as a failure
        @raise ElementNotFoundInGetRequest: the page holds no matching element
        @raise TranslationNotFound: no translation text could be extracted
        """
        if self._same_source_target() or is_empty(word):
            return word

        if not is_input_valid(word, max_chars=50):
            # NOTE(review): is_input_valid is defined outside this file. If it
            # can return False instead of raising, the method keeps its
            # historical behaviour of returning None here -- confirm intent.
            return None

        # Percent-encode the word on its own first: requote_uri() treats
        # characters such as "&", "#", "=" and "+" as safe, so a word
        # containing them would otherwise alter or truncate the query string.
        # requote_uri() leaves the resulting %XX escapes untouched.
        query = quote(word, safe="")
        url = requote_uri(
            f"{self._base_url}{self._source}-{self._target}"
            f"/search/?source={self._source}&query={query}"
        )

        response = requests.get(url, proxies=self.proxies)
        try:
            if response.status_code == 429:
                raise TooManyRequests()

            if request_failed(status_code=response.status_code):
                raise RequestError()

            soup = BeautifulSoup(response.text, "html.parser")
            elements = soup.find_all(self._element_tag, self._element_query)
        finally:
            # Release the response on every path, including the error ones.
            response.close()

        if not elements:
            raise ElementNotFoundInGetRequest(elements)

        translations = []
        for el in elements:
            # A nested <span class="placeholder"> (the "pronoun") is not part
            # of the translation itself; its text is cut out of the link text.
            placeholder = el.find("span", {"class": "placeholder"})
            if placeholder is None:
                pronoun = ""
            else:
                pronoun = placeholder.get_text(strip=True)
            translations.append(el.get_text(strip=True).replace(pronoun, ""))

        # Defensive check: every element contributes one entry above.
        if not translations:
            raise TranslationNotFound(word)

        return translations if return_all else translations[0]

    def translate_words(self, words: List[str], **kwargs) -> List[str]:
        """
        translate a batch of words together by providing them in a list
        @param words: list of words you want to translate
        @param kwargs: additional args, forwarded to translate() for each word
        @return: list of translated words, in the same order as the input
        @raise NotValidPayload: words is empty (or otherwise falsy)
        """
        if not words:
            raise NotValidPayload(words)

        return [self.translate(word=word, **kwargs) for word in words]