diff options
author | Jakub Wilk <[email protected]> | 2019-05-10 20:42:32 +0200 |
---|---|---|
committer | Sergey M <[email protected]> | 2019-05-11 01:42:31 +0700 |
commit | fd35d8cdfdc77ca6ec6d87677fe0d00df0cbb22a (patch) | |
tree | 266fb842f011010adb2d0b0a3a55810cde82f95c | |
parent | 4eec112740910621a7fd9c50158fb2388649d8b7 (diff) | |
download | youtube-dl-fd35d8cdfdc77ca6ec6d87677fe0d00df0cbb22a.tar.gz youtube-dl-fd35d8cdfdc77ca6ec6d87677fe0d00df0cbb22a.zip |
[utils] Transliterate "þ" as "th" (#20897)
Despite visual similarity, "þ" is unrelated to "p".
It is normally transliterated as "th":
$ echo þ-Þ | iconv -t ASCII//TRANSLIT
th-TH
-rw-r--r-- | test/test_utils.py | 2 |
-rw-r--r-- | youtube_dl/utils.py | 4 |
2 files changed, 3 insertions, 3 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index ca6d832a4..9ef0e422b 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -183,7 +183,7 @@ class TestUtil(unittest.TestCase):
 
         self.assertEqual(sanitize_filename(
             'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
-            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
+            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
 
     def test_sanitize_ids(self):
         self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 71713f63a..99ee54942 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -125,8 +125,8 @@ KNOWN_EXTENSIONS = (
 
 # needed for sanitizing filenames in restricted mode
 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
-                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
-                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
+                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 
 DATE_FORMATS = (
     '%d %B %Y',