about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: dirkf <[email protected]> 2022-10-11 12:18:12 +0000
committer: GitHub <[email protected]> 2022-10-11 12:18:12 +0000
commit: c94a459a248352fd97dccc79ed6604a558459bfd (patch)
tree: 03fb6ac8458fb27d5bcf16d399d7d9252797f4e1
parent: 6e2626f092c63a5fa22a31df409610b5deaf3968 (diff)
download: youtube-dl-c94a459a248352fd97dccc79ed6604a558459bfd.tar.gz
download: youtube-dl-c94a459a248352fd97dccc79ed6604a558459bfd.zip
[utils] Sanitize look-alike Unicode glyphs in non-ID filename fields when --restrict-filenames is in effect
Implements the suggestion in https://github.com/ytdl-org/youtube-dl/issues/31216#issuecomment-1236102822, which includes a test.
-rw-r--r-- youtube_dl/utils.py 4
1 file changed, 4 insertions, 0 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index fea38ed32..23a65a81c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -33,6 +33,7 @@ import sys
import tempfile
import time
import traceback
+import unicodedata
import xml.etree.ElementTree
import zlib
@@ -2118,6 +2119,9 @@ def sanitize_filename(s, restricted=False, is_id=False):
return '_'
return char
+ # Replace look-alike Unicode glyphs
+ if restricted and not is_id:
+ s = unicodedata.normalize('NFKC', s)
# Handle timestamps
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
result = ''.join(map(replace_insane, s))