From c94a459a248352fd97dccc79ed6604a558459bfd Mon Sep 17 00:00:00 2001
From: dirkf
Date: Tue, 11 Oct 2022 12:18:12 +0000
Subject: [utils] Sanitize look-alike Unicode glyphs in non-ID filename fields when --restrict-filenames

Implements https://github.com/ytdl-org/youtube-dl/issues/31216#issuecomment-1236102822, which has a test.
---
 youtube_dl/utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index fea38ed32..23a65a81c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -33,6 +33,7 @@ import sys
 import tempfile
 import time
 import traceback
+import unicodedata
 import xml.etree.ElementTree
 import zlib
 
@@ -2118,6 +2119,9 @@ def sanitize_filename(s, restricted=False, is_id=False):
             return '_'
         return char
 
+    # Replace look-alike Unicode glyphs
+    if restricted and not is_id:
+        s = unicodedata.normalize('NFKC', s)
     # Handle timestamps
     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
     result = ''.join(map(replace_insane, s))
-- 
cgit v1.2.3