summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- libs/subliminal_patch/providers/utils.py 67
-rw-r--r-- tests/subliminal_patch/test_utils.py 38
2 files changed, 85 insertions, 20 deletions
diff --git a/libs/subliminal_patch/providers/utils.py b/libs/subliminal_patch/providers/utils.py
index 0158abfee..1748bfe9c 100644
--- a/libs/subliminal_patch/providers/utils.py
+++ b/libs/subliminal_patch/providers/utils.py
@@ -1,59 +1,86 @@
+from collections import namedtuple
+from difflib import SequenceMatcher
import io
import logging
import os
+import re
import zipfile
-import rarfile
-
from guessit import guessit
-
+import rarfile
from subliminal.subtitle import fix_line_ending
-
from subliminal_patch.core import Episode
from subliminal_patch.subtitle import guess_matches
from ._agent_list import FIRST_THOUSAND_OR_SO_USER_AGENTS
-
logger = logging.getLogger(__name__)
-def _get_matching_sub(sub_names, forced=False, episode=None):
- matching_sub = None
+_MatchingSub = namedtuple("_MatchingSub", ("file", "priority"))
+
+def _get_matching_sub(sub_names, forced=False, episode=None, episode_title=None):
guess_options = {"single_value": True}
if episode is not None:
guess_options["type"] = "episode" # type: ignore
+ matching_subs = []
+
for sub_name in sub_names:
if not forced and os.path.splitext(sub_name.lower())[0].endswith("forced"):
logger.debug("Ignoring forced subtitle: %s", sub_name)
continue
# If it's a movie then get the first subtitle
- if episode is None:
+ if episode is None and episode_title is None:
logger.debug("Movie subtitle found: %s", sub_name)
- matching_sub = sub_name
+ matching_subs.append(_MatchingSub(sub_name, 2))
break
guess = guessit(sub_name, options=guess_options)
- if guess.get("episode") is None:
- logger.debug("No episode info found in file: %s", sub_name)
- continue
+ matched_episode_num = guess.get("episode")
+ if matched_episode_num:
+ logger.debug("No episode number found in file: %s", sub_name)
+
+ matched_title = None
+ if episode_title is not None:
+ matched_title = _analize_sub_name(sub_name, episode_title)
+
+ if episode == matched_episode_num:
+ logger.debug("Episode matched from number: %s", sub_name)
+ matching_subs.append(_MatchingSub(sub_name, 2))
+ elif matched_title:
+ matching_subs.append(_MatchingSub(sub_name, 1))
+ else:
+ logger.debug("Ignoring incorrect episode: '%s'", sub_name)
+
+ if matching_subs:
+ matching_subs.sort(key=lambda x: x.priority, reverse=True)
+ logger.debug("Matches: %s", matching_subs)
+ return matching_subs[0].file
+ else:
+ logger.debug("Nothing matched")
+ return None
- if episode == guess["episode"]:
- logger.debug("Episode matched: %s", sub_name)
- matching_sub = sub_name
- break
- logger.debug("Ignoring incorrect episode: %s", sub_name)
+def _analize_sub_name(sub_name: str, title_):
+ titles = re.split(r"[.-]", os.path.splitext(sub_name)[0])
+ for title in titles:
+ ratio = SequenceMatcher(None, title, title_).ratio()
+ if ratio > 0.85:
+ logger.debug(
+ "Episode title matched: '%s' -> '%s' [%s]", title, sub_name, ratio
+ )
+ return True
- return matching_sub
+ logger.debug("No episode title matched from file")
+ return False
def get_subtitle_from_archive(
- archive, forced=False, episode=None, get_first_subtitle=False
+ archive, forced=False, episode=None, get_first_subtitle=False, **kwargs
):
"Get subtitle from Rarfile/Zipfile object. Return None if nothing is found."
subs_in_archive = [
@@ -72,7 +99,7 @@ def get_subtitle_from_archive(
logger.debug("Getting first subtitle in archive: %s", subs_in_archive)
return fix_line_ending(archive.read(subs_in_archive[0]))
- matching_sub = _get_matching_sub(subs_in_archive, forced, episode)
+ matching_sub = _get_matching_sub(subs_in_archive, forced, episode, **kwargs)
if matching_sub is not None:
logger.info("Using %s from archive", matching_sub)
diff --git a/tests/subliminal_patch/test_utils.py b/tests/subliminal_patch/test_utils.py
index e4a8b4efb..409b4eb18 100644
--- a/tests/subliminal_patch/test_utils.py
+++ b/tests/subliminal_patch/test_utils.py
@@ -27,6 +27,44 @@ def test_get_matching_sub(sub_names, episode, forced, expected):
assert utils._get_matching_sub(sub_names, forced, episode) == expected
+def test_get_matching_sub_complex_season_pack():
+ files = [
+ "30. Hard Drive Courage. The Ride Of The Valkyries.srt",
+ "34. So In Louvre Are We Two. Night Of The Scarecrow.srt",
+ "31. Scuba Scuba Doo. Conway The Contaminationist.srt",
+ "32. Katz Under The Sea. Curtain Of Cruelty.srt",
+ "27. Muriel Meets Her Match. Courage Vs. Mecha-Courage.srt",
+ "36. Fishy Business. Angry Nasty People.srt",
+ "28. Campsite Of Terror. The Record Deal.srt",
+ "33. Feast Of The Bullfrogs. Tulip's Worm.srt",
+ "37. Dome Of Doom. Snowman's Revenge.srt",
+ "35. Mondo Magic. Watch The Birdies.srt",
+ "29. Stormy Weather. The Sandman Sleeps.srt",
+ "38. The Quilt Club. Swindlin' Wind.srt",
+ ]
+ # Courage the Cowardly Dog S03E17 "Mondo Magic"
+ matched = utils._get_matching_sub(files, False, 17, episode_title="Mondo Magic")
+ assert matched == "35. Mondo Magic. Watch The Birdies.srt"
+
+
+def test_get_matching_sub_complex_season_pack_mixed_files():
+ files = [
+ "30. Hard Drive Courage. The Ride Of The Valkyries.srt",
+ "S03E15.srt",
+ "S03E16.srt",
+ "S03E17.srt",
+ "28. Campsite Of Terror. The Record Deal.srt",
+ "33. Feast Of The Bullfrogs. Tulip's Worm.srt",
+ "37. Dome Of Doom. Snowman's Revenge.srt",
+ "35. Mondo Magic. Watch The Birdies.srt",
+ "29. Stormy Weather. The Sandman Sleeps.srt",
+ "38. The Quilt Club. Swindlin' Wind.srt",
+ ]
+ # Courage the Cowardly Dog S03E17 "Mondo Magic"
+ matched = utils._get_matching_sub(files, False, 17, episode_title="Mondo Magic")
+ assert matched == "S03E17.srt"
+
+
def test_get_subtitle_from_archive_movie(data):
with ZipFile(os.path.join(data, "archive_1.zip")) as zf:
assert utils.get_subtitle_from_archive(zf) is not None