diff options
author | Louis Vézina <[email protected]> | 2020-05-20 11:29:39 -0400 |
---|---|---|
committer | Louis Vézina <[email protected]> | 2020-05-20 11:29:39 -0400 |
commit | 376e13d7f1ab8e6d9202c1a51a96526de0f11163 (patch) | |
tree | 77e911bffc9a59ed2868a673f0733ad38f5b4ccd /libs/rebulk | |
parent | 5b44007bbb7ef49bbf4087b43dbb948433639fbe (diff) | |
download | bazarr-376e13d7f1ab8e6d9202c1a51a96526de0f11163.tar.gz bazarr-376e13d7f1ab8e6d9202c1a51a96526de0f11163.zip |
Upgraded GuessIt to 3.0.1
Diffstat (limited to 'libs/rebulk')
-rw-r--r-- | libs/rebulk/__version__.py | 2 | ||||
-rw-r--r-- | libs/rebulk/builder.py | 217 | ||||
-rw-r--r-- | libs/rebulk/chain.py | 347 | ||||
-rw-r--r-- | libs/rebulk/formatters.py | 10 | ||||
-rw-r--r-- | libs/rebulk/introspector.py | 5 | ||||
-rw-r--r-- | libs/rebulk/loose.py | 72 | ||||
-rw-r--r-- | libs/rebulk/match.py | 28 | ||||
-rw-r--r-- | libs/rebulk/pattern.py | 332 | ||||
-rw-r--r-- | libs/rebulk/processors.py | 2 | ||||
-rw-r--r-- | libs/rebulk/rebulk.py | 183 | ||||
-rw-r--r-- | libs/rebulk/rules.py | 18 | ||||
-rw-r--r-- | libs/rebulk/test/test_chain.py | 106 | ||||
-rw-r--r-- | libs/rebulk/test/test_debug.py | 36 | ||||
-rw-r--r-- | libs/rebulk/test/test_match.py | 3 | ||||
-rw-r--r-- | libs/rebulk/utils.py | 5 | ||||
-rw-r--r-- | libs/rebulk/validators.py | 11 |
16 files changed, 771 insertions, 606 deletions
diff --git a/libs/rebulk/__version__.py b/libs/rebulk/__version__.py index 11d0431dc..939c554ca 100644 --- a/libs/rebulk/__version__.py +++ b/libs/rebulk/__version__.py @@ -4,4 +4,4 @@ Version module """ # pragma: no cover -__version__ = '0.9.0' +__version__ = '2.0.1' diff --git a/libs/rebulk/builder.py b/libs/rebulk/builder.py new file mode 100644 index 000000000..c91420aa3 --- /dev/null +++ b/libs/rebulk/builder.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Base builder class for Rebulk +""" +from abc import ABCMeta, abstractmethod +from copy import deepcopy +from logging import getLogger + +from six import add_metaclass + +from .loose import set_defaults +from .pattern import RePattern, StringPattern, FunctionalPattern + +log = getLogger(__name__).log + + +@add_metaclass(ABCMeta) +class Builder(object): + """ + Base builder class for patterns + """ + + def __init__(self): + self._defaults = {} + self._regex_defaults = {} + self._string_defaults = {} + self._functional_defaults = {} + self._chain_defaults = {} + + def reset(self): + """ + Reset all defaults. + + :return: + """ + self.__init__() + + def defaults(self, **kwargs): + """ + Define default keyword arguments for all patterns + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(kwargs, self._defaults, override=True) + return self + + def regex_defaults(self, **kwargs): + """ + Define default keyword arguments for functional patterns. + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(kwargs, self._regex_defaults, override=True) + return self + + def string_defaults(self, **kwargs): + """ + Define default keyword arguments for string patterns. + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(kwargs, self._string_defaults, override=True) + return self + + def functional_defaults(self, **kwargs): + """ + Define default keyword arguments for functional patterns. + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(kwargs, self._functional_defaults, override=True) + return self + + def chain_defaults(self, **kwargs): + """ + Define default keyword arguments for patterns chain. + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(kwargs, self._chain_defaults, override=True) + return self + + def build_re(self, *pattern, **kwargs): + """ + Builds a new regular expression pattern + + :param pattern: + :type pattern: + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(self._regex_defaults, kwargs) + set_defaults(self._defaults, kwargs) + return RePattern(*pattern, **kwargs) + + def build_string(self, *pattern, **kwargs): + """ + Builds a new string pattern + + :param pattern: + :type pattern: + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(self._string_defaults, kwargs) + set_defaults(self._defaults, kwargs) + return StringPattern(*pattern, **kwargs) + + def build_functional(self, *pattern, **kwargs): + """ + Builds a new functional pattern + + :param pattern: + :type pattern: + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + set_defaults(self._functional_defaults, kwargs) + set_defaults(self._defaults, kwargs) + return FunctionalPattern(*pattern, **kwargs) + + def build_chain(self, **kwargs): + """ + Builds a new patterns chain + + :param pattern: + :type pattern: + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + from .chain import Chain + set_defaults(self._chain_defaults, kwargs) + set_defaults(self._defaults, kwargs) + chain = Chain(self, **kwargs) + chain._defaults = deepcopy(self._defaults) # pylint: disable=protected-access + chain._regex_defaults = deepcopy(self._regex_defaults) # pylint: disable=protected-access + chain._functional_defaults = deepcopy(self._functional_defaults) # pylint: disable=protected-access + chain._string_defaults = deepcopy(self._string_defaults) # pylint: disable=protected-access + chain._chain_defaults = deepcopy(self._chain_defaults) # pylint: disable=protected-access + return chain + + @abstractmethod + def pattern(self, *pattern): + """ + Register a list of Pattern instance + :param pattern: + :return: + """ + pass + + def regex(self, *pattern, **kwargs): + """ + Add re pattern + + :param pattern: + :type pattern: + :return: self + :rtype: Rebulk + """ + return self.pattern(self.build_re(*pattern, **kwargs)) + + def string(self, *pattern, **kwargs): + """ + Add string pattern + + :param pattern: + :type pattern: + :return: self + :rtype: Rebulk + """ + return self.pattern(self.build_string(*pattern, **kwargs)) + + def functional(self, *pattern, **kwargs): + """ + Add functional pattern + + :param pattern: + :type pattern: + :return: self + :rtype: Rebulk + """ + functional = self.build_functional(*pattern, **kwargs) + return self.pattern(functional) + + def chain(self, **kwargs): + """ + Add patterns chain, using configuration of this rebulk + + :param pattern: + :type pattern: + :param kwargs: + :type kwargs: + :return: + :rtype: + """ + chain = self.build_chain(**kwargs) + self.pattern(chain) + return chain diff --git a/libs/rebulk/chain.py b/libs/rebulk/chain.py index dfb6ea442..ba31ec9a5 100644 --- a/libs/rebulk/chain.py +++ b/libs/rebulk/chain.py @@ -6,9 +6,10 @@ Chain patterns and handle repetiting capture group # pylint: disable=super-init-not-called import itertools -from .loose import call, set_defaults +from .builder import Builder +from .loose import call from .match import Match, Matches -from .pattern import Pattern, filter_match_kwargs +from .pattern import Pattern, filter_match_kwargs, BasePattern from .remodule import re @@ -19,150 +20,46 @@ class _InvalidChainException(Exception): pass -class Chain(Pattern): +class Chain(Pattern, Builder): """ Definition of a pattern chain to search for. """ - def __init__(self, rebulk, chain_breaker=None, **kwargs): - call(super(Chain, self).__init__, **kwargs) + def __init__(self, parent, chain_breaker=None, **kwargs): + Builder.__init__(self) + call(Pattern.__init__, self, **kwargs) self._kwargs = kwargs self._match_kwargs = filter_match_kwargs(kwargs) - self._defaults = {} - self._regex_defaults = {} - self._string_defaults = {} - self._functional_defaults = {} if callable(chain_breaker): self.chain_breaker = chain_breaker else: self.chain_breaker = None - self.rebulk = rebulk + self.parent = parent self.parts = [] - def defaults(self, **kwargs): + def pattern(self, *pattern): """ - Define default keyword arguments for all patterns - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._defaults = kwargs - return self - - def regex_defaults(self, **kwargs): - """ - Define default keyword arguments for functional patterns. - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._regex_defaults = kwargs - return self - - def string_defaults(self, **kwargs): - """ - Define default keyword arguments for string patterns. - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._string_defaults = kwargs - return self - - def functional_defaults(self, **kwargs): - """ - Define default keyword arguments for functional patterns. - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._functional_defaults = kwargs - return self - - def chain(self): - """ - Add patterns chain, using configuration from this chain - - :return: - :rtype: - """ - # pylint: disable=protected-access - chain = self.rebulk.chain(**self._kwargs) - chain._defaults = dict(self._defaults) - chain._regex_defaults = dict(self._regex_defaults) - chain._functional_defaults = dict(self._functional_defaults) - chain._string_defaults = dict(self._string_defaults) - return chain - - def regex(self, *pattern, **kwargs): - """ - Add re pattern - - :param pattern: - :type pattern: - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - set_defaults(self._kwargs, kwargs) - set_defaults(self._regex_defaults, kwargs) - set_defaults(self._defaults, kwargs) - pattern = self.rebulk.build_re(*pattern, **kwargs) - part = ChainPart(self, pattern) - self.parts.append(part) - return part - - def functional(self, *pattern, **kwargs): - """ - Add functional pattern - - :param pattern: - :type pattern: - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - set_defaults(self._kwargs, kwargs) - set_defaults(self._functional_defaults, kwargs) - set_defaults(self._defaults, kwargs) - pattern = self.rebulk.build_functional(*pattern, **kwargs) - part = ChainPart(self, pattern) - self.parts.append(part) - return part - - def string(self, *pattern, **kwargs): - """ - Add string pattern :param pattern: - :type pattern: - :param kwargs: - :type kwargs: :return: - :rtype: """ - set_defaults(self._kwargs, kwargs) - set_defaults(self._functional_defaults, kwargs) - set_defaults(self._defaults, kwargs) - pattern = self.rebulk.build_string(*pattern, **kwargs) - part = ChainPart(self, pattern) + if not pattern: + raise ValueError("One pattern should be given to the chain") + if len(pattern) > 1: + raise ValueError("Only one pattern can be given to the chain") + part = ChainPart(self, pattern[0]) self.parts.append(part) return part def close(self): """ - Close chain builder to continue registering other pattern - - :return: - :rtype: + Deeply close the chain + :return: Rebulk instance """ - return self.rebulk + parent = self.parent + while isinstance(parent, Chain): + parent = parent.parent + return parent def _match(self, pattern, input_string, context=None): # pylint: disable=too-many-locals,too-many-nested-blocks @@ -173,42 +70,20 @@ class Chain(Pattern): chain_found = False current_chain_matches = [] valid_chain = True - is_chain_start = True for chain_part in self.parts: try: - chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part, - chain_input_string, - context) - - Chain._fix_matches_offset(chain_part_matches, input_string, offset) - Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset) - - if raw_chain_part_matches: - grouped_matches_dict = dict() - for match_index, match in itertools.groupby(chain_part_matches, - lambda m: m.match_index): - grouped_matches_dict[match_index] = list(match) - - grouped_raw_matches_dict = dict() - for match_index, raw_match in itertools.groupby(raw_chain_part_matches, - lambda m: m.match_index): - grouped_raw_matches_dict[match_index] = list(raw_match) - - for match_index, grouped_raw_matches in grouped_raw_matches_dict.items(): - chain_found = True - offset = grouped_raw_matches[-1].raw_end - chain_input_string = input_string[offset:] - if not chain_part.is_hidden: - grouped_matches = grouped_matches_dict.get(match_index, []) - if self._chain_breaker_eval(current_chain_matches + grouped_matches): - current_chain_matches.extend(grouped_matches) + chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string, + context, + with_raw_matches=True) + chain_found, chain_input_string, offset = \ + self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found, + input_string, chain_input_string, offset, current_chain_matches) except _InvalidChainException: valid_chain = False if current_chain_matches: offset = current_chain_matches[0].raw_end break - is_chain_start = False if not chain_found: break if current_chain_matches and valid_chain: @@ -217,38 +92,66 @@ class Chain(Pattern): return chain_matches - def _match_parent(self, match, yield_parent): + def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found, + input_string, chain_input_string, offset, current_chain_matches): + Chain._fix_matches_offset(chain_part_matches, input_string, offset) + Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset) + + if raw_chain_part_matches: + grouped_matches_dict = self._group_by_match_index(chain_part_matches) + grouped_raw_matches_dict = self._group_by_match_index(raw_chain_part_matches) + + for match_index, grouped_raw_matches in grouped_raw_matches_dict.items(): + chain_found = True + offset = grouped_raw_matches[-1].raw_end + chain_input_string = input_string[offset:] + + if not chain_part.is_hidden: + grouped_matches = grouped_matches_dict.get(match_index, []) + if self._chain_breaker_eval(current_chain_matches + grouped_matches): + current_chain_matches.extend(grouped_matches) + return chain_found, chain_input_string, offset + + def _process_match(self, match, match_index, child=False): """ - Handle a parent match + Handle a match :param match: :type match: - :param yield_parent: - :type yield_parent: + :param match_index: + :type match_index: + :param child: + :type child: :return: :rtype: """ - ret = super(Chain, self)._match_parent(match, yield_parent) - original_children = Matches(match.children) - original_end = match.end - while not ret and match.children: + # pylint: disable=too-many-locals + ret = super(Chain, self)._process_match(match, match_index, child=child) + if ret: + return True + + if match.children: last_pattern = match.children[-1].pattern - last_pattern_children = [child for child in match.children if child.pattern == last_pattern] - last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index) - last_pattern_groups = {} - for index, matches in last_pattern_groups_iter: - last_pattern_groups[index] = list(matches) - - for index in reversed(list(last_pattern_groups)): - last_matches = list(last_pattern_groups[index]) - for last_match in last_matches: - match.children.remove(last_match) - match.end = match.children[-1].end if match.children else match.start - ret = super(Chain, self)._match_parent(match, yield_parent) - if ret: - return True - match.children = original_children - match.end = original_end - return ret + last_pattern_groups = self._group_by_match_index( + [child_ for child_ in match.children if child_.pattern == last_pattern] + ) + + if last_pattern_groups: + original_children = Matches(match.children) + original_end = match.end + + for index in reversed(list(last_pattern_groups)): + last_matches = last_pattern_groups[index] + for last_match in last_matches: + match.children.remove(last_match) + match.end = match.children[-1].end if match.children else match.start + ret = super(Chain, self)._process_match(match, match_index, child=child) + if ret: + return True + + match.children = original_children + match.end = original_end + + return False def _build_chain_match(self, current_chain_matches, input_string): start = None @@ -282,46 +185,11 @@ class Chain(Pattern): Chain._fix_matches_offset(chain_part_match.children, input_string, offset) @staticmethod - def _match_chain_part(is_chain_start, chain_part, chain_input_string, context): - chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context, - with_raw_matches=True) - chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, - chain_input_string) - raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part, - chain_input_string) - - Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part) - return chain_part_matches, raw_chain_part_matches - - @staticmethod - def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string): - if not chain_part_matches: - return chain_part_matches - - if not is_chain_start: - separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start] - if separator: - return [] - - j = 1 - for i in range(0, len(chain_part_matches) - 1): - separator = chain_input_string[chain_part_matches[i].initiator.raw_end: - chain_part_matches[i + 1].initiator.raw_start] - if separator: - break - j += 1 - truncated = chain_part_matches[:j] - if chain_part.repeater_end is not None: - truncated = [m for m in truncated if m.match_index < chain_part.repeater_end] - return truncated - - @staticmethod - def _validate_chain_part_matches(chain_part_matches, chain_part): - max_match_index = -1 - if chain_part_matches: - max_match_index = max([m.match_index for m in chain_part_matches]) - if max_match_index + 1 < chain_part.repeater_start: - raise _InvalidChainException + def _group_by_match_index(matches): + grouped_matches_dict = dict() + for match_index, match in itertools.groupby(matches, lambda m: m.match_index): + grouped_matches_dict[match_index] = list(match) + return grouped_matches_dict @property def match_options(self): @@ -338,7 +206,7 @@ class Chain(Pattern): return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts) -class ChainPart(object): +class ChainPart(BasePattern): """ Part of a pattern chain. """ @@ -350,6 +218,51 @@ class ChainPart(object): self.repeater_end = 1 self._hidden = False + @property + def _is_chain_start(self): + return self._chain.parts[0] == self + + def matches(self, input_string, context=None, with_raw_matches=False): + matches, raw_matches = self.pattern.matches(input_string, context=context, with_raw_matches=True) + + matches = self._truncate_repeater(matches, input_string) + raw_matches = self._truncate_repeater(raw_matches, input_string) + + self._validate_repeater(raw_matches) + + if with_raw_matches: + return matches, raw_matches + + return matches + + def _truncate_repeater(self, matches, input_string): + if not matches: + return matches + + if not self._is_chain_start: + separator = input_string[0:matches[0].initiator.raw_start] + if separator: + return [] + + j = 1 + for i in range(0, len(matches) - 1): + separator = input_string[matches[i].initiator.raw_end: + matches[i + 1].initiator.raw_start] + if separator: + break + j += 1 + truncated = matches[:j] + if self.repeater_end is not None: + truncated = [m for m in truncated if m.match_index < self.repeater_end] + return truncated + + def _validate_repeater(self, matches): + max_match_index = -1 + if matches: + max_match_index = max([m.match_index for m in matches]) + if max_match_index + 1 < self.repeater_start: + raise _InvalidChainException + def chain(self): """ Add patterns chain, using configuration from this chain diff --git a/libs/rebulk/formatters.py b/libs/rebulk/formatters.py index 470469426..7175a54ab 100644 --- a/libs/rebulk/formatters.py +++ b/libs/rebulk/formatters.py @@ -15,9 +15,19 @@ def formatters(*chained_formatters): :return: :rtype: """ + def formatters_chain(input_string): # pylint:disable=missing-docstring for chained_formatter in chained_formatters: input_string = chained_formatter(input_string) return input_string return formatters_chain + + +def default_formatter(input_string): + """ + Default formatter + :param input_string: + :return: + """ + return input_string diff --git a/libs/rebulk/introspector.py b/libs/rebulk/introspector.py index 64b9836f0..bfefcb757 100644 --- a/libs/rebulk/introspector.py +++ b/libs/rebulk/introspector.py @@ -3,7 +3,7 @@ """ Introspect rebulk object to retrieve capabilities. """ -from abc import ABCMeta, abstractproperty +from abc import ABCMeta, abstractmethod from collections import defaultdict import six @@ -16,7 +16,8 @@ class Description(object): """ Abstract class for a description. """ - @abstractproperty + @property + @abstractmethod def properties(self): # pragma: no cover """ Properties of described object. diff --git a/libs/rebulk/loose.py b/libs/rebulk/loose.py index 72543b1e0..423b4ea7a 100644 --- a/libs/rebulk/loose.py +++ b/libs/rebulk/loose.py @@ -3,8 +3,18 @@ """ Various utilities functions """ -import inspect + import sys + +from inspect import isclass +try: + from inspect import getfullargspec as getargspec + + _fullargspec_supported = True +except ImportError: + _fullargspec_supported = False + from inspect import getargspec + from .utils import is_iterable if sys.version_info < (3, 4, 0): # pragma: no cover @@ -45,8 +55,8 @@ def call(function, *args, **kwargs): :return: sale vakye as default function call :rtype: object """ - func = constructor_args if inspect.isclass(function) else function_args - call_args, call_kwargs = func(function, *args, **kwargs) + func = constructor_args if isclass(function) else function_args + call_args, call_kwargs = func(function, *args, ignore_unused=True, **kwargs) # @see #20 return function(*call_args, **call_kwargs) @@ -63,7 +73,7 @@ def function_args(callable_, *args, **kwargs): :return: (args, kwargs) matching the function signature :rtype: tuple """ - argspec = inspect.getargspec(callable_) # pylint:disable=deprecated-method + argspec = getargspec(callable_) # pylint:disable=deprecated-method return argspec_args(argspec, False, *args, **kwargs) @@ -80,7 +90,7 @@ def constructor_args(class_, *args, **kwargs): :return: (args, kwargs) matching the function signature :rtype: tuple """ - argspec = inspect.getargspec(_constructor(class_)) # pylint:disable=deprecated-method + argspec = getargspec(_constructor(class_)) # pylint:disable=deprecated-method return argspec_args(argspec, True, *args, **kwargs) @@ -99,7 +109,7 @@ def argspec_args(argspec, constructor, *args, **kwargs): :return: (args, kwargs) matching the function signature :rtype: tuple """ - if argspec.keywords: + if argspec.varkw: call_kwarg = kwargs else: call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # Python 2.6 dict comprehension @@ -110,6 +120,36 @@ def argspec_args(argspec, constructor, *args, **kwargs): return call_args, call_kwarg +if not _fullargspec_supported: + def argspec_args_legacy(argspec, constructor, *args, **kwargs): + """ + Return (args, kwargs) matching the argspec object + + :param argspec: argspec to use + :type argspec: argspec + :param constructor: is it a constructor ? + :type constructor: bool + :param args: + :type args: + :param kwargs: + :type kwargs: + :return: (args, kwargs) matching the function signature + :rtype: tuple + """ + if argspec.keywords: + call_kwarg = kwargs + else: + call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # Python 2.6 dict comprehension + if argspec.varargs: + call_args = args + else: + call_args = args[:len(argspec.args) - (1 if constructor else 0)] + return call_args, call_kwarg + + + argspec_args = argspec_args_legacy + + def ensure_list(param): """ Retrieves a list from given parameter. @@ -177,9 +217,12 @@ def filter_index(collection, predicate=None, index=None): return collection -def set_defaults(defaults, kwargs): +def set_defaults(defaults, kwargs, override=False): """ Set defaults from defaults dict to kwargs dict + + :param override: + :type override: :param defaults: :type defaults: :param kwargs: @@ -187,12 +230,13 @@ def set_defaults(defaults, kwargs): :return: :rtype: """ + if 'clear' in defaults.keys() and defaults.pop('clear'): + kwargs.clear() for key, value in defaults.items(): - if key not in kwargs and value is not None: + if key in kwargs: + if isinstance(value, list) and isinstance(kwargs[key], list): + kwargs[key] = list(value) + kwargs[key] + elif isinstance(value, dict) and isinstance(kwargs[key], dict): + set_defaults(value, kwargs[key]) + if key not in kwargs or override: kwargs[key] = value - elif isinstance(value, list) and isinstance(kwargs[key], list): - kwargs[key] = list(value) + kwargs[key] - elif isinstance(value, dict) and isinstance(kwargs[key], dict): - set_defaults(value, kwargs[key]) - elif key in kwargs and value is None: - kwargs[key] = None diff --git a/libs/rebulk/match.py b/libs/rebulk/match.py index a786df4df..d8e72df42 100644 --- a/libs/rebulk/match.py +++ b/libs/rebulk/match.py @@ -5,7 +5,11 @@ Classes and functions related to matches """ import copy import itertools -from collections import defaultdict, MutableSequence +from collections import defaultdict +try: + from collections.abc import MutableSequence +except ImportError: + from collections import MutableSequence try: from collections import OrderedDict # pylint:disable=ungrouped-imports @@ -778,9 +782,9 @@ class Match(object): right.start = end if right: ret.append(right) - elif end <= current.end and end > current.start: + elif current.end >= end > current.start: current.start = end - elif start >= current.start and start < current.end: + elif current.start <= start < current.end: current.end = start return filter_index(ret, predicate, index) @@ -811,6 +815,24 @@ class Match(object): return filter_index(ret, predicate, index) + def tagged(self, *tags): + """ + Check if this match has at least one of the provided tags + + :param tags: + :return: True if at least one tag is defined, False otherwise. + """ + return any(tag in self.tags for tag in tags) + + def named(self, *names): + """ + Check if one of the children match has one of the provided name + + :param names: + :return: True if at least one child is named with a given name is defined, False otherwise. + """ + return any(name in self.names for name in names) + def __len__(self): return self.end - self.start diff --git a/libs/rebulk/pattern.py b/libs/rebulk/pattern.py index 57b274e89..beb8b2731 100644 --- a/libs/rebulk/pattern.py +++ b/libs/rebulk/pattern.py @@ -10,14 +10,39 @@ from abc import ABCMeta, abstractmethod, abstractproperty import six from . import debug +from .formatters import default_formatter from .loose import call, ensure_list, ensure_dict from .match import Match from .remodule import re, REGEX_AVAILABLE from .utils import find_all, is_iterable, get_first_defined +from .validators import allways_true @six.add_metaclass(ABCMeta) -class Pattern(object): +class BasePattern(object): + """ + Base class for Pattern like objects + """ + + @abstractmethod + def matches(self, input_string, context=None, with_raw_matches=False): + """ + Computes all matches for a given input + + :param input_string: the string to parse + :type input_string: str + :param context: the context + :type context: dict + :param with_raw_matches: should return details + :type with_raw_matches: dict + :return: matches based on input_string for this pattern + :rtype: iterator[Match] + """ + pass + + [email protected]_metaclass(ABCMeta) +class Pattern(BasePattern): """ Definition of a particular pattern to search for. """ @@ -25,7 +50,7 @@ class Pattern(object): def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False, private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None, marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None, - properties=None, post_processor=None, **kwargs): + properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs): """ :param name: Name of this pattern :type name: str @@ -66,15 +91,19 @@ class Pattern(object): :type disabled: bool|function :param log_lvl: Log level associated to this pattern :type log_lvl: int - :param post_process: Post processing function + :param post_processor: Post processing function :type post_processor: func + :param pre_match_processor: Pre match processing function + :type pre_match_processor: func + :param post_match_processor: Post match processing function + :type post_match_processor: func """ # pylint:disable=too-many-locals,unused-argument self.name = name self.tags = ensure_list(tags) - self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x) + self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter) self.values, self._default_value = ensure_dict(value, None) - self.validators, self._default_validator = ensure_dict(validator, lambda match: True) + self.validators, self._default_validator = ensure_dict(validator, allways_true) self.every = every self.children = children self.private = private @@ -96,6 +125,14 @@ class Pattern(object): self.post_processor = None else: self.post_processor = post_processor + if not callable(pre_match_processor): + self.pre_match_processor = None + else: + self.pre_match_processor = pre_match_processor + if not callable(post_match_processor): + self.post_match_processor = None + else: + self.post_match_processor = post_match_processor @property def log_level(self): @@ -106,19 +143,52 @@ class Pattern(object): """ return self._log_level if self._log_level is not None else debug.LOG_LEVEL - def _yield_children(self, match): + def matches(self, input_string, context=None, with_raw_matches=False): + """ + Computes all matches for a given input + + :param input_string: the string to parse + :type input_string: str + :param context: the context + :type context: dict + :param with_raw_matches: should return details + :type with_raw_matches: dict + :return: matches based on input_string for this pattern + :rtype: iterator[Match] + """ + # pylint: disable=too-many-branches + + matches = [] + raw_matches = [] + + for pattern in self.patterns: + match_index = 0 + for match in self._match(pattern, input_string, context): + raw_matches.append(match) + matches.extend(self._process_matches(match, match_index)) + match_index += 1 + + matches = self._post_process_matches(matches) + + if with_raw_matches: + return matches, raw_matches + return matches + + @property + def _should_include_children(self): """ - Does this match has children + Check if children matches from this pattern should be included in matches results. :param match: :type match: :return: :rtype: """ - return match.children and (self.children or self.every) + return self.children or self.every - def _yield_parent(self): + @property + def _should_include_parent(self): """ - Does this mat + Check is a match from this pattern should be included in matches results. :param match: :type match: :return: @@ -126,115 +196,132 @@ class Pattern(object): """ return not self.children or self.every - def _match_parent(self, match, yield_parent): + @staticmethod + def _match_config_property_keys(match, child=False): + if match.name: + yield match.name + if child: + yield '__children__' + else: + yield '__parent__' + yield None + + @staticmethod + def _process_match_index(match, match_index): """ - Handle a parent match + Process match index from this pattern process state. + :param match: - :type match: - :param yield_parent: - :type yield_parent: :return: - :rtype: """ - if not match or match.value == "": - return False - - pattern_value = get_first_defined(self.values, [match.name, '__parent__', None], - self._default_value) - if pattern_value: - match.value = pattern_value + match.match_index = match_index - if yield_parent or self.format_all: - match.formatter = get_first_defined(self.formatters, [match.name, '__parent__', None], - self._default_formatter) - if yield_parent or self.validate_all: - validator = get_first_defined(self.validators, [match.name, '__parent__', None], - self._default_validator) - if validator and not validator(match): - return False - return True - - def _match_child(self, child, yield_children): + def _process_match_private(self, match, child=False): """ - Handle a children match + Process match privacy from this pattern configuration. + + :param match: :param child: - :type child: - :param yield_children: - :type yield_children: :return: - :rtype: """ - if not child or child.value == "": - return False - pattern_value = get_first_defined(self.values, [child.name, '__children__', None], - self._default_value) + if match.name and match.name in self.private_names or \ + not child and self.private_parent or \ + child and self.private_children: + match.private = True + + def _process_match_value(self, match, child=False): + """ + Process match value from this pattern configuration. + :param match: + :return: + """ + keys = self._match_config_property_keys(match, child=child) + pattern_value = get_first_defined(self.values, keys, self._default_value) if pattern_value: - child.value = pattern_value + match.value = pattern_value + + def _process_match_formatter(self, match, child=False): + """ + Process match formatter from this pattern configuration. + + :param match: + :return: + """ + included = self._should_include_children if child else self._should_include_parent + if included or self.format_all: + keys = self._match_config_property_keys(match, child=child) + match.formatter = get_first_defined(self.formatters, keys, self._default_formatter) - if yield_children or self.format_all: - child.formatter = get_first_defined(self.formatters, [child.name, '__children__', None], - self._default_formatter) + def _process_match_validator(self, match, child=False): + """ + Process match validation from this pattern configuration. - if yield_children or self.validate_all: - validator = get_first_defined(self.validators, [child.name, '__children__', None], - self._default_validator) - if validator and not validator(child): + :param match: + :return: True if match is validated by the configured validator, False otherwise. + """ + included = self._should_include_children if child else self._should_include_parent + if included or self.validate_all: + keys = self._match_config_property_keys(match, child=child) + validator = get_first_defined(self.validators, keys, self._default_validator) + if validator and not validator(match): return False return True - def matches(self, input_string, context=None, with_raw_matches=False): + def _process_match(self, match, match_index, child=False): """ - Computes all matches for a given input + Process match from this pattern by setting all properties from defined configuration + (index, private, value, formatter, validator, ...). - :param input_string: the string to parse - :type input_string: str - :param context: the context - :type context: dict - :param with_raw_matches: should return details - :type with_raw_matches: dict - :return: matches based on input_string for this pattern - :rtype: iterator[Match] + :param match: + :type match: + :return: True if match is validated by the configured validator, False otherwise. + :rtype: """ - # pylint: disable=too-many-branches + self._process_match_index(match, match_index) + self._process_match_private(match, child) + self._process_match_value(match, child) + self._process_match_formatter(match, child) + return self._process_match_validator(match, child) + + @staticmethod + def _process_match_processor(match, processor): + if processor: + ret = processor(match) + if ret is not None: + return ret + return match + + def _process_matches(self, match, match_index): + """ + Process and generate all matches for the given unprocessed match. + :param match: + :param match_index: + :return: Process and dispatched matches. + """ + match = self._process_match_processor(match, self.pre_match_processor) + if not match: + return - matches = [] - raw_matches = [] - for pattern in self.patterns: - yield_parent = self._yield_parent() - match_index = -1 - for match in self._match(pattern, input_string, context): - match_index += 1 - match.match_index = match_index - raw_matches.append(match) - yield_children = self._yield_children(match) - if not self._match_parent(match, yield_parent): - continue - validated = True - for child in match.children: - if not self._match_child(child, yield_children): - validated = False - break - if validated: - if self.private_parent: - match.private = True - if self.private_children: - for child in match.children: - child.private = True - if yield_parent or self.private_parent: - matches.append(match) - if yield_children or self.private_children: - for child in match.children: - child.match_index = match_index - matches.append(child) - matches = self._matches_post_process(matches) - self._matches_privatize(matches) - self._matches_ignore(matches) - if with_raw_matches: - return matches, raw_matches - return matches + if not self._process_match(match, match_index): + return + + for child in match.children: + if not self._process_match(child, match_index, child=True): + return + + match = self._process_match_processor(match, self.post_match_processor) + if not match: + return - def _matches_post_process(self, matches): + if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names: + yield match + if self._should_include_children or self.private_children: + children = [x for x in match.children if x.name not in self.ignore_names] + for child in children: + yield child + + def _post_process_matches(self, matches): """ Post process matches with user defined function :param matches: @@ -246,32 +333,6 @@ class Pattern(object): return self.post_processor(matches, self) return matches - def _matches_privatize(self, matches): - """ - Mark matches included in private_names with private flag. - :param matches: - :type matches: - :return: - :rtype: - """ - if self.private_names: - for match in matches: - if match.name in self.private_names: - match.private = True - - def _matches_ignore(self, matches): - """ - Ignore matches included in ignore_names. - :param matches: - :type matches: - :return: - :rtype: - """ - if self.ignore_names: - for match in list(matches): - if match.name in self.ignore_names: - matches.remove(match) - @abstractproperty def patterns(self): # pragma: no cover """ @@ -306,7 +367,7 @@ class Pattern(object): @abstractmethod def _match(self, pattern, input_string, context=None): # pragma: no cover """ - Computes all matches for a given pattern and input + Computes all unprocess matches for a given pattern and input. :param pattern: the pattern to use :param input_string: the string to parse @@ -350,7 +411,9 @@ class StringPattern(Pattern): def _match(self, pattern, input_string, context=None): for index in find_all(input_string, pattern, **self._kwargs): - yield Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs) + match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs) + if match: + yield match class RePattern(Pattern): @@ -411,15 +474,18 @@ class RePattern(Pattern): for start, end in match_object.spans(i): child_match = Match(start, end, name=name, parent=main_match, pattern=self, input_string=input_string, **self._children_match_kwargs) - main_match.children.append(child_match) + if child_match: + main_match.children.append(child_match) else: start, end = match_object.span(i) if start > -1 and end > -1: child_match = Match(start, end, name=name, parent=main_match, pattern=self, input_string=input_string, **self._children_match_kwargs) - main_match.children.append(child_match) + if child_match: + main_match.children.append(child_match) - yield main_match + if main_match: + yield main_match class FunctionalPattern(Pattern): @@ -457,14 +523,18 @@ class FunctionalPattern(Pattern): if self._match_kwargs: options = self._match_kwargs.copy() options.update(args) - yield Match(pattern=self, input_string=input_string, **options) + match = Match(pattern=self, input_string=input_string, **options) + if match: + yield match else: kwargs = self._match_kwargs if isinstance(args[-1], dict): kwargs = dict(kwargs) kwargs.update(args[-1]) args = args[:-1] - yield Match(*args, pattern=self, input_string=input_string, **kwargs) + match = Match(*args, pattern=self, input_string=input_string, **kwargs) + if match: + yield match def filter_match_kwargs(kwargs, children=False): diff --git a/libs/rebulk/processors.py b/libs/rebulk/processors.py index b9fa52b43..6a4f0bab4 100644 --- a/libs/rebulk/processors.py +++ b/libs/rebulk/processors.py @@ -30,7 +30,7 @@ def _default_conflict_solver(match, conflicting_match): """ if len(conflicting_match.initiator) < len(match.initiator): return conflicting_match - elif len(match.initiator) < len(conflicting_match.initiator): + if len(match.initiator) < len(conflicting_match.initiator): return match return None diff --git a/libs/rebulk/rebulk.py b/libs/rebulk/rebulk.py index 42fb6440c..a6a0fd2fb 100644 --- a/libs/rebulk/rebulk.py +++ b/libs/rebulk/rebulk.py @@ -5,20 +5,16 @@ Entry point functions and classes for Rebulk """ from logging import getLogger +from .builder import Builder from .match import Matches - -from .pattern import RePattern, StringPattern, FunctionalPattern -from .chain import Chain - from .processors import ConflictSolver, PrivateRemover -from .loose import set_defaults -from .utils import extend_safe from .rules import Rules +from .utils import extend_safe log = getLogger(__name__).log -class Rebulk(object): +class Rebulk(Builder): r""" Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It use a fluent API to chain ``string``, ``regex``, and ``functional`` methods to define various patterns types. @@ -44,6 +40,7 @@ class Rebulk(object): >>> bulk.matches("the lakers are from la") [<lakers:(4, 10)>, <la:(20, 22)>] """ + # pylint:disable=protected-access def __init__(self, disabled=lambda context: False, default_rules=True): @@ -56,6 +53,7 @@ class Rebulk(object): :return: :rtype: """ + super(Rebulk, self).__init__() if not callable(disabled): self.disabled = lambda context: disabled else: @@ -64,11 +62,6 @@ class Rebulk(object): self._rules = Rules() if default_rules: self.rules(ConflictSolver, PrivateRemover) - self._defaults = {} - self._regex_defaults = {} - self._string_defaults = {} - self._functional_defaults = {} - self._chain_defaults = {} self._rebulks = [] def pattern(self, *pattern): @@ -83,172 +76,6 @@ class Rebulk(object): self._patterns.extend(pattern) return self - def defaults(self, **kwargs): - """ - Define default keyword arguments for all patterns - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._defaults = kwargs - return self - - def regex_defaults(self, **kwargs): - """ - Define default keyword arguments for functional patterns. - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._regex_defaults = kwargs - return self - - def regex(self, *pattern, **kwargs): - """ - Add re pattern - - :param pattern: - :type pattern: - :return: self - :rtype: Rebulk - """ - self.pattern(self.build_re(*pattern, **kwargs)) - return self - - def build_re(self, *pattern, **kwargs): - """ - Builds a new regular expression pattern - - :param pattern: - :type pattern: - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - set_defaults(self._regex_defaults, kwargs) - set_defaults(self._defaults, kwargs) - return RePattern(*pattern, **kwargs) - - def string_defaults(self, **kwargs): - """ - Define default keyword arguments for string patterns. - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._string_defaults = kwargs - return self - - def string(self, *pattern, **kwargs): - """ - Add string pattern - - :param pattern: - :type pattern: - :return: self - :rtype: Rebulk - """ - self.pattern(self.build_string(*pattern, **kwargs)) - return self - - def build_string(self, *pattern, **kwargs): - """ - Builds a new string pattern - - :param pattern: - :type pattern: - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - set_defaults(self._string_defaults, kwargs) - set_defaults(self._defaults, kwargs) - return StringPattern(*pattern, **kwargs) - - def functional_defaults(self, **kwargs): - """ - Define default keyword arguments for functional patterns. - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._functional_defaults = kwargs - return self - - def functional(self, *pattern, **kwargs): - """ - Add functional pattern - - :param pattern: - :type pattern: - :return: self - :rtype: Rebulk - """ - self.pattern(self.build_functional(*pattern, **kwargs)) - return self - - def build_functional(self, *pattern, **kwargs): - """ - Builds a new functional pattern - - :param pattern: - :type pattern: - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - set_defaults(self._functional_defaults, kwargs) - set_defaults(self._defaults, kwargs) - return FunctionalPattern(*pattern, **kwargs) - - def chain_defaults(self, **kwargs): - """ - Define default keyword arguments for patterns chain. - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - self._chain_defaults = kwargs - return self - - def chain(self, **kwargs): - """ - Add patterns chain, using configuration of this rebulk - - :param pattern: - :type pattern: - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - chain = self.build_chain(**kwargs) - self._patterns.append(chain) - return chain - - def build_chain(self, **kwargs): - """ - Builds a new patterns chain - - :param pattern: - :type pattern: - :param kwargs: - :type kwargs: - :return: - :rtype: - """ - set_defaults(self._chain_defaults, kwargs) - set_defaults(self._defaults, kwargs) - return Chain(self, **kwargs) - def rules(self, *rules): """ Add rules as a module, class or instance. diff --git a/libs/rebulk/rules.py b/libs/rebulk/rules.py index 19b563ab8..2514904f4 100644 --- a/libs/rebulk/rules.py +++ b/libs/rebulk/rules.py @@ -140,10 +140,9 @@ class RemoveMatch(Consequence): # pylint: disable=abstract-method matches.remove(match) ret.append(match) return ret - else: - if when_response in matches: - matches.remove(when_response) - return when_response + if when_response in matches: + matches.remove(when_response) + return when_response class AppendMatch(Consequence): # pylint: disable=abstract-method @@ -164,12 +163,11 @@ class AppendMatch(Consequence): # pylint: disable=abstract-method matches.append(match) ret.append(match) return ret - else: - if self.match_name: - when_response.name = self.match_name - if when_response not in matches: - matches.append(when_response) - return when_response + if self.match_name: + when_response.name = self.match_name + if when_response not in matches: + matches.append(when_response) + return when_response class RenameMatch(Consequence): # pylint: disable=abstract-method diff --git a/libs/rebulk/test/test_chain.py b/libs/rebulk/test/test_chain.py index 2715abc25..f39955465 100644 --- a/libs/rebulk/test/test_chain.py +++ b/libs/rebulk/test/test_chain.py @@ -2,11 +2,11 @@ # -*- coding: utf-8 -*- # pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition import re - from functools import partial +from rebulk.pattern import FunctionalPattern, StringPattern, RePattern +from ..rebulk import Rebulk from ..validators import chars_surround -from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern def test_chain_close(): @@ -63,18 +63,61 @@ def test_build_chain(): def test_chain_defaults(): rebulk = Rebulk() - rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True) + rebulk.defaults(validator=lambda x: x.value.startswith('t'), ignore_names=['testIgnore'], children=True) - rebulk.chain()\ + rebulk.chain() \ .regex("(?P<test>test)") \ .regex(" ").repeater("*") \ + .regex("(?P<best>best)") \ + .regex(" ").repeater("*") \ .regex("(?P<testIgnore>testIgnore)") - matches = rebulk.matches("test testIgnore") + matches = rebulk.matches("test best testIgnore") assert len(matches) == 1 assert matches[0].name == "test" +def test_chain_with_validators(): + def chain_validator(match): + return match.value.startswith('t') and match.value.endswith('t') + + def default_validator(match): + return match.value.startswith('t') and match.value.endswith('g') + + def custom_validator(match): + return match.value.startswith('b') and match.value.endswith('t') + + rebulk = Rebulk() + rebulk.defaults(children=True, validator=default_validator) + + rebulk.chain(validate_all=True, validator={'__parent__': chain_validator}) \ + .regex("(?P<test>testing)", validator=default_validator).repeater("+") \ + .regex(" ").repeater("+") \ + .regex("(?P<best>best)", validator=custom_validator).repeater("+") + matches = rebulk.matches("some testing best end") + + assert len(matches) == 2 + assert matches[0].name == "test" + assert matches[1].name == "best" + + +def test_matches_docs(): + rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE) \ + .defaults(children=True, formatter={'episode': int, 'version': int}) \ + .chain() \ + .regex(r'e(?P<episode>\d{1,4})').repeater(1) \ + .regex(r'v(?P<version>\d+)').repeater('?') \ + .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \ + .close() # .repeater(1) could be omitted as it's the default behavior + + result = rebulk.matches("This is E14v2-15-16-17").to_dict() # converts matches to dict + + assert 'episode' in result + assert result['episode'] == [14, 15, 16, 17] + assert 'version' in result + assert result['version'] == 2 + + def test_matches(): rebulk = Rebulk() @@ -144,8 +187,8 @@ def test_matches(): def test_matches_2(): rebulk = Rebulk() \ .regex_defaults(flags=re.IGNORECASE) \ - .chain(children=True, formatter={'episode': int}) \ - .defaults(formatter={'version': int}) \ + .defaults(children=True, formatter={'episode': int, 'version': int}) \ + .chain() \ .regex(r'e(?P<episode>\d{1,4})') \ .regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \ @@ -173,25 +216,32 @@ def test_matches_2(): def test_matches_3(): alt_dash = (r'@', r'[\W_]') # abbreviation - rebulk = Rebulk() + match_names = ['season', 'episode'] + other_names = ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date'] - rebulk.chain(formatter={'season': int, 'episode': int}, - tags=['SxxExx'], - abbreviations=[alt_dash], - private_names=['episodeSeparator', 'seasonSeparator'], - children=True, - private_parent=True, - conflict_solver=lambda match, other: match - if match.name in ['season', 'episode'] and other.name in - ['screen_size', 'video_codec', 'audio_codec', - 'audio_channels', 'container', 'date'] - else '__default__') \ + rebulk = Rebulk() + rebulk.defaults(formatter={'season': int, 'episode': int}, + tags=['SxxExx'], + abbreviations=[alt_dash], + private_names=['episodeSeparator', 'seasonSeparator'], + children=True, + private_parent=True, + conflict_solver=lambda match, other: match + if match.name in match_names and other.name in other_names + else '__default__') + + rebulk.chain() \ + .defaults(children=True, private_parent=True) \ .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \ .regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \ + .close() \ .chain() \ + .defaults(children=True, private_parent=True) \ .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \ .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \ + .close() \ .chain() \ + .defaults(children=True, private_parent=True) \ .regex(r'S(?P<season>\d+)') \ .regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*') @@ -240,11 +290,11 @@ def test_matches_4(): rebulk = Rebulk() rebulk.regex_defaults(flags=re.IGNORECASE) - rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True, - validator={'__parent__': seps_surround}, children=True, private_parent=True) + rebulk.defaults(validate_all=True, children=True) + rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], private_parent=True) - rebulk.chain(formatter={'episode': int, 'version': int}) \ - .defaults(validator=None) \ + rebulk.chain(validator={'__parent__': seps_surround}, formatter={'episode': int, 'version': int}) \ + .defaults(formatter={'episode': int, 'version': int}) \ .regex(r'e(?P<episode>\d{1,4})') \ .regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*') @@ -262,11 +312,11 @@ def test_matches_5(): rebulk = Rebulk() rebulk.regex_defaults(flags=re.IGNORECASE) - rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True, - validator={'__parent__': seps_surround}, children=True, private_parent=True) - rebulk.chain(formatter={'episode': int, 'version': int}) \ - .defaults(validator=None) \ + rebulk.chain(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True, + validator={'__parent__': seps_surround}, children=True, private_parent=True, + formatter={'episode': int, 'version': int}) \ + .defaults(children=True, private_parent=True) \ .regex(r'e(?P<episode>\d{1,4})') \ .regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}') @@ -288,7 +338,7 @@ def test_matches_6(): validator=None, children=True, private_parent=True) rebulk.chain(formatter={'episode': int, 'version': int}) \ - .defaults(validator=None) \ + .defaults(children=True, private_parent=True) \ .regex(r'e(?P<episode>\d{1,4})') \ .regex(r'v(?P<version>\d+)').repeater('?') \ .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}') diff --git a/libs/rebulk/test/test_debug.py b/libs/rebulk/test/test_debug.py index cd9e556df..8abdac5f4 100644 --- a/libs/rebulk/test/test_debug.py +++ b/libs/rebulk/test/test_debug.py @@ -2,19 +2,15 @@ # -*- coding: utf-8 -*- # pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition +from .default_rules_module import RuleRemove0 +from .. import debug +from ..match import Match from ..pattern import StringPattern from ..rebulk import Rebulk -from ..match import Match -from .. import debug -from .default_rules_module import RuleRemove0 class TestDebug(object): - - - #request.addfinalizer(disable_debug) - - + # request.addfinalizer(disable_debug) debug.DEBUG = True pattern = StringPattern(1, 3, value="es") @@ -38,43 +34,43 @@ class TestDebug(object): debug.DEBUG = False def test_pattern(self): - assert self.pattern.defined_at.lineno == 20 + assert self.pattern.defined_at.lineno > 0 assert self.pattern.defined_at.name == 'rebulk.test.test_debug' assert self.pattern.defined_at.filename.endswith('test_debug.py') - assert str(self.pattern.defined_at) == 'test_debug.py#L20' - assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>' + assert str(self.pattern.defined_at).startswith('test_debug.py#L') + assert repr(self.pattern).startswith('<StringPattern@test_debug.py#L') def test_match(self): - assert self.match.defined_at.lineno == 22 + assert self.match.defined_at.lineno > 0 assert self.match.defined_at.name == 'rebulk.test.test_debug' assert self.match.defined_at.filename.endswith('test_debug.py') - assert str(self.match.defined_at) == 'test_debug.py#L22' + assert str(self.match.defined_at).startswith('test_debug.py#L') def test_rule(self): - assert self.rule.defined_at.lineno == 23 + assert self.rule.defined_at.lineno > 0 assert self.rule.defined_at.name == 'rebulk.test.test_debug' assert self.rule.defined_at.filename.endswith('test_debug.py') - assert str(self.rule.defined_at) == 'test_debug.py#L23' - assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>' + assert str(self.rule.defined_at).startswith('test_debug.py#L') + assert repr(self.rule).startswith('<RuleRemove0@test_debug.py#L') def test_rebulk(self): """ This test fails on travis CI, can't find out why there's 1 line offset ... """ - assert self.rebulk._patterns[0].defined_at.lineno in [26, 27] + assert self.rebulk._patterns[0].defined_at.lineno > 0 assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug' assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py') - assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27'] + assert str(self.rebulk._patterns[0].defined_at).startswith('test_debug.py#L') - assert self.rebulk._patterns[1].defined_at.lineno in [27, 28] + assert self.rebulk._patterns[1].defined_at.lineno > 0 assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug' assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py') - assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28'] + assert str(self.rebulk._patterns[1].defined_at).startswith('test_debug.py#L') assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at diff --git a/libs/rebulk/test/test_match.py b/libs/rebulk/test/test_match.py index 87273d540..8750733a5 100644 --- a/libs/rebulk/test/test_match.py +++ b/libs/rebulk/test/test_match.py @@ -116,6 +116,9 @@ class TestMatchesClass(object): assert "tag1" in matches.tags assert "tag2" in matches.tags + assert self.match3.tagged("tag1") + assert not self.match3.tagged("start") + tag1 = matches.tagged("tag1") assert len(tag1) == 2 assert tag1[0] == self.match2 diff --git a/libs/rebulk/utils.py b/libs/rebulk/utils.py index 9aaf56d14..85ddd41ec 100644 --- a/libs/rebulk/utils.py +++ b/libs/rebulk/utils.py @@ -3,7 +3,10 @@ """ Various utilities functions """ -from collections import MutableSet +try: + from collections.abc import MutableSet +except ImportError: + from collections import MutableSet from types import GeneratorType diff --git a/libs/rebulk/validators.py b/libs/rebulk/validators.py index 5fd3dcb6f..b8959c54c 100644 --- a/libs/rebulk/validators.py +++ b/libs/rebulk/validators.py @@ -62,9 +62,20 @@ def validators(*chained_validators): :return: :rtype: """ + def validator_chain(match): # pylint:disable=missing-docstring for chained_validator in chained_validators: if not chained_validator(match): return False return True + return validator_chain + + +def allways_true(match): # pylint:disable=unused-argument + """ + A validator which is allways true + :param match: + :return: + """ + return True |