summary refs log tree commit diff homepage
path: root/libs/rebulk
diff options
context:
space:
mode:
author Louis Vézina <[email protected]> 2020-05-20 11:29:39 -0400
committer Louis Vézina <[email protected]> 2020-05-20 11:29:39 -0400
commit 376e13d7f1ab8e6d9202c1a51a96526de0f11163 (patch)
tree 77e911bffc9a59ed2868a673f0733ad38f5b4ccd /libs/rebulk
parent 5b44007bbb7ef49bbf4087b43dbb948433639fbe (diff)
download bazarr-376e13d7f1ab8e6d9202c1a51a96526de0f11163.tar.gz
bazarr-376e13d7f1ab8e6d9202c1a51a96526de0f11163.zip
Upgraded GuessIt to 3.0.1
Diffstat (limited to 'libs/rebulk')
-rw-r--r-- libs/rebulk/__version__.py 2
-rw-r--r-- libs/rebulk/builder.py 217
-rw-r--r-- libs/rebulk/chain.py 347
-rw-r--r-- libs/rebulk/formatters.py 10
-rw-r--r-- libs/rebulk/introspector.py 5
-rw-r--r-- libs/rebulk/loose.py 72
-rw-r--r-- libs/rebulk/match.py 28
-rw-r--r-- libs/rebulk/pattern.py 332
-rw-r--r-- libs/rebulk/processors.py 2
-rw-r--r-- libs/rebulk/rebulk.py 183
-rw-r--r-- libs/rebulk/rules.py 18
-rw-r--r-- libs/rebulk/test/test_chain.py 106
-rw-r--r-- libs/rebulk/test/test_debug.py 36
-rw-r--r-- libs/rebulk/test/test_match.py 3
-rw-r--r-- libs/rebulk/utils.py 5
-rw-r--r-- libs/rebulk/validators.py 11
16 files changed, 771 insertions, 606 deletions
diff --git a/libs/rebulk/__version__.py b/libs/rebulk/__version__.py
index 11d0431dc..939c554ca 100644
--- a/libs/rebulk/__version__.py
+++ b/libs/rebulk/__version__.py
@@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
-__version__ = '0.9.0'
+__version__ = '2.0.1'
diff --git a/libs/rebulk/builder.py b/libs/rebulk/builder.py
new file mode 100644
index 000000000..c91420aa3
--- /dev/null
+++ b/libs/rebulk/builder.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Base builder class for Rebulk
+"""
+from abc import ABCMeta, abstractmethod
+from copy import deepcopy
+from logging import getLogger
+
+from six import add_metaclass
+
+from .loose import set_defaults
+from .pattern import RePattern, StringPattern, FunctionalPattern
+
+log = getLogger(__name__).log
+
+
+@add_metaclass(ABCMeta)
+class Builder(object):
+ """
+ Base builder class for patterns
+ """
+
+ def __init__(self):
+ self._defaults = {}
+ self._regex_defaults = {}
+ self._string_defaults = {}
+ self._functional_defaults = {}
+ self._chain_defaults = {}
+
+ def reset(self):
+ """
+ Reset all defaults.
+
+ :return:
+ """
+ self.__init__()
+
+ def defaults(self, **kwargs):
+ """
+ Define default keyword arguments for all patterns
+ :param kwargs:
+ :type kwargs:
+ :return:
+ :rtype:
+ """
+ set_defaults(kwargs, self._defaults, override=True)
+ return self
+
+ def regex_defaults(self, **kwargs):
+ """
+ Define default keyword arguments for regex patterns.
+ :param kwargs:
+ :type kwargs:
+ :return:
+ :rtype:
+ """
+ set_defaults(kwargs, self._regex_defaults, override=True)
+ return self
+
+ def string_defaults(self, **kwargs):
+ """
+ Define default keyword arguments for string patterns.
+ :param kwargs:
+ :type kwargs:
+ :return:
+ :rtype:
+ """
+ set_defaults(kwargs, self._string_defaults, override=True)
+ return self
+
+ def functional_defaults(self, **kwargs):
+ """
+ Define default keyword arguments for functional patterns.
+ :param kwargs:
+ :type kwargs:
+ :return:
+ :rtype:
+ """
+ set_defaults(kwargs, self._functional_defaults, override=True)
+ return self
+
+ def chain_defaults(self, **kwargs):
+ """
+ Define default keyword arguments for patterns chain.
+ :param kwargs:
+ :type kwargs:
+ :return:
+ :rtype:
+ """
+ set_defaults(kwargs, self._chain_defaults, override=True)
+ return self
+
+ def build_re(self, *pattern, **kwargs):
+ """
+ Builds a new regular expression pattern
+
+ :param pattern:
+ :type pattern:
+ :param kwargs:
+ :type kwargs:
+ :return:
+ :rtype:
+ """
+ set_defaults(self._regex_defaults, kwargs)
+ set_defaults(self._defaults, kwargs)
+ return RePattern(*pattern, **kwargs)
+
+ def build_string(self, *pattern, **kwargs):
+ """
+ Builds a new string pattern
+
+ :param pattern:
+ :type pattern:
+ :param kwargs:
+ :type kwargs:
+ :return:
+ :rtype:
+ """
+ set_defaults(self._string_defaults, kwargs)
+ set_defaults(self._defaults, kwargs)
+ return StringPattern(*pattern, **kwargs)
+
+ def build_functional(self, *pattern, **kwargs):
+ """
+ Builds a new functional pattern
+
+ :param pattern:
+ :type pattern:
+ :param kwargs:
+ :type kwargs:
+ :return:
+ :rtype:
+ """
+ set_defaults(self._functional_defaults, kwargs)
+ set_defaults(self._defaults, kwargs)
+ return FunctionalPattern(*pattern, **kwargs)
+
+ def build_chain(self, **kwargs):
+ """
+ Builds a new patterns chain
+
+ :param pattern:
+ :type pattern:
+ :param kwargs:
+ :type kwargs:
+ :return:
+ :rtype:
+ """
+ from .chain import Chain
+ set_defaults(self._chain_defaults, kwargs)
+ set_defaults(self._defaults, kwargs)
+ chain = Chain(self, **kwargs)
+ chain._defaults = deepcopy(self._defaults) # pylint: disable=protected-access
+ chain._regex_defaults = deepcopy(self._regex_defaults) # pylint: disable=protected-access
+ chain._functional_defaults = deepcopy(self._functional_defaults) # pylint: disable=protected-access
+ chain._string_defaults = deepcopy(self._string_defaults) # pylint: disable=protected-access
+ chain._chain_defaults = deepcopy(self._chain_defaults) # pylint: disable=protected-access
+ return chain
+
+ @abstractmethod
+ def pattern(self, *pattern):
+ """
+ Register a list of Pattern instances
+ :param pattern:
+ :return:
+ """
+ pass
+
+ def regex(self, *pattern, **kwargs):
+ """
+ Add re pattern
+
+ :param pattern:
+ :type pattern:
+ :return: self
+ :rtype: Rebulk
+ """
+ return self.pattern(self.build_re(*pattern, **kwargs))
+
+ def string(self, *pattern, **kwargs):
+ """
+ Add string pattern
+
+ :param pattern:
+ :type pattern:
+ :return: self
+ :rtype: Rebulk
+ """
+ return self.pattern(self.build_string(*pattern, **kwargs))
+
+ def functional(self, *pattern, **kwargs):
+ """
+ Add functional pattern
+
+ :param pattern:
+ :type pattern:
+ :return: self
+ :rtype: Rebulk
+ """
+ functional = self.build_functional(*pattern, **kwargs)
+ return self.pattern(functional)
+
+ def chain(self, **kwargs):
+ """
+ Add patterns chain, using configuration of this rebulk
+
+ :param pattern:
+ :type pattern:
+ :param kwargs:
+ :type kwargs:
+ :return:
+ :rtype:
+ """
+ chain = self.build_chain(**kwargs)
+ self.pattern(chain)
+ return chain
diff --git a/libs/rebulk/chain.py b/libs/rebulk/chain.py
index dfb6ea442..ba31ec9a5 100644
--- a/libs/rebulk/chain.py
+++ b/libs/rebulk/chain.py
@@ -6,9 +6,10 @@ Chain patterns and handle repetiting capture group
# pylint: disable=super-init-not-called
import itertools
-from .loose import call, set_defaults
+from .builder import Builder
+from .loose import call
from .match import Match, Matches
-from .pattern import Pattern, filter_match_kwargs
+from .pattern import Pattern, filter_match_kwargs, BasePattern
from .remodule import re
@@ -19,150 +20,46 @@ class _InvalidChainException(Exception):
pass
-class Chain(Pattern):
+class Chain(Pattern, Builder):
"""
Definition of a pattern chain to search for.
"""
- def __init__(self, rebulk, chain_breaker=None, **kwargs):
- call(super(Chain, self).__init__, **kwargs)
+ def __init__(self, parent, chain_breaker=None, **kwargs):
+ Builder.__init__(self)
+ call(Pattern.__init__, self, **kwargs)
self._kwargs = kwargs
self._match_kwargs = filter_match_kwargs(kwargs)
- self._defaults = {}
- self._regex_defaults = {}
- self._string_defaults = {}
- self._functional_defaults = {}
if callable(chain_breaker):
self.chain_breaker = chain_breaker
else:
self.chain_breaker = None
- self.rebulk = rebulk
+ self.parent = parent
self.parts = []
- def defaults(self, **kwargs):
+ def pattern(self, *pattern):
"""
- Define default keyword arguments for all patterns
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- self._defaults = kwargs
- return self
-
- def regex_defaults(self, **kwargs):
- """
- Define default keyword arguments for functional patterns.
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- self._regex_defaults = kwargs
- return self
-
- def string_defaults(self, **kwargs):
- """
- Define default keyword arguments for string patterns.
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- self._string_defaults = kwargs
- return self
-
- def functional_defaults(self, **kwargs):
- """
- Define default keyword arguments for functional patterns.
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- self._functional_defaults = kwargs
- return self
-
- def chain(self):
- """
- Add patterns chain, using configuration from this chain
-
- :return:
- :rtype:
- """
- # pylint: disable=protected-access
- chain = self.rebulk.chain(**self._kwargs)
- chain._defaults = dict(self._defaults)
- chain._regex_defaults = dict(self._regex_defaults)
- chain._functional_defaults = dict(self._functional_defaults)
- chain._string_defaults = dict(self._string_defaults)
- return chain
-
- def regex(self, *pattern, **kwargs):
- """
- Add re pattern
-
- :param pattern:
- :type pattern:
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- set_defaults(self._kwargs, kwargs)
- set_defaults(self._regex_defaults, kwargs)
- set_defaults(self._defaults, kwargs)
- pattern = self.rebulk.build_re(*pattern, **kwargs)
- part = ChainPart(self, pattern)
- self.parts.append(part)
- return part
-
- def functional(self, *pattern, **kwargs):
- """
- Add functional pattern
-
- :param pattern:
- :type pattern:
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- set_defaults(self._kwargs, kwargs)
- set_defaults(self._functional_defaults, kwargs)
- set_defaults(self._defaults, kwargs)
- pattern = self.rebulk.build_functional(*pattern, **kwargs)
- part = ChainPart(self, pattern)
- self.parts.append(part)
- return part
-
- def string(self, *pattern, **kwargs):
- """
- Add string pattern
:param pattern:
- :type pattern:
- :param kwargs:
- :type kwargs:
:return:
- :rtype:
"""
- set_defaults(self._kwargs, kwargs)
- set_defaults(self._functional_defaults, kwargs)
- set_defaults(self._defaults, kwargs)
- pattern = self.rebulk.build_string(*pattern, **kwargs)
- part = ChainPart(self, pattern)
+ if not pattern:
+ raise ValueError("One pattern should be given to the chain")
+ if len(pattern) > 1:
+ raise ValueError("Only one pattern can be given to the chain")
+ part = ChainPart(self, pattern[0])
self.parts.append(part)
return part
def close(self):
"""
- Close chain builder to continue registering other pattern
-
- :return:
- :rtype:
+ Deeply close the chain
+ :return: Rebulk instance
"""
- return self.rebulk
+ parent = self.parent
+ while isinstance(parent, Chain):
+ parent = parent.parent
+ return parent
def _match(self, pattern, input_string, context=None):
# pylint: disable=too-many-locals,too-many-nested-blocks
@@ -173,42 +70,20 @@ class Chain(Pattern):
chain_found = False
current_chain_matches = []
valid_chain = True
- is_chain_start = True
for chain_part in self.parts:
try:
- chain_part_matches, raw_chain_part_matches = Chain._match_chain_part(is_chain_start, chain_part,
- chain_input_string,
- context)
-
- Chain._fix_matches_offset(chain_part_matches, input_string, offset)
- Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
-
- if raw_chain_part_matches:
- grouped_matches_dict = dict()
- for match_index, match in itertools.groupby(chain_part_matches,
- lambda m: m.match_index):
- grouped_matches_dict[match_index] = list(match)
-
- grouped_raw_matches_dict = dict()
- for match_index, raw_match in itertools.groupby(raw_chain_part_matches,
- lambda m: m.match_index):
- grouped_raw_matches_dict[match_index] = list(raw_match)
-
- for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
- chain_found = True
- offset = grouped_raw_matches[-1].raw_end
- chain_input_string = input_string[offset:]
- if not chain_part.is_hidden:
- grouped_matches = grouped_matches_dict.get(match_index, [])
- if self._chain_breaker_eval(current_chain_matches + grouped_matches):
- current_chain_matches.extend(grouped_matches)
+ chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string,
+ context,
+ with_raw_matches=True)
+ chain_found, chain_input_string, offset = \
+ self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
+ input_string, chain_input_string, offset, current_chain_matches)
except _InvalidChainException:
valid_chain = False
if current_chain_matches:
offset = current_chain_matches[0].raw_end
break
- is_chain_start = False
if not chain_found:
break
if current_chain_matches and valid_chain:
@@ -217,38 +92,66 @@ class Chain(Pattern):
return chain_matches
- def _match_parent(self, match, yield_parent):
+ def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
+ input_string, chain_input_string, offset, current_chain_matches):
+ Chain._fix_matches_offset(chain_part_matches, input_string, offset)
+ Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
+
+ if raw_chain_part_matches:
+ grouped_matches_dict = self._group_by_match_index(chain_part_matches)
+ grouped_raw_matches_dict = self._group_by_match_index(raw_chain_part_matches)
+
+ for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
+ chain_found = True
+ offset = grouped_raw_matches[-1].raw_end
+ chain_input_string = input_string[offset:]
+
+ if not chain_part.is_hidden:
+ grouped_matches = grouped_matches_dict.get(match_index, [])
+ if self._chain_breaker_eval(current_chain_matches + grouped_matches):
+ current_chain_matches.extend(grouped_matches)
+ return chain_found, chain_input_string, offset
+
+ def _process_match(self, match, match_index, child=False):
"""
- Handle a parent match
+ Handle a match
:param match:
:type match:
- :param yield_parent:
- :type yield_parent:
+ :param match_index:
+ :type match_index:
+ :param child:
+ :type child:
:return:
:rtype:
"""
- ret = super(Chain, self)._match_parent(match, yield_parent)
- original_children = Matches(match.children)
- original_end = match.end
- while not ret and match.children:
+ # pylint: disable=too-many-locals
+ ret = super(Chain, self)._process_match(match, match_index, child=child)
+ if ret:
+ return True
+
+ if match.children:
last_pattern = match.children[-1].pattern
- last_pattern_children = [child for child in match.children if child.pattern == last_pattern]
- last_pattern_groups_iter = itertools.groupby(last_pattern_children, lambda child: child.match_index)
- last_pattern_groups = {}
- for index, matches in last_pattern_groups_iter:
- last_pattern_groups[index] = list(matches)
-
- for index in reversed(list(last_pattern_groups)):
- last_matches = list(last_pattern_groups[index])
- for last_match in last_matches:
- match.children.remove(last_match)
- match.end = match.children[-1].end if match.children else match.start
- ret = super(Chain, self)._match_parent(match, yield_parent)
- if ret:
- return True
- match.children = original_children
- match.end = original_end
- return ret
+ last_pattern_groups = self._group_by_match_index(
+ [child_ for child_ in match.children if child_.pattern == last_pattern]
+ )
+
+ if last_pattern_groups:
+ original_children = Matches(match.children)
+ original_end = match.end
+
+ for index in reversed(list(last_pattern_groups)):
+ last_matches = last_pattern_groups[index]
+ for last_match in last_matches:
+ match.children.remove(last_match)
+ match.end = match.children[-1].end if match.children else match.start
+ ret = super(Chain, self)._process_match(match, match_index, child=child)
+ if ret:
+ return True
+
+ match.children = original_children
+ match.end = original_end
+
+ return False
def _build_chain_match(self, current_chain_matches, input_string):
start = None
@@ -282,46 +185,11 @@ class Chain(Pattern):
Chain._fix_matches_offset(chain_part_match.children, input_string, offset)
@staticmethod
- def _match_chain_part(is_chain_start, chain_part, chain_input_string, context):
- chain_part_matches, raw_chain_part_matches = chain_part.pattern.matches(chain_input_string, context,
- with_raw_matches=True)
- chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part,
- chain_input_string)
- raw_chain_part_matches = Chain._truncate_chain_part_matches(is_chain_start, raw_chain_part_matches, chain_part,
- chain_input_string)
-
- Chain._validate_chain_part_matches(raw_chain_part_matches, chain_part)
- return chain_part_matches, raw_chain_part_matches
-
- @staticmethod
- def _truncate_chain_part_matches(is_chain_start, chain_part_matches, chain_part, chain_input_string):
- if not chain_part_matches:
- return chain_part_matches
-
- if not is_chain_start:
- separator = chain_input_string[0:chain_part_matches[0].initiator.raw_start]
- if separator:
- return []
-
- j = 1
- for i in range(0, len(chain_part_matches) - 1):
- separator = chain_input_string[chain_part_matches[i].initiator.raw_end:
- chain_part_matches[i + 1].initiator.raw_start]
- if separator:
- break
- j += 1
- truncated = chain_part_matches[:j]
- if chain_part.repeater_end is not None:
- truncated = [m for m in truncated if m.match_index < chain_part.repeater_end]
- return truncated
-
- @staticmethod
- def _validate_chain_part_matches(chain_part_matches, chain_part):
- max_match_index = -1
- if chain_part_matches:
- max_match_index = max([m.match_index for m in chain_part_matches])
- if max_match_index + 1 < chain_part.repeater_start:
- raise _InvalidChainException
+ def _group_by_match_index(matches):
+ grouped_matches_dict = dict()
+ for match_index, match in itertools.groupby(matches, lambda m: m.match_index):
+ grouped_matches_dict[match_index] = list(match)
+ return grouped_matches_dict
@property
def match_options(self):
@@ -338,7 +206,7 @@ class Chain(Pattern):
return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)
-class ChainPart(object):
+class ChainPart(BasePattern):
"""
Part of a pattern chain.
"""
@@ -350,6 +218,51 @@ class ChainPart(object):
self.repeater_end = 1
self._hidden = False
+ @property
+ def _is_chain_start(self):
+ return self._chain.parts[0] == self
+
+ def matches(self, input_string, context=None, with_raw_matches=False):
+ matches, raw_matches = self.pattern.matches(input_string, context=context, with_raw_matches=True)
+
+ matches = self._truncate_repeater(matches, input_string)
+ raw_matches = self._truncate_repeater(raw_matches, input_string)
+
+ self._validate_repeater(raw_matches)
+
+ if with_raw_matches:
+ return matches, raw_matches
+
+ return matches
+
+ def _truncate_repeater(self, matches, input_string):
+ if not matches:
+ return matches
+
+ if not self._is_chain_start:
+ separator = input_string[0:matches[0].initiator.raw_start]
+ if separator:
+ return []
+
+ j = 1
+ for i in range(0, len(matches) - 1):
+ separator = input_string[matches[i].initiator.raw_end:
+ matches[i + 1].initiator.raw_start]
+ if separator:
+ break
+ j += 1
+ truncated = matches[:j]
+ if self.repeater_end is not None:
+ truncated = [m for m in truncated if m.match_index < self.repeater_end]
+ return truncated
+
+ def _validate_repeater(self, matches):
+ max_match_index = -1
+ if matches:
+ max_match_index = max([m.match_index for m in matches])
+ if max_match_index + 1 < self.repeater_start:
+ raise _InvalidChainException
+
def chain(self):
"""
Add patterns chain, using configuration from this chain
diff --git a/libs/rebulk/formatters.py b/libs/rebulk/formatters.py
index 470469426..7175a54ab 100644
--- a/libs/rebulk/formatters.py
+++ b/libs/rebulk/formatters.py
@@ -15,9 +15,19 @@ def formatters(*chained_formatters):
:return:
:rtype:
"""
+
def formatters_chain(input_string): # pylint:disable=missing-docstring
for chained_formatter in chained_formatters:
input_string = chained_formatter(input_string)
return input_string
return formatters_chain
+
+
+def default_formatter(input_string):
+ """
+ Default formatter
+ :param input_string:
+ :return:
+ """
+ return input_string
diff --git a/libs/rebulk/introspector.py b/libs/rebulk/introspector.py
index 64b9836f0..bfefcb757 100644
--- a/libs/rebulk/introspector.py
+++ b/libs/rebulk/introspector.py
@@ -3,7 +3,7 @@
"""
Introspect rebulk object to retrieve capabilities.
"""
-from abc import ABCMeta, abstractproperty
+from abc import ABCMeta, abstractmethod
from collections import defaultdict
import six
@@ -16,7 +16,8 @@ class Description(object):
"""
Abstract class for a description.
"""
- @abstractproperty
+ @property
+ @abstractmethod
def properties(self): # pragma: no cover
"""
Properties of described object.
diff --git a/libs/rebulk/loose.py b/libs/rebulk/loose.py
index 72543b1e0..423b4ea7a 100644
--- a/libs/rebulk/loose.py
+++ b/libs/rebulk/loose.py
@@ -3,8 +3,18 @@
"""
Various utilities functions
"""
-import inspect
+
import sys
+
+from inspect import isclass
+try:
+ from inspect import getfullargspec as getargspec
+
+ _fullargspec_supported = True
+except ImportError:
+ _fullargspec_supported = False
+ from inspect import getargspec
+
from .utils import is_iterable
if sys.version_info < (3, 4, 0): # pragma: no cover
@@ -45,8 +55,8 @@ def call(function, *args, **kwargs):
:return: sale vakye as default function call
:rtype: object
"""
- func = constructor_args if inspect.isclass(function) else function_args
- call_args, call_kwargs = func(function, *args, **kwargs)
+ func = constructor_args if isclass(function) else function_args
+ call_args, call_kwargs = func(function, *args, ignore_unused=True, **kwargs) # @see #20
return function(*call_args, **call_kwargs)
@@ -63,7 +73,7 @@ def function_args(callable_, *args, **kwargs):
:return: (args, kwargs) matching the function signature
:rtype: tuple
"""
- argspec = inspect.getargspec(callable_) # pylint:disable=deprecated-method
+ argspec = getargspec(callable_) # pylint:disable=deprecated-method
return argspec_args(argspec, False, *args, **kwargs)
@@ -80,7 +90,7 @@ def constructor_args(class_, *args, **kwargs):
:return: (args, kwargs) matching the function signature
:rtype: tuple
"""
- argspec = inspect.getargspec(_constructor(class_)) # pylint:disable=deprecated-method
+ argspec = getargspec(_constructor(class_)) # pylint:disable=deprecated-method
return argspec_args(argspec, True, *args, **kwargs)
@@ -99,7 +109,7 @@ def argspec_args(argspec, constructor, *args, **kwargs):
:return: (args, kwargs) matching the function signature
:rtype: tuple
"""
- if argspec.keywords:
+ if argspec.varkw:
call_kwarg = kwargs
else:
call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # Python 2.6 dict comprehension
@@ -110,6 +120,36 @@ def argspec_args(argspec, constructor, *args, **kwargs):
return call_args, call_kwarg
+if not _fullargspec_supported:
+ def argspec_args_legacy(argspec, constructor, *args, **kwargs):
+ """
+ Return (args, kwargs) matching the argspec object
+
+ :param argspec: argspec to use
+ :type argspec: argspec
+ :param constructor: is it a constructor ?
+ :type constructor: bool
+ :param args:
+ :type args:
+ :param kwargs:
+ :type kwargs:
+ :return: (args, kwargs) matching the function signature
+ :rtype: tuple
+ """
+ if argspec.keywords:
+ call_kwarg = kwargs
+ else:
+ call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # Python 2.6 dict comprehension
+ if argspec.varargs:
+ call_args = args
+ else:
+ call_args = args[:len(argspec.args) - (1 if constructor else 0)]
+ return call_args, call_kwarg
+
+
+ argspec_args = argspec_args_legacy
+
+
def ensure_list(param):
"""
Retrieves a list from given parameter.
@@ -177,9 +217,12 @@ def filter_index(collection, predicate=None, index=None):
return collection
-def set_defaults(defaults, kwargs):
+def set_defaults(defaults, kwargs, override=False):
"""
Set defaults from defaults dict to kwargs dict
+
+ :param override:
+ :type override:
:param defaults:
:type defaults:
:param kwargs:
@@ -187,12 +230,13 @@ def set_defaults(defaults, kwargs):
:return:
:rtype:
"""
+ if 'clear' in defaults.keys() and defaults.pop('clear'):
+ kwargs.clear()
for key, value in defaults.items():
- if key not in kwargs and value is not None:
+ if key in kwargs:
+ if isinstance(value, list) and isinstance(kwargs[key], list):
+ kwargs[key] = list(value) + kwargs[key]
+ elif isinstance(value, dict) and isinstance(kwargs[key], dict):
+ set_defaults(value, kwargs[key])
+ if key not in kwargs or override:
kwargs[key] = value
- elif isinstance(value, list) and isinstance(kwargs[key], list):
- kwargs[key] = list(value) + kwargs[key]
- elif isinstance(value, dict) and isinstance(kwargs[key], dict):
- set_defaults(value, kwargs[key])
- elif key in kwargs and value is None:
- kwargs[key] = None
diff --git a/libs/rebulk/match.py b/libs/rebulk/match.py
index a786df4df..d8e72df42 100644
--- a/libs/rebulk/match.py
+++ b/libs/rebulk/match.py
@@ -5,7 +5,11 @@ Classes and functions related to matches
"""
import copy
import itertools
-from collections import defaultdict, MutableSequence
+from collections import defaultdict
+try:
+ from collections.abc import MutableSequence
+except ImportError:
+ from collections import MutableSequence
try:
from collections import OrderedDict # pylint:disable=ungrouped-imports
@@ -778,9 +782,9 @@ class Match(object):
right.start = end
if right:
ret.append(right)
- elif end <= current.end and end > current.start:
+ elif current.end >= end > current.start:
current.start = end
- elif start >= current.start and start < current.end:
+ elif current.start <= start < current.end:
current.end = start
return filter_index(ret, predicate, index)
@@ -811,6 +815,24 @@ class Match(object):
return filter_index(ret, predicate, index)
+ def tagged(self, *tags):
+ """
+ Check if this match has at least one of the provided tags
+
+ :param tags:
+ :return: True if at least one tag is defined, False otherwise.
+ """
+ return any(tag in self.tags for tag in tags)
+
+ def named(self, *names):
+ """
+ Check if at least one child match has one of the provided names
+
+ :param names:
+ :return: True if at least one child is named with one of the given names, False otherwise.
+ """
+ return any(name in self.names for name in names)
+
def __len__(self):
return self.end - self.start
diff --git a/libs/rebulk/pattern.py b/libs/rebulk/pattern.py
index 57b274e89..beb8b2731 100644
--- a/libs/rebulk/pattern.py
+++ b/libs/rebulk/pattern.py
@@ -10,14 +10,39 @@ from abc import ABCMeta, abstractmethod, abstractproperty
import six
from . import debug
+from .formatters import default_formatter
from .loose import call, ensure_list, ensure_dict
from .match import Match
from .remodule import re, REGEX_AVAILABLE
from .utils import find_all, is_iterable, get_first_defined
+from .validators import allways_true
@six.add_metaclass(ABCMeta)
-class Pattern(object):
+class BasePattern(object):
+ """
+ Base class for Pattern like objects
+ """
+
+ @abstractmethod
+ def matches(self, input_string, context=None, with_raw_matches=False):
+ """
+ Computes all matches for a given input
+
+ :param input_string: the string to parse
+ :type input_string: str
+ :param context: the context
+ :type context: dict
+ :param with_raw_matches: should return details
+ :type with_raw_matches: bool
+ :return: matches based on input_string for this pattern
+ :rtype: iterator[Match]
+ """
+ pass
+
+
+@six.add_metaclass(ABCMeta)
+class Pattern(BasePattern):
"""
Definition of a particular pattern to search for.
"""
@@ -25,7 +50,7 @@ class Pattern(object):
def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
- properties=None, post_processor=None, **kwargs):
+ properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
"""
:param name: Name of this pattern
:type name: str
@@ -66,15 +91,19 @@ class Pattern(object):
:type disabled: bool|function
:param log_lvl: Log level associated to this pattern
:type log_lvl: int
- :param post_process: Post processing function
+ :param post_processor: Post processing function
:type post_processor: func
+ :param pre_match_processor: Pre match processing function
+ :type pre_match_processor: func
+ :param post_match_processor: Post match processing function
+ :type post_match_processor: func
"""
# pylint:disable=too-many-locals,unused-argument
self.name = name
self.tags = ensure_list(tags)
- self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x)
+ self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
self.values, self._default_value = ensure_dict(value, None)
- self.validators, self._default_validator = ensure_dict(validator, lambda match: True)
+ self.validators, self._default_validator = ensure_dict(validator, allways_true)
self.every = every
self.children = children
self.private = private
@@ -96,6 +125,14 @@ class Pattern(object):
self.post_processor = None
else:
self.post_processor = post_processor
+ if not callable(pre_match_processor):
+ self.pre_match_processor = None
+ else:
+ self.pre_match_processor = pre_match_processor
+ if not callable(post_match_processor):
+ self.post_match_processor = None
+ else:
+ self.post_match_processor = post_match_processor
@property
def log_level(self):
@@ -106,19 +143,52 @@ class Pattern(object):
"""
return self._log_level if self._log_level is not None else debug.LOG_LEVEL
- def _yield_children(self, match):
+ def matches(self, input_string, context=None, with_raw_matches=False):
+ """
+ Computes all matches for a given input
+
+ :param input_string: the string to parse
+ :type input_string: str
+ :param context: the context
+ :type context: dict
+ :param with_raw_matches: should return details
+ :type with_raw_matches: bool
+ :return: matches based on input_string for this pattern
+ :rtype: iterator[Match]
+ """
+ # pylint: disable=too-many-branches
+
+ matches = []
+ raw_matches = []
+
+ for pattern in self.patterns:
+ match_index = 0
+ for match in self._match(pattern, input_string, context):
+ raw_matches.append(match)
+ matches.extend(self._process_matches(match, match_index))
+ match_index += 1
+
+ matches = self._post_process_matches(matches)
+
+ if with_raw_matches:
+ return matches, raw_matches
+ return matches
+
+ @property
+ def _should_include_children(self):
"""
- Does this match has children
+ Check if children matches from this pattern should be included in matches results.
:param match:
:type match:
:return:
:rtype:
"""
- return match.children and (self.children or self.every)
+ return self.children or self.every
- def _yield_parent(self):
+ @property
+ def _should_include_parent(self):
"""
- Does this mat
+ Check if a match from this pattern should be included in matches results.
:param match:
:type match:
:return:
@@ -126,115 +196,132 @@ class Pattern(object):
"""
return not self.children or self.every
- def _match_parent(self, match, yield_parent):
+ @staticmethod
+ def _match_config_property_keys(match, child=False):
+ if match.name:
+ yield match.name
+ if child:
+ yield '__children__'
+ else:
+ yield '__parent__'
+ yield None
+
+ @staticmethod
+ def _process_match_index(match, match_index):
"""
- Handle a parent match
+ Process match index from this pattern process state.
+
:param match:
- :type match:
- :param yield_parent:
- :type yield_parent:
:return:
- :rtype:
"""
- if not match or match.value == "":
- return False
-
- pattern_value = get_first_defined(self.values, [match.name, '__parent__', None],
- self._default_value)
- if pattern_value:
- match.value = pattern_value
+ match.match_index = match_index
- if yield_parent or self.format_all:
- match.formatter = get_first_defined(self.formatters, [match.name, '__parent__', None],
- self._default_formatter)
- if yield_parent or self.validate_all:
- validator = get_first_defined(self.validators, [match.name, '__parent__', None],
- self._default_validator)
- if validator and not validator(match):
- return False
- return True
-
- def _match_child(self, child, yield_children):
+ def _process_match_private(self, match, child=False):
"""
- Handle a children match
+ Process match privacy from this pattern configuration.
+
+ :param match:
:param child:
- :type child:
- :param yield_children:
- :type yield_children:
:return:
- :rtype:
"""
- if not child or child.value == "":
- return False
- pattern_value = get_first_defined(self.values, [child.name, '__children__', None],
- self._default_value)
+ if match.name and match.name in self.private_names or \
+ not child and self.private_parent or \
+ child and self.private_children:
+ match.private = True
+
+ def _process_match_value(self, match, child=False):
+ """
+ Process match value from this pattern configuration.
+ :param match:
+ :return:
+ """
+ keys = self._match_config_property_keys(match, child=child)
+ pattern_value = get_first_defined(self.values, keys, self._default_value)
if pattern_value:
- child.value = pattern_value
+ match.value = pattern_value
+
+ def _process_match_formatter(self, match, child=False):
+ """
+ Process match formatter from this pattern configuration.
+
+ :param match:
+ :return:
+ """
+ included = self._should_include_children if child else self._should_include_parent
+ if included or self.format_all:
+ keys = self._match_config_property_keys(match, child=child)
+ match.formatter = get_first_defined(self.formatters, keys, self._default_formatter)
- if yield_children or self.format_all:
- child.formatter = get_first_defined(self.formatters, [child.name, '__children__', None],
- self._default_formatter)
+ def _process_match_validator(self, match, child=False):
+ """
+ Process match validation from this pattern configuration.
- if yield_children or self.validate_all:
- validator = get_first_defined(self.validators, [child.name, '__children__', None],
- self._default_validator)
- if validator and not validator(child):
+ :param match:
+ :return: True if match is validated by the configured validator, False otherwise.
+ """
+ included = self._should_include_children if child else self._should_include_parent
+ if included or self.validate_all:
+ keys = self._match_config_property_keys(match, child=child)
+ validator = get_first_defined(self.validators, keys, self._default_validator)
+ if validator and not validator(match):
return False
return True
- def matches(self, input_string, context=None, with_raw_matches=False):
+ def _process_match(self, match, match_index, child=False):
"""
- Computes all matches for a given input
+ Process match from this pattern by setting all properties from defined configuration
+ (index, private, value, formatter, validator, ...).
- :param input_string: the string to parse
- :type input_string: str
- :param context: the context
- :type context: dict
- :param with_raw_matches: should return details
- :type with_raw_matches: dict
- :return: matches based on input_string for this pattern
- :rtype: iterator[Match]
+ :param match:
+ :type match:
+ :return: True if match is validated by the configured validator, False otherwise.
+ :rtype:
"""
- # pylint: disable=too-many-branches
+ self._process_match_index(match, match_index)
+ self._process_match_private(match, child)
+ self._process_match_value(match, child)
+ self._process_match_formatter(match, child)
+ return self._process_match_validator(match, child)
+
+ @staticmethod
+ def _process_match_processor(match, processor):
+ if processor:
+ ret = processor(match)
+ if ret is not None:
+ return ret
+ return match
+
+ def _process_matches(self, match, match_index):
+ """
+ Process and generate all matches for the given unprocessed match.
+ :param match:
+ :param match_index:
+ :return: Processed and dispatched matches.
+ """
+ match = self._process_match_processor(match, self.pre_match_processor)
+ if not match:
+ return
- matches = []
- raw_matches = []
- for pattern in self.patterns:
- yield_parent = self._yield_parent()
- match_index = -1
- for match in self._match(pattern, input_string, context):
- match_index += 1
- match.match_index = match_index
- raw_matches.append(match)
- yield_children = self._yield_children(match)
- if not self._match_parent(match, yield_parent):
- continue
- validated = True
- for child in match.children:
- if not self._match_child(child, yield_children):
- validated = False
- break
- if validated:
- if self.private_parent:
- match.private = True
- if self.private_children:
- for child in match.children:
- child.private = True
- if yield_parent or self.private_parent:
- matches.append(match)
- if yield_children or self.private_children:
- for child in match.children:
- child.match_index = match_index
- matches.append(child)
- matches = self._matches_post_process(matches)
- self._matches_privatize(matches)
- self._matches_ignore(matches)
- if with_raw_matches:
- return matches, raw_matches
- return matches
+ if not self._process_match(match, match_index):
+ return
+
+ for child in match.children:
+ if not self._process_match(child, match_index, child=True):
+ return
+
+ match = self._process_match_processor(match, self.post_match_processor)
+ if not match:
+ return
- def _matches_post_process(self, matches):
+ if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names:
+ yield match
+ if self._should_include_children or self.private_children:
+ children = [x for x in match.children if x.name not in self.ignore_names]
+ for child in children:
+ yield child
+
+ def _post_process_matches(self, matches):
"""
Post process matches with user defined function
:param matches:
@@ -246,32 +333,6 @@ class Pattern(object):
return self.post_processor(matches, self)
return matches
- def _matches_privatize(self, matches):
- """
- Mark matches included in private_names with private flag.
- :param matches:
- :type matches:
- :return:
- :rtype:
- """
- if self.private_names:
- for match in matches:
- if match.name in self.private_names:
- match.private = True
-
- def _matches_ignore(self, matches):
- """
- Ignore matches included in ignore_names.
- :param matches:
- :type matches:
- :return:
- :rtype:
- """
- if self.ignore_names:
- for match in list(matches):
- if match.name in self.ignore_names:
- matches.remove(match)
-
@abstractproperty
def patterns(self): # pragma: no cover
"""
@@ -306,7 +367,7 @@ class Pattern(object):
@abstractmethod
def _match(self, pattern, input_string, context=None): # pragma: no cover
"""
- Computes all matches for a given pattern and input
+ Computes all unprocessed matches for a given pattern and input.
:param pattern: the pattern to use
:param input_string: the string to parse
@@ -350,7 +411,9 @@ class StringPattern(Pattern):
def _match(self, pattern, input_string, context=None):
for index in find_all(input_string, pattern, **self._kwargs):
- yield Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
+ match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
+ if match:
+ yield match
class RePattern(Pattern):
@@ -411,15 +474,18 @@ class RePattern(Pattern):
for start, end in match_object.spans(i):
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
input_string=input_string, **self._children_match_kwargs)
- main_match.children.append(child_match)
+ if child_match:
+ main_match.children.append(child_match)
else:
start, end = match_object.span(i)
if start > -1 and end > -1:
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
input_string=input_string, **self._children_match_kwargs)
- main_match.children.append(child_match)
+ if child_match:
+ main_match.children.append(child_match)
- yield main_match
+ if main_match:
+ yield main_match
class FunctionalPattern(Pattern):
@@ -457,14 +523,18 @@ class FunctionalPattern(Pattern):
if self._match_kwargs:
options = self._match_kwargs.copy()
options.update(args)
- yield Match(pattern=self, input_string=input_string, **options)
+ match = Match(pattern=self, input_string=input_string, **options)
+ if match:
+ yield match
else:
kwargs = self._match_kwargs
if isinstance(args[-1], dict):
kwargs = dict(kwargs)
kwargs.update(args[-1])
args = args[:-1]
- yield Match(*args, pattern=self, input_string=input_string, **kwargs)
+ match = Match(*args, pattern=self, input_string=input_string, **kwargs)
+ if match:
+ yield match
def filter_match_kwargs(kwargs, children=False):
diff --git a/libs/rebulk/processors.py b/libs/rebulk/processors.py
index b9fa52b43..6a4f0bab4 100644
--- a/libs/rebulk/processors.py
+++ b/libs/rebulk/processors.py
@@ -30,7 +30,7 @@ def _default_conflict_solver(match, conflicting_match):
"""
if len(conflicting_match.initiator) < len(match.initiator):
return conflicting_match
- elif len(match.initiator) < len(conflicting_match.initiator):
+ if len(match.initiator) < len(conflicting_match.initiator):
return match
return None
diff --git a/libs/rebulk/rebulk.py b/libs/rebulk/rebulk.py
index 42fb6440c..a6a0fd2fb 100644
--- a/libs/rebulk/rebulk.py
+++ b/libs/rebulk/rebulk.py
@@ -5,20 +5,16 @@ Entry point functions and classes for Rebulk
"""
from logging import getLogger
+from .builder import Builder
from .match import Matches
-
-from .pattern import RePattern, StringPattern, FunctionalPattern
-from .chain import Chain
-
from .processors import ConflictSolver, PrivateRemover
-from .loose import set_defaults
-from .utils import extend_safe
from .rules import Rules
+from .utils import extend_safe
log = getLogger(__name__).log
-class Rebulk(object):
+class Rebulk(Builder):
r"""
Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It use a fluent API to
chain ``string``, ``regex``, and ``functional`` methods to define various patterns types.
@@ -44,6 +40,7 @@ class Rebulk(object):
>>> bulk.matches("the lakers are from la")
[<lakers:(4, 10)>, <la:(20, 22)>]
"""
+
# pylint:disable=protected-access
def __init__(self, disabled=lambda context: False, default_rules=True):
@@ -56,6 +53,7 @@ class Rebulk(object):
:return:
:rtype:
"""
+ super(Rebulk, self).__init__()
if not callable(disabled):
self.disabled = lambda context: disabled
else:
@@ -64,11 +62,6 @@ class Rebulk(object):
self._rules = Rules()
if default_rules:
self.rules(ConflictSolver, PrivateRemover)
- self._defaults = {}
- self._regex_defaults = {}
- self._string_defaults = {}
- self._functional_defaults = {}
- self._chain_defaults = {}
self._rebulks = []
def pattern(self, *pattern):
@@ -83,172 +76,6 @@ class Rebulk(object):
self._patterns.extend(pattern)
return self
- def defaults(self, **kwargs):
- """
- Define default keyword arguments for all patterns
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- self._defaults = kwargs
- return self
-
- def regex_defaults(self, **kwargs):
- """
- Define default keyword arguments for functional patterns.
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- self._regex_defaults = kwargs
- return self
-
- def regex(self, *pattern, **kwargs):
- """
- Add re pattern
-
- :param pattern:
- :type pattern:
- :return: self
- :rtype: Rebulk
- """
- self.pattern(self.build_re(*pattern, **kwargs))
- return self
-
- def build_re(self, *pattern, **kwargs):
- """
- Builds a new regular expression pattern
-
- :param pattern:
- :type pattern:
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- set_defaults(self._regex_defaults, kwargs)
- set_defaults(self._defaults, kwargs)
- return RePattern(*pattern, **kwargs)
-
- def string_defaults(self, **kwargs):
- """
- Define default keyword arguments for string patterns.
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- self._string_defaults = kwargs
- return self
-
- def string(self, *pattern, **kwargs):
- """
- Add string pattern
-
- :param pattern:
- :type pattern:
- :return: self
- :rtype: Rebulk
- """
- self.pattern(self.build_string(*pattern, **kwargs))
- return self
-
- def build_string(self, *pattern, **kwargs):
- """
- Builds a new string pattern
-
- :param pattern:
- :type pattern:
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- set_defaults(self._string_defaults, kwargs)
- set_defaults(self._defaults, kwargs)
- return StringPattern(*pattern, **kwargs)
-
- def functional_defaults(self, **kwargs):
- """
- Define default keyword arguments for functional patterns.
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- self._functional_defaults = kwargs
- return self
-
- def functional(self, *pattern, **kwargs):
- """
- Add functional pattern
-
- :param pattern:
- :type pattern:
- :return: self
- :rtype: Rebulk
- """
- self.pattern(self.build_functional(*pattern, **kwargs))
- return self
-
- def build_functional(self, *pattern, **kwargs):
- """
- Builds a new functional pattern
-
- :param pattern:
- :type pattern:
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- set_defaults(self._functional_defaults, kwargs)
- set_defaults(self._defaults, kwargs)
- return FunctionalPattern(*pattern, **kwargs)
-
- def chain_defaults(self, **kwargs):
- """
- Define default keyword arguments for patterns chain.
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- self._chain_defaults = kwargs
- return self
-
- def chain(self, **kwargs):
- """
- Add patterns chain, using configuration of this rebulk
-
- :param pattern:
- :type pattern:
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- chain = self.build_chain(**kwargs)
- self._patterns.append(chain)
- return chain
-
- def build_chain(self, **kwargs):
- """
- Builds a new patterns chain
-
- :param pattern:
- :type pattern:
- :param kwargs:
- :type kwargs:
- :return:
- :rtype:
- """
- set_defaults(self._chain_defaults, kwargs)
- set_defaults(self._defaults, kwargs)
- return Chain(self, **kwargs)
-
def rules(self, *rules):
"""
Add rules as a module, class or instance.
diff --git a/libs/rebulk/rules.py b/libs/rebulk/rules.py
index 19b563ab8..2514904f4 100644
--- a/libs/rebulk/rules.py
+++ b/libs/rebulk/rules.py
@@ -140,10 +140,9 @@ class RemoveMatch(Consequence): # pylint: disable=abstract-method
matches.remove(match)
ret.append(match)
return ret
- else:
- if when_response in matches:
- matches.remove(when_response)
- return when_response
+ if when_response in matches:
+ matches.remove(when_response)
+ return when_response
class AppendMatch(Consequence): # pylint: disable=abstract-method
@@ -164,12 +163,11 @@ class AppendMatch(Consequence): # pylint: disable=abstract-method
matches.append(match)
ret.append(match)
return ret
- else:
- if self.match_name:
- when_response.name = self.match_name
- if when_response not in matches:
- matches.append(when_response)
- return when_response
+ if self.match_name:
+ when_response.name = self.match_name
+ if when_response not in matches:
+ matches.append(when_response)
+ return when_response
class RenameMatch(Consequence): # pylint: disable=abstract-method
diff --git a/libs/rebulk/test/test_chain.py b/libs/rebulk/test/test_chain.py
index 2715abc25..f39955465 100644
--- a/libs/rebulk/test/test_chain.py
+++ b/libs/rebulk/test/test_chain.py
@@ -2,11 +2,11 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, no-member, len-as-condition
import re
-
from functools import partial
+from rebulk.pattern import FunctionalPattern, StringPattern, RePattern
+from ..rebulk import Rebulk
from ..validators import chars_surround
-from ..rebulk import Rebulk, FunctionalPattern, RePattern, StringPattern
def test_chain_close():
@@ -63,18 +63,61 @@ def test_build_chain():
def test_chain_defaults():
rebulk = Rebulk()
- rebulk.defaults(validator=lambda x: True, ignore_names=['testIgnore'], children=True)
+ rebulk.defaults(validator=lambda x: x.value.startswith('t'), ignore_names=['testIgnore'], children=True)
- rebulk.chain()\
+ rebulk.chain() \
.regex("(?P<test>test)") \
.regex(" ").repeater("*") \
+ .regex("(?P<best>best)") \
+ .regex(" ").repeater("*") \
.regex("(?P<testIgnore>testIgnore)")
- matches = rebulk.matches("test testIgnore")
+ matches = rebulk.matches("test best testIgnore")
assert len(matches) == 1
assert matches[0].name == "test"
+def test_chain_with_validators():
+ def chain_validator(match):
+ return match.value.startswith('t') and match.value.endswith('t')
+
+ def default_validator(match):
+ return match.value.startswith('t') and match.value.endswith('g')
+
+ def custom_validator(match):
+ return match.value.startswith('b') and match.value.endswith('t')
+
+ rebulk = Rebulk()
+ rebulk.defaults(children=True, validator=default_validator)
+
+ rebulk.chain(validate_all=True, validator={'__parent__': chain_validator}) \
+ .regex("(?P<test>testing)", validator=default_validator).repeater("+") \
+ .regex(" ").repeater("+") \
+ .regex("(?P<best>best)", validator=custom_validator).repeater("+")
+ matches = rebulk.matches("some testing best end")
+
+ assert len(matches) == 2
+ assert matches[0].name == "test"
+ assert matches[1].name == "best"
+
+
+def test_matches_docs():
+ rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE) \
+ .defaults(children=True, formatter={'episode': int, 'version': int}) \
+ .chain() \
+ .regex(r'e(?P<episode>\d{1,4})').repeater(1) \
+ .regex(r'v(?P<version>\d+)').repeater('?') \
+ .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
+ .close() # .repeater(1) could be omitted as it's the default behavior
+
+ result = rebulk.matches("This is E14v2-15-16-17").to_dict() # converts matches to dict
+
+ assert 'episode' in result
+ assert result['episode'] == [14, 15, 16, 17]
+ assert 'version' in result
+ assert result['version'] == 2
+
+
def test_matches():
rebulk = Rebulk()
@@ -144,8 +187,8 @@ def test_matches():
def test_matches_2():
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
- .chain(children=True, formatter={'episode': int}) \
- .defaults(formatter={'version': int}) \
+ .defaults(children=True, formatter={'episode': int, 'version': int}) \
+ .chain() \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'[ex-](?P<episode>\d{1,4})').repeater('*') \
@@ -173,25 +216,32 @@ def test_matches_2():
def test_matches_3():
alt_dash = (r'@', r'[\W_]') # abbreviation
- rebulk = Rebulk()
+ match_names = ['season', 'episode']
+ other_names = ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
- rebulk.chain(formatter={'season': int, 'episode': int},
- tags=['SxxExx'],
- abbreviations=[alt_dash],
- private_names=['episodeSeparator', 'seasonSeparator'],
- children=True,
- private_parent=True,
- conflict_solver=lambda match, other: match
- if match.name in ['season', 'episode'] and other.name in
- ['screen_size', 'video_codec', 'audio_codec',
- 'audio_channels', 'container', 'date']
- else '__default__') \
+ rebulk = Rebulk()
+ rebulk.defaults(formatter={'season': int, 'episode': int},
+ tags=['SxxExx'],
+ abbreviations=[alt_dash],
+ private_names=['episodeSeparator', 'seasonSeparator'],
+ children=True,
+ private_parent=True,
+ conflict_solver=lambda match, other: match
+ if match.name in match_names and other.name in other_names
+ else '__default__')
+
+ rebulk.chain() \
+ .defaults(children=True, private_parent=True) \
.regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
.regex(r'(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+)').repeater('*') \
+ .close() \
.chain() \
+ .defaults(children=True, private_parent=True) \
.regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
.regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
+ .close() \
.chain() \
+ .defaults(children=True, private_parent=True) \
.regex(r'S(?P<season>\d+)') \
.regex(r'(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+)').repeater('*')
@@ -240,11 +290,11 @@ def test_matches_4():
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE)
- rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
- validator={'__parent__': seps_surround}, children=True, private_parent=True)
+ rebulk.defaults(validate_all=True, children=True)
+ rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], private_parent=True)
- rebulk.chain(formatter={'episode': int, 'version': int}) \
- .defaults(validator=None) \
+ rebulk.chain(validator={'__parent__': seps_surround}, formatter={'episode': int, 'version': int}) \
+ .defaults(formatter={'episode': int, 'version': int}) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')
@@ -262,11 +312,11 @@ def test_matches_5():
rebulk = Rebulk()
rebulk.regex_defaults(flags=re.IGNORECASE)
- rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
- validator={'__parent__': seps_surround}, children=True, private_parent=True)
- rebulk.chain(formatter={'episode': int, 'version': int}) \
- .defaults(validator=None) \
+ rebulk.chain(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
+ validator={'__parent__': seps_surround}, children=True, private_parent=True,
+ formatter={'episode': int, 'version': int}) \
+ .defaults(children=True, private_parent=True) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
@@ -288,7 +338,7 @@ def test_matches_6():
validator=None, children=True, private_parent=True)
rebulk.chain(formatter={'episode': int, 'version': int}) \
- .defaults(validator=None) \
+ .defaults(children=True, private_parent=True) \
.regex(r'e(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('{2,3}')
diff --git a/libs/rebulk/test/test_debug.py b/libs/rebulk/test/test_debug.py
index cd9e556df..8abdac5f4 100644
--- a/libs/rebulk/test/test_debug.py
+++ b/libs/rebulk/test/test_debug.py
@@ -2,19 +2,15 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, protected-access, invalid-name, len-as-condition
+from .default_rules_module import RuleRemove0
+from .. import debug
+from ..match import Match
from ..pattern import StringPattern
from ..rebulk import Rebulk
-from ..match import Match
-from .. import debug
-from .default_rules_module import RuleRemove0
class TestDebug(object):
-
-
- #request.addfinalizer(disable_debug)
-
-
+ # request.addfinalizer(disable_debug)
debug.DEBUG = True
pattern = StringPattern(1, 3, value="es")
@@ -38,43 +34,43 @@ class TestDebug(object):
debug.DEBUG = False
def test_pattern(self):
- assert self.pattern.defined_at.lineno == 20
+ assert self.pattern.defined_at.lineno > 0
assert self.pattern.defined_at.name == 'rebulk.test.test_debug'
assert self.pattern.defined_at.filename.endswith('test_debug.py')
- assert str(self.pattern.defined_at) == 'test_debug.py#L20'
- assert repr(self.pattern) == '<StringPattern@test_debug.py#L20:(1, 3)>'
+ assert str(self.pattern.defined_at).startswith('test_debug.py#L')
+ assert repr(self.pattern).startswith('<StringPattern@test_debug.py#L')
def test_match(self):
- assert self.match.defined_at.lineno == 22
+ assert self.match.defined_at.lineno > 0
assert self.match.defined_at.name == 'rebulk.test.test_debug'
assert self.match.defined_at.filename.endswith('test_debug.py')
- assert str(self.match.defined_at) == 'test_debug.py#L22'
+ assert str(self.match.defined_at).startswith('test_debug.py#L')
def test_rule(self):
- assert self.rule.defined_at.lineno == 23
+ assert self.rule.defined_at.lineno > 0
assert self.rule.defined_at.name == 'rebulk.test.test_debug'
assert self.rule.defined_at.filename.endswith('test_debug.py')
- assert str(self.rule.defined_at) == 'test_debug.py#L23'
- assert repr(self.rule) == '<RuleRemove0@test_debug.py#L23>'
+ assert str(self.rule.defined_at).startswith('test_debug.py#L')
+ assert repr(self.rule).startswith('<RuleRemove0@test_debug.py#L')
def test_rebulk(self):
"""
This test fails on travis CI, can't find out why there's 1 line offset ...
"""
- assert self.rebulk._patterns[0].defined_at.lineno in [26, 27]
+ assert self.rebulk._patterns[0].defined_at.lineno > 0
assert self.rebulk._patterns[0].defined_at.name == 'rebulk.test.test_debug'
assert self.rebulk._patterns[0].defined_at.filename.endswith('test_debug.py')
- assert str(self.rebulk._patterns[0].defined_at) in ['test_debug.py#L26', 'test_debug.py#L27']
+ assert str(self.rebulk._patterns[0].defined_at).startswith('test_debug.py#L')
- assert self.rebulk._patterns[1].defined_at.lineno in [27, 28]
+ assert self.rebulk._patterns[1].defined_at.lineno > 0
assert self.rebulk._patterns[1].defined_at.name == 'rebulk.test.test_debug'
assert self.rebulk._patterns[1].defined_at.filename.endswith('test_debug.py')
- assert str(self.rebulk._patterns[1].defined_at) in ['test_debug.py#L27', 'test_debug.py#L28']
+ assert str(self.rebulk._patterns[1].defined_at).startswith('test_debug.py#L')
assert self.matches[0].defined_at == self.rebulk._patterns[0].defined_at
assert self.matches[1].defined_at == self.rebulk._patterns[1].defined_at
diff --git a/libs/rebulk/test/test_match.py b/libs/rebulk/test/test_match.py
index 87273d540..8750733a5 100644
--- a/libs/rebulk/test/test_match.py
+++ b/libs/rebulk/test/test_match.py
@@ -116,6 +116,9 @@ class TestMatchesClass(object):
assert "tag1" in matches.tags
assert "tag2" in matches.tags
+ assert self.match3.tagged("tag1")
+ assert not self.match3.tagged("start")
+
tag1 = matches.tagged("tag1")
assert len(tag1) == 2
assert tag1[0] == self.match2
diff --git a/libs/rebulk/utils.py b/libs/rebulk/utils.py
index 9aaf56d14..85ddd41ec 100644
--- a/libs/rebulk/utils.py
+++ b/libs/rebulk/utils.py
@@ -3,7 +3,10 @@
"""
Various utilities functions
"""
-from collections import MutableSet
+try:
+ from collections.abc import MutableSet
+except ImportError:
+ from collections import MutableSet
from types import GeneratorType
diff --git a/libs/rebulk/validators.py b/libs/rebulk/validators.py
index 5fd3dcb6f..b8959c54c 100644
--- a/libs/rebulk/validators.py
+++ b/libs/rebulk/validators.py
@@ -62,9 +62,20 @@ def validators(*chained_validators):
:return:
:rtype:
"""
+
def validator_chain(match): # pylint:disable=missing-docstring
for chained_validator in chained_validators:
if not chained_validator(match):
return False
return True
+
return validator_chain
+
+
+def allways_true(match): # pylint:disable=unused-argument
+ """
+ A validator which is always true
+ :param match:
+ :return:
+ """
+ return True