aboutsummaryrefslogtreecommitdiffhomepage
path: root/libs/guessit/rules/properties/release_group.py
blob: ecff808b445fb6ed6773f251678b80df9ea6b99f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
release_group property
"""
import copy

from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk.match import Match

from ..common import seps
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import int_coercable, seps_surround
from ..properties.title import TitleFromPosition


def release_group(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    forbidden_groupnames = config['forbidden_names']

    groupname_ignore_seps = config['ignored_seps']
    groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])

    def clean_groupname(string):
        """
        Removes and strip separators from input_string
        :param string:
        :type string:
        :return:
        :rtype:
        """
        string = string.strip(groupname_seps)
        if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
                and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
            string = string.strip(groupname_ignore_seps)
        for forbidden in forbidden_groupnames:
            if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
                string = string[len(forbidden):]
                string = string.strip(groupname_seps)
            if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
                string = string[:len(forbidden)]
                string = string.strip(groupname_seps)
        return string.strip()

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))

    expected_group = build_expected_function('expected_group')

    rebulk.functional(expected_group, name='release_group', tags=['expected'],
                      validator=seps_surround,
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_group'))

    return rebulk.rules(
        DashSeparatedReleaseGroup(clean_groupname),
        SceneReleaseGroup(clean_groupname),
        AnimeReleaseGroup
    )


_scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
                         'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
                         'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')

_scene_previous_tags = ('release-group-prefix',)

_scene_no_previous_tags = ('no-release-group-prefix',)


class DashSeparatedReleaseGroup(Rule):
    """
    Detect dash separated release groups that might appear at the end or at the beginning of a release name.

    Series.S01E02.Pilot.DVDRip.x264-CS.mkv
        release_group: CS
    abc-the.title.name.1983.1080p.bluray.x264.mkv
        release_group: abc

    At the end: Release groups should be dash-separated and shouldn't contain spaces nor
    appear in a group with other matches. The preceding matches should be separated by dot.
    If a release group is found, the conflicting matches are removed.

    At the beginning: Release groups should be dash-separated and shouldn't contain spaces nor appear in a group.
    It should be followed by a hole with dot-separated words.
    Detection only happens if no matches exist at the beginning.
    """
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, value_formatter):
        """Default constructor."""
        super(DashSeparatedReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @classmethod
    def is_valid(cls, matches, candidate, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Whether a candidate is a valid release group.
        """
        if not at_end:
            if len(candidate.value) <= 1:
                return False

            if matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group'):
                return False

            first_hole = matches.holes(candidate.end, end, predicate=lambda m: m.start == candidate.end, index=0)
            if not first_hole:
                return False

            raw_value = first_hole.raw
            return raw_value[0] == '-' and '-' not in raw_value[1:] and '.' in raw_value and ' ' not in raw_value

        group = matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group', index=0)
        if group and matches.at_match(group, predicate=lambda m: not m.private and m.span != candidate.span):
            return False

        count = 0
        match = candidate
        while match:
            current = matches.range(start,
                                    match.start,
                                    index=-1,
                                    predicate=lambda m: not m.private and not 'expected' in m.tags)
            if not current:
                break

            separator = match.input_string[current.end:match.start]
            if not separator and match.raw[0] == '-':
                separator = '-'

            match = current

            if count == 0:
                if separator != '-':
                    break

                count += 1
                continue

            if separator == '.':
                return True

    def detect(self, matches, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Detect release group at the end or at the beginning of a filepart.
        """
        candidate = None
        if at_end:
            container = matches.ending(end, lambda m: m.name == 'container', index=0)
            if container:
                end = container.start

            candidate = matches.ending(end, index=0, predicate=(
                lambda m: not m.private and not (
                    m.name == 'other' and 'not-a-release-group' in m.tags
                ) and '-' not in m.raw and m.raw.strip() == m.raw))

        if not candidate:
            if at_end:
                candidate = matches.holes(start, end, seps=seps, index=-1,
                                          predicate=lambda m: m.end == end and m.raw.strip(seps) and m.raw[0] == '-')
            else:
                candidate = matches.holes(start, end, seps=seps, index=0,
                                          predicate=lambda m: m.start == start and m.raw.strip(seps))

        if candidate and self.is_valid(matches, candidate, start, end, at_end):
            return candidate

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('release_group'):
            return

        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            candidate = self.detect(matches, filepart.start, filepart.end, True)
            if candidate:
                to_remove.extend(matches.at_match(candidate))
            else:
                candidate = self.detect(matches, filepart.start, filepart.end, False)

            if candidate:
                releasegroup = Match(candidate.start, candidate.end, name='release_group',
                                     formatter=self.value_formatter, input_string=candidate.input_string)

                if releasegroup.value:
                    to_append.append(releasegroup)
                if to_remove or to_append:
                    return to_remove, to_append


class SceneReleaseGroup(Rule):
    """
    Add release_group match in existing matches (scene format).

    Something.XViD-ReleaseGroup.mkv
    """
    dependency = [TitleFromPosition]
    consequence = AppendMatch

    properties = {'release_group': [None]}

    def __init__(self, value_formatter):
        """Default constructor."""
        super(SceneReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @staticmethod
    def is_previous_match(match):
        """
        Check if match can precede release_group

        :param match:
        :return:
        """
        return not match.tagged(*_scene_no_previous_tags) if match.name in _scene_previous_names else \
            match.tagged(*_scene_previous_tags)

    def when(self, matches, context):  # pylint:disable=too-many-locals
        # If a release_group is found before, ignore this kind of release_group rule.

        ret = []

        for filepart in marker_sorted(matches.markers.named('path'), matches):
            # pylint:disable=cell-var-from-loop
            start, end = filepart.span
            if matches.named('release_group', predicate=lambda m: m.start >= start and m.end <= end):
                continue

            titles = matches.named('title', predicate=lambda m: m.start >= start and m.end <= end)

            def keep_only_first_title(match):
                """
                Keep only first title from this filepart, as other ones are most likely release group.

                :param match:
                :type match:
                :return:
                :rtype:
                """
                return match in titles[1:]

            last_hole = matches.holes(start, end + 1, formatter=self.value_formatter,
                                      ignore=keep_only_first_title,
                                      predicate=lambda hole: cleanup(hole.value), index=-1)

            if last_hole:
                def previous_match_filter(match):
                    """
                    Filter to apply to find previous match

                    :param match:
                    :type match:
                    :return:
                    :rtype:
                    """

                    if match.start < filepart.start:
                        return False
                    return not match.private or self.is_previous_match(match)

                previous_match = matches.previous(last_hole,
                                                  previous_match_filter,
                                                  index=0)
                if previous_match and (self.is_previous_match(previous_match)) and \
                        not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
                        and not int_coercable(last_hole.value.strip(seps)):

                    last_hole.name = 'release_group'
                    last_hole.tags = ['scene']

                    # if hole is inside a group marker with same value, remove [](){} ...
                    group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
                    if group:
                        group.formatter = self.value_formatter
                        if group.value == last_hole.value:
                            last_hole.start = group.start + 1
                            last_hole.end = group.end - 1
                            last_hole.tags = ['anime']

                    ignored_matches = matches.range(last_hole.start, last_hole.end, keep_only_first_title)

                    for ignored_match in ignored_matches:
                        matches.remove(ignored_match)

                    ret.append(last_hole)
        return ret


class AnimeReleaseGroup(Rule):
    """
    Add release_group match in existing matches (anime format)
    ...[ReleaseGroup] Something.mkv
    """
    dependency = [SceneReleaseGroup, TitleFromPosition]
    consequence = [RemoveMatch, AppendMatch]

    properties = {'release_group': [None]}

    def when(self, matches, context):
        to_remove = []
        to_append = []

        # If a release_group is found before, ignore this kind of release_group rule.
        if matches.named('release_group'):
            return False

        if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
            # This doesn't seems to be an anime, and we already found another release_group.
            return False

        for filepart in marker_sorted(matches.markers.named('path'), matches):

            # pylint:disable=bad-continuation
            empty_group = matches.markers.range(filepart.start,
                                                filepart.end,
                                                lambda marker: (marker.name == 'group'
                                                                and not matches.range(marker.start, marker.end,
                                                                                      lambda m:
                                                                                      'weak-language' not in m.tags)
                                                                and marker.value.strip(seps)
                                                                and not int_coercable(marker.value.strip(seps))), 0)

            if empty_group:
                group = copy.copy(empty_group)
                group.marker = False
                group.raw_start += 1
                group.raw_end -= 1
                group.tags = ['anime']
                group.name = 'release_group'
                to_append.append(group)
                to_remove.extend(matches.range(empty_group.start, empty_group.end,
                                               lambda m: 'weak-language' in m.tags))

        if to_remove or to_append:
            return to_remove, to_append
        return False