1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
|
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from datetime import datetime
import hashlib
import os
import re
import struct
from six.moves import range
def hash_opensubtitles(video_path):
    """Compute a hash using OpenSubtitles' algorithm.

    The hash is the file size plus the sum of the first and the last 64 KiB of
    the file, each interpreted as little-endian signed 64-bit integers. The
    total is wrapped to 64 bits and rendered as 16 lowercase hex digits.

    :param str video_path: path of the video.
    :return: the hash, or `None` if the file is smaller than 128 KiB.
    :rtype: str

    """
    chunksize = 65536
    # one format string decoding a whole 64 KiB window at once
    chunkformat = '<%dq' % (chunksize // struct.calcsize('<q'))

    with open(video_path, 'rb') as f:
        filesize = os.path.getsize(video_path)
        if filesize < chunksize * 2:
            return None

        filehash = filesize
        # read each window with a single call instead of 8192 tiny 8-byte reads;
        # the size guard above guarantees the tail offset is never negative
        for offset in (0, filesize - chunksize):
            f.seek(offset, 0)
            filehash += sum(struct.unpack(chunkformat, f.read(chunksize)))

    # integers are unbounded here, so wrapping once at the end yields the same
    # value as wrapping after every single addition
    return '%016x' % (filehash & 0xFFFFFFFFFFFFFFFF)
def hash_thesubdb(video_path):
    """Compute a hash using TheSubDB's algorithm.

    The hash is the MD5 of the first 64 KiB of the file followed by its last
    64 KiB.

    :param str video_path: path of the video.
    :return: the hash, or `None` if the file is smaller than 64 KiB.
    :rtype: str

    """
    window = 64 * 1024
    if os.path.getsize(video_path) < window:
        return None

    digest = hashlib.md5()
    with open(video_path, 'rb') as handle:
        # leading window
        digest.update(handle.read(window))
        # trailing window, located relative to the end of the file
        handle.seek(-window, os.SEEK_END)
        digest.update(handle.read(window))

    return digest.hexdigest()
def hash_napiprojekt(video_path):
    """Compute a hash using NapiProjekt's algorithm.

    The hash is the MD5 of the first 10 MiB of the file (or of the whole file
    when it is shorter than that).

    :param str video_path: path of the video.
    :return: the hash.
    :rtype: str

    """
    limit = 10 * 1024 * 1024
    with open(video_path, 'rb') as handle:
        head = handle.read(limit)

    digest = hashlib.md5()
    digest.update(head)
    return digest.hexdigest()
def hash_shooter(video_path):
    """Compute a hash using Shooter's algorithm.

    Four 4 KiB blocks are read at fixed positions (4 KiB in, two thirds in,
    one third in, and 8 KiB before the end). The hash is the MD5 hex digest
    of each block, joined with ``;`` in that order.

    :param str video_path: path of the video.
    :return: the hash, or `None` if the file is smaller than 8 KiB.
    :rtype: str

    """
    readsize = 4096
    # stat the file once so the size guard and the offsets below agree
    filesize = os.path.getsize(video_path)
    if filesize < readsize * 2:
        return None

    offsets = (readsize, filesize // 3 * 2, filesize // 3, filesize - readsize * 2)
    blocks = []
    with open(video_path, 'rb') as f:
        for offset in offsets:
            f.seek(offset)
            blocks.append(f.read(readsize))

    return ';'.join(hashlib.md5(block).hexdigest() for block in blocks)
def sanitize(string, ignore_characters=None):
    """Sanitize a string to strip special characters.

    The characters ``-``, ``:``, ``(``, ``)`` and ``.`` are replaced with a
    space and apostrophes are removed, except for those listed in
    `ignore_characters`. Runs of whitespace are then collapsed to a single
    space and the result is stripped and lower-cased.

    :param str string: the string to sanitize.
    :param ignore_characters: characters to leave untouched; any iterable of
        single characters (set, frozenset, str, list...) is accepted.
    :type ignore_characters: set
    :return: the sanitized string, or `None` if `string` is `None`.
    :rtype: str

    """
    # only deal with strings
    if string is None:
        return None

    # normalize to a set so callers are not forced to pass one
    ignore_characters = set(ignore_characters or ())

    # replace some characters with one space
    characters = {'-', ':', '(', ')', '.'} - ignore_characters
    if characters:
        # sorted so the generated pattern does not depend on set ordering
        string = re.sub(r'[%s]' % re.escape(''.join(sorted(characters))), ' ', string)

    # remove some characters
    if '\'' not in ignore_characters:
        string = string.replace('\'', '')

    # replace multiple spaces with one
    string = re.sub(r'\s+', ' ', string)

    # strip and lower case
    return string.strip().lower()
def sanitize_release_group(string):
    """Sanitize a `release_group` string to remove content in square brackets.

    Every ``[...]`` group made only of word characters is dropped, then the
    result is stripped of surrounding whitespace and upper-cased.

    :param str string: the release group to sanitize.
    :return: the sanitized release group, or `None` if `string` is `None`.
    :rtype: str

    """
    # nothing to clean when no release group was given
    if string is None:
        return None

    # drop bracketed tags such as "[rartv]"
    bracketed_tag = re.compile(r'\[\w+\]')
    cleaned = bracketed_tag.sub('', string)

    # trim first, then normalize the case
    trimmed = cleaned.strip()
    return trimmed.upper()
def timestamp(date):
    """Get the timestamp of the `date`, python2/3 compatible.

    A naive `date` is taken to be expressed in UTC. A timezone-aware `date`
    is converted to UTC first, so it no longer fails when compared against
    the naive epoch.

    :param datetime.datetime date: the utc date.
    :return: the timestamp of the date, in seconds since 1970-01-01 00:00 UTC.
    :rtype: float

    """
    # utcoffset() is None for naive datetimes, which are used as-is
    offset = date.utcoffset()
    if offset is not None:
        # shift the wall-clock time to UTC and drop the tzinfo so that the
        # subtraction against the naive epoch below is allowed
        date = date.replace(tzinfo=None) - offset

    return (date - datetime(1970, 1, 1)).total_seconds()
|