├── LICENSE
├── README.md
├── __main__.py
└── requirements.txt


/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <https://unlicense.org>
25 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | A free copyright filter to help comply with the EU Copyright Directive.
 2 | 
 3 | ### Install dependencies
 4 | 
 5 | The Python client depends on either `opencv-contrib-python` or `opencv-contrib-python-headless`.
 6 | 
 7 | ```
 8 | git clone https://github.com/framespot/client-py.git
 9 | cd client-py
10 | pip install -r requirements.txt
11 | ```
12 | 
13 | ### Run copyright inference
14 | 
15 | ```
16 | python . --verbose /path/to/movie.mp4
17 | python . --verbose /path/to/stockphoto.jpg
18 | ```
19 | 
20 | ### Example result
21 | 
22 | ```JSON
23 | [{
24 |   "uri": "https://www.imdb.com/title/tt2380307/",
25 |   "ids": {"imdb": "tt2380307", "tmdb": "movie/354912"},
26 |   "title": "Coco",
27 |   "year": "2017",
28 |   "genres": ["Animation","Family","Comedy","Adventure","Fantasy"],
29 |   "companies": ["Pixar","Walt Disney Pictures"],
30 |   "homepage": "https://www.pixar.com/feature-films/coco",
31 |   "poster": "https://www.themoviedb.org/t/p/original/gGEsBPAijhVUFoiNpgZXqRVWJt2.jpg",
32 |   "frames": [{
33 |     "type": "movie",
34 |     "season": null,
35 |     "episode": null,
36 |     "offset": 1855,
37 |     "hamming": 8,
38 |     "matrix": [[ 1.001, 0.008,-0.001],
39 |                [-0.008, 1.001, 0.004]]
40 |   }]
41 | }]
42 | ```
43 | 


--------------------------------------------------------------------------------
/__main__.py:
--------------------------------------------------------------------------------
  1 | import sys, os, traceback
  2 | import math, io
  3 | import itertools, getopt
  4 | import json, zlib, random
  5 | import urllib.request
  6 | # pip install opencv-contrib-python
  7 | import numpy, cv2
  8 | 
  9 | 
 10 | # Result from yielded frame
 11 | def inference( frame_generator ):
 12 | 
 13 |     # Guess copyright
 14 |     grouped, results = [], []
 15 |     previous = None
 16 |     for framecounter, (frame, offset) in enumerate(frame_generator):
 17 |         assert type(frame) is numpy.ndarray and frame.shape[2] == 3
 18 |         # ask
 19 |         lookup = postframe(frame)
 20 |         if lookup is None:
 21 |             continue
 22 |         # yield server-result
 23 |         if previous:
 24 |             yield *previous, None
 25 |         previous = frame, offset, lookup
 26 |         # ignore if frame too common
 27 |         if len(lookup) >= 20:
 28 |             continue
 29 |         # whitelist result if frame also in trailer/teaser/...
 30 |         lookup_whitelist = [result for result in lookup if not any(True for f in result['frames'] if f['type'] == 'trailer')]
 31 |         if not lookup_whitelist:
 32 |             continue
 33 |         # group by uri (3 matches -> unlikely false positive)
 34 |         results.append(lookup_whitelist)
 35 |         grouped = [list(group) for k, group in
 36 |                    itertools.groupby(sorted([item for result in results for item in result], key=lambda x: x['uri']), lambda x: x['uri'])]
 37 |         grouped.sort(key=lambda x:len(x), reverse=True)
 38 |         if len(set(frame['offset'] for result in grouped[0] for frame in result['frames'] if frame['matrix'] is not None)) >= 3:
 39 |             break
 40 |     if not previous:
 41 |         return
 42 | 
 43 |     # To filter, or not to filter: that is the question...
 44 |     copyrights = []
 45 |     for group in grouped:
 46 |         copyright = False
 47 |         # video: accurate if 3 different frame-offset
 48 |         if len(set(frame['offset'] for result in group for frame in result['frames'] if frame['matrix'] is not None)) >= 3:
 49 |             copyright = True
 50 |         # still-image or short-video: if matches 'image' or translation-matrix + perceptual-hash
 51 |         elif framecounter <= 2 and any(True for result in group for frame in result['frames'] if
 52 |                 frame['type'] == 'image' or (frame['matrix'] is not None and frame['hamming'] is not None)):
 53 |             copyright = True
 54 |         if copyright:
 55 |             copyrights.append(group[0])
 56 |     yield *previous, copyrights or None
 57 | 
 58 | 
 59 | # ask server
 60 | def postframe( frame ):
 61 | 
 62 |     # filepath or numpy.array
 63 |     assert type(frame) is numpy.ndarray
 64 |     res, pngimage = cv2.imencode('.png', frame)
 65 |     content_type = 'image/png'
 66 |     filename = 'frame.png'
 67 |     filedata = pngimage.tobytes()
 68 | 
 69 |     # https://bugs.python.org/issue3244
 70 |     url = 'https://framespot.com/'
 71 |     boundary = ''.join(random.choices('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', k=70)) # RFC2046: boundary must be no longer than 70 characters
 72 |     headers = {
 73 |         'Content-Type': 'multipart/form-data; boundary=%s' % boundary,
 74 |         'Accept': 'application/json',
 75 |         'Accept-Encoding': 'gzip, deflate',
 76 |         'User-Agent': 'framespot-client/1.0',
 77 |     }
 78 |     data = (b'--%s\r\n' % boundary.encode() +
 79 |             b'Content-Disposition: form-data; name="frame"; filename="%s"\r\n' % filename.encode() +
 80 |             b'Content-Type: %s\r\n\r\n' % content_type.encode() +
 81 |             filedata + b'\r\n'
 82 |             b'--%s--\r\n' % boundary.encode())
 83 |     try:
 84 |         request = urllib.request.Request(url, method='POST', headers=headers, data=data)
 85 |         with urllib.request.urlopen(request, timeout=120) as response:
 86 |             result_code = response.getcode()
 87 |             result_url = response.geturl()
 88 |             result_headers = response.info()
 89 |             result_type = result_headers.get_content_type()
 90 |             if result_code != 200:
 91 |                 return None
 92 |             assert result_url == url and result_type == 'application/json'
 93 | 
 94 |             # Uncompress
 95 |             decompressor = None
 96 |             if result_headers.get('Content-Encoding') == 'zlib':
 97 |                 decompressor = zlib.decompressobj()
 98 |             elif result_headers.get('Content-Encoding') == 'gzip':
 99 |                 decompressor = zlib.decompressobj(zlib.MAX_WBITS|16)
100 |             elif result_headers.get('Content-Encoding') == 'deflate':
101 |                 decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
102 |             elif result_headers.get('Content-Encoding'):
103 |                 decompressor = zlib.decompressobj(zlib.MAX_WBITS|32) # automatic header detection
104 |             result_data = b''
105 |             while True:
106 |                 buf = response.read(0x1000)
107 |                 if not buf:
108 |                     break
109 |                 result_data += decompressor.decompress(buf) if decompressor else buf
110 |                 assert len(result_data) < 0x1000000
111 |             if decompressor:
112 |                 result_data += decompressor.flush()
113 |         return json.loads(result_data)
114 | 
115 |     except (urllib.error.HTTPError, urllib.error.URLError):
116 |         traceback.print_exc(file=sys.stderr)
117 |     return None
118 | 
119 | 
# scenecut @ 500 fps
def scenecut(filepath, scene_min=None, scene_max=None, seek=None, duration=None):
    """Yield ``(frame, offset)`` for the best frame of each detected scene.

    Frames are read with ``cv2.VideoCapture``.  Each frame is converted to
    grayscale and shrunk to roughly 240x240 pixels worth of area (aspect ratio
    kept) before FAST keypoints are detected and described.  A scene cut is
    declared when too few descriptors match the previous frame, or when the
    scene has lasted longer than *scene_max*.  Within a scene the frame with
    the highest summed keypoint response is remembered and yielded at the cut.

    Parameters:
        filepath: path handed to ``cv2.VideoCapture``.
        scene_min: descriptor matching is only tried once the scene is older
            than this many milliseconds; ``None`` means always.
        scene_max: force a cut once the scene is older than this many
            milliseconds; ``None`` disables the limit.
        seek: start position in milliseconds, or ``None`` for the beginning.
        duration: stop after this many milliseconds past the start, once a
            candidate frame exists; ``None`` or ``0`` reads to the end.

    Yields:
        ``(frame, offset)`` with the BGR frame as returned by OpenCV and its
        position in milliseconds.
    """
    kp_detector = cv2.FastFeatureDetector_create()
    kp_descriptor = cv2.ORB_create()
    bf_hamming = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    pyramid_down = 240
    truncate_keypoints = 256
    min_kpmatched = 8
    bits_kpmatched = 32
    min_keypoints = 256
    # Brief is faster than ORB - but not rotation invariant
    if hasattr(cv2, 'xfeatures2d'):
        kp_descriptor = cv2.xfeatures2d.BriefDescriptorExtractor_create(bytes=16)
        bits_kpmatched = 16

    cap = cv2.VideoCapture(filepath)
    # try/finally: the consumer may stop iterating at any yield; closing the
    # generator raises GeneratorExit there and the capture must still be freed
    try:
        if seek is not None:
            cap.set(cv2.CAP_PROP_POS_MSEC, seek)

        best_frame = None
        best_offset = None
        best_quality = 0.0
        des_prev = None
        scene_start = (0 if seek is None else seek)
        stop = scene_start + duration if duration else None

        while True:
            ret, frame = cap.read()
            cap_time = cap.get(cv2.CAP_PROP_POS_MSEC)
            # end of stream, or past the requested duration with a candidate in hand
            if not ret or (best_frame is not None and stop is not None and cap_time >= stop):
                if best_frame is not None:
                    yield (best_frame, best_offset)
                break

            # Keypoints on simplified frame
            grayframe = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            width = math.ceil(math.sqrt(pyramid_down*pyramid_down * grayframe.shape[1] / grayframe.shape[0]))
            height = math.ceil(math.sqrt(pyramid_down*pyramid_down * grayframe.shape[0] / grayframe.shape[1]))
            smallframe = cv2.resize(grayframe, (width, height), interpolation=cv2.INTER_AREA)

            kpf = kp_detector.detect(smallframe, None)
            kpf = list(kpf) # opencv 4.5.4 replaced list-results with tuples
            # frames with too little detail are skipped entirely
            if len(kpf) < min_keypoints:
                continue
            kpf.sort(key=lambda kp: kp.response, reverse=True)
            kps, des = kp_descriptor.compute(smallframe, kpf[:truncate_keypoints])

            # Best frame within scene
            if des_prev is not None and des is not None:

                # Scene cut?
                newscene = False
                if scene_max is not None and scene_start + scene_max < cap_time:
                    newscene = True
                elif scene_min is None or scene_start + scene_min < cap_time:
                    matches = bf_hamming.match(des_prev, des)
                    matched = [match for match in matches if match.distance <= bits_kpmatched]
                    if len(matched) < min_kpmatched:
                        newscene = True

                # Yield frame
                if newscene and best_frame is not None:
                    yield (best_frame, best_offset)
                    scene_start = cap_time
                    best_frame = None
                    best_offset = None
                    best_quality = 0.0

                # Better frame?
                else:
                    quality = sum(kp.response for kp in kps)
                    if best_quality < quality:
                        best_frame = frame
                        best_offset = cap_time
                        best_quality = quality
            des_prev = des
    finally:
        cap.release()
199 | 
200 | 
# Detect video (container only, could also match an audio)
def is_video_header(buf):
    """Return True when *buf*, the first bytes of a file, looks like a video.

    Only container signatures are inspected.  GIF and WebP count as video
    only when they are flagged as animated.
    """
    buf = bytes(buf)
    # video/mp4 (.mp4) + video/quicktime (.mov) + video/x-m4v (.m4v) + video/3gpp (.3gp)
    if len(buf) > 8 and buf[4:8] == b'ftyp':
        ftyp_len = int.from_bytes(buf[0:4], byteorder='big')
        if buf[0:4] == b'\x00\x00\x00\x1c' and buf[8:11] == b'M4V':
            return True
        if len(buf) >= ftyp_len:
            major_brand = buf[8:12].decode(errors='ignore')
            compatible_brands = [buf[i:i+4].decode(errors='ignore') for i in range(16, ftyp_len, 4)]
            # 3gp brands live in the ftyp box like every other ISO brand
            if major_brand in ('mp41', 'mp42', 'isom', 'qt  ') or major_brand.startswith('3gp'):
                return True
            return any(brand in compatible_brands for brand in ('mp41', 'mp42', 'isom'))
        return False
    # video/webm (.webm) + video/x-matroska (.mkv)
    if buf.startswith(b'\x1A\x45\xDF\xA3') and (b'\x42\x82\x84webm' in buf or b'\x42\x82\x88matroska' in buf):
        return True
    # video/mpeg (.mpg)
    if len(buf) > 3 and buf[0:3] == b'\x00\x00\x01' and 0xb0 <= buf[3] <= 0xbf:
        return True
    # video/mp2t (.ts) is not detected
    # video/x-msvideo (.avi)
    if buf[0:4] == b'RIFF' and buf[8:12] == b'AVI ':
        return True
    # video/x-ms-wmv (.wmv)
    if buf[0:10] == b'\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9':
        return True
    # video/x-flv (.flv)
    if buf[0:4] == b'FLV\x01':
        return True
    # image/gif (.gif)
    if buf[0:3] == b'GIF':
        return b'\x21\xFF\x0BNETSCAPE2.0' in buf  # animated
    # image/webp (.webp)
    if len(buf) > 16 and buf[0:4] == b'RIFF' and buf[8:14] == b'WEBPVP':
        # VP8X: fourcc at 12, chunk size at 16, flags byte at 20 (0x02 = animation)
        return buf[12:16] == b'VP8X' and len(buf) > 20 and bool(buf[20] & 0x02)
    return False


# Read a still image as 8-bit BGR, falling back to Pillow if it is installed
def load_image(filepath):
    """Return *filepath* as a ``(height, width, 3)`` uint8 BGR array, or None.

    OpenCV is tried first.  When it cannot read the file, or returns anything
    other than 8-bit three-channel data, Pillow is used if available; images
    that are not RGB are flattened onto a white background.
    """
    frame = cv2.imread( filepath, cv2.IMREAD_UNCHANGED )
    if frame is not None and frame.dtype == numpy.uint8 and frame.ndim == 3 and frame.shape[2] == 3:
        return frame
    try:
        import PIL.Image
    except ImportError:
        return None
    try:
        with PIL.Image.open(filepath) as img:
            if img.mode != 'RGB':
                white_background = PIL.Image.new('RGBA', img.size, (255,255,255))
                img = PIL.Image.alpha_composite(white_background, img.convert('RGBA')).convert('RGB')
            return numpy.array(img, dtype=numpy.uint8)[...,::-1].copy() # RGB->BGR
    # PIL.UnidentifiedImageError is an OSError; so are truncated-file errors
    except OSError:
        return None


# Command line entry point
def main(argv=None):
    """Look up one video or image file and print the copyright verdict.

    *argv* is the argument list without the program name; it defaults to
    ``sys.argv[1:]``.  Matched titles are printed to stdout as JSON, progress
    and errors go to stderr.  Exits with a usage status on bad arguments and
    with a no-input status when no frame could be looked up.
    """
    if argv is None:
        argv = sys.argv[1:]
    # os.EX_* only exist on Unix; fall back to the sysexits.h values
    exit_usage = getattr(os, 'EX_USAGE', 64)
    exit_noinput = getattr(os, 'EX_NOINPUT', 66)
    usage = 'Usage: python3 . --seek=#sec --duration=#sec /path/to/file'

    # Params
    scene_min, scene_max = 5000, 60000  # scene: [5s..60s]
    seek, duration = None, None
    verbose = False
    try:
        opts, args = getopt.getopt(argv,'s:d:v',['seek=','duration=','min-scene=','max-scene=','verbose'])
        for o, a in opts:
            if o in ('-s', '--seek'):
                seek = float(a) * 1000
            elif o in ('-d', '--duration'):
                duration = float(a) * 1000
            # getopt reports long options with their leading dashes
            elif o == '--min-scene':
                scene_min = float(a) * 1000
            elif o == '--max-scene':
                scene_max = float(a) * 1000
            elif o in ('-v','--verbose'):
                verbose = True
    except (getopt.GetoptError, ValueError) as err:
        print(err, file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(exit_usage)
    if len(args) != 1:
        print(usage, file=sys.stderr)
        sys.exit(exit_usage)
    filepath = args[0]
    if not os.path.isfile(filepath):
        print('File not found:', filepath, file=sys.stderr)
        sys.exit(exit_usage)

    if verbose:
        print('Inference:', filepath, 'seek:',seek, 'duration:',duration, 'scene:['+str(scene_min)+':'+str(scene_max)+']', file=sys.stderr)

    with open(filepath, 'rb') as fp:
        is_video = is_video_header(fp.read(8192))

    # Frame generator
    if is_video:
        frame_generator = scenecut(filepath, scene_min=scene_min, scene_max=scene_max, seek=seek, duration=duration)
    else:
        frame = load_image(filepath)
        frame_generator = [(frame, None)] if frame is not None else []

    # Lookup frames
    got_frames = False
    can_display = verbose
    window_open = False
    for frame, offset, lookup, copyrights in inference(frame_generator):
        got_frames = True
        if verbose:
            has_time = isinstance(offset, (int, float))
            if has_time:
                label = '{:02d}:{:02d}:{:02d}'.format(int(offset/3600000) % 24,int(offset/60000) % 60,int(offset/1000) % 60)
            else:
                label = str(offset)
            print(label, 'response:', json.dumps(lookup), file=sys.stderr)
            if can_display:
                try:
                    # still images have no offset, so there is nothing to stamp
                    if has_time:
                        cv2.putText(frame,label, (10,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 1)
                    cv2.imshow('frame',frame)
                    window_open = True
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        sys.exit()
                except cv2.error:
                    # headless OpenCV builds have no GUI; keep logging without a window
                    can_display = False
        if copyrights:
            print(json.dumps(copyrights, indent=2))
            break

    if not got_frames:
        print('Did not yield frames:', filepath, file=sys.stderr)
        sys.exit(exit_noinput)
    if window_open:
        cv2.waitKey()


# main
if __name__ == '__main__':
    main()
317 | 
318 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-contrib-python
2 | 


--------------------------------------------------------------------------------