├── LICENSE ├── README.md ├── __main__.py └── requirements.txt /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
# ---------------------------------------------------------------------------
# (end of /LICENSE, continued from the preceding line of this dump)
#
# For more information, please refer to <https://unlicense.org>
# NOTE(review): the URL was lost when this dump was extracted; the address
# above is the one the standard Unlicense text ends with -- confirm.
#
# ---------------------------------------------------------------------------
# /README.md:
# ---------------------------------------------------------------------------
# Free copyright filter to fulfill EU Copyright Directive
#
# ### Install dependencies
#
# The python client depends on `opencv-contrib-python` or
# `opencv-contrib-python-headless`
#
# ```
# git clone https://github.com/framespot/client-py.git
# cd client-py
# pip install -r requirements.txt
# ```
#
# ### Inference copyright
#
# ```
# python . --verbose /path/to/movie.mp4
# python . --verbose /path/to/stockphoto.jpg
# ```
#
# ### Example result
#
# ```JSON
# [{
#   "uri": "https://www.imdb.com/title/tt2380307/",
#   "ids": {"imdb": "tt2380307", "tmdb": "movie/354912"},
#   "title": "Coco",
#   "year": "2017",
#   "genres": ["Animation","Family","Comedy","Adventure","Fantasy"],
#   "companies": ["Pixar","Walt Disney Pictures"],
#   "homepage": "https://www.pixar.com/feature-films/coco",
#   "poster": "https://www.themoviedb.org/t/p/original/gGEsBPAijhVUFoiNpgZXqRVWJt2.jpg",
#   "frames": [{
#     "type": "movie",
#     "season": null,
#     "episode": null,
#     "offset": 1855,
#     "hamming": 8,
#     "matrix": [[ 1.001, 0.008,-0.001],
#                [-0.008, 1.001, 0.004]]
#   }]
# }]
# ```
#
# ---------------------------------------------------------------------------
# /__main__.py:
# ---------------------------------------------------------------------------
import sys
import os
import traceback
import math
import io
import itertools
import getopt
import json
import zlib
import random
import urllib.error
import urllib.request
# pip install opencv-contrib-python
import numpy
import cv2


# Endpoint every frame is posted to.
SERVER_URL = 'https://framespot.com/'
# Upper bound (exclusive) on the size of a decoded server response.
MAX_RESPONSE_BYTES = 0x1000000
# os.EX_* only exist on Unix; fall back to the sysexits.h values elsewhere.
EX_USAGE = getattr(os, 'EX_USAGE', 64)
EX_NOINPUT = getattr(os, 'EX_NOINPUT', 66)
USAGE = 'Usage: python3 . --seek=#sec --duration=#sec /path/to/file'


def _group_by_uri(results):
    """Flatten per-frame result lists and group the items by their 'uri'.

    Groups are returned largest first; groups of equal size keep uri order.
    """
    def uri_of(item):
        return item['uri']

    flat = sorted((item for result in results for item in result), key=uri_of)
    groups = [list(members) for _, members in itertools.groupby(flat, uri_of)]
    groups.sort(key=len, reverse=True)
    return groups


def _matched_offsets(group):
    """Return the distinct 'offset' values of frames that carry a matrix."""
    return {match['offset']
            for result in group
            for match in result['frames']
            if match['matrix'] is not None}


# Result from yielded frame
def inference(frame_generator):
    """Look up frames on the server and decide whether to report a match.

    frame_generator yields (frame, offset) pairs where frame is a 3-channel
    numpy.ndarray.  Each frame is sent through postframe(); frames for which
    the server gives no answer are skipped.

    Yields (frame, offset, lookup, copyrights) tuples.  `copyrights` is None
    for every tuple except possibly the last one, where it is a list with one
    result per matching uri (or None when nothing qualified).  Nothing is
    yielded if no frame got a server answer.

    Raises TypeError when a frame is not a 3-channel numpy.ndarray.
    """
    # Guess copyright
    grouped, results = [], []
    previous = None
    for framecounter, (frame, offset) in enumerate(frame_generator):
        if not isinstance(frame, numpy.ndarray) or frame.ndim != 3 or frame.shape[2] != 3:
            raise TypeError('frame must be a 3-channel numpy.ndarray')
        # ask
        lookup = postframe(frame)
        if lookup is None:
            continue
        # yield server-result (one frame late, so the final frame can carry
        # the verdict)
        if previous:
            yield (*previous, None)
        previous = frame, offset, lookup
        # ignore if frame too common
        if len(lookup) >= 20:
            continue
        # drop results whose frames also occur in a trailer
        lookup_whitelist = [result for result in lookup
                            if not any(f['type'] == 'trailer' for f in result['frames'])]
        if not lookup_whitelist:
            continue
        # group by uri (3 matches -> unlikely false positive)
        results.append(lookup_whitelist)
        grouped = _group_by_uri(results)
        if len(_matched_offsets(grouped[0])) >= 3:
            break
    if not previous:
        return

    # To filter, or not to filter: that is the question...
    copyrights = []
    for group in grouped:
        # video: accurate if 3 different frame-offset
        is_match = len(_matched_offsets(group)) >= 3
        # still-image or short-video: if matches 'image' or
        # translation-matrix + perceptual-hash
        if not is_match and framecounter <= 2:
            is_match = any(
                match['type'] == 'image'
                or (match['matrix'] is not None and match['hamming'] is not None)
                for result in group for match in result['frames'])
        if is_match:
            copyrights.append(group[0])
    yield (*previous, copyrights or None)


def _make_decompressor(content_encoding):
    """Return a zlib decompress object for a Content-Encoding, or None."""
    if not content_encoding:
        return None
    if content_encoding == 'zlib':
        return zlib.decompressobj()
    if content_encoding == 'gzip':
        return zlib.decompressobj(zlib.MAX_WBITS | 16)
    if content_encoding == 'deflate':
        # NOTE(review): treated as a raw deflate stream, as in the original;
        # confirm this is what the server sends.
        return zlib.decompressobj(-zlib.MAX_WBITS)
    return zlib.decompressobj(zlib.MAX_WBITS | 32)  # automatic header detection


# ask server
def postframe(frame):
    """POST one frame as PNG to the server and return the decoded JSON.

    frame must be a numpy.ndarray (raises TypeError otherwise).

    Returns the parsed JSON body, or None when the frame cannot be encoded,
    the request fails, or the response is not a plain 200 JSON answer from
    SERVER_URL below the size limit.  Failures are reported on stderr.
    """
    if not isinstance(frame, numpy.ndarray):
        raise TypeError('frame must be a numpy.ndarray')
    encoded, pngimage = cv2.imencode('.png', frame)
    if not encoded:
        print('Could not encode frame as PNG', file=sys.stderr)
        return None
    content_type = 'image/png'
    filename = 'frame.png'
    filedata = pngimage.tobytes()

    # https://bugs.python.org/issue3244
    url = SERVER_URL
    # RFC2046: boundary must be no longer than 70 characters
    boundary = ''.join(random.choices(
        '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', k=70))
    headers = {
        'Content-Type': 'multipart/form-data; boundary=%s' % boundary,
        'Accept': 'application/json',
        'Accept-Encoding': 'gzip, deflate',
        'User-Agent': 'framespot-client/1.0',
    }
    data = b''.join((
        b'--%s\r\n' % boundary.encode(),
        b'Content-Disposition: form-data; name="frame"; filename="%s"\r\n' % filename.encode(),
        b'Content-Type: %s\r\n\r\n' % content_type.encode(),
        filedata,
        b'\r\n--%s--\r\n' % boundary.encode(),
    ))
    try:
        request = urllib.request.Request(url, method='POST', headers=headers, data=data)
        with urllib.request.urlopen(request, timeout=120) as response:
            if response.getcode() != 200:
                return None
            result_headers = response.info()
            result_url = response.geturl()
            result_type = result_headers.get_content_type()
            # Explicit checks instead of assert: asserts vanish under -O.
            if result_url != url or result_type != 'application/json':
                print('Unexpected response:', result_url, result_type, file=sys.stderr)
                return None

            # Uncompress
            decompressor = _make_decompressor(result_headers.get('Content-Encoding'))
            chunks, size = [], 0
            while True:
                buf = response.read(0x1000)
                if not buf:
                    break
                chunk = decompressor.decompress(buf) if decompressor else buf
                size += len(chunk)
                if size >= MAX_RESPONSE_BYTES:
                    print('Response too large', file=sys.stderr)
                    return None
                chunks.append(chunk)
            if decompressor:
                chunks.append(decompressor.flush())
            return json.loads(b''.join(chunks))

    # OSError covers HTTPError/URLError plus read timeouts and resets;
    # ValueError covers malformed JSON.
    except (OSError, zlib.error, ValueError):
        traceback.print_exc(file=sys.stderr)
        return None


# scenecut @ 500 fps
def scenecut(filepath, scene_min=None, scene_max=None, seek=None, duration=None):
    """Yield the best (frame, offset) of every detected scene of a video.

    filepath  -- passed to cv2.VideoCapture
    scene_min -- no cut is looked for before a scene is this long (ms)
    scene_max -- a cut is forced once a scene is longer than this (ms)
    seek      -- start position (ms), or None to start at the beginning
    duration  -- stop once this much (ms) has passed and a frame is pending

    A scene cut is assumed when too few keypoint descriptors of consecutive
    frames match.  Within a scene the frame with the highest summed keypoint
    response is kept; offset is its CAP_PROP_POS_MSEC value.
    """
    kp_detector = cv2.FastFeatureDetector_create()
    kp_descriptor = cv2.ORB_create()
    bf_hamming = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    pyramid_down = 240
    truncate_keypoints = 256
    min_kpmatched = 8
    bits_kpmatched = 32
    min_keypoints = 256
    # Brief is faster than ORB - but not rotation invariant
    if hasattr(cv2, 'xfeatures2d'):
        kp_descriptor = cv2.xfeatures2d.BriefDescriptorExtractor_create(bytes=16)
        bits_kpmatched = 16

    cap = cv2.VideoCapture(filepath)
    # try/finally: the consumer usually stops early (break), which closes
    # this generator at a yield; the capture must still be released.
    try:
        if seek is not None:
            cap.set(cv2.CAP_PROP_POS_MSEC, seek)

        best_frame = None
        best_offset = None
        best_quality = 0.0
        des_prev = None
        scene_start = (0 if seek is None else seek)
        stop = scene_start + duration if duration else None

        while True:
            ret, frame = cap.read()
            cap_time = cap.get(cv2.CAP_PROP_POS_MSEC)
            if not ret or (best_frame is not None and stop is not None and cap_time >= stop):
                if best_frame is not None:
                    yield (best_frame, best_offset)
                break

            # Keypoints on simplified frame (area of about pyramid_down^2,
            # aspect ratio kept)
            grayframe = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            rows, cols = grayframe.shape[0], grayframe.shape[1]
            width = math.ceil(math.sqrt(pyramid_down * pyramid_down * cols / rows))
            height = math.ceil(math.sqrt(pyramid_down * pyramid_down * rows / cols))
            smallframe = cv2.resize(grayframe, (width, height), interpolation=cv2.INTER_AREA)

            # opencv 4.5.4 replaced list-results with tuples
            kpf = list(kp_detector.detect(smallframe, None))
            if len(kpf) < min_keypoints:
                continue
            kpf.sort(key=lambda kp: kp.response, reverse=True)
            kps, des = kp_descriptor.compute(smallframe, kpf[:truncate_keypoints])

            # Best frame within scene
            if des_prev is not None and des is not None:

                # Scene cut?
                newscene = False
                if scene_max is not None and scene_start + scene_max < cap_time:
                    newscene = True
                elif scene_min is None or scene_start + scene_min < cap_time:
                    matches = bf_hamming.match(des_prev, des)
                    matched = [m for m in matches if m.distance <= bits_kpmatched]
                    if len(matched) < min_kpmatched:
                        newscene = True

                # Yield frame
                if newscene and best_frame is not None:
                    yield (best_frame, best_offset)
                    scene_start = cap_time
                    best_frame = None
                    best_offset = None
                    best_quality = 0.0

                # Better frame?
                else:
                    quality = sum(kp.response for kp in kps)
                    if best_quality < quality:
                        best_frame = frame
                        best_offset = cap_time
                        best_quality = quality
            des_prev = des
    finally:
        cap.release()


def _is_video_header(buf):
    """Guess from the first bytes of a file whether it is a video.

    Container only -- could also match an audio file.  Animated GIF and
    animated WebP count as video.
    """
    # video/mp4 (.mp4) + video/quicktime (.mov) + video/x-m4v (.m4v)
    # + video/3gpp (.3gp): an 'ftyp' box at offset 4
    if len(buf) > 8 and buf[4:8] == b'ftyp':
        ftyp_len = int.from_bytes(buf[0:4], byteorder='big')
        if len(buf) > 10 and buf[0:4] == b'\x00\x00\x00\x1c' and buf[8:11] == b'M4V':
            return True
        if len(buf) >= ftyp_len:
            major_brand = buf[8:12].decode(errors='ignore')
            compatible_brands = [buf[i:i + 4].decode(errors='ignore')
                                 for i in range(16, ftyp_len, 4)]
            # rstrip: the QuickTime brand is 'qt' padded with spaces
            if major_brand.rstrip() in ('mp41', 'mp42', 'isom', 'qt'):
                return True
            # 3GP files carry their 'ftyp3gp' signature here, at offset 4
            if major_brand.startswith('3gp'):
                return True
            if any(brand in compatible_brands for brand in ('mp41', 'mp42', 'isom')):
                return True
        return False
    # video/webm (.webm) + video/x-matroska (.mkv)
    if buf.startswith(b'\x1A\x45\xDF\xA3') and (
            buf.find(b'\x42\x82\x84webm') > -1 or buf.find(b'\x42\x82\x88matroska') > -1):
        return True
    # video/mpeg (.mpg)
    if len(buf) > 3 and buf[0:3] == b'\x00\x00\x01' and 0xb0 <= buf[3] <= 0xbf:
        return True
    # video/mp2t (.ts) is not detected
    # video/x-msvideo (.avi)
    if len(buf) > 11 and buf[0:4] == b'RIFF' and buf[8:12] == b'AVI\x20':
        return True
    # video/x-ms-wmv (.wmv)
    if len(buf) > 9 and buf[0:10] == b'\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9':
        return True
    # video/3gpp (.3gp) -- signature at offset 0, kept from the original
    if len(buf) > 7 and buf[0:7] == b'ftyp3gp':
        return True
    # video/x-flv (.flv)
    if len(buf) > 3 and buf[0:4] == b'FLV\x01':
        return True
    # image/gif (.gif)
    if len(buf) > 2 and buf[0:3] == b'GIF':
        return b'\x21\xFF\x0BNETSCAPE2.0' in buf  # animated
    # image/webp (.webp)
    if len(buf) > 16 and buf[0:4] == b'RIFF' and buf[8:12] == b'WEBP' and buf[12:14] == b'VP':
        return buf[12:16] == b'VP8X' and buf[16] & 2 != 0  # animated
    return False


def _load_still_image(filepath):
    """Load an image as an 8-bit BGR array, or return None.

    OpenCV is tried first; anything it cannot deliver as 8-bit 3-channel is
    retried through Pillow (if installed), flattened on a white background.
    """
    frame = cv2.imread(filepath, cv2.IMREAD_UNCHANGED)
    if (frame is not None and frame.dtype == numpy.uint8
            and frame.ndim == 3 and frame.shape[2] == 3):
        return frame
    try:
        import PIL.Image
    except ImportError:
        return None
    try:
        with PIL.Image.open(filepath) as img:
            if img.mode != 'RGB':
                white_background = PIL.Image.new('RGBA', img.size, (255, 255, 255))
                img = PIL.Image.alpha_composite(
                    white_background, img.convert('RGBA')).convert('RGB')
            return numpy.array(img, dtype=numpy.uint8)[..., ::-1].copy()  # RGB->BGR
    except PIL.UnidentifiedImageError:
        return None


def _usage_exit(message=None):
    """Print an optional message and the usage line, then exit with EX_USAGE."""
    if message is not None:
        print(message, file=sys.stderr)
    print(USAGE, file=sys.stderr)
    sys.exit(EX_USAGE)


def main(argv=None):
    """Command-line entry point: inspect one file and print matches as JSON."""
    if argv is None:
        argv = sys.argv[1:]

    # Params
    try:
        opts, args = getopt.getopt(
            argv, 's:d:v', ['seek=', 'duration=', 'min-scene=', 'max-scene=', 'verbose'])
    except getopt.GetoptError as err:
        _usage_exit(err)
    if len(args) != 1:
        _usage_exit()
    filepath = args[0]
    if not os.path.exists(filepath):
        print('File not found:', filepath, file=sys.stderr)
        sys.exit(EX_USAGE)
    scene_min, scene_max = 5000, 60000  # scene: [5s..60s]
    seek, duration = None, None
    verbose = False
    try:
        for o, a in opts:
            if o in ('-s', '--seek'):
                seek = float(a) * 1000
            elif o in ('-d', '--duration'):
                duration = float(a) * 1000
            # getopt reports long options with their leading '--'
            elif o == '--min-scene':
                scene_min = float(a) * 1000
            elif o == '--max-scene':
                scene_max = float(a) * 1000
            elif o in ('-v', '--verbose'):
                verbose = True
    except ValueError as err:
        _usage_exit(err)

    if verbose:
        print('Inference:', filepath, 'seek:', seek, 'duration:', duration,
              'scene:[' + str(scene_min) + ':' + str(scene_max) + ']', file=sys.stderr)

    # Detect video (container only, could also match an audio)
    with open(filepath, 'rb') as fp:
        is_video = _is_video_header(fp.read(8192))

    # Frame generator
    if is_video:
        frame_generator = scenecut(filepath, scene_min=scene_min, scene_max=scene_max,
                                   seek=seek, duration=duration)
    else:
        frame = _load_still_image(filepath)
        frame_generator = [(frame, None)] if frame is not None else []

    # Lookup frames
    got_frames = False
    preview = verbose  # switched off when OpenCV has no GUI support
    for frame, offset, lookup, copyrights in inference(frame_generator):
        got_frames = True
        if verbose:
            if isinstance(offset, (int, float)):
                label = '{:02d}:{:02d}:{:02d}'.format(
                    int(offset / 3600000) % 24, int(offset / 60000) % 60, int(offset / 1000) % 60)
            else:
                label = offset
            print(label, 'response:', json.dumps(lookup), file=sys.stderr)
            if preview:
                try:
                    if isinstance(label, str):
                        cv2.putText(frame, label, (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 1)
                    cv2.imshow('frame', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        sys.exit()
                except cv2.error:
                    # e.g. opencv-contrib-python-headless: no window support
                    preview = False
        if copyrights:
            print(json.dumps(copyrights, indent=2))
            break

    if not got_frames:
        print('Did not yield frames:', filepath, file=sys.stderr)
        sys.exit(EX_NOINPUT)
    if preview:
        cv2.waitKey()


# main
if __name__ == '__main__':
    main()

# ---------------------------------------------------------------------------
# /requirements.txt: (its content follows on the next line of this dump)
-------------------------------------------------------------------------------- 1 | opencv-contrib-python 2 | --------------------------------------------------------------------------------