├── protos ├── __init__.py ├── face_label_map.pbtxt └── string_int_label_map_pb2.py ├── utils ├── __init__.py ├── label_map_util.py └── visualization_utils_color.py ├── image1.png ├── image2.png ├── requirements.txt ├── README.md ├── LICENSE.md ├── .gitignore ├── labels.pbtxt └── app.py /protos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toxtli/deepiracy/HEAD/image1.png -------------------------------------------------------------------------------- /image2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toxtli/deepiracy/HEAD/image2.png -------------------------------------------------------------------------------- /protos/face_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 2 3 | name: 'background' 4 | } 5 | 6 | item { 7 | id: 1 8 | name: 'face' 9 | } 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | imutils==0.4.5 2 | numpy==1.14.1 3 | youtube_dl==2018.3.20 4 | tensorflow==1.6.0 5 | six==1.10.0 6 | opencv-contrib-python 7 | edit_distance==1.0.1 8 | Pillow==5.1.0 9 | protobuf==3.5.2.post1 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deepiracy 2 | 3 | Deepiracy is a tool that finds a source video within a target video. It can detect subsequences of the source video even if the target is highly distorted. You can read a complete explanation of how this works in [this article](https://medium.com/hci-wvu/piracy-detection-using-longest-common-subsequence-and-neural-networks-a6f689a541a6). 4 | 5 | ## Installation 6 | 7 | Run 8 | 9 | > pip install -r requirements.txt 10 | 11 | ## Quickstart 12 | 13 | Parameters: 14 | 15 | python app.py [video_1_start_frame] [video_2_start_frame] [how_many_frames] [video_path_1_or_url] [video_path_2_or_url] 16 | 17 | Example of how to run it with local files: 18 | 19 | > python app.py 1 1 -1 video1.mp4 video1.mp4 20 | 21 | Example of how to run it with YouTube URLs: 22 | 23 | > python app.py 1 1 -1 https://www.youtube.com/watch?v=E5K_Ug0Gq0Y https://www.youtube.com/watch?v=E5K_Ug0Gq0Y 24 | 25 | Example of how to run real-time detection from a webcam (`0` selects the default camera): 26 | 27 | > python app.py 1 1 -1 video1.mp4 0 28 | 29 | The required files can be downloaded from here: 30 | 31 | https://drive.google.com/drive/folders/1BPR6j-3xc0NnlbmO96LD55tRV7e07Ynp?usp=sharing 32 | 33 | The results can be found here: 34 | 35 | https://drive.google.com/open?id=1iyquDYv1o48mA_ymI7AEjrlZZtqXOOz1 36 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain.
2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.mp4 2 | *.pb 3 | *.avi 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | -------------------------------------------------------------------------------- /protos/string_int_label_map_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: object_detection/protos/string_int_label_map.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='object_detection/protos/string_int_label_map.proto', 20 | package='object_detection.protos', 21 | serialized_pb=_b('\n2object_detection/protos/string_int_label_map.proto\x12\x17object_detection.protos\"G\n\x15StringIntLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"Q\n\x11StringIntLabelMap\x12<\n\x04item\x18\x01 \x03(\x0b\x32..object_detection.protos.StringIntLabelMapItem') 22 | ) 23 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 24 | 25 | 26 | 27 | 28 | _STRINGINTLABELMAPITEM = _descriptor.Descriptor( 29 | name='StringIntLabelMapItem', 30 | full_name='object_detection.protos.StringIntLabelMapItem', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | fields=[ 35 | _descriptor.FieldDescriptor( 36 | name='name', full_name='object_detection.protos.StringIntLabelMapItem.name', index=0, 37 | number=1, type=9, cpp_type=9, label=1, 38 | has_default_value=False, default_value=_b("").decode('utf-8'), 39 | message_type=None, enum_type=None, containing_type=None, 40 | is_extension=False, extension_scope=None, 41 | options=None), 42 | _descriptor.FieldDescriptor( 43 | name='id', full_name='object_detection.protos.StringIntLabelMapItem.id', index=1, 44 | number=2, type=5, cpp_type=1, label=1, 45 | has_default_value=False, default_value=0, 46 | message_type=None, enum_type=None, containing_type=None, 47 | is_extension=False, extension_scope=None, 48 | options=None), 49 | _descriptor.FieldDescriptor( 50 | name='display_name', full_name='object_detection.protos.StringIntLabelMapItem.display_name', index=2, 51 | number=3, type=9, cpp_type=9, label=1, 52 | has_default_value=False, default_value=_b("").decode('utf-8'), 53 | message_type=None, enum_type=None, containing_type=None, 54 | is_extension=False, extension_scope=None, 55 | options=None), 56 | ], 57 | extensions=[ 58 | ], 59 | nested_types=[], 60 | enum_types=[ 61 | ], 62 | options=None, 63 | is_extendable=False, 64 | extension_ranges=[], 65 | oneofs=[ 66 | ], 67 | serialized_start=79, 68 | serialized_end=150, 69 | ) 70 | 71 | 72 | _STRINGINTLABELMAP = _descriptor.Descriptor( 73 | name='StringIntLabelMap', 74 | full_name='object_detection.protos.StringIntLabelMap', 75 | filename=None, 76 | file=DESCRIPTOR, 77 | containing_type=None, 78 | fields=[ 79 | _descriptor.FieldDescriptor( 80 | name='item', full_name='object_detection.protos.StringIntLabelMap.item', index=0, 81 | number=1, type=11, cpp_type=10, label=3, 82 | has_default_value=False, default_value=[], 83 | message_type=None, enum_type=None, containing_type=None, 84 | is_extension=False, extension_scope=None, 85 | options=None), 86 | ], 87 | extensions=[ 88 | ], 89 | nested_types=[], 90 | enum_types=[ 91 | ], 92 | options=None, 93 | is_extendable=False, 94 | extension_ranges=[], 95 | oneofs=[ 96 | ], 97 | serialized_start=152, 98 | serialized_end=233, 99 | ) 100 | 101 | 
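# NOTE (added annotation; the rest of this file is compiler-generated code):
# the descriptors above correspond roughly to the following .proto schema,
# reconstructed here from the field definitions for readability rather than
# taken from the original string_int_label_map.proto file:
#
#   message StringIntLabelMapItem {
#     optional string name = 1;
#     optional int32 id = 2;
#     optional string display_name = 3;
#   }
#
#   message StringIntLabelMap {
#     repeated StringIntLabelMapItem item = 1;
#   }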
_STRINGINTLABELMAP.fields_by_name['item'].message_type = _STRINGINTLABELMAPITEM 102 | DESCRIPTOR.message_types_by_name['StringIntLabelMapItem'] = _STRINGINTLABELMAPITEM 103 | DESCRIPTOR.message_types_by_name['StringIntLabelMap'] = _STRINGINTLABELMAP 104 | 105 | StringIntLabelMapItem = _reflection.GeneratedProtocolMessageType('StringIntLabelMapItem', (_message.Message,), dict( 106 | DESCRIPTOR = _STRINGINTLABELMAPITEM, 107 | __module__ = 'object_detection.protos.string_int_label_map_pb2' 108 | # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMapItem) 109 | )) 110 | _sym_db.RegisterMessage(StringIntLabelMapItem) 111 | 112 | StringIntLabelMap = _reflection.GeneratedProtocolMessageType('StringIntLabelMap', (_message.Message,), dict( 113 | DESCRIPTOR = _STRINGINTLABELMAP, 114 | __module__ = 'object_detection.protos.string_int_label_map_pb2' 115 | # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMap) 116 | )) 117 | _sym_db.RegisterMessage(StringIntLabelMap) 118 | 119 | 120 | # @@protoc_insertion_point(module_scope) 121 | -------------------------------------------------------------------------------- /utils/label_map_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Label map utility functions.""" 17 | 18 | import logging 19 | 20 | import tensorflow as tf 21 | from google.protobuf import text_format 22 | from protos import string_int_label_map_pb2 23 | 24 | 25 | def _validate_label_map(label_map): 26 | """Checks if a label map is valid. 27 | 28 | Args: 29 | label_map: StringIntLabelMap to validate. 30 | 31 | Raises: 32 | ValueError: if label map is invalid. 33 | """ 34 | for item in label_map.item: 35 | if item.id < 1: 36 | raise ValueError('Label map ids should be >= 1.') 37 | 38 | 39 | def create_category_index(categories): 40 | """Creates dictionary of COCO compatible categories keyed by category id. 41 | 42 | Args: 43 | categories: a list of dicts, each of which has the following keys: 44 | 'id': (required) an integer id uniquely identifying this category. 45 | 'name': (required) string representing category name 46 | e.g., 'cat', 'dog', 'pizza'. 47 | 48 | Returns: 49 | category_index: a dict containing the same entries as categories, but keyed 50 | by the 'id' field of each category. 51 | """ 52 | category_index = {} 53 | for cat in categories: 54 | category_index[cat['id']] = cat 55 | return category_index 56 | 57 | 58 | def convert_label_map_to_categories(label_map, 59 | max_num_classes, 60 | use_display_name=True): 61 | """Loads label map proto and returns categories list compatible with eval. 
62 | 63 | This function loads a label map and returns a list of dicts, each of which 64 | has the following keys: 65 | 'id': (required) an integer id uniquely identifying this category. 66 | 'name': (required) string representing category name 67 | e.g., 'cat', 'dog', 'pizza'. 68 | We only allow class into the list if its id-label_id_offset is 69 | between 0 (inclusive) and max_num_classes (exclusive). 70 | If there are several items mapping to the same id in the label map, 71 | we will only keep the first one in the categories list. 72 | 73 | Args: 74 | label_map: a StringIntLabelMapProto or None. If None, a default categories 75 | list is created with max_num_classes categories. 76 | max_num_classes: maximum number of (consecutive) label indices to include. 77 | use_display_name: (boolean) choose whether to load 'display_name' field 78 | as category name. If False or if the display_name field does not exist, 79 | uses 'name' field as category names instead. 80 | Returns: 81 | categories: a list of dictionaries representing all possible categories. 82 | """ 83 | categories = [] 84 | list_of_ids_already_added = [] 85 | if not label_map: 86 | label_id_offset = 1 87 | for class_id in range(max_num_classes): 88 | categories.append({ 89 | 'id': class_id + label_id_offset, 90 | 'name': 'category_{}'.format(class_id + label_id_offset) 91 | }) 92 | return categories 93 | for item in label_map.item: 94 | if not 0 < item.id <= max_num_classes: 95 | logging.info('Ignore item %d since it falls outside of requested ' 96 | 'label range.', item.id) 97 | continue 98 | if use_display_name and item.HasField('display_name'): 99 | name = item.display_name 100 | else: 101 | name = item.name 102 | if item.id not in list_of_ids_already_added: 103 | list_of_ids_already_added.append(item.id) 104 | char = chr(item.id + 48) 105 | categories.append({'id': item.id, 'name': name, 'char': char}) 106 | return categories 107 | 108 | 109 | def load_labelmap(path): 110 | """Loads label map proto. 111 | 112 | Args: 113 | path: path to StringIntLabelMap proto text file. 114 | Returns: 115 | a StringIntLabelMapProto 116 | """ 117 | with tf.gfile.GFile(path, 'r') as fid: 118 | label_map_string = fid.read() 119 | label_map = string_int_label_map_pb2.StringIntLabelMap() 120 | try: 121 | text_format.Merge(label_map_string, label_map) 122 | except text_format.ParseError: 123 | label_map.ParseFromString(label_map_string) 124 | _validate_label_map(label_map) 125 | return label_map 126 | 127 | 128 | def get_label_map_dict(label_map_path): 129 | """Reads a label map and returns a dictionary of label names to id. 130 | 131 | Args: 132 | label_map_path: path to label_map. 133 | 134 | Returns: 135 | A dictionary mapping label names to id. 
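  Example (illustrative only; any StringIntLabelMap text file works, here the
  face label map bundled under protos/ is assumed):
    get_label_map_dict('protos/face_label_map.pbtxt')
    # -> {'background': 2, 'face': 1}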
136 | """ 137 | label_map = load_labelmap(label_map_path) 138 | label_map_dict = {} 139 | for item in label_map.item: 140 | label_map_dict[item.name] = item.id 141 | return label_map_dict 142 | -------------------------------------------------------------------------------- /labels.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | name: "/m/01g317" 3 | id: 1 4 | display_name: "person" 5 | } 6 | item { 7 | name: "/m/0199g" 8 | id: 2 9 | display_name: "bicycle" 10 | } 11 | item { 12 | name: "/m/0k4j" 13 | id: 3 14 | display_name: "car" 15 | } 16 | item { 17 | name: "/m/04_sv" 18 | id: 4 19 | display_name: "motorcycle" 20 | } 21 | item { 22 | name: "/m/05czz6l" 23 | id: 5 24 | display_name: "airplane" 25 | } 26 | item { 27 | name: "/m/01bjv" 28 | id: 6 29 | display_name: "bus" 30 | } 31 | item { 32 | name: "/m/07jdr" 33 | id: 7 34 | display_name: "train" 35 | } 36 | item { 37 | name: "/m/07r04" 38 | id: 8 39 | display_name: "truck" 40 | } 41 | item { 42 | name: "/m/019jd" 43 | id: 9 44 | display_name: "boat" 45 | } 46 | item { 47 | name: "/m/015qff" 48 | id: 10 49 | display_name: "traffic light" 50 | } 51 | item { 52 | name: "/m/01pns0" 53 | id: 11 54 | display_name: "fire hydrant" 55 | } 56 | item { 57 | name: "/m/02pv19" 58 | id: 13 59 | display_name: "stop sign" 60 | } 61 | item { 62 | name: "/m/015qbp" 63 | id: 14 64 | display_name: "parking meter" 65 | } 66 | item { 67 | name: "/m/0cvnqh" 68 | id: 15 69 | display_name: "bench" 70 | } 71 | item { 72 | name: "/m/015p6" 73 | id: 16 74 | display_name: "bird" 75 | } 76 | item { 77 | name: "/m/01yrx" 78 | id: 17 79 | display_name: "cat" 80 | } 81 | item { 82 | name: "/m/0bt9lr" 83 | id: 18 84 | display_name: "dog" 85 | } 86 | item { 87 | name: "/m/03k3r" 88 | id: 19 89 | display_name: "horse" 90 | } 91 | item { 92 | name: "/m/07bgp" 93 | id: 20 94 | display_name: "sheep" 95 | } 96 | item { 97 | name: "/m/01xq0k1" 98 | id: 21 99 | display_name: "cow" 100 | } 101 | item { 102 | name: "/m/0bwd_0j" 103 | id: 22 104 | display_name: "elephant" 105 | } 106 | item { 107 | name: "/m/01dws" 108 | id: 23 109 | display_name: "bear" 110 | } 111 | item { 112 | name: "/m/0898b" 113 | id: 24 114 | display_name: "zebra" 115 | } 116 | item { 117 | name: "/m/03bk1" 118 | id: 25 119 | display_name: "giraffe" 120 | } 121 | item { 122 | name: "/m/01940j" 123 | id: 27 124 | display_name: "backpack" 125 | } 126 | item { 127 | name: "/m/0hnnb" 128 | id: 28 129 | display_name: "umbrella" 130 | } 131 | item { 132 | name: "/m/080hkjn" 133 | id: 31 134 | display_name: "handbag" 135 | } 136 | item { 137 | name: "/m/01rkbr" 138 | id: 32 139 | display_name: "tie" 140 | } 141 | item { 142 | name: "/m/01s55n" 143 | id: 33 144 | display_name: "suitcase" 145 | } 146 | item { 147 | name: "/m/02wmf" 148 | id: 34 149 | display_name: "frisbee" 150 | } 151 | item { 152 | name: "/m/071p9" 153 | id: 35 154 | display_name: "skis" 155 | } 156 | item { 157 | name: "/m/06__v" 158 | id: 36 159 | display_name: "snowboard" 160 | } 161 | item { 162 | name: "/m/018xm" 163 | id: 37 164 | display_name: "sports ball" 165 | } 166 | item { 167 | name: "/m/02zt3" 168 | id: 38 169 | display_name: "kite" 170 | } 171 | item { 172 | name: "/m/03g8mr" 173 | id: 39 174 | display_name: "baseball bat" 175 | } 176 | item { 177 | name: "/m/03grzl" 178 | id: 40 179 | display_name: "baseball glove" 180 | } 181 | item { 182 | name: "/m/06_fw" 183 | id: 41 184 | display_name: "skateboard" 185 | } 186 | item { 187 | name: "/m/019w40" 188 | id: 42 189 | 
display_name: "surfboard" 190 | } 191 | item { 192 | name: "/m/0dv9c" 193 | id: 43 194 | display_name: "tennis racket" 195 | } 196 | item { 197 | name: "/m/04dr76w" 198 | id: 44 199 | display_name: "bottle" 200 | } 201 | item { 202 | name: "/m/09tvcd" 203 | id: 46 204 | display_name: "wine glass" 205 | } 206 | item { 207 | name: "/m/08gqpm" 208 | id: 47 209 | display_name: "cup" 210 | } 211 | item { 212 | name: "/m/0dt3t" 213 | id: 48 214 | display_name: "fork" 215 | } 216 | item { 217 | name: "/m/04ctx" 218 | id: 49 219 | display_name: "knife" 220 | } 221 | item { 222 | name: "/m/0cmx8" 223 | id: 50 224 | display_name: "spoon" 225 | } 226 | item { 227 | name: "/m/04kkgm" 228 | id: 51 229 | display_name: "bowl" 230 | } 231 | item { 232 | name: "/m/09qck" 233 | id: 52 234 | display_name: "banana" 235 | } 236 | item { 237 | name: "/m/014j1m" 238 | id: 53 239 | display_name: "apple" 240 | } 241 | item { 242 | name: "/m/0l515" 243 | id: 54 244 | display_name: "sandwich" 245 | } 246 | item { 247 | name: "/m/0cyhj_" 248 | id: 55 249 | display_name: "orange" 250 | } 251 | item { 252 | name: "/m/0hkxq" 253 | id: 56 254 | display_name: "broccoli" 255 | } 256 | item { 257 | name: "/m/0fj52s" 258 | id: 57 259 | display_name: "carrot" 260 | } 261 | item { 262 | name: "/m/01b9xk" 263 | id: 58 264 | display_name: "hot dog" 265 | } 266 | item { 267 | name: "/m/0663v" 268 | id: 59 269 | display_name: "pizza" 270 | } 271 | item { 272 | name: "/m/0jy4k" 273 | id: 60 274 | display_name: "donut" 275 | } 276 | item { 277 | name: "/m/0fszt" 278 | id: 61 279 | display_name: "cake" 280 | } 281 | item { 282 | name: "/m/01mzpv" 283 | id: 62 284 | display_name: "chair" 285 | } 286 | item { 287 | name: "/m/02crq1" 288 | id: 63 289 | display_name: "couch" 290 | } 291 | item { 292 | name: "/m/03fp41" 293 | id: 64 294 | display_name: "potted plant" 295 | } 296 | item { 297 | name: "/m/03ssj5" 298 | id: 65 299 | display_name: "bed" 300 | } 301 | item { 302 | name: "/m/04bcr3" 303 | id: 67 304 | display_name: "dining table" 305 | } 306 | item { 307 | name: "/m/09g1w" 308 | id: 70 309 | display_name: "toilet" 310 | } 311 | item { 312 | name: "/m/07c52" 313 | id: 72 314 | display_name: "tv" 315 | } 316 | item { 317 | name: "/m/01c648" 318 | id: 73 319 | display_name: "laptop" 320 | } 321 | item { 322 | name: "/m/020lf" 323 | id: 74 324 | display_name: "mouse" 325 | } 326 | item { 327 | name: "/m/0qjjc" 328 | id: 75 329 | display_name: "remote" 330 | } 331 | item { 332 | name: "/m/01m2v" 333 | id: 76 334 | display_name: "keyboard" 335 | } 336 | item { 337 | name: "/m/050k8" 338 | id: 77 339 | display_name: "cell phone" 340 | } 341 | item { 342 | name: "/m/0fx9l" 343 | id: 78 344 | display_name: "microwave" 345 | } 346 | item { 347 | name: "/m/029bxz" 348 | id: 79 349 | display_name: "oven" 350 | } 351 | item { 352 | name: "/m/01k6s3" 353 | id: 80 354 | display_name: "toaster" 355 | } 356 | item { 357 | name: "/m/0130jx" 358 | id: 81 359 | display_name: "sink" 360 | } 361 | item { 362 | name: "/m/040b_t" 363 | id: 82 364 | display_name: "refrigerator" 365 | } 366 | item { 367 | name: "/m/0bt_c3" 368 | id: 84 369 | display_name: "book" 370 | } 371 | item { 372 | name: "/m/01x3z" 373 | id: 85 374 | display_name: "clock" 375 | } 376 | item { 377 | name: "/m/02s195" 378 | id: 86 379 | display_name: "vase" 380 | } 381 | item { 382 | name: "/m/01lsmm" 383 | id: 87 384 | display_name: "scissors" 385 | } 386 | item { 387 | name: "/m/0kmg4" 388 | id: 88 389 | display_name: "teddy bear" 390 | } 391 | item { 392 | name: "/m/03wvsk" 393 
| id: 89 394 | display_name: "hair drier" 395 | } 396 | item { 397 | name: "/m/012xff" 398 | id: 90 399 | display_name: "toothbrush" 400 | } 401 | -------------------------------------------------------------------------------- /utils/visualization_utils_color.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """A set of functions that are used for visualization. 17 | 18 | These functions often receive an image, perform some visualization on the image. 19 | The functions do not return a value, instead they modify the image itself. 20 | 21 | """ 22 | import collections 23 | import numpy as np 24 | import PIL.Image as Image 25 | import PIL.ImageColor as ImageColor 26 | import PIL.ImageDraw as ImageDraw 27 | import PIL.ImageFont as ImageFont 28 | import six 29 | import tensorflow as tf 30 | 31 | 32 | _TITLE_LEFT_MARGIN = 10 33 | _TITLE_TOP_MARGIN = 10 34 | STANDARD_COLORS = [ 35 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 36 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 37 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 38 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 39 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 40 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 41 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 42 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 43 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 44 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 45 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 46 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 47 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 48 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 49 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 50 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 51 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 52 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 53 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 54 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 55 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 56 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 57 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 58 | ] 59 | 60 | 61 | def save_image_array_as_png(image, output_path): 62 | """Saves an image (represented as a numpy 
array) to PNG. 63 | 64 | Args: 65 | image: a numpy array with shape [height, width, 3]. 66 | output_path: path to which image should be written. 67 | """ 68 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 69 | with tf.gfile.Open(output_path, 'w') as fid: 70 | image_pil.save(fid, 'PNG') 71 | 72 | 73 | def encode_image_array_as_png_str(image): 74 | """Encodes a numpy array into a PNG string. 75 | 76 | Args: 77 | image: a numpy array with shape [height, width, 3]. 78 | 79 | Returns: 80 | PNG encoded image string. 81 | """ 82 | image_pil = Image.fromarray(np.uint8(image)) 83 | output = six.BytesIO() 84 | image_pil.save(output, format='PNG') 85 | png_string = output.getvalue() 86 | output.close() 87 | return png_string 88 | 89 | 90 | def draw_bounding_box_on_image_array(image, 91 | ymin, 92 | xmin, 93 | ymax, 94 | xmax, 95 | color='red', 96 | thickness=4, 97 | display_str_list=(), 98 | use_normalized_coordinates=True): 99 | """Adds a bounding box to an image (numpy array). 100 | 101 | Args: 102 | image: a numpy array with shape [height, width, 3]. 103 | ymin: ymin of bounding box in normalized coordinates (same below). 104 | xmin: xmin of bounding box. 105 | ymax: ymax of bounding box. 106 | xmax: xmax of bounding box. 107 | color: color to draw bounding box. Default is red. 108 | thickness: line thickness. Default value is 4. 109 | display_str_list: list of strings to display in box 110 | (each to be shown on its own line). 111 | use_normalized_coordinates: If True (default), treat coordinates 112 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 113 | coordinates as absolute. 114 | """ 115 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 116 | draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, 117 | thickness, display_str_list, 118 | use_normalized_coordinates) 119 | np.copyto(image, np.array(image_pil)) 120 | 121 | 122 | def draw_bounding_box_on_image(image, 123 | ymin, 124 | xmin, 125 | ymax, 126 | xmax, 127 | color='red', 128 | thickness=4, 129 | display_str_list=(), 130 | use_normalized_coordinates=True): 131 | """Adds a bounding box to an image. 132 | 133 | Each string in display_str_list is displayed on a separate line above the 134 | bounding box in black text on a rectangle filled with the input 'color'. 135 | 136 | Args: 137 | image: a PIL.Image object. 138 | ymin: ymin of bounding box. 139 | xmin: xmin of bounding box. 140 | ymax: ymax of bounding box. 141 | xmax: xmax of bounding box. 142 | color: color to draw bounding box. Default is red. 143 | thickness: line thickness. Default value is 4. 144 | display_str_list: list of strings to display in box 145 | (each to be shown on its own line). 146 | use_normalized_coordinates: If True (default), treat coordinates 147 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 148 | coordinates as absolute. 
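  Example (illustrative only; `pil_image` stands for any PIL.Image object):
    draw_bounding_box_on_image(pil_image, 0.1, 0.2, 0.6, 0.8,
                               color='LimeGreen',
                               display_str_list=['person: 87%'])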
149 | """ 150 | is_drawing = True 151 | draw = ImageDraw.Draw(image) 152 | im_width, im_height = image.size 153 | if use_normalized_coordinates: 154 | (left, right, top, bottom) = (xmin * im_width, xmax * im_width, 155 | ymin * im_height, ymax * im_height) 156 | else: 157 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 158 | if is_drawing: 159 | draw.line([(left, top), (left, bottom), (right, bottom), 160 | (right, top), (left, top)], width=thickness, fill=color) 161 | try: 162 | font = ImageFont.truetype('arial.ttf', 24) 163 | except IOError: 164 | font = ImageFont.load_default() 165 | 166 | text_bottom = top 167 | # Reverse list and print from bottom to top. 168 | for display_str in display_str_list[::-1]: 169 | text_width, text_height = font.getsize(display_str) 170 | margin = np.ceil(0.05 * text_height) 171 | if is_drawing: 172 | draw.rectangle( 173 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 174 | text_bottom)], 175 | fill=color) 176 | draw.text( 177 | (left + margin, text_bottom - text_height - margin), 178 | display_str, 179 | fill='black', 180 | font=font) 181 | text_bottom -= text_height - 2 * margin 182 | 183 | 184 | def draw_bounding_boxes_on_image_array(image, 185 | boxes, 186 | color='red', 187 | thickness=4, 188 | display_str_list_list=()): 189 | """Draws bounding boxes on image (numpy array). 190 | 191 | Args: 192 | image: a numpy array object. 193 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 194 | The coordinates are in normalized format between [0, 1]. 195 | color: color to draw bounding box. Default is red. 196 | thickness: line thickness. Default value is 4. 197 | display_str_list_list: list of list of strings. 198 | a list of strings for each bounding box. 199 | The reason to pass a list of strings for a 200 | bounding box is that it might contain 201 | multiple labels. 202 | 203 | Raises: 204 | ValueError: if boxes is not a [N, 4] array 205 | """ 206 | image_pil = Image.fromarray(image) 207 | draw_bounding_boxes_on_image(image_pil, boxes, color, thickness, 208 | display_str_list_list) 209 | np.copyto(image, np.array(image_pil)) 210 | 211 | 212 | def draw_bounding_boxes_on_image(image, 213 | boxes, 214 | color='red', 215 | thickness=4, 216 | display_str_list_list=()): 217 | """Draws bounding boxes on image. 218 | 219 | Args: 220 | image: a PIL.Image object. 221 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 222 | The coordinates are in normalized format between [0, 1]. 223 | color: color to draw bounding box. Default is red. 224 | thickness: line thickness. Default value is 4. 225 | display_str_list_list: list of list of strings. 226 | a list of strings for each bounding box. 227 | The reason to pass a list of strings for a 228 | bounding box is that it might contain 229 | multiple labels. 
230 | 231 | Raises: 232 | ValueError: if boxes is not a [N, 4] array 233 | """ 234 | boxes_shape = boxes.shape 235 | if not boxes_shape: 236 | return 237 | if len(boxes_shape) != 2 or boxes_shape[1] != 4: 238 | raise ValueError('Input must be of size [N, 4]') 239 | for i in range(boxes_shape[0]): 240 | display_str_list = () 241 | if display_str_list_list: 242 | display_str_list = display_str_list_list[i] 243 | draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2], 244 | boxes[i, 3], color, thickness, display_str_list) 245 | 246 | 247 | def draw_keypoints_on_image_array(image, 248 | keypoints, 249 | color='red', 250 | radius=2, 251 | use_normalized_coordinates=True): 252 | """Draws keypoints on an image (numpy array). 253 | 254 | Args: 255 | image: a numpy array with shape [height, width, 3]. 256 | keypoints: a numpy array with shape [num_keypoints, 2]. 257 | color: color to draw the keypoints with. Default is red. 258 | radius: keypoint radius. Default value is 2. 259 | use_normalized_coordinates: if True (default), treat keypoint values as 260 | relative to the image. Otherwise treat them as absolute. 261 | """ 262 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 263 | draw_keypoints_on_image(image_pil, keypoints, color, radius, 264 | use_normalized_coordinates) 265 | np.copyto(image, np.array(image_pil)) 266 | 267 | 268 | def draw_keypoints_on_image(image, 269 | keypoints, 270 | color='red', 271 | radius=2, 272 | use_normalized_coordinates=True): 273 | """Draws keypoints on an image. 274 | 275 | Args: 276 | image: a PIL.Image object. 277 | keypoints: a numpy array with shape [num_keypoints, 2]. 278 | color: color to draw the keypoints with. Default is red. 279 | radius: keypoint radius. Default value is 2. 280 | use_normalized_coordinates: if True (default), treat keypoint values as 281 | relative to the image. Otherwise treat them as absolute. 282 | """ 283 | draw = ImageDraw.Draw(image) 284 | im_width, im_height = image.size 285 | keypoints_x = [k[1] for k in keypoints] 286 | keypoints_y = [k[0] for k in keypoints] 287 | if use_normalized_coordinates: 288 | keypoints_x = tuple([im_width * x for x in keypoints_x]) 289 | keypoints_y = tuple([im_height * y for y in keypoints_y]) 290 | for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y): 291 | draw.ellipse([(keypoint_x - radius, keypoint_y - radius), 292 | (keypoint_x + radius, keypoint_y + radius)], 293 | outline=color, fill=color) 294 | 295 | 296 | def draw_mask_on_image_array(image, mask, color='red', alpha=0.7): 297 | """Draws mask on an image. 298 | 299 | Args: 300 | image: uint8 numpy array with shape (img_height, img_height, 3) 301 | mask: a float numpy array of shape (img_height, img_height) with 302 | values between 0 and 1 303 | color: color to draw the keypoints with. Default is red. 304 | alpha: transparency value between 0 and 1. (default: 0.7) 305 | 306 | Raises: 307 | ValueError: On incorrect data type for image or masks. 
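  Example (illustrative only; paints a translucent red square onto `image`,
  assumed to be a uint8 numpy array of shape (H, W, 3)):
    mask = np.zeros(image.shape[:2], dtype=np.float32)
    mask[50:150, 50:150] = 1.0
    draw_mask_on_image_array(image, mask, color='Red', alpha=0.4)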
308 | """ 309 | if image.dtype != np.uint8: 310 | raise ValueError('`image` not of type np.uint8') 311 | if mask.dtype != np.float32: 312 | raise ValueError('`mask` not of type np.float32') 313 | if np.any(np.logical_or(mask > 1.0, mask < 0.0)): 314 | raise ValueError('`mask` elements should be in [0, 1]') 315 | rgb = ImageColor.getrgb(color) 316 | pil_image = Image.fromarray(image) 317 | 318 | solid_color = np.expand_dims( 319 | np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3]) 320 | pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA') 321 | pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L') 322 | pil_image = Image.composite(pil_solid_color, pil_image, pil_mask) 323 | np.copyto(image, np.array(pil_image.convert('RGB'))) 324 | 325 | 326 | def visualize_boxes_and_labels_on_image_array(image, 327 | boxes, 328 | classes, 329 | scores, 330 | category_index, 331 | instance_masks=None, 332 | keypoints=None, 333 | use_normalized_coordinates=False, 334 | max_boxes_to_draw=20, 335 | min_score_thresh=.7, 336 | agnostic_mode=False, 337 | line_thickness=4, 338 | sequence_sorted=False, 339 | sequence_type='char', 340 | matched_area=None): 341 | """Overlay labeled boxes on an image with formatted scores and label names. 342 | 343 | This function groups boxes that correspond to the same location 344 | and creates a display string for each detection and overlays these 345 | on the image. Note that this function modifies the image array in-place 346 | and does not return anything. 347 | 348 | Args: 349 | image: uint8 numpy array with shape (img_height, img_width, 3) 350 | boxes: a numpy array of shape [N, 4] 351 | classes: a numpy array of shape [N] 352 | scores: a numpy array of shape [N] or None. If scores=None, then 353 | this function assumes that the boxes to be plotted are groundtruth 354 | boxes and plot all boxes as black with no classes or scores. 355 | category_index: a dict containing category dictionaries (each holding 356 | category index `id` and category name `name`) keyed by category indices. 357 | instance_masks: a numpy array of shape [N, image_height, image_width], can 358 | be None 359 | keypoints: a numpy array of shape [N, num_keypoints, 2], can 360 | be None 361 | use_normalized_coordinates: whether boxes is to be interpreted as 362 | normalized coordinates or not. 363 | max_boxes_to_draw: maximum number of boxes to visualize. If None, draw 364 | all boxes. 365 | min_score_thresh: minimum score threshold for a box to be visualized 366 | agnostic_mode: boolean (default: False) controlling whether to evaluate in 367 | class-agnostic mode or not. This mode will display scores but ignore 368 | classes. 369 | line_thickness: integer (default: 4) controlling line width of the boxes. 370 | """ 371 | # Create a display string (and color) for every box location, group any boxes 372 | # that correspond to the same location. 
373 | cur_char = 48 374 | result = {'sequence': '', 'objects': []} 375 | class_values = {'id': 0, 'name': 'N/A', 'char': '', 'uid': '', 'score': 0} 376 | box_to_display_str_map = collections.defaultdict(list) 377 | box_to_color_map = collections.defaultdict(str) 378 | box_to_instance_masks_map = {} 379 | box_to_keypoints_map = collections.defaultdict(list) 380 | if not max_boxes_to_draw: 381 | max_boxes_to_draw = boxes.shape[0] 382 | for i in range(min(max_boxes_to_draw, boxes.shape[0])): 383 | if scores is None or scores[i] > min_score_thresh: 384 | box = tuple(boxes[i].tolist()) 385 | if instance_masks is not None: 386 | box_to_instance_masks_map[box] = instance_masks[i] 387 | if keypoints is not None: 388 | box_to_keypoints_map[box].extend(keypoints[i]) 389 | if scores is None: 390 | #box_to_color_map[box] = 'black' 391 | box_to_color_map[box] = class_values 392 | else: 393 | if not agnostic_mode: 394 | if classes[i] in category_index.keys(): 395 | class_values = category_index[classes[i]] 396 | class_values['score'] = int(100*scores[i]) 397 | class_values['uid'] = chr(cur_char) 398 | cur_char += 1 399 | display_str = '{}: {}%'.format( 400 | class_values['name'], 401 | class_values['score']) 402 | else: 403 | class_values['score'] = int(100*scores[i]) 404 | display_str = 'score: {}%'.format(class_values['score']) 405 | box_to_display_str_map[box].append(display_str) 406 | box_to_color_map[box] = class_values 407 | 408 | # Draw all boxes onto image. 409 | elements = {} 410 | for box, class_values in box_to_color_map.items(): 411 | color = 'Violet' 412 | ymin, xmin, ymax, xmax = box 413 | im_height, im_width, _ = image.shape 414 | (left, right, top, bottom) = (int(xmin * im_width), int(xmax * im_width), int(ymin * im_height), int(ymax * im_height)) 415 | coords = (left, right, top, bottom) 416 | if matched_area is None: 417 | global_coords = coords 418 | else: 419 | global_coords = (matched_area[2] + left, matched_area[2] + right, matched_area[0] + top, matched_area[0] + bottom) 420 | crop = image[top:bottom,left:right] 421 | elements[xmin] = class_values[sequence_type] 422 | result['objects'].append({'values': class_values, 'coords': coords, 'global_coords': global_coords, 'norm': box, 'image': crop}) 423 | if instance_masks is not None: 424 | draw_mask_on_image_array( 425 | image, 426 | box_to_instance_masks_map[box], 427 | color=color 428 | ) 429 | draw_bounding_box_on_image_array( 430 | image, 431 | ymin, 432 | xmin, 433 | ymax, 434 | xmax, 435 | color=color, 436 | thickness=line_thickness, 437 | display_str_list=box_to_display_str_map[box], 438 | use_normalized_coordinates=use_normalized_coordinates) 439 | if keypoints is not None: 440 | draw_keypoints_on_image_array( 441 | image, 442 | box_to_keypoints_map[box], 443 | color=color, 444 | radius=line_thickness / 2, 445 | use_normalized_coordinates=use_normalized_coordinates) 446 | if sequence_sorted: 447 | for i in sorted(elements): 448 | result['sequence'] += elements[i] 449 | else: 450 | for i in elements: 451 | result['sequence'] += elements[i] 452 | return result 453 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # pylint: disable=C0103 4 | # pylint: disable=E1101 5 | 6 | # python app.py 1 1 -1 video1.mp4 video1.mp4 7 | 8 | import os 9 | import sys 10 | import cv2 11 | import time 12 | import math 13 | import signal 14 | import youtube_dl 15 | import 
numpy as np 16 | import edit_distance 17 | import tensorflow as tf 18 | from imutils.video import FileVideoStream 19 | from imutils.video import FPS 20 | from utils import label_map_util 21 | from utils import visualization_utils_color as vis_util 22 | 23 | DEBUG_TIME = False 24 | DEBUG_ALPHA = False 25 | DEBUG_SKIPS = False 26 | out = None 27 | last_frame = None 28 | last_time = None 29 | out_fps = 30 30 | video_num = 0 31 | max_videos = 0 32 | video_path_1 = 0 33 | video_path_2 = 0 34 | download_list = [] 35 | download_item = None 36 | last_message = '' 37 | frames_skipped = 0 38 | recalculate_fps = False 39 | cv2.ocl.setUseOpenCL(False) 40 | 41 | def compare_videos(path_video_1, path_video_2): 42 | global detection_graph, from_frame, recalculate_fps, out 43 | PATH_TO_CKPT = './ssd_inception2.pb' 44 | PATH_TO_LABELS = './labels.pbtxt' 45 | thresh = 0.2 46 | sequence_sorted = False 47 | store_output = True 48 | enable_tracking = True 49 | enable_detection = True 50 | adjust_frame = True 51 | adjust_perspective = True 52 | enable_tracking_template = True 53 | only_use_template_when_none = True 54 | enable_objects_threshold = False 55 | at_least_one_match = False 56 | recalculate_time = 0 57 | sequence_type = 'char' 58 | descriptor = "surf" 59 | tracker_type = 'MEDIANFLOW' # 'BOOSTING','MIL','KCF','TLD','MEDIANFLOW','GOTURN' 60 | NUM_CLASSES = 90 61 | MIN_MATCH_COUNT = 10 62 | SIMILARITY_THRESHOLD = 0.1 63 | trackers = {} 64 | positions = {} 65 | source_frame = 0 66 | ok = None 67 | font = cv2.FONT_HERSHEY_SIMPLEX 68 | size = 1 69 | weight = 2 70 | color = (255,255,255) 71 | skips_max = 0 72 | skips_number = 0 73 | total_frames = 0 74 | 75 | last_message = '' 76 | label_map = label_map_util.load_labelmap(PATH_TO_LABELS) 77 | categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) 78 | category_index = label_map_util.create_category_index(categories) 79 | detection_graph = tf.Graph() 80 | with detection_graph.as_default(): 81 | od_graph_def = tf.GraphDef() 82 | with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: 83 | serialized_graph = fid.read() 84 | od_graph_def.ParseFromString(serialized_graph) 85 | tf.import_graph_def(od_graph_def, name='') 86 | with detection_graph.as_default(): 87 | config = tf.ConfigProto() 88 | config.gpu_options.allow_growth = True 89 | with tf.Session(graph=detection_graph, config=config) as sess: 90 | total_time_init = time.time() 91 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 92 | #sift = cv2.xfeatures2d.SIFT_create() 93 | surf = cv2.xfeatures2d.SURF_create() 94 | #fast = cv2.FastFeatureDetector_create() 95 | #orb = cv2.ORB_create() 96 | desc = surf 97 | show_points = 20 98 | video_1 = cv2.VideoCapture(path_video_1) 99 | fps_1 = video_1.get(cv2.CAP_PROP_FPS) 100 | if DEBUG_TIME: 101 | print('fps_1', fps_1) 102 | video_2 = cv2.VideoCapture(path_video_2) 103 | fps_2 = video_2.get(cv2.CAP_PROP_FPS) 104 | if DEBUG_TIME: 105 | print('fps_2', fps_2) 106 | out = None 107 | use_descriptor = True 108 | use_detection = False 109 | use_tracking = False 110 | matched_area = None 111 | frames_to_skip = 0 112 | processed_frames = 0 113 | from_frame_1 = int(sys.argv[1]) 114 | from_frame_2 = int(sys.argv[2]) 115 | video_1.set(cv2.CAP_PROP_POS_FRAMES, from_frame_1) 116 | video_2.set(cv2.CAP_PROP_POS_FRAMES, from_frame_2) 117 | _, frame_1 = video_1.read() 118 | objects_1 = detect_objects(frame_1, thresh, detection_graph, sess, category_index, sequence_sorted=sequence_sorted, sequence_type=sequence_type) 119 | sequence_1 
= objects_1['sequence'] 120 | cv2.putText(frame_1, "skip: %s src: %s" % (processed_frames, sequence_1), (10, 30), font, size, color, weight) 121 | objects_2 = None 122 | area_2 = None 123 | sequence_2 = '' 124 | desc_kp_1, desc_des_1 = desc.detectAndCompute(frame_1, None) 125 | #print( desc.descriptorSize() ) 126 | #print( desc_des_1.shape ) 127 | until_end = False 128 | frame_num = int(sys.argv[3]) 129 | if frame_num == -1: 130 | until_end = True 131 | while frame_num or until_end: 132 | total_frames += 1 133 | if recalculate_fps: 134 | if at_least_one_match: 135 | to_frame = from_frame_1 + math.ceil((time.time() - recalculate_time) * fps_1) 136 | if to_frame >= frame_num: 137 | break 138 | frame_num -= 1 139 | ok, frame_2 = video_2.read() 140 | if not ok: 141 | break 142 | 143 | if use_tracking: 144 | sequence_tmp = '' 145 | for object_2 in objects_2['objects']: 146 | if object_2['coords'] in trackers: 147 | start_time = time.time() 148 | ok, box = trackers[object_2['coords']].update(frame_2) 149 | box = (int(box[0]), int(box[1]), int(box[2]), int(box[3])) 150 | elapsed_time = time.time() - start_time 151 | if DEBUG_TIME: 152 | print('tracking method', elapsed_time) 153 | if ok: 154 | sequence_tmp += object_2['values'][sequence_type] 155 | cv2.rectangle(frame_2, (box[0], box[2]), (box[1], box[3]), (255, 0, 0), 2) 156 | else: 157 | if enable_tracking_template: 158 | process_static = True 159 | if only_use_template_when_none: 160 | num_matches = get_sequence_matches(sequence_1, sequence_tmp) 161 | if num_matches > 0: 162 | process_static = False 163 | if process_static: 164 | start_time = time.time() 165 | res = cv2.matchTemplate(frame_2, object_2['image'], cv2.TM_CCOEFF_NORMED) 166 | elapsed_time = time.time() - start_time 167 | if DEBUG_TIME: 168 | print('tracking match', elapsed_time) 169 | min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res) 170 | threshold = 0.8 171 | if max_val > threshold: 172 | sequence_tmp += object_2['values'][sequence_type] 173 | top_left = max_loc 174 | h, w, _ = object_2['image'].shape 175 | bottom_right = (top_left[0] + w, top_left[1] + h) 176 | cv2.rectangle(frame_2, top_left, bottom_right, (0, 255, 0), 2) 177 | 178 | cv2.putText(frame_2, "alp: %s" % (sequence_2), (10, 30), font, size, color, weight) 179 | num_matches = get_sequence_matches(sequence_1, sequence_tmp) 180 | if num_matches > 0: 181 | if DEBUG_ALPHA: 182 | print_once('eq: %s ref: %s new: %s' % (num_matches, sequence_1, sequence_tmp)) 183 | else: 184 | source_frame += processed_frames 185 | if DEBUG_SKIPS: 186 | print('skipped frames: %s' % (processed_frames)) 187 | skips_number += 1 188 | skips_max = processed_frames if processed_frames > skips_max else skips_max 189 | if not recalculate_fps: 190 | video_1.set(cv2.CAP_PROP_POS_FRAMES, from_frame_1 + source_frame) 191 | else: 192 | to_frame = from_frame_1 + math.ceil((time.time() - recalculate_time) * fps_1) 193 | video_1.set(cv2.CAP_PROP_POS_FRAMES, to_frame) 194 | ok, frame_1 = video_1.read() 195 | objects_1 = detect_objects(frame_1, thresh, detection_graph, sess, category_index, sequence_sorted=sequence_sorted, sequence_type=sequence_type) 196 | sequence_1 = objects_1['sequence'] 197 | cv2.putText(frame_1, "skip: %s src: %s" % (processed_frames, sequence_1), (10, 30), font, size, color, weight) 198 | desc_kp_1, desc_des_1 = desc.detectAndCompute(frame_1, None) 199 | use_tracking = False 200 | use_detection = True 201 | use_descriptor = False 202 | processed_frames = 0 203 | 204 | if use_detection: 205 | if adjust_frame: 206 | area_2 = 
frame_2[matched_area[0]:matched_area[1],matched_area[2]:matched_area[3]] 207 | else: 208 | area_2 = frame_2 209 | objects_2 = detect_objects(area_2, thresh, detection_graph, sess, category_index, matched_area=matched_area, sequence_sorted=sequence_sorted, sequence_type=sequence_type) 210 | sequence_2 = objects_2['sequence'] 211 | cv2.putText(frame_2, "alp: %s" % (sequence_2), (10, 30), font, size, color, weight) 212 | num_matches = get_sequence_matches(sequence_1, sequence_2) 213 | if DEBUG_ALPHA: 214 | print_once('eq: %s ref: %s new: %s' % (num_matches, sequence_1, sequence_2)) 215 | if num_matches > 0: 216 | trackers = {} 217 | were_coords_valid = False 218 | if enable_tracking: 219 | for object_2 in objects_2['objects']: 220 | if are_coords_valid(object_2['coords'], area_2.shape): 221 | trackers[object_2['coords']] = create_tracker(tracker_type) 222 | if adjust_frame: 223 | trackers[object_2['coords']].init(frame_2, object_2['global_coords']) 224 | else: 225 | trackers[object_2['coords']].init(frame_2, object_2['coords']) 226 | were_coords_valid = True 227 | if were_coords_valid: 228 | if enable_tracking: 229 | use_tracking = True 230 | use_detection = False 231 | use_descriptor = False 232 | else: 233 | use_tracking = False 234 | use_detection = True 235 | use_descriptor = False 236 | else: 237 | use_tracking = False 238 | use_detection = True 239 | use_descriptor = False 240 | else: 241 | use_tracking = False 242 | use_detection = False 243 | use_descriptor = True 244 | if not enable_tracking: 245 | source_frame += processed_frames 246 | if DEBUG_SKIPS: 247 | print('detector skipped frames: %s' % (processed_frames)) 248 | skips_number += 1 249 | skips_max = processed_frames if processed_frames > skips_max else skips_max 250 | if not recalculate_fps: 251 | video_1.set(cv2.CAP_PROP_POS_FRAMES, from_frame_1 + source_frame) 252 | else: 253 | to_frame = from_frame_1 + math.ceil((time.time() - recalculate_time) * fps_1) 254 | video_1.set(cv2.CAP_PROP_POS_FRAMES, to_frame) 255 | ok, frame_1 = video_1.read() 256 | cv2.putText(frame_1, "skip: %s" % (processed_frames), (10, 30), font, size, color, weight) 257 | desc_kp_1, desc_des_1 = desc.detectAndCompute(frame_1, None) 258 | processed_frames = 0 259 | 260 | if use_descriptor: 261 | matched_area = None 262 | descriptor_matched = False 263 | start_time = time.time() 264 | desc_kp_2, desc_des_2 = desc.detectAndCompute(frame_2, None) 265 | elapsed_time = time.time() - start_time 266 | if DEBUG_TIME: 267 | print(descriptor, elapsed_time) 268 | 269 | if descriptor == "sift" or descriptor == "surf" or descriptor == "fast": 270 | FLANN_INDEX_KDTREE = 1 271 | index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5) 272 | search_params = dict(checks=50) # or pass empty dictionary 273 | start_time = time.time() 274 | flann = cv2.FlannBasedMatcher(index_params, search_params) 275 | try: 276 | matches = flann.knnMatch(desc_des_1, desc_des_2, k=2) 277 | except: 278 | matches = [] 279 | elapsed_time = time.time() - start_time 280 | if DEBUG_TIME: 281 | print('FLANN', elapsed_time) 282 | good = [] 283 | for m,n in matches: 284 | if m.distance < 0.7*n.distance: 285 | good.append(m) 286 | area_2 = frame_2 287 | similarity = 0 288 | if len(matches) > 0: 289 | similarity = len(good) / len(matches) 290 | if len(good) > MIN_MATCH_COUNT: 291 | src_pts = np.float32([ desc_kp_1[m.queryIdx].pt for m in good ]).reshape(-1,1,2) 292 | dst_pts = np.float32([ desc_kp_2[m.trainIdx].pt for m in good ]).reshape(-1,1,2) 293 | M, mask = cv2.findHomography(src_pts, 
dst_pts, cv2.RANSAC, 5.0) 294 | matchesMask = mask.ravel().tolist() 295 | h,w,d = frame_1.shape 296 | pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2) 297 | try: 298 | dst = cv2.perspectiveTransform(pts,M) 299 | matched_area = get_rect_from_dst(dst, frame_2.shape) 300 | trans_coords = get_transformed_coords(dst, matched_area) 301 | frame_2 = cv2.polylines(frame_2,[np.int32(dst)],True,255,3, cv2.LINE_AA) 302 | calc_height = matched_area[1] - matched_area[0] 303 | calc_width = matched_area[3] - matched_area[2] 304 | frame_height = frame_2.shape[0] 305 | frame_width = frame_2.shape[1] 306 | sim_rate = 1 + (((1 - (calc_height / frame_height)) + (1 - (calc_width / frame_width))) / 2) 307 | similarity *= sim_rate 308 | if similarity > SIMILARITY_THRESHOLD: 309 | descriptor_matched = True 310 | except: 311 | pass 312 | else: 313 | if DEBUG_TIME: 314 | print( "Not enough matches were found - {}/{}".format(len(good), MIN_MATCH_COUNT) ) 315 | matchesMask = None 316 | 317 | if not descriptor_matched: 318 | if at_least_one_match: 319 | source_frame += processed_frames 320 | if DEBUG_SKIPS: 321 | print('descriptor skipped frames: %s' % (processed_frames)) 322 | skips_number += 1 323 | skips_max = processed_frames if processed_frames > skips_max else skips_max 324 | if not recalculate_fps: 325 | video_1.set(cv2.CAP_PROP_POS_FRAMES, from_frame_1 + source_frame) 326 | else: 327 | to_frame = from_frame_1 + math.ceil((time.time() - recalculate_time) * fps_1) 328 | video_1.set(cv2.CAP_PROP_POS_FRAMES, to_frame) 329 | ok, frame_1 = video_1.read() 330 | cv2.putText(frame_1, "skip: %s" % (processed_frames), (10, 30), font, size, color, weight) 331 | desc_kp_1, desc_des_1 = desc.detectAndCompute(frame_1, None) 332 | processed_frames = 0 333 | else: 334 | if not at_least_one_match and recalculate_fps: 335 | recalculate_time = time.time() 336 | at_least_one_match = True 337 | if enable_detection: 338 | if adjust_frame: 339 | if is_matched_area_okay(trans_coords, frame_2.shape): 340 | area_2 = frame_2[matched_area[0]:matched_area[1],matched_area[2]:matched_area[3]] 341 | area_2 = cv2.polylines(area_2,[np.array(trans_coords)],True,255,3, cv2.LINE_AA) 342 | if adjust_perspective: 343 | area_2 = four_point_transform(area_2, trans_coords) 344 | else: 345 | area_2 = frame_2 346 | else: 347 | area_2 = frame_2 348 | objects_2 = detect_objects(area_2, thresh, detection_graph, sess, category_index, matched_area=matched_area, sequence_sorted=sequence_sorted, sequence_type=sequence_type) 349 | sequence_2 = objects_2['sequence'] 350 | cv2.putText(frame_2, "alp: %s" % (sequence_2), (10, 30), font, size, color, weight) 351 | num_matches = get_sequence_matches(sequence_1, sequence_2) 352 | if DEBUG_ALPHA: 353 | print_once('eq: %s ref: %s new: %s' % (num_matches, sequence_1, sequence_2)) 354 | if num_matches > 0: 355 | use_descriptor = False 356 | use_detection = True 357 | use_tracking = False 358 | else: 359 | source_frame += processed_frames 360 | if DEBUG_SKIPS: 361 | print('descriptor detector skipped frames: %s' % (processed_frames)) 362 | skips_number += 1 363 | skips_max = processed_frames if processed_frames > skips_max else skips_max 364 | if not recalculate_fps: 365 | video_1.set(cv2.CAP_PROP_POS_FRAMES, from_frame_1 + source_frame) 366 | else: 367 | to_frame = from_frame_1 + math.ceil((time.time() - recalculate_time) * fps_1) 368 | video_1.set(cv2.CAP_PROP_POS_FRAMES, to_frame) 369 | ok, frame_1 = video_1.read() 370 | desc_kp_1, desc_des_1 = desc.detectAndCompute(frame_1, None) 371 | objects_1 = 
detect_objects(frame_1, thresh, detection_graph, sess, category_index, sequence_sorted=sequence_sorted, sequence_type=sequence_type) 372 | sequence_1 = objects_1['sequence'] 373 | processed_frames = 0 374 | use_descriptor = True 375 | use_detection = False 376 | use_tracking = False 377 | 378 | #matches_img = cv2.drawMatches(frame_1, desc_kp_1, frame_2, desc_kp_2, good, None, **draw_params) 379 | 380 | if at_least_one_match: 381 | processed_frames += 1 382 | if matchesMask is None: 383 | matchesMask = [] 384 | draw_params = dict( 385 | matchesMask = matchesMask[:show_points], # draw only inliers 386 | flags = 2) 387 | #print("%s of %s rate %s" % (len(good), len(matches), len(good)/len(matches))) 388 | try: 389 | matches_img = cv2.drawMatches(frame_1, desc_kp_1, frame_2, desc_kp_2, good[:show_points], None, **draw_params) 390 | except: 391 | matches_img = frame_1 392 | if store_output: 393 | if out == None: 394 | out = cv2.VideoWriter('out.avi', fourcc, 30.0, (matches_img.shape[1], matches_img.shape[0]), True) 395 | if not recalculate_fps: 396 | out.write(matches_img) 397 | else: 398 | video_insert(matches_img) 399 | cv2.imshow("Matches", matches_img) 400 | cv2.waitKey(1) 401 | if store_output: 402 | if out is not None: 403 | if recalculate_fps: 404 | video_close() 405 | out.release() 406 | print('--- STATS ---') 407 | total_time = time.time() - total_time_init 408 | print('TOTAL TIME: ', total_time) 409 | print('TOTAL FRAMES: ', total_frames) 410 | print('SKIPS NUMBER: ', skips_number) 411 | print('MAX SKIP: ', skips_max) 412 | 413 | def video_insert(frame): 414 | global out, last_frame, last_time 415 | if last_time is None: 416 | last_time = time.time() 417 | else: 418 | num_frames = math.floor((time.time() - last_time) * out_fps) 419 | last_time = time.time() 420 | for i in range(num_frames): 421 | out.write(last_frame) 422 | last_frame = frame 423 | 424 | def video_close(): 425 | global out, last_frame, last_time 426 | num_frames = math.floor((time.time() - last_time) * out_fps) 427 | for i in range(num_frames): 428 | out.write(last_frame) 429 | 430 | def is_matched_area_okay(matched_area, frame_2_shape): 431 | return True 432 | 433 | def print_once(message): 434 | global last_message 435 | if message != last_message: 436 | last_message = message 437 | print(last_message) 438 | 439 | def youtube_download_hook(download): 440 | global download_item 441 | if download["status"] == "finished": 442 | print(download["filename"]) 443 | video_num = download_item['index'] 444 | os.rename(download["filename"], "internet%s.mp4" % (video_num)) 445 | continue_downloads() 446 | 447 | def load_from_youtube(video): 448 | ydl_opts = {"format": "mp4", "progress_hooks": [youtube_download_hook]} 449 | youtube_dl.YoutubeDL(ydl_opts).download([video]) 450 | 451 | def get_and_compare_videos(path_1, path_2, skip=False): 452 | global video_path_1, video_path_2, max_videos, download_list, recalculate_fps 453 | need_download = False 454 | video_path_1 = path_1 455 | if 'http' in path_1: 456 | download_list.append({'source': path_1, 'index': 1}) 457 | video_path_1 = 'internet1.mp4' 458 | elif path_1 == '0': 459 | video_path_1 = 0 460 | recalculate_fps = True 461 | video_path_2 = path_2 462 | if 'http' in path_2: 463 | download_list.append({'source': path_2, 'index': 2}) 464 | video_path_2 = 'internet2.mp4' 465 | elif path_2 == '0': 466 | video_path_2 = 0 467 | recalculate_fps = True 468 | if len(download_list) == 0: 469 | compare_videos(video_path_1, video_path_2) 470 | else: 471 | continue_downloads() 472 | 473 | 
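# Download-queue flow (descriptive comment): get_and_compare_videos() pushes
# any http(s) input onto download_list and substitutes a local internet<N>.mp4
# path for it; youtube_download_hook() renames each finished download
# accordingly and calls continue_downloads(), which pops the next pending item
# or, once the queue is empty, finally runs compare_videos().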
473 | def continue_downloads(): 474 | global download_list, download_item, video_path_1, video_path_2 475 | if len(download_list) > 0: 476 | download_item = download_list.pop(0) 477 | load_from_youtube(download_item['source']) 478 | else: 479 | compare_videos(video_path_1, video_path_2) 480 | 481 | def create_tracker(tracker_type): 482 | if tracker_type == 'BOOSTING': 483 | return cv2.TrackerBoosting_create() 484 | elif tracker_type == 'MIL': 485 | return cv2.TrackerMIL_create() 486 | elif tracker_type == 'KCF': 487 | return cv2.TrackerKCF_create() 488 | elif tracker_type == 'TLD': 489 | return cv2.TrackerTLD_create() 490 | elif tracker_type == 'MEDIANFLOW': 491 | return cv2.TrackerMedianFlow_create() 492 | elif tracker_type == 'GOTURN': 493 | return cv2.TrackerGOTURN_create() 494 | else: 495 | return cv2.TrackerKCF_create() 496 | 497 | def are_coords_valid(box, orig): 498 | threshold = 0.8 499 | calc_height = ((box[1] - box[0])/orig[1]) 500 | calc_width = ((box[3] - box[2])/orig[0]) 501 | if calc_height >= threshold and calc_width >= threshold: 502 | return False 503 | return True 504 | 505 | def detect_objects(image, thresh, detection_graph, sess, category_index, matched_area=None, sequence_sorted=False, sequence_type='char'): 506 | image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 507 | image_np_expanded = np.expand_dims(image_np, axis=0) 508 | image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') 509 | boxes = detection_graph.get_tensor_by_name('detection_boxes:0') 510 | scores = detection_graph.get_tensor_by_name('detection_scores:0') 511 | classes = detection_graph.get_tensor_by_name('detection_classes:0') 512 | num_detections = detection_graph.get_tensor_by_name('num_detections:0') 513 | start_time = time.time() 514 | if image_np_expanded[0] is not None: 515 | (boxes, scores, classes, num_detections) = sess.run( 516 | [boxes, scores, classes, num_detections], 517 | feed_dict={image_tensor: image_np_expanded}) 518 | elapsed_time = time.time() - start_time 519 | if DEBUG_TIME: 520 | print('cnn', elapsed_time) 521 | box = vis_util.visualize_boxes_and_labels_on_image_array( 522 | image, 523 | np.squeeze(boxes), 524 | np.squeeze(classes).astype(np.int32), 525 | np.squeeze(scores), 526 | category_index, 527 | min_score_thresh=thresh, 528 | use_normalized_coordinates=True, 529 | line_thickness=4, 530 | sequence_sorted=sequence_sorted, 531 | sequence_type=sequence_type, 532 | matched_area=matched_area) 533 | else: 534 | box = {'sequence': '', 'objects': []} 535 | return box 536 | 537 | def get_sequence_matches(sequence_1, sequence_2): 538 | if sequence_1 and sequence_2: 539 | sm = edit_distance.SequenceMatcher(a=sequence_1, b=sequence_2) 540 | sm.get_opcodes() 541 | sm.ratio() 542 | sm.get_matching_blocks() 543 | distance = sm.distance() 544 | num_matches = sm.matches() 545 | return num_matches 546 | else: 547 | return 0 548 | 549 | def get_rect_from_dst(dst, orig): 550 | top = int(dst[0][0][1]) if dst[0][0][1] < dst[3][0][1] else int(dst[3][0][1]) 551 | bottom = int(dst[1][0][1]) if dst[1][0][1] > dst[2][0][1] else int(dst[2][0][1]) 552 | left = int(dst[0][0][0]) if dst[0][0][0] < dst[1][0][0] else int(dst[1][0][0]) 553 | right = int(dst[2][0][0]) if dst[2][0][0] > dst[3][0][0] else int(dst[3][0][0]) 554 | top = 0 if top < 0 else top 555 | left = 0 if left < 0 else left 556 | bottom = orig[0] if bottom > orig[0] else bottom 557 | right = orig[1] if right > orig[1] else right 558 | return (top, bottom, left, right) 559 |
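`get_sequence_matches` above scores two frames by aligning their per-frame symbol sequences with the `edit_distance` package used in the script and keeping only the number of aligned symbols. A small illustration of that scoring, reusing only the calls already made above; the example sequences and the acceptance threshold are made up for the demonstration.

```python
import edit_distance  # same package the script imports

# Hypothetical per-frame sequences: one symbol per detected object,
# e.g. 'f' for every face the detector reported in that frame.
frame_a = 'ffzf'
frame_b = 'fff'

sm = edit_distance.SequenceMatcher(a=frame_a, b=frame_b)
print('matches:', sm.matches())            # symbols the alignment has in common
print('distance:', sm.distance())          # edits needed to turn one sequence into the other
print('ratio:', sm.ratio())                # normalized similarity in [0, 1]
print('blocks:', sm.get_matching_blocks())

# A possible per-frame acceptance rule (threshold is made up):
threshold = 0.6
similar = sm.matches() / max(len(frame_a), len(frame_b)) >= threshold
print('frames considered similar:', similar)
```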
560 | def get_area_coords(dst, matched_area=(0, 0, 0, 0)): 561 | (top, bottom, left, right) = matched_area # unpacked here but not used below 562 | tl = (int(dst[0][0][0]), int(dst[0][0][1])) 563 | tr = (int(dst[3][0][0]), int(dst[3][0][1])) 564 | bl = (int(dst[1][0][0]), int(dst[1][0][1])) 565 | br = (int(dst[2][0][0]), int(dst[2][0][1])) 566 | return [tl, tr, br, bl] 567 | 568 | def get_transformed_coords(dst, matched_area): 569 | (top, bottom, left, right) = matched_area 570 | tl = (-(left - int(dst[0][0][0])), -(top - int(dst[0][0][1]))) 571 | tr = ((int(dst[3][0][0]) - left), -(top - int(dst[3][0][1]))) 572 | bl = (-(left - int(dst[1][0][0])), int(dst[1][0][1]) - top) 573 | br = ((int(dst[2][0][0]) - left), (int(dst[2][0][1]) - top)) 574 | return [tl, tr, br, bl] 575 | 576 | def order_points(pts): 577 | # initialize a list of coordinates that will be ordered 578 | # such that the first entry in the list is the top-left, 579 | # the second entry is the top-right, the third is the 580 | # bottom-right, and the fourth is the bottom-left 581 | rect = np.zeros((4, 2), dtype = "float32") 582 | 583 | # the top-left point will have the smallest sum, whereas 584 | # the bottom-right point will have the largest sum 585 | s = pts.sum(axis = 1) 586 | rect[0] = pts[np.argmin(s)] 587 | rect[2] = pts[np.argmax(s)] 588 | 589 | # now, compute the difference between the points, the 590 | # top-right point will have the smallest difference, 591 | # whereas the bottom-left will have the largest difference 592 | diff = np.diff(pts, axis = 1) 593 | rect[1] = pts[np.argmin(diff)] 594 | rect[3] = pts[np.argmax(diff)] 595 | 596 | # return the ordered coordinates 597 | return rect 598 |
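`order_points` relies on a small geometric trick: of the four corners, the top-left has the smallest x + y sum, the bottom-right the largest, and the y - x difference separates the top-right (smallest) from the bottom-left (largest). A quick numeric check of that invariant, with a standalone copy of the function so the snippet runs on its own (the coordinates are arbitrary):

```python
import numpy as np


def order_points(pts):
    # same sum/diff trick as above: order as tl, tr, br, bl
    rect = np.zeros((4, 2), dtype="float32")
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]     # top-left: smallest x + y
    rect[2] = pts[np.argmax(s)]     # bottom-right: largest x + y
    diff = np.diff(pts, axis=1)     # y - x for each point
    rect[1] = pts[np.argmin(diff)]  # top-right: smallest y - x
    rect[3] = pts[np.argmax(diff)]  # bottom-left: largest y - x
    return rect


# Four corners of a slightly rotated quadrilateral, given in scrambled order.
pts = np.array([[310, 20], [50, 30], [300, 240], [40, 250]], dtype="float32")
print(order_points(pts))
# expected ordering: (50, 30), (310, 20), (300, 240), (40, 250)
```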
599 | def four_point_transform(image, pts): 600 | # obtain a consistent order of the points and unpack them 601 | # individually 602 | pts = np.array(pts) 603 | rect = order_points(pts) 604 | # rect = np.array(pts) 605 | (tl, tr, br, bl) = rect 606 | 607 | # compute the width of the new image, which will be the 608 | # maximum distance between bottom-right and bottom-left 609 | # x-coordinates or the top-right and top-left x-coordinates 610 | widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2)) 611 | widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2)) 612 | maxWidth = max(int(widthA), int(widthB)) 613 | 614 | # compute the height of the new image, which will be the 615 | # maximum distance between the top-right and bottom-right 616 | # y-coordinates or the top-left and bottom-left y-coordinates 617 | heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2)) 618 | heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2)) 619 | maxHeight = max(int(heightA), int(heightB)) 620 | 621 | # now that we have the dimensions of the new image, construct 622 | # the set of destination points to obtain a "birds eye view", 623 | # (i.e. top-down view) of the image, again specifying points 624 | # in the top-left, top-right, bottom-right, and bottom-left 625 | # order 626 | dst = np.array([ 627 | [0, 0], 628 | [maxWidth - 1, 0], 629 | [maxWidth - 1, maxHeight - 1], 630 | [0, maxHeight - 1]], dtype = "float32") 631 | 632 | # compute the perspective transform matrix and then apply it 633 | M = cv2.getPerspectiveTransform(rect, dst) 634 | try: 635 | warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight)) 636 | return warped 637 | except Exception: 638 | return image 639 | # return the warped image 640 | 641 | 642 | def compare_2d_color_images(frame_1, frame_2): 643 | start_time = time.time() 644 | matches_num = 0 645 | height = frame_1.shape[0] 646 | width = frame_1.shape[1] 647 | size = width * height 648 | for i in range(height): 649 | for j in range(width): 650 | if frame_1[i][j][0] == frame_2[i][j][0] and frame_1[i][j][1] == frame_2[i][j][1] and frame_1[i][j][2] == frame_2[i][j][2]: 651 | matches_num += 1 652 | rate = matches_num / size 653 | elapsed_time = time.time() - start_time 654 | if DEBUG_TIME: 655 | print('iterate_2d', elapsed_time) 656 | return rate 657 | def compare_2d_gray_images(frame_1, frame_2): 658 | start_time = time.time() 659 | matches_num = 0 660 | gray_1 = cv2.cvtColor(frame_1, cv2.COLOR_BGR2GRAY) 661 | gray_2 = cv2.cvtColor(frame_2, cv2.COLOR_BGR2GRAY) 662 | height = gray_1.shape[0] 663 | width = gray_1.shape[1] 664 | size = width * height 665 | for i in range(height): 666 | for j in range(width): 667 | if gray_1[i][j] == gray_2[i][j]: 668 | matches_num += 1 669 | rate = matches_num / size 670 | elapsed_time = time.time() - start_time 671 | if DEBUG_TIME: 672 | print('iterate_2d', elapsed_time) 673 | return rate 674 | def compare_1d_gray_images(frame_1, frame_2): 675 | start_time = time.time() 676 | matches_num = 0 677 | gray_1 = cv2.cvtColor(frame_1, cv2.COLOR_BGR2GRAY) 678 | flat_1 = [j for i in gray_1 for j in i] 679 | gray_2 = cv2.cvtColor(frame_2, cv2.COLOR_BGR2GRAY) 680 | flat_2 = [j for i in gray_2 for j in i] 681 | size = len(flat_1) 682 | for i in range(size): 683 | if flat_1[i] == flat_2[i]: 684 | matches_num += 1 685 | rate = matches_num / size 686 | elapsed_time = time.time() - start_time 687 | if DEBUG_TIME: 688 | print('iterate_1d', elapsed_time) 689 | return rate 690 | #load_from_youtube() 691 | get_and_compare_videos(sys.argv[4], sys.argv[5]) 692 | 693 | # 694 | """ 695 | import sys 696 | import time 697 | import numpy as np 698 | import tensorflow as tf 699 | import cv2 700 | import PIL.Image as Image 701 | 702 | sys.path.append("..") 703 | 704 | from utils import label_map_util 705 | from utils import visualization_utils_color as vis_util 706 | 707 | def find_homography(kp1, des1, kp2, des2): 708 | bf = cv2.BFMatcher(cv2.NORM_L2) 709 | # Match descriptors.
710 | matches = bf.knnMatch(des1,des2,k=2) 711 | # Apply ratio test 712 | good = [] 713 | for m,n in matches: 714 | if m.distance < 0.9*n.distance: 715 | good.append(m) 716 | pts1 = [] 717 | pts2 = [] 718 | for elem in good: 719 | pts1.append(kp1[elem.queryIdx].pt) 720 | pts2.append(kp2[elem.trainIdx].pt) 721 | pts1 = np.array(pts1) 722 | pts2 = np.array(pts2) 723 | M, mask = cv2.findHomography(pts1, pts2, cv2.RANSAC,5.0) 724 | count_inliers = np.count_nonzero(mask) 725 | #print('Number of inliers: ', np.count_nonzero(mask)) 726 | return count_inliers, M 727 | 728 | frames = [] 729 | 730 | def image_alg(image, box): 731 | border = 0.2 732 | im_height = len(image) 733 | im_width = len(image[0]) 734 | (ymin, xmin, ymax, xmax) = (box[0], box[1], box[2], box[3]) 735 | (left, right, top, bottom) = (int(xmin * im_width), int(xmax * im_width), 736 | int(ymin * im_height), int(ymax * im_height)) 737 | #print((left, right, top, bottom)) 738 | border_height = (bottom - top) * border 739 | top = 0 if (top - border_height) < 0 else (top - border_height) 740 | bottom = im_height if (bottom + border_height) > im_height else (bottom + border_height) 741 | scale_y = im_height/(bottom - top) 742 | output = cv2.resize(image, (0,0), fy=scale_y, fx=scale_y) 743 | (xleft, xright, xtop, xbottom, xim_width) = (int(left*scale_y), int(right*scale_y), 744 | int(top*scale_y), int(bottom*scale_y), 745 | int(im_width*scale_y)) 746 | extra_width = (im_width - (xright - xleft)) // 2 747 | new_left = 0 if (xleft - extra_width) < 0 else (xleft - extra_width) 748 | new_right = xim_width if (xright + extra_width) > xim_width else (xright + extra_width) 749 | output = output[xtop:xbottom, new_left:new_right] 750 | #output = image[top:bottom, left:right] 751 | #output = cv2.resize(output, (0,0), fy=scale_y, fx=scale_y) 752 | return output 753 | 754 | def image_stab(image): 755 | global frames 756 | WINDOW_SIZE = 15 757 | skip = 1 # speedup -- set 1 for original speed 758 | resize = 0.5 #scale video resolution 759 | frames = [] 760 | mean_homographies = [] 761 | median_homographies = [] 762 | corrected_frames = [] 763 | i = 0 764 | 765 | frames.append(image) 766 | if len(frames) > 20: 767 | frames = frames[1:] 768 | 769 | orb = cv2.xfeatures2d.SIFT_create(nfeatures=1000) 770 | # orb = cv2.FeatureDetector_create("SIFT") 771 | # orb = cv2.SIFT_create(nfeatures=1000) 772 | # orb = cv2.SIFT(nfeatures=1000) 773 | 774 | vec_kps = [] 775 | vec_descs = [] 776 | 777 | #print('extracting keypoints...') 778 | 779 | for i in range(len(frames)): 780 | # find the keypoints and descriptors 781 | kp1, des1 = orb.detectAndCompute(frames[i],None) 782 | 783 | vec_kps.append(kp1) 784 | vec_descs.append(des1) 785 | 786 | #print('Frame %d/%d: found %d keypoints'% (i,len(frames),len(kp1))) 787 | 788 | 789 | 790 | for i in range(len(frames)): 791 | mean_H = np.zeros((3,3), dtype='float64') 792 | median_H = [] 793 | mean_C = 0 794 | median_vals = [] 795 | k = int(WINDOW_SIZE/2.0)+1 796 | for j in range(1,k,1): #for each couple neighbor frames iterated by distance 797 | if i-j >= 0 and i+j < len(frames): 798 | inliers_c, H = find_homography(vec_kps[i],vec_descs[i], vec_kps[i-j], vec_descs[i-j]) 799 | inliers_c2, H2 = find_homography(vec_kps[i],vec_descs[i], vec_kps[i+j], vec_descs[i+j]) 800 | #print('pair (%d,%d) has %d inliers'% (i,i-j,inliers_c)) 801 | #print('pair (%d,%d) has %d inliers'% (i,i+j,inliers_c2)) 802 | if inliers_c > 80 and inliers_c2 > 80: #ensures that neighbors are equally selected by distance to correctly balance the homography 803 | 
mean_H = mean_H + H 804 | mean_H = mean_H + H2 805 | mean_C+=2 806 | 807 | if mean_C > 0: 808 | mean_homographies.append(mean_H/mean_C) # Mean homography 809 | else: 810 | mean_homographies.append(np.eye(3, dtype='float64')) 811 | 812 | #print mean_H/mean_C 813 | #print median_vals 814 | #raw_input() 815 | 816 | #fourcc = cv2.cv.CV_FOURCC('D','I','V','X') 817 | #fourcc = cv2.cv.CV_FOURCC('R','G','B',' ') 818 | #fourcc = cv2.cv.CV_FOURCC('Y','U','Y','2') 819 | #fourcc = cv2.cv.CV_FOURCC('Y','U','Y','U') 820 | #fourcc = cv2.cv.CV_FOURCC('U','Y','V','Y') 821 | #fourcc = cv2.cv.CV_FOURCC('I','4','2','0') 822 | #fourcc = cv2.cv.CV_FOURCC('I','Y','U','V') 823 | #fourcc = cv2.cv.CV_FOURCC('Y','U','1','2') 824 | #fourcc = cv2.cv.CV_FOURCC('Y','8','0','0') 825 | #fourcc = cv2.cv.CV_FOURCC('G','R','E','Y') 826 | #fourcc = cv2.cv.CV_FOURCC('B','Y','8',' ') 827 | #fourcc = cv2.cv.CV_FOURCC('Y','1','6',' ') 828 | 829 | #fourcc = cv2.cv.CV_FOURCC('M','J','P','G') 830 | #fourcc = cv2.cv.CV_FOURCC('M','P','E','G') 831 | 832 | crop_x = 80 833 | crop_y = 60 834 | 835 | size = (frames[0].shape[1]-crop_x*2, frames[0].shape[0]-crop_y*2) 836 | 837 | #fourcc = cv2.VideoWriter_fourcc('X','V','I','D') 838 | #out = cv2.VideoWriter(file+'__estabilizado.avi',fourcc,30.0,size)#cv2.VideoWriter('stab.mp4',-1, 30.0, (frames[0].shape[0], frames[0].shape[1])) 839 | 840 | #for i in range(len(frames)): 841 | #corrected = cv2.warpPerspective(frames[i],mean_homographies[i],(0,0)) 842 | #cv2.imshow('video corrected', corrected) 843 | #cv2.waitKey(1) 844 | #new_img = corrected[crop_y:frames[0].shape[0]-crop_y, crop_x:frames[0].shape[1]-crop_x] 845 | #out.write(new_img) 846 | #out.write(corrected[crop_y:frames[0].shape[0]-crop_y, crop_x:frames[0].shape[1]-crop_x]) 847 | 848 | corrected = cv2.warpPerspective(frames[-1],mean_homographies[-1],(0,0)) 849 | new_img = corrected[crop_y:frames[0].shape[0]-crop_y, crop_x:frames[0].shape[1]-crop_x] 850 | return new_img 851 | 852 | 853 | # Path to frozen detection graph. This is the actual model that is used for the object detection. 854 | PATH_TO_CKPT = './model/frozen_inference_graph_face.pb' 855 | 856 | # List of the strings that is used to add correct label for each box. 
857 | PATH_TO_LABELS = './protos/face_label_map.pbtxt' 858 | 859 | NUM_CLASSES = 2 860 | 861 | label_map = label_map_util.load_labelmap(PATH_TO_LABELS) 862 | categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) 863 | category_index = label_map_util.create_category_index(categories) 864 | 865 | def load_image_into_numpy_array(image): 866 | (im_width, im_height) = image.size 867 | return np.array(image.getdata()).reshape( 868 | (im_height, im_width, 3)).astype(np.uint8) 869 | 870 | #cap = cv2.VideoCapture("./media/test.mp4") 871 | cap = cv2.VideoCapture(0) 872 | cap.open(0) 873 | # time.sleep(2.0) 874 | out = None 875 | 876 | detection_graph = tf.Graph() 877 | with detection_graph.as_default(): 878 | od_graph_def = tf.GraphDef() 879 | with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: 880 | serialized_graph = fid.read() 881 | od_graph_def.ParseFromString(serialized_graph) 882 | tf.import_graph_def(od_graph_def, name='') 883 | 884 | with detection_graph.as_default(): 885 | config = tf.ConfigProto() 886 | config.gpu_options.allow_growth = True 887 | with tf.Session(graph=detection_graph, config=config) as sess: 888 | frame_num = 100; 889 | while frame_num: 890 | frame_num -= 1 891 | ret, image = cap.read() 892 | if ret == 0: 893 | break 894 | 895 | if out is None: 896 | [h, w] = image.shape[:2] 897 | out = cv2.VideoWriter("./media/test_out.avi", cv2.VideoWriter_fourcc(*'H264'), 25.0, (w, h)) 898 | 899 | image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 900 | 901 | # the array based representation of the image will be used later in order to prepare the 902 | # result image with boxes and labels on it. 903 | # Expand dimensions since the model expects images to have shape: [1, None, None, 3] 904 | image_np_expanded = np.expand_dims(image_np, axis=0) 905 | image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') 906 | # Each box represents a part of the image where a particular object was detected. 907 | boxes = detection_graph.get_tensor_by_name('detection_boxes:0') 908 | # Each score represent how level of confidence for each of the objects. 909 | # Score is shown on the result image, together with the class label. 910 | scores = detection_graph.get_tensor_by_name('detection_scores:0') 911 | classes = detection_graph.get_tensor_by_name('detection_classes:0') 912 | num_detections = detection_graph.get_tensor_by_name('num_detections:0') 913 | # Actual detection. 914 | start_time = time.time() 915 | (boxes, scores, classes, num_detections) = sess.run( 916 | [boxes, scores, classes, num_detections], 917 | feed_dict={image_tensor: image_np_expanded}) 918 | elapsed_time = time.time() - start_time 919 | #print('inference time cost: {}'.format(elapsed_time)) 920 | #print(boxes.shape, boxes) 921 | #print(scores.shape,scores) 922 | #print(classes.shape,classes) 923 | #print(num_detections) 924 | # Visualization of the results of a detection. 
925 | box = vis_util.visualize_boxes_and_labels_on_image_array( 926 | # image_np, 927 | image, 928 | np.squeeze(boxes), 929 | np.squeeze(classes).astype(np.int32), 930 | np.squeeze(scores), 931 | category_index, 932 | use_normalized_coordinates=True, 933 | line_thickness=4) 934 | 935 | #cv2.imshow('Video', image) 936 | if len(box) > 0: 937 | stab = image_alg(image, box[0]) 938 | cv2.imshow('Stab', stab) 939 | out.write(stab) 940 | cv2.waitKey(1) 941 | 942 | 943 | cap.release() 944 | out.release() 945 | 946 | #sift_kp_1, sift_des_1 = sift.detectAndCompute(frame_1, None) 947 | #surf_kp_1, surf_des_1 = surf.detectAndCompute(frame_1, None) 948 | #orb_kp_1, orb_des_1 = orb.detectAndCompute(frame_1, None) 949 | #fast_kp_1, fast_des_1 = fast.detectAndCompute(frame_1, None) 950 | #print( sift.descriptorSize() ) 951 | #print( surf.descriptorSize() ) 952 | #print( orb.descriptorSize() ) 953 | #print( sift_des_1.shape ) 954 | #print( surf_des_1.shape ) 955 | #print( orb_des_1.shape ) 956 | #sift_kp_2, sift_des_2 = sift.detectAndCompute(frame_2, None) 957 | #surf_kp_2, surf_des_2 = surf.detectAndCompute(frame_2, None) 958 | #orb_kp_2, orb_des_2 = orb.detectAndCompute(frame_2, None) 959 | #fast_kp_2, fast_des_2 = orb.detectAndCompute(frame_2, None) 960 | #print(len(matches)) 961 | #matches_img = cv2.drawMatchesKnn(frame_1, desc_kp_1, frame_2, desc_kp_2, matches[:10], None) 962 | 963 | matchesMask = [[0,0] for i in range(len(matches))] # ratio test as per Lowe's paper 964 | for i,(m,n) in enumerate(matches): 965 | if m.distance < 0.7*n.distance: 966 | matchesMask[i] = [1,0] 967 | draw_params = dict(matchColor = (0,255,0), 968 | singlePointColor = (255,0,0), 969 | matchesMask = matchesMask[:10], 970 | flags = 0) 971 | matches_img = cv2.drawMatchesKnn(frame_1, desc_kp_1, frame_2, desc_kp_2, matches[:10], None, **draw_params) 972 | 973 | start_time = time.time() 974 | bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) # create BFMatcher object 975 | matches = bf.match(desc_des_1, desc_des_2) # Match descriptors. 976 | matches = sorted(matches, key = lambda x:x.distance) # Sort them in the order of their distance. 
977 | elapsed_time = time.time() - start_time 978 | print('BFMatcher', elapsed_time) 979 | print(len(matches)) 980 | matches_img = cv2.drawMatches(frame_1, desc_kp_1, frame_2, desc_kp_2, matches[:10], None, flags=2) 981 | 982 | #compare_2d_color_images(frame_1, frame_2) 983 | #compare_2d_gray_images(frame_1, frame_2) 984 | 985 | res = cv2.matchTemplate(gray_1, gray_2, cv2.TM_CCOEFF_NORMED) 986 | _, max_val, _, max_loc = cv2.minMaxLoc(res) 987 | print(max_val) 988 | threshold = 0.8 989 | if max_val > threshold: 990 | print("FOUND") 991 | 992 | #cv2.imshow("image 1", gray_1) 993 | #cv2.imshow("image 2", gray_2) 994 | 995 | elif descriptor == "orb": 996 | FLANN_INDEX_LSH = 6 997 | index_params = dict(algorithm = FLANN_INDEX_LSH, 998 | table_number = 6, # 12 999 | key_size = 12, # 20 1000 | multi_probe_level = 1) #2 1001 | # index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5) 1002 | search_params = dict(checks=50) # or pass empty dictionary 1003 | flann = cv2.FlannBasedMatcher(index_params, search_params) 1004 | matches = flann.knnMatch(desc_des_1, desc_des_2, k=2) 1005 | good = [] 1006 | for m,n in matches: 1007 | if m.distance < 0.7*n.distance: 1008 | good.append(m) 1009 | print(len(good)) 1010 | matches_img = cv2.drawMatches(frame_1, desc_kp_1, frame_2, desc_kp_2, good[:show_points], None) 1011 | """ 1012 | 1013 | 1014 | # Resolution: 1920 x 1080 1015 | # 1016 | # 2D color 1017 | # Operations = 1920 x 1080 x 3 1018 | # Time = 2.372 1019 | # 1020 | # 2D gray 1021 | # Operations = 1920 x 1080 1022 | # Time = 0.521 1023 | # 1024 | # SURF 1025 | # Time = 0.243 1026 | # 1027 | # CNN 1028 | # Time = 0.087 1029 | # 1030 | # Tracking 1031 | # Time = 0.003 1032 | # 1033 | # FLANN 1034 | # Size 1 = 4542 1035 | # Size 2 = 4117 1036 | # Time = 0.109 1037 | # 1038 | # BFMatcher 1039 | # Size 1 = 4542 1040 | # Size 2 = 4117 1041 | # Time = 0.164 --------------------------------------------------------------------------------
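The commented-out experiments and the timing notes above compare per-pixel comparison, template matching, descriptor matching (SURF/ORB with BFMatcher or FLANN), CNN face detection, and tracking. For reference, here is a hedged, self-contained sketch of the descriptor route those numbers refer to (keypoints, Lowe's ratio test, then RANSAC homography inliers), using ORB, which ships with stock OpenCV builds; the function name, the 0.75 ratio and the image paths are illustrative choices rather than values taken from the script.

```python
import cv2
import numpy as np


def count_inlier_matches(img_1, img_2, ratio=0.75, min_good=10):
    """Keypoints -> Lowe's ratio test -> RANSAC homography inliers."""
    orb = cv2.ORB_create(nfeatures=1000)
    kp_1, des_1 = orb.detectAndCompute(img_1, None)
    kp_2, des_2 = orb.detectAndCompute(img_2, None)
    if des_1 is None or des_2 is None:
        return 0
    # Hamming norm suits ORB's binary descriptors; k=2 enables the ratio test.
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
    pairs = matcher.knnMatch(des_1, des_2, k=2)
    good = [m[0] for m in pairs if len(m) == 2 and m[0].distance < ratio * m[1].distance]
    if len(good) < min_good:
        return 0
    pts_1 = np.float32([kp_1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    pts_2 = np.float32([kp_2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    H, mask = cv2.findHomography(pts_1, pts_2, cv2.RANSAC, 5.0)
    return 0 if mask is None else int(np.count_nonzero(mask))


if __name__ == '__main__':
    frame_1 = cv2.imread('image1.png')  # any two frames or stills to compare
    frame_2 = cv2.imread('image2.png')
    if frame_1 is None or frame_2 is None:
        raise SystemExit('could not read the example images')
    print('inlier matches:', count_inlier_matches(frame_1, frame_2))
```

A high inlier count means one frame maps onto a region of the other under a single perspective transform, which is the signal the matching code above uses before cropping and re-running detection on the matched area.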