├── protos ├── __init__.py ├── face_label_map.pbtxt └── string_int_label_map_pb2.py ├── utils ├── __init__.py ├── label_map_util.py └── visualization_utils_color.py ├── image1.png ├── image2.png ├── requirements.txt ├── README.md ├── LICENSE.md ├── .gitignore ├── labels.pbtxt └── app.py /protos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toxtli/deepiracy/HEAD/image1.png -------------------------------------------------------------------------------- /image2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/toxtli/deepiracy/HEAD/image2.png -------------------------------------------------------------------------------- /protos/face_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 2 3 | name: 'background' 4 | } 5 | 6 | item { 7 | id: 1 8 | name: 'face' 9 | } 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | imutils==0.4.5 2 | numpy==1.14.1 3 | youtube_dl==2018.3.20 4 | tensorflow==1.6.0 5 | six==1.10.0 6 | opencv-contrib-python 7 | edit_distance==1.0.1 8 | Pillow==5.1.0 9 | protobuf==3.5.2.post1 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deepiracy 2 | 3 | Deepiracy is a tool that finds a source video within a target video. It can detect subsequences of the source video even if the target is highly distorted. You can read a complete explanation of how this works in [this article](https://medium.com/hci-wvu/piracy-detection-using-longest-common-subsequence-and-neural-networks-a6f689a541a6). 4 | 5 | ## Installation 6 | 7 | Run 8 | 9 | > pip install -r requirements.txt 10 | 11 | ## Quickstart 12 | 13 | Parameters: 14 | 15 | python app.py [video_1_start_frame] [video_2_start_frame] [how_many_frames] [video_path_1_or_url] [video_path_2_or_url] 16 | 17 | Example of how to run it with local files: 18 | 19 | > python app.py 1 1 -1 video1.mp4 video1.mp4 20 | 21 | Example of how to run it with YouTube URLs: 22 | 23 | > python app.py 1 1 -1 https://www.youtube.com/watch?v=E5K_Ug0Gq0Y https://www.youtube.com/watch?v=E5K_Ug0Gq0Y 24 | 25 | Example of how to run real-time detection from a webcam (`0` selects the default camera): 26 | 27 | > python app.py 1 1 -1 video1.mp4 0 28 | 29 | The required files can be downloaded from here: 30 | 31 | https://drive.google.com/drive/folders/1BPR6j-3xc0NnlbmO96LD55tRV7e07Ynp?usp=sharing 32 | 33 | The results can be found here: 34 | 35 | https://drive.google.com/open?id=1iyquDYv1o48mA_ymI7AEjrlZZtqXOOz1 36 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain.
2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.mp4 2 | *.pb 3 | *.avi 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | -------------------------------------------------------------------------------- /protos/string_int_label_map_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: object_detection/protos/string_int_label_map.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='object_detection/protos/string_int_label_map.proto', 20 | package='object_detection.protos', 21 | serialized_pb=_b('\n2object_detection/protos/string_int_label_map.proto\x12\x17object_detection.protos\"G\n\x15StringIntLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"Q\n\x11StringIntLabelMap\x12<\n\x04item\x18\x01 \x03(\x0b\x32..object_detection.protos.StringIntLabelMapItem') 22 | ) 23 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 24 | 25 | 26 | 27 | 28 | _STRINGINTLABELMAPITEM = _descriptor.Descriptor( 29 | name='StringIntLabelMapItem', 30 | full_name='object_detection.protos.StringIntLabelMapItem', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | fields=[ 35 | _descriptor.FieldDescriptor( 36 | name='name', full_name='object_detection.protos.StringIntLabelMapItem.name', index=0, 37 | number=1, type=9, cpp_type=9, label=1, 38 | has_default_value=False, default_value=_b("").decode('utf-8'), 39 | message_type=None, enum_type=None, containing_type=None, 40 | is_extension=False, extension_scope=None, 41 | options=None), 42 | _descriptor.FieldDescriptor( 43 | name='id', full_name='object_detection.protos.StringIntLabelMapItem.id', index=1, 44 | number=2, type=5, cpp_type=1, label=1, 45 | has_default_value=False, default_value=0, 46 | message_type=None, enum_type=None, containing_type=None, 47 | is_extension=False, extension_scope=None, 48 | options=None), 49 | _descriptor.FieldDescriptor( 50 | name='display_name', full_name='object_detection.protos.StringIntLabelMapItem.display_name', index=2, 51 | number=3, type=9, cpp_type=9, label=1, 52 | has_default_value=False, default_value=_b("").decode('utf-8'), 53 | message_type=None, enum_type=None, containing_type=None, 54 | is_extension=False, extension_scope=None, 55 | options=None), 56 | ], 57 | extensions=[ 58 | ], 59 | nested_types=[], 60 | enum_types=[ 61 | ], 62 | options=None, 63 | is_extendable=False, 64 | extension_ranges=[], 65 | oneofs=[ 66 | ], 67 | serialized_start=79, 68 | serialized_end=150, 69 | ) 70 | 71 | 72 | _STRINGINTLABELMAP = _descriptor.Descriptor( 73 | name='StringIntLabelMap', 74 | full_name='object_detection.protos.StringIntLabelMap', 75 | filename=None, 76 | file=DESCRIPTOR, 77 | containing_type=None, 78 | fields=[ 79 | _descriptor.FieldDescriptor( 80 | name='item', full_name='object_detection.protos.StringIntLabelMap.item', index=0, 81 | number=1, type=11, cpp_type=10, label=3, 82 | has_default_value=False, default_value=[], 83 | message_type=None, enum_type=None, containing_type=None, 84 | is_extension=False, extension_scope=None, 85 | options=None), 86 | ], 87 | extensions=[ 88 | ], 89 | nested_types=[], 90 | enum_types=[ 91 | ], 92 | options=None, 93 | is_extendable=False, 94 | extension_ranges=[], 95 | oneofs=[ 96 | ], 97 | serialized_start=152, 98 | serialized_end=233, 99 | ) 100 | 101 | 
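# NOTE (added annotation; the rest of this file is compiler-generated code):
# the descriptors above correspond roughly to the following .proto schema,
# reconstructed here from the field definitions for readability rather than
# taken from the original string_int_label_map.proto file:
#
#   message StringIntLabelMapItem {
#     optional string name = 1;
#     optional int32 id = 2;
#     optional string display_name = 3;
#   }
#
#   message StringIntLabelMap {
#     repeated StringIntLabelMapItem item = 1;
#   }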
_STRINGINTLABELMAP.fields_by_name['item'].message_type = _STRINGINTLABELMAPITEM 102 | DESCRIPTOR.message_types_by_name['StringIntLabelMapItem'] = _STRINGINTLABELMAPITEM 103 | DESCRIPTOR.message_types_by_name['StringIntLabelMap'] = _STRINGINTLABELMAP 104 | 105 | StringIntLabelMapItem = _reflection.GeneratedProtocolMessageType('StringIntLabelMapItem', (_message.Message,), dict( 106 | DESCRIPTOR = _STRINGINTLABELMAPITEM, 107 | __module__ = 'object_detection.protos.string_int_label_map_pb2' 108 | # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMapItem) 109 | )) 110 | _sym_db.RegisterMessage(StringIntLabelMapItem) 111 | 112 | StringIntLabelMap = _reflection.GeneratedProtocolMessageType('StringIntLabelMap', (_message.Message,), dict( 113 | DESCRIPTOR = _STRINGINTLABELMAP, 114 | __module__ = 'object_detection.protos.string_int_label_map_pb2' 115 | # @@protoc_insertion_point(class_scope:object_detection.protos.StringIntLabelMap) 116 | )) 117 | _sym_db.RegisterMessage(StringIntLabelMap) 118 | 119 | 120 | # @@protoc_insertion_point(module_scope) 121 | -------------------------------------------------------------------------------- /utils/label_map_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Label map utility functions.""" 17 | 18 | import logging 19 | 20 | import tensorflow as tf 21 | from google.protobuf import text_format 22 | from protos import string_int_label_map_pb2 23 | 24 | 25 | def _validate_label_map(label_map): 26 | """Checks if a label map is valid. 27 | 28 | Args: 29 | label_map: StringIntLabelMap to validate. 30 | 31 | Raises: 32 | ValueError: if label map is invalid. 33 | """ 34 | for item in label_map.item: 35 | if item.id < 1: 36 | raise ValueError('Label map ids should be >= 1.') 37 | 38 | 39 | def create_category_index(categories): 40 | """Creates dictionary of COCO compatible categories keyed by category id. 41 | 42 | Args: 43 | categories: a list of dicts, each of which has the following keys: 44 | 'id': (required) an integer id uniquely identifying this category. 45 | 'name': (required) string representing category name 46 | e.g., 'cat', 'dog', 'pizza'. 47 | 48 | Returns: 49 | category_index: a dict containing the same entries as categories, but keyed 50 | by the 'id' field of each category. 51 | """ 52 | category_index = {} 53 | for cat in categories: 54 | category_index[cat['id']] = cat 55 | return category_index 56 | 57 | 58 | def convert_label_map_to_categories(label_map, 59 | max_num_classes, 60 | use_display_name=True): 61 | """Loads label map proto and returns categories list compatible with eval. 
62 | 63 | This function loads a label map and returns a list of dicts, each of which 64 | has the following keys: 65 | 'id': (required) an integer id uniquely identifying this category. 66 | 'name': (required) string representing category name 67 | e.g., 'cat', 'dog', 'pizza'. 68 | We only allow class into the list if its id-label_id_offset is 69 | between 0 (inclusive) and max_num_classes (exclusive). 70 | If there are several items mapping to the same id in the label map, 71 | we will only keep the first one in the categories list. 72 | 73 | Args: 74 | label_map: a StringIntLabelMapProto or None. If None, a default categories 75 | list is created with max_num_classes categories. 76 | max_num_classes: maximum number of (consecutive) label indices to include. 77 | use_display_name: (boolean) choose whether to load 'display_name' field 78 | as category name. If False or if the display_name field does not exist, 79 | uses 'name' field as category names instead. 80 | Returns: 81 | categories: a list of dictionaries representing all possible categories. 82 | """ 83 | categories = [] 84 | list_of_ids_already_added = [] 85 | if not label_map: 86 | label_id_offset = 1 87 | for class_id in range(max_num_classes): 88 | categories.append({ 89 | 'id': class_id + label_id_offset, 90 | 'name': 'category_{}'.format(class_id + label_id_offset) 91 | }) 92 | return categories 93 | for item in label_map.item: 94 | if not 0 < item.id <= max_num_classes: 95 | logging.info('Ignore item %d since it falls outside of requested ' 96 | 'label range.', item.id) 97 | continue 98 | if use_display_name and item.HasField('display_name'): 99 | name = item.display_name 100 | else: 101 | name = item.name 102 | if item.id not in list_of_ids_already_added: 103 | list_of_ids_already_added.append(item.id) 104 | char = chr(item.id + 48) 105 | categories.append({'id': item.id, 'name': name, 'char': char}) 106 | return categories 107 | 108 | 109 | def load_labelmap(path): 110 | """Loads label map proto. 111 | 112 | Args: 113 | path: path to StringIntLabelMap proto text file. 114 | Returns: 115 | a StringIntLabelMapProto 116 | """ 117 | with tf.gfile.GFile(path, 'r') as fid: 118 | label_map_string = fid.read() 119 | label_map = string_int_label_map_pb2.StringIntLabelMap() 120 | try: 121 | text_format.Merge(label_map_string, label_map) 122 | except text_format.ParseError: 123 | label_map.ParseFromString(label_map_string) 124 | _validate_label_map(label_map) 125 | return label_map 126 | 127 | 128 | def get_label_map_dict(label_map_path): 129 | """Reads a label map and returns a dictionary of label names to id. 130 | 131 | Args: 132 | label_map_path: path to label_map. 133 | 134 | Returns: 135 | A dictionary mapping label names to id. 
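  Example (illustrative only; any StringIntLabelMap text file works, here the
  face label map bundled under protos/ is assumed):
    get_label_map_dict('protos/face_label_map.pbtxt')
    # -> {'background': 2, 'face': 1}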
136 | """ 137 | label_map = load_labelmap(label_map_path) 138 | label_map_dict = {} 139 | for item in label_map.item: 140 | label_map_dict[item.name] = item.id 141 | return label_map_dict 142 | -------------------------------------------------------------------------------- /labels.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | name: "/m/01g317" 3 | id: 1 4 | display_name: "person" 5 | } 6 | item { 7 | name: "/m/0199g" 8 | id: 2 9 | display_name: "bicycle" 10 | } 11 | item { 12 | name: "/m/0k4j" 13 | id: 3 14 | display_name: "car" 15 | } 16 | item { 17 | name: "/m/04_sv" 18 | id: 4 19 | display_name: "motorcycle" 20 | } 21 | item { 22 | name: "/m/05czz6l" 23 | id: 5 24 | display_name: "airplane" 25 | } 26 | item { 27 | name: "/m/01bjv" 28 | id: 6 29 | display_name: "bus" 30 | } 31 | item { 32 | name: "/m/07jdr" 33 | id: 7 34 | display_name: "train" 35 | } 36 | item { 37 | name: "/m/07r04" 38 | id: 8 39 | display_name: "truck" 40 | } 41 | item { 42 | name: "/m/019jd" 43 | id: 9 44 | display_name: "boat" 45 | } 46 | item { 47 | name: "/m/015qff" 48 | id: 10 49 | display_name: "traffic light" 50 | } 51 | item { 52 | name: "/m/01pns0" 53 | id: 11 54 | display_name: "fire hydrant" 55 | } 56 | item { 57 | name: "/m/02pv19" 58 | id: 13 59 | display_name: "stop sign" 60 | } 61 | item { 62 | name: "/m/015qbp" 63 | id: 14 64 | display_name: "parking meter" 65 | } 66 | item { 67 | name: "/m/0cvnqh" 68 | id: 15 69 | display_name: "bench" 70 | } 71 | item { 72 | name: "/m/015p6" 73 | id: 16 74 | display_name: "bird" 75 | } 76 | item { 77 | name: "/m/01yrx" 78 | id: 17 79 | display_name: "cat" 80 | } 81 | item { 82 | name: "/m/0bt9lr" 83 | id: 18 84 | display_name: "dog" 85 | } 86 | item { 87 | name: "/m/03k3r" 88 | id: 19 89 | display_name: "horse" 90 | } 91 | item { 92 | name: "/m/07bgp" 93 | id: 20 94 | display_name: "sheep" 95 | } 96 | item { 97 | name: "/m/01xq0k1" 98 | id: 21 99 | display_name: "cow" 100 | } 101 | item { 102 | name: "/m/0bwd_0j" 103 | id: 22 104 | display_name: "elephant" 105 | } 106 | item { 107 | name: "/m/01dws" 108 | id: 23 109 | display_name: "bear" 110 | } 111 | item { 112 | name: "/m/0898b" 113 | id: 24 114 | display_name: "zebra" 115 | } 116 | item { 117 | name: "/m/03bk1" 118 | id: 25 119 | display_name: "giraffe" 120 | } 121 | item { 122 | name: "/m/01940j" 123 | id: 27 124 | display_name: "backpack" 125 | } 126 | item { 127 | name: "/m/0hnnb" 128 | id: 28 129 | display_name: "umbrella" 130 | } 131 | item { 132 | name: "/m/080hkjn" 133 | id: 31 134 | display_name: "handbag" 135 | } 136 | item { 137 | name: "/m/01rkbr" 138 | id: 32 139 | display_name: "tie" 140 | } 141 | item { 142 | name: "/m/01s55n" 143 | id: 33 144 | display_name: "suitcase" 145 | } 146 | item { 147 | name: "/m/02wmf" 148 | id: 34 149 | display_name: "frisbee" 150 | } 151 | item { 152 | name: "/m/071p9" 153 | id: 35 154 | display_name: "skis" 155 | } 156 | item { 157 | name: "/m/06__v" 158 | id: 36 159 | display_name: "snowboard" 160 | } 161 | item { 162 | name: "/m/018xm" 163 | id: 37 164 | display_name: "sports ball" 165 | } 166 | item { 167 | name: "/m/02zt3" 168 | id: 38 169 | display_name: "kite" 170 | } 171 | item { 172 | name: "/m/03g8mr" 173 | id: 39 174 | display_name: "baseball bat" 175 | } 176 | item { 177 | name: "/m/03grzl" 178 | id: 40 179 | display_name: "baseball glove" 180 | } 181 | item { 182 | name: "/m/06_fw" 183 | id: 41 184 | display_name: "skateboard" 185 | } 186 | item { 187 | name: "/m/019w40" 188 | id: 42 189 | 
display_name: "surfboard" 190 | } 191 | item { 192 | name: "/m/0dv9c" 193 | id: 43 194 | display_name: "tennis racket" 195 | } 196 | item { 197 | name: "/m/04dr76w" 198 | id: 44 199 | display_name: "bottle" 200 | } 201 | item { 202 | name: "/m/09tvcd" 203 | id: 46 204 | display_name: "wine glass" 205 | } 206 | item { 207 | name: "/m/08gqpm" 208 | id: 47 209 | display_name: "cup" 210 | } 211 | item { 212 | name: "/m/0dt3t" 213 | id: 48 214 | display_name: "fork" 215 | } 216 | item { 217 | name: "/m/04ctx" 218 | id: 49 219 | display_name: "knife" 220 | } 221 | item { 222 | name: "/m/0cmx8" 223 | id: 50 224 | display_name: "spoon" 225 | } 226 | item { 227 | name: "/m/04kkgm" 228 | id: 51 229 | display_name: "bowl" 230 | } 231 | item { 232 | name: "/m/09qck" 233 | id: 52 234 | display_name: "banana" 235 | } 236 | item { 237 | name: "/m/014j1m" 238 | id: 53 239 | display_name: "apple" 240 | } 241 | item { 242 | name: "/m/0l515" 243 | id: 54 244 | display_name: "sandwich" 245 | } 246 | item { 247 | name: "/m/0cyhj_" 248 | id: 55 249 | display_name: "orange" 250 | } 251 | item { 252 | name: "/m/0hkxq" 253 | id: 56 254 | display_name: "broccoli" 255 | } 256 | item { 257 | name: "/m/0fj52s" 258 | id: 57 259 | display_name: "carrot" 260 | } 261 | item { 262 | name: "/m/01b9xk" 263 | id: 58 264 | display_name: "hot dog" 265 | } 266 | item { 267 | name: "/m/0663v" 268 | id: 59 269 | display_name: "pizza" 270 | } 271 | item { 272 | name: "/m/0jy4k" 273 | id: 60 274 | display_name: "donut" 275 | } 276 | item { 277 | name: "/m/0fszt" 278 | id: 61 279 | display_name: "cake" 280 | } 281 | item { 282 | name: "/m/01mzpv" 283 | id: 62 284 | display_name: "chair" 285 | } 286 | item { 287 | name: "/m/02crq1" 288 | id: 63 289 | display_name: "couch" 290 | } 291 | item { 292 | name: "/m/03fp41" 293 | id: 64 294 | display_name: "potted plant" 295 | } 296 | item { 297 | name: "/m/03ssj5" 298 | id: 65 299 | display_name: "bed" 300 | } 301 | item { 302 | name: "/m/04bcr3" 303 | id: 67 304 | display_name: "dining table" 305 | } 306 | item { 307 | name: "/m/09g1w" 308 | id: 70 309 | display_name: "toilet" 310 | } 311 | item { 312 | name: "/m/07c52" 313 | id: 72 314 | display_name: "tv" 315 | } 316 | item { 317 | name: "/m/01c648" 318 | id: 73 319 | display_name: "laptop" 320 | } 321 | item { 322 | name: "/m/020lf" 323 | id: 74 324 | display_name: "mouse" 325 | } 326 | item { 327 | name: "/m/0qjjc" 328 | id: 75 329 | display_name: "remote" 330 | } 331 | item { 332 | name: "/m/01m2v" 333 | id: 76 334 | display_name: "keyboard" 335 | } 336 | item { 337 | name: "/m/050k8" 338 | id: 77 339 | display_name: "cell phone" 340 | } 341 | item { 342 | name: "/m/0fx9l" 343 | id: 78 344 | display_name: "microwave" 345 | } 346 | item { 347 | name: "/m/029bxz" 348 | id: 79 349 | display_name: "oven" 350 | } 351 | item { 352 | name: "/m/01k6s3" 353 | id: 80 354 | display_name: "toaster" 355 | } 356 | item { 357 | name: "/m/0130jx" 358 | id: 81 359 | display_name: "sink" 360 | } 361 | item { 362 | name: "/m/040b_t" 363 | id: 82 364 | display_name: "refrigerator" 365 | } 366 | item { 367 | name: "/m/0bt_c3" 368 | id: 84 369 | display_name: "book" 370 | } 371 | item { 372 | name: "/m/01x3z" 373 | id: 85 374 | display_name: "clock" 375 | } 376 | item { 377 | name: "/m/02s195" 378 | id: 86 379 | display_name: "vase" 380 | } 381 | item { 382 | name: "/m/01lsmm" 383 | id: 87 384 | display_name: "scissors" 385 | } 386 | item { 387 | name: "/m/0kmg4" 388 | id: 88 389 | display_name: "teddy bear" 390 | } 391 | item { 392 | name: "/m/03wvsk" 393 
| id: 89 394 | display_name: "hair drier" 395 | } 396 | item { 397 | name: "/m/012xff" 398 | id: 90 399 | display_name: "toothbrush" 400 | } 401 | -------------------------------------------------------------------------------- /utils/visualization_utils_color.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """A set of functions that are used for visualization. 17 | 18 | These functions often receive an image, perform some visualization on the image. 19 | The functions do not return a value, instead they modify the image itself. 20 | 21 | """ 22 | import collections 23 | import numpy as np 24 | import PIL.Image as Image 25 | import PIL.ImageColor as ImageColor 26 | import PIL.ImageDraw as ImageDraw 27 | import PIL.ImageFont as ImageFont 28 | import six 29 | import tensorflow as tf 30 | 31 | 32 | _TITLE_LEFT_MARGIN = 10 33 | _TITLE_TOP_MARGIN = 10 34 | STANDARD_COLORS = [ 35 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 36 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 37 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 38 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 39 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 40 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 41 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 42 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 43 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 44 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 45 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 46 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 47 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 48 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 49 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 50 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 51 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 52 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 53 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 54 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 55 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 56 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 57 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 58 | ] 59 | 60 | 61 | def save_image_array_as_png(image, output_path): 62 | """Saves an image (represented as a numpy 
array) to PNG. 63 | 64 | Args: 65 | image: a numpy array with shape [height, width, 3]. 66 | output_path: path to which image should be written. 67 | """ 68 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 69 | with tf.gfile.Open(output_path, 'w') as fid: 70 | image_pil.save(fid, 'PNG') 71 | 72 | 73 | def encode_image_array_as_png_str(image): 74 | """Encodes a numpy array into a PNG string. 75 | 76 | Args: 77 | image: a numpy array with shape [height, width, 3]. 78 | 79 | Returns: 80 | PNG encoded image string. 81 | """ 82 | image_pil = Image.fromarray(np.uint8(image)) 83 | output = six.BytesIO() 84 | image_pil.save(output, format='PNG') 85 | png_string = output.getvalue() 86 | output.close() 87 | return png_string 88 | 89 | 90 | def draw_bounding_box_on_image_array(image, 91 | ymin, 92 | xmin, 93 | ymax, 94 | xmax, 95 | color='red', 96 | thickness=4, 97 | display_str_list=(), 98 | use_normalized_coordinates=True): 99 | """Adds a bounding box to an image (numpy array). 100 | 101 | Args: 102 | image: a numpy array with shape [height, width, 3]. 103 | ymin: ymin of bounding box in normalized coordinates (same below). 104 | xmin: xmin of bounding box. 105 | ymax: ymax of bounding box. 106 | xmax: xmax of bounding box. 107 | color: color to draw bounding box. Default is red. 108 | thickness: line thickness. Default value is 4. 109 | display_str_list: list of strings to display in box 110 | (each to be shown on its own line). 111 | use_normalized_coordinates: If True (default), treat coordinates 112 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 113 | coordinates as absolute. 114 | """ 115 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 116 | draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, 117 | thickness, display_str_list, 118 | use_normalized_coordinates) 119 | np.copyto(image, np.array(image_pil)) 120 | 121 | 122 | def draw_bounding_box_on_image(image, 123 | ymin, 124 | xmin, 125 | ymax, 126 | xmax, 127 | color='red', 128 | thickness=4, 129 | display_str_list=(), 130 | use_normalized_coordinates=True): 131 | """Adds a bounding box to an image. 132 | 133 | Each string in display_str_list is displayed on a separate line above the 134 | bounding box in black text on a rectangle filled with the input 'color'. 135 | 136 | Args: 137 | image: a PIL.Image object. 138 | ymin: ymin of bounding box. 139 | xmin: xmin of bounding box. 140 | ymax: ymax of bounding box. 141 | xmax: xmax of bounding box. 142 | color: color to draw bounding box. Default is red. 143 | thickness: line thickness. Default value is 4. 144 | display_str_list: list of strings to display in box 145 | (each to be shown on its own line). 146 | use_normalized_coordinates: If True (default), treat coordinates 147 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 148 | coordinates as absolute. 
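  Example (illustrative only; `pil_image` stands for any PIL.Image object):
    draw_bounding_box_on_image(pil_image, 0.1, 0.2, 0.6, 0.8,
                               color='LimeGreen',
                               display_str_list=['person: 87%'])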
149 | """ 150 | is_drawing = True 151 | draw = ImageDraw.Draw(image) 152 | im_width, im_height = image.size 153 | if use_normalized_coordinates: 154 | (left, right, top, bottom) = (xmin * im_width, xmax * im_width, 155 | ymin * im_height, ymax * im_height) 156 | else: 157 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 158 | if is_drawing: 159 | draw.line([(left, top), (left, bottom), (right, bottom), 160 | (right, top), (left, top)], width=thickness, fill=color) 161 | try: 162 | font = ImageFont.truetype('arial.ttf', 24) 163 | except IOError: 164 | font = ImageFont.load_default() 165 | 166 | text_bottom = top 167 | # Reverse list and print from bottom to top. 168 | for display_str in display_str_list[::-1]: 169 | text_width, text_height = font.getsize(display_str) 170 | margin = np.ceil(0.05 * text_height) 171 | if is_drawing: 172 | draw.rectangle( 173 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 174 | text_bottom)], 175 | fill=color) 176 | draw.text( 177 | (left + margin, text_bottom - text_height - margin), 178 | display_str, 179 | fill='black', 180 | font=font) 181 | text_bottom -= text_height - 2 * margin 182 | 183 | 184 | def draw_bounding_boxes_on_image_array(image, 185 | boxes, 186 | color='red', 187 | thickness=4, 188 | display_str_list_list=()): 189 | """Draws bounding boxes on image (numpy array). 190 | 191 | Args: 192 | image: a numpy array object. 193 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 194 | The coordinates are in normalized format between [0, 1]. 195 | color: color to draw bounding box. Default is red. 196 | thickness: line thickness. Default value is 4. 197 | display_str_list_list: list of list of strings. 198 | a list of strings for each bounding box. 199 | The reason to pass a list of strings for a 200 | bounding box is that it might contain 201 | multiple labels. 202 | 203 | Raises: 204 | ValueError: if boxes is not a [N, 4] array 205 | """ 206 | image_pil = Image.fromarray(image) 207 | draw_bounding_boxes_on_image(image_pil, boxes, color, thickness, 208 | display_str_list_list) 209 | np.copyto(image, np.array(image_pil)) 210 | 211 | 212 | def draw_bounding_boxes_on_image(image, 213 | boxes, 214 | color='red', 215 | thickness=4, 216 | display_str_list_list=()): 217 | """Draws bounding boxes on image. 218 | 219 | Args: 220 | image: a PIL.Image object. 221 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 222 | The coordinates are in normalized format between [0, 1]. 223 | color: color to draw bounding box. Default is red. 224 | thickness: line thickness. Default value is 4. 225 | display_str_list_list: list of list of strings. 226 | a list of strings for each bounding box. 227 | The reason to pass a list of strings for a 228 | bounding box is that it might contain 229 | multiple labels. 
230 | 231 | Raises: 232 | ValueError: if boxes is not a [N, 4] array 233 | """ 234 | boxes_shape = boxes.shape 235 | if not boxes_shape: 236 | return 237 | if len(boxes_shape) != 2 or boxes_shape[1] != 4: 238 | raise ValueError('Input must be of size [N, 4]') 239 | for i in range(boxes_shape[0]): 240 | display_str_list = () 241 | if display_str_list_list: 242 | display_str_list = display_str_list_list[i] 243 | draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2], 244 | boxes[i, 3], color, thickness, display_str_list) 245 | 246 | 247 | def draw_keypoints_on_image_array(image, 248 | keypoints, 249 | color='red', 250 | radius=2, 251 | use_normalized_coordinates=True): 252 | """Draws keypoints on an image (numpy array). 253 | 254 | Args: 255 | image: a numpy array with shape [height, width, 3]. 256 | keypoints: a numpy array with shape [num_keypoints, 2]. 257 | color: color to draw the keypoints with. Default is red. 258 | radius: keypoint radius. Default value is 2. 259 | use_normalized_coordinates: if True (default), treat keypoint values as 260 | relative to the image. Otherwise treat them as absolute. 261 | """ 262 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 263 | draw_keypoints_on_image(image_pil, keypoints, color, radius, 264 | use_normalized_coordinates) 265 | np.copyto(image, np.array(image_pil)) 266 | 267 | 268 | def draw_keypoints_on_image(image, 269 | keypoints, 270 | color='red', 271 | radius=2, 272 | use_normalized_coordinates=True): 273 | """Draws keypoints on an image. 274 | 275 | Args: 276 | image: a PIL.Image object. 277 | keypoints: a numpy array with shape [num_keypoints, 2]. 278 | color: color to draw the keypoints with. Default is red. 279 | radius: keypoint radius. Default value is 2. 280 | use_normalized_coordinates: if True (default), treat keypoint values as 281 | relative to the image. Otherwise treat them as absolute. 282 | """ 283 | draw = ImageDraw.Draw(image) 284 | im_width, im_height = image.size 285 | keypoints_x = [k[1] for k in keypoints] 286 | keypoints_y = [k[0] for k in keypoints] 287 | if use_normalized_coordinates: 288 | keypoints_x = tuple([im_width * x for x in keypoints_x]) 289 | keypoints_y = tuple([im_height * y for y in keypoints_y]) 290 | for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y): 291 | draw.ellipse([(keypoint_x - radius, keypoint_y - radius), 292 | (keypoint_x + radius, keypoint_y + radius)], 293 | outline=color, fill=color) 294 | 295 | 296 | def draw_mask_on_image_array(image, mask, color='red', alpha=0.7): 297 | """Draws mask on an image. 298 | 299 | Args: 300 | image: uint8 numpy array with shape (img_height, img_height, 3) 301 | mask: a float numpy array of shape (img_height, img_height) with 302 | values between 0 and 1 303 | color: color to draw the keypoints with. Default is red. 304 | alpha: transparency value between 0 and 1. (default: 0.7) 305 | 306 | Raises: 307 | ValueError: On incorrect data type for image or masks. 
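  Example (illustrative only; paints a translucent red square onto `image`,
  assumed to be a uint8 numpy array of shape (H, W, 3)):
    mask = np.zeros(image.shape[:2], dtype=np.float32)
    mask[50:150, 50:150] = 1.0
    draw_mask_on_image_array(image, mask, color='Red', alpha=0.4)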
308 | """ 309 | if image.dtype != np.uint8: 310 | raise ValueError('`image` not of type np.uint8') 311 | if mask.dtype != np.float32: 312 | raise ValueError('`mask` not of type np.float32') 313 | if np.any(np.logical_or(mask > 1.0, mask < 0.0)): 314 | raise ValueError('`mask` elements should be in [0, 1]') 315 | rgb = ImageColor.getrgb(color) 316 | pil_image = Image.fromarray(image) 317 | 318 | solid_color = np.expand_dims( 319 | np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3]) 320 | pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA') 321 | pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L') 322 | pil_image = Image.composite(pil_solid_color, pil_image, pil_mask) 323 | np.copyto(image, np.array(pil_image.convert('RGB'))) 324 | 325 | 326 | def visualize_boxes_and_labels_on_image_array(image, 327 | boxes, 328 | classes, 329 | scores, 330 | category_index, 331 | instance_masks=None, 332 | keypoints=None, 333 | use_normalized_coordinates=False, 334 | max_boxes_to_draw=20, 335 | min_score_thresh=.7, 336 | agnostic_mode=False, 337 | line_thickness=4, 338 | sequence_sorted=False, 339 | sequence_type='char', 340 | matched_area=None): 341 | """Overlay labeled boxes on an image with formatted scores and label names. 342 | 343 | This function groups boxes that correspond to the same location 344 | and creates a display string for each detection and overlays these 345 | on the image. Note that this function modifies the image array in-place 346 | and does not return anything. 347 | 348 | Args: 349 | image: uint8 numpy array with shape (img_height, img_width, 3) 350 | boxes: a numpy array of shape [N, 4] 351 | classes: a numpy array of shape [N] 352 | scores: a numpy array of shape [N] or None. If scores=None, then 353 | this function assumes that the boxes to be plotted are groundtruth 354 | boxes and plot all boxes as black with no classes or scores. 355 | category_index: a dict containing category dictionaries (each holding 356 | category index `id` and category name `name`) keyed by category indices. 357 | instance_masks: a numpy array of shape [N, image_height, image_width], can 358 | be None 359 | keypoints: a numpy array of shape [N, num_keypoints, 2], can 360 | be None 361 | use_normalized_coordinates: whether boxes is to be interpreted as 362 | normalized coordinates or not. 363 | max_boxes_to_draw: maximum number of boxes to visualize. If None, draw 364 | all boxes. 365 | min_score_thresh: minimum score threshold for a box to be visualized 366 | agnostic_mode: boolean (default: False) controlling whether to evaluate in 367 | class-agnostic mode or not. This mode will display scores but ignore 368 | classes. 369 | line_thickness: integer (default: 4) controlling line width of the boxes. 370 | """ 371 | # Create a display string (and color) for every box location, group any boxes 372 | # that correspond to the same location. 
373 | cur_char = 48 374 | result = {'sequence': '', 'objects': []} 375 | class_values = {'id': 0, 'name': 'N/A', 'char': '', 'uid': '', 'score': 0} 376 | box_to_display_str_map = collections.defaultdict(list) 377 | box_to_color_map = collections.defaultdict(str) 378 | box_to_instance_masks_map = {} 379 | box_to_keypoints_map = collections.defaultdict(list) 380 | if not max_boxes_to_draw: 381 | max_boxes_to_draw = boxes.shape[0] 382 | for i in range(min(max_boxes_to_draw, boxes.shape[0])): 383 | if scores is None or scores[i] > min_score_thresh: 384 | box = tuple(boxes[i].tolist()) 385 | if instance_masks is not None: 386 | box_to_instance_masks_map[box] = instance_masks[i] 387 | if keypoints is not None: 388 | box_to_keypoints_map[box].extend(keypoints[i]) 389 | if scores is None: 390 | #box_to_color_map[box] = 'black' 391 | box_to_color_map[box] = class_values 392 | else: 393 | if not agnostic_mode: 394 | if classes[i] in category_index.keys(): 395 | class_values = category_index[classes[i]] 396 | class_values['score'] = int(100*scores[i]) 397 | class_values['uid'] = chr(cur_char) 398 | cur_char += 1 399 | display_str = '{}: {}%'.format( 400 | class_values['name'], 401 | class_values['score']) 402 | else: 403 | class_values['score'] = int(100*scores[i]) 404 | display_str = 'score: {}%'.format(class_values['score']) 405 | box_to_display_str_map[box].append(display_str) 406 | box_to_color_map[box] = class_values 407 | 408 | # Draw all boxes onto image. 409 | elements = {} 410 | for box, class_values in box_to_color_map.items(): 411 | color = 'Violet' 412 | ymin, xmin, ymax, xmax = box 413 | im_height, im_width, _ = image.shape 414 | (left, right, top, bottom) = (int(xmin * im_width), int(xmax * im_width), int(ymin * im_height), int(ymax * im_height)) 415 | coords = (left, right, top, bottom) 416 | if matched_area is None: 417 | global_coords = coords 418 | else: 419 | global_coords = (matched_area[2] + left, matched_area[2] + right, matched_area[0] + top, matched_area[0] + bottom) 420 | crop = image[top:bottom,left:right] 421 | elements[xmin] = class_values[sequence_type] 422 | result['objects'].append({'values': class_values, 'coords': coords, 'global_coords': global_coords, 'norm': box, 'image': crop}) 423 | if instance_masks is not None: 424 | draw_mask_on_image_array( 425 | image, 426 | box_to_instance_masks_map[box], 427 | color=color 428 | ) 429 | draw_bounding_box_on_image_array( 430 | image, 431 | ymin, 432 | xmin, 433 | ymax, 434 | xmax, 435 | color=color, 436 | thickness=line_thickness, 437 | display_str_list=box_to_display_str_map[box], 438 | use_normalized_coordinates=use_normalized_coordinates) 439 | if keypoints is not None: 440 | draw_keypoints_on_image_array( 441 | image, 442 | box_to_keypoints_map[box], 443 | color=color, 444 | radius=line_thickness / 2, 445 | use_normalized_coordinates=use_normalized_coordinates) 446 | if sequence_sorted: 447 | for i in sorted(elements): 448 | result['sequence'] += elements[i] 449 | else: 450 | for i in elements: 451 | result['sequence'] += elements[i] 452 | return result 453 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # pylint: disable=C0103 4 | # pylint: disable=E1101 5 | 6 | # python app.py 1 1 -1 video1.mp4 video1.mp4 7 | 8 | import os 9 | import sys 10 | import cv2 11 | import time 12 | import math 13 | import signal 14 | import youtube_dl 15 | import 
numpy as np 16 | import edit_distance 17 | import tensorflow as tf 18 | from imutils.video import FileVideoStream 19 | from imutils.video import FPS 20 | from utils import label_map_util 21 | from utils import visualization_utils_color as vis_util 22 | 23 | DEBUG_TIME = False 24 | DEBUG_ALPHA = False 25 | DEBUG_SKIPS = False 26 | out = None 27 | last_frame = None 28 | last_time = None 29 | out_fps = 30 30 | video_num = 0 31 | max_videos = 0 32 | video_path_1 = 0 33 | video_path_2 = 0 34 | download_list = [] 35 | download_item = None 36 | last_message = '' 37 | frames_skipped = 0 38 | recalculate_fps = False 39 | cv2.ocl.setUseOpenCL(False) 40 | 41 | def compare_videos(path_video_1, path_video_2): 42 | global detection_graph, from_frame, recalculate_fps, out 43 | PATH_TO_CKPT = './ssd_inception2.pb' 44 | PATH_TO_LABELS = './labels.pbtxt' 45 | thresh = 0.2 46 | sequence_sorted = False 47 | store_output = True 48 | enable_tracking = True 49 | enable_detection = True 50 | adjust_frame = True 51 | adjust_perspective = True 52 | enable_tracking_template = True 53 | only_use_template_when_none = True 54 | enable_objects_threshold = False 55 | at_least_one_match = False 56 | recalculate_time = 0 57 | sequence_type = 'char' 58 | descriptor = "surf" 59 | tracker_type = 'MEDIANFLOW' # 'BOOSTING','MIL','KCF','TLD','MEDIANFLOW','GOTURN' 60 | NUM_CLASSES = 90 61 | MIN_MATCH_COUNT = 10 62 | SIMILARITY_THRESHOLD = 0.1 63 | trackers = {} 64 | positions = {} 65 | source_frame = 0 66 | ok = None 67 | font = cv2.FONT_HERSHEY_SIMPLEX 68 | size = 1 69 | weight = 2 70 | color = (255,255,255) 71 | skips_max = 0 72 | skips_number = 0 73 | total_frames = 0 74 | 75 | last_message = '' 76 | label_map = label_map_util.load_labelmap(PATH_TO_LABELS) 77 | categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) 78 | category_index = label_map_util.create_category_index(categories) 79 | detection_graph = tf.Graph() 80 | with detection_graph.as_default(): 81 | od_graph_def = tf.GraphDef() 82 | with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: 83 | serialized_graph = fid.read() 84 | od_graph_def.ParseFromString(serialized_graph) 85 | tf.import_graph_def(od_graph_def, name='') 86 | with detection_graph.as_default(): 87 | config = tf.ConfigProto() 88 | config.gpu_options.allow_growth = True 89 | with tf.Session(graph=detection_graph, config=config) as sess: 90 | total_time_init = time.time() 91 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 92 | #sift = cv2.xfeatures2d.SIFT_create() 93 | surf = cv2.xfeatures2d.SURF_create() 94 | #fast = cv2.FastFeatureDetector_create() 95 | #orb = cv2.ORB_create() 96 | desc = surf 97 | show_points = 20 98 | video_1 = cv2.VideoCapture(path_video_1) 99 | fps_1 = video_1.get(cv2.CAP_PROP_FPS) 100 | if DEBUG_TIME: 101 | print('fps_1', fps_1) 102 | video_2 = cv2.VideoCapture(path_video_2) 103 | fps_2 = video_2.get(cv2.CAP_PROP_FPS) 104 | if DEBUG_TIME: 105 | print('fps_2', fps_2) 106 | out = None 107 | use_descriptor = True 108 | use_detection = False 109 | use_tracking = False 110 | matched_area = None 111 | frames_to_skip = 0 112 | processed_frames = 0 113 | from_frame_1 = int(sys.argv[1]) 114 | from_frame_2 = int(sys.argv[2]) 115 | video_1.set(cv2.CAP_PROP_POS_FRAMES, from_frame_1) 116 | video_2.set(cv2.CAP_PROP_POS_FRAMES, from_frame_2) 117 | _, frame_1 = video_1.read() 118 | objects_1 = detect_objects(frame_1, thresh, detection_graph, sess, category_index, sequence_sorted=sequence_sorted, sequence_type=sequence_type) 119 | sequence_1 
= objects_1['sequence'] 120 | cv2.putText(frame_1, "skip: %s src: %s" % (processed_frames, sequence_1), (10, 30), font, size, color, weight) 121 | objects_2 = None 122 | area_2 = None 123 | sequence_2 = '' 124 | desc_kp_1, desc_des_1 = desc.detectAndCompute(frame_1, None) 125 | #print( desc.descriptorSize() ) 126 | #print( desc_des_1.shape ) 127 | until_end = False 128 | frame_num = int(sys.argv[3]) 129 | if frame_num == -1: 130 | until_end = True 131 | while frame_num or until_end: 132 | total_frames += 1 133 | if recalculate_fps: 134 | if at_least_one_match: 135 | to_frame = from_frame_1 + math.ceil((time.time() - recalculate_time) * fps_1) 136 | if to_frame >= frame_num: 137 | break 138 | frame_num -= 1 139 | ok, frame_2 = video_2.read() 140 | if not ok: 141 | break 142 | 143 | if use_tracking: 144 | sequence_tmp = '' 145 | for object_2 in objects_2['objects']: 146 | if object_2['coords'] in trackers: 147 | start_time = time.time() 148 | ok, box = trackers[object_2['coords']].update(frame_2) 149 | box = (int(box[0]), int(box[1]), int(box[2]), int(box[3])) 150 | elapsed_time = time.time() - start_time 151 | if DEBUG_TIME: 152 | print('tracking method', elapsed_time) 153 | if ok: 154 | sequence_tmp += object_2['values'][sequence_type] 155 | cv2.rectangle(frame_2, (box[0], box[2]), (box[1], box[3]), (255, 0, 0), 2) 156 | else: 157 | if enable_tracking_template: 158 | process_static = True 159 | if only_use_template_when_none: 160 | num_matches = get_sequence_matches(sequence_1, sequence_tmp) 161 | if num_matches > 0: 162 | process_static = False 163 | if process_static: 164 | start_time = time.time() 165 | res = cv2.matchTemplate(frame_2, object_2['image'], cv2.TM_CCOEFF_NORMED) 166 | elapsed_time = time.time() - start_time 167 | if DEBUG_TIME: 168 | print('tracking match', elapsed_time) 169 | min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res) 170 | threshold = 0.8 171 | if max_val > threshold: 172 | sequence_tmp += object_2['values'][sequence_type] 173 | top_left = max_loc 174 | h, w, _ = object_2['image'].shape 175 | bottom_right = (top_left[0] + w, top_left[1] + h) 176 | cv2.rectangle(frame_2, top_left, bottom_right, (0, 255, 0), 2) 177 | 178 | cv2.putText(frame_2, "alp: %s" % (sequence_2), (10, 30), font, size, color, weight) 179 | num_matches = get_sequence_matches(sequence_1, sequence_tmp) 180 | if num_matches > 0: 181 | if DEBUG_ALPHA: 182 | print_once('eq: %s ref: %s new: %s' % (num_matches, sequence_1, sequence_tmp)) 183 | else: 184 | source_frame += processed_frames 185 | if DEBUG_SKIPS: 186 | print('skipped frames: %s' % (processed_frames)) 187 | skips_number += 1 188 | skips_max = processed_frames if processed_frames > skips_max else skips_max 189 | if not recalculate_fps: 190 | video_1.set(cv2.CAP_PROP_POS_FRAMES, from_frame_1 + source_frame) 191 | else: 192 | to_frame = from_frame_1 + math.ceil((time.time() - recalculate_time) * fps_1) 193 | video_1.set(cv2.CAP_PROP_POS_FRAMES, to_frame) 194 | ok, frame_1 = video_1.read() 195 | objects_1 = detect_objects(frame_1, thresh, detection_graph, sess, category_index, sequence_sorted=sequence_sorted, sequence_type=sequence_type) 196 | sequence_1 = objects_1['sequence'] 197 | cv2.putText(frame_1, "skip: %s src: %s" % (processed_frames, sequence_1), (10, 30), font, size, color, weight) 198 | desc_kp_1, desc_des_1 = desc.detectAndCompute(frame_1, None) 199 | use_tracking = False 200 | use_detection = True 201 | use_descriptor = False 202 | processed_frames = 0 203 | 204 | if use_detection: 205 | if adjust_frame: 206 | area_2 = 
frame_2[matched_area[0]:matched_area[1],matched_area[2]:matched_area[3]] 207 | else: 208 | area_2 = frame_2 209 | objects_2 = detect_objects(area_2, thresh, detection_graph, sess, category_index, matched_area=matched_area, sequence_sorted=sequence_sorted, sequence_type=sequence_type) 210 | sequence_2 = objects_2['sequence'] 211 | cv2.putText(frame_2, "alp: %s" % (sequence_2), (10, 30), font, size, color, weight) 212 | num_matches = get_sequence_matches(sequence_1, sequence_2) 213 | if DEBUG_ALPHA: 214 | print_once('eq: %s ref: %s new: %s' % (num_matches, sequence_1, sequence_2)) 215 | if num_matches > 0: 216 | trackers = {} 217 | were_coords_valid = False 218 | if enable_tracking: 219 | for object_2 in objects_2['objects']: 220 | if are_coords_valid(object_2['coords'], area_2.shape): 221 | trackers[object_2['coords']] = create_tracker(tracker_type) 222 | if adjust_frame: 223 | trackers[object_2['coords']].init(frame_2, object_2['global_coords']) 224 | else: 225 | trackers[object_2['coords']].init(frame_2, object_2['coords']) 226 | were_coords_valid = True 227 | if were_coords_valid: 228 | if enable_tracking: 229 | use_tracking = True 230 | use_detection = False 231 | use_descriptor = False 232 | else: 233 | use_tracking = False 234 | use_detection = True 235 | use_descriptor = False 236 | else: 237 | use_tracking = False 238 | use_detection = True 239 | use_descriptor = False 240 | else: 241 | use_tracking = False 242 | use_detection = False 243 | use_descriptor = True 244 | if not enable_tracking: 245 | source_frame += processed_frames 246 | if DEBUG_SKIPS: 247 | print('detector skipped frames: %s' % (processed_frames)) 248 | skips_number += 1 249 | skips_max = processed_frames if processed_frames > skips_max else skips_max 250 | if not recalculate_fps: 251 | video_1.set(cv2.CAP_PROP_POS_FRAMES, from_frame_1 + source_frame) 252 | else: 253 | to_frame = from_frame_1 + math.ceil((time.time() - recalculate_time) * fps_1) 254 | video_1.set(cv2.CAP_PROP_POS_FRAMES, to_frame) 255 | ok, frame_1 = video_1.read() 256 | cv2.putText(frame_1, "skip: %s" % (processed_frames), (10, 30), font, size, color, weight) 257 | desc_kp_1, desc_des_1 = desc.detectAndCompute(frame_1, None) 258 | processed_frames = 0 259 | 260 | if use_descriptor: 261 | matched_area = None 262 | descriptor_matched = False 263 | start_time = time.time() 264 | desc_kp_2, desc_des_2 = desc.detectAndCompute(frame_2, None) 265 | elapsed_time = time.time() - start_time 266 | if DEBUG_TIME: 267 | print(descriptor, elapsed_time) 268 | 269 | if descriptor == "sift" or descriptor == "surf" or descriptor == "fast": 270 | FLANN_INDEX_KDTREE = 1 271 | index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5) 272 | search_params = dict(checks=50) # or pass empty dictionary 273 | start_time = time.time() 274 | flann = cv2.FlannBasedMatcher(index_params, search_params) 275 | try: 276 | matches = flann.knnMatch(desc_des_1, desc_des_2, k=2) 277 | except: 278 | matches = [] 279 | elapsed_time = time.time() - start_time 280 | if DEBUG_TIME: 281 | print('FLANN', elapsed_time) 282 | good = [] 283 | for m,n in matches: 284 | if m.distance < 0.7*n.distance: 285 | good.append(m) 286 | area_2 = frame_2 287 | similarity = 0 288 | if len(matches) > 0: 289 | similarity = len(good) / len(matches) 290 | if len(good) > MIN_MATCH_COUNT: 291 | src_pts = np.float32([ desc_kp_1[m.queryIdx].pt for m in good ]).reshape(-1,1,2) 292 | dst_pts = np.float32([ desc_kp_2[m.trainIdx].pt for m in good ]).reshape(-1,1,2) 293 | M, mask = cv2.findHomography(src_pts, 
dst_pts, cv2.RANSAC, 5.0) 294 | matchesMask = mask.ravel().tolist() 295 | h,w,d = frame_1.shape 296 | pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2) 297 | try: 298 | dst = cv2.perspectiveTransform(pts,M) 299 | matched_area = get_rect_from_dst(dst, frame_2.shape) 300 | trans_coords = get_transformed_coords(dst, matched_area) 301 | frame_2 = cv2.polylines(frame_2,[np.int32(dst)],True,255,3, cv2.LINE_AA) 302 | calc_height = matched_area[1] - matched_area[0] 303 | calc_width = matched_area[3] - matched_area[2] 304 | frame_height = frame_2.shape[0] 305 | frame_width = frame_2.shape[1] 306 | sim_rate = 1 + (((1 - (calc_height / frame_height)) + (1 - (calc_width / frame_width))) / 2) 307 | similarity *= sim_rate 308 | if similarity > SIMILARITY_THRESHOLD: 309 | descriptor_matched = True 310 | except: 311 | pass 312 | else: 313 | if DEBUG_TIME: 314 | print( "Not enough matches were found - {}/{}".format(len(good), MIN_MATCH_COUNT) ) 315 | matchesMask = None 316 | 317 | if not descriptor_matched: 318 | if at_least_one_match: 319 | source_frame += processed_frames 320 | if DEBUG_SKIPS: 321 | print('descriptor skipped frames: %s' % (processed_frames)) 322 | skips_number += 1 323 | skips_max = processed_frames if processed_frames > skips_max else skips_max 324 | if not recalculate_fps: 325 | video_1.set(cv2.CAP_PROP_POS_FRAMES, from_frame_1 + source_frame) 326 | else: 327 | to_frame = from_frame_1 + math.ceil((time.time() - recalculate_time) * fps_1) 328 | video_1.set(cv2.CAP_PROP_POS_FRAMES, to_frame) 329 | ok, frame_1 = video_1.read() 330 | cv2.putText(frame_1, "skip: %s" % (processed_frames), (10, 30), font, size, color, weight) 331 | desc_kp_1, desc_des_1 = desc.detectAndCompute(frame_1, None) 332 | processed_frames = 0 333 | else: 334 | if not at_least_one_match and recalculate_fps: 335 | recalculate_time = time.time() 336 | at_least_one_match = True 337 | if enable_detection: 338 | if adjust_frame: 339 | if is_matched_area_okay(trans_coords, frame_2.shape): 340 | area_2 = frame_2[matched_area[0]:matched_area[1],matched_area[2]:matched_area[3]] 341 | area_2 = cv2.polylines(area_2,[np.array(trans_coords)],True,255,3, cv2.LINE_AA) 342 | if adjust_perspective: 343 | area_2 = four_point_transform(area_2, trans_coords) 344 | else: 345 | area_2 = frame_2 346 | else: 347 | area_2 = frame_2 348 | objects_2 = detect_objects(area_2, thresh, detection_graph, sess, category_index, matched_area=matched_area, sequence_sorted=sequence_sorted, sequence_type=sequence_type) 349 | sequence_2 = objects_2['sequence'] 350 | cv2.putText(frame_2, "alp: %s" % (sequence_2), (10, 30), font, size, color, weight) 351 | num_matches = get_sequence_matches(sequence_1, sequence_2) 352 | if DEBUG_ALPHA: 353 | print_once('eq: %s ref: %s new: %s' % (num_matches, sequence_1, sequence_2)) 354 | if num_matches > 0: 355 | use_descriptor = False 356 | use_detection = True 357 | use_tracking = False 358 | else: 359 | source_frame += processed_frames 360 | if DEBUG_SKIPS: 361 | print('descriptor detector skipped frames: %s' % (processed_frames)) 362 | skips_number += 1 363 | skips_max = processed_frames if processed_frames > skips_max else skips_max 364 | if not recalculate_fps: 365 | video_1.set(cv2.CAP_PROP_POS_FRAMES, from_frame_1 + source_frame) 366 | else: 367 | to_frame = from_frame_1 + math.ceil((time.time() - recalculate_time) * fps_1) 368 | video_1.set(cv2.CAP_PROP_POS_FRAMES, to_frame) 369 | ok, frame_1 = video_1.read() 370 | desc_kp_1, desc_des_1 = desc.detectAndCompute(frame_1, None) 371 | objects_1 = 
detect_objects(frame_1, thresh, detection_graph, sess, category_index, sequence_sorted=sequence_sorted, sequence_type=sequence_type) 372 | sequence_1 = objects_1['sequence'] 373 | processed_frames = 0 374 | use_descriptor = True 375 | use_detection = False 376 | use_tracking = False 377 | 378 | #matches_img = cv2.drawMatches(frame_1, desc_kp_1, frame_2, desc_kp_2, good, None, **draw_params) 379 | 380 | if at_least_one_match: 381 | processed_frames += 1 382 | if matchesMask is None: 383 | matchesMask = [] 384 | draw_params = dict( 385 | matchesMask = matchesMask[:show_points], # draw only inliers 386 | flags = 2) 387 | #print("%s of %s rate %s" % (len(good), len(matches), len(good)/len(matches))) 388 | try: 389 | matches_img = cv2.drawMatches(frame_1, desc_kp_1, frame_2, desc_kp_2, good[:show_points], None, **draw_params) 390 | except: 391 | matches_img = frame_1 392 | if store_output: 393 | if out == None: 394 | out = cv2.VideoWriter('out.avi', fourcc, 30.0, (matches_img.shape[1], matches_img.shape[0]), True) 395 | if not recalculate_fps: 396 | out.write(matches_img) 397 | else: 398 | video_insert(matches_img) 399 | cv2.imshow("Matches", matches_img) 400 | cv2.waitKey(1) 401 | if store_output: 402 | if out is not None: 403 | if recalculate_fps: 404 | video_close() 405 | out.release() 406 | print('--- STATS ---') 407 | total_time = time.time() - total_time_init 408 | print('TOTAL TIME: ', total_time) 409 | print('TOTAL FRAMES: ', total_frames) 410 | print('SKIPS NUMBER: ', skips_number) 411 | print('MAX SKIP: ', skips_max) 412 | 413 | def video_insert(frame): 414 | global out, last_frame, last_time 415 | if last_time is None: 416 | last_time = time.time() 417 | else: 418 | num_frames = math.floor((time.time() - last_time) * out_fps) 419 | last_time = time.time() 420 | for i in range(num_frames): 421 | out.write(last_frame) 422 | last_frame = frame 423 | 424 | def video_close(): 425 | global out, last_frame, last_time 426 | num_frames = math.floor((time.time() - last_time) * out_fps) 427 | for i in range(num_frames): 428 | out.write(last_frame) 429 | 430 | def is_matched_area_okay(matched_area, frame_2_shape): 431 | return True 432 | 433 | def print_once(message): 434 | global last_message 435 | if message != last_message: 436 | last_message = message 437 | print(last_message) 438 | 439 | def youtube_download_hook(download): 440 | global download_item 441 | if download["status"] == "finished": 442 | print(download["filename"]) 443 | video_num = download_item['index'] 444 | os.rename(download["filename"], "internet%s.mp4" % (video_num)) 445 | continue_downloads() 446 | 447 | def load_from_youtube(video): 448 | ydl_opts = {"format": "mp4", "progress_hooks": [youtube_download_hook]} 449 | youtube_dl.YoutubeDL(ydl_opts).download([video]) 450 | 451 | def get_and_compare_videos(path_1, path_2, skip=False): 452 | global video_path_1, video_path_2, max_videos, download_list, recalculate_fps 453 | need_download = False 454 | video_path_1 = path_1 455 | if 'http' in path_1: 456 | download_list.append({'source': path_1, 'index': 1}) 457 | video_path_1 = 'internet1.mp4' 458 | elif path_1 == '0': 459 | video_path_1 = 0 460 | recalculate_fps = True 461 | video_path_2 = path_2 462 | if 'http' in path_2: 463 | download_list.append({'source': path_2, 'index': 2}) 464 | video_path_2 = 'internet2.mp4' 465 | elif path_2 == '0': 466 | video_path_2 = 0 467 | recalculate_fps = True 468 | if len(download_list) == 0: 469 | compare_videos(video_path_1, video_path_2) 470 | else: 471 | continue_downloads() 472 | 473 | 
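# Download-queue flow (descriptive comment): get_and_compare_videos() pushes
# any http(s) input onto download_list and substitutes a local internet<N>.mp4
# path for it; youtube_download_hook() renames each finished download
# accordingly and calls continue_downloads(), which pops the next pending item
# or, once the queue is empty, finally runs compare_videos().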
473 | def continue_downloads(): 474 | global download_list, download_item, video_path_1, video_path_2 475 | if len(download_list) > 0: 476 | download_item = download_list.pop(0) 477 | load_from_youtube(download_item['source']) 478 | else: 479 | compare_videos(video_path_1, video_path_2) 480 | 481 | def create_tracker(tracker_type): 482 | if tracker_type == 'BOOSTING': 483 | return cv2.TrackerBoosting_create() 484 | elif tracker_type == 'MIL': 485 | return cv2.TrackerMIL_create() 486 | elif tracker_type == 'KCF': 487 | return cv2.TrackerKCF_create() 488 | elif tracker_type == 'TLD': 489 | return cv2.TrackerTLD_create() 490 | elif tracker_type == 'MEDIANFLOW': 491 | return cv2.TrackerMedianFlow_create() 492 | elif tracker_type == 'GOTURN': 493 | return cv2.TrackerGOTURN_create() 494 | else: 495 | return cv2.TrackerKCF_create() 496 | 497 | def are_coords_valid(box, orig): 498 | threshold = 0.8 499 | calc_height = ((box[1] - box[0])/orig[1]) 500 | calc_width = ((box[3] - box[2])/orig[0]) 501 | if calc_height >= threshold and calc_width >= threshold: 502 | return False 503 | return True 504 | 505 | def detect_objects(image, thresh, detection_graph, sess, category_index, matched_area=None, sequence_sorted=False, sequence_type='char'): 506 | image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 507 | image_np_expanded = np.expand_dims(image_np, axis=0) 508 | image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') 509 | boxes = detection_graph.get_tensor_by_name('detection_boxes:0') 510 | scores = detection_graph.get_tensor_by_name('detection_scores:0') 511 | classes = detection_graph.get_tensor_by_name('detection_classes:0') 512 | num_detections = detection_graph.get_tensor_by_name('num_detections:0') 513 | start_time = time.time() 514 | if image_np_expanded[0] is not None: 515 | (boxes, scores, classes, num_detections) = sess.run( 516 | [boxes, scores, classes, num_detections], 517 | feed_dict={image_tensor: image_np_expanded}) 518 | elapsed_time = time.time() - start_time 519 | if DEBUG_TIME: 520 | print('cnn', elapsed_time) 521 | box = vis_util.visualize_boxes_and_labels_on_image_array( 522 | image, 523 | np.squeeze(boxes), 524 | np.squeeze(classes).astype(np.int32), 525 | np.squeeze(scores), 526 | category_index, 527 | min_score_thresh=thresh, 528 | use_normalized_coordinates=True, 529 | line_thickness=4, 530 | sequence_sorted=sequence_sorted, 531 | sequence_type=sequence_type, 532 | matched_area=matched_area) 533 | else: 534 | box = {'sequence': '', 'objects': []} 535 | return box 536 | 537 | def get_sequence_matches(sequence_1, sequence_2): 538 | if sequence_1 and sequence_2: 539 | sm = edit_distance.SequenceMatcher(a=sequence_1, b=sequence_2) 540 | sm.get_opcodes() 541 | sm.ratio() 542 | sm.get_matching_blocks() 543 | distance = sm.distance() 544 | num_matches = sm.matches() 545 | return num_matches 546 | else: 547 | return 0 548 | 549 | def get_rect_from_dst(dst, orig): 550 | top = int(dst[0][0][1]) if dst[0][0][1] < dst[3][0][1] else int(dst[3][0][1]) 551 | bottom = int(dst[1][0][1]) if dst[1][0][1] > dst[2][0][1] else int(dst[2][0][1]) 552 | left = int(dst[0][0][0]) if dst[0][0][0] < dst[1][0][0] else int(dst[1][0][0]) 553 | right = int(dst[2][0][0]) if dst[2][0][0] > dst[3][0][0] else int(dst[3][0][0]) 554 | top = 0 if top < 0 else top 555 | left = 0 if left < 0 else left 556 | bottom = orig[0] if bottom > orig[0] else bottom 557 | right = orig[1] if right > orig[1] else right 558 | return (top, bottom, left, right) 559 |
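`get_sequence_matches` above scores two frames by aligning their per-frame symbol sequences with the `edit_distance` package used in the script and keeping only the number of aligned symbols. A small illustration of that scoring, reusing only the calls already made above; the example sequences and the acceptance threshold are made up for the demonstration.

```python
import edit_distance  # same package the script imports

# Hypothetical per-frame sequences: one symbol per detected object,
# e.g. 'f' for every face the detector reported in that frame.
frame_a = 'ffzf'
frame_b = 'fff'

sm = edit_distance.SequenceMatcher(a=frame_a, b=frame_b)
print('matches:', sm.matches())            # symbols the alignment has in common
print('distance:', sm.distance())          # edits needed to turn one sequence into the other
print('ratio:', sm.ratio())                # normalized similarity in [0, 1]
print('blocks:', sm.get_matching_blocks())

# A possible per-frame acceptance rule (threshold is made up):
threshold = 0.6
similar = sm.matches() / max(len(frame_a), len(frame_b)) >= threshold
print('frames considered similar:', similar)
```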
560 | def get_area_coords(dst, matched_area=(0, 0, 0, 0)): 561 | (top, bottom, left, right) = matched_area # unpacked here but not used below 562 | tl = (int(dst[0][0][0]), int(dst[0][0][1])) 563 | tr = (int(dst[3][0][0]), int(dst[3][0][1])) 564 | bl = (int(dst[1][0][0]), int(dst[1][0][1])) 565 | br = (int(dst[2][0][0]), int(dst[2][0][1])) 566 | return [tl, tr, br, bl] 567 | 568 | def get_transformed_coords(dst, matched_area): 569 | (top, bottom, left, right) = matched_area 570 | tl = (-(left - int(dst[0][0][0])), -(top - int(dst[0][0][1]))) 571 | tr = ((int(dst[3][0][0]) - left), -(top - int(dst[3][0][1]))) 572 | bl = (-(left - int(dst[1][0][0])), int(dst[1][0][1]) - top) 573 | br = ((int(dst[2][0][0]) - left), (int(dst[2][0][1]) - top)) 574 | return [tl, tr, br, bl] 575 | 576 | def order_points(pts): 577 | # initialize a list of coordinates that will be ordered 578 | # such that the first entry in the list is the top-left, 579 | # the second entry is the top-right, the third is the 580 | # bottom-right, and the fourth is the bottom-left 581 | rect = np.zeros((4, 2), dtype = "float32") 582 | 583 | # the top-left point will have the smallest sum, whereas 584 | # the bottom-right point will have the largest sum 585 | s = pts.sum(axis = 1) 586 | rect[0] = pts[np.argmin(s)] 587 | rect[2] = pts[np.argmax(s)] 588 | 589 | # now, compute the difference between the points, the 590 | # top-right point will have the smallest difference, 591 | # whereas the bottom-left will have the largest difference 592 | diff = np.diff(pts, axis = 1) 593 | rect[1] = pts[np.argmin(diff)] 594 | rect[3] = pts[np.argmax(diff)] 595 | 596 | # return the ordered coordinates 597 | return rect 598 |
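`order_points` relies on a small geometric trick: of the four corners, the top-left has the smallest x + y sum, the bottom-right the largest, and the y - x difference separates the top-right (smallest) from the bottom-left (largest). A quick numeric check of that invariant, with a standalone copy of the function so the snippet runs on its own (the coordinates are arbitrary):

```python
import numpy as np


def order_points(pts):
    # same sum/diff trick as above: order as tl, tr, br, bl
    rect = np.zeros((4, 2), dtype="float32")
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]     # top-left: smallest x + y
    rect[2] = pts[np.argmax(s)]     # bottom-right: largest x + y
    diff = np.diff(pts, axis=1)     # y - x for each point
    rect[1] = pts[np.argmin(diff)]  # top-right: smallest y - x
    rect[3] = pts[np.argmax(diff)]  # bottom-left: largest y - x
    return rect


# Four corners of a slightly rotated quadrilateral, given in scrambled order.
pts = np.array([[310, 20], [50, 30], [300, 240], [40, 250]], dtype="float32")
print(order_points(pts))
# expected ordering: (50, 30), (310, 20), (300, 240), (40, 250)
```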
599 | def four_point_transform(image, pts): 600 | # obtain a consistent order of the points and unpack them 601 | # individually 602 | pts = np.array(pts) 603 | rect = order_points(pts) 604 | # rect = np.array(pts) 605 | (tl, tr, br, bl) = rect 606 | 607 | # compute the width of the new image, which will be the 608 | # maximum distance between bottom-right and bottom-left 609 | # x-coordinates or the top-right and top-left x-coordinates 610 | widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2)) 611 | widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2)) 612 | maxWidth = max(int(widthA), int(widthB)) 613 | 614 | # compute the height of the new image, which will be the 615 | # maximum distance between the top-right and bottom-right 616 | # y-coordinates or the top-left and bottom-left y-coordinates 617 | heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2)) 618 | heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2)) 619 | maxHeight = max(int(heightA), int(heightB)) 620 | 621 | # now that we have the dimensions of the new image, construct 622 | # the set of destination points to obtain a "birds eye view", 623 | # (i.e. top-down view) of the image, again specifying points 624 | # in the top-left, top-right, bottom-right, and bottom-left 625 | # order 626 | dst = np.array([ 627 | [0, 0], 628 | [maxWidth - 1, 0], 629 | [maxWidth - 1, maxHeight - 1], 630 | [0, maxHeight - 1]], dtype = "float32") 631 | 632 | # compute the perspective transform matrix and then apply it 633 | M = cv2.getPerspectiveTransform(rect, dst) 634 | try: 635 | warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight)) 636 | return warped 637 | except Exception: 638 | return image 639 | # return the warped image 640 | 641 | 642 | def compare_2d_color_images(frame_1, frame_2): 643 | start_time = time.time() 644 | matches_num = 0 645 | height = frame_1.shape[0] 646 | width = frame_1.shape[1] 647 | size = width * height 648 | for i in range(height): 649 | for j in range(width): 650 | if frame_1[i][j][0] == frame_2[i][j][0] and frame_1[i][j][1] == frame_2[i][j][1] and frame_1[i][j][2] == frame_2[i][j][2]: 651 | matches_num += 1 652 | rate = matches_num / size 653 | elapsed_time = time.time() - start_time 654 | if DEBUG_TIME: 655 | print('iterate_2d', elapsed_time) 656 | return rate 657 | def compare_2d_gray_images(frame_1, frame_2): 658 | start_time = time.time() 659 | matches_num = 0 660 | gray_1 = cv2.cvtColor(frame_1, cv2.COLOR_BGR2GRAY) 661 | gray_2 = cv2.cvtColor(frame_2, cv2.COLOR_BGR2GRAY) 662 | height = gray_1.shape[0] 663 | width = gray_1.shape[1] 664 | size = width * height 665 | for i in range(height): 666 | for j in range(width): 667 | if gray_1[i][j] == gray_2[i][j]: 668 | matches_num += 1 669 | rate = matches_num / size 670 | elapsed_time = time.time() - start_time 671 | if DEBUG_TIME: 672 | print('iterate_2d', elapsed_time) 673 | return rate 674 | def compare_1d_gray_images(frame_1, frame_2): 675 | start_time = time.time() 676 | matches_num = 0 677 | gray_1 = cv2.cvtColor(frame_1, cv2.COLOR_BGR2GRAY) 678 | flat_1 = [j for i in gray_1 for j in i] 679 | gray_2 = cv2.cvtColor(frame_2, cv2.COLOR_BGR2GRAY) 680 | flat_2 = [j for i in gray_2 for j in i] 681 | size = len(flat_1) 682 | for i in range(size): 683 | if flat_1[i] == flat_2[i]: 684 | matches_num += 1 685 | rate = matches_num / size 686 | elapsed_time = time.time() - start_time 687 | if DEBUG_TIME: 688 | print('iterate_1d', elapsed_time) 689 | return rate 690 | #load_from_youtube() 691 | get_and_compare_videos(sys.argv[4], sys.argv[5]) 692 | 693 | # 694 | """ 695 | import sys 696 | import time 697 | import numpy as np 698 | import tensorflow as tf 699 | import cv2 700 | import PIL.Image as Image 701 | 702 | sys.path.append("..") 703 | 704 | from utils import label_map_util 705 | from utils import visualization_utils_color as vis_util 706 | 707 | def find_homography(kp1, des1, kp2, des2): 708 | bf = cv2.BFMatcher(cv2.NORM_L2) 709 | # Match descriptors.
710 | matches = bf.knnMatch(des1,des2,k=2) 711 | # Apply ratio test 712 | good = [] 713 | for m,n in matches: 714 | if m.distance < 0.9*n.distance: 715 | good.append(m) 716 | pts1 = [] 717 | pts2 = [] 718 | for elem in good: 719 | pts1.append(kp1[elem.queryIdx].pt) 720 | pts2.append(kp2[elem.trainIdx].pt) 721 | pts1 = np.array(pts1) 722 | pts2 = np.array(pts2) 723 | M, mask = cv2.findHomography(pts1, pts2, cv2.RANSAC,5.0) 724 | count_inliers = np.count_nonzero(mask) 725 | #print('Number of inliers: ', np.count_nonzero(mask)) 726 | return count_inliers, M 727 | 728 | frames = [] 729 | 730 | def image_alg(image, box): 731 | border = 0.2 732 | im_height = len(image) 733 | im_width = len(image[0]) 734 | (ymin, xmin, ymax, xmax) = (box[0], box[1], box[2], box[3]) 735 | (left, right, top, bottom) = (int(xmin * im_width), int(xmax * im_width), 736 | int(ymin * im_height), int(ymax * im_height)) 737 | #print((left, right, top, bottom)) 738 | border_height = (bottom - top) * border 739 | top = 0 if (top - border_height) < 0 else (top - border_height) 740 | bottom = im_height if (bottom + border_height) > im_height else (bottom + border_height) 741 | scale_y = im_height/(bottom - top) 742 | output = cv2.resize(image, (0,0), fy=scale_y, fx=scale_y) 743 | (xleft, xright, xtop, xbottom, xim_width) = (int(left*scale_y), int(right*scale_y), 744 | int(top*scale_y), int(bottom*scale_y), 745 | int(im_width*scale_y)) 746 | extra_width = (im_width - (xright - xleft)) // 2 747 | new_left = 0 if (xleft - extra_width) < 0 else (xleft - extra_width) 748 | new_right = xim_width if (xright + extra_width) > xim_width else (xright + extra_width) 749 | output = output[xtop:xbottom, new_left:new_right] 750 | #output = image[top:bottom, left:right] 751 | #output = cv2.resize(output, (0,0), fy=scale_y, fx=scale_y) 752 | return output 753 | 754 | def image_stab(image): 755 | global frames 756 | WINDOW_SIZE = 15 757 | skip = 1 # speedup -- set 1 for original speed 758 | resize = 0.5 #scale video resolution 759 | frames = [] 760 | mean_homographies = [] 761 | median_homographies = [] 762 | corrected_frames = [] 763 | i = 0 764 | 765 | frames.append(image) 766 | if len(frames) > 20: 767 | frames = frames[1:] 768 | 769 | orb = cv2.xfeatures2d.SIFT_create(nfeatures=1000) 770 | # orb = cv2.FeatureDetector_create("SIFT") 771 | # orb = cv2.SIFT_create(nfeatures=1000) 772 | # orb = cv2.SIFT(nfeatures=1000) 773 | 774 | vec_kps = [] 775 | vec_descs = [] 776 | 777 | #print('extracting keypoints...') 778 | 779 | for i in range(len(frames)): 780 | # find the keypoints and descriptors 781 | kp1, des1 = orb.detectAndCompute(frames[i],None) 782 | 783 | vec_kps.append(kp1) 784 | vec_descs.append(des1) 785 | 786 | #print('Frame %d/%d: found %d keypoints'% (i,len(frames),len(kp1))) 787 | 788 | 789 | 790 | for i in range(len(frames)): 791 | mean_H = np.zeros((3,3), dtype='float64') 792 | median_H = [] 793 | mean_C = 0 794 | median_vals = [] 795 | k = int(WINDOW_SIZE/2.0)+1 796 | for j in range(1,k,1): #for each couple neighbor frames iterated by distance 797 | if i-j >= 0 and i+j < len(frames): 798 | inliers_c, H = find_homography(vec_kps[i],vec_descs[i], vec_kps[i-j], vec_descs[i-j]) 799 | inliers_c2, H2 = find_homography(vec_kps[i],vec_descs[i], vec_kps[i+j], vec_descs[i+j]) 800 | #print('pair (%d,%d) has %d inliers'% (i,i-j,inliers_c)) 801 | #print('pair (%d,%d) has %d inliers'% (i,i+j,inliers_c2)) 802 | if inliers_c > 80 and inliers_c2 > 80: #ensures that neighbors are equally selected by distance to correctly balance the homography 803 | 
mean_H = mean_H + H 804 | mean_H = mean_H + H2 805 | mean_C+=2 806 | 807 | if mean_C > 0: 808 | mean_homographies.append(mean_H/mean_C) # Mean homography 809 | else: 810 | mean_homographies.append(np.eye(3, dtype='float64')) 811 | 812 | #print mean_H/mean_C 813 | #print median_vals 814 | #raw_input() 815 | 816 | #fourcc = cv2.cv.CV_FOURCC('D','I','V','X') 817 | #fourcc = cv2.cv.CV_FOURCC('R','G','B',' ') 818 | #fourcc = cv2.cv.CV_FOURCC('Y','U','Y','2') 819 | #fourcc = cv2.cv.CV_FOURCC('Y','U','Y','U') 820 | #fourcc = cv2.cv.CV_FOURCC('U','Y','V','Y') 821 | #fourcc = cv2.cv.CV_FOURCC('I','4','2','0') 822 | #fourcc = cv2.cv.CV_FOURCC('I','Y','U','V') 823 | #fourcc = cv2.cv.CV_FOURCC('Y','U','1','2') 824 | #fourcc = cv2.cv.CV_FOURCC('Y','8','0','0') 825 | #fourcc = cv2.cv.CV_FOURCC('G','R','E','Y') 826 | #fourcc = cv2.cv.CV_FOURCC('B','Y','8',' ') 827 | #fourcc = cv2.cv.CV_FOURCC('Y','1','6',' ') 828 | 829 | #fourcc = cv2.cv.CV_FOURCC('M','J','P','G') 830 | #fourcc = cv2.cv.CV_FOURCC('M','P','E','G') 831 | 832 | crop_x = 80 833 | crop_y = 60 834 | 835 | size = (frames[0].shape[1]-crop_x*2, frames[0].shape[0]-crop_y*2) 836 | 837 | #fourcc = cv2.VideoWriter_fourcc('X','V','I','D') 838 | #out = cv2.VideoWriter(file+'__estabilizado.avi',fourcc,30.0,size)#cv2.VideoWriter('stab.mp4',-1, 30.0, (frames[0].shape[0], frames[0].shape[1])) 839 | 840 | #for i in range(len(frames)): 841 | #corrected = cv2.warpPerspective(frames[i],mean_homographies[i],(0,0)) 842 | #cv2.imshow('video corrected', corrected) 843 | #cv2.waitKey(1) 844 | #new_img = corrected[crop_y:frames[0].shape[0]-crop_y, crop_x:frames[0].shape[1]-crop_x] 845 | #out.write(new_img) 846 | #out.write(corrected[crop_y:frames[0].shape[0]-crop_y, crop_x:frames[0].shape[1]-crop_x]) 847 | 848 | corrected = cv2.warpPerspective(frames[-1],mean_homographies[-1],(0,0)) 849 | new_img = corrected[crop_y:frames[0].shape[0]-crop_y, crop_x:frames[0].shape[1]-crop_x] 850 | return new_img 851 | 852 | 853 | # Path to frozen detection graph. This is the actual model that is used for the object detection. 854 | PATH_TO_CKPT = './model/frozen_inference_graph_face.pb' 855 | 856 | # List of the strings that is used to add correct label for each box. 
857 | PATH_TO_LABELS = './protos/face_label_map.pbtxt' 858 | 859 | NUM_CLASSES = 2 860 | 861 | label_map = label_map_util.load_labelmap(PATH_TO_LABELS) 862 | categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) 863 | category_index = label_map_util.create_category_index(categories) 864 | 865 | def load_image_into_numpy_array(image): 866 | (im_width, im_height) = image.size 867 | return np.array(image.getdata()).reshape( 868 | (im_height, im_width, 3)).astype(np.uint8) 869 | 870 | #cap = cv2.VideoCapture("./media/test.mp4") 871 | cap = cv2.VideoCapture(0) 872 | cap.open(0) 873 | # time.sleep(2.0) 874 | out = None 875 | 876 | detection_graph = tf.Graph() 877 | with detection_graph.as_default(): 878 | od_graph_def = tf.GraphDef() 879 | with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: 880 | serialized_graph = fid.read() 881 | od_graph_def.ParseFromString(serialized_graph) 882 | tf.import_graph_def(od_graph_def, name='') 883 | 884 | with detection_graph.as_default(): 885 | config = tf.ConfigProto() 886 | config.gpu_options.allow_growth = True 887 | with tf.Session(graph=detection_graph, config=config) as sess: 888 | frame_num = 100; 889 | while frame_num: 890 | frame_num -= 1 891 | ret, image = cap.read() 892 | if ret == 0: 893 | break 894 | 895 | if out is None: 896 | [h, w] = image.shape[:2] 897 | out = cv2.VideoWriter("./media/test_out.avi", cv2.VideoWriter_fourcc(*'H264'), 25.0, (w, h)) 898 | 899 | image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 900 | 901 | # the array based representation of the image will be used later in order to prepare the 902 | # result image with boxes and labels on it. 903 | # Expand dimensions since the model expects images to have shape: [1, None, None, 3] 904 | image_np_expanded = np.expand_dims(image_np, axis=0) 905 | image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') 906 | # Each box represents a part of the image where a particular object was detected. 907 | boxes = detection_graph.get_tensor_by_name('detection_boxes:0') 908 | # Each score represent how level of confidence for each of the objects. 909 | # Score is shown on the result image, together with the class label. 910 | scores = detection_graph.get_tensor_by_name('detection_scores:0') 911 | classes = detection_graph.get_tensor_by_name('detection_classes:0') 912 | num_detections = detection_graph.get_tensor_by_name('num_detections:0') 913 | # Actual detection. 914 | start_time = time.time() 915 | (boxes, scores, classes, num_detections) = sess.run( 916 | [boxes, scores, classes, num_detections], 917 | feed_dict={image_tensor: image_np_expanded}) 918 | elapsed_time = time.time() - start_time 919 | #print('inference time cost: {}'.format(elapsed_time)) 920 | #print(boxes.shape, boxes) 921 | #print(scores.shape,scores) 922 | #print(classes.shape,classes) 923 | #print(num_detections) 924 | # Visualization of the results of a detection. 
925 | box = vis_util.visualize_boxes_and_labels_on_image_array( 926 | # image_np, 927 | image, 928 | np.squeeze(boxes), 929 | np.squeeze(classes).astype(np.int32), 930 | np.squeeze(scores), 931 | category_index, 932 | use_normalized_coordinates=True, 933 | line_thickness=4) 934 | 935 | #cv2.imshow('Video', image) 936 | if len(box) > 0: 937 | stab = image_alg(image, box[0]) 938 | cv2.imshow('Stab', stab) 939 | out.write(stab) 940 | cv2.waitKey(1) 941 | 942 | 943 | cap.release() 944 | out.release() 945 | 946 | #sift_kp_1, sift_des_1 = sift.detectAndCompute(frame_1, None) 947 | #surf_kp_1, surf_des_1 = surf.detectAndCompute(frame_1, None) 948 | #orb_kp_1, orb_des_1 = orb.detectAndCompute(frame_1, None) 949 | #fast_kp_1, fast_des_1 = fast.detectAndCompute(frame_1, None) 950 | #print( sift.descriptorSize() ) 951 | #print( surf.descriptorSize() ) 952 | #print( orb.descriptorSize() ) 953 | #print( sift_des_1.shape ) 954 | #print( surf_des_1.shape ) 955 | #print( orb_des_1.shape ) 956 | #sift_kp_2, sift_des_2 = sift.detectAndCompute(frame_2, None) 957 | #surf_kp_2, surf_des_2 = surf.detectAndCompute(frame_2, None) 958 | #orb_kp_2, orb_des_2 = orb.detectAndCompute(frame_2, None) 959 | #fast_kp_2, fast_des_2 = orb.detectAndCompute(frame_2, None) 960 | #print(len(matches)) 961 | #matches_img = cv2.drawMatchesKnn(frame_1, desc_kp_1, frame_2, desc_kp_2, matches[:10], None) 962 | 963 | matchesMask = [[0,0] for i in range(len(matches))] # ratio test as per Lowe's paper 964 | for i,(m,n) in enumerate(matches): 965 | if m.distance < 0.7*n.distance: 966 | matchesMask[i] = [1,0] 967 | draw_params = dict(matchColor = (0,255,0), 968 | singlePointColor = (255,0,0), 969 | matchesMask = matchesMask[:10], 970 | flags = 0) 971 | matches_img = cv2.drawMatchesKnn(frame_1, desc_kp_1, frame_2, desc_kp_2, matches[:10], None, **draw_params) 972 | 973 | start_time = time.time() 974 | bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) # create BFMatcher object 975 | matches = bf.match(desc_des_1, desc_des_2) # Match descriptors. 976 | matches = sorted(matches, key = lambda x:x.distance) # Sort them in the order of their distance. 
977 | elapsed_time = time.time() - start_time 978 | print('BFMatcher', elapsed_time) 979 | print(len(matches)) 980 | matches_img = cv2.drawMatches(frame_1, desc_kp_1, frame_2, desc_kp_2, matches[:10], None, flags=2) 981 | 982 | #compare_2d_color_images(frame_1, frame_2) 983 | #compare_2d_gray_images(frame_1, frame_2) 984 | 985 | res = cv2.matchTemplate(gray_1, gray_2, cv2.TM_CCOEFF_NORMED) 986 | _, max_val, _, max_loc = cv2.minMaxLoc(res) 987 | print(max_val) 988 | threshold = 0.8 989 | if max_val > threshold: 990 | print("FOUND") 991 | 992 | #cv2.imshow("image 1", gray_1) 993 | #cv2.imshow("image 2", gray_2) 994 | 995 | elif descriptor == "orb": 996 | FLANN_INDEX_LSH = 6 997 | index_params = dict(algorithm = FLANN_INDEX_LSH, 998 | table_number = 6, # 12 999 | key_size = 12, # 20 1000 | multi_probe_level = 1) #2 1001 | # index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5) 1002 | search_params = dict(checks=50) # or pass empty dictionary 1003 | flann = cv2.FlannBasedMatcher(index_params, search_params) 1004 | matches = flann.knnMatch(desc_des_1, desc_des_2, k=2) 1005 | good = [] 1006 | for m,n in matches: 1007 | if m.distance < 0.7*n.distance: 1008 | good.append(m) 1009 | print(len(good)) 1010 | matches_img = cv2.drawMatches(frame_1, desc_kp_1, frame_2, desc_kp_2, good[:show_points], None) 1011 | """ 1012 | 1013 | 1014 | # Resolution: 1920 x 1080 1015 | # 1016 | # 2D color 1017 | # Operations = 1920 x 1080 x 3 1018 | # Time = 2.372 1019 | # 1020 | # 2D gray 1021 | # Operations = 1920 x 1080 1022 | # Time = 0.521 1023 | # 1024 | # SURF 1025 | # Time = 0.243 1026 | # 1027 | # CNN 1028 | # Time = 0.087 1029 | # 1030 | # Tracking 1031 | # Time = 0.003 1032 | # 1033 | # FLANN 1034 | # Size 1 = 4542 1035 | # Size 2 = 4117 1036 | # Time = 0.109 1037 | # 1038 | # BFMatcher 1039 | # Size 1 = 4542 1040 | # Size 2 = 4117 1041 | # Time = 0.164 --------------------------------------------------------------------------------
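The commented-out experiments and the timing notes above compare per-pixel comparison, template matching, descriptor matching (SURF/ORB with BFMatcher or FLANN), CNN face detection, and tracking. For reference, here is a hedged, self-contained sketch of the descriptor route those numbers refer to (keypoints, Lowe's ratio test, then RANSAC homography inliers), using ORB, which ships with stock OpenCV builds; the function name, the 0.75 ratio and the image paths are illustrative choices rather than values taken from the script.

```python
import cv2
import numpy as np


def count_inlier_matches(img_1, img_2, ratio=0.75, min_good=10):
    """Keypoints -> Lowe's ratio test -> RANSAC homography inliers."""
    orb = cv2.ORB_create(nfeatures=1000)
    kp_1, des_1 = orb.detectAndCompute(img_1, None)
    kp_2, des_2 = orb.detectAndCompute(img_2, None)
    if des_1 is None or des_2 is None:
        return 0
    # Hamming norm suits ORB's binary descriptors; k=2 enables the ratio test.
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
    pairs = matcher.knnMatch(des_1, des_2, k=2)
    good = [m[0] for m in pairs if len(m) == 2 and m[0].distance < ratio * m[1].distance]
    if len(good) < min_good:
        return 0
    pts_1 = np.float32([kp_1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    pts_2 = np.float32([kp_2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    H, mask = cv2.findHomography(pts_1, pts_2, cv2.RANSAC, 5.0)
    return 0 if mask is None else int(np.count_nonzero(mask))


if __name__ == '__main__':
    frame_1 = cv2.imread('image1.png')  # any two frames or stills to compare
    frame_2 = cv2.imread('image2.png')
    if frame_1 is None or frame_2 is None:
        raise SystemExit('could not read the example images')
    print('inlier matches:', count_inlier_matches(frame_1, frame_2))
```

A high inlier count means one frame maps onto a region of the other under a single perspective transform, which is the signal the matching code above uses before cropping and re-running detection on the matched area.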