├── .gitignore ├── DataAug ├── README.md ├── _init_paths.py ├── all_areas.csv ├── bounding_boxes_areas_distribution.csv ├── environment.yml ├── generate_data_aug.py ├── notebooks │ └── Statistics_VDAO.ipynb └── utils │ ├── _init_paths.py │ ├── blending.py │ ├── definitions.py │ ├── generic_utils.py │ └── my_enums.py ├── ICIP_replication ├── Generate_Features.py ├── Grow_RandomForest.py ├── My_Resnet.py ├── Object_Research.ipynb ├── PlotResults.py ├── Sizes output Resnet50.ods ├── _init_paths_.py ├── codigo_bruno │ ├── diretorios.txt │ ├── icip │ │ ├── global_metrics.csv │ │ ├── group_stats.csv │ │ ├── models │ │ │ └── models.zip │ │ ├── resultatos_icip.ipynb │ │ ├── test-results.csv │ │ ├── thres_stats.csv │ │ ├── train-preds.csv │ │ └── vid_stats.csv │ ├── imagenet_utils.py │ ├── instrucoes.txt │ ├── train_FC_59_vids_DIS.py │ └── train_HDF5_generator_VDAO.py ├── codigo_cinelli │ └── vdao-anomaly │ │ ├── .gitignore │ │ ├── README.md │ │ ├── archs │ │ ├── __init__.py │ │ ├── classics.py │ │ └── networks.py │ │ ├── compute_results.py │ │ ├── datasets │ │ ├── __init__.py │ │ ├── download_vdao.sh │ │ └── hdf5_vdao.py │ │ ├── main.py │ │ ├── metrics.py │ │ └── utils.py ├── icip_replication.yml ├── instructions.txt ├── vdao_object.json └── vdao_research.json ├── README.md ├── VDAO.md ├── VDAO_Access ├── Annotation.py ├── Demo.py ├── ObjectHelper.py ├── README.md ├── VDAOHelper.py ├── VDAOVideo.py ├── VDAO_files │ ├── CheckBar.py │ ├── InputWindow.py │ ├── ListBox.py │ ├── Main.py │ ├── MyEnums.py │ ├── PanelDetails.py │ ├── Player.py │ ├── Table.py │ ├── VideoPlayer.py │ ├── _init_paths.py │ ├── aux_images │ │ ├── add.png │ │ ├── add.ppm │ │ ├── forward_1.png │ │ ├── forward_1.ppm │ │ ├── forward_10.png │ │ ├── forward_10.ppm │ │ ├── forward_5.png │ │ ├── forward_5.ppm │ │ ├── forward_end.png │ │ ├── forward_end.ppm │ │ ├── pause.png │ │ ├── pause.ppm │ │ ├── play.png │ │ ├── play.ppm │ │ ├── rec.png │ │ ├── remove.png │ │ ├── remove.ppm │ │ ├── rewind_1.png │ │ ├── rewind_1.ppm │ │ ├── rewind_10.png │ │ ├── rewind_10.ppm │ │ ├── rewind_5.png │ │ ├── rewind_5.ppm │ │ ├── rewind_beg.png │ │ ├── rewind_beg.ppm │ │ ├── select_frame.png │ │ └── select_frame.ppm │ └── vdao_videos.json ├── YoloTrainingHelper.py ├── _init_.py ├── downloadVDAO.py ├── images │ ├── ex_maskBB.jpg │ ├── ex_mergedImages.jpg │ ├── ex_noInfo_frame.jpg │ ├── ex_withInfo.jpg │ ├── ex_withWithoutInfo.jpg │ ├── logo.png │ ├── mask_963_l.png │ ├── mergedImage.jpg │ └── original_963_l.png └── utils.py ├── VDAO_Alignment.md ├── aligned_frames_object.zip ├── aligned_frames_research.zip ├── data_aug_env.yml ├── download_vdao_object.sh ├── download_vdao_research.sh └── images ├── aux_images ├── output.jpg ├── output_list.jpg └── result.jpg ├── ex_frames_reference.jpg ├── ex_frames_target.jpg ├── ex_withBBfromAnnotation.jpg ├── table01_video_alignment.png ├── table02_video_alignment.png ├── table03_video_alignment.png ├── table04_video_alignment.png ├── table05_video_alignment.png ├── table06_video_alignment.png ├── table07_video_alignment.png ├── table08_video_alignment.png ├── table09_video_alignment.png ├── table10_video_alignment.png └── yolo_youtube.jpg /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python,visualstudiocode 3 | 4 | # My ignores 5 | RF_results/ 6 | 7 | ### Python ### 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / 
packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | .pytest_cache/ 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule.* 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | ### VisualStudioCode ### 108 | *.vscode 109 | !.vscode/settings.json 110 | !.vscode/tasks.json 111 | !.vscode/launch.json 112 | !.vscode/extensions.json 113 | .history 114 | 115 | # End of https://www.gitignore.io/api/python,visualstudiocode 116 | -------------------------------------------------------------------------------- /DataAug/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/DataAug/README.md -------------------------------------------------------------------------------- /DataAug/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | def add_path(path): 6 | if path not in sys.path: 7 | sys.path.insert(0, path) 8 | 9 | 10 | currentPath = os.path.dirname(os.path.realpath(__file__)) 11 | 12 | # Add lib to PYTHONPATH 13 | libPath = os.path.join(currentPath, '..', '..', 'Video-Alignment') 14 | add_path(libPath) 15 | libPath = os.path.join(currentPath, 'utils') 16 | add_path(libPath) 17 | libPath = os.path.join(currentPath, '..') 18 | add_path(libPath) 19 | -------------------------------------------------------------------------------- /DataAug/environment.yml: -------------------------------------------------------------------------------- 1 | name: vdao 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - bzip2=1.0.6=h14c3975_1002 7 | - cairo=1.16.0=ha4e643d_1000 8 | - dbus=1.13.6=he372182_0 9 | - expat=2.2.5=hf484d3e_1002 10 | - ffmpeg=4.1.3=h167e202_0 11 | - fontconfig=2.13.1=he4413a7_1000 12 | - freetype=2.10.0=he983fc9_0 13 | - gettext=0.19.8.1=hc5be6a0_1002 14 | - giflib=5.1.9=h516909a_0 15 | - glib=2.58.3=hf63aee3_1001 16 | - gmp=6.1.2=hf484d3e_1000 17 | - gnutls=3.6.5=hd3a4fd2_1002 18 | - graphite2=1.3.13=hf484d3e_1000 19 | - gst-plugins-base=1.14.5=h0935bb2_0 20 | - gstreamer=1.14.5=h36ae1b5_0 21 | - harfbuzz=2.4.0=h37c48d4_0 22 | - hdf5=1.10.5=nompi_h3c11f04_1100 23 | - 
icu=58.2=hf484d3e_1000 24 | - jasper=1.900.1=h07fcdf6_1006 25 | - jpeg=9c=h14c3975_1001 26 | - lame=3.100=h14c3975_1001 27 | - libblas=3.8.0=7_openblas 28 | - libcblas=3.8.0=7_openblas 29 | - libiconv=1.15=h516909a_1005 30 | - liblapack=3.8.0=7_openblas 31 | - liblapacke=3.8.0=7_openblas 32 | - libpng=1.6.37=hed695b0_0 33 | - libtiff=4.0.10=h57b8799_1003 34 | - libuuid=2.32.1=h14c3975_1000 35 | - libwebp=1.0.2=h576950b_1 36 | - libxcb=1.13=h14c3975_1002 37 | - libxml2=2.9.9=h13577e0_0 38 | - lz4-c=1.8.3=he1b5a44_1001 39 | - nettle=3.4.1=h1bed415_1002 40 | - numpy=1.16.4=py37h95a1406_0 41 | - openblas=0.3.5=h9ac9557_1001 42 | - opencv=4.1.0=py37h3aa1047_5 43 | - openh264=1.8.0=hdbcaa40_1000 44 | - pcre=8.41=hf484d3e_1003 45 | - pixman=0.34.0=h14c3975_1003 46 | - pthread-stubs=0.4=h14c3975_1001 47 | - qt=5.9.7=h52cfd70_2 48 | - x264=1!152.20180806=h14c3975_0 49 | - xorg-kbproto=1.0.7=h14c3975_1002 50 | - xorg-libice=1.0.9=h516909a_1004 51 | - xorg-libsm=1.2.3=h84519dc_1000 52 | - xorg-libx11=1.6.7=h14c3975_1000 53 | - xorg-libxau=1.0.9=h14c3975_0 54 | - xorg-libxdmcp=1.1.3=h516909a_0 55 | - xorg-libxext=1.3.4=h516909a_0 56 | - xorg-libxrender=0.9.10=h516909a_1002 57 | - xorg-renderproto=0.11.1=h14c3975_1002 58 | - xorg-xextproto=7.3.0=h14c3975_1002 59 | - xorg-xproto=7.0.31=h14c3975_1007 60 | - zstd=1.4.0=h3b9ef0a_0 61 | - attrs=19.1.0=py37_1 62 | - backcall=0.1.0=py37_0 63 | - bleach=3.1.0=py37_0 64 | - ca-certificates=2019.5.15=0 65 | - certifi=2019.3.9=py37_0 66 | - decorator=4.4.0=py37_1 67 | - defusedxml=0.6.0=py_0 68 | - entrypoints=0.3=py37_0 69 | - ipykernel=5.1.1=py37h39e3cac_0 70 | - ipython=7.5.0=py37h39e3cac_0 71 | - ipython_genutils=0.2.0=py37_0 72 | - ipywidgets=7.4.2=py37_0 73 | - jedi=0.13.3=py37_0 74 | - jinja2=2.10.1=py37_0 75 | - jsonschema=3.0.1=py37_0 76 | - jupyter=1.0.0=py37_7 77 | - jupyter_client=5.2.4=py37_0 78 | - jupyter_console=6.0.0=py37_0 79 | - jupyter_core=4.4.0=py37_0 80 | - libedit=3.1.20181209=hc058e9b_0 81 | - libffi=3.2.1=hd88cf55_4 82 | - libgcc-ng=8.2.0=hdf63c60_1 83 | - libgfortran-ng=7.3.0=hdf63c60_0 84 | - libsodium=1.0.16=h1bed415_0 85 | - libstdcxx-ng=8.2.0=hdf63c60_1 86 | - markupsafe=1.1.1=py37h7b6447c_0 87 | - mistune=0.8.4=py37h7b6447c_0 88 | - nbconvert=5.5.0=py_0 89 | - nbformat=4.4.0=py37_0 90 | - ncurses=6.1=he6710b0_1 91 | - notebook=5.7.8=py37_0 92 | - openssl=1.1.1c=h7b6447c_1 93 | - pandoc=2.2.3.2=0 94 | - pandocfilters=1.4.2=py37_1 95 | - parso=0.4.0=py_0 96 | - pexpect=4.7.0=py37_0 97 | - pickleshare=0.7.5=py37_0 98 | - pip=19.1.1=py37_0 99 | - prometheus_client=0.6.0=py37_0 100 | - prompt_toolkit=2.0.9=py37_0 101 | - ptyprocess=0.6.0=py37_0 102 | - pygments=2.4.2=py_0 103 | - pyqt=5.9.2=py37h05f1152_2 104 | - pyrsistent=0.14.11=py37h7b6447c_0 105 | - python=3.7.3=h0371630_0 106 | - python-dateutil=2.8.0=py37_0 107 | - pyzmq=18.0.0=py37he6710b0_0 108 | - qtconsole=4.5.1=py_0 109 | - readline=7.0=h7b6447c_5 110 | - send2trash=1.5.0=py37_0 111 | - setuptools=41.0.1=py37_0 112 | - sip=4.19.8=py37hf484d3e_0 113 | - six=1.12.0=py37_0 114 | - sqlite=3.28.0=h7b6447c_0 115 | - terminado=0.8.2=py37_0 116 | - testpath=0.4.2=py37_0 117 | - tk=8.6.8=hbc83047_0 118 | - tornado=6.0.2=py37h7b6447c_0 119 | - traitlets=4.3.2=py37_0 120 | - wcwidth=0.1.7=py37_0 121 | - webencodings=0.5.1=py37_1 122 | - wheel=0.33.4=py37_0 123 | - widgetsnbextension=3.4.2=py37_0 124 | - xz=5.2.4=h14c3975_4 125 | - zeromq=4.3.1=he6710b0_3 126 | - zlib=1.2.11=h7b6447c_3 127 | prefix: /home/rafael/anaconda3/envs/vdao 128 | 129 | 
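(A usage note, not part of the repository: assuming a standard Anaconda/Miniconda installation, the environment specified above can be recreated with "conda env create -f environment.yml" and activated with "conda activate vdao", where "vdao" comes from the name: field; the prefix: line at the end is machine-specific and can be ignored or removed when recreating the environment on another machine.)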
-------------------------------------------------------------------------------- /DataAug/generate_data_aug.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import random 4 | 5 | import cv2 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import pandas as pd 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | import _init_paths 12 | from blending import apply_transformations, blend_iterative_blur, rotate_image 13 | from definitions import (aloi_paths, csv_file_distribution_areas, dir_save_images, random_seed, 14 | vdao_videos_dir) 15 | from generic_utils import get_files_paths, get_target_reference_frames 16 | from my_enums import MethodToBlend 17 | from VDAO_Access.ObjectHelper import ObjectDatabase 18 | from VDAO_Access.VDAOVideo import VDAOVideo 19 | 20 | #################################################################################################### 21 | # Step 1: Get only frames of reference videos that are not present in the testing (research) videos 22 | #################################################################################################### 23 | vdao_dir = vdao_videos_dir['train'] 24 | # Get all video files from VDAO 25 | all_videos_vdao = get_files_paths(vdao_dir, 'avi') 26 | # Separate the reference videos 27 | all_reference_videos = [v for v in all_videos_vdao if 'ref' in v] 28 | # There are sections of the reference videos that are seen in the test (research) videos 29 | # We must disregard those sections 30 | target_reference_frames = get_target_reference_frames() 31 | patches_to_disconsider = {} 32 | for i in target_reference_frames: 33 | if i['reference file'] not in patches_to_disconsider: 34 | patches_to_disconsider[i['reference file']] = [] 35 | # Adding 1 changes the reference from the annotation to the API's 36 | patches_to_disconsider[i['reference file']].append( 37 | (i['reference start frame'] + 1, i['reference final frame'] + 1)) 38 | # For each reference video, get the frames that are not used in the test (research) videos 39 | frames_to_consider = {} 40 | total_reference_frames = 0 41 | for ref_vid in all_reference_videos: 42 | name_file = os.path.split(ref_vid)[-1] 43 | candidate_frames = list(range(1, VDAOVideo(ref_vid).videoInfo.getNumberOfFrames() + 1)) 44 | total_reference_frames += len(candidate_frames) 45 | # Get the video patches to disregard and remove them from candidate_frames 46 | for patch in patches_to_disconsider.get(name_file, []): 47 | [ 48 | candidate_frames.remove(i) for i in list(range(patch[0], patch[1] + 1)) 49 | if i in candidate_frames 50 | ] 51 | frames_to_consider[ref_vid] = candidate_frames 52 | 53 | print('There is a total of %d reference frames in the VDAO database.' % total_reference_frames) 54 | total_frames_to_be_used = sum([len(k) for v, k in frames_to_consider.items()]) 55 | print('There is a total of %d reference frames that can be used in data augmentation.' % 56 | total_frames_to_be_used) 57 | print('In other words, only %.2f%% of the reference frames are not present in the test set.'
% 58 | (100 * total_frames_to_be_used / total_reference_frames)) 59 | 60 | #################################################################################################### 61 | # Step 2: Getting distribution of areas in the VDAO videos 62 | #################################################################################################### 63 | # Load probability distributions, disregarding areas=0 64 | csv = pd.read_csv(csv_file_distribution_areas) 65 | areas_bb = list(csv['área bounding box']) 66 | occurencies_bb = list(csv['ocorrências']) 67 | # Disregarding areas=0 68 | areas_bb = areas_bb[1:] 69 | occurencies_bb = occurencies_bb[1:] 70 | total_occurencies_bb = sum(occurencies_bb) 71 | distributions = [i / total_occurencies_bb for i in occurencies_bb] 72 | 73 | #################################################################################################### 74 | # Step 3: Generating data augmentation 75 | #################################################################################################### 76 | # Create dict with 100 groups of samples, each group with 1000 data-augmented images 77 | # Areas of the augmented images have to follow the distribution of the original dataset 78 | dict_sizes_per_group = {} 79 | [ 80 | dict_sizes_per_group.update({'%s' % i: np.random.choice(areas_bb, 1000, p=distributions)}) 81 | for i in range(1, 101) 82 | ] 83 | # Create dictionary with VDAOVideo objects 84 | dict_ref_videos = {} 85 | [dict_ref_videos.update({i: VDAOVideo(i)}) for i in all_reference_videos] 86 | width_background, height_background = (1280, 720) 87 | # Get all possible ALOI images 88 | aloi_images = get_files_paths(aloi_paths['images'], 'png') 89 | width_aloi, height_aloi = (768, 576) 90 | 91 | # For each one of the 100 groups, create 1000 backgrounds with 1 ALOI image inserted on each 92 | for group, rand_areas in dict_sizes_per_group.items(): 93 | count_image = 0 94 | data_images_group = [] 95 | # Create folder to save images 96 | folder_to_save_samples = os.path.join(dir_save_images, 'teste_%s' % group) 97 | if not os.path.isdir(folder_to_save_samples): 98 | os.makedirs(folder_to_save_samples) 99 | # For each area, get a random ALOI object and a random background 100 | for area in rand_areas: 101 | # choose a random ALOI image 102 | rand_aloi_img = random.choice(aloi_images) 103 | # get its associated mask 104 | rand_aloi_mask = os.path.split(rand_aloi_img)[1].split('_')[0] 105 | rand_aloi_mask = os.path.join( 106 | os.path.split(rand_aloi_img)[0].replace(aloi_paths['images'], aloi_paths['masks']), 107 | rand_aloi_mask + '_c1.png') 108 | # choose a random frame from a random reference video 109 | rand_ref_video_path = random.choice(all_reference_videos) 110 | # rand_ref_video_path = '/media/storage/VDAO/vdao_object/table_04/Table_04-Reference_01/ref-sing-ext-part01-video01.avi' 111 | rand_ref_frame_number = random.choice(frames_to_consider[rand_ref_video_path]) 112 | frame_background = dict_ref_videos[rand_ref_video_path].GetFrame(rand_ref_frame_number)[1] 113 | # get a random rotation angle 114 | rand_angle = random.randrange(0, 361, 1) 115 | # get random flip 116 | rand_flip = random.choice([True, False]) 117 | # Since an area can appear with several aspect ratios (e.g. area 133200 appears in 24 different 118 | # ratios), we choose a random ratio between the min and the max so we can compute the 119 | # height (new_height) and width (new_width) to which the ALOI image will be resized.
This 120 | # way, we try to keep the aspect ratio of the real VDAO object that contains the chosen area. 121 | # get random proportional factor between min and max among all occurrences of this area 122 | proportions = csv.loc[csv['área bounding box'] == area] 123 | prop_min = float(proportions['prop min'].iloc[0].replace(',', '.')) 124 | prop_max = float(proportions['prop max'].iloc[0].replace(',', '.')) 125 | count_loop = 0 126 | while True: 127 | rand_proportion = np.random.uniform(low=prop_min, high=prop_max) 128 | # Calculate new height and width 129 | new_height = int(round(np.sqrt(area / rand_proportion))) 130 | new_width = int(round(area / new_height)) 131 | if count_loop > 100: 132 | area = area - 1 133 | # get random position, disregarding proportions after rotating 134 | _, shape_new_size_rotation = apply_transformations(rand_aloi_img, new_height, new_width, 135 | rand_angle, rand_flip) 136 | # Choose a random position inside the image 137 | rand_pos_x, rand_pos_y = -1, -1 138 | while rand_pos_x < 0: 139 | rand_pos_x = int(np.random.uniform(0, 140 | width_background - shape_new_size_rotation[1])) 141 | while rand_pos_y < 0: 142 | rand_pos_y = int( 143 | np.random.uniform(0, height_background - shape_new_size_rotation[0])) 144 | if (rand_pos_x + new_width <= width_background) and (rand_pos_y + new_height <= 145 | height_background): 146 | break 147 | count_loop += 1 148 | 149 | # Gather all random data used for this image 150 | data_images_group.append({ 151 | 'count': count_image, 152 | 'aloi_image': os.sep.join(rand_aloi_img.split(os.sep)[-3:]), 153 | 'aloi_mask': os.sep.join(rand_aloi_mask.split(os.sep)[-3:]), 154 | 'vdao_video': os.sep.join(rand_ref_video_path.split(os.sep)[-4:]), 155 | 'frame_number': rand_ref_frame_number, 156 | 'rot_angle': rand_angle, 157 | 'initial_position': (rand_pos_x, rand_pos_y), 158 | 'object_size': (new_height, new_width), 159 | 'area': (new_height * new_width), 160 | 'flip': rand_flip 161 | }) 162 | 163 | novo_background, [min_x, min_y, max_x, 164 | max_y], pos_mask = blend_iterative_blur(rand_aloi_img, 165 | rand_aloi_mask, 166 | frame_background, 167 | xIni=rand_pos_x, 168 | yIni=rand_pos_y, 169 | new_height=new_height, 170 | new_width=new_width, 171 | rotation_angle=rand_angle, 172 | flip_horizontally=rand_flip) 173 | 174 | count_image += 1 175 | frame_background[min_y:max_y, min_x:max_x, :] = novo_background 176 | frame_background = cv2.rectangle(frame_background, pos_mask[0], pos_mask[1], (0, 0, 255), 2) 177 | cv2.imwrite(os.path.join(folder_to_save_samples, 'sample_%s.png' % count_image), 178 | frame_background) 179 | print('[Group %s]: sample_%s.png saved' % (group, count_image)) 180 | pd.DataFrame(data_images_group).to_csv(os.path.join(folder_to_save_samples, 181 | 'group_%s.csv' % group), 182 | sep='\t', 183 | index=False, 184 | columns=[ 185 | "count", "aloi_image", "aloi_mask", 'vdao_video', 186 | 'frame_number', 'rot_angle', 'initial_position', 187 | 'object_size', 'area', 'flip' 188 | ]) 189 | -------------------------------------------------------------------------------- /DataAug/utils/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | def add_path(path): 6 | if path not in sys.path: 7 | sys.path.insert(0, path) 8 | 9 | 10 | currentPath = os.path.dirname(os.path.realpath(__file__)) 11 | 12 | # Add lib to PYTHONPATH 13 | libPath = os.path.join(currentPath, '..', '..', '..', 'Video-Alignment') 14 | add_path(libPath) 15 |
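A minimal, self-contained sketch, not part of the repository, illustrating the object-size sampling logic that generate_data_aug.py (above) applies for each augmented image: an area is drawn following the empirical distribution of bounding-box areas, and a random width/height ratio between the minimum and maximum ratios observed for that area yields the resize dimensions. All numeric values below are made-up toy stand-ins for the 'área bounding box'/'ocorrências' CSV columns:

import numpy as np

# Toy stand-ins for the bounding-box areas and their occurrence counts
areas = [1200, 5400, 133200]
occurrences = [50, 30, 20]
probs = [occ / sum(occurrences) for occ in occurrences]

# Draw an area following the empirical distribution
area = int(np.random.choice(areas, p=probs))

# Hypothetical ratio bounds for the drawn area (read from the CSV in the real script)
prop_min, prop_max = 0.5, 2.0
ratio = np.random.uniform(prop_min, prop_max)  # ratio = width / height

# Since area = height * width and ratio = width / height, height = sqrt(area / ratio)
new_height = int(round(np.sqrt(area / ratio)))
new_width = int(round(area / new_height))
print(area, (new_height, new_width), new_height * new_width)

Because of the rounding, new_height * new_width only approximates the drawn area, which is why the real script retries (and eventually shrinks the area) until the rotated object fits inside the 1280x720 background.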
-------------------------------------------------------------------------------- /DataAug/utils/blending.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | import _init_paths 8 | from generic_utils import euclidean_distance 9 | 10 | 11 | def blur_measurement(image): 12 | if isinstance(image, str): 13 | assert os.path.isfile(image), f'It was not possible to load image {image}' 14 | image = cv2.imread(image) 15 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 16 | channelR = image[:, :, 2] 17 | channelG = image[:, :, 1] 18 | channelB = image[:, :, 0] 19 | try: 20 | grayVar = cv2.Laplacian(gray, cv2.CV_64F) 21 | grayVar = grayVar.var() 22 | RVar = cv2.Laplacian(channelR, cv2.CV_64F).var() 23 | GVar = cv2.Laplacian(channelG, cv2.CV_64F).var() 24 | BVar = cv2.Laplacian(channelB, cv2.CV_64F).var() 25 | except IOError as e: 26 | print("I/O error({0}): {1}".format(e.errno, e.strerror)) 27 | raise  # re-raise so the return below never runs with undefined variables 28 | return [RVar, GVar, BVar, grayVar] 29 | 30 | def enlarge_mask(mask, iterations): 31 | inv_mask = 255 - mask 32 | se = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(3, 3)) 33 | enlarged_mask = cv2.erode(src=inv_mask, kernel=se, iterations=iterations) 34 | enlarged_mask_bin = enlarged_mask / 255 35 | diffMask = np.add(enlarged_mask, mask) 36 | diffMask_bin = diffMask / 255 37 | return enlarged_mask, enlarged_mask_bin.astype(np.uint8), diffMask, diffMask_bin.astype( 38 | np.uint8) 39 | 40 | 41 | def rotate_image(mat, angle): 42 | height, width = mat.shape[:2] 43 | image_center = (width / 2, height / 2) 44 | rotation_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0) 45 | rotated_mat = cv2.warpAffine(mat, rotation_mat, (width, height)) 46 | # Get cos and sin of the rotation 47 | cos = np.abs(rotation_mat[0, 0]) 48 | sin = np.abs(rotation_mat[0, 1]) 49 | # compute the new bounding dimensions of the image 50 | new_width = int((height * sin) + (width * cos)) 51 | new_height = int((height * cos) + (width * sin)) 52 | return rotated_mat, (new_height, new_width) 53 | 54 | 55 | # def apply_transformations(image, scale_factor_x, scale_factor_y, rotation_angle, flip_horizontally): 56 | def apply_transformations(image, new_height, new_width, rotation_angle, flip_horizontally): 57 | if isinstance(image, str): 58 | assert os.path.isfile(image), f'Image could not be found in the path: {image}' 59 | image = cv2.imread(image) 60 | # Flip horizontally 61 | if flip_horizontally is True: 62 | image = cv2.flip(image, 1)  # flipCode=1 flips around the vertical axis (horizontal flip) 63 | # Rescale image 64 | image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 65 | # Rotate image counter-clockwise considering the angle (in degrees) 66 | image, new_size = rotate_image(image, rotation_angle) 67 | return image, new_size 68 | 69 | 70 | def blend_image_into_mask(image, mask): 71 | # Paths were passed instead of loaded images 72 | if isinstance(image, str): 73 | assert os.path.isfile(image), f'Image could not be found in the path: {image}' 74 | image = cv2.imread(image) 75 | if isinstance(mask, str): 76 | assert os.path.isfile(mask), f'Mask image could not be found in the path: {mask}' 77 | mask = cv2.imread(mask) 78 | if mask.ndim == 2:  # single-channel mask: replicate it into 3 channels 79 | mask = cv2.merge((mask, mask, mask)) 80 | # Multiply mask by image so we have the object only 81 | mergedImage = np.multiply(image, mask / 255) 82 | mergedImage = mergedImage.astype(np.uint8) 83 | return mergedImage 84 | 85 | 86 | def extract_bounding_box_mask(mask): 87 | # A path was passed instead of
a loaded image mask 88 | if isinstance(mask, str) and os.path.isfile(mask) is True: 89 | mask = cv2.imread(mask) 90 | h, w, _ = mask.shape 91 | min_x = w 92 | max_x = 0 93 | # Obs: masks' channels are identical, so checking existence of a pixel with value 255 could be done in any channel 94 | 95 | # For every line(row) of the image 96 | for i in range(h): 97 | # Get the max position of the pixel whose value is 255 98 | b = mask[i, :, 0][::-1] 99 | pos_x = len(b) - np.argmax(b) 100 | if pos_x != w and pos_x > max_x: # checking if pixel of channel 0 is 255 101 | max_x = pos_x - 1 102 | # Get the min position of the pixel whose value is 255 103 | pos_x = np.argmax(mask[i, :, 0] == 255) 104 | if pos_x != 0 and pos_x < min_x: # checking if pixel of channel 0 is 255 105 | min_x = pos_x 106 | 107 | min_y = h 108 | max_y = 0 109 | # For every column of the image 110 | for i in range(w): 111 | # Get the max position of the pixel whose value is 255 112 | b = mask[:, i, 0][::-1] 113 | pos_y = len(b) - np.argmax(b) 114 | if pos_y != h and pos_y > max_y: # checking if pixel of channel 0 is 255 115 | max_y = pos_y - 1 116 | # Get the min position of the pixel whose value is 255 117 | pos_y = np.argmax(mask[:, i, 0] == 255) 118 | if pos_y != 0 and pos_y < min_y: # checking if pixel of channel 0 is 255 119 | min_y = pos_y 120 | return min_x, min_y, max_x, max_y 121 | 122 | 123 | def blend_iterative_blur(image, 124 | mask, 125 | background, 126 | xIni=0, 127 | yIni=0, 128 | new_height=0, 129 | new_width=0, 130 | rotation_angle=0, 131 | flip_horizontally=False): 132 | # Check if files exist 133 | if isinstance(image, str): 134 | assert os.path.isfile(image), f'Image could not be found in the path: {image}' 135 | image = cv2.imread(image) 136 | if isinstance(mask, str): 137 | assert os.path.isfile(mask), f'Mask image could not be found in the path: {mask}' 138 | mask = cv2.imread(mask) 139 | if isinstance(background, str): 140 | assert os.path.isfile( 141 | background), f'Background image could not be found in the path: {background}' 142 | background = cv2.imread(background) 143 | image, image_size = apply_transformations(image, new_height, new_width, rotation_angle, 144 | flip_horizontally) 145 | mask, mask_size = apply_transformations(mask, new_height, new_width, rotation_angle, 146 | flip_horizontally) 147 | assert image.shape == mask.shape 148 | # Before rotating the mask, the values are either 0 or 255. After rotation, some of these values 149 | # are changed. 
Therefore, we need to threshold 150 | _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY) 151 | mask_inv_bin = ((255 - mask) / 255).astype(np.uint8) 152 | # Blend the object with its mask 153 | img_mask_blended = blend_image_into_mask(image, mask) 154 | # Obtain 2 layers: 155 | # layer_1 => layer between mask_bin and enlarged by 5 iterations 156 | # layer_2 => layer between enlarged by 7 iterations and 3 iterations 157 | _, mask_larger_2, _, _ = enlarge_mask(mask, 3) 158 | _, mask_larger_1_bin, _, layer_1 = enlarge_mask(mask, 5) 159 | layer_1 = 1 - layer_1 160 | _, mask_larger_3, _, _ = enlarge_mask(mask, 7) 161 | layer_2_bin = np.subtract(mask_larger_2, mask_larger_3) 162 | layer_2_bin_inv = 1 - layer_2_bin 163 | # It's necessary to get width and height again since the image was rescaled 164 | rsz_rows, rsz_cols, rsz_channels = image.shape 165 | # Get blur level of the region of the background: 166 | min_x = xIni 167 | min_y = yIni 168 | max_x = xIni + rsz_cols 169 | max_y = yIni + rsz_rows 170 | # define background roi 171 | roi_background = background[min_y:max_y, min_x:max_x, :] 172 | blur_level_reference = blur_measurement(roi_background) 173 | # add the image onto the background 174 | background_with_image = np.multiply(roi_background, mask_inv_bin) 175 | background_with_image = background_with_image + img_mask_blended 176 | # get level of blur 177 | blur_level_background_with_img = blur_measurement(background_with_image) 178 | # measure distance between blur levels 179 | dist = euclidean_distance(blur_level_background_with_img, blur_level_reference) 180 | rsz_rows, rsz_cols, rsz_channels = img_mask_blended.shape 181 | layer_1_with_background = np.multiply(layer_1, roi_background) 182 | layer_2_with_background = np.multiply(layer_2_bin, roi_background) 183 | object_with_border_background = np.add(img_mask_blended, layer_1_with_background) 184 | background_with_large_object_mask = np.multiply(roi_background, mask_larger_1_bin) 185 | # Looping and blurring 186 | iteration = 0 187 | return_image = roi_background 188 | while True: 189 | blurred_R = cv2.GaussianBlur(object_with_border_background[:, :, 2], (3, 3), 0) 190 | blurred_G = cv2.GaussianBlur(object_with_border_background[:, :, 1], (3, 3), 0) 191 | blurred_B = cv2.GaussianBlur(object_with_border_background[:, :, 0], (3, 3), 0) 192 | blurred_object_background = cv2.merge((blurred_B, blurred_G, blurred_R)) 193 | ghosty_background_with_object = np.add(background_with_large_object_mask, 194 | blurred_object_background) 195 | pre_background_with_object = np.multiply(ghosty_background_with_object, layer_2_bin_inv) 196 | final_image = layer_2_with_background + pre_background_with_object 197 | blur_level = blur_measurement(final_image) 198 | distLoop = euclidean_distance(blur_level, blur_level_reference) 199 | # Distance increased or did not change; break the loop 200 | if distLoop >= dist: 201 | break 202 | else: 203 | # Continue 204 | dist = distLoop 205 | iteration = iteration + 1 206 | return_image = final_image 207 | # Define the bounding-box coordinates of the object after rotation and resizing 208 | coord_bounding_box = extract_bounding_box_mask(mask) 209 | coord_bounding_box_init = (xIni + coord_bounding_box[0], yIni + coord_bounding_box[1]) 210 | coord_bounding_box_end = (xIni + coord_bounding_box[2], yIni + coord_bounding_box[3]) 211 | return return_image, [min_x, min_y, max_x, 212 | max_y], (coord_bounding_box_init, coord_bounding_box_end) 213 | --------------------------------------------------------------------------------
/DataAug/utils/definitions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | 4 | # Get local machine name 5 | hostname = socket.gethostname() 6 | current_path = os.path.dirname(os.path.abspath(__file__)) 7 | 8 | random_seed = 123 9 | 10 | if hostname == 'notesmt': 11 | BASE_DIR = '/media/storage/datasets/' 12 | DATABASE_DIR = '/media/storage/VDAO' 13 | elif 'smt.ufrj.br' in hostname: 14 | BASE_DIR = '/nfs/proc/rafael.padilla' 15 | DATABASE_DIR = '/home/rafael.padilla/workspace/rafael.padilla/' 16 | 17 | vdao_videos_dir = { 18 | 'train': os.path.join(DATABASE_DIR, 'vdao_object'), 19 | 'test': os.path.join(DATABASE_DIR, 'vdao_research') 20 | } 21 | 22 | csv_file_distribution_areas = os.path.join(current_path, '..', 23 | 'bounding_boxes_areas_distribution.csv') 24 | 25 | aloi_root_path = os.path.join(BASE_DIR, 'aloi') 26 | aloi_paths = { 27 | 'images': os.path.join(aloi_root_path, 'png'), 28 | 'masks': os.path.join(aloi_root_path, 'mask') 29 | } 30 | 31 | dir_save_images = os.path.join(current_path, DATABASE_DIR, 'data_aug_sample')  # DATABASE_DIR is absolute, so os.path.join ignores current_path here 32 | -------------------------------------------------------------------------------- /DataAug/utils/generic_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import numpy as np 5 | 6 | import _init_paths 7 | from Align_Annotations_Research import define_tables 8 | 9 | 10 | # Get files paths 11 | def get_files_paths(folder, extension): 12 | ret = [] 13 | for root, dirs, files in os.walk(folder): 14 | ret += [os.path.join(root, file) for file in files if file.endswith('.%s' % extension)] 15 | return ret 16 | 17 | 18 | def get_target_reference_frames(): 19 | ret = [] 20 | tables_in_target_videos = define_tables() 21 | # Do not consider tables 01 to 07, since those are Mateus' videos for which I do not have the references 22 | tables_to_disconsider = ['table 0%d' % i for i in range(1, 8)] 23 | for table, info in tables_in_target_videos.items(): 24 | if table in tables_to_disconsider: 25 | continue 26 | 27 | ret.append({ 28 | 'reference file': info['reference file'], 29 | 'reference start frame': info['reference start frame'], 30 | 'reference final frame': info['reference final frame'] 31 | }) 32 | return ret 33 | 34 | 35 | def euclidean_distance(list1, list2): 36 | return np.linalg.norm(np.asarray(list1).astype(float) - np.asarray(list2).astype(float)) 37 | -------------------------------------------------------------------------------- /DataAug/utils/my_enums.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class MethodToBlend(Enum): 5 | ONLY_BLEND = 0 6 | ITERATIVE_BLUR = 1 7 | -------------------------------------------------------------------------------- /ICIP_replication/My_Resnet.py: -------------------------------------------------------------------------------- 1 | ############################################################################################# 2 | # Resnet # 3 | # Reference: https://pytorch.org/docs/0.4.0/_modules/torchvision/models/resnet.html # 4 | ############################################################################################# 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.utils.model_zoo as model_zoo 9 | import torchvision.transforms as transforms 10 | 11 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'] 12 | 13 | mean_imagenet = [0.485, 0.456, 0.406] 14 |
std_imagenet = [0.229, 0.224, 0.225] 15 | 16 | model_urls = { 17 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 18 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 19 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 20 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 21 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 22 | } 23 | 24 | 25 | def conv3x3(in_planes, out_planes, stride=1): 26 | """3x3 convolution with padding""" 27 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 28 | 29 | 30 | def conv1x1(in_planes, out_planes, stride=1): 31 | """1x1 convolution""" 32 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 33 | 34 | 35 | class BasicBlock(nn.Module): 36 | expansion = 1 37 | 38 | def __init__(self, inplanes, planes, stride=1, downsample=None): 39 | super(BasicBlock, self).__init__() 40 | self.conv1 = conv3x3(inplanes, planes, stride) 41 | self.bn1 = nn.BatchNorm2d(planes) 42 | self.relu = nn.ReLU(inplace=True) 43 | self.conv2 = conv3x3(planes, planes) 44 | self.bn2 = nn.BatchNorm2d(planes) 45 | self.downsample = downsample 46 | self.stride = stride 47 | 48 | def forward(self, x): 49 | identity = x 50 | 51 | out = self.conv1(x) 52 | out = self.bn1(out) 53 | out = self.relu(out) 54 | 55 | out = self.conv2(out) 56 | out = self.bn2(out) 57 | 58 | if self.downsample is not None: 59 | identity = self.downsample(x) 60 | 61 | out += identity 62 | out = self.relu(out) 63 | 64 | return out 65 | 66 | 67 | class Bottleneck(nn.Module): 68 | expansion = 4 69 | 70 | def __init__(self, inplanes, planes, stride=1, downsample=None): 71 | super(Bottleneck, self).__init__() 72 | self.conv1 = conv1x1(inplanes, planes) 73 | self.bn1 = nn.BatchNorm2d(planes) 74 | self.conv2 = conv3x3(planes, planes, stride) 75 | self.bn2 = nn.BatchNorm2d(planes) 76 | self.conv3 = conv1x1(planes, planes * self.expansion) 77 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 78 | self.relu = nn.ReLU(inplace=True) 79 | self.downsample = downsample 80 | self.stride = stride 81 | 82 | def forward(self, x): 83 | identity = x 84 | 85 | out = self.conv1(x) 86 | out = self.bn1(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv2(out) 90 | out = self.bn2(out) 91 | out = self.relu(out) 92 | 93 | out = self.conv3(out) 94 | out = self.bn3(out) 95 | 96 | if self.downsample is not None: 97 | identity = self.downsample(x) 98 | 99 | out += identity 100 | out = self.relu(out) 101 | 102 | return out 103 | 104 | 105 | class ResNet(nn.Module): 106 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): 107 | super(ResNet, self).__init__() 108 | self.hooks = {} 109 | self.inplanes = 64 110 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 111 | self.bn1 = nn.BatchNorm2d(64) 112 | self.relu = nn.ReLU(inplace=True) 113 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 114 | self.layer1 = self._make_layer(block, 64, layers[0]) 115 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 116 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 117 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 118 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 119 | self.fc = nn.Linear(512 * block.expansion, num_classes) 120 | 121 | for m in self.modules(): 122 | if isinstance(m, nn.Conv2d): 123 | nn.init.kaiming_normal_(m.weight, 
mode='fan_out', nonlinearity='relu') 124 | elif isinstance(m, nn.BatchNorm2d): 125 | nn.init.constant_(m.weight, 1) 126 | nn.init.constant_(m.bias, 0) 127 | 128 | # Zero-initialize the last BN in each residual branch, 129 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 130 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 131 | if zero_init_residual: 132 | for m in self.modules(): 133 | if isinstance(m, Bottleneck): 134 | nn.init.constant_(m.bn3.weight, 0) 135 | elif isinstance(m, BasicBlock): 136 | nn.init.constant_(m.bn2.weight, 0) 137 | 138 | def _make_layer(self, block, planes, blocks, stride=1): 139 | downsample = None 140 | if stride != 1 or self.inplanes != planes * block.expansion: 141 | downsample = nn.Sequential( 142 | conv1x1(self.inplanes, planes * block.expansion, stride), 143 | nn.BatchNorm2d(planes * block.expansion), 144 | ) 145 | 146 | layers = [] 147 | layers.append(block(self.inplanes, planes, stride, downsample)) 148 | self.inplanes = planes * block.expansion 149 | for _ in range(1, blocks): 150 | layers.append(block(self.inplanes, planes)) 151 | 152 | return nn.Sequential(*layers) 153 | 154 | def forward(self, x): 155 | x = self.conv1(x) 156 | x = self.bn1(x) 157 | x = self.relu(x) 158 | x = self.maxpool(x) 159 | 160 | x = self.layer1(x) 161 | x = self.layer2(x) 162 | x = self.layer3(x) 163 | x = self.layer4(x) 164 | 165 | x = self.avgpool(x) 166 | x = x.view(x.size(0), -1) 167 | x = self.fc(x) 168 | 169 | return x 170 | 171 | def add_hook(self, name, layer_name, id_module, callback, parameters_callback): 172 | if id_module is None: 173 | hook = self._modules.get(layer_name) 174 | else: 175 | hook = self._modules.get(layer_name)._modules.get(str(id_module)) 176 | self.hooks[name] = hook.register_forward_hook(callback(parameters_callback)) 177 | 178 | def remove_all_hooks(self): 179 | for hook in self.hooks: 180 | self.hooks[hook].remove() 181 | self.hooks.clear() 182 | 183 | 184 | def resnet18(pretrained=False, **kwargs): 185 | """Constructs a ResNet-18 model. 186 | Args: 187 | pretrained (bool): If True, returns a model pre-trained on ImageNet 188 | """ 189 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 190 | if pretrained: 191 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 192 | return model 193 | 194 | 195 | def resnet34(pretrained=False, **kwargs): 196 | """Constructs a ResNet-34 model. 197 | Args: 198 | pretrained (bool): If True, returns a model pre-trained on ImageNet 199 | """ 200 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 201 | if pretrained: 202 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 203 | return model 204 | 205 | 206 | def resnet50(pretrained=False, **kwargs): 207 | """Constructs a ResNet-50 model. 208 | Args: 209 | pretrained (bool): If True, returns a model pre-trained on ImageNet 210 | """ 211 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 212 | if pretrained: 213 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 214 | return model 215 | 216 | 217 | def resnet101(pretrained=False, **kwargs): 218 | """Constructs a ResNet-101 model. 
219 | Args: 220 | pretrained (bool): If True, returns a model pre-trained on ImageNet 221 | """ 222 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 223 | if pretrained: 224 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 225 | return model 226 | 227 | 228 | def resnet152(pretrained=False, **kwargs): 229 | """Constructs a ResNet-152 model. 230 | Args: 231 | pretrained (bool): If True, returns a model pre-trained on ImageNet 232 | """ 233 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 234 | if pretrained: 235 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 236 | return model 237 | -------------------------------------------------------------------------------- /ICIP_replication/Sizes output Resnet50.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/ICIP_replication/Sizes output Resnet50.ods -------------------------------------------------------------------------------- /ICIP_replication/_init_paths_.py: -------------------------------------------------------------------------------- 1 | ########################################################################################### 2 | # # 3 | # Set up paths for Video Alignment # 4 | # # 5 | # Developed by: Rafael Padilla (rafael.padilla@smt.ufrj.br) # 6 | # SMT - Signal Multimedia and Telecommunications Lab # 7 | # COPPE - Universidade Federal do Rio de Janeiro # 8 | # Last modification: July 2nd, 2018 # 9 | ########################################################################################### 10 | 11 | import os 12 | import sys 13 | 14 | 15 | def add_path(path): 16 | if path not in sys.path: 17 | sys.path.insert(0, path) 18 | 19 | currentPath = os.path.dirname(os.path.realpath(__file__)) 20 | 21 | # Add lib to PYTHONPATH 22 | libPath = os.path.join(currentPath, '..','VDAO_Access') 23 | add_path(libPath) -------------------------------------------------------------------------------- /ICIP_replication/codigo_bruno/diretorios.txt: -------------------------------------------------------------------------------- 1 | ### LOCATION OF THE VDAO FRAMES: /home/bruno.afonso/datasets/Reference_Object_frames_skip_17_full_aligned 2 | 3 | ### LOCATION OF THE FRAMES OF THE 59 TEST VIDEOS: /home/bruno.afonso/datasets/TEST_FRAMES_59V 4 | 5 | ### LOCATION OF THE HDF5 FILES: /home/bruno.afonso/datasets/article_HDF5 6 | 7 | ### Dataset names: (IGNORE THE BRACKETS -- X/y = X or y, X for the feature maps, y for the labels) 8 | 9 | 59_videos_test_batch.h5 - 'video[X]_[LAYER NAME]_X/y_TEST_SET' 10 | train_batch_VDAO.h5 - '[ILLUMINATION TYPE]_[OBJECT NAME]_[POSITION NUMBER]_[LAYER NAME]_X/y_TRAIN_SET' 11 | train_batch_ALOI.h5 - '[LAYER NAME]_X/y_TRAIN_SET' 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_bruno/icip/global_metrics.csv: -------------------------------------------------------------------------------- 1 | ,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ consistencia 2 | ,fnr,fpr,tnr,tpr,dis,fnr,fpr,tnr,tpr,dis,fnr,fpr,dis,fnr,fpr,dis 3 |
res2a_branch2a,0.3204853487866605,0.3250147676128733,0.6749852323871267,0.6795146512133395,0.4564487462501759,0.32104574840890765,0.3188966430138912,0.6811033569861087,0.6789542515910925,0.4525101562362604,0.2975738222013413,0.38983033916665827,0.49042621564747874,0.2967258892191076,0.3919627270188157,0.4916106515372676 4 | res2b_branch2a,0.26913939846489426,0.31132824782748525,0.6886717521725146,0.7308606015351059,0.41153528852502697,0.2679040972894136,0.3006232431914861,0.6993767568085142,0.7320959027105863,0.402674731876018,0.2987499799743518,0.3666144360889293,0.4729246190287404,0.30111454121684417,0.36248195338956957,0.4712357514719661 5 | res2c_branch2a,0.30750203630324013,0.26278133868179554,0.7372186613182046,0.6924979636967601,0.40448922642022955,0.3066120677665623,0.24492258633989414,0.755077413660106,0.6933879322334378,0.392425831717931,0.2954420101723526,0.3469229158052142,0.45567695891439614,0.28950320882725017,0.340362440815129,0.4468318464913916 6 | res3a_branch2a,0.2671582522418179,0.2628442522061727,0.7371557477938274,0.7328417477581819,0.37478077946811106,0.26102405469482454,0.24555301171534138,0.7544469882846587,0.7389759453051753,0.35837109075900825,0.34227446346285584,0.29717286439342044,0.4532808397346794,0.3433521566566431,0.2812910796569521,0.4438641402223678 7 | res3b_branch2a,0.3200552931588707,0.28929564399764923,0.7107043560023505,0.6799447068411288,0.4314248026192111,0.3224004178853139,0.2728557887104708,0.7271442112895294,0.677599582114686,0.4223651392876051,0.24408794953785048,0.4116630825898461,0.4785868998175427,0.24338954067435792,0.40458879612851234,0.47215523131952464 8 | res3c_branch2a,0.3201659515456892,0.2882658854600252,0.7117341145399747,0.6798340484543106,0.4308171970212064,0.32411286507006026,0.27176153557544336,0.7282384644245568,0.6758871349299396,0.4229698352391646,0.27042297124910003,0.3984673079624515,0.48156492697665887,0.2687012650496149,0.38711585359864026,0.4712314229194264 9 | res3d_branch2a,0.3504363747606372,0.25561592261095556,0.7443840773890443,0.6495636252393627,0.4337570202862748,0.3517435039138353,0.25042401533560804,0.749575984664392,0.6482564960861649,0.4317819820261044,0.34260409324058755,0.35484747514290615,0.4932487154778009,0.34620201439697434,0.3485680034039439,0.49127943959577086 10 | res4a_branch2a,0.3299285577269546,0.25850518015529517,0.7414948198447048,0.6700714422730456,0.41913933407747606,0.32928699440801124,0.25074880573777614,0.7492511942622241,0.670713005591989,0.41388994704532583,0.29868935121896406,0.3687468321381415,0.4745414152036894,0.29713806894216366,0.35737926757092436,0.4647698063603189 11 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_bruno/icip/group_stats.csv: -------------------------------------------------------------------------------- 1 | ,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ 
consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ consistencia 2 | ,fnr,fnr,fpr,fpr,tnr,tnr,tpr,tpr,dis,dis,fnr,fnr,fpr,fpr,tnr,tnr,tpr,tpr,dis,dis,fnr,fnr,fpr,fpr,tnr,tnr,tpr,tpr,dis,dis,fnr,fnr,fpr,fpr,tnr,tnr,tpr,tpr,dis,dis 3 | ,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std 4 | res2a_branch2a,0.3200571730081323,0.10838633860459557,0.3219254920049816,0.13861332835620443,0.6780745079950184,0.13861332835620446,0.6799428269918678,0.10838633860459557,0.4783211226637926,0.07350485157887875,0.3202702462676386,0.11850153056314545,0.3167261669028323,0.13971175688868878,0.6832738330971677,0.13971175688868878,0.6797297537323614,0.11850153056314545,0.4782322100104882,0.06721043525177778,0.31130011056939766,0.13507826765373712,0.3677825487399635,0.20812674553989413,0.6322174512600366,0.2081267455398942,0.6886998894306023,0.1350782676537371,0.525104282268028,0.11204816904380839,0.31054818171201437,0.13914736302775596,0.36914120370053777,0.21491235513807347,0.6308587962994623,0.21491235513807347,0.6894518182879855,0.1391473630277559,0.5276698458384413,0.11875599420196353 5 | res2b_branch2a,0.26108124873459493,0.12565987405558407,0.3129457580864256,0.21143295712784496,0.6870542419135743,0.21143295712784496,0.7389187512654051,0.12565987405558407,0.4483913074253001,0.14549386017746618,0.2592165228516187,0.1317453075816561,0.30224883772418265,0.21916663334688977,0.6977511622758173,0.21916663334688974,0.7407834771483811,0.13174530758165615,0.4432730748531664,0.1506799731862455,0.2901318367457322,0.16471199735047298,0.3628129179245985,0.1888509281700134,0.6371870820754016,0.18885092817001342,0.7098681632542679,0.1647119973504729,0.5056549869573848,0.13391335518663228,0.2920770796944117,0.1689695055313376,0.3588297076636691,0.19204374493329268,0.6411702923363308,0.1920437449332927,0.7079229203055881,0.16896950553133758,0.5056136879664934,0.13659024181454632 6 | res2c_branch2a,0.3018246415221353,0.14794946195503955,0.2650726329854776,0.1667457815980214,0.7349273670145223,0.16674578159802142,0.6981753584778647,0.14794946195503958,0.4381494154852126,0.12350401515145053,0.30088731693573256,0.15785081518120767,0.24776906252743652,0.1740516439083001,0.7522309374725634,0.17405164390830008,0.6991126830642674,0.15785081518120767,0.4316179044213436,0.12862001720054958,0.3080241548143846,0.21601644836623438,0.3247150487592798,0.1713359766402009,0.6752849512407201,0.1713359766402009,0.6919758451856155,0.21601644836623438,0.4961268157234586,0.15642302934979582,0.3026262437982942,0.21694317922209802,0.31693978450066623,0.17693589557838552,0.6830602154993338,0.17693589557838554,0.6973737562017059,0.21694317922209802,0.48846071677956104,0.1612147715870243 7 | 
res3a_branch2a,0.2642229434443959,0.14557272240229027,0.26379730599622886,0.18719082757498445,0.7362026940037713,0.18719082757498445,0.735777056555604,0.1455727224022903,0.41918422464519345,0.12401658266318785,0.25874690256352856,0.1521586954762687,0.24580710289774269,0.18214478283692523,0.7541928971022572,0.18214478283692526,0.7412530974364714,0.15215869547626867,0.4044369300662768,0.12492431377033303,0.3370035762275865,0.15460028693374,0.29058436212845956,0.09036662435041533,0.7094156378715405,0.09036662435041533,0.6629964237724134,0.15460028693374006,0.46574611791985515,0.10389906402923504,0.3381305015206653,0.1578765228992637,0.27398290927613506,0.09516259699951435,0.7260170907238649,0.09516259699951435,0.6618694984793347,0.1578765228992637,0.4574245142453843,0.10799449662846335 8 | res3b_branch2a,0.31566321780798545,0.17370594835019057,0.28730072911980004,0.1733799404996183,0.7126992708801999,0.1733799404996183,0.6843367821920147,0.17370594835019054,0.468764768267708,0.13410629861436588,0.31852720883905533,0.1849953347087632,0.2718154833389466,0.17903951787042782,0.7281845166610534,0.17903951787042788,0.6814727911609446,0.18499533470876323,0.46601165475821094,0.13866090789861496,0.24451307154739765,0.14362595307484782,0.39985796867168555,0.16828015447977157,0.6001420313283146,0.16828015447977163,0.7554869284526023,0.14362595307484785,0.5030221327146998,0.1068579312157342,0.24297983546157015,0.1499720306194003,0.3919055800872045,0.17490097391005147,0.6080944199127954,0.1749009739100515,0.7570201645384299,0.1499720306194003,0.5000325775055499,0.10489586225187338 9 | res3c_branch2a,0.308009687886525,0.17079275835637306,0.29143798129530785,0.16703058332572723,0.7085620187046922,0.16703058332572723,0.6919903121134751,0.170792758356373,0.4628253520229305,0.13552854001532869,0.31102256048876215,0.18139547893226113,0.2744833895390683,0.17904419515597303,0.7255166104609317,0.179044195155973,0.688977439511238,0.18139547893226113,0.45960564736020465,0.14458495227415402,0.2680994270984279,0.1398104749982318,0.39512685062370073,0.21749292809444576,0.6048731493762993,0.21749292809444576,0.7319005729015722,0.13981047499823182,0.5178829629717678,0.14705400774190333,0.26523898383033345,0.13841411934333814,0.3824753448660345,0.22328104235167434,0.6175246551339656,0.2232810423516743,0.7347610161696667,0.13841411934333817,0.5075690182618722,0.1513350321008441 10 | res3d_branch2a,0.34418730819651727,0.1830936085583785,0.25542424853023293,0.17460727974499723,0.7445757514697671,0.17460727974499726,0.6558126918034826,0.18309360855837856,0.4779627940215807,0.11694546250600939,0.3452342482477577,0.1947524256733544,0.2499715801166315,0.18830401213462591,0.7500284198833685,0.1883040121346259,0.6547657517522423,0.19475242567335443,0.48305970860272457,0.12350476757896772,0.3246063937896821,0.2078612327726247,0.3651664958786147,0.1973257316557266,0.6348335041213852,0.1973257316557266,0.675393606210318,0.2078612327726247,0.5467355278064213,0.12005997502632083,0.32698480150350895,0.21413008801547073,0.35969491291658784,0.21257889042101055,0.640305087083412,0.21257889042101055,0.673015198496491,0.2141300880154708,0.5502342182844067,0.12757742568775088 11 | 
res4a_branch2a,0.3235677973419475,0.14305216906110574,0.25987766027524056,0.12878995086602016,0.7401223397247594,0.12878995086602016,0.6764322026580525,0.14305216906110574,0.44245192775029957,0.10285030438053105,0.32313855130133246,0.1520248975909357,0.2530650235740081,0.14769381412917068,0.7469349764259918,0.1476938141291707,0.6768614486986676,0.1520248975909357,0.4443638895200851,0.11091140970072716,0.30980207091784046,0.2164336142842628,0.3441234996575464,0.2236358515097331,0.6558765003424536,0.22363585150973306,0.6901979290821596,0.21643361428426278,0.5309384127752108,0.14464456427419656,0.30928294928681244,0.2236429664191588,0.3304900188716099,0.2398568804335224,0.6695099811283901,0.23985688043352235,0.6907170507131875,0.22364296641915873,0.5274483969051359,0.15830352618469654 12 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_bruno/icip/models/models.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/ICIP_replication/codigo_bruno/icip/models/models.zip -------------------------------------------------------------------------------- /ICIP_replication/codigo_bruno/icip/test-results.csv: -------------------------------------------------------------------------------- 1 | distance,f1,fn,fnr,fp,fpr,tn,tp 2 | 0.5600000023841858,0.6111111044883728,112.0,0.5600000023841858,0.0,0.0,0.0,88.0 3 | 0.9171211123466492,0.21621622145175934,130.0,0.8666666746139526,15.0,0.30000001192092896,35.0,20.0 4 | 0.07692307978868484,0.9599999785423279,11.0,0.07692307978868484,0.0,0.0,57.0,132.0 5 | 0.3221476376056671,0.8080000281333923,48.0,0.3221476376056671,0.0,0.0,51.0,101.0 6 | 0.12162162363529205,0.935251772403717,18.0,0.12162162363529205,0.0,0.0,52.0,130.0 7 | 0.918367326259613,0.8703169822692871,0.0,0.0,45.0,0.918367326259613,4.0,151.0 8 | 0.4727526903152466,0.7428571581840515,34.0,0.2720000147819519,29.0,0.3866666555404663,46.0,91.0 9 | 0.42144685983657837,0.9102166891098022,12.0,0.07547169923782349,17.0,0.4146341383457184,24.0,147.0 10 | 0.07382550090551376,0.9616724848747253,11.0,0.07382550090551376,0.0,0.0,51.0,138.0 11 | 0.5336706042289734,0.6481481194496155,46.0,0.3965517282485962,30.0,0.3571428656578064,54.0,70.0 12 | 0.8799999952316284,0.2142857164144516,176.0,0.8799999952316284,0.0,0.0,0.0,24.0 13 | 0.27659574151039124,0.9592476487159729,0.0,0.0,13.0,0.27659574151039124,34.0,153.0 14 | 0.10499999672174454,0.9445910453796387,21.0,0.10499999672174454,0.0,0.0,0.0,179.0 15 | 1.003504991531372,0.901098906993866,15.0,0.08379888534545898,21.0,1.0,0.0,164.0 16 | 0.07554486393928528,0.9720279574394226,4.0,0.0279720276594162,4.0,0.07017543911933899,53.0,139.0 17 | 0.5171457529067993,0.6694214940071106,67.0,0.45270270109176636,13.0,0.25,39.0,81.0 18 | 0.0882352963089943,0.9538461565971375,15.0,0.0882352963089943,0.0,0.0,30.0,155.0 19 | 0.9864864945411682,0.02666666731238365,146.0,0.9864864945411682,0.0,0.0,52.0,2.0 20 | 0.33869031071662903,0.790513813495636,50.0,0.3333333432674408,3.0,0.05999999865889549,47.0,100.0 21 | 0.952397346496582,0.9442970752716064,1.0,0.005586592014878988,20.0,0.9523809552192688,1.0,178.0 22 | 0.2875295579433441,0.9729729890823364,6.0,0.032258063554763794,4.0,0.2857142984867096,10.0,180.0 23 | 1.0,0.9304812550544739,0.0,0.0,26.0,1.0,0.0,174.0 24 | 0.0,1.0,0.0,0.0,0.0,0.0,0.0,200.0 25 | 0.9171974658966064,0.15294118225574493,144.0,0.9171974658966064,0.0,0.0,43.0,13.0 26 | 
0.7891156673431396,0.3483146131038666,116.0,0.7891156673431396,0.0,0.0,53.0,31.0 27 | 0.13093045353889465,0.9568106532096863,7.0,0.046357616782188416,6.0,0.12244898080825806,43.0,144.0 28 | 0.4457155764102936,0.71074378490448,69.0,0.44516128301620483,1.0,0.02222222276031971,44.0,86.0 29 | 0.9538461565971375,0.8132529854774475,0.0,0.0,62.0,0.9538461565971375,3.0,135.0 30 | 0.0,1.0,0.0,0.0,0.0,0.0,0.0,200.0 31 | 0.004999999888241291,0.9974937438964844,1.0,0.004999999888241291,0.0,0.0,0.0,199.0 32 | 0.13375796377658844,0.9283276200294495,21.0,0.13375796377658844,0.0,0.0,43.0,136.0 33 | 0.2679738700389862,0.845283031463623,41.0,0.2679738700389862,0.0,0.0,47.0,112.0 34 | 0.43318063020706177,0.7228915691375732,62.0,0.40789473056793213,7.0,0.1458333283662796,41.0,90.0 35 | 0.35384616255760193,0.7850467562675476,46.0,0.35384616255760193,0.0,0.0,70.0,84.0 36 | 0.0,1.0,0.0,0.0,0.0,0.0,0.0,200.0 37 | 0.8663337230682373,0.8477612137794495,6.0,0.04054053872823715,45.0,0.8653846383094788,7.0,142.0 38 | 0.4764779508113861,0.6774193644523621,57.0,0.4749999940395355,3.0,0.03750000149011612,77.0,63.0 39 | 1.2491023540496826,0.3471074402332306,125.0,0.7485029697418213,33.0,1.0,0.0,42.0 40 | 1.0046120882034302,0.0,148.0,1.0,5.0,0.09615384787321091,47.0,0.0 41 | 0.1794871836900711,0.9014084339141846,28.0,0.1794871836900711,0.0,0.0,44.0,128.0 42 | 0.3306879699230194,0.8091602921485901,40.0,0.27397260069847107,10.0,0.18518517911434174,44.0,106.0 43 | 0.0,1.0,0.0,0.0,0.0,0.0,0.0,200.0 44 | 0.5953541994094849,0.791208803653717,6.0,0.05263157933950424,51.0,0.5930232405662537,35.0,108.0 45 | 0.30380621552467346,0.8171206116676331,40.0,0.27586206793785095,7.0,0.12727272510528564,48.0,105.0 46 | 0.22772927582263947,0.8863636255264282,18.0,0.13333334028720856,12.0,0.1846153885126114,53.0,117.0 47 | 0.22435897588729858,0.8736461997032166,35.0,0.22435897588729858,0.0,0.0,44.0,121.0 48 | 1.000022530555725,0.8505747318267822,1.0,0.00671140942722559,51.0,1.0,0.0,148.0 49 | 0.0,1.0,0.0,0.0,0.0,0.0,0.0,200.0 50 | 1.0023112297058105,0.8130564093589783,10.0,0.06802721321582794,53.0,1.0,0.0,137.0 51 | 0.5871202945709229,0.8399999737739563,12.0,0.08695652335882187,36.0,0.5806451439857483,26.0,126.0 52 | 0.24452956020832062,0.9624999761581421,1.0,0.006451612804085016,11.0,0.24444444477558136,34.0,154.0 53 | 1.0,0.8571428656578064,0.0,0.0,50.0,1.0,0.0,150.0 54 | 0.9697154760360718,0.9095890522003174,1.0,0.005988024175167084,32.0,0.9696969985961914,1.0,166.0 55 | 0.0,1.0,0.0,0.0,0.0,0.0,0.0,200.0 56 | 0.9877141714096069,0.02666666731238365,143.0,0.9862068891525269,3.0,0.05454545468091965,52.0,2.0 57 | 0.03184713423252106,0.983818769454956,5.0,0.03184713423252106,0.0,0.0,43.0,152.0 58 | 0.8995426297187805,0.1818181872367859,134.0,0.899328887462616,1.0,0.019607843831181526,50.0,15.0 59 | 0.14024390280246735,0.9245901703834534,23.0,0.14024390280246735,0.0,0.0,36.0,141.0 60 | 1.00056791305542,0.0,111.0,1.0,3.0,0.033707864582538605,86.0,0.0 61 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_bruno/icip/thres_stats.csv: -------------------------------------------------------------------------------- 1 | ,MLP,MLP,RF,RF 2 | ,mean,std,mean,std 3 | res2a_branch2a,0.25938063666666666,0.08618118553158868,0.47333333333333333,0.02054804667656325 4 | res2b_branch2a,0.24714428,0.07524258617215801,0.4588888888888889,0.010999438818457392 5 | res2c_branch2a,0.23238775,0.07766054887940194,0.48000000000000004,0.02357022603955158 6 | 
res3a_branch2a,0.21479881666666667,0.0419030861985895,0.46111111111111114,0.015947444549341463 7 | res3b_branch2a,0.2443793088888889,0.07155689931585842,0.4444444444444444,0.023622546250521435 8 | res3c_branch2a,0.24225484444444442,0.10314050171173787,0.44000000000000006,0.02624669291337271 9 | res3d_branch2a,0.2282747377777778,0.05643173647762811,0.4566666666666667,0.02260776661041756 10 | res4a_branch2a,0.2178091811111111,0.06998504444594501,0.4444444444444444,0.017069212773041367 11 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_bruno/icip/vid_stats.csv: -------------------------------------------------------------------------------- 1 | ,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF s/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,RF c/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP s/ consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ consistencia,MLP c/ consistencia 2 | ,fnr,fnr,fpr,fpr,tnr,tnr,tpr,tpr,dis,dis,fnr,fnr,fpr,fpr,tnr,tnr,tpr,tpr,dis,dis,fnr,fnr,fpr,fpr,dis,dis,fnr,fnr,fpr,fpr,dis,dis 3 | ,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std 4 | res2a_branch2a,0.3204853487866605,0.34217541910538907,0.3250147676128733,0.37838696238029584,0.6749852323871267,0.3783869623802958,0.6795146512133395,0.34217541910538907,0.5734708750380544,0.371024709085894,0.32104574840890765,0.3545045107615684,0.3188966430138912,0.39620596322596313,0.6811033569861087,0.39620596322596313,0.6789542515910925,0.3545045107615684,0.5778461166213822,0.3889541833896316,0.2975738222013413,0.32140392011280056,0.38983033916665827,0.4201287584986476,0.6072000468889356,0.3865549542681586,0.2967258892191076,0.32558371765226857,0.3919627270188157,0.4343576002963739,0.6131294387202673,0.3976304922340146 5 | res2b_branch2a,0.26913939846489426,0.31630251386272396,0.31132824782748525,0.38802503566151875,0.6886717521725146,0.38802503566151886,0.7308606015351059,0.31630251386272396,0.5210360101743627,0.3830572235045453,0.2679040972894136,0.3256424896051677,0.3006232431914861,0.4004756813544143,0.6993767568085142,0.4004756813544144,0.7320959027105863,0.3256424896051677,0.522108603379358,0.39251628015696277,0.2987499799743518,0.3406615591106953,0.3666144360889293,0.40575856236963137,0.5984116171515367,0.3793858599579094,0.30111454121684417,0.34776114253066015,0.36248195338956957,0.41798903340080773,0.6049250979209011,0.38639470109820245 6 | res2c_branch2a,0.30750203630324013,0.3302412077905394,0.26278133868179554,0.3561334696769243,0.7372186613182046,0.3561334696769243,0.6924979636967601,0.3302412077905393,0.504681151273576,0.3784550564635527,0.3066120677665623,0.3453380796239386,0.24492258633989414,0.36874892559201256,0.755077413660106,0.36874892559201256,0.6933879322334378,0.34533807962393853,0.5031788992248741,0.3928523882820418,0.2954420101723526,0.32411820235414707,0.3469229158052142,0.38430567996068177,0.5757316138377092,0.35606620699811803,0.28950320882725017,0.3294833895839627,0.340362440815129,0.39436833156664847,0.5729462996048671,0.36504729733771224 7 | 
res3a_branch2a,0.2671582522418179,0.3273087390082923,0.2628442522061727,0.36968104276934954,0.7371557477938274,0.36968104276934965,0.7328417477581819,0.32730873900829244,0.4861209397368989,0.38247811938092424,0.26102405469482454,0.3386552453014971,0.24555301171534138,0.38377894792201683,0.7544469882846587,0.3837789479220169,0.7389759453051753,0.3386552453014971,0.477354646012043,0.4010256786634253,0.34227446346285584,0.33209678259912334,0.29717286439342044,0.34269409488169694,0.5536924404607948,0.3533729048018856,0.3433521566566431,0.34416254098731863,0.2812910796569521,0.3557026534323357,0.5518775117245814,0.3681907795104412 8 | res3b_branch2a,0.3200552931588707,0.3463416416781292,0.28929564399764923,0.38158662504656443,0.7107043560023505,0.3815866250465645,0.6799447068411288,0.3463416416781292,0.5549444540537178,0.37633001201501903,0.3224004178853139,0.3630044798719422,0.2728557887104708,0.3897414037346272,0.7271442112895294,0.3897414037346272,0.677599582114686,0.36300447987194223,0.5544568661631649,0.390404728006533,0.24408794953785048,0.3165172156162754,0.4116630825898461,0.4154565976459175,0.6016972445856225,0.3708101619401635,0.24338954067435792,0.3285601449631766,0.40458879612851234,0.43337431463887754,0.6056005287695154,0.3866056744257442 9 | res3c_branch2a,0.3201659515456892,0.34352362703327677,0.2882658854600252,0.37856272549338443,0.7117341145399747,0.3785627254933845,0.6798340484543106,0.3435236270332768,0.5489562046281663,0.3789103875765058,0.32411286507006026,0.3599481131680072,0.27176153557544336,0.38601545773499946,0.7282384644245568,0.3860154577349995,0.6758871349299396,0.35994811316800723,0.5482382844139229,0.39346057831558917,0.27042297124910003,0.3136928031031244,0.3984673079624515,0.4028544896701089,0.6027860365457589,0.3563458619762676,0.2687012650496149,0.328868255963544,0.38711585359864026,0.41755753631872694,0.6022768837560982,0.3733694492870451 10 | res3d_branch2a,0.3504363747606372,0.35651081845601,0.25561592261095556,0.3596981679228985,0.7443840773890443,0.3596981679228985,0.6495636252393627,0.35651081845601,0.5499441278803778,0.37445796670845305,0.3517435039138353,0.37513036154161594,0.25042401533560804,0.38123344833523287,0.749575984664392,0.38123344833523287,0.6482564960861649,0.37513036154161594,0.559518921361814,0.39655111202555443,0.34260409324058755,0.3754670263157017,0.35484747514290615,0.3845002368204414,0.6394727832974593,0.3468848012044314,0.34620201439697434,0.3873958873242742,0.3485680034039439,0.40861528325725155,0.6500630957661145,0.36426760354200616 11 | res4a_branch2a,0.3299285577269546,0.33312395730138444,0.25850518015529517,0.34773838826331344,0.7414948198447048,0.3477383882633135,0.6700714422730456,0.33312395730138444,0.5222937805258476,0.36483720139608583,0.32928699440801124,0.3504028981894792,0.25074880573777614,0.36768120061749265,0.7492511942622241,0.36768120061749265,0.670713005591989,0.3504028981894792,0.5295984353732504,0.3833011963586879,0.29868935121896406,0.3328742253375914,0.3687468321381415,0.3686098108275417,0.5885092170227538,0.35133466175734507,0.29713806894216366,0.35022210106220575,0.35737926757092436,0.3859195260757293,0.5907606286124788,0.3692009905357323 12 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_bruno/imagenet_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | 4 | from keras.utils.data_utils import get_file 5 | from keras import backend as K 6 | 7 | CLASS_INDEX = None 8 | 
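# NOTE: this file is a modified copy of Keras' imagenet_utils. In the 'tf'
# branch of preprocess_input below, the fixed ImageNet channel means (left
# commented out) were replaced by the per-channel means of the batch itself,
# so each channel has its own average (DC level) removed, as described in the
# accompanying instrucoes.txt.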
CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json' 9 | 10 | 11 | def preprocess_input(x, dim_ordering='default'): 12 | if dim_ordering == 'default': 13 | dim_ordering = K.image_dim_ordering() 14 | assert dim_ordering in {'tf', 'th'} 15 | 16 | if dim_ordering == 'th': 17 | x[:, 0, :, :] -= 103.939 18 | x[:, 1, :, :] -= 116.779 19 | x[:, 2, :, :] -= 123.68 20 | # 'RGB'->'BGR' 21 | x = x[:, ::-1, :, :] 22 | else: 23 | # x[:, :, :, 0] -= 103.939 24 | # x[:, :, :, 1] -= 116.779 25 | # x[:, :, :, 2] -= 123.68 26 | x[:, :, :, 0] -= np.mean(x[:,:,:,0]) 27 | x[:, :, :, 1] -= np.mean(x[:,:,:,1]) 28 | x[:, :, :, 2] -= np.mean(x[:,:,:,2]) 29 | # 'RGB'->'BGR' 30 | x = x[:, :, :, ::-1] 31 | return x 32 | 33 | 34 | def decode_predictions(preds, top=5): 35 | global CLASS_INDEX 36 | if len(preds.shape) != 2 or preds.shape[1] != 1000: 37 | raise ValueError('`decode_predictions` expects ' 38 | 'a batch of predictions ' 39 | '(i.e. a 2D array of shape (samples, 1000)). ' 40 | 'Found array with shape: ' + str(preds.shape)) 41 | if CLASS_INDEX is None: 42 | fpath = get_file('imagenet_class_index.json', 43 | CLASS_INDEX_PATH, 44 | cache_subdir='models') 45 | CLASS_INDEX = json.load(open(fpath)) 46 | results = [] 47 | for pred in preds: 48 | top_indices = pred.argsort()[-top:][::-1] 49 | result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] 50 | results.append(result) 51 | return results 52 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_bruno/instrucoes.txt: -------------------------------------------------------------------------------- 1 | Lucas Pinheiro Cinelli 2 | Attachments 3 | Nov 27, 2018, 13:24 4 | to me, Rafael 5 | 6 | Attached is the zip with Bruno's code. 7 | The file you are interested in is train_HDF5_generator_VDAO.py 8 | 9 | 10 | Lucas CINELLI 11 | 12 | 13 | ---------- Forwarded message --------- 14 | From: Bruno Machado Afonso 15 | Date: Thu, Mar 29, 2018 at 08:22 16 | Subject: ICIP training code 17 | To: Lucas Pinheiro Cinelli 18 | 19 | 20 | Good morning Lucas, 21 | 22 | Last night I organized the code as well as I could so that you can carry the work forward. If I handed everything over without the proper guidance, you would most likely get lost. I believe this way will make your work easier. 23 | 24 | Attached is the zip with the code you need for the ICIP training. 25 | 26 | train_FC_59_vids_DIS.py trains the FC for the blocks of the 59 videos. 27 | train_HDF5_generator_VDAO.py generates the HDF5 with the VDAO frames, in case you need to generate a new HDF5. 28 | imagenet_utils.py is used to remove the DC level of the images during the saving process. 29 | 30 | The directories where the training/test frames and the HDF5 files are located are 31 | written in the file diretorios.txt. I have already saved the training HDF5 files (ALOI and VDAO) and the test HDF5 files (the ones that represent the frames of the 59 videos); they are located in the directory written in that text file. 32 | 33 | If you have any questions, I am at your disposal.
34 | 35 | Att., 36 | Bruno Afonso 37 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_cinelli/vdao-anomaly/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/vim,linux,macos,video,images,python,windows,sublimetext,visualstudiocode 3 | 4 | ### Images ### 5 | # JPEG 6 | *.jpg 7 | *.jpeg 8 | *.jpe 9 | *.jif 10 | *.jfif 11 | *.jfi 12 | 13 | # JPEG 2000 14 | *.jp2 15 | *.j2k 16 | *.jpf 17 | *.jpx 18 | *.jpm 19 | *.mj2 20 | 21 | # JPEG XR 22 | *.jxr 23 | *.hdp 24 | *.wdp 25 | 26 | # Graphics Interchange Format 27 | *.gif 28 | 29 | # RAW 30 | *.raw 31 | 32 | # Web P 33 | *.webp 34 | 35 | # Portable Network Graphics 36 | *.png 37 | 38 | # Animated Portable Network Graphics 39 | *.apng 40 | 41 | # Multiple-image Network Graphics 42 | *.mng 43 | 44 | # Tagged Image File Format 45 | *.tiff 46 | *.tif 47 | 48 | # Scalable Vector Graphics 49 | *.svg 50 | *.svgz 51 | 52 | # Portable Document Format 53 | *.pdf 54 | 55 | # X BitMap 56 | *.xbm 57 | 58 | # BMP 59 | *.bmp 60 | *.dib 61 | 62 | # ICO 63 | *.ico 64 | 65 | # 3D Images 66 | *.3dm 67 | *.max 68 | 69 | ### Linux ### 70 | *~ 71 | 72 | # temporary files which can be created if a process still has a handle open of a deleted file 73 | .fuse_hidden* 74 | 75 | # KDE directory preferences 76 | .directory 77 | 78 | # Linux trash folder which might appear on any partition or disk 79 | .Trash-* 80 | 81 | # .nfs files are created when an open file is removed but is still being accessed 82 | .nfs* 83 | 84 | ### macOS ### 85 | *.DS_Store 86 | .AppleDouble 87 | .LSOverride 88 | 89 | # Icon must end with two \r 90 | Icon 91 | 92 | # Thumbnails 93 | ._* 94 | 95 | # Files that might appear in the root of a volume 96 | .DocumentRevisions-V100 97 | .fseventsd 98 | .Spotlight-V100 99 | .TemporaryItems 100 | .Trashes 101 | .VolumeIcon.icns 102 | .com.apple.timemachine.donotpresent 103 | 104 | # Directories potentially created on remote AFP share 105 | .AppleDB 106 | .AppleDesktop 107 | Network Trash Folder 108 | Temporary Items 109 | .apdisk 110 | 111 | ### Python ### 112 | # Byte-compiled / optimized / DLL files 113 | **/__pycache__/ 114 | *.py[cod] 115 | *$py.class 116 | 117 | # C extensions 118 | *.so 119 | 120 | # Distribution / packaging 121 | .Python 122 | build/ 123 | develop-eggs/ 124 | dist/ 125 | downloads/ 126 | eggs/ 127 | .eggs/ 128 | lib/ 129 | lib64/ 130 | parts/ 131 | sdist/ 132 | var/ 133 | wheels/ 134 | *.egg-info/ 135 | .installed.cfg 136 | *.egg 137 | 138 | # PyInstaller 139 | # Usually these files are written by a python script from a template 140 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
141 | *.manifest 142 | *.spec 143 | 144 | # Installer logs 145 | pip-log.txt 146 | pip-delete-this-directory.txt 147 | 148 | # Unit test / coverage reports 149 | htmlcov/ 150 | .tox/ 151 | .coverage 152 | .coverage.* 153 | .cache 154 | nosetests.xml 155 | coverage.xml 156 | *.cover 157 | .hypothesis/ 158 | 159 | # Translations 160 | *.mo 161 | *.pot 162 | 163 | # Django stuff: 164 | *.log 165 | local_settings.py 166 | 167 | # Flask stuff: 168 | instance/ 169 | .webassets-cache 170 | 171 | # Scrapy stuff: 172 | .scrapy 173 | 174 | # Sphinx documentation 175 | docs/_build/ 176 | 177 | # PyBuilder 178 | target/ 179 | 180 | # Jupyter Notebook 181 | .ipynb_checkpoints 182 | 183 | # pyenv 184 | .python-version 185 | 186 | # celery beat schedule file 187 | celerybeat-schedule.* 188 | 189 | # SageMath parsed files 190 | *.sage.py 191 | 192 | # Environments 193 | .env 194 | .venv 195 | env/ 196 | venv/ 197 | ENV/ 198 | env.bak/ 199 | venv.bak/ 200 | 201 | # Spyder project settings 202 | .spyderproject 203 | .spyproject 204 | 205 | # Rope project settings 206 | .ropeproject 207 | 208 | # mkdocs documentation 209 | /site 210 | 211 | # mypy 212 | .mypy_cache/ 213 | 214 | ### SublimeText ### 215 | # cache files for sublime text 216 | *.tmlanguage.cache 217 | *.tmPreferences.cache 218 | *.stTheme.cache 219 | 220 | # workspace files are user-specific 221 | *.sublime-workspace 222 | 223 | # project files should be checked into the repository, unless a significant 224 | # proportion of contributors will probably not be using SublimeText 225 | # *.sublime-project 226 | 227 | # sftp configuration file 228 | sftp-config.json 229 | 230 | # Package control specific files 231 | Package Control.last-run 232 | Package Control.ca-list 233 | Package Control.ca-bundle 234 | Package Control.system-ca-bundle 235 | Package Control.cache/ 236 | Package Control.ca-certs/ 237 | Package Control.merged-ca-bundle 238 | Package Control.user-ca-bundle 239 | oscrypto-ca-bundle.crt 240 | bh_unicode_properties.cache 241 | 242 | # Sublime-github package stores a github token in this file 243 | # https://packagecontrol.io/packages/sublime-github 244 | GitHub.sublime-settings 245 | 246 | ### Video ### 247 | *.3g2 248 | *.3gp 249 | *.asf 250 | *.asx 251 | *.avi 252 | *.flv 253 | *.mov 254 | *.mp4 255 | *.mpg 256 | *.rm 257 | *.swf 258 | *.vob 259 | *.wmv 260 | 261 | ### Vim ### 262 | # swap 263 | [._]*.s[a-v][a-z] 264 | [._]*.sw[a-p] 265 | [._]s[a-v][a-z] 266 | [._]sw[a-p] 267 | # session 268 | Session.vim 269 | # temporary 270 | .netrwhist 271 | # auto-generated tag files 272 | tags 273 | 274 | ### VisualStudioCode ### 275 | .vscode/* 276 | !.vscode/settings.json 277 | !.vscode/tasks.json 278 | !.vscode/launch.json 279 | !.vscode/extensions.json 280 | .history 281 | 282 | ### Windows ### 283 | # Windows thumbnail cache files 284 | Thumbs.db 285 | ehthumbs.db 286 | ehthumbs_vista.db 287 | 288 | # Folder config file 289 | Desktop.ini 290 | 291 | # Recycle Bin used on file shares 292 | $RECYCLE.BIN/ 293 | 294 | # Windows Installer files 295 | *.cab 296 | *.msi 297 | *.msm 298 | *.msp 299 | 300 | # Windows shortcuts 301 | *.lnk 302 | 303 | # End of https://www.gitignore.io/api/vim,linux,macos,video,images,python,windows,sublimetext,visualstudiocode 304 | 305 | # Custom ignore 306 | datasets/features 307 | data/**/* 308 | .vscode 309 | models/* 310 | **/results*/ 311 | **/*.csv 312 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_cinelli/vdao-anomaly/README.md: 
-------------------------------------------------------------------------------- 1 | # Moving-camera Video Surveillance in Cluttered Environments using Deep Features 2 | This is the code for the paper [Moving-camera Video Surveillance in Cluttered Environments using Deep Features](https://www.researchgate.net/publication/327995320_Moving-Camera_Video_Surveillance_in_Cluttered_Environments_Using_Deep_Features) by Bruno Afonso, Lucas Cinelli, Lucas Thomaz, Allan Silva, Eduardo Barros and Sergio Netto. 3 | 4 | ## VDAO dataset 5 | To download the ResNet50 features extracted from the [VDAO dataset](http://www02.smt.ufrj.br/%7Etvdigital/database/objects/page_01.html) run: 6 | 7 | ``` bash 8 | sh datasets/download_vdao.sh /path/to/data/ 9 | ``` 10 | 11 | This will download the dataset in hdf5 format into the specified directory. 12 | 13 | [This repository](https://github.com/rafaelpadilla/DeepLearning-VDAO/tree/master/VDAO_Access) may be useful for handling VDAO. 14 | 15 | ## Requirements 16 | 17 | * python 3.6.6 18 | * hdf5 1.10.2 19 | * numpy 1.15.0 20 | * pandas 0.23.4 21 | * tensorflow 1.3.0 22 | * tensorflow-gpu 1.3.0 23 | * keras 2.2.2 24 | * matplotlib 2.2.3 25 | * scipy 1.1.0 26 | * scikit-learn 0.19.1 27 | 28 | ## Training 29 | 30 | To train the model on the VDAO dataset using a fully-connected layer as classifier, run: 31 | 32 | ``` bash 33 | python main.py --dataset-dir /path/to/data/ --file train_batch_VDAO.h5 -b 32 --save models/mlp --cv-params 'method=leave_one_out' --arch mlp --arch-params 'nb_neurons=[50, 1600]' --optim adamax train --epochs 20 --lr 0.002 --wd 0.005 --val-roc 34 | ``` 35 | 36 | Or to train using a Random Forest as classifier, run: 37 | 38 | ``` bash 39 | python main.py --dataset-dir /path/to/data/ --file train_batch_VDAO.h5 -b 32 --save models/rf --cv-params 'method=leave_one_out' --arch randomforest --arch-params 'nb_trees=100' --optim adamax train --epochs 20 --lr 0.002 --wd 0.005 --val-roc 40 | ``` 41 | 42 | ## Testing 43 | 44 | To get the prediction results from the trained model, run: 45 | 46 | ``` bash 47 | python main.py --dataset-dir /path/to/data/ --file 59_videos_test_batch.h5 --load path/to/your/model --arch mlp/randomforest --cv-params 'method=leave_one_out' --save test_results predict --optim-thres 48 | ``` 49 | 50 | ## Paper results 51 | 52 | To obtain Table 1 of our paper, run: 53 | 54 | ``` bash 55 | python compute_results.py --file /path/to/test/results --save paper_results --med-filter 5 56 | ``` 57 | 58 | ## Citation 59 | 60 | If you find this useful for your research, please cite the following paper. 61 | 62 | ``` 63 | @inproceedings{afonso2018anomaly, 64 | author = {B. M. Afonso and L. P. Cinelli and L. A. Thomaz and A. F. da Silva and E. A. B. da Silva and S. L. 
Netto}, 65 | title = {Moving-camera Video Surveillance in Cluttered Environments using Deep Features}, 66 | booktitle = {2018 25th IEEE International Conference on Image Processing (ICIP)}, 67 | year = {2018}, 68 | pages = {2296-2300}, 69 | doi = {10.1109/ICIP.2018.8451540} 70 | } 71 | ``` 72 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_cinelli/vdao-anomaly/archs/__init__.py: -------------------------------------------------------------------------------- 1 | from .networks import mlp 2 | from .classics import randomforest 3 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_cinelli/vdao-anomaly/archs/classics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pickle 4 | from sklearn.ensemble import RandomForestClassifier 5 | from sklearn.pipeline import Pipeline 6 | from utils import Flatten 7 | from sklearn.externals import joblib 8 | 9 | import pdb 10 | 11 | __all__ = ['randomforest'] 12 | 13 | 14 | class RandomForest(object): 15 | """ Random Forest Classifier 16 | """ 17 | def __init__(self, model_path=None, layer=None, group_idx=None, 18 | nb_trees=None, max_depth=None, bootstrap=True, oob_score=False, 19 | save_path=None, nb_jobs=None): 20 | """ 21 | Keyword Arguments: 22 | model_path {str} -- The path from which to load the models (default: {None}) 23 | layer {str} -- The layer from which features were extracted (default: {None}) 24 | group_idx {int} -- The data fold being used (default: {None}) 25 | nb_trees {int} -- The number of trees in the forest (default: {None}) 26 | max_depth {int} -- The maximum depth of the tree. If None go as deep as possible (default: {None}) 27 | bootstrap {bool} -- Whether bootstrap samples are used when building trees (default: {True}) 28 | oob_score {bool} -- Whether to use out-of-bag samples to estimate the generalization accuracy (default: {False}) 29 | save_path {str} -- The target dir where the model should be saved (default: {None}) 30 | nb_jobs {int} -- The number of jobs to run in parallel for both fit and predict (default: {None}) 31 | """ 32 | 33 | self.group_idx = group_idx 34 | self.subdir = os.path.join(save_path, layer) 35 | if model_path is not None: 36 | self.load(model_path, layer, group_idx) 37 | else: 38 | self.create(nb_trees, max_depth, bootstrap, nb_jobs, oob_score) 39 | 40 | def load(self, model_path, layer, group_idx): 41 | model_name = os.path.join( 42 | model_path, layer, 'model.test{:02d}.pkl'.format(group_idx)) 43 | self.model = joblib.load(model_name) 44 | 45 | def create(self, nb_trees, max_depth, bootstrap=True, nb_jobs=4, oob_score=False): 46 | """ Creates an instance of the sklearn ensemble model Random Forest. 47 | 48 | Arguments: 49 | nb_trees {int} -- The number of trees in the forest. 50 | max_depth {int} -- The maximum depth of the tree. 51 | 52 | Keyword Arguments: 53 | bootstrap {bool} -- Whether bootstrap samples are used when building 54 | trees. (default: {True}) 55 | nb_jobs {int} -- The number of jobs to run in parallel for both fit 56 | and predict (default: {4}) 57 | oob_score {bool} -- Whether to use out-of-bag samples to estimate 58 | the generalization accuracy. 
(default: {False}) 59 | """ 60 | classifier = RandomForestClassifier( 61 | n_estimators=nb_trees, max_depth=max_depth, max_features='auto', 62 | bootstrap=bootstrap, oob_score=oob_score, n_jobs=nb_jobs, random_state=0) 63 | 64 | self.model = Pipeline([('flatten', Flatten()), ('forest', classifier)]) 65 | 66 | def fit(self, X, y, val_data=None): 67 | self.model.fit(X, y) 68 | self.save_model() 69 | 70 | def predict_proba(self, samples): 71 | return self.model.predict_proba(samples)[:, 1] 72 | 73 | def save_model(self): 74 | if not os.path.exists(self.subdir): 75 | os.makedirs(self.subdir) 76 | 77 | filename = os.path.join( 78 | self.subdir, 'model.test{:02d}.pkl'.format(self.group_idx)) 79 | joblib.dump(self.model, filename) 80 | 81 | 82 | def randomforest(load_path=None, save_path=None, layer=None, group_idx=None, **kwargs): 83 | """Instantiates a sklearn pipeline containing the desired Random Forest 84 | ensemble model 85 | 86 | Keyword Arguments: 87 | load_path {str} -- The path from which to load the models (default: {None}) 88 | save_path {str} -- The target dir where the model should be saved (default: {None}) 89 | layer {str} -- The layer from which features were extracted (default: {None}) 90 | group_idx {int} -- The data fold being used (default: {None}) 91 | 92 | Returns: 93 | sklearn.pipeline.Pipeline -- Pipeline containing specified Random Forest 94 | (sklearn.ensemble.RandomForestClassifier) 95 | """ 96 | 97 | nb_trees = kwargs.pop('nb_trees', None) 98 | max_depth = kwargs.pop('max_depth', None) 99 | bootstrap = kwargs.pop('bootstrap', True) 100 | oob_score = kwargs.pop('oob_score', False) 101 | nb_jobs = kwargs.pop('nb_jobs', 4) 102 | 103 | classifier = RandomForest(model_path=load_path, layer=layer, group_idx=group_idx, 104 | nb_trees=nb_trees, max_depth=max_depth, bootstrap=bootstrap, 105 | oob_score=oob_score, nb_jobs=nb_jobs, save_path=save_path) 106 | return classifier 107 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_cinelli/vdao-anomaly/archs/networks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | import keras.models 5 | from keras.layers import Activation, Dense, Flatten 6 | from keras.models import Model, Sequential 7 | from keras.regularizers import l1, l2 8 | from keras.optimizers import SGD, Adam, Adamax 9 | from keras import callbacks 10 | 11 | __all__ = ['mlp'] 12 | 13 | optimizers = {'adam': Adam, 'adamax': Adamax, 'sgd': SGD} 14 | 15 | 16 | class MLP(object): 17 | """ Multilayer Perceptron Classifier (Fully-Connected network) 18 | """ 19 | 20 | def __init__(self, load_path=None, layer=None, group_idx=None, 21 | input_shape=None, nb_neurons=None, weight_decay=0, 22 | save_path=None): 23 | """ 24 | Keyword Arguments: 25 | load_path {str} -- The path from which to load the models (default: {None}) 26 | layer {str} -- The layer from which features were extracted (default: {None}) 27 | group_idx {int} -- The data fold being used (default: {None}) 28 | input_shape {list} -- [description] (default: {None}) 29 | nb_neurons {int} -- [description] (default: {None}) 30 | weight_decay {int} -- [description] (default: {0}) 31 | save_path {str} -- The target dir where the model should be saved (default: {None}) 32 | """ 33 | self.group_idx = group_idx 34 | self.subdir = os.path.join(save_path, layer) 35 | if load_path is not None: 36 | self.load(load_path, layer) 37 | else: 38 | self.create(input_shape, nb_neurons, 
weight_decay=weight_decay) 39 | 40 | def load(self, load_path, layer): 41 | 42 | model_name = os.path.join( 43 | load_path, layer, 'model.test{:02d}-ep'.format(self.group_idx)) 44 | model_name = glob.glob(model_name + '*')[-1] 45 | 46 | self.model = keras.models.load_model(model_name, compile=False) 47 | 48 | def create(self, input_shape, nb_neurons, weight_decay=0): 49 | """ Creates an instance of the keras Sequential class with the specified 50 | number of layers, each one consisting of a Fully-Connected layer and a ReLU 51 | activation. The last activation layer is a Sigmoid. 52 | 53 | Arguments: 54 | input_shape {list} -- Size of the tensor at the input of the model 55 | nb_neurons {iterable} -- iterable containing the nb of neurons on 56 | each layer of the network 57 | 58 | Keyword Arguments: 59 | weight_decay {int} -- The L2 regularization weight for the loss (default: {0}) 60 | """ 61 | classifier = Sequential() 62 | classifier.add(Flatten(input_shape=input_shape)) 63 | 64 | for idx, neurons_in_layer in enumerate(nb_neurons): 65 | classifier.add(Dense(neurons_in_layer, 66 | name='Dense_feat_{}'.format(idx), 67 | kernel_regularizer=l2(weight_decay))) 68 | classifier.add(Activation('relu')) 69 | 70 | classifier.add(Dense(1, name='Dense_feat', 71 | kernel_regularizer=l2(weight_decay))) 72 | classifier.add(Activation('sigmoid')) 73 | 74 | self.model = classifier 75 | 76 | def set_callbacks(self, lr_span, lr_factor): 77 | """Set useful periodic Keras callbacks: 78 | `LearningRateScheduler` updates the lr at the end of each epoch 79 | `ModelCheckpoint` saves the model at the end of each epoch (if conditions are met) 80 | `CSVLogger` writes results to a csv file 81 | 82 | Arguments: 83 | lr_span {int} -- The number of epochs to wait until changing the lr 84 | lr_factor {float} -- By how much to modify the lr 85 | """ 86 | if not os.path.exists(self.subdir): 87 | os.makedirs(self.subdir) 88 | 89 | # learning rate schedule 90 | def schedule(epoch, lr): return lr * lr_factor**(epoch // lr_span) 91 | lr_scheduler = callbacks.LearningRateScheduler(schedule) 92 | 93 | # Should I monitor here the best val_loss or the metrics of interest? 
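# (Note: keras' ModelCheckpoint accepts any metric name that appears in the
# training logs as its `monitor` argument, so a metric of interest could be
# tracked instead of val_loss by changing `monitor` and `mode` below.)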
94 | # If not all samples are used in an epoch val_loss is noisy 95 | checkpointer = callbacks.ModelCheckpoint( 96 | os.path.join(self.subdir, 97 | 'model.test{:02d}-ep{{epoch:02d}}.pth'.format( 98 | self.group_idx)), 99 | monitor='val_loss', save_best_only=True, mode='min') 100 | 101 | csv_logger = callbacks.CSVLogger( 102 | os.path.join(self.subdir, 103 | 'training.test{:02d}.log'.format(self.group_idx))) 104 | 105 | self.callbacks = [lr_scheduler, csv_logger, checkpointer] 106 | 107 | def compile(self, optim, lr, lr_span, lr_factor, metrics, 108 | loss='binary_crossentropy'): 109 | optimizer = optimizers[optim.lower()](lr=lr) 110 | self.set_callbacks(lr_span, lr_factor) 111 | self.model.compile( 112 | loss=loss, 113 | optimizer=optimizer, 114 | metrics=metrics) 115 | 116 | def fit(self, X, y, val_data=None): 117 | history = self.model.fit(x=X, 118 | y=y, 119 | batch_size=self.batch_size, 120 | epochs=self.epochs, 121 | initial_epoch=self.init_epoch, 122 | shuffle=True, 123 | verbose=1, 124 | callbacks=self.callbacks, 125 | validation_data=val_data 126 | ) 127 | return history 128 | 129 | def predict_proba(self, samples): 130 | return self.model.predict_proba(samples, batch_size=self.batch_size, 131 | verbose=0) 132 | 133 | def set_batch(self, batch_size): 134 | """Set the mini-batch size 135 | 136 | Arguments: 137 | batch_size {int} -- Mini-batch size 138 | """ 139 | self.batch_size = batch_size 140 | 141 | def set_epochs(self, nb_epochs, start_from=0): 142 | """Set the number of epochs to be trained on 143 | 144 | Arguments: 145 | nb_epochs {int} -- The total number of epochs desired 146 | 147 | Keyword Arguments: 148 | start_from {int} -- From which epoch to start training. 149 | Useful on retraining (default: {0}) 150 | """ 151 | self.epochs = nb_epochs 152 | self.init_epoch = start_from 153 | 154 | 155 | def mlp(load_path=None, save_path=None, layer=None, group_idx=None, **kwargs): 156 | """Instantiates a Sequential keras model containing the fully connected net 157 | 158 | Keyword Arguments: 159 | load_path {str} -- The path from which to load the models (default: {None}) 160 | save_path {str} -- The target dir where the model should be saved (default: {None}) 161 | layer {str} -- The layer from which features were extracted (default: {None}) 162 | group_idx {int} -- The data fold being used (default: {None}) 163 | 164 | Returns: 165 | keras.model.Sequential -- The specified dense network 166 | """ 167 | input_shape = kwargs.pop('input_shape', None) 168 | nb_neurons = kwargs.pop('nb_neurons', None) 169 | weight_decay = kwargs.pop('weight_decay', None) 170 | 171 | classifier = MLP(load_path=load_path, layer=layer, group_idx=group_idx, 172 | input_shape=input_shape, nb_neurons=nb_neurons, 173 | weight_decay=weight_decay, save_path=save_path) 174 | return classifier 175 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_cinelli/vdao-anomaly/compute_results.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from scipy.signal import medfilt 7 | from sklearn.metrics import confusion_matrix 8 | 9 | 10 | # Metrics 11 | def _fpr(tn, fp, fn, tp, eps=1e-12): 12 | return fp / (fp + tn + eps) 13 | 14 | def _fnr(tn, fp, fn, tp, eps=1e-12): 15 | return fn/ (fn + tp + eps) 16 | 17 | def _tpr(tn, fp, fn, tp, eps=1e-12): 18 | return 1 - fnr(tn, fp, fn, tp) 19 | 20 | def _tnr(tn, fp, fn, tp, eps=1e-12): 21 | return 1 - 
fpr(tn, fp, fn, tp) 22 | 23 | def _f1(tn, fp, fn, tp, eps=1e-12): 24 | return (2*tp)/(2*tp + fn + fp) 25 | 26 | def _dis(tn, fp, fn, tp, eps=1e-12): 27 | return np.sqrt(_fpr(tn, fp, fn, tp, eps)**2 + _fnr(tn, fp, fn, tp, eps)**2) 28 | 29 | def compute_results(args): 30 | # Load the prediction results 31 | preds = pd.read_pickle(args.file) 32 | 33 | if args.kernel_size > 1: 34 | for layer in preds.columns.levels[0]: 35 | preds.loc[:, (layer, 'predictions')] = \ 36 | preds.loc[:, (layer, 'predictions')].agg( 37 | medfilt, kernel_size=args.kernel_size) 38 | 39 | res_layer = {} 40 | for layer in preds.columns.levels[0]: 41 | predictions = preds.loc[:, (layer, 'predictions')] 42 | labels = preds.loc[:, (layer, 'labels')] 43 | vids = [] 44 | 45 | for idx in range(len(predictions)): 46 | tn, fp, fn, tp = confusion_matrix( 47 | labels[idx], predictions[idx], labels=[0,1]).ravel() 48 | vids += [{meter: eval('_' + meter)(tn, fp, fn, tp) for meter in ['fpr', 'fnr', 'f1']}] 49 | 50 | res_layer[layer] = pd.concat([pd.Series(vid) for vid in vids], 51 | keys=preds.index, axis=1) 52 | 53 | final_res = pd.concat(res_layer, axis=0) 54 | 55 | # Compute avg results across videos in the (test) set 56 | mean_results = final_res.mean(axis=1).unstack() 57 | mean_results['dis'] = mean_results[['fpr', 'fnr']].apply(np.linalg.norm, axis=1) 58 | 59 | # Save results to file 60 | mean_results.to_csv(args.save_dir) 61 | 62 | 63 | if __name__ == '__main__': 64 | parser = argparse.ArgumentParser(description='Compute final results for dataset') 65 | 66 | parser.add_argument( 67 | '--file', 68 | dest='file', 69 | type=str, 70 | metavar='PATH', 71 | default=None, 72 | help='Path to the prediction results' 73 | ) 74 | parser.add_argument( 75 | '--save', 76 | dest='save_dir', 77 | type=str, 78 | metavar='PATH', 79 | help='path to save the global metric results' 80 | ) 81 | parser.add_argument( 82 | '--med-filter', 83 | dest='kernel_size', 84 | default=1, 85 | type=int, 86 | metavar='N', 87 | help='kernel size of the temporal median filter' 88 | ) 89 | 90 | args = parser.parse_args() 91 | print(args) 92 | compute_results(args) 93 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_cinelli/vdao-anomaly/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/ICIP_replication/codigo_cinelli/vdao-anomaly/datasets/__init__.py -------------------------------------------------------------------------------- /ICIP_replication/codigo_cinelli/vdao-anomaly/datasets/download_vdao.sh: -------------------------------------------------------------------------------- 1 | TARGET_DIR=$1 2 | if [ -z $TARGET_DIR ] 3 | then 4 | echo "Must specify target directory" 5 | else 6 | mkdir $TARGET_DIR/ 7 | URL=https://www.dropbox.com/s/39guyaakreu5o2r/vdao_feat_dataset.tar.gz?dl=0 8 | wget $URL -P $TARGET_DIR 9 | tar -xvf $TARGET_DIR/vdao_feat_dataset.tar.gz -C $TARGET_DIR 10 | fi 11 | -------------------------------------------------------------------------------- /ICIP_replication/codigo_cinelli/vdao-anomaly/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pickle 3 | 4 | import numpy as np 5 | 6 | from sklearn.base import TransformerMixin 7 | 8 | class Flatten(TransformerMixin): 9 | def fit(self, X, y=None): 10 | return self 11 | def transform(self, X): 12 | return 
X.reshape(X.shape[0], -1) 13 | 14 | class History(object): 15 | def __init__(self): 16 | self.history = {} 17 | 18 | def update(self, name, value, mode=None): 19 | if isinstance(value, dict): 20 | self.history[mode][name] = value 21 | return 22 | try: 23 | self.history[mode][name] += [value] 24 | except KeyError: 25 | try: 26 | self.history[mode][name] = [value] 27 | except KeyError: 28 | self.history[mode] = {name: [value]} 29 | 30 | def averages(self, weights_key, exclude=[]): 31 | exclude += [weights_key] 32 | for mode, history in self.history.items(): 33 | weights = self.history[mode].get(weights_key) 34 | weights = [len(frames) for frames in weights] 35 | for name, meter in history.items(): 36 | if name in exclude: 37 | continue 38 | self.history[mode][name] += [ 39 | np.average(meter, weights=weights, axis=0)] 40 | 41 | 42 | def print_result(history, title, exclude=[]): 43 | # print('\n{}'.format(title)) 44 | msg = ['\n{}\n'.format(title)] 45 | msg += ['{0}: {1}\n'.format(name, meter[-1]) for name, meter in 46 | history.items() if name not in exclude] 47 | 48 | print(''.join(msg)) 49 | 50 | 51 | class NumpyEncoder(json.JSONEncoder): 52 | def default(self, obj): 53 | if isinstance(obj, np.ndarray): 54 | return obj.tolist() 55 | if isinstance(obj, History): 56 | return obj.history 57 | return json.JSONEncoder.default(self, obj) 58 | 59 | 60 | def save_data(data, filename, json_format=True, pickle_format=True): 61 | if json_format: 62 | with open(filename + '.json', 'w') as fp: 63 | json.dump(data, fp, sort_keys=True, indent=4, cls=NumpyEncoder) 64 | if pickle_format: 65 | with open(filename + '.pkl', 'wb') as fp: 66 | pickle.dump(data, fp, protocol=pickle.HIGHEST_PROTOCOL) 67 | 68 | 69 | def parse_kwparams(kwlst): 70 | ''' 71 | Parses key-worded parameters. 72 | 73 | @param kwstr key-worded parameters list to be parsed. 74 | 75 | @return dictionary with the key-worded parameters. 
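    Example, using flag values that appear in this repository's README
    (--cv-params 'method=leave_one_out', --arch-params 'nb_trees=100'):

        parse_kwparams(['method=leave_one_out', 'nb_trees=100'])
        # -> {'method': 'leave_one_out', 'nb_trees': 100}
        # Values that json.loads can decode become numbers/lists/booleans;
        # anything that fails to decode is kept as a plain string.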
76 | ''' 77 | 78 | # Set in dictionary form 79 | kwparams = {} 80 | for param in kwlst: 81 | k, v = param.split('=') 82 | try: 83 | kwparams[k] = json.loads(v) 84 | except json.JSONDecodeError: 85 | kwparams[k] = v 86 | return kwparams 87 | 88 | -------------------------------------------------------------------------------- /ICIP_replication/icip_replication.yml: -------------------------------------------------------------------------------- 1 | name: icip_replication 2 | channels: 3 | - conda-forge 4 | - soumith 5 | - defaults 6 | dependencies: 7 | - blas=1.0=mkl 8 | - cycler=0.10.0=py_1 9 | - expat=2.2.5=hf484d3e_1002 10 | - fontconfig=2.13.1=h2176d3f_1000 11 | - gettext=0.19.8.1=h9745a5d_1001 12 | - glib=2.56.2=had28632_1001 13 | - icu=58.2=hf484d3e_1000 14 | - kiwisolver=1.0.1=py37h6bb024c_1002 15 | - libiconv=1.15=h14c3975_1004 16 | - libuuid=2.32.1=h14c3975_1000 17 | - libxcb=1.13=h14c3975_1002 18 | - libxml2=2.9.8=h143f9aa_1005 19 | - matplotlib=3.0.2=py37_1002 20 | - matplotlib-base=3.0.2=py37h167e16e_1002 21 | - pexpect=4.6.0=py37_0 22 | - pip=19.0.1=py37_0 23 | - prompt_toolkit=2.0.8=py_0 24 | - pthread-stubs=0.4=h14c3975_1001 25 | - ptyprocess=0.6.0=py37_0 26 | - pyparsing=2.3.1=py_0 27 | - pyqt=5.6.0=py37h13b7fb3_1008 28 | - python-dateutil=2.8.0=py_0 29 | - setuptools=40.7.3=py37_0 30 | - sip=4.18.1=py37hf484d3e_1000 31 | - terminado=0.8.1=py37_1 32 | - tk=8.6.9=h84994c4_1000 33 | - tornado=5.1.1=py37h14c3975_1000 34 | - traitlets=4.3.2=py37_0 35 | - wheel=0.32.3=py37_0 36 | - xorg-libxau=1.0.8=h14c3975_1006 37 | - xorg-libxdmcp=1.1.2=h14c3975_1007 38 | - backcall=0.1.0=py37_0 39 | - bleach=3.1.0=py37_0 40 | - bzip2=1.0.6=h14c3975_5 41 | - ca-certificates=2018.12.5=0 42 | - cairo=1.14.12=h8948797_3 43 | - certifi=2018.11.29=py37_0 44 | - cffi=1.11.5=py37he75722e_1 45 | - dbus=1.13.2=h714fa37_1 46 | - decorator=4.3.2=py37_0 47 | - entrypoints=0.3=py37_0 48 | - ffmpeg=4.0=hcdf2ecd_0 49 | - freeglut=3.0.0=hf484d3e_5 50 | - freetype=2.9.1=h8a8886c_1 51 | - gmp=6.1.2=h6c8ec71_1 52 | - graphite2=1.3.13=h23475e2_0 53 | - gst-plugins-base=1.14.0=hbbd80ab_1 54 | - gstreamer=1.14.0=hb453b48_1 55 | - harfbuzz=1.8.8=hffaf4a1_0 56 | - hdf5=1.10.2=hba1933b_1 57 | - imageio=2.5.0=py37_0 58 | - intel-openmp=2019.1=144 59 | - ipykernel=5.1.0=py37h39e3cac_0 60 | - ipython=7.2.0=py37h39e3cac_0 61 | - ipython_genutils=0.2.0=py37_0 62 | - jasper=2.0.14=h07fcdf6_1 63 | - jedi=0.13.2=py37_0 64 | - jinja2=2.10=py37_0 65 | - jpeg=9b=h024ee3a_2 66 | - jsonschema=2.6.0=py37_0 67 | - jupyter_client=5.2.4=py37_0 68 | - jupyter_core=4.4.0=py37_0 69 | - libedit=3.1.20181209=hc058e9b_0 70 | - libffi=3.2.1=hd88cf55_4 71 | - libgcc-ng=8.2.0=hdf63c60_1 72 | - libgfortran-ng=7.3.0=hdf63c60_0 73 | - libglu=9.0.0=hf484d3e_1 74 | - libopencv=3.4.2=hb342d67_1 75 | - libopus=1.3=h7b6447c_0 76 | - libpng=1.6.36=hbc83047_0 77 | - libsodium=1.0.16=h1bed415_0 78 | - libstdcxx-ng=8.2.0=hdf63c60_1 79 | - libtiff=4.0.10=h2733197_2 80 | - libvpx=1.7.0=h439df22_0 81 | - markupsafe=1.1.0=py37h7b6447c_0 82 | - mistune=0.8.4=py37h7b6447c_0 83 | - mkl=2019.1=144 84 | - mkl_fft=1.0.10=py37ha843d7b_0 85 | - mkl_random=1.0.2=py37hd81dba3_0 86 | - nb_conda=2.2.1=py37_0 87 | - nb_conda_kernels=2.2.0=py37_0 88 | - nbconvert=5.3.1=py37_0 89 | - nbformat=4.4.0=py37_0 90 | - ncurses=6.1=he6710b0_1 91 | - ninja=1.8.2=py37h6bb024c_1 92 | - notebook=5.7.4=py37_0 93 | - numpy=1.15.4=py37h7e9f1db_0 94 | - numpy-base=1.15.4=py37hde5b4d6_0 95 | - olefile=0.46=py37_0 96 | - opencv=3.4.2=py37h6fd60c2_1 97 | - openssl=1.1.1a=h7b6447c_0 98 | - 
pandoc=2.2.3.2=0 99 | - pandocfilters=1.4.2=py37_1 100 | - parso=0.3.2=py37_0 101 | - pcre=8.42=h439df22_0 102 | - pickleshare=0.7.5=py37_0 103 | - pillow=5.4.1=py37h34e0f95_0 104 | - pixman=0.38.0=h7b6447c_0 105 | - prometheus_client=0.5.0=py37_0 106 | - py-opencv=3.4.2=py37hb342d67_1 107 | - pycparser=2.19=py37_0 108 | - pygments=2.3.1=py37_0 109 | - python=3.7.2=h0371630_0 110 | - pyzmq=17.1.2=py37he6710b0_2 111 | - qt=5.6.3=h8bf5577_3 112 | - readline=7.0=h7b6447c_5 113 | - scikit-learn=0.20.2=py37hd81dba3_0 114 | - scipy=1.2.1=py37h7c811a0_0 115 | - send2trash=1.5.0=py37_0 116 | - six=1.12.0=py37_0 117 | - sqlite=3.26.0=h7b6447c_0 118 | - testpath=0.4.2=py37_0 119 | - wcwidth=0.1.7=py37_0 120 | - webencodings=0.5.1=py37_1 121 | - xz=5.2.4=h14c3975_4 122 | - zeromq=4.3.1=he6710b0_3 123 | - zlib=1.2.11=h7b6447c_3 124 | - zstd=1.3.7=h0b5b093_0 125 | - pytorch=1.0.0=py3.7_cuda9.0.176_cudnn7.4.1_1 126 | - torchvision=0.2.1=py_2 127 | - pip: 128 | - torch==1.0.0 129 | - torchsummary==1.5.1 130 | prefix: /home/rafael/anaconda3/envs/icip_replication 131 | 132 | -------------------------------------------------------------------------------- /ICIP_replication/instructions.txt: -------------------------------------------------------------------------------- 1 | To align the videos of the test set (research): 2 | 3 | Run the Align_research.py script 4 | 1) The AlignWithShortestDistance() method generates the following intermediate files: 5 | /{shortest_distance}/intermediate_files/table_0X/Table_0X-Reference_01/reference_01_path_0.npy -> File with the frames of the reference video already loaded and put through flatten() 6 | /{shortest_distance}/intermediate_files/table_0X/Table_0X-Object_0X/aligned_path_0-obj-video0X.npy -> File with the frame alignment order. 7 | 8 | 2) The Generate_CSV_Files() method looks for the .npy alignment file generated in the previous step (aligned_path_0-obj-video0X.npy) and the original annotation file distributed with the database (example: /vdao_research/table_0X/Table_0X-Object_01/obj-video0X.txt - remember that this file was generated by the AlignResearch.py script and is not the aligned video; these are only the annotations of the NOT ALIGNED research database files). 9 | The Generate_CSV_Files() method will generate the .csv /{shortest_distance}/intermediate_files/table_0X/Table_0X-Object_0X/aligned_frames_path_0_table_0X_object_01.csv with the reference frames (always starting at 0 and sequential) and the corresponding aligned target frames. It will also show a column with the object annotations. 10 | Keep in mind that in the csv file, for both the reference and the target, the initial frame is 0. 11 | 12 | 13 | 3) The Generate_Frames() method looks for the file with the .csv alignment order generated in the previous step (aligned_frames_path_0_table_0X_object_01.csv) 14 | 15 | Naming convention for frames WITH OBJECTS: 16 | t{tabela}_obj{objeto}_path{path*}_tar{numero_frame_target**}_{frame_count***}.png 17 | 18 | path* = for the research database there is only one path (0), since the robot makes only one pass. 19 | numero_frame_target** = number of the target frame in the csv file. Always considering the initial frame as 0. 20 | frame_count*** = the order of the target frame (starting at 0), used to assemble the video later. 21 | 22 | Naming convention for REFERENCE frames (without objects): 23 | t{tabela}_path{path*}_ref{numero_frame_referencia****}_{frame_count*****}.png 24 | 25 | numero_frame_referencia**** => number of the reference frame in the csv file. 
Always considering the initial frame as 0. 26 | frame_count***** => the order of the TARGET frame (starting at 0). 27 | 28 | Note: to locate the target frame associated with a given reference frame, just look for the last number: 29 | Ex: 30 | t{tabela}_obj{objeto}_path{path*}_tar{numero_frame_target**}_{111}.png is associated with the reference frame t{tabela}_path{path*}_ref{numero_frame_referencia****}_{111}.png 31 | 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Detecting lost objects with Deep Learning 2 | 3 | In this repository you can find my experiments using Deep Learning on the VDAO database. 4 | 5 | ## VDAO Database ## 6 | 7 | VDAO is a video database containing annotated videos in a cluttered industrial environment. The videos were captured using a camera on a moving platform. 8 | 9 | The complete database comprises a total of 6 multi-object, 56 single-object and 4 no-object (for reference purposes) footages, acquired with two different cameras and two different light conditions, yielding an approximate total of 8.2 hours of video. A total of 77 videos form the VDAO base. Those videos are grouped into tables according to configurations such as the number of lost objects and the illumination. 10 | 11 | See [here](http://www02.smt.ufrj.br/~tvdigital/database/objects/docs/an_annotated_video_database_for_abandoned_object_detection_in_a_cluttered_environment.pdf) for the paper presenting the database. You can download the database videos and related annotation files from the **[official VDAO database webpage](http://www02.smt.ufrj.br/~tvdigital/database/objects/page_01.html)**. 12 | 13 | Or, if you prefer, you can download the videos and annotations directly from [here](VDAO.md). The links just point to the official distribution. 14 | 15 | You can find a bunch of useful tools to play with the VDAO database in the **[VDAO_Access Project](https://github.com/rafaelpadilla/DeepLearning-VDAO/tree/master/VDAO_Access)**. 16 | 17 | The images below show examples of **reference frames** (no object) and **target frames** (with objects to be detected). 18 | 19 | 20 |
21 | [image: examples of VDAO reference frames] 22 | Examples of the VDAO dataset reference frames (no objects) 23 | 24 | 25 | [image: examples of VDAO target frames] 26 | Examples of the VDAO dataset target frames (objects manually annotated with bounding boxes) 27 | 28 |
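The snippet below is a minimal sketch of how such frames can be inspected with the VDAO_Access tools mentioned above (the video path is a placeholder; `VDAO_Access/Demo.py` later in this repository shows more complete examples):

``` python
from VDAOVideo import VDAOVideo

# Hypothetical path to a locally downloaded VDAO video
my_video = VDAOVideo('/home/user/VDAO/obj-sing-amb-part01-video02.avi')
my_video.videoInfo.printAllInformation()  # metadata such as frame rate and number of frames
my_video.PlayVideo(showInfo=True)         # plays the video with its information overlaid
```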
29 | 30 | ### VDAO Alignment ### 31 | 32 | As mentioned before, the VDAO database has 77 videos divided into 10 tables. Each table (except table 01) has one reference video and multiple videos containing lost objects. For some applications, it is necessary to perform a temporal alignment between the target videos (with objects) and the reference videos. 33 | 34 | Part of this project focused on performing the temporal alignment of the target videos to their corresponding reference ones. The frame correspondences can be found [here](https://github.com/rafaelpadilla/DeepLearning-VDAO/blob/master/VDAO_Alignment.md). 35 | 36 | ## YOLO ## 37 | 38 | Yolo (You Only Look Once) is a real-time object detection and classification method that obtained excellent results on the [Pascal VOC dataset](http://host.robots.ox.ac.uk:8080/pascal/VOC/). So far, yolo has two versions: **Yolo V1** and **Yolo V2**, also referred to as **Yolo 9000**. Click on the image below to watch Yolo 9000's promo video. 39 | 40 | 41 | [video thumbnail: Yolo 9000 promo video on YouTube] 42 | 43 | 44 | 45 | The authors have created a website explaining how it works, how to use it and how to train yolo with your own images. Check the references below: 46 | 47 | YOLO: **You Only Look Once: Unified, Real-Time Object Detection** (2016) 48 | (Joseph Redmon, Santosh Divvala, Ross Girshick, Ali Farhadi) 49 | [[site](https://arxiv.org/abs/1506.02640)] 50 | [[pdf](https://bitbucket.org/rafaelpadilla/mythesis/src/ad0d4d320df4c5897bdda58bbffd83055902d98b/materials/%5Bpaper%5D%20YOLO.pdf)] 51 | [[slides](https://bitbucket.org/rafaelpadilla/mythesis/src/ad0d4d320df4c5897bdda58bbffd83055902d98b/materials/%5Bslides%5D%20YOLO%20CVPR%202016.pdf)] 52 | [[talk](https://www.youtube.com/watch?v=NM6lrxy0bxs)] 53 | [[ted talk](https://www.youtube.com/watch?v=Cgxsv1riJhI)] 54 | 55 | **YOLO9000: Better, Faster, Stronger** (2017) 56 | (Joseph Redmon, Ali Farhadi) 57 | [[site](https://arxiv.org/abs/1612.08242)] 58 | [[pdf](https://bitbucket.org/rafaelpadilla/mythesis/src/636e8f075be4e5186777c66ddbe8cb2ad0797fab/materials/%5Bpaper%5D%20YOLO9000.pdf)] 59 | [[talk](https://www.youtube.com/watch?v=GBu2jofRJtk)] 60 | [[slides](https://bitbucket.org/rafaelpadilla/mythesis/src/ad0d4d320df4c5897bdda58bbffd83055902d98b/materials/%5Bslides%5D%20YOLO9000%20CVPR%202017.pdf)] 61 | 62 | **YOLO: People talking about it** 63 | [[Andrew NG](https://www.youtube.com/watch?v=9s_FpMpdYW8)] 64 | [[Siraj Raval](https://www.youtube.com/watch?v=4eIBisqx9_g)] 65 | 66 | **YOLO: People writing about it (Explanations and codes)** 67 | [[Towards data science](https://towardsdatascience.com/yolo-you-only-look-once-real-time-object-detection-explained-492dc9230006)]: A brief summary of yolo and how it works. 68 | [[Machine Think blog](http://machinethink.net/blog/object-detection-with-yolo/)]: A brief summary of yolo and how it works. 69 | [[Timebutt's github](https://timebutt.github.io/static/how-to-train-yolov2-to-detect-custom-objects/)]: A tutorial explaining how to train yolo 9000 to detect a single class object. 70 | [[Timebutt's github](https://timebutt.github.io/static/understanding-yolov2-training-output/)]: Read this if you want to understand yolo's training output. 71 | [[Cvjena's github](https://github.com/cvjena/darknet/blob/master/cfg/yolo.cfg)]: Comments on some of the tags used in the cfg files. 72 | [[Guanghan Ning's blog](http://guanghan.info/blog/en/my-works/train-yolo/)]: A tutorial explaining how to train yolo v1 with your own data. The author used two classes (yield and stop signs). 73 | [[AlexeyAB's github](https://github.com/AlexeyAB/)]: Very good project forked from yolo 9000, supporting Windows and Linux. 74 | [[Google's Group](https://groups.google.com/forum/#!forum/darknet)]: Excellent source of information. People ask and answer doubts about darknet and yolo. 75 | [[Guanghan Ning's blog](http://guanghan.info/blog/en/my-works/yolo-cpu-running-time-reduction-basic-knowledge-and-strategies)]: Studies and analysis on reducing the running time of Yolo on CPU. 76 | [[Guanghan Ning's blog](http://guanghan.info/projects/ROLO/)]: Recurrent YOLO. This is an interesting work mixing a recurrent network and yolo for object tracking.
77 | 78 | ### Yolo's pretrained weights and cfg files ### 79 | 80 | Find below pretrained weights to be used with its respective networks: 81 | 82 | **To do** 83 | -------------------------------------------------------------------------------- /VDAO_Access/Annotation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import sys 4 | 5 | import utils 6 | 7 | 8 | class Annotation: 9 | """ 10 | This class represents a .txt file containing the labels and bounding 11 | boxes of objects in a given video. 12 | 13 | Developed by: Rafael Padilla 14 | SMT - Signal Multimedia and Telecommunications Lab 15 | COPPE - Universidade Federal do Rio de Janeiro 16 | Last modification: March 9th 2018 17 | """ 18 | 19 | def __init__(self, annotationFilePath=None, totalFrames=None): 20 | self.totalFrames = totalFrames 21 | self.annotationFilePath = annotationFilePath 22 | self.listAnnotation = [] 23 | self.parsed = False 24 | self.error = False 25 | 26 | # Returns True if parse was successfully done, otherwise returns False 27 | def _parseFile(self): 28 | if self.annotationFilePath is None or os.path.exists(self.annotationFilePath) is False: 29 | self.parsed = False 30 | self.error = True 31 | return self.parsed 32 | 33 | # If total number of frames were not provided, read it from the video file 34 | if self.totalFrames is None: 35 | import VDAO_Access.VDAOVideo as VDAOVideo 36 | vdao_video = VDAOVideo.VDAOVideo(self.annotationFilePath.replace('.txt', '.avi')) 37 | self.totalFrames = vdao_video.videoInfo.getNumberOfFrames() 38 | 39 | # (*) Annotation objects (not the file) will always start counting from 1 40 | # it means that if there are 8632 frames, listAnnotation will have 41 | # length of 8633, the first position (annot[0]) will always be discarded 42 | # Annotation file has its first frame count as 0 43 | 44 | # create array from 0 to totalFrames+1 45 | items = self.totalFrames + 1 46 | # self.listAnnotation.clear() 47 | self.listAnnotation = [] 48 | [self.listAnnotation.append([]) for i in range(items)] 49 | 50 | f = open(self.annotationFilePath, "r") 51 | for line in f: 52 | params = line.split(' ') 53 | readFrame = int(params[1]) + 1 #(*) 54 | # Sometimes VDAO annotation file has annotated more frames than the video has. 
55 | # Ex: annotation file 'obj-mult-ext-part02-video01.avi' has line: 'greenBox0 29039 0 0 0 0 3', but this file contains only 29034 frames 56 | if readFrame < items: 57 | # class, (x,y,r,b), subObj, 'class (subobj)', frame 58 | self.listAnnotation[readFrame].append([ 59 | params[0], (int(params[2]), int(params[3]), int(params[4]), int(params[5])), 60 | int(params[6]), ('%s (%s)' % (params[0], params[6].replace('\n', ''))), 61 | int(params[1]) 62 | ]) 63 | f.close() 64 | self.parsed = True 65 | self.error = False 66 | return self.parsed 67 | 68 | # Return True if Annotation file is valid, otherwise return False 69 | def IsValid(self): 70 | if self.parsed is False: 71 | return self._parseFile() 72 | else: 73 | return not self.error 74 | 75 | def GetClassesObjects(self): 76 | if self.parsed is False: 77 | self._parseFile() 78 | listObjects = [] 79 | [[listObjects.append(bb[0][0:len(bb[0]) - 1]) for bb in annotation] 80 | for annotation in self.listAnnotation] 81 | return list(set(listObjects)) 82 | 83 | # Ex: [0] = ('shoe0', (a,b,c,d), ..., 1) -> Frame 1 has 'shoe0' in bb (a,b,c,d) 84 | # [1] = ('backpack1', (e,f,g,h), ...,3) -> Frame 3 has 'backpack1' in bb (e,f,g,h) 85 | # [2] = ('backpack1', (e,f,g,h), ...,4) ('bottle1', (i,j,k,l), 4) -> Frame 4 has 'backpack1' in bb (e,f,g,h) and 'bottle1' in bb (i,j,k,l) 86 | def GetNonEmptyFrames(self): 87 | if self.parsed is False: 88 | self._parseFile() 89 | return list(filter(lambda annot: annot != [], (annot for annot in self.listAnnotation))) 90 | 91 | def GetNumberOfAnnotatedFrames(self): 92 | if self.parsed is False: 93 | self._parseFile() 94 | nonEmptyFrames = self.GetNonEmptyFrames() 95 | minObj = (sys.maxsize, -1, -1, -1, -1, -1) # (area,frame,x,y,r,b) 96 | maxObj = (-1, -1, -1, -1, -1, -1) # (area,frame,x,y,r,b) 97 | counted = [] 98 | for nef in nonEmptyFrames: 99 | for f in nef: 100 | counted.append(f[4]) 101 | area = (f[1][0] - f[1][2]) * (f[1][1] - f[1][3]) #(x2-x1)*(y2-y1) 102 | if area < minObj[0]: 103 | minObj = (area, f[4], f[1]) 104 | if area > maxObj[0]: 105 | maxObj = (area, f[4], f[1]) 106 | return [len(set(counted)), min(counted), max(counted), minObj, maxObj] 107 | 108 | @staticmethod 109 | def FilterOnlySpecificObjects(refAnnotation, labels): 110 | if refAnnotation.parsed is False: 111 | refAnnotation._parseFile() 112 | # Create a new annotation object the with the same annotations as the reference one 113 | annot = Annotation() 114 | items = len(refAnnotation.listAnnotation) 115 | [annot.listAnnotation.append([]) for i in range(items)] 116 | 117 | frameNumber = 0 118 | for annotation in refAnnotation.listAnnotation: 119 | if annotation != []: 120 | for label in labels: 121 | for a in annotation: 122 | if a[0].lower().startswith(label.lower()): 123 | annot.listAnnotation[frameNumber].append(a) 124 | frameNumber = frameNumber + 1 125 | # sort by frame position 126 | return annot 127 | 128 | @staticmethod 129 | def FilterOnlyNonOverlappingObjects(refAnnotation): 130 | if refAnnotation.parsed is False: 131 | refAnnotation._parseFile() 132 | 133 | # Create a new annotation object the with the same annotations as the reference one 134 | annot = Annotation() 135 | items = len(refAnnotation.listAnnotation) 136 | [annot.listAnnotation.append([]) for i in range(items)] 137 | 138 | # go through each annotation (bounding box) 139 | boxes = [] 140 | for frameId in range(len(refAnnotation.listAnnotation)): 141 | boxes.clear() 142 | for a in range(len(refAnnotation.listAnnotation[frameId])): 143 | box = 
refAnnotation.listAnnotation[frameId][a][1]
144 | boxes.append(box)
145 | nonOverlappedBoxes, idx = utils.getNonOverlappedBoxes(boxes)
146 | 
147 | for b in range(len(nonOverlappedBoxes)):
148 | annot.listAnnotation[frameId].append(refAnnotation.listAnnotation[frameId][idx[b]])
149 | return annot
150 | 
151 | @staticmethod
152 | def FilterByObjectsArea(refAnnotation, minArea=-1, maxArea=sys.float_info.max):
153 | if refAnnotation.parsed is False:
154 | refAnnotation._parseFile()
155 | # Create a new annotation object with the same annotations as the reference one
156 | annot = Annotation()
157 | items = len(refAnnotation.listAnnotation)
158 | [annot.listAnnotation.append([]) for i in range(items)]
159 | # Get all annotations that have the specific bounding boxes
160 | for frameId in range(len(refAnnotation.listAnnotation)):
161 | filteredItems = []
162 | for f in refAnnotation.listAnnotation[frameId]:
163 | area = abs(f[1][0] - f[1][2]) * abs(f[1][1] - f[1][3]) #(x2-x1)*(y2-y1)
164 | if area >= minArea and area <= maxArea:
165 | filteredItems.append(f)
166 | annot.listAnnotation[frameId] = filteredItems
167 | annot.error = False
168 | annot.parsed = True
169 | annot.annotationFilePath = refAnnotation.annotationFilePath
170 | annot.totalFrames = len(annot.listAnnotation)
171 | return annot
172 | 
173 | def GetObjectsArea(self, classes_to_filter=None):
174 | if self.parsed is False:
175 | self._parseFile()
176 | 
177 | # If no classes are specified, consider all classes in the file
178 | if classes_to_filter is None:
179 | classes_to_filter = self.GetClassesObjects()
180 | 
181 | # Dictionary mapping each class to the list of areas to be returned
182 | ret_areas_classes = {}
183 | 
184 | # Get only annotations of the classes specified in the filter
185 | for _ann in self.listAnnotation:
186 | if _ann == []:
187 | continue
188 | # Get areas of the bounding boxes of all classes
189 | areas = [abs(bb[1][0] - bb[1][2]) * abs(bb[1][1] - bb[1][3]) for bb in _ann]
190 | # classes = [bb[0] for bb in _ann]
191 | classes = [re.sub(r"\d+", "", bb[0]) for bb in _ann]
192 | # Adding classes and quantities to the dictionary, keeping only the classes present in the filter
193 | for c, qty in ((c, a) for c, a in zip(classes, areas) if c in classes_to_filter):
194 | if c not in ret_areas_classes:
195 | ret_areas_classes[c] = []
196 | ret_areas_classes[c].append(qty)
197 | 
198 | return ret_areas_classes
199 | 
200 | def get_object_areas_and_proportions(self, classes_to_filter=None):
201 | if self.parsed is False:
202 | self._parseFile()
203 | 
204 | # If no classes are specified, consider all classes in the file
205 | if classes_to_filter is None:
206 | classes_to_filter = self.GetClassesObjects()
207 | 
208 | # Dictionary mapping each class to the (area, proportion) pairs to be returned
209 | ret_areas_proportions_classes = {}
210 | 
211 | # Get only annotations of the classes specified in the filter
212 | for _ann in self.listAnnotation:
213 | if _ann == []:
214 | continue
215 | # Get areas of the bounding boxes of all classes
216 | areas = [abs(bb[1][0] - bb[1][2]) * abs(bb[1][1] - bb[1][3]) for bb in _ann]
217 | # proportion: width/height
218 | proportions = []
219 | for bb in _ann:
220 | width = (bb[1][2] - bb[1][0])
221 | height = (bb[1][3] - bb[1][1])
222 | 
223 | if height != 0:
224 | prop = abs(width / height)
225 | if prop != 0:
226 | proportions.append(prop)
227 | else:
228 | proportions.append(None)
229 | else:
230 | proportions.append(None)
231 | # classes = [bb[0] for bb in _ann]
232 | classes = [re.sub(r"\d+", "", bb[0]) for bb in _ann]
233 | # Adding classes and quantities to the dictionary, keeping only the classes present in the filter
234 | for c, area, prop in ((c, a, p) for c, a, p in zip(classes, areas, proportions) if c in classes_to_filter):
235 | if c not in ret_areas_proportions_classes:
236 | ret_areas_proportions_classes[c] = []
237 | ret_areas_proportions_classes[c].append((area, prop))
238 | 
239 | return ret_areas_proportions_classes
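# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only; the annotation path below is a
# hypothetical example, not a file shipped with this repository):
#
#   annot = Annotation(annotationFilePath='/home/user/VDAO/obj-sing-amb-part01-video02.txt',
#                      totalFrames=8631)
#   if annot.IsValid():
#       print(annot.GetClassesObjects())            # e.g. ['shoe', 'backpack']
#       areas = annot.GetObjectsArea()              # {class: [bb areas in pixels]}
#       large = Annotation.FilterByObjectsArea(annot, minArea=5000)
# ---------------------------------------------------------------------------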
240 | 
--------------------------------------------------------------------------------
/VDAO_Access/Demo.py:
--------------------------------------------------------------------------------
1 | ###########################################################################################
2 | # Here you can find demo code showing how to access and manipulate the VDAO database.     #
3 | #
4 | # Developed by: Rafael Padilla
5 | # SMT - Signal Multimedia and Telecommunications Lab
6 | # COPPE - Universidade Federal do Rio de Janeiro
7 | # Last modification: Dec 17th 2017
8 | ############################################################################################
9 | import os
10 | import cv2
11 | from VDAOVideo import VDAOVideo
12 | from VDAOHelper import VideoType, ImageExtension
13 | from ObjectHelper import ObjectDatabase
14 | 
15 | #######################################################################
16 | # Example 00: Create the object VDAOVideo
17 | #######################################################################
18 | # Creating a VDAOVideo object using a video path.
19 | myVideo = VDAOVideo("/home/user/VDAO/ref-mult-ext-part02-video01.avi")
20 | # Creating a VDAOVideo object identifying it as a reference video (it contains no lost objects)
21 | myVideo = VDAOVideo("/home/user/VDAO/ref-mult-ext-part02-video01.avi", VideoType.Reference)
22 | 
23 | #######################################################################
24 | # Example 01: Obtain different information of a video
25 | #######################################################################
26 | # Get the number of frames
27 | numberOfFrames = myVideo.videoInfo.getNumberOfFrames()
28 | print("Number of frames: "+ str(numberOfFrames))
29 | # Get frame rate
30 | frameRate = myVideo.videoInfo.getFrameRate()
31 | print("Frame rate: "+ frameRate)
32 | 
33 | 
34 | #######################################################################
35 | # Example 02: Obtain all information of a video
36 | #######################################################################
37 | # Print all information about the video
38 | myVideo.videoInfo.printAllInformation()
39 | 
40 | #######################################################################
41 | # Example 03: Obtaining and saving specific frames within the video
42 | #######################################################################
43 | # Get every 2nd frame from 1 to 15 and save them
44 | myVideo.SkipAndSaveFrames(startingFrame=1, endingFrame=15, framesToSkip=2, outputFolder='/home/user/VDAO/outputs', filePrefix='myFrame_', showInfo=True)
45 | 
46 | #######################################################################
47 | # Example 04: Play a VDAO video
48 | #######################################################################
49 | # Play video showing information about the video
50 | myVideo.PlayVideo(showInfo=True)
51 | # Play video without showing information about the video
52 | myVideo.PlayVideo(showInfo=False)
53 | 
54 | # Set the folder where the annotation file (.txt) and the video (.avi) are
55 | folder = '/home/user/VDAO'
56 | # Set the paths for the video and its annotation file
57 | videoPath = os.path.join(folder, 'obj-sing-amb-part01-video02.avi')
58 | annotationPath = os.path.join(folder, 'obj-sing-amb-part01-video02.txt')
59 | # Create VDAOvideo object
60 | vdao =
VDAOVideo(videoPath, annotationFilePath=annotationPath)
61 | # Play the video setting the parameter showBoundingBoxes to True
62 | vdao.PlayVideo(showBoundingBoxes=True)
63 | 
64 | 
65 | #######################################################################
66 | # Example 05: Get the bounding box of a mask
67 | #######################################################################
68 | # Read image from the ALOI database
69 | mask = cv2.imread('/home/user/ALOI/mask4/1/1_c1.png')
70 | # Use static method to get the ROI of the mask
71 | [min_x, min_y, max_x, max_y] = ObjectDatabase.getBoundingBoxMask(mask)
72 | # Draw rectangle representing the ROI and show it
73 | cv2.rectangle(mask,(min_x,min_y),(max_x,max_y),(0,255,0),1)
74 | # Show image
75 | cv2.imshow('ROI', mask)
76 | cv2.waitKey(0)
77 | 
78 | #######################################################################
79 | # Example 06: Merge an object with its mask
80 | #######################################################################
81 | # Read image from the ALOI database
82 | myObjectPath = '/home/user/ALOI/png4/259/259_c.png'
83 | myMaskPath = '/home/user/ALOI/mask4/259/259_c.png'
84 | blendedImage = ObjectDatabase.blendImageAndMask(objPath = myObjectPath, maskPath=myMaskPath)
85 | cv2.imshow('Blended image', blendedImage)
86 | cv2.waitKey(0)
87 | 
88 | #######################################################################
89 | # Example 07: Get random ALOI object and merge it with its mask
90 | #######################################################################
91 | # Creating ObjectDatabase passing the images and masks paths
92 | aloi = ObjectDatabase(imagesPath='/home/user/ALOI/png4', \
93 | masksPath='/home/user/ALOI/mask4')
94 | # Get a random object and its mask
95 | [mergedImage, (min_x, min_y, max_x, max_y)] = aloi.getRandomObject()
96 | # Draw a green rectangle around the merged image
97 | cv2.rectangle(mergedImage,(min_x,min_y),(max_x,max_y),(0,255,0),1)
98 | cv2.imshow('image', mergedImage)
99 | cv2.waitKey(0)
100 | 
101 | #######################################################################
102 | # FAQ
103 | #######################################################################
104 | 
105 | # Q: How do I slice a video from frame 1 to 1001 skipping every 100 frames and save the frames in JPEG format?
106 | # Create myVideo object
107 | myVideo = VDAOVideo("/home/user/VDAO/ref-mult-ext-part02-video01.avi")
108 | # Save the frames, skipping every 100, into the output folder
109 | # The jpegQuality can go from 0 to 100 (the higher, the better). Default is 95.
110 | myVideo.SkipAndSaveFrames(startingFrame=1, endingFrame=1001, framesToSkip=100, \
111 | outputFolder='/home/user/VDAO/outputs', \
112 | extension=ImageExtension.JPG, jpegQuality=100)
113 | 
114 | # Q: How do I slice a video from frame 1 to 1001 skipping every 100 frames and save the frames in PNG format?
115 | # Create myVideo object
116 | myVideo = VDAOVideo("/home/user/VDAO/ref-mult-ext-part02-video01.avi")
117 | # Save the frames, skipping every 100, into the output folder
118 | # The compressionLevel can go from 0 to 9 (higher value means a smaller size
119 | # and longer compression time). Default is 3.
120 | myVideo.SkipAndSaveFrames(startingFrame=1, endingFrame=1001, framesToSkip=100, \
121 | outputFolder='/home/user/VDAO/outputs', \
122 | extension=ImageExtension.PNG, compressionLevel=1)
123 | 
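# Q (illustrative addition, not part of the original demo): How do I crop the first annotated object of a given frame?
# The paths and the frame number below are hypothetical examples.
from Annotation import Annotation
myVideo = VDAOVideo("/home/user/VDAO/obj-sing-amb-part01-video02.avi")
annot = Annotation(annotationFilePath='/home/user/VDAO/obj-sing-amb-part01-video02.txt')
if annot.IsValid():
    frameNumber = 100 # hypothetical frame number
    ret, frame, _ = myVideo.GetFrame(frameNumber)
    # listAnnotation entries are [label, (x, y, r, b), ...], indexed starting at 1
    if ret and annot.listAnnotation[frameNumber] != []:
        label, (x, y, r, b) = annot.listAnnotation[frameNumber][0][0:2]
        cv2.imshow(label, frame[y:b, x:r])
        cv2.waitKey(0)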
124 | #Q: How do I visualize a specific frame within a video?
125 | # Create myVideo object
126 | myVideo = VDAOVideo("/home/user/VDAO/ref-mult-ext-part02-video01.avi")
127 | # Get frame number 530
128 | ret, frame, _ = myVideo.GetFrame(530)
129 | # Check if frame was successfully retrieved
130 | if ret:
131 | # Show frame
132 | cv2.imshow('frame', frame)
133 | cv2.waitKey(0)
134 | 
135 | 
136 | #Q: How to know which compression method was used in a video?
137 | # Create myVideo object
138 | myVideo = VDAOVideo("/home/user/VDAO/ref-mult-ext-part02-video01.avi")
139 | # Get codec info
140 | codec = myVideo.videoInfo.getCodecLongType()
141 | print('Codec: %s' % codec)
142 | 
143 | #Q: How do I get the frame rate of a video?
144 | # Create myVideo object
145 | myVideo = VDAOVideo("/home/user/VDAO/ref-mult-ext-part02-video01.avi")
146 | # Get frame rate
147 | frameRate = myVideo.videoInfo.getFrameRate()
148 | print('Frame rate: %s' % frameRate)
149 | 
150 | #Q: How do I get the total number of frames in a video?
151 | # Create myVideo object
152 | myVideo = VDAOVideo("/home/user/VDAO/ref-mult-ext-part02-video01.avi")
153 | # Get frames
154 | totalFrames = myVideo.videoInfo.getNumberOfFrames()
155 | print('Total frames: %s' % totalFrames)
156 | 
157 | #Q: How to merge an image and its mask into a single image?
158 | # Get image and mask paths
159 | myObjectPath = '/home/user/ALOI/png4/259/259_c.png'
160 | myMaskPath = '/home/user/ALOI/mask4/259/259_c.png'
161 | # Merge image with its mask
162 | blendedImage = ObjectDatabase.blendImageAndMask(objPath = myObjectPath, maskPath=myMaskPath)
163 | # Show result
164 | cv2.imshow('Blended image', blendedImage)
165 | cv2.waitKey(0)
166 | 
167 | #Q: How to obtain the bounding box of a mask?
168 | # Get mask path
169 | myMaskPath = '/home/user/ALOI/mask4/259/259_c.png'
170 | # Get bounding box
171 | [min_x, min_y, max_x, max_y] = ObjectDatabase.getBoundingBoxMask(myMaskPath)
172 | print("Upper left point: (%s, %s)" % (min_x, min_y))
173 | print("Width: %s" % (max_x-min_x))
174 | print("Height: %s" % (max_y-min_y))
175 | 
176 | #Q: How do I play a VDAO video?
177 | # Create myVideo object
178 | myVideo = VDAOVideo("/home/user/VDAO/ref-mult-ext-part02-video01.avi")
179 | # Play
180 | myVideo.PlayVideo(showInfo=False)
181 | 
182 | #Q: How do I play a VDAO video showing the bounding boxes in the annotation file?
183 | # Create myVideo object
184 | myVideo = VDAOVideo(videoPath="/home/user/VDAO/obj-sing-amb-part01-video02.avi", annotationFilePath='/home/user/VDAO/obj-sing-amb-part01-video02.txt')
185 | # Play
186 | myVideo.PlayVideo(showInfo=False, showBoundingBoxes=True)
187 | 
188 | #Q: How do I add an object to an image making a smooth transition in the border regions?
189 | objectPath = '/home/user/ALOI/png4/259/259_c.png'
190 | maskPath = '/home/user/ALOI/mask4/259/259_c.png'
191 | backgroundPath = '/home/user/Backgrounds/bg_1.jpg'
192 | # Set some parameters for the image
193 | scale = 1
194 | angle = 30
195 | # Blend mask, image and background
196 | resultImage, _ = ObjectDatabase.blendImageAndBackground_2(objectPath, maskPath, backgroundPath, xIni=20, yIni=70, scaleFactor=scale, rotAngle=angle)
197 | # Show result
198 | cv2.imshow('final image', resultImage/255)
199 | cv2.waitKey(0)
--------------------------------------------------------------------------------
/VDAO_Access/VDAO_files/CheckBar.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | if sys.version_info[0] < 3: # import Tkinter or tkinter depending on the Python version
4 | import Tkinter as tk
5 | else:
6 | import tkinter as tk
7 | 
8 | 
9 | class CheckBar:
10 | def __init__(self,
11 | parent=None,
12 | picks=[],
13 | maxCol=5,
14 | commands=None,
15 | root=None,
16 | initialState='normal'):
17 | self.DescriptionElementAll = None
18 | self.CommandElementAll = None
19 | self.vars = []
20 | self.checkbuttons = []
21 | self.parent = parent
22 | self.maxCol = maxCol
23 | self.allPicks = picks
24 | if commands == None:
25 | self.commands = [None for i in picks]
26 | else:
27 | self.commands = commands
28 | self.maxWidth = len(CheckBar.getMaxWord(self.allPicks))
29 | self.AddElements()
30 | self.initialState = initialState
31 | if self.initialState == 'disable':
32 | self.DisableAllElements()
33 | 
34 | def Add(self, picks=[], anchor=tk.W, commands=None):
35 | # r = int(len(self.allPicks)/self.maxCol)
36 | # c = len(self.allPicks)%self.maxCol
37 | self.RemoveAllElements()
38 | self.allPicks = self.allPicks + picks
39 | if commands != None:
40 | self.commands = self.commands + commands
41 | self.maxWidth = len(CheckBar.getMaxWord(self.allPicks))
42 | self.AddElements()
43 | 
44 | def AddElements(self):
45 | r = c = 0
46 | var = tk.IntVar()
47 | for idx in range(len(self.allPicks)):
48 | var = tk.IntVar()
49 | chk = tk.Checkbutton(
50 | self.parent,
51 | text=' %s' % self.allPicks[idx],
52 | variable=var,
53 | onvalue=1,
54 | offvalue=0,
55 | pady=2,
56 | width=self.maxWidth + 2,
57 | anchor=tk.W,
58 | command=self.commands[idx])
59 | self.checkbuttons.append(chk)
60 | chk.grid(row=r, column=c)
61 | c = c + 1
62 | if c >= self.maxCol:
63 | c = 0
64 | r = r + 1
65 | self.vars.append(var)
66 | 
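# Layout note for AddElements above (hypothetical values): with maxCol=5, the
# idx-th checkbutton ends up at grid row idx // 5 and column idx % 5, so the
# 7th item (idx=6) lands at row 1, column 1.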
67 | def AddElementAll(self, description, command=None):
68 | self.RemoveAllElements(removeCommands=False) # don't remove all commands; they'll be used
69 | self.DescriptionElementAll = description
70 | self.CommandElementAll = command
71 | self.commands = [self.AllIsSelected] + [c for c in self.commands]
72 | self.allPicks = [description] + self.allPicks
73 | self.maxWidth = len(CheckBar.getMaxWord(self.allPicks))
74 | self.AddElements()
75 | if self.initialState == 'disable': # Apply initial state
76 | self.DisableAllElements()
77 | 
78 | def RemoveAllElements(self, removeCommands=False):
79 | for widget in self.parent.winfo_children():
80 | widget.destroy()
81 | self.vars = []
82 | self.checkbuttons = []
83 | self.DescriptionElementAll = None
84 | if removeCommands:
85 | self.commands = []
86 | 
87 | def DisableAllElements(self):
88 | for widget in self.parent.winfo_children():
89 | widget.configure(state='disabled')
90 | 
91 | def EnableAllElements(self):
92 | for widget in self.parent.winfo_children():
93 | widget.configure(state='normal')
94 | 
95 | def GetStates(self):
96 | myDict = {}
97 | for idx in range(len(self.vars)):
98 | myDict[self.allPicks[idx]] = self.vars[idx].get()
99 | # return map((lambda var: var.get()), self.vars)
100 | return myDict
101 | 
102 | def GetOnlySelected(self):
103 | ret = []
104 | for idx in range(len(self.vars)):
105 | if self.vars[idx].get() == 1:
106 | ret.append(self.allPicks[idx])
107 | if self.DescriptionElementAll in ret:
108 | ret.remove(self.DescriptionElementAll)
109 | return ret
110 | 
111 | def GetOnlySelectedAndEnabled(self):
112 | ret = []
113 | for idx in range(len(self.vars)):
114 | if self.vars[idx].get() == 1 and self.checkbuttons[idx].cget('state') != 'disabled':
115 | ret.append(self.allPicks[idx])
116 | if self.DescriptionElementAll in ret:
117 | ret.remove(self.DescriptionElementAll)
118 | return ret
119 | 
120 | def AllIsSelected(self):
121 | states = self.GetStates()
122 | if states[next(iter(states))] == 1:
123 | # Check all items
124 | for chkButton in self.checkbuttons:
125 | chkButton.select()
126 | else:
127 | # Uncheck all items
128 | for chkButton in self.checkbuttons:
129 | chkButton.deselect()
130 | if self.CommandElementAll != None:
131 | self.CommandElementAll()
132 | 
133 | @staticmethod
134 | def getMaxWord(words):
135 | if len(words) == 0:
136 | return ''
137 | maxWord = words[0]
138 | for w in words:
139 | if len(w) > len(maxWord):
140 | maxWord = w
141 | return maxWord
142 | 
--------------------------------------------------------------------------------
/VDAO_Access/VDAO_files/InputWindow.py:
--------------------------------------------------------------------------------
1 | import sys
2 | if sys.version_info[0] <= 2: # import Tkinter or tkinter depending on the Python version
3 | import Tkinter as tk
4 | import ttk
5 | else:
6 | import tkinter as tk
7 | import tkinter.ttk as ttk
8 | 
9 | class InputWindow:
10 | 
11 | def btnOK_Click(self):
12 | self.Enter_keyDown()
13 | 
14 | def btnCancel_Click(self):
15 | self.eventClose(frameNumber=None)
16 | self.root.destroy()
17 | 
18 | def Enter_keyDown(self, event=None):
19 | try:
20 | value = int(self.entryVar.get())
21 | if value < self.minValue or value > self.maxValue:
22 | self.lblFrame.config(foreground="red")
23 | else:
24 | self.lblFrame.config(foreground="black")
25 | self.eventClose(value)
26 | self.root.destroy()
27 | except ValueError:
28 | self.lblFrame.config(foreground="red")
29 | 
30 | def Escape_keyDown(self,event=None):
31 | self.root.destroy()
32 | 
33 | def __init__(self, parent, eventClose, title="", minValue=0, maxValue=0):
34 | self.root = parent
35 | self.eventClose = eventClose
36 | self.minValue =
minValue
37 | self.maxValue = maxValue
38 | # Panel with label and entry box
39 | pnlEntry = tk.PanedWindow(self.root)
40 | pnlEntry.pack(fill=tk.BOTH, expand=True)
41 | # Label
42 | lblInstructions = tk.Label(pnlEntry, text='Enter the frame number between %d and %d:\n' % (minValue, maxValue))
43 | lblInstructions.pack(fill=tk.BOTH, expand=True)
44 | # Label
45 | self.lblFrame = tk.Label(pnlEntry, text='Frame: ')
46 | self.lblFrame.pack(side=tk.LEFT)
47 | # Entry
48 | self.entryVar = tk.StringVar()
49 | entry = tk.Entry(pnlEntry, textvariable=self.entryVar)
50 | entry.pack(side=tk.LEFT, anchor=tk.E, fill=tk.BOTH, expand=True)
51 | # Panel with buttons
52 | pnlButtons = tk.PanedWindow(self.root)
53 | pnlButtons.pack(fill=tk.BOTH, expand=True)
54 | # Button OK
55 | btnOk = tk.Button(pnlButtons, text="OK", command=self.btnOK_Click)
56 | btnOk.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
57 | # Button Cancel
58 | btnCancel = tk.Button(pnlButtons, text="Cancel", command=self.btnCancel_Click)
59 | btnCancel.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True)
60 | # Bind key events
61 | entry.bind('<Return>', self.Enter_keyDown)
62 | entry.bind('<Escape>', self.Escape_keyDown)
63 | # Focus on the entry widget
64 | entry.focus()
65 | 
66 | def Center(self, referenceWindow):
67 | # Gets the requested values of the height and width.
68 | windowWidth = referenceWindow.winfo_reqwidth()
69 | windowHeight = referenceWindow.winfo_reqheight()
70 | # Gets both half the screen width/height and window width/height
71 | positionRight = int(referenceWindow.winfo_screenwidth()/2 - windowWidth/2)
72 | positionDown = int(referenceWindow.winfo_screenheight()/2 - windowHeight/2)
73 | # Positions the window in the center of the page.
74 | self.root.geometry("+{}+{}".format(positionRight, positionDown))
75 | 
76 | # root = tk.Tk()
77 | # inputWindow = InputWindow(parent=root, eventClose=None, title="Entry with a frame number", minValue=0, maxValue=5)
78 | # inputWindow.Center(root)
79 | # root.mainloop()
--------------------------------------------------------------------------------
/VDAO_Access/VDAO_files/ListBox.py:
--------------------------------------------------------------------------------
1 | import tkinter as tk
2 | import tkinter.font as tkFont
3 | import tkinter.ttk as ttk
4 | from PIL import ImageTk, Image
5 | 
6 | class MyListBox:
7 | """use a ttk.TreeView as a multicolumn ListBox"""
8 | def __init__(self, parent, headers, itemSelectedCallBack):
9 | self.parent = parent
10 | self.headers = headers
11 | # Setup Widget
12 | # self.tree = None
13 | # container.pack(fill='both', expand=True)
14 | # create a treeview with dual scrollbars
15 | self.tree = ttk.Treeview(columns=headers, show="headings")
16 | vsb = ttk.Scrollbar(orient="vertical", command=self.tree.yview)
17 | hsb = ttk.Scrollbar(orient="horizontal", command=self.tree.xview)
18 | self.tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)
19 | self.tree.grid(column=0, row=0, sticky='nsew', in_=self.parent)
20 | vsb.grid(column=1, row=0, sticky='ns', in_=self.parent)
21 | hsb.grid(column=0, row=1, sticky='ew', in_=self.parent)
22 | self.parent.grid_columnconfigure(0, weight=1)
23 | self.parent.grid_rowconfigure(0, weight=1)
24 | self.tree.bind('<<TreeviewSelect>>', self.ListItemSelected)
25 | self.itemSelectedCallBack = itemSelectedCallBack
26 | ## Build tree
27 | # Add headers
28 | for col in headers:
29 | self.tree.heading(col, text=col.title(),command=lambda c=col: sortby(self.tree, c, 0))
30 | # adjust the column's width to the header string
31 | 
self.tree.column(col, width=tkFont.Font().measure(col.title()), anchor=tk.CENTER)
32 | 
33 | def AddItems(self, items):
34 | self.items = items
35 | self.tree.delete(*self.tree.get_children())
36 | for item in items:
37 | self.tree.insert('', 'end', values=item)
38 | # adjust the column's width if necessary to fit each value
39 | for ix, val in enumerate(item):
40 | col_w = tkFont.Font().measure(val)
41 | if self.tree.column(self.headers[ix], width=None) < col_w: self.tree.column(self.headers[ix], width=col_w)
--------------------------------------------------------------------------------
/VDAO_Access/downloadVDAO.py:
--------------------------------------------------------------------------------
116 | # Reference video -> There's always one reference video (Table_X-Reference_01/)
117 | saveFileDir = os.path.join(directory,'Table_%s-Reference_01' % (tableStr))
118 | if not os.path.exists(saveFileDir): # create folder if it does not exist
119 | os.makedirs(saveFileDir)
120 | urlToDownload = _videoReferencePath.replace('#NUMTABLE',tableStr)
121 | downloadContent(urlToDownload, saveFileDir) # saveFileDir already includes the base directory
122 | # Object video -> There's always one object video (Table_X-Object_01/)
123 | saveFileDir = os.path.join(directory,'Table_%s-Object_01' % (tableStr))
124 | if not os.path.exists(saveFileDir): # create folder if it does not exist
125 | os.makedirs(saveFileDir)
126 | urlToDownload = _videoObjectPath.replace('#NUMTABLE',tableStr)
127 | downloadContent(urlToDownload, saveFileDir) # saveFileDir already includes the base directory
128 | 
129 | #####################################################################################
130 | # Training videos
131 | # http://www02.smt.ufrj.br/~tvdigital/database/objects/page_01.html
132 | #####################################################################################
133 | # downloadVideosTreinamento()
134 | 
135 | 
136 | #####################################################################################
137 | # Test videos
138 | # http://www02.smt.ufrj.br/~tvdigital/database/research/page_01.html
139 | #####################################################################################
140 | downloadVideosTeste()
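# downloadContent is used above to save the file at 'urlToDownload' into a
# local folder. A minimal sketch of such a helper, assuming that behavior
# (hypothetical implementation, not the original one):
#
#   import os
#   import urllib.request
#   def downloadContent(url, outputFolder):
#       fileName = url.rsplit('/', 1)[-1]
#       urllib.request.urlretrieve(url, os.path.join(outputFolder, fileName))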
--------------------------------------------------------------------------------
/VDAO_Access/images/ex_maskBB.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/VDAO_Access/images/ex_maskBB.jpg
--------------------------------------------------------------------------------
/VDAO_Access/images/ex_mergedImages.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/VDAO_Access/images/ex_mergedImages.jpg
--------------------------------------------------------------------------------
/VDAO_Access/images/ex_noInfo_frame.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/VDAO_Access/images/ex_noInfo_frame.jpg
--------------------------------------------------------------------------------
/VDAO_Access/images/ex_withInfo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/VDAO_Access/images/ex_withInfo.jpg
--------------------------------------------------------------------------------
/VDAO_Access/images/ex_withWithoutInfo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/VDAO_Access/images/ex_withWithoutInfo.jpg
--------------------------------------------------------------------------------
/VDAO_Access/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/VDAO_Access/images/logo.png
--------------------------------------------------------------------------------
/VDAO_Access/images/mask_963_l.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/VDAO_Access/images/mask_963_l.png
--------------------------------------------------------------------------------
/VDAO_Access/images/mergedImage.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/VDAO_Access/images/mergedImage.jpg
--------------------------------------------------------------------------------
/VDAO_Access/images/original_963_l.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/VDAO_Access/images/original_963_l.png
--------------------------------------------------------------------------------
/VDAO_Access/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import fnmatch
3 | import cv2
4 | import numpy as np
5 | import itertools
6 | import random
7 | import math
8 | 
9 | # Ex:
10 | # in: '/home/rafael/thesis/simulations/data1/test_data/000001.jpg'
11 | # out: '/home/rafael/thesis/simulations/data1/test_data/', '000001.jpg'
12 | def splitPathFile(fileDataPath):
13 | idx = fileDataPath.rfind('/')
14 | p = fileDataPath[:idx+1] #path
15 | f = fileDataPath[idx+1:] #file
16 | return p,f
17 | 
18 | # Ex:
19 | # in: '/home/rafael/thesis/simulations/data1/test_data/'
20 | # out: ['home', 'rafael', 'thesis', 'simulations', 'data1', 'test_data']
21 | def splitPaths(path):
22 | folders = []
23 | indexes = [i for i, letter in enumerate(path) if letter == '/']
24 | for i in range(len(indexes)):
25 | if i+1 < len(indexes):
26 | item = path[indexes[i]:indexes[i+1]]
27 | else:
28 | item = path[indexes[i]:]
29 | item = item.replace('/','')
30 | if item != '':
31 | folders.append(item)
32 | return folders
33 | 
34 | def getAllFilesRecursively(filePath, extension="*"):
35 | files = [os.path.join(dirpath, f)
36 | for dirpath, dirnames, files in os.walk(filePath)
37 | for f in fnmatch.filter(files, '*.'+extension)]
38 | return files
39 | 
40 | # boxA = (Ax1,Ay1,Ax2,Ay2)
41 | # boxB = (Bx1,By1,Bx2,By2)
42 | def boxesIntersect(boxA, boxB):
43 | if boxA[0] > boxB[2]:
44 | return False # boxA is right of boxB
45 | if boxB[0] > boxA[2]:
46 | return False # boxA is left of boxB
47 | if boxA[3] < boxB[1]:
48 | return False # boxA is above boxB
49 | if boxA[1] > boxB[3]:
50 | return False # boxA is below boxB
51 | return True
52 | 
53 | # box = (Ax1,Ay1,Ax2,Ay2)
54 | def getArea(box):
55 | return (box[2] - box[0]) * (box[3] - box[1])
56 | 
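# Worked example for the helpers above (hypothetical boxes): boxA = (0, 0, 10, 10)
# and boxB = (5, 5, 15, 15) overlap, so boxesIntersect(boxA, boxB) -> True;
# boxC = (20, 0, 30, 10) starts to the right of boxA (boxC[0] = 20 > boxA[2] = 10),
# so boxesIntersect(boxA, boxC) -> False. getArea(boxA) = (10 - 0) * (10 - 0) = 100.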
57 | def getNonOverlappedBoxes(boxes):
58 | if boxes == []: return [], [] # no boxes, no indexes
59 | if len(boxes) == 1: return boxes, [0]
60 | nonOverlappedBoxes = []
61 | nonOverlappedIdx = []
62 | # Get combination among all boxes
63 | combinations = list(itertools.combinations(boxes,2))
64 | # Loop through the pairs
65 | for combination in combinations:
66 | # If boxes do not intersect
67 | if boxesIntersect(combination[0], combination[1]) == False:
68 | if combination[0] not in nonOverlappedBoxes:
69 | nonOverlappedBoxes.append(combination[0])
70 | nonOverlappedIdx.append(boxes.index(combination[0]))
71 | if combination[1] not in nonOverlappedBoxes:
72 | nonOverlappedBoxes.append(combination[1])
73 | nonOverlappedIdx.append(boxes.index(combination[1]))
74 | return nonOverlappedBoxes, nonOverlappedIdx
75 | 
76 | def getOverlappedBoxes(boxes):
77 | if len(boxes) == 1 or boxes == []:
78 | return [], []
79 | overlappedBoxes = []
80 | overlappedIdx = []
81 | # Get combination among all boxes
82 | combinations = list(itertools.combinations(boxes,2))
83 | # Loop through the pairs
84 | for combination in combinations:
85 | # If boxes intersect
86 | if boxesIntersect(combination[0], combination[1]) == True:
87 | if combination[0] not in overlappedBoxes:
88 | overlappedBoxes.append(combination[0])
89 | overlappedIdx.append(boxes.index(combination[0]))
90 | if combination[1] not in overlappedBoxes:
91 | overlappedBoxes.append(combination[1])
92 | overlappedIdx.append(boxes.index(combination[1]))
93 | return overlappedBoxes, overlappedIdx
94 | 
95 | def removeIdxList(myList, indexesToRemove):
96 | newList = []
97 | for idx in range(len(myList)):
98 | # index should not be removed
99 | if idx not in indexesToRemove:
100 | newList.append(myList[idx])
101 | return newList
102 | 
103 | # bgShape = (height, width)
104 | # boxSize = (height, width)
105 | # scaleFator = (minFactor, maxFactor)
106 | # rotationFator = (minAngle, maxAngle)
107 | def getUniqueBoundingBoxes(bgShape, amountBoxes, boxSize, scaleFator=(100,100)):
108 | # Get background dimension
109 | bgHeight = bgShape[0]
110 | bgWidth = bgShape[1]
111 | # Get box original dimension
112 | boxHeight = boxSize[0]
113 | boxWidth = boxSize[1]
114 | 
115 | boxes = []
116 | scales = []
117 | # for i in range(amountBoxes):
118 | while len(boxes) < amountBoxes:
119 | # Random scale
120 | scale = float(random.randint(scaleFator[0], scaleFator[1]))/100
121 | scales.append(scale)
122 | # Apply random scale
123 | boxHeight = boxSize[0]*scale
124 | boxWidth = boxSize[1]*scale
125 | # Apply random Xint Yint
126 | xPos = random.randint(0, int(bgWidth-boxWidth))
127 | yPos = random.randint(0, int(bgHeight-boxHeight))
128 | # Define transformation matrix (rotation and scale)
129 | # transfMatriz = cv2.getRotationMatrix2D(, angle, 1.0)
130 | boxes.append([xPos, yPos, int(xPos+boxWidth), int(yPos+boxHeight)])
131 | combinations, overlappedIdx = getOverlappedBoxes(boxes)
132 | # remove overlapped
133 | boxes = removeIdxList(boxes, overlappedIdx)
134 | scales = removeIdxList(scales, overlappedIdx)
135 | ## Just display results graphically ##
136 | # img = np.zeros((bgHeight, bgWidth, 3), np.uint8)
137 | # for box in boxes:
138 | # img = add_bb_into_image(img, box, (255,0,0), 1)
139 | # cv2.imshow('a',img)
140 | # cv2.waitKey(0)
141 | return boxes, scales
142 | 
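# Illustrative call (hypothetical values): on a 480x640 background, draw random
# boxes of base size 50x80 until 3 mutually non-overlapping ones survive;
# overlapping candidates are pruned and re-drawn on each pass.
#   boxes, scales = getUniqueBoundingBoxes((480, 640), 3, (50, 80))
#   # each box is [x1, y1, x2, y2]; scales holds the factor applied to each box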
143 | def add_bb_into_image(image, boundingBox, color, thickness, label=None):
144 | r = int(color[0])
145 | g = int(color[1])
146 | b = int(color[2])
147 | 
148 | font = cv2.FONT_HERSHEY_SIMPLEX
149 | fontScale = 0.5
150 | fontThickness = 1
151 | safetyPixels = 0
152 | 
153 | xIn = boundingBox[0]
154 | yIn = boundingBox[1]
155 | cv2.rectangle(image,(boundingBox[0], boundingBox[1]),(boundingBox[2], boundingBox[3]),(b,g,r), thickness)
156 | # Add label
157 | if label != None:
158 | # Get size of the text box
159 | (tw, th) = cv2.getTextSize(label, font, fontScale, fontThickness)[0]
160 | # Top-left coord of the textbox
161 | (xin_bb, yin_bb) = (xIn+thickness, yIn-th+int(12.5*fontScale))
162 | # Checking position of the text top-left (outside or inside the bb)
163 | if yin_bb - th <= 0: # if outside the image
164 | yin_bb = yIn+th # put it inside the bb
165 | r_Xin = xIn-int(thickness/2)
166 | r_Yin = yin_bb-th-int(thickness/2)
167 | # Draw filled rectangle to put the text in it
168 | cv2.rectangle(image,(r_Xin,r_Yin-thickness), (r_Xin+tw+thickness*3,r_Yin+th+int(12.5*fontScale)), (b,g,r), -1)
169 | cv2.putText(image,label, (xin_bb, yin_bb), font, fontScale, (0,0,0), fontThickness, cv2.LINE_AA)
170 | return image
171 | 
172 | # Source: https://www.pyimagesearch.com/2015/09/07/blur-detection-with-opencv/
173 | # The lower the value, the blurrier the image is
174 | def blur_measurement(image):
175 | if type(image) is str:
176 | if os.path.isfile(image):
177 | image = cv2.imread(image)
178 | else:
179 | raise IOError('It was not possible to load image %s' % image)
180 | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
181 | channelR = image[:,:,2]
182 | channelG = image[:,:,1]
183 | channelB = image[:,:,0]
184 | try:
185 | grayVar = cv2.Laplacian(gray, cv2.CV_64F)
186 | grayVar = grayVar.var()
187 | RVar = cv2.Laplacian(channelR, cv2.CV_64F).var()
188 | GVar = cv2.Laplacian(channelG, cv2.CV_64F).var()
189 | BVar = cv2.Laplacian(channelB, cv2.CV_64F).var()
190 | except IOError as e:
191 | print("I/O error({0}): {1}".format(e.errno, e.strerror))
192 | 
193 | return [RVar, GVar, BVar, grayVar]
194 | 
195 | def enlargeMask(mask, iterations):
196 | inv_mask = 255 - mask
197 | se = cv2.getStructuringElement(shape=cv2.MORPH_RECT,ksize=(3,3))
198 | enlargedMask = cv2.erode(src=inv_mask, kernel=se, iterations=iterations)
199 | enlargedMask_bin = enlargedMask/255
200 | diffMask = np.add(enlargedMask, mask)
201 | diffMask_bin = diffMask/255
202 | return enlargedMask, enlargedMask_bin.astype(np.uint8), diffMask, diffMask_bin.astype(np.uint8)
203 | 
204 | def euclideanDistance(list1, list2):
205 | # dist = 0
206 | # for i in range(len(vect1)):
207 | # dist = dist + pow(vect1[i]-vect2[i],2)
208 | # return math.sqrt(dist)
209 | # OR
210 | return np.linalg.norm(np.asarray(list1).astype(float)-np.asarray(list2).astype(float))
211 | 
212 | 
213 | def psnr(x, y):
214 | mse = np.mean((x - y) ** 2)
215 | if mse == 0:
216 | return 100 # by convention, identical images get a fixed high value
217 | PIXEL_MAX = 255.0
218 | return 20 * math.log10(PIXEL_MAX / math.sqrt(mse)) # e.g. mse = 1 gives 20*log10(255) ≈ 48.13 dB
219 | 
220 | def secsToMinSecMs(seconds):
221 | frac,whole = math.modf(round(seconds/60,9))
222 | _min = str(whole).replace('.0','') #minutes
223 | frac,whole = math.modf(frac*60)
224 | _sec = str(whole).replace('.0','') #seconds
225 | _ms = str(round(frac*1000,2)) #milliseconds
226 | return '%s min %s sec %s ms' % (_min, _sec, _ms)
227 | 
228 | 
--------------------------------------------------------------------------------
/VDAO_Alignment.md:
--------------------------------------------------------------------------------
1 | 
2 | # VDAO Alignment
3 | 
4 | | # | Type | Illumination |
5 | | :---: | :---: | :---: |
6 | | Table 01 | [Single object 1](#table_01) | Normal |
7 | | Table 02 | [Single object 2](#table_02) | Normal |
8 | | Table 03 | [Single object 3](#table_03) | Normal |
9 | | Table 04 | [Single object 4](#table_04) | Extra |
10 | | Table 05 | [Single object 5](#table_05) | Extra |
11 | | Table 
06 | [Single object 6](#table_06) | Extra | 12 | | Table 07 | [Multiple objects 1](#table_07) | Normal | 13 | | Table 08 | [Multiple objects 2](#table_08) | Normal | 14 | | Table 09 | [Multiple objects 3](#table_09) | Extra | 15 | | Table 10 | [Multiple objects 4](#table_10) | Extra | 16 | 17 | 18 | ## Table 01 ## 19 |
![Table 01 video alignment](images/table01_video_alignment.png) 20 | 
21 | 22 | 23 | ## Table 02 ## 24 |
![Table 02 video alignment](images/table02_video_alignment.png) 25 | 
26 | 27 | **Observations:** 28 | * Object 1 is already present in the initial frame of Path 1 (going). 29 | 30 | 31 | ## Table 03 ## 32 | 
![Table 03 video alignment](images/table03_video_alignment.png) 33 | 
34 | 35 | **Observations:** 36 | * Object 9 is already present in the initial frame of Path 1 (going). 37 | * Object 12 is already present in the initial frame of Path 1 (going). 38 | * Around frame 2236 of Object 16, Path 1 (going), there is a sudden change of illumination that makes the video grayscale from that frame on. 39 | 40 | 41 | ## Table 04 ## 42 | 
![Table 04 video alignment](images/table04_video_alignment.png) 43 | 
44 | 45 | **Observations:** 46 | * Object 4 is already present in the initial frame of Path 1 (going). 47 | 48 | 49 | ## Table 05 ## 50 | 
![Table 05 video alignment](images/table05_video_alignment.png) 51 | 
52 | 53 | 54 | ## Table 06 ## 55 |
![Table 06 video alignment](images/table06_video_alignment.png) 56 | 
57 | 58 | **Observations:** 59 | * Object 11 is already present in the initial frame of Path 1 (going). 60 | 61 | 62 | ## Table 07 ## 63 | 
![Table 07 video alignment](images/table07_video_alignment.png) 64 | 
65 | 66 | **Observations:** 67 | * The reference video already contains one more path (going), but it is not complete. Therefore, it was discarded. 68 | * Object 1 is already present in the initial frame of Path 1 (going) and Path 3 (going). 69 | * Object 1 has no annotation file informing the bounding boxes of the objects. 70 | 71 | 72 | ## Table 08 ## 73 | 
![Table 08 video alignment](images/table08_video_alignment.png) 74 | 
75 | 76 | **Observations:** 77 | * Object 1 is already present in the initial frame of Path 1 (going) and Path 3 (going). 78 | * Object 2 is already present in the initial frame of Path 1 (going) and Path 3 (going). 79 | * The Object 2 video already contains one more path (going), but it is not complete. Therefore, it was discarded. 80 | 81 | 82 | ## Table 09 ## 83 | 
![Table 09 video alignment](images/table09_video_alignment.png) 84 | 
85 | 86 | 87 | ## Table 10 ## 88 |
![Table 10 video alignment](images/table10_video_alignment.png) 89 | 
90 | 
91 | -----------------
92 | 
93 | The complete list of aligned frames used for training (**object group**) can be downloaded [here](https://github.com/rafaelpadilla/DeepLearning-VDAO/blob/master/aligned_frames_object.zip).
94 | 
95 | The complete list of aligned frames used for testing/validation (**research group**) can be downloaded [here](https://github.com/rafaelpadilla/DeepLearning-VDAO/blob/master/aligned_frames_research.zip).
96 | 
--------------------------------------------------------------------------------
/aligned_frames_object.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/aligned_frames_object.zip
--------------------------------------------------------------------------------
/aligned_frames_research.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/aligned_frames_research.zip
--------------------------------------------------------------------------------
/data_aug_env.yml:
--------------------------------------------------------------------------------
1 | name: data_aug
2 | channels:
3 | - conda-forge
4 | - anaconda
5 | - defaults
6 | dependencies:
7 | - _libgcc_mutex=0.1=main
8 | - attrs=19.1.0=py37_1
9 | - backcall=0.1.0=py37_0
10 | - bleach=3.1.0=py37_0
11 | - bzip2=1.0.8=h516909a_0
12 | - ca-certificates=2019.5.15=0
13 | - cairo=1.16.0=h18b612c_1001
14 | - certifi=2019.6.16=py37_0
15 | - cycler=0.10.0=py_1
16 | - dbus=1.13.6=he372182_0
17 | - decorator=4.4.0=py37_1
18 | - defusedxml=0.6.0=py_0
19 | - entrypoints=0.3=py37_1000
20 | - expat=2.2.5=he1b5a44_1003
21 | - ffmpeg=4.1.3=h167e202_0
22 | - flake8=3.7.7=py37_0
23 | - fontconfig=2.13.1=he4413a7_1000
24 | - freetype=2.10.0=he983fc9_0
25 | - gettext=0.19.8.1=hc5be6a0_1002
26 | - giflib=5.1.9=h516909a_0
27 | - glib=2.58.3=h6f030ca_1002
28 | - gmp=6.1.2=hf484d3e_1000
29 | - gnutls=3.6.5=hd3a4fd2_1002
30 | - graphite2=1.3.13=hf484d3e_1000
31 | - gst-plugins-base=1.14.5=h0935bb2_0
32 | - gstreamer=1.14.5=h36ae1b5_0
33 | - harfbuzz=2.4.0=h37c48d4_1
34 | - hdf5=1.10.5=nompi_h3c11f04_1100
35 | - icu=58.2=hf484d3e_1000
36 | - imageio=2.5.0=py37_0
37 | - ipykernel=5.1.1=py37h39e3cac_0
38 | - ipython=7.6.1=py37h39e3cac_0
39 | - ipython_genutils=0.2.0=py37_0
40 | - ipywidgets=7.5.0=py_0
41 | - jasper=1.900.1=h07fcdf6_1006
42 | - jedi=0.13.3=py37_0
43 | - jinja2=2.10.1=py37_0
44 | - jpeg=9c=h14c3975_1001
45 | - jsonschema=3.0.1=py37_0
46 | - jupyter_client=5.3.1=py_0
47 | - jupyter_core=4.5.0=py_0
48 | - kiwisolver=1.1.0=py37hc9558a2_0
49 | - lame=3.100=h14c3975_1001
50 | - libblas=3.8.0=10_openblas
51 | - libcblas=3.8.0=10_openblas
52 | - libedit=3.1.20181209=hc058e9b_0
53 | - libffi=3.2.1=hd88cf55_4
54 | - libgcc-ng=9.1.0=hdf63c60_0
55 | - libgfortran-ng=7.3.0=hdf63c60_0
56 | - libiconv=1.15=h516909a_1005
57 | - liblapack=3.8.0=10_openblas
58 | - liblapacke=3.8.0=10_openblas
59 | - libopenblas=0.3.6=h6e990d7_4
60 | - libpng=1.6.37=hed695b0_0
61 | - libsodium=1.0.16=h1bed415_0
62 | - libstdcxx-ng=9.1.0=hdf63c60_0
63 | - libtiff=4.0.10=h57b8799_1003
64 | - libuuid=2.32.1=h14c3975_1000
65 | - libwebp=1.0.2=h576950b_1
66 | - libxcb=1.13=h14c3975_1002
67 | - libxml2=2.9.9=h13577e0_2
68 | - lz4-c=1.8.3=he1b5a44_1001
69 | - markupsafe=1.1.1=py37h7b6447c_0
70 | - matplotlib=3.1.1=py37_0
71 | - matplotlib-base=3.1.1=py37hfd891ef_0
72 | -
mccabe=0.6.1=py_1 73 | - mistune=0.8.4=py37h7b6447c_0 74 | - nb_conda=2.2.1=py37_0 75 | - nb_conda_kernels=2.2.2=py37_0 76 | - nbconvert=5.5.0=py_0 77 | - nbformat=4.4.0=py37_0 78 | - ncurses=6.1=he6710b0_1 79 | - nettle=3.4.1=h1bed415_1002 80 | - notebook=6.0.0=py37_0 81 | - numpy=1.16.4=py37h95a1406_0 82 | - olefile=0.46=py37_0 83 | - openblas=0.3.6=h6e990d7_4 84 | - opencv=4.1.0=py37h3aa1047_5 85 | - openh264=1.8.0=hdbcaa40_1000 86 | - openssl=1.1.1=h7b6447c_0 87 | - pandas=0.24.2=py37he6710b0_0 88 | - pandoc=2.2.3.2=0 89 | - pandocfilters=1.4.2=py37_1 90 | - parso=0.5.0=py_0 91 | - pcre=8.41=hf484d3e_1003 92 | - pexpect=4.7.0=py37_0 93 | - pickleshare=0.7.5=py37_0 94 | - pillow=6.1.0=py37h34e0f95_0 95 | - pip=19.1.1=py37_0 96 | - pixman=0.38.0=h516909a_1003 97 | - prometheus_client=0.7.1=py_0 98 | - prompt_toolkit=2.0.9=py37_0 99 | - pthread-stubs=0.4=h14c3975_1001 100 | - ptyprocess=0.6.0=py37_0 101 | - pycodestyle=2.5.0=py_0 102 | - pyflakes=2.1.1=py_0 103 | - pygments=2.4.2=py_0 104 | - pyparsing=2.4.1=py_0 105 | - pyqt=5.9.2=py37hcca6a23_0 106 | - pyrsistent=0.14.11=py37h7b6447c_0 107 | - python=3.7.3=h0371630_0 108 | - python-dateutil=2.8.0=py37_0 109 | - pytz=2019.1=py_0 110 | - pyzmq=18.0.0=py37he6710b0_0 111 | - qt=5.9.7=h52cfd70_2 112 | - readline=7.0=h7b6447c_5 113 | - send2trash=1.5.0=py37_0 114 | - setuptools=41.0.1=py37_0 115 | - sip=4.19.8=py37hf484d3e_1000 116 | - six=1.12.0=py37_0 117 | - sqlite=3.29.0=h7b6447c_0 118 | - terminado=0.8.2=py37_0 119 | - testpath=0.4.2=py37_0 120 | - tk=8.6.9=hed695b0_1002 121 | - tornado=6.0.3=py37h516909a_0 122 | - traitlets=4.3.2=py37_0 123 | - wcwidth=0.1.7=py37_0 124 | - webencodings=0.5.1=py37_1 125 | - wheel=0.33.4=py37_0 126 | - widgetsnbextension=3.5.0=py37_0 127 | - x264=1!152.20180806=h14c3975_0 128 | - xorg-kbproto=1.0.7=h14c3975_1002 129 | - xorg-libice=1.0.10=h516909a_0 130 | - xorg-libsm=1.2.3=h84519dc_1000 131 | - xorg-libx11=1.6.8=h516909a_0 132 | - xorg-libxau=1.0.9=h14c3975_0 133 | - xorg-libxdmcp=1.1.3=h516909a_0 134 | - xorg-libxext=1.3.4=h516909a_0 135 | - xorg-libxrender=0.9.10=h516909a_1002 136 | - xorg-renderproto=0.11.1=h14c3975_1002 137 | - xorg-xextproto=7.3.0=h14c3975_1002 138 | - xorg-xproto=7.0.31=h14c3975_1007 139 | - xz=5.2.4=h14c3975_4 140 | - yapf=0.27.0=py_0 141 | - zeromq=4.3.1=he6710b0_3 142 | - zlib=1.2.11=h7b6447c_3 143 | - zstd=1.4.0=h3b9ef0a_0 144 | prefix: /home/rafael/anaconda3/envs/data_aug 145 | 146 | -------------------------------------------------------------------------------- /images/aux_images/output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/aux_images/output.jpg -------------------------------------------------------------------------------- /images/aux_images/output_list.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/aux_images/output_list.jpg -------------------------------------------------------------------------------- /images/aux_images/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/aux_images/result.jpg -------------------------------------------------------------------------------- /images/ex_frames_reference.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/ex_frames_reference.jpg -------------------------------------------------------------------------------- /images/ex_frames_target.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/ex_frames_target.jpg -------------------------------------------------------------------------------- /images/ex_withBBfromAnnotation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/ex_withBBfromAnnotation.jpg -------------------------------------------------------------------------------- /images/table01_video_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/table01_video_alignment.png -------------------------------------------------------------------------------- /images/table02_video_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/table02_video_alignment.png -------------------------------------------------------------------------------- /images/table03_video_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/table03_video_alignment.png -------------------------------------------------------------------------------- /images/table04_video_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/table04_video_alignment.png -------------------------------------------------------------------------------- /images/table05_video_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/table05_video_alignment.png -------------------------------------------------------------------------------- /images/table06_video_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/table06_video_alignment.png -------------------------------------------------------------------------------- /images/table07_video_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/table07_video_alignment.png -------------------------------------------------------------------------------- /images/table08_video_alignment.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/table08_video_alignment.png -------------------------------------------------------------------------------- /images/table09_video_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/table09_video_alignment.png -------------------------------------------------------------------------------- /images/table10_video_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/table10_video_alignment.png -------------------------------------------------------------------------------- /images/yolo_youtube.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafaelpadilla/DeepLearning-VDAO/ae2fbd11487e2de4db6c3a7608bdb467fe13baa1/images/yolo_youtube.jpg --------------------------------------------------------------------------------