├── __init__.py
├── vgg_trainable
│   ├── __init__.py
│   ├── test
│   │   ├── __init__.py
│   │   ├── parse_sptam
│   │   ├── show_clip.py
│   │   ├── plot_points.py
│   │   ├── relative_to_kitti_format.py
│   │   ├── plot_traj.py
│   │   ├── test_model.py
│   │   ├── show_traj_kitti.py
│   │   └── plotHelpers.py
│   └── vgg.py
├── wgan
│   └── tflib
│       ├── ops
│       │   ├── __init__.py
│       │   ├── layernorm.py
│       │   ├── cond_batchnorm.py
│       │   ├── deconv2d.py
│       │   ├── conv1d.py
│       │   ├── conv2d.py
│       │   ├── batchnorm.py
│       │   └── linear.py
│       ├── small_imagenet.py
│       ├── cifar10.py
│       ├── plot.py
│       ├── save_images.py
│       ├── mnist.py
│       ├── inception_score.py
│       └── __init__.py
├── .dockerignore
├── _config.yml
├── images-dir
│   └── README.md
├── requirements.txt
├── docker-compose.yml
├── Dockerfile
├── eval_kitti
│   ├── README
│   ├── mail.h
│   ├── matrix.h
│   └── readme.txt
├── LICENSE.txt
├── Makefile
├── array_utils.py
├── .gitignore
├── outliers.py
├── play_images.py
├── image.py
├── download.sh
├── geometry.py
├── project_laser_into_camera.py
├── triangulate.py
├── lie_algebra.py
├── transform.py
├── adapt_images.py
├── camera_model.py
├── Dependencies.md
├── README.md
├── interpolate_poses.py
└── eval_utils.py

--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/vgg_trainable/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/wgan/tflib/ops/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
images-dir/
--------------------------------------------------------------------------------
/vgg_trainable/test/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
theme: jekyll-theme-slate
--------------------------------------------------------------------------------
/images-dir/README.md:
--------------------------------------------------------------------------------
Directory to store the images and the model to run
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.16.3
scipy==1.1.0
pillow
imageio==2.5
colour-science
colour-demosaicing
matplotlib
opencv-python==3.1.0.0
--------------------------------------------------------------------------------
/vgg_trainable/test/parse_sptam:
--------------------------------------------------------------------------------
#cat $1 | grep 'TRACKED_FRAME_POSE' | sed 's/ /,/g' | while read -r line; do arr=$(echo "$line" | sed 's/,/ /g'); arr=($arr); echo ${arr[@]:2:12}; done
cat $1 | grep 'TRACKED_FRAME_POSE' | while read -r line; do arr=($line); echo ${arr[@]:2:12}; done
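# Usage sketch (hedged; the log file name below is a placeholder): each
# TRACKED_FRAME_POSE line of an S-PTAM log yields the 12 values of a 3x4 pose:
#   ./parse_sptam sptam_output.log > relative_poses.txt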
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: '2.3'
services:
  wganvo:
    container_name: wganvo-docker
    entrypoint: /bin/bash
    stdin_open: true # With these two lines, after docker-compose up -d (or make start) the container
    tty: true        # keeps running; otherwise, it exited immediately
    build: .
    runtime: nvidia
    volumes:
      - .:/app
      - ./images-dir:/var/kitti
    #volumes:
    #  - /var
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM tensorflow/tensorflow:1.8.0-devel-gpu
WORKDIR /app
COPY . /app
RUN ln -s /usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so \
    /usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so.1 && \
    apt-get update && \
    apt-get install -y python-tk && \
    pip install --trusted-host pypi.python.org -r requirements.txt

ENV LD_LIBRARY_PATH="/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/:${LD_LIBRARY_PATH}"
#RUN mkdir -p /var/kitti-images/
#VOLUME /var/kitti-images/
#CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/eval_kitti/README:
--------------------------------------------------------------------------------
To run:
In the root dir, set up the following structure:
data/odometry/poses/ -> the "ground truth" for sequences 11-21 goes here, obtained e.g. with SPTAM, or with some method to compare against. Files are named xx.txt, where xx is the sequence number (11-21)
results/poses/data/ -> our estimate goes here. Files are named the same way.

Compile with:
g++ -O3 -DNDEBUG -o evaluate_odometry evaluate_odometry.cpp matrix.cpp

Run with:
./evaluate_odometry poses

(poses is the name of the subfolder of results)
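Each line of an xx.txt file follows the KITTI odometry format: a 3x4 pose
matrix flattened row-major into 12 space-separated floats. For example, the
identity pose is:
1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0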
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
wganvo is released under a GPLv3 license (see License-gpl.txt), except for some files.
The scripts that are used to pre-process the images are released under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (see License-CCBYNCSA4.txt).
These scripts are:
* adapt_images.py
* adapt_images_kitti.py
* build_pointcloud.py
* camera_model.py
* image.py
* interpolate_poses.py
* play_images.py
* project_laser_into_camera.py
* transform.py

Please see Dependencies.md for a list of all the included code and library dependencies which are not property of the authors of wganvo.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
help:
	@echo "help -- print this help"
	@echo "start -- start docker stack"
	@echo "stop -- stop docker stack"
	@echo "ps -- show status"
	@echo "top -- displays the running processes"
	@echo "clean -- clean all artifacts"
	@echo "shell -- run bash inside docker"
	@echo "image -- create my docker image"

start:
	docker-compose up -d

stop:
	docker-compose stop

ps:
	docker-compose ps

top:
	docker-compose top

clean: stop
	docker-compose rm --force -v

shell:
	docker exec -it wganvo-docker bash

image:
	docker-compose build

.PHONY: help start stop ps top clean shell image

--------------------------------------------------------------------------------
/array_utils.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.

import numpy as np

def save_txt(name, array, fmt='%1.6f'):
    np.savetxt(name + '.txt', array, delimiter=' ', fmt=fmt)

def load(name):
    return np.loadtxt(name, delimiter=' ')

def save_as_list(name, array, fmt='%1.6f'):
    np.savetxt(name + '.txt', array.ravel(), delimiter=' ', fmt=fmt)

def list_to_array(list):
    return np.array(list)

def save_npy(name, arr):
    np.save(name, arr)

def load_npy(name):
    return np.load(name)
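# Usage sketch (hedged; file names are illustrative):
#   a = np.arange(6).reshape(2, 3).astype(np.float64)
#   save_txt('poses', a)      # writes poses.txt, 6-decimal floats, space-separated
#   b = load('poses.txt')     # reads it back as a 2x3 array
#   save_as_list('flat', a)   # writes the 6 values flattened to flat.txt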
--------------------------------------------------------------------------------
/vgg_trainable/test/show_clip.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

import numpy as np
import matplotlib.pyplot as plt
from scipy import linalg
import argparse
import sys, os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)
from input_data import read_data_sets, DataSet

def show(images):
    artist = plt.imshow(images[0], cmap='gray')
    for img in images:
        artist.set_data(img)
        plt.xticks([])
        plt.yticks([])
        plt.pause(0.01)

def main():
    images,_,_,_,_ = read_data_sets(FLAGS.img_file)
    show(images[...,1])


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'img_file',
        type=str,
        help='Images file'
    )
    FLAGS, unparsed = parser.parse_known_args()
    main()
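# Usage sketch (hedged; the data file path is a placeholder for a dataset
# produced by the preprocessing scripts and readable by read_data_sets):
#   python show_clip.py images-dir/dataset_file
# This plays back one channel of every stored image pair as a grayscale clip.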
--------------------------------------------------------------------------------
/vgg_trainable/test/plot_points.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

import tensorflow as tf
import numpy as np
import argparse
import sys, os
import matplotlib.pyplot as plt

def plot(X_axis, Y_axis, xlabel, ylabel):
    fig, ax = plt.subplots()
    ax.plot(X_axis, Y_axis, 'r.')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    #plt.show()
    return fig, ax
#

def main(_):
    points = np.loadtxt(FLAGS.file, delimiter=' ')
    X_axis = points[:, 0]
    Y_axis = points[:, 1]
    fig, ax = plot(X_axis, Y_axis, "frames", "distance(m)")
    fig.savefig(os.path.join(FLAGS.output_dir, 'frames_vs_dist.png'))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'file',
        type=str,
        help='File'
    )
    parser.add_argument(
        '--output_dir',
        type=str,
        default=os.getcwd(),
        help='Output Dir'
    )
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# NetBeans Project
nbproject/
.idea/

# Temporary Files
*~


.idea/


# Generated Images
*.jpg

*.npz
*.png
*.txt

# Working directory
/images-dir
--------------------------------------------------------------------------------
/vgg_trainable/test/relative_to_kitti_format.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

import os, sys, inspect

currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
gparentdir = os.path.dirname(parentdir)
sys.path.insert(0, gparentdir)
import eval_utils
from plot_traj import read, plot
import argparse
import numpy as np

# Instantiate the parser
parser = argparse.ArgumentParser(description='Convert relative poses to absolute poses in the KITTI format')
parser.add_argument('poses', type=str,
                    help='Poses file')
parser.add_argument("--mode", help="inv = invert the transformation",
                    default='ninv', choices=["inv", "ninv"])

args = parser.parse_args()

data = read(args.poses, delimiter=' ')

current = np.matrix(np.identity(4))
current = current[0:3, :]
num_examples = len(data)
# ts = np.empty((num_examples,12))
i = 0
inv = args.mode == 'inv'
ts = eval_utils.get_absolute_poses(data.reshape(-1, 3, 4), inv=inv)
ts = ts.reshape(-1, 12)
# for t in data:
#     T = np.matrix(np.identity(4))
#     T[0:3,:] = t.reshape(3,4)
#     if args.mode == 'inv':
#         transformation = np.linalg.inv(T)
#     else:
#         transformation = T
#     current = current * transformation
#     ts[i] = current.reshape(12)
#     i += 1
np.savetxt('abs.poses.txt', ts, delimiter=' ')
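# Usage sketch (hedged; the input file name is a placeholder): chain a file of
# relative 3x4 poses (12 values per line) into absolute KITTI-format poses:
#   python relative_to_kitti_format.py rel_poses.txt --mode inv
# The result is written to abs.poses.txt, one flattened 3x4 pose per line.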
--------------------------------------------------------------------------------
/vgg_trainable/test/plot_traj.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

from mpl_toolkits.mplot3d import axes3d
import numpy as np
import matplotlib.pyplot as plt


def read(filename, delimiter=','):
    return np.genfromtxt(filename, delimiter=delimiter)


def plot(array):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')  # 111 means "1x1 grid, first subplot"
    p = ax.plot(array[:, 0], array[:, 1], array[:, 2], label='target')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    plt.legend()
    plt.show()


def main():
    import transformations
    data = read('vo.csv')
    data = data[1:len(data), 2:8]

    current = np.array([0., 0., 0.])  # .transpose()
    # current = np.matrix(np.identity(4))
    num_examples = len(data)
    ts = np.empty((num_examples, 3))
    i = 0
    for t in data:
        # Returns a 4x4 matrix
        # t[3] = roll, t[4] = pitch, t[5] = yaw
        T = transformations.euler_matrix(t[3], t[4], t[5], 'sxyz')
        T[0:3, 3] = t[0:3]
        current = t[0:3] + current  # np.linalg.inv(T) *current #np.linalg.inv(T) * current
        ts[i] = current  # [0:3,3].transpose()
        # poses[i] = current[0:3,:].reshape(12)
        i += 1

    plot(ts)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/outliers.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.

import numpy as np
import matplotlib.pyplot as plt


def reject_outliers(data, m=2.):
    mask = mask_outliers(data, m)
    return data[mask]


def mask_outliers(data, m):
    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    s = d / mdev if mdev else 0.
    return s < m
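# Example (hedged sketch): keep values whose distance to the median is below
# m times the median absolute deviation:
#   data = np.array([1., 1.1, 0.9, 10.])
#   reject_outliers(data, m=2.)   # -> array([1. , 1.1, 0.9])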
# data: Nx3xP
def print_points(data):
    plt.subplot(131)
    plt.plot(data[:, 0, :])
    plt.subplot(132)
    plt.plot(data[:, 1, :])
    plt.subplot(133)
    plt.plot(data[:, 2, :])
    plt.show()


def load(path):
    return np.load(path)

# def fix_array(m, K, N):
#     new_m = np.empty((m.shape[0], m.shape[1], 25))
#     for idx, points in enumerate(m):
#         points_h = np.ones((4, 150))
#         points_h[0:3, :] = points
#         x1 = np.matmul(K, points_h)
#         x1 /= x1[2]
#         c_mask = center_crop_mask(x1)
#         points = points[:, c_mask]
#         front_mask = in_front_of_cam_mask(points, 0.)
#         points = points[:, front_mask]
#         replace = points.shape[1] <= N
#         random_selection = np.random.choice(points.shape[1], N, replace=replace)
#         points = points[:3, random_selection]
#         new_m[idx] = points
#     return new_m

if __name__ == "__main__":
    m = load('/home/jcremona/output/02/points.npy')
    print_points(m)
    # m.shape -> Nx3xP
    # new_m = fix_array(m, K, N)
    #np.save('/home/jcremona/output/09/points.npy', new_m)

    # Take some example
    X = m[432]
    for i in range(3):
        # Each axis (X,Y,Z) is filtered by mask_outliers
        mask = mask_outliers(X[i], 1000)
        X = X[:, mask]
    print(X.shape)
--------------------------------------------------------------------------------
/eval_kitti/mail.h:
--------------------------------------------------------------------------------
/*
 * This file is part of wganvo.
 * This file belongs to the authors of KITTI (http://www.cvlibs.net/datasets/kitti/eval_odometry.php)
 *
 * Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
 * For more information see
 *
 * wganvo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * wganvo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with wganvo. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef MAIL_H
#define MAIL_H

#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <string>

class Mail {

public:

  Mail (std::string email = "",std::string from = "noreply@cvlibs.net",std::string subject = "KITTI Evaluation Benchmark") {
    if (email.compare("")) {
      char cmd[2048];
      sprintf(cmd,"/usr/lib/sendmail -t -f noreply@cvlibs.net");
      mail = popen(cmd,"w");
      fprintf(mail,"To: %s\n", email.c_str());
      fprintf(mail,"From: %s\n", from.c_str());
      fprintf(mail,"Subject: %s\n", subject.c_str());
      fprintf(mail,"\n\n");
    } else {
      mail = 0;
    }
  }

  ~Mail() {
    if (mail) {
      pclose(mail);
    }
  }

  void msg (const char *format, ...) {
    va_list args;
    va_start(args,format);
    if (mail) {
      vfprintf(mail,format,args);
      fprintf(mail,"\n");
    }
    vprintf(format,args);
    printf("\n");
    va_end(args);
  }

  void msg (std::string str) {
    if (mail) {
      fprintf(mail,"%s\n",str.c_str());
    }
    printf("%s\n",str.c_str());
  }

  void finalize (bool success,std::string benchmark,std::string result_sha="",std::string user_sha="") {
    if (success) {
      msg("Your evaluation results are available at:");
      msg("http://www.cvlibs.net/datasets/kitti/user_submit_check_login.php?benchmark=%s&user=%s&result=%s",benchmark.c_str(),user_sha.c_str(), result_sha.c_str());
    } else {
      msg("An error occurred while processing your results.");
      msg("Please make sure that the data in your zip archive has the right format!");
    }
  }

private:

  FILE *mail;

};

#endif
--------------------------------------------------------------------------------
/wgan/tflib/ops/layernorm.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below).
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

# MIT License
#
# Copyright (c) 2017 Ishaan Gulrajani
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import tflib as lib

import numpy as np
import tensorflow as tf

def Layernorm(name, norm_axes, inputs):
    mean, var = tf.nn.moments(inputs, norm_axes, keep_dims=True)
    # Assume the 'neurons' axis is the first of norm_axes. This is the case for fully-connected and BCHW conv layers.
    n_neurons = inputs.get_shape().as_list()[norm_axes[0]]

    offset = lib.param(name+'.offset', np.zeros(n_neurons, dtype='float32'))
    scale = lib.param(name+'.scale', np.ones(n_neurons, dtype='float32'))

    # Add broadcasting dims to offset and scale (e.g. BCHW conv data)
    offset = tf.reshape(offset, [-1] + [1 for i in xrange(len(norm_axes)-1)])
    scale = tf.reshape(scale, [-1] + [1 for i in xrange(len(norm_axes)-1)])

    result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5)

    return result
--------------------------------------------------------------------------------
/wgan/tflib/ops/cond_batchnorm.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below).
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

# MIT License
#
# Copyright (c) 2017 Ishaan Gulrajani
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import tflib as lib

import numpy as np
import tensorflow as tf

def Batchnorm(name, axes, inputs, is_training=None, stats_iter=None, update_moving_stats=True, fused=True, labels=None, n_labels=None):
    """conditional batchnorm (dumoulin et al 2016) for BCHW conv filtermaps"""
    if axes != [0,2,3]:
        raise Exception('unsupported')
    mean, var = tf.nn.moments(inputs, axes, keep_dims=True)
    shape = mean.get_shape().as_list() # shape is [1,n,1,1]
    offset_m = lib.param(name+'.offset', np.zeros([n_labels,shape[1]], dtype='float32'))
    scale_m = lib.param(name+'.scale', np.ones([n_labels,shape[1]], dtype='float32'))
    offset = tf.nn.embedding_lookup(offset_m, labels)
    scale = tf.nn.embedding_lookup(scale_m, labels)
    result = tf.nn.batch_normalization(inputs, mean, var, offset[:,:,None,None], scale[:,:,None,None], 1e-5)
    return result
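# Usage sketch (hedged; the name, tensors and label count are illustrative):
# condition the normalization of BCHW feature maps on integer class labels,
# so each label gets its own offset/scale pair:
#   out = Batchnorm('G.BN1', [0, 2, 3], x, labels=y, n_labels=10)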
--------------------------------------------------------------------------------
/play_images.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk
# (see original license below)
#
# Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# This file is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#

################################################################################
#
# Copyright (c) 2017 University of Oxford
# Authors:
#  Geoff Pascoe (gmp@robots.ox.ac.uk)
#
# This work is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#
################################################################################

import argparse
import os
import re
import matplotlib.pyplot as plt
from datetime import datetime as dt
from image import load_image
from camera_model import CameraModel

parser = argparse.ArgumentParser(description='Play back images from a given directory')

parser.add_argument('dir', type=str, help='Directory containing images.')
parser.add_argument('--models_dir', type=str, default=None, help='(optional) Directory containing camera model. If supplied, images will be undistorted before display')
parser.add_argument('--scale', type=float, default=1.0, help='(optional) factor by which to scale images before display')

args = parser.parse_args()

camera = re.search('(stereo|mono_(left|right|rear))', args.dir).group(0)

timestamps_path = os.path.join(os.path.join(args.dir, os.pardir, camera + '.timestamps'))
if not os.path.isfile(timestamps_path):
    timestamps_path = os.path.join(args.dir, os.pardir, os.pardir, camera + '.timestamps')
    if not os.path.isfile(timestamps_path):
        raise IOError("Could not find timestamps file")

model = None
if args.models_dir:
    model = CameraModel(args.models_dir, args.dir)

current_chunk = 0
timestamps_file = open(timestamps_path)
for line in timestamps_file:
    tokens = line.split()
    datetime = dt.utcfromtimestamp(int(tokens[0])/1000000)
    chunk = int(tokens[1])

    filename = os.path.join(args.dir, tokens[0] + '.png')
    if not os.path.isfile(filename):
        if chunk != current_chunk:
            print("Chunk " + str(chunk) + " not found")
            current_chunk = chunk
        continue

    current_chunk = chunk

    img = load_image(filename, model)
    plt.imshow(img)
    plt.xlabel(datetime)
    plt.xticks([])
    plt.yticks([])
    plt.pause(0.01)
--------------------------------------------------------------------------------
/image.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk
# (see original license below)
#
# Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# This file is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#

################################################################################
#
# Copyright (c) 2017 University of Oxford
# Authors:
#  Geoff Pascoe (gmp@robots.ox.ac.uk)
#
# This work is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#
###############################################################################

import re
import numpy as np
from scipy.misc import imresize, imsave, imread
from PIL import Image
from colour_demosaicing import demosaicing_CFA_Bayer_bilinear as demosaic

BAYER_STEREO = 'gbrg'
BAYER_MONO = 'rggb'


def load_image(image_path, model=None):
    """Loads and rectifies an image from file.

    Args:
        image_path (str): path to an image from the dataset.
        model (camera_model.CameraModel): if supplied, model will be used to undistort image.

    Returns:
        numpy.ndarray: demosaiced and optionally undistorted image

    """
    if model:
        camera = model.camera
    else:
        camera = re.search('(stereo|mono_(left|right|rear))', image_path).group(0)
    if camera == 'stereo':
        pattern = BAYER_STEREO
    else:
        pattern = BAYER_MONO

    if model:
        img = demosaic(Image.open(image_path), pattern)
        img = model.undistort(img)
        img = rgb_2_grey(img)
    else:
        img = non_demosaic_load(image_path)
    assert isinstance(img, np.ndarray) and img.dtype == np.uint8 and img.flags.contiguous
    return img

def non_demosaic_load(image_path):
    return imread(image_path)


def crop_image(num_array, cropx, cropy):
    y = num_array.shape[0]
    x = num_array.shape[1]
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    return num_array[starty:starty + cropy, startx:startx+cropx]

def scale_image(num_array, sizex, sizey):
    return imresize(num_array, (sizey,sizex))

def save_image(num_array, path):
    imsave(path, num_array)

def savez_compressed(path, array):
    np.savez_compressed(path, array)

def rgb_2_grey(img):
    return np.dot(img[...,:3],[0.299, 0.587, 0.114]).astype(img.dtype)
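# Usage sketch (hedged): the crop/scale sizes below are the defaults set in
# download.sh (880x660 center crop, then 128x96 rescale):
#   img = crop_image(img, 880, 660)
#   img = scale_image(img, 128, 96)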
--------------------------------------------------------------------------------
/wgan/tflib/small_imagenet.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below).
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

# MIT License
#
# Copyright (c) 2017 Ishaan Gulrajani
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
import scipy.misc
import time

def make_generator(path, n_files, batch_size):
    epoch_count = [1]
    def get_epoch():
        images = np.zeros((batch_size, 3, 64, 64), dtype='int32')
        files = range(n_files)
        random_state = np.random.RandomState(epoch_count[0])
        random_state.shuffle(files)
        epoch_count[0] += 1
        for n, i in enumerate(files):
            image = scipy.misc.imread("{}/{}.png".format(path, str(i+1).zfill(len(str(n_files)))))
            images[n % batch_size] = image.transpose(2,0,1)
            if n > 0 and n % batch_size == 0:
                yield (images,)
    return get_epoch

def load(batch_size, data_dir='/home/ishaan/data/imagenet64'):
    return (
        make_generator(data_dir+'/train_64x64', 1281149, batch_size),
        make_generator(data_dir+'/valid_64x64', 49999, batch_size)
    )

if __name__ == '__main__':
    train_gen, valid_gen = load(64)
    t0 = time.time()
    for i, batch in enumerate(train_gen(), start=1):
        print "{}\t{}".format(str(time.time() - t0), batch[0][0,0,0,0])
        if i == 1000:
            break
        t0 = time.time()
--------------------------------------------------------------------------------
/wgan/tflib/cifar10.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below).
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

# MIT License
#
# Copyright (c) 2017 Ishaan Gulrajani
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np

import os
import urllib
import gzip
import cPickle as pickle

def unpickle(file):
    fo = open(file, 'rb')
    dict = pickle.load(fo)
    fo.close()
    return dict['data'], dict['labels']

def cifar_generator(filenames, batch_size, data_dir):
    all_data = []
    all_labels = []
    for filename in filenames:
        data, labels = unpickle(data_dir + '/' + filename)
        all_data.append(data)
        all_labels.append(labels)

    images = np.concatenate(all_data, axis=0)
    labels = np.concatenate(all_labels, axis=0)

    def get_epoch():
        rng_state = np.random.get_state()
        np.random.shuffle(images)
        np.random.set_state(rng_state)
        np.random.shuffle(labels)

        for i in xrange(len(images) / batch_size):
            yield (images[i*batch_size:(i+1)*batch_size], labels[i*batch_size:(i+1)*batch_size])

    return get_epoch


def load(batch_size, data_dir):
    return (
        cifar_generator(['data_batch_1','data_batch_2','data_batch_3','data_batch_4','data_batch_5'], batch_size, data_dir),
        cifar_generator(['test_batch'], batch_size, data_dir)
    )
--------------------------------------------------------------------------------
/wgan/tflib/plot.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below).
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

# MIT License
#
# Copyright (c) 2017 Ishaan Gulrajani
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np

import matplotlib

matplotlib.use('Agg')
import matplotlib.pyplot as plt
import os
import collections
import time
import cPickle as pickle

_since_beginning = collections.defaultdict(lambda: {})
_since_last_flush = collections.defaultdict(lambda: {})

_iter = [0]


def tick():
    _iter[0] += 1

def get_global_iter():
    return _iter[0]


def plot(name, value):
    _since_last_flush[name][_iter[0]] = value


def flush(log_dir):
    prints = []

    for name, vals in _since_last_flush.items():
        prints.append("{}\t{}".format(name, np.mean(vals.values())))
        _since_beginning[name].update(vals)

        x_vals = np.sort(_since_beginning[name].keys())
        y_vals = [_since_beginning[name][x] for x in x_vals]

        plt.clf()
        plt.plot(x_vals, y_vals)
        plt.xlabel('iteration')
        plt.ylabel(name)
        file_name = name.replace(' ', '_') + '.jpg'
        plt.savefig(os.path.join(log_dir, file_name))

    print "iter {}\t{}".format(_iter[0], "\t".join(prints))
    _since_last_flush.clear()

    with open(os.path.join(log_dir, 'log.pkl'), 'wb') as f:
        pickle.dump(dict(_since_beginning), f, pickle.HIGHEST_PROTOCOL)
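# Usage sketch (hedged; the metric name and log path are illustrative):
#   plot('train disc cost', 0.37)   # record a scalar at the current iteration
#   tick()                          # advance the global iteration counter
#   flush('/tmp/logs')              # print running means, save curves and log.pkl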
--------------------------------------------------------------------------------
/download.sh:
--------------------------------------------------------------------------------
#!/bin/bash

HOME=~
PROJECT_DIR=$HOME/tesina
WORKSPACE_DIR=$PROJECT_DIR/workspace
SOURCE_DIR=$PROJECT_DIR/tesina
FILENAME_TEMP="index_temp.html"
DATASET_LIST_FILE=$1
LOGIN_URL="http://mrgdatashare.robots.ox.ac.uk/accounts/login/"
MODELS_DIR=$PROJECT_DIR/models
EXTRINSICS_DIR=$PROJECT_DIR/extrinsics
CROP_WIDTH=880
CROP_HEIGHT=660
SCALE_WIDTH=128
SCALE_HEIGHT=96
FILENAME_COOKIE_TEMP=cookies.txt
# TODO why isn't sessionid mandatory?
#COOKIE_HEADER="Cookie: _ga=GA1.3.147376171.1502830394; _gat=1; _gid=GA1.3.370211867.1505683919; sessionid=gcef24ow0h95wjezgrqrkudsas1hp5x1"

# TODO handle authentication errors
printf "Username: "
read USERNAME
stty -echo
printf "Password: "
read PASSWORD
stty echo
printf "\n"

output_dir=$WORKSPACE_DIR
processing_dataset=false

function download_file { # 1 = url, 2 = user, 3 = pass, 4 = filename_path
    wget --save-cookies $FILENAME_COOKIE_TEMP --server-response -q -O - $1 > $FILENAME_TEMP
    csrf_middleware_token=$(sed -n "/csrfmiddlewaretoken/s/.*name='csrfmiddlewaretoken'\s\+value='\([^']\+\).*/\1/p" $FILENAME_TEMP)
    next_redirect=$(sed -n '/next/s/.*name="next"\s\+value="\([^"]\+\).*/\1/p' $FILENAME_TEMP)
    next_redirect_encoded=$(php -r "echo urlencode(\"$next_redirect\");")
    # TODO why isn't referer header mandatory?
    #referer_header="Referer: ${referer_encoded}"
    post_data="csrfmiddlewaretoken="$csrf_middleware_token"&username="$2"&password="$3"&next="$next_redirect_encoded
    wget --load-cookies $FILENAME_COOKIE_TEMP --post-data="${post_data}" "${LOGIN_URL}" -O $4
}

while read url_dataset; do
    filename=$(basename $url_dataset)
    dirname=$(dirname $url_dataset)
    directory_name="${filename%.*}"
    filename_path="${output_dir}/${filename}"
    download_file $url_dataset $USERNAME $PASSWORD $filename_path
    output_dataset_directory="${output_dir}/${directory_name}"
    mkdir -p $output_dataset_directory
    tar xopf "${filename_path}" -C $output_dataset_directory
    tar_output=$?
    if [ "$tar_output" -eq 0 ]; then
        rm "${filename_path}"
    fi
    if [ "$processing_dataset" = true ] ; then
        #wait $processing_dataset_pid
        processing_dataset=false
    fi
    IFS='_' read -ra FOLDERS <<< "$filename"
    dataset_image_directory="${output_dataset_directory}/${FOLDERS[0]}/${FOLDERS[1]}/${FOLDERS[2]}"
    vo_filename="${FOLDERS[0]}_vo.tar"
    vo_filename_path="${output_dir}/${vo_filename}"
    url_vo_file="${dirname}/$vo_filename"
    download_file $url_vo_file $USERNAME $PASSWORD $vo_filename_path
    output_vo_directory="${output_dataset_directory}/vo"
    mkdir -p $output_vo_directory
    tar xopf $vo_filename_path -C $output_vo_directory
    tar_vo_output=$?
    if [ "$tar_vo_output" -eq 0 ]; then
        rm "${vo_filename_path}"
    fi
    poses_file="${output_vo_directory}/${FOLDERS[0]}/vo/vo.csv"
    python "${SOURCE_DIR}/adapt_images.py" "$dataset_image_directory" "$poses_file" --models_dir "${MODELS_DIR}" --crop "${CROP_WIDTH}" "${CROP_HEIGHT}" --scale "${SCALE_WIDTH}" "${SCALE_HEIGHT}" --output_dir "${output_dataset_directory}" # &
    # TODO comment this line when running in background
    if [ "$tar_output" -eq 0 ]; then
        rm -rf "${dataset_image_directory}"
    fi
    processing_dataset_pid=$!
    processing_dataset=true

    rm $FILENAME_TEMP
done <$DATASET_LIST_FILE
rm $FILENAME_COOKIE_TEMP

--------------------------------------------------------------------------------
/geometry.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from evo (github.com/MichaelGrupp/evo) (see original license below)
#
# Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.

# Provides generic geometry algorithms.
# author: Michael Grupp
#
# This file is part of evo (github.com/MichaelGrupp/evo).
#
# evo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# evo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with evo. If not, see <http://www.gnu.org/licenses/>.

import numpy as np


class GeometryException(Exception):
    pass


def umeyama_alignment(x, y, with_scale=False):
    """
    Computes the least squares solution parameters of an Sim(m) matrix
    that minimizes the distance between a set of registered points.
    Umeyama, Shinji: Least-squares estimation of transformation parameters
    between two point patterns. IEEE PAMI, 1991
    :param x: mxn matrix of points, m = dimension, n = nr. of data points
    :param y: mxn matrix of points, m = dimension, n = nr. of data points
    :param with_scale: set to True to align also the scale (default: 1.0 scale)
    :return: r, t, c - rotation matrix, translation vector and scale factor
    """
    if x.shape != y.shape:
        raise GeometryException("data matrices must have the same shape")

    # m = dimension, n = nr. of data points
    m, n = x.shape

    # means, eq. 34 and 35
    mean_x = x.mean(axis=1)
    mean_y = y.mean(axis=1)

    # variance, eq. 36
    # "transpose" for column subtraction
    sigma_x = 1.0/n * (np.linalg.norm(x - mean_x[:, np.newaxis])**2)

    # covariance matrix, eq. 38
    outer_sum = np.zeros((m, m))
    for i in range(n):
        outer_sum += np.outer((y[:, i] - mean_y), (x[:, i] - mean_x))
    cov_xy = np.multiply(1.0/n, outer_sum)

    # SVD (text betw. eq. 38 and 39)
    u, d, v = np.linalg.svd(cov_xy)

    # S matrix, eq. 43
    s = np.eye(m)
    if np.linalg.det(u) * np.linalg.det(v) < 0.0:
        # Ensure a RHS coordinate system (Kabsch algorithm).
        s[m-1, m-1] = -1

    # rotation, eq. 40
    r = u.dot(s).dot(v)

    # scale & translation, eq. 42 and 41
    c = 1/sigma_x * np.trace(np.diag(d).dot(s)) if with_scale else 1.0
    t = mean_y - np.multiply(c, r.dot(mean_x))

    return r, t, c


def arc_len(x):
    """
    :param x: nxm array of points, m=dimension
    :return: the (discrete approximated) arc-length of the point sequence
    """
    return np.sum(np.linalg.norm(x[:-1] - x[1:], axis=1))
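# Usage sketch (hedged; array names are illustrative): align an estimated
# trajectory est to a reference gt, both 3xN arrays of positions:
#   r, t, c = umeyama_alignment(est, gt, with_scale=True)
#   est_aligned = c * r.dot(est) + t[:, np.newaxis]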
--------------------------------------------------------------------------------
/project_laser_into_camera.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk
# (see original license below)
#
# Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# This file is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#

################################################################################
#
# Copyright (c) 2017 University of Oxford
# Authors:
#  Geoff Pascoe (gmp@robots.ox.ac.uk)
#
# This work is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#
################################################################################

import os
import re
import numpy as np
import matplotlib.pyplot as plt
import argparse

from build_pointcloud import build_pointcloud
from transform import build_se3_transform
from image import load_image
from camera_model import CameraModel

parser = argparse.ArgumentParser(description='Project LIDAR data into camera image')
parser.add_argument('--image_dir', type=str, help='Directory containing images')
parser.add_argument('--laser_dir', type=str, help='Directory containing LIDAR scans')
parser.add_argument('--poses_file', type=str, help='File containing either INS or VO poses')
parser.add_argument('--models_dir', type=str, help='Directory containing camera models')
parser.add_argument('--extrinsics_dir', type=str, help='Directory containing sensor extrinsics')
parser.add_argument('--image_idx', type=int, help='Index of image to display')

args = parser.parse_args()

model = CameraModel(args.models_dir, args.image_dir)

extrinsics_path = os.path.join(args.extrinsics_dir, model.camera + '.txt')
with open(extrinsics_path) as extrinsics_file:
    extrinsics = [float(x) for x in next(extrinsics_file).split(' ')]

G_camera_vehicle = build_se3_transform(extrinsics)
G_camera_posesource = None

poses_type = re.search('(vo|ins)\.csv', args.poses_file).group(1)
if poses_type == 'ins':
    with open(os.path.join(args.extrinsics_dir, 'ins.txt')) as extrinsics_file:
        extrinsics = next(extrinsics_file)
        G_camera_posesource = G_camera_vehicle * build_se3_transform([float(x) for x in extrinsics.split(' ')])
else:
    # VO frame and vehicle frame are the same
    G_camera_posesource = G_camera_vehicle


timestamps_path = os.path.join(args.image_dir, os.pardir, model.camera + '.timestamps')
if not os.path.isfile(timestamps_path):
    timestamps_path = os.path.join(args.image_dir, os.pardir, os.pardir, model.camera + '.timestamps')

timestamp = 0
with open(timestamps_path) as timestamps_file:
    for i, line in enumerate(timestamps_file):
        if i == args.image_idx:
            timestamp = int(line.split(' ')[0])

pointcloud, reflectance = build_pointcloud(args.laser_dir, args.poses_file, args.extrinsics_dir,
                                           timestamp - 1e7, timestamp + 1e7, timestamp)

pointcloud = np.dot(G_camera_posesource, pointcloud)

image_path = os.path.join(args.image_dir, str(timestamp) + '.png')
image = load_image(image_path, model)

uv, depth = model.project(pointcloud, image.shape)

plt.imshow(image)
plt.hold(True)
plt.scatter(np.ravel(uv[0, :]), np.ravel(uv[1, :]), s=2, c=depth, edgecolors='none', cmap='jet')
plt.xlim(0, image.shape[1])
plt.ylim(image.shape[0], 0)
plt.xticks([])
plt.yticks([])
plt.show()
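# Example invocation (hedged; all paths are placeholders, the flags are the
# ones defined above):
#   python project_laser_into_camera.py --image_dir data/stereo/centre \
#       --laser_dir data/ldmrs --poses_file data/vo/vo.csv \
#       --models_dir models --extrinsics_dir extrinsics --image_idx 100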
-------------------------------------------------------------------------------- /wgan/tflib/save_images.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 
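For reference, `build_grid`/`save_images` below expect a batch `X` of flattened samples `(N, H*W)`, grayscale images `(N, H, W)`, or channel-first images `(N, C, H, W)`, with float values in `[0, 1]` (rescaled to uint8) or already-converted uint8 values. A minimal sketch of a call; the shapes and output path are illustrative only, and it assumes the `wgan` directory is on `PYTHONPATH`:

    import numpy as np
    from tflib.save_images import save_images

    samples = np.random.rand(64, 28, 28).astype('float32')  # 64 grayscale images in [0, 1]
    save_images(samples, '/tmp/samples.png')  # packs them into a single 8x8 grid image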
43 | 44 | """ 45 | Image grid saver, based on color_grid_vis from github.com/Newmu 46 | """ 47 | 48 | import numpy as np 49 | import os 50 | from scipy.misc import imsave 51 | 52 | 53 | def save_images(X, save_path): 54 | img = build_grid(X) 55 | imsave(save_path, img) 56 | 57 | 58 | def build_grid(X): 59 | # [0, 1] -> [0,255] 60 | if isinstance(X.flatten()[0], np.floating): 61 | X = (255.99 * X).astype('uint8') 62 | n_samples = X.shape[0] 63 | rows = int(np.sqrt(n_samples)) 64 | while n_samples % rows != 0: 65 | rows -= 1 66 | nh, nw = rows, n_samples / rows 67 | if X.ndim == 2: 68 | X = np.reshape(X, (X.shape[0], int(np.sqrt(X.shape[1])), int(np.sqrt(X.shape[1])))) 69 | if X.ndim == 4: 70 | # BCHW -> BHWC 71 | X = X.transpose(0, 2, 3, 1) 72 | h, w = X[0].shape[:2] 73 | img = np.zeros((h * nh, w * nw, X.shape[3])) 74 | elif X.ndim == 3: 75 | h, w = X[0].shape[:2] 76 | img = np.zeros((h * nh, w * nw)) 77 | for n, x in enumerate(X): 78 | j = n / nw 79 | i = n % nw 80 | img[j * h:j * h + h, i * w:i * w + w] = x 81 | return img 82 | 83 | 84 | # Saves two grid images, one per element of the image pairs 85 | def save_pair_images_grid(X, save_path, iteration, prefix='samples'): 86 | grid = build_grid(X) 87 | assert grid.ndim == 3 and grid.shape[2] == 2 88 | 89 | grid = grid.transpose(2, 0, 1) 90 | imsave_pair(grid, save_path, iteration, prefix) 91 | 92 | 93 | # Saves the two images of a single pair individually 94 | def save_pair_images(X, save_path, iteration, prefix='samples'): 95 | # BCHW 96 | assert X.ndim == 4 and X.shape[1] == 2 97 | 98 | idx = 0 #randrange(X.shape[0]) 99 | pair = X[idx] 100 | imsave_pair(pair, save_path, iteration, prefix) 101 | 102 | 103 | def imsave_pair(pair, save_path, iteration, prefix): 104 | img_name = prefix + '_{}_{}.png' 105 | imsave(os.path.join(save_path, img_name.format(iteration, 0)), pair[0, ...]) 106 | imsave(os.path.join(save_path, img_name.format(iteration, 1)), pair[1, ...]) 107 | -------------------------------------------------------------------------------- /wgan/tflib/mnist.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see <http://www.gnu.org/licenses/>.
20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import numpy 45 | 46 | import os 47 | import urllib 48 | import gzip 49 | import cPickle as pickle 50 | 51 | def mnist_generator(data, batch_size, n_labelled, limit=None): 52 | images, targets = data 53 | 54 | rng_state = numpy.random.get_state() 55 | numpy.random.shuffle(images) 56 | numpy.random.set_state(rng_state) 57 | numpy.random.shuffle(targets) 58 | if limit is not None: 59 | print "WARNING ONLY FIRST {} MNIST DIGITS".format(limit) 60 | images = images.astype('float32')[:limit] 61 | targets = targets.astype('int32')[:limit] 62 | if n_labelled is not None: 63 | labelled = numpy.zeros(len(images), dtype='int32') 64 | labelled[:n_labelled] = 1 65 | 66 | def get_epoch(): 67 | rng_state = numpy.random.get_state() 68 | numpy.random.shuffle(images) 69 | numpy.random.set_state(rng_state) 70 | numpy.random.shuffle(targets) 71 | 72 | if n_labelled is not None: 73 | numpy.random.set_state(rng_state) 74 | numpy.random.shuffle(labelled) 75 | 76 | image_batches = images.reshape(-1, batch_size, 784) 77 | target_batches = targets.reshape(-1, batch_size) 78 | 79 | if n_labelled is not None: 80 | labelled_batches = labelled.reshape(-1, batch_size) 81 | 82 | for i in xrange(len(image_batches)): 83 | yield (numpy.copy(image_batches[i]), numpy.copy(target_batches[i]), numpy.copy(labelled_batches[i])) 84 | 85 | else: 86 | 87 | for i in xrange(len(image_batches)): 88 | yield (numpy.copy(image_batches[i]), numpy.copy(target_batches[i])) 89 | 90 | return get_epoch 91 | 92 | def load(batch_size, test_batch_size, n_labelled=None): 93 | filepath = '/tmp/mnist.pkl.gz' 94 | url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz' 95 | 96 | if not os.path.isfile(filepath): 97 | print "Couldn't find MNIST dataset in /tmp, downloading..."
98 | urllib.urlretrieve(url, filepath) 99 | 100 | with gzip.open('/tmp/mnist.pkl.gz', 'rb') as f: 101 | train_data, dev_data, test_data = pickle.load(f) 102 | 103 | return ( 104 | mnist_generator(train_data, batch_size, n_labelled), 105 | mnist_generator(dev_data, test_batch_size, n_labelled), 106 | mnist_generator(test_data, test_batch_size, n_labelled) 107 | ) -------------------------------------------------------------------------------- /triangulate.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # 4 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # wganvo is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # wganvo is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with wganvo. If not, see . 19 | # 20 | 21 | import cv2 22 | import numpy as np 23 | from matplotlib import pyplot as plt 24 | 25 | 26 | # Input: stereo images 27 | # Output: pts1 -> 2xN array of points from left image 28 | # pts2 -> 2xN array of points from right image 29 | def matcher(img1, img2): 30 | sift = cv2.xfeatures2d.SIFT_create() 31 | # find the keypoints and descriptors with SIFT 32 | kp1, des1 = sift.detectAndCompute(img1, None) 33 | kp2, des2 = sift.detectAndCompute(img2, None) 34 | # Brute Force Matcher parameters 35 | bf = cv2.BFMatcher(crossCheck=True) 36 | matches = bf.knnMatch(des1, des2, k=1) 37 | # FLANN parameters 38 | # FLANN_INDEX_KDTREE = 0 39 | # index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5) 40 | # search_params = dict(checks=50) 41 | # flann = cv2.FlannBasedMatcher(index_params,search_params) 42 | # matches = flann.knnMatch(des1,des2,k=2) 43 | good = [] 44 | pts1 = [] 45 | pts2 = [] 46 | 47 | for match in matches: 48 | if match: 49 | # print('%d -> %d: %f' % (match[0].queryIdx, match[0].trainIdx, match[0].distance)) 50 | m = match[0] 51 | good.append(m) 52 | pts2.append(kp2[m.trainIdx].pt) 53 | pts1.append(kp1[m.queryIdx].pt) 54 | 55 | pts1 = np.int32(pts1) 56 | pts2 = np.int32(pts2) 57 | F, mask = cv2.findFundamentalMat(pts1, pts2, cv2.FM_LMEDS) 58 | 59 | # We select only inlier points 60 | pts1 = pts1[mask.ravel() == 1] 61 | pts2 = pts2[mask.ravel() == 1] 62 | # draw_params = dict(matchColor=(0, 255, 0), 63 | # singlePointColor=(255, 0, 0), 64 | # matchesMask=mask.ravel().tolist(), 65 | # flags=0) 66 | # img3 = cv2.drawMatches(img1, kp1, img2, kp2, good, None, **draw_params) 67 | # plt.imshow(img3), plt.show() 68 | 69 | pts1 = pts1.transpose().astype(np.float32) 70 | pts2 = pts2.transpose().astype(np.float32) 71 | 72 | same_line_mask = pts1[1, :] == pts2[1, :] 73 | # threshold = (pts1[1, :] + 1) == pts2[1, :] 74 | # threshold |= (pts2[1, :] + 1) == pts1[1, :] 75 | # same_line_mask |= threshold 76 | pts1 = pts1[:, same_line_mask] 77 | pts2 = pts2[:, same_line_mask] 78 | 79 | return pts1, pts2 80 | 81 | 82 | # Input: P1 -> projection matrix 83 | # P2 -> projection matrix 84 | # x1 -> 2xN array of points 85 | # x2 -> 2xN array of points 86 | # Output: X -> 4xN array of 3D 
points (homogeneous coordinates) 87 | def triangulatePoints(P1, P2, x1, x2): 88 | X = cv2.triangulatePoints(P1, P2, x1, x2) 89 | return X / X[3] 90 | 91 | # folder = '/home/jcremona/data/03/' 92 | # filename = '000000.png' 93 | # 94 | # img1 = cv2.imread(folder + 'image_0/' + filename,0) #queryimage # left image 95 | # img2 = cv2.imread(folder + 'image_1/' + filename,0) #trainimage # right image 96 | # 97 | # # KITTI Seq 3 left camera calibration 98 | # K0 = np.matrix([[7.215377000000e+02, 0.000000000000e+00, 6.095593000000e+02, 0.000000000000e+00], 99 | # [0.000000000000e+00, 7.215377000000e+02, 1.728540000000e+02, 0.000000000000e+00], 100 | # [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 0.000000000000e+00]]) 101 | # 102 | # # KITTI Seq 3 right camera calibration 103 | # K1 = np.matrix([[7.215377000000e+02, 0.000000000000e+00, 6.095593000000e+02, -3.875744000000e+02], 104 | # [0.000000000000e+00, 7.215377000000e+02, 1.728540000000e+02, 0.000000000000e+00], 105 | # [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 0.000000000000e+00]]) 106 | # 107 | # # KITTI Seq 3 1st frame pose 108 | # Rt0 = np.matrix([[1.000000e+00, -1.822835e-10, 5.241111e-10, -5.551115e-17], 109 | # [-1.822835e-10, 9.999999e-01, -5.072855e-10, -3.330669e-16], 110 | # [5.241111e-10, -5.072855e-10, 9.999999e-01, 2.220446e-16], 111 | # [0.,0.,0.,1.]]) 112 | -------------------------------------------------------------------------------- /vgg_trainable/test/test_model.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # 4 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # wganvo is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # wganvo is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with wganvo. If not, see . 
19 | # 20 | 21 | import tensorflow as tf 22 | import numpy as np 23 | from scipy import linalg 24 | import argparse 25 | import sys, os, inspect 26 | import time 27 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 28 | parentdir = os.path.dirname(currentdir) 29 | sys.path.insert(0, parentdir) 30 | from input_data import read_data_sets, DataSet 31 | from eval_utils import infer_relative_poses, get_absolute_poses, plot_frames_vs_abs_distance 32 | 33 | # import transformations 34 | 35 | DEFAULT_INTRINSIC_FILE_NAME = "intrinsic_matrix.txt" 36 | 37 | 38 | def test_model(model_name, data_dir, output_dir, batch_size): 39 | sess = tf.Session() 40 | saver = tf.train.import_meta_graph(model_name + ".meta") 41 | # print(model_name) 42 | # inverse_intrinsic_matrix = np.linalg.inv(intrinsic_matrix) 43 | saver.restore(sess, model_name) # tf.train.latest_checkpoint('./')) 44 | graph = tf.get_default_graph() 45 | outputs = graph.get_tensor_by_name("outputs:0") 46 | targets_placeholder = graph.get_tensor_by_name("targets_placeholder:0") 47 | images_placeholder = graph.get_tensor_by_name("images_placeholder:0") 48 | train_mode = graph.get_tensor_by_name("train_mode:0") # FIXME this could raise an exception 49 | start_loading_time = time.time() 50 | images, targets, _, groups, _ = read_data_sets(data_dir) 51 | start_infer_time = time.time() 52 | dataset = DataSet(images, targets, groups, fake_data=False) 53 | relative_poses_prediction, relative_poses_target = infer_relative_poses(sess, dataset, batch_size, 54 | images_placeholder, 55 | outputs, 56 | targets_placeholder, train_mode) 57 | end_time = time.time() 58 | print("Inference time: {}".format(end_time - start_infer_time)) 59 | print("Load Images + Inference Time: {}".format(end_time - start_loading_time)) 60 | print("Images in the seq: {}".format(relative_poses_prediction.shape[0])) 61 | frames, abs_distance = plot_frames_vs_abs_distance(relative_poses_prediction, relative_poses_target, dataset, 62 | output_dir, save_txt=True, plot=True) 63 | points = np.array(zip(frames, abs_distance)) 64 | np.savetxt(os.path.join(output_dir, "frames_vs_abs_distance.txt"), points) 65 | np.savetxt(os.path.join(output_dir, "relative_poses_prediction.txt"), relative_poses_prediction.reshape(-1, 12), 66 | delimiter=' ') 67 | np.savetxt(os.path.join(output_dir, "relative_poses_target.txt"), relative_poses_target.reshape(-1, 12), 68 | delimiter=' ') 69 | absolute_poses_prediction = get_absolute_poses(relative_poses_prediction) 70 | absolute_poses_target = get_absolute_poses(relative_poses_target) 71 | np.savetxt(os.path.join(output_dir, "absolute_poses_prediction.txt"), 72 | absolute_poses_prediction.reshape(-1, 12), delimiter=' ') 73 | np.savetxt(os.path.join(output_dir, "absolute_poses_target.txt"), absolute_poses_target.reshape(-1, 12), 74 | delimiter=' ') 75 | 76 | 77 | def main(_): 78 | # intrinsic_matrix = np.matrix(np.loadtxt(FLAGS.intrinsics_path, delimiter=' ')) 79 | test_model(FLAGS.model_name, FLAGS.data_dir, FLAGS.output_dir, FLAGS.batch_size) 80 | 81 | 82 | if __name__ == '__main__': 83 | parser = argparse.ArgumentParser() 84 | parser.add_argument( 85 | 'model_name', 86 | type=str, 87 | help='Model name' 88 | ) 89 | parser.add_argument( 90 | 'data_dir', 91 | type=str, 92 | help='Directory containing the input data' 93 | ) 94 | # parser.add_argument( 95 | # '--intrinsics_path', 96 | # type=str, 97 | # default=os.path.join(os.getcwd(), DEFAULT_INTRINSIC_FILE_NAME), 98 | # help='Intrinsic matrix path' 99 | # ) 100 | parser.add_argument( 101
| '--output_dir', 102 | type=str, 103 | default=os.getcwd(), 104 | help='Output dir' 105 | ) 106 | parser.add_argument( 107 | '--batch_size', 108 | type=int, 109 | default=100, 110 | help='Batch size' 111 | ) 112 | 113 | FLAGS, unparsed = parser.parse_known_args() 114 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 115 | -------------------------------------------------------------------------------- /vgg_trainable/test/show_traj_kitti.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # 4 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # wganvo is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # wganvo is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with wganvo. If not, see . 19 | # 20 | 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | from mpl_toolkits.mplot3d import Axes3D 24 | import matplotlib.animation as animation 25 | from scipy import linalg 26 | import argparse 27 | import sys, os, inspect 28 | 29 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 30 | parentdir = os.path.dirname(currentdir) 31 | sys.path.insert(0,parentdir) 32 | from input_data import read_data_sets, DataSet 33 | from matplotlib import gridspec 34 | 35 | # Example: 36 | # python show_traj_kitti.py ~/KITTI/ gt_pose_00.txt --poses_pred orb_slam2_pose_00.txt wganvo_pose_00.txt --labels ORB-SLAM2 WGANVO 37 | # Labels and filenames must be the same length 38 | 39 | def get_cmap(n, name='hsv'): 40 | '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct 41 | RGB color; the keyword argument name must be a standard mpl colormap name.''' 42 | return plt.cm.get_cmap(name, n) 43 | 44 | def show(images, poses, pred_poses, labels, points=None): 45 | fig = plt.figure() 46 | gs = gridspec.GridSpec(2, 1, height_ratios=[1, 2]) 47 | ax1 = fig.add_subplot(gs[0]) 48 | artist = ax1.imshow(images[0], cmap='gray') 49 | ax1.axis('off') 50 | ax2 = fig.add_subplot(gs[1], projection='3d') 51 | 52 | x = [] 53 | y = [] 54 | z = [] 55 | lin = ax2.plot(x, y, z, label='Ground Truth', linewidth=1.1)[0] 56 | data_ = [poses] 57 | lines_ = [lin] 58 | for i, label in zip(pred_poses, labels): 59 | lin_pred = ax2.plot(x, y, z, label=label, linewidth=1.1)[0] 60 | data_.append(i) 61 | lines_.append(lin_pred) 62 | 63 | 64 | cloud = ax2.scatter(x,y,z, c="red", s=0.1) 65 | # data_.append(points) 66 | # lines_.append(cloud) 67 | ax2.legend() 68 | ax2.set_xlabel('X (m)') 69 | ax2.set_ylabel('Y (m)') 70 | ax2.set_zlabel('Z (m)') 71 | 72 | max_range = np.array([poses[:,0].max() - poses[:,0].min(), poses[:,1].max() - poses[:,1].min(), 73 | poses[:,2].max() - poses[:,2].min()]).max() / 2.0 74 | mean_x = poses[:,0].mean() 75 | mean_y = poses[:,1].mean() 76 | mean_z = poses[:,2].mean() 77 | 78 | ax2.set_xlim(mean_x - max_range, mean_x + max_range) 79 | ax2.set_ylim(mean_y - max_range, mean_y + max_range) 80 | ax2.set_zlim(mean_z - max_range, mean_z + max_range) 81 | 82 | 
def update(num, img, datalines, lines, scatter, points): 83 | artist.set_data(img[num]) 84 | idx = num + 1 85 | for lin, data in zip(lines, datalines): 86 | lin.set_xdata(data[:idx,0]) 87 | lin.set_ydata(data[:idx,1]) 88 | lin.set_3d_properties(data[:idx,2]) 89 | #scatter._offsets3d = (points[num,0], points[num,1], points[num,2]) 90 | return lines 91 | 92 | #assert len(images) == len(poses) 93 | # FIXME originally this was frames = len(pred_poses) 94 | ani = animation.FuncAnimation(fig, update, frames=len(poses), fargs=(images,data_,lines_, cloud, points), 95 | interval=10, blit=False) 96 | plt.show() 97 | 98 | def main(): 99 | images,_,_,_, points = read_data_sets(FLAGS.img_file) 100 | poses = np.loadtxt(FLAGS.poses, delimiter=" ") 101 | assert len(FLAGS.poses_pred) == len(FLAGS.labels), "Num. of pose files and num. of labels must be the same" 102 | poses_pred = [] 103 | for pose_file in FLAGS.poses_pred: 104 | pose_pred = np.loadtxt(pose_file, delimiter=" ") 105 | pose_pred = pose_pred.reshape((-1, 3, 4)) 106 | pose_pred = pose_pred[:, 0:3, 3] 107 | poses_pred.append(pose_pred) 108 | 109 | # points = None 110 | # if FLAGS.points != None: 111 | # points = np.load(FLAGS.points) 112 | 113 | last = images[-1][..., 1] 114 | last = last.reshape((-1,last.shape[0],last.shape[1])) 115 | im = np.append(images[..., 0], last,axis=0) 116 | print(len(im)) 117 | print(len(poses)) 118 | #assert len(im) == len(poses) 119 | 120 | poses = poses.reshape((-1,3,4)) 121 | poses = poses[:, 0:3, 3] 122 | # print(poses.shape) 123 | #print(poses_pred.shape) 124 | # print(points.shape) 125 | show(im, poses, poses_pred, FLAGS.labels) 126 | 127 | 128 | if __name__ == '__main__': 129 | parser = argparse.ArgumentParser() 130 | parser.add_argument( 131 | 'img_file', 132 | type=str, 133 | help='Images file' 134 | ) 135 | parser.add_argument( 136 | 'poses', 137 | type=str, 138 | help='Ground truth poses file' 139 | ) 140 | parser.add_argument( 141 | '--poses_pred', 142 | nargs="*", 143 | help='Predicted poses files' 144 | ) 145 | parser.add_argument( 146 | '--labels', 147 | nargs="*", 148 | help='Labels/Legends to be used in the plot' 149 | ) 150 | parser.add_argument( 151 | '--points', 152 | type=str, 153 | help='3D Points' 154 | ) 155 | FLAGS, unparsed = parser.parse_known_args() 156 | main() 157 | 158 | 159 | -------------------------------------------------------------------------------- /wgan/tflib/ops/deconv2d.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see <http://www.gnu.org/licenses/>.
20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import tflib as lib 45 | 46 | import numpy as np 47 | import tensorflow as tf 48 | 49 | _default_weightnorm = False 50 | def enable_default_weightnorm(): 51 | global _default_weightnorm 52 | _default_weightnorm = True 53 | 54 | _weights_stdev = None 55 | def set_weights_stdev(weights_stdev): 56 | global _weights_stdev 57 | _weights_stdev = weights_stdev 58 | 59 | def unset_weights_stdev(): 60 | global _weights_stdev 61 | _weights_stdev = None 62 | 63 | def Deconv2D( 64 | name, 65 | input_dim, 66 | output_dim, 67 | filter_size, 68 | inputs, 69 | he_init=True, 70 | weightnorm=None, 71 | biases=True, 72 | gain=1., 73 | mask_type=None, 74 | ): 75 | """ 76 | inputs: tensor of shape (batch size, height, width, input_dim) 77 | returns: tensor of shape (batch size, 2*height, 2*width, output_dim) 78 | """ 79 | with tf.name_scope(name) as scope: 80 | 81 | if mask_type != None: 82 | raise Exception('Unsupported configuration') 83 | 84 | def uniform(stdev, size): 85 | return np.random.uniform( 86 | low=-stdev * np.sqrt(3), 87 | high=stdev * np.sqrt(3), 88 | size=size 89 | ).astype('float32') 90 | 91 | stride = 2 92 | fan_in = input_dim * filter_size**2 / (stride**2) 93 | fan_out = output_dim * filter_size**2 94 | 95 | if he_init: 96 | filters_stdev = np.sqrt(4./(fan_in+fan_out)) 97 | else: # Normalized init (Glorot & Bengio) 98 | filters_stdev = np.sqrt(2./(fan_in+fan_out)) 99 | 100 | 101 | if _weights_stdev is not None: 102 | filter_values = uniform( 103 | _weights_stdev, 104 | (filter_size, filter_size, output_dim, input_dim) 105 | ) 106 | else: 107 | filter_values = uniform( 108 | filters_stdev, 109 | (filter_size, filter_size, output_dim, input_dim) 110 | ) 111 | 112 | filter_values *= gain 113 | 114 | filters = lib.param( 115 | name+'.Filters', 116 | filter_values 117 | ) 118 | 119 | if weightnorm==None: 120 | weightnorm = _default_weightnorm 121 | if weightnorm: 122 | norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1,3))) 123 | target_norms = lib.param( 124 | name + '.g', 125 | norm_values 126 | ) 127 | with tf.name_scope('weightnorm') as scope: 128 | norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1,3])) 129 | filters = filters * tf.expand_dims(target_norms / norms, 1) 130 | 131 | 132 | inputs = tf.transpose(inputs, [0,2,3,1], 
name='NCHW_to_NHWC') 133 | 134 | input_shape = tf.shape(inputs) 135 | try: # tf pre-1.0 (top) vs 1.0 (bottom) 136 | output_shape = tf.pack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim]) 137 | except Exception as e: 138 | output_shape = tf.stack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim]) 139 | 140 | result = tf.nn.conv2d_transpose( 141 | value=inputs, 142 | filter=filters, 143 | output_shape=output_shape, 144 | strides=[1, 2, 2, 1], 145 | padding='SAME' 146 | ) 147 | 148 | if biases: 149 | _biases = lib.param( 150 | name+'.Biases', 151 | np.zeros(output_dim, dtype='float32') 152 | ) 153 | result = tf.nn.bias_add(result, _biases) 154 | 155 | result = tf.transpose(result, [0,3,1,2], name='NHWC_to_NCHW') 156 | 157 | 158 | return result 159 | -------------------------------------------------------------------------------- /wgan/tflib/ops/conv1d.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 
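`Conv1D` below follows the same pattern as the other ops in this package: filters are created through `lib.param`, so a second call with the same `name` reuses the same weights. A minimal usage sketch under the repo's TF 1.x API; the tensor shapes and the op name are illustrative only, and it assumes the `wgan` directory is on `PYTHONPATH`:

    import tensorflow as tf
    from tflib.ops.conv1d import Conv1D

    # NCW layout, as documented below: batch of 8 sequences, 16 channels, width 128.
    inputs = tf.placeholder(tf.float32, [8, 16, 128])
    out = Conv1D('Discriminator.Conv1', input_dim=16, output_dim=32,
                 filter_size=5, inputs=inputs, stride=1)
    # out: (8, 32, 128); calling Conv1D again with the same name shares the filters.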
43 | 44 | import tflib as lib 45 | 46 | import numpy as np 47 | import tensorflow as tf 48 | 49 | _default_weightnorm = False 50 | def enable_default_weightnorm(): 51 | global _default_weightnorm 52 | _default_weightnorm = True 53 | 54 | def Conv1D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.): 55 | """ 56 | inputs: tensor of shape (batch size, num channels, width) 57 | mask_type: one of None, 'a', 'b' 58 | 59 | returns: tensor of shape (batch size, num channels, width) 60 | """ 61 | with tf.name_scope(name) as scope: 62 | 63 | if mask_type is not None: 64 | mask_type, mask_n_channels = mask_type 65 | 66 | mask = np.ones( 67 | (filter_size, input_dim, output_dim), 68 | dtype='float32' 69 | ) 70 | center = filter_size // 2 71 | 72 | # Mask out future locations 73 | # filter shape is (width, input channels, output channels) 74 | mask[center+1:, :, :] = 0. 75 | 76 | # Mask out future channels 77 | for i in xrange(mask_n_channels): 78 | for j in xrange(mask_n_channels): 79 | if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j): 80 | mask[ 81 | center, 82 | i::mask_n_channels, 83 | j::mask_n_channels 84 | ] = 0. 85 | 86 | 87 | def uniform(stdev, size): 88 | return np.random.uniform( 89 | low=-stdev * np.sqrt(3), 90 | high=stdev * np.sqrt(3), 91 | size=size 92 | ).astype('float32') 93 | 94 | fan_in = input_dim * filter_size 95 | fan_out = output_dim * filter_size / stride 96 | 97 | if mask_type is not None: # only approximately correct 98 | fan_in /= 2. 99 | fan_out /= 2. 100 | 101 | if he_init: 102 | filters_stdev = np.sqrt(4./(fan_in+fan_out)) 103 | else: # Normalized init (Glorot & Bengio) 104 | filters_stdev = np.sqrt(2./(fan_in+fan_out)) 105 | 106 | filter_values = uniform( 107 | filters_stdev, 108 | (filter_size, input_dim, output_dim) 109 | ) 110 | # print "WARNING IGNORING GAIN" 111 | filter_values *= gain 112 | 113 | filters = lib.param(name+'.Filters', filter_values) 114 | 115 | if weightnorm==None: 116 | weightnorm = _default_weightnorm 117 | if weightnorm: 118 | norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1))) 119 | target_norms = lib.param( 120 | name + '.g', 121 | norm_values 122 | ) 123 | with tf.name_scope('weightnorm') as scope: 124 | norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1])) 125 | filters = filters * (target_norms / norms) 126 | 127 | if mask_type is not None: 128 | with tf.name_scope('filter_mask'): 129 | filters = filters * mask 130 | 131 | result = tf.nn.conv1d( 132 | value=inputs, 133 | filters=filters, 134 | stride=stride, 135 | padding='SAME', 136 | data_format='NCHW' 137 | ) 138 | 139 | if biases: 140 | _biases = lib.param( 141 | name+'.Biases', 142 | np.zeros([output_dim], dtype='float32') 143 | ) 144 | 145 | # result = result + _biases 146 | 147 | result = tf.expand_dims(result, 3) 148 | result = tf.nn.bias_add(result, _biases, data_format='NCHW') 149 | result = tf.squeeze(result) 150 | 151 | return result 152 | -------------------------------------------------------------------------------- /wgan/tflib/inception_score.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 
4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | # From https://github.com/openai/improved-gan/blob/master/inception_score/model.py 44 | # Code derived from tensorflow/tensorflow/models/image/imagenet/classify_image.py 45 | from __future__ import absolute_import 46 | from __future__ import division 47 | from __future__ import print_function 48 | 49 | import os.path 50 | import sys 51 | import tarfile 52 | 53 | import numpy as np 54 | from six.moves import urllib 55 | import tensorflow as tf 56 | import glob 57 | import scipy.misc 58 | import math 59 | import sys 60 | 61 | MODEL_DIR = '/tmp/imagenet' 62 | DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' 63 | softmax = None 64 | 65 | # Call this function with list of images. Each of elements should be a 66 | # numpy array with values ranging from 0 to 255. 
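# A sketch of a typical call (the random uint8 images here are placeholders;
# the score is only meaningful for real samples):
#   images = [np.uint8(np.random.rand(32, 32, 3) * 255) for _ in range(100)]
#   mean_score, std_score = get_inception_score(images, splits=10)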
67 | def get_inception_score(images, splits=10): 68 | assert(type(images) == list) 69 | assert(type(images[0]) == np.ndarray) 70 | assert(len(images[0].shape) == 3) 71 | assert(np.max(images[0]) > 10) 72 | assert(np.min(images[0]) >= 0.0) 73 | inps = [] 74 | for img in images: 75 | img = img.astype(np.float32) 76 | inps.append(np.expand_dims(img, 0)) 77 | bs = 100 78 | with tf.Session() as sess: 79 | preds = [] 80 | n_batches = int(math.ceil(float(len(inps)) / float(bs))) 81 | for i in range(n_batches): 82 | # sys.stdout.write(".") 83 | # sys.stdout.flush() 84 | inp = inps[(i * bs):min((i + 1) * bs, len(inps))] 85 | inp = np.concatenate(inp, 0) 86 | pred = sess.run(softmax, {'ExpandDims:0': inp}) 87 | preds.append(pred) 88 | preds = np.concatenate(preds, 0) 89 | scores = [] 90 | for i in range(splits): 91 | part = preds[(i * preds.shape[0] // splits):((i + 1) * preds.shape[0] // splits), :] 92 | kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0))) 93 | kl = np.mean(np.sum(kl, 1)) 94 | scores.append(np.exp(kl)) 95 | return np.mean(scores), np.std(scores) 96 | 97 | # This function is called automatically. 98 | def _init_inception(): 99 | global softmax 100 | if not os.path.exists(MODEL_DIR): 101 | os.makedirs(MODEL_DIR) 102 | filename = DATA_URL.split('/')[-1] 103 | filepath = os.path.join(MODEL_DIR, filename) 104 | if not os.path.exists(filepath): 105 | def _progress(count, block_size, total_size): 106 | sys.stdout.write('\r>> Downloading %s %.1f%%' % ( 107 | filename, float(count * block_size) / float(total_size) * 100.0)) 108 | sys.stdout.flush() 109 | filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress) 110 | print() 111 | statinfo = os.stat(filepath) 112 | print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.') 113 | tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR) 114 | with tf.gfile.FastGFile(os.path.join( 115 | MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f: 116 | graph_def = tf.GraphDef() 117 | graph_def.ParseFromString(f.read()) 118 | _ = tf.import_graph_def(graph_def, name='') 119 | # Works with an arbitrary minibatch size. 120 | with tf.Session() as sess: 121 | pool3 = sess.graph.get_tensor_by_name('pool_3:0') 122 | ops = pool3.graph.get_operations() 123 | for op_idx, op in enumerate(ops): 124 | for o in op.outputs: 125 | shape = o.get_shape() 126 | shape = [s.value for s in shape] 127 | new_shape = [] 128 | for j, s in enumerate(shape): 129 | if s == 1 and j == 0: 130 | new_shape.append(None) 131 | else: 132 | new_shape.append(s) 133 | o._shape = tf.TensorShape(new_shape) 134 | w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1] 135 | logits = tf.matmul(tf.squeeze(pool3), w) 136 | softmax = tf.nn.softmax(logits) 137 | 138 | if softmax is None: 139 | _init_inception() 140 | -------------------------------------------------------------------------------- /wgan/tflib/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 
12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see <http://www.gnu.org/licenses/>. 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import numpy as np 45 | import tensorflow as tf 46 | 47 | import locale 48 | 49 | locale.setlocale(locale.LC_ALL, '') 50 | 51 | _params = {} 52 | _param_aliases = {} 53 | def param(name, *args, **kwargs): 54 | """ 55 | A wrapper for `tf.Variable` which enables parameter sharing in models. 56 | 57 | Creates and returns TensorFlow variables just like `tf.Variable`, 58 | except that if you try to create a param with the same name as a 59 | previously-created one, `param(...)` will just return the old one instead of 60 | making a new one. 61 | 62 | This constructor also adds a `param` attribute to the variables it 63 | creates, so that you can easily search a graph for all params. 64 | """ 65 | 66 | if name not in _params: 67 | kwargs['name'] = name 68 | param = tf.Variable(*args, **kwargs) 69 | param.param = True 70 | _params[name] = param 71 | result = _params[name] 72 | i = 0 73 | while result in _param_aliases: 74 | # print 'following alias {}: {} to {}'.format(i, result, _param_aliases[result]) 75 | i += 1 76 | result = _param_aliases[result] 77 | return result 78 | 79 | def params_with_name(name): 80 | return [p for n,p in _params.items() if name in n] 81 | 82 | def delete_all_params(): 83 | _params.clear() 84 | 85 | def alias_params(replace_dict): 86 | for old,new in replace_dict.items(): 87 | # print "aliasing {} to {}".format(old,new) 88 | _param_aliases[old] = new 89 | 90 | def delete_param_aliases(): 91 | _param_aliases.clear() 92 | 93 | # def search(node, critereon): 94 | # """ 95 | # Traverse the Theano graph starting at `node` and return a list of all nodes 96 | # which match the `critereon` function.
When optimizing a cost function, you 97 | # can use this to get a list of all of the trainable params in the graph, like 98 | # so: 99 | 100 | # `lib.search(cost, lambda x: hasattr(x, "param"))` 101 | # """ 102 | 103 | # def _search(node, critereon, visited): 104 | # if node in visited: 105 | # return [] 106 | # visited.add(node) 107 | 108 | # results = [] 109 | # if isinstance(node, T.Apply): 110 | # for inp in node.inputs: 111 | # results += _search(inp, critereon, visited) 112 | # else: # Variable node 113 | # if critereon(node): 114 | # results.append(node) 115 | # if node.owner is not None: 116 | # results += _search(node.owner, critereon, visited) 117 | # return results 118 | 119 | # return _search(node, critereon, set()) 120 | 121 | # def print_params_info(params): 122 | # """Print information about the parameters in the given param set.""" 123 | 124 | # params = sorted(params, key=lambda p: p.name) 125 | # values = [p.get_value(borrow=True) for p in params] 126 | # shapes = [p.shape for p in values] 127 | # print "Params for cost:" 128 | # for param, value, shape in zip(params, values, shapes): 129 | # print "\t{0} ({1})".format( 130 | # param.name, 131 | # ",".join([str(x) for x in shape]) 132 | # ) 133 | 134 | # total_param_count = 0 135 | # for shape in shapes: 136 | # param_count = 1 137 | # for dim in shape: 138 | # param_count *= dim 139 | # total_param_count += param_count 140 | # print "Total parameter count: {0}".format( 141 | # locale.format("%d", total_param_count, grouping=True) 142 | # ) 143 | 144 | def print_model_settings(locals_): 145 | print "Uppercase local vars:" 146 | all_vars = [(k,v) for (k,v) in locals_.items() if (k.isupper() and k!='T' and k!='SETTINGS' and k!='ALL_SETTINGS')] 147 | all_vars = sorted(all_vars, key=lambda x: x[0]) 148 | for var_name, var_value in all_vars: 149 | print "\t{}: {}".format(var_name, var_value) 150 | 151 | 152 | def print_model_settings_dict(settings): 153 | print "Settings dict:" 154 | all_vars = [(k,v) for (k,v) in settings.items()] 155 | all_vars = sorted(all_vars, key=lambda x: x[0]) 156 | for var_name, var_value in all_vars: 157 | print "\t{}: {}".format(var_name, var_value) -------------------------------------------------------------------------------- /wgan/tflib/ops/conv2d.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 
20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import tflib as lib 45 | 46 | import numpy as np 47 | import tensorflow as tf 48 | 49 | _default_weightnorm = False 50 | def enable_default_weightnorm(): 51 | global _default_weightnorm 52 | _default_weightnorm = True 53 | 54 | _weights_stdev = None 55 | def set_weights_stdev(weights_stdev): 56 | global _weights_stdev 57 | _weights_stdev = weights_stdev 58 | 59 | def unset_weights_stdev(): 60 | global _weights_stdev 61 | _weights_stdev = None 62 | 63 | def Conv2D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.): 64 | """ 65 | inputs: tensor of shape (batch size, num channels, height, width) 66 | mask_type: one of None, 'a', 'b' 67 | 68 | returns: tensor of shape (batch size, num channels, height, width) 69 | """ 70 | with tf.name_scope(name) as scope: 71 | 72 | if mask_type is not None: 73 | mask_type, mask_n_channels = mask_type 74 | 75 | mask = np.ones( 76 | (filter_size, filter_size, input_dim, output_dim), 77 | dtype='float32' 78 | ) 79 | center = filter_size // 2 80 | 81 | # Mask out future locations 82 | # filter shape is (height, width, input channels, output channels) 83 | mask[center+1:, :, :, :] = 0. 84 | mask[center, center+1:, :, :] = 0. 85 | 86 | # Mask out future channels 87 | for i in xrange(mask_n_channels): 88 | for j in xrange(mask_n_channels): 89 | if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j): 90 | mask[ 91 | center, 92 | center, 93 | i::mask_n_channels, 94 | j::mask_n_channels 95 | ] = 0. 96 | 97 | 98 | def uniform(stdev, size): 99 | return np.random.uniform( 100 | low=-stdev * np.sqrt(3), 101 | high=stdev * np.sqrt(3), 102 | size=size 103 | ).astype('float32') 104 | 105 | fan_in = input_dim * filter_size**2 106 | fan_out = output_dim * filter_size**2 / (stride**2) 107 | 108 | if mask_type is not None: # only approximately correct 109 | fan_in /= 2. 110 | fan_out /= 2. 
111 | 112 | if he_init: 113 | filters_stdev = np.sqrt(4./(fan_in+fan_out)) 114 | else: # Normalized init (Glorot & Bengio) 115 | filters_stdev = np.sqrt(2./(fan_in+fan_out)) 116 | 117 | if _weights_stdev is not None: 118 | filter_values = uniform( 119 | _weights_stdev, 120 | (filter_size, filter_size, input_dim, output_dim) 121 | ) 122 | else: 123 | filter_values = uniform( 124 | filters_stdev, 125 | (filter_size, filter_size, input_dim, output_dim) 126 | ) 127 | 128 | # print "WARNING IGNORING GAIN" 129 | filter_values *= gain 130 | 131 | filters = lib.param(name+'.Filters', filter_values) 132 | 133 | if weightnorm==None: 134 | weightnorm = _default_weightnorm 135 | if weightnorm: 136 | norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1,2))) 137 | target_norms = lib.param( 138 | name + '.g', 139 | norm_values 140 | ) 141 | with tf.name_scope('weightnorm') as scope: 142 | norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1,2])) 143 | filters = filters * (target_norms / norms) 144 | 145 | if mask_type is not None: 146 | with tf.name_scope('filter_mask'): 147 | filters = filters * mask 148 | 149 | result = tf.nn.conv2d( 150 | input=inputs, 151 | filter=filters, 152 | strides=[1, 1, stride, stride], 153 | padding='SAME', 154 | data_format='NCHW' 155 | ) 156 | 157 | if biases: 158 | _biases = lib.param( 159 | name+'.Biases', 160 | np.zeros(output_dim, dtype='float32') 161 | ) 162 | 163 | result = tf.nn.bias_add(result, _biases, data_format='NCHW') 164 | 165 | 166 | return result 167 | -------------------------------------------------------------------------------- /eval_kitti/matrix.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of wganvo. 3 | * This file belongs to the authors of KITTI (http://www.cvlibs.net/datasets/kitti/eval_odometry.php) 4 | * (see original license below) 5 | * 6 | * Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 7 | * For more information see 8 | * 9 | * wganvo is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation, either version 3 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * wganvo is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * You should have received a copy of the GNU General Public License 20 | * along with wganvo. If not, see . 21 | */ 22 | /* 23 | Copyright 2011. All rights reserved. 24 | Institute of Measurement and Control Systems 25 | Karlsruhe Institute of Technology, Germany 26 | 27 | This file is part of libviso2. 28 | Authors: Andreas Geiger 29 | 30 | libviso2 is free software; you can redistribute it and/or modify it under the 31 | terms of the GNU General Public License as published by the Free Software 32 | Foundation; either version 2 of the License, or any later version. 33 | 34 | libviso2 is distributed in the hope that it will be useful, but WITHOUT ANY 35 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 36 | PARTICULAR PURPOSE. See the GNU General Public License for more details. 
37 | 38 | You should have received a copy of the GNU General Public License along with 39 | libviso2; if not, write to the Free Software Foundation, Inc., 51 Franklin 40 | Street, Fifth Floor, Boston, MA 02110-1301, USA 41 | */ 42 | 43 | #ifndef MATRIX_H 44 | #define MATRIX_H 45 | 46 | #include <stdio.h> 47 | #include <string.h> 48 | #include <math.h> 49 | #include <iostream> 50 | #include <vector> 51 | 52 | #ifndef _MSC_VER 53 | #include <stdint.h> 54 | #else 55 | typedef __int8 int8_t; 56 | typedef __int16 int16_t; 57 | typedef __int32 int32_t; 58 | typedef __int64 int64_t; 59 | typedef unsigned __int8 uint8_t; 60 | typedef unsigned __int16 uint16_t; 61 | typedef unsigned __int32 uint32_t; 62 | typedef unsigned __int64 uint64_t; 63 | #endif 64 | 65 | #define endll endl << endl // double end line definition 66 | 67 | typedef double FLOAT; // double precision 68 | //typedef float FLOAT; // single precision 69 | 70 | class Matrix { 71 | 72 | public: 73 | 74 | // constructor / deconstructor 75 | Matrix (); // init empty 0x0 matrix 76 | Matrix (const int32_t m,const int32_t n); // init empty mxn matrix 77 | Matrix (const int32_t m,const int32_t n,const FLOAT* val_); // init mxn matrix with values from array 'val' 78 | Matrix (const Matrix &M); // creates deepcopy of M 79 | ~Matrix (); 80 | 81 | // assignment operator, copies contents of M 82 | Matrix& operator= (const Matrix &M); 83 | 84 | // copies submatrix of M into array 'val', default values copy whole row/column/matrix 85 | void getData(FLOAT* val_,int32_t i1=0,int32_t j1=0,int32_t i2=-1,int32_t j2=-1); 86 | 87 | // set or get submatrices of current matrix 88 | Matrix getMat(int32_t i1,int32_t j1,int32_t i2=-1,int32_t j2=-1); 89 | void setMat(const Matrix &M,const int32_t i,const int32_t j); 90 | 91 | // set sub-matrix to scalar (default 0), -1 as end replaces whole row/column/matrix 92 | void setVal(FLOAT s,int32_t i1=0,int32_t j1=0,int32_t i2=-1,int32_t j2=-1); 93 | 94 | // set (part of) diagonal to scalar, -1 as end replaces whole diagonal 95 | void setDiag(FLOAT s,int32_t i1=0,int32_t i2=-1); 96 | 97 | // clear matrix 98 | void zero(); 99 | 100 | // extract columns with given index 101 | Matrix extractCols (std::vector<int> idx); 102 | 103 | // create identity matrix 104 | static Matrix eye (const int32_t m); 105 | void eye (); 106 | 107 | // create diagonal matrix with nx1 or 1xn matrix M as elements 108 | static Matrix diag(const Matrix &M); 109 | 110 | // returns the m-by-n matrix whose elements are taken column-wise from M 111 | static Matrix reshape(const Matrix &M,int32_t m,int32_t n); 112 | 113 | // create 3x3 rotation matrices (convention: http://en.wikipedia.org/wiki/Rotation_matrix) 114 | static Matrix rotMatX(const FLOAT &angle); 115 | static Matrix rotMatY(const FLOAT &angle); 116 | static Matrix rotMatZ(const FLOAT &angle); 117 | 118 | // simple arithmetic operations 119 | Matrix operator+ (const Matrix &M); // add matrix 120 | Matrix operator- (const Matrix &M); // subtract matrix 121 | Matrix operator* (const Matrix &M); // multiply with matrix 122 | Matrix operator* (const FLOAT &s); // multiply with scalar 123 | Matrix operator/ (const Matrix &M); // divide elementwise by matrix (or vector) 124 | Matrix operator/ (const FLOAT &s); // divide by scalar 125 | Matrix operator- (); // negative matrix 126 | Matrix operator~ (); // transpose 127 | FLOAT l2norm (); // euclidean norm (vectors) / frobenius norm (matrices) 128 | FLOAT mean (); // mean of all elements in matrix 129 | 130 | // complex arithmetic operations 131 | static Matrix cross (const Matrix &a, const Matrix &b); // cross
product of two vectors 132 | static Matrix inv (const Matrix &M); // invert matrix M 133 | bool inv (); // invert this matrix 134 | FLOAT det (); // returns determinant of matrix 135 | bool solve (const Matrix &M,FLOAT eps=1e-20); // solve linear system M*x=B, replaces *this and M 136 | bool lu(int32_t *idx, FLOAT &d, FLOAT eps=1e-20); // replace *this by lower upper decomposition 137 | void svd(Matrix &U,Matrix &W,Matrix &V); // singular value decomposition *this = U*diag(W)*V^T 138 | 139 | // print matrix to stream 140 | friend std::ostream& operator<< (std::ostream& out,const Matrix& M); 141 | 142 | // direct data access 143 | FLOAT **val; 144 | int32_t m,n; 145 | 146 | private: 147 | 148 | void allocateMemory (const int32_t m_,const int32_t n_); 149 | void releaseMemory (); 150 | inline FLOAT pythag(FLOAT a,FLOAT b); 151 | 152 | }; 153 | 154 | #endif // MATRIX_H 155 | -------------------------------------------------------------------------------- /lie_algebra.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF8 -*- 2 | # 3 | # This file is part of wganvo. 4 | # This file is based on a file from evo (github.com/MichaelGrupp/evo) (see original license below) 5 | # 6 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 7 | # For more information see 8 | # 9 | # wganvo is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # wganvo is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with wganvo. If not, see . 21 | 22 | # Provides functions for Lie group calculations. 23 | # author: Michael Grupp 24 | # 25 | # This file is part of evo (github.com/MichaelGrupp/evo). 26 | # 27 | # evo is free software: you can redistribute it and/or modify 28 | # it under the terms of the GNU General Public License as published by 29 | # the Free Software Foundation, either version 3 of the License, or 30 | # (at your option) any later version. 31 | # 32 | # evo is distributed in the hope that it will be useful, 33 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 34 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 35 | # GNU General Public License for more details. 36 | # 37 | # You should have received a copy of the GNU General Public License 38 | # along with evo. If not, see . 39 | 40 | import numpy as np 41 | import scipy.linalg as sl 42 | 43 | import transformations as tr 44 | 45 | 46 | class LieAlgebraException(Exception): 47 | pass 48 | 49 | 50 | def hat(v): 51 | """ 52 | :param v: 3x1 vector 53 | :return: 3x3 skew symmetric matrix 54 | """ 55 | return np.array([[0.0, -v[2], v[1]], 56 | [v[2], 0.0, -v[0]], 57 | [-v[1], v[0], 0.0]]) 58 | 59 | 60 | def vee(m): 61 | """ 62 | :param m: 3x3 skew symmetric matrix 63 | :return: 3x1 vector 64 | """ 65 | return np.array([-m[1, 2], m[0, 2], -m[0, 1]]) 66 | 67 | 68 | def so3_exp(axis, angle): 69 | """ 70 | Computes an SO(3) matrix from an axis/angle representation. 71 | Code source: http://stackoverflow.com/a/25709323 72 | :param axis: 3x1 rotation axis (unit vector!) 
73 | :param angle: radians 74 | :return: SO(3) rotation matrix (matrix exponential of so(3)) 75 | """ 76 | return sl.expm(np.cross(np.eye(3), axis / np.linalg.norm(axis) * angle)) 77 | 78 | 79 | def so3_log(r, return_angle_only=True, return_skew=False): 80 | """ 81 | :param r: SO(3) rotation matrix 82 | :param return_angle_only: return only the angle (default) 83 | :param return_skew: return skew symmetric Lie algebra element 84 | :return: axis/angle 85 | or if skew: 86 | 3x3 skew symmetric logarithmic map in so(3) (Ma, Soatto eq. 2.8) 87 | """ 88 | if not is_so3(r): 89 | raise LieAlgebraException("matrix is not a valid SO(3) group element") 90 | if return_angle_only and not return_skew: 91 | return np.arccos(min(1, max(-1, (np.trace(r) - 1) / 2))) 92 | angle, axis, _ = tr.rotation_from_matrix(se3(r, [0, 0, 0])) 93 | if return_skew: 94 | return hat(axis * angle) 95 | else: 96 | return axis, angle 97 | 98 | 99 | def se3(r=np.eye(3), t=np.array([0, 0, 0])): 100 | """ 101 | :param r: SO(3) rotation matrix 102 | :param t: 3x1 translation vector 103 | :return: SE(3) transformation matrix 104 | """ 105 | se3 = np.eye(4) 106 | se3[:3, :3] = r 107 | se3[:3, 3] = t 108 | return se3 109 | 110 | 111 | def sim3(r, t, s): 112 | """ 113 | :param r: SO(3) rotation matrix 114 | :param t: 3x1 translation vector 115 | :param s: positive, non-zero scale factor 116 | :return: Sim(3) similarity transformation matrix 117 | """ 118 | sim3 = np.eye(4) 119 | sim3[:3, :3] = s * r 120 | sim3[:3, 3] = t 121 | return sim3 122 | 123 | 124 | def so3_from_se3(p): 125 | """ 126 | :param p: absolute SE(3) pose 127 | :return: the SO(3) rotation matrix in p 128 | """ 129 | return p[:3, :3] 130 | 131 | 132 | def se3_inverse(p): 133 | """ 134 | :param p: absolute SE(3) pose 135 | :return: the inverted pose 136 | """ 137 | r_inv = p[:3, :3].transpose() 138 | t_inv = -r_inv.dot(p[:3, 3]) 139 | return se3(r_inv, t_inv) 140 | 141 | 142 | def is_so3(r): 143 | """ 144 | :param r: a 3x3 matrix 145 | :return: True if r is in the SO(3) group 146 | """ 147 | # Check the determinant. 148 | det_valid = np.isclose(np.linalg.det(r), [1.0], atol=1e-6) 149 | # Check if the transpose is the inverse. 
150 | inv_valid = np.allclose(r.transpose().dot(r), np.eye(3), atol=1e-6) 151 | return det_valid and inv_valid 152 | 153 | 154 | def is_se3(p): 155 | """ 156 | :param p: a 4x4 matrix 157 | :return: True if p is in the SE(3) group 158 | """ 159 | rot_valid = is_so3(p[:3, :3]) 160 | lower_valid = np.equal(p[3, :], np.array([0.0, 0.0, 0.0, 1.0])).all() 161 | return rot_valid and lower_valid 162 | 163 | 164 | def is_sim3(p, s): 165 | """ 166 | :param p: a 4x4 matrix 167 | :param s: expected scale factor 168 | :return: True if p is in the Sim(3) group with scale s 169 | """ 170 | rot = p[:3, :3] 171 | rot_unscaled = np.multiply(rot, 1.0 / s) 172 | rot_valid = is_so3(rot_unscaled) 173 | lower_valid = np.equal(p[3, :], np.array([0.0, 0.0, 0.0, 1.0])).all() 174 | return rot_valid and lower_valid 175 | 176 | 177 | def relative_so3(r1, r2): 178 | """ 179 | :param r1, r2: SO(3) matrices 180 | :return: the relative rotation r1^{⁻1} * r2 181 | """ 182 | return np.dot(r1.transpose(), r2) 183 | 184 | 185 | def relative_se3(p1, p2): 186 | """ 187 | :param p1, p2: SE(3) matrices 188 | :return: the relative transformation p1^{⁻1} * p2 189 | """ 190 | return np.dot(se3_inverse(p1), p2) 191 | 192 | 193 | def random_so3(): 194 | """ 195 | :return: a random SO(3) matrix (for debugging) 196 | """ 197 | return tr.random_rotation_matrix()[:3, :3] 198 | 199 | 200 | def random_se3(): 201 | """ 202 | :return: a random SE(3) matrix (for debugging) 203 | """ 204 | r = random_so3() 205 | t = tr.random_vector(3) 206 | return se3(r, t) 207 | -------------------------------------------------------------------------------- /wgan/tflib/ops/batchnorm.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 
35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import tflib as lib 45 | 46 | import numpy as np 47 | import tensorflow as tf 48 | 49 | def Batchnorm(name, axes, inputs, is_training=None, stats_iter=None, update_moving_stats=True, fused=True): 50 | if ((axes == [0,2,3]) or (axes == [0,2])) and fused==True: 51 | if axes==[0,2]: 52 | inputs = tf.expand_dims(inputs, 3) 53 | # Old (working but pretty slow) implementation: 54 | ########## 55 | 56 | # inputs = tf.transpose(inputs, [0,2,3,1]) 57 | 58 | # mean, var = tf.nn.moments(inputs, [0,1,2], keep_dims=False) 59 | # offset = lib.param(name+'.offset', np.zeros(mean.get_shape()[-1], dtype='float32')) 60 | # scale = lib.param(name+'.scale', np.ones(var.get_shape()[-1], dtype='float32')) 61 | # result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-4) 62 | 63 | # return tf.transpose(result, [0,3,1,2]) 64 | 65 | # New (super fast but untested) implementation: 66 | offset = lib.param(name+'.offset', np.zeros(inputs.get_shape()[1], dtype='float32')) 67 | scale = lib.param(name+'.scale', np.ones(inputs.get_shape()[1], dtype='float32')) 68 | 69 | moving_mean = lib.param(name+'.moving_mean', np.zeros(inputs.get_shape()[1], dtype='float32'), trainable=False) 70 | moving_variance = lib.param(name+'.moving_variance', np.ones(inputs.get_shape()[1], dtype='float32'), trainable=False) 71 | 72 | def _fused_batch_norm_training(): 73 | return tf.nn.fused_batch_norm(inputs, scale, offset, epsilon=1e-5, data_format='NCHW') 74 | def _fused_batch_norm_inference(): 75 | # Version which blends in the current item's statistics 76 | batch_size = tf.cast(tf.shape(inputs)[0], 'float32') 77 | mean, var = tf.nn.moments(inputs, [2,3], keep_dims=True) 78 | mean = ((1./batch_size)*mean) + (((batch_size-1.)/batch_size)*moving_mean)[None,:,None,None] 79 | var = ((1./batch_size)*var) + (((batch_size-1.)/batch_size)*moving_variance)[None,:,None,None] 80 | return tf.nn.batch_normalization(inputs, mean, var, offset[None,:,None,None], scale[None,:,None,None], 1e-5), mean, var 81 | 82 | # Standard version 83 | # return tf.nn.fused_batch_norm( 84 | # inputs, 85 | # scale, 86 | # offset, 87 | # epsilon=1e-2, 88 | # mean=moving_mean, 89 | # variance=moving_variance, 90 | # is_training=False, 91 | # data_format='NCHW' 92 | # ) 93 | 94 | if is_training is None: 95 | outputs, batch_mean, batch_var = _fused_batch_norm_training() 96 | else: 97 | outputs, batch_mean, batch_var = tf.cond(is_training, 98 | _fused_batch_norm_training, 99 | _fused_batch_norm_inference) 100 | if update_moving_stats: 101 | no_updates = lambda: outputs 102 | def _force_updates(): 103 | """Internal function forces updates moving_vars if is_training.""" 104 | float_stats_iter = tf.cast(stats_iter, tf.float32) 105 | 106 | update_moving_mean = tf.assign(moving_mean, ((float_stats_iter/(float_stats_iter+1))*moving_mean) + ((1/(float_stats_iter+1))*batch_mean)) 107 | update_moving_variance = tf.assign(moving_variance, ((float_stats_iter/(float_stats_iter+1))*moving_variance) + ((1/(float_stats_iter+1))*batch_var)) 108 | 109 | with 
tf.control_dependencies([update_moving_mean, update_moving_variance]): 110 | return tf.identity(outputs) 111 | outputs = tf.cond(is_training, _force_updates, no_updates) 112 | 113 | if axes == [0,2]: 114 | return outputs[:,:,:,0] # collapse last dim 115 | else: 116 | return outputs 117 | else: 118 | # raise Exception('old BN') 119 | # TODO we can probably use nn.fused_batch_norm here too for speedup 120 | mean, var = tf.nn.moments(inputs, axes, keep_dims=True) 121 | shape = mean.get_shape().as_list() 122 | if 0 not in axes: 123 | print "WARNING ({}): didn't find 0 in axes, but not using separate BN params for each item in batch".format(name) 124 | shape[0] = 1 125 | offset = lib.param(name+'.offset', np.zeros(shape, dtype='float32')) 126 | scale = lib.param(name+'.scale', np.ones(shape, dtype='float32')) 127 | result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5) 128 | 129 | 130 | return result 131 | -------------------------------------------------------------------------------- /transform.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk 4 | # (see original license below) 5 | # 6 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 7 | # For more information see 8 | # 9 | # This file is licensed under the Creative Commons 10 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 11 | # To view a copy of this license, visit 12 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 13 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 14 | # 15 | 16 | ################################################################################ 17 | # 18 | # Copyright (c) 2017 University of Oxford 19 | # Authors: 20 | # Geoff Pascoe (gmp@robots.ox.ac.uk) 21 | # 22 | # This work is licensed under the Creative Commons 23 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 24 | # To view a copy of this license, visit 25 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 26 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 27 | # 28 | ################################################################################ 29 | 30 | import numpy as np 31 | import numpy.matlib as matlib 32 | from math import sin, cos, atan2, sqrt 33 | 34 | MATRIX_MATCH_TOLERANCE = 1e-4 35 | 36 | def build_intrinsic_matrix(focal_length, principal_point): 37 | intrinsic_matrix = matlib.identity(3) 38 | intrinsic_matrix[0:2, 2] = np.matrix(principal_point).transpose() 39 | intrinsic_matrix[0,0] = focal_length[0] 40 | intrinsic_matrix[1,1] = focal_length[1] 41 | return intrinsic_matrix 42 | 43 | 44 | def build_se3_transform(xyzrpy): 45 | """Creates an SE3 transform from translation and Euler angles. 46 | 47 | Args: 48 | xyzrpy (list[float]): translation and Euler angles for transform. Must have six components. 49 | 50 | Returns: 51 | numpy.matrixlib.defmatrix.matrix: SE3 homogeneous transformation matrix 52 | 53 | Raises: 54 | ValueError: if `len(xyzrpy) != 6` 55 | 56 | """ 57 | if len(xyzrpy) != 6: 58 | raise ValueError("Must supply 6 values to build transform") 59 | 60 | se3 = matlib.identity(4) 61 | se3[0:3, 0:3] = euler_to_so3(xyzrpy[3:6]) 62 | se3[0:3, 3] = np.matrix(xyzrpy[0:3]).transpose() 63 | return se3 64 | 65 | 66 | def euler_to_so3(rpy): 67 | """Converts Euler angles to an SO3 rotation matrix. 
68 | 69 | Args: 70 | rpy (list[float]): Euler angles (in radians). Must have three components. 71 | 72 | Returns: 73 | numpy.matrixlib.defmatrix.matrix: 3x3 SO3 rotation matrix 74 | 75 | Raises: 76 | ValueError: if `len(rpy) != 3`. 77 | 78 | """ 79 | if len(rpy) != 3: 80 | raise ValueError("Euler angles must have three components") 81 | 82 | R_x = np.matrix([[1, 0, 0], 83 | [0, cos(rpy[0]), -sin(rpy[0])], 84 | [0, sin(rpy[0]), cos(rpy[0])]]) 85 | R_y = np.matrix([[cos(rpy[1]), 0, sin(rpy[1])], 86 | [0, 1, 0], 87 | [-sin(rpy[1]), 0, cos(rpy[1])]]) 88 | R_z = np.matrix([[cos(rpy[2]), -sin(rpy[2]), 0], 89 | [sin(rpy[2]), cos(rpy[2]), 0], 90 | [0, 0, 1]]) 91 | R_zyx = R_z * R_y * R_x 92 | return R_zyx 93 | 94 | 95 | def so3_to_euler(so3): 96 | """Converts an SO3 rotation matrix to Euler angles 97 | 98 | Args: 99 | so3: 3x3 rotation matrix 100 | 101 | Returns: 102 | numpy.matrixlib.defmatrix.matrix: list of Euler angles (size 3) 103 | 104 | Raises: 105 | ValueError: if so3 is not 3x3 106 | ValueError: if a valid Euler parametrisation cannot be found 107 | 108 | """ 109 | if so3.shape != (3, 3): 110 | raise ValueError("SO3 matrix must be 3x3") 111 | roll = atan2(so3[2, 1], so3[2, 2]) 112 | yaw = atan2(so3[1, 0], so3[0, 0]) 113 | denom = sqrt(so3[0, 0] ** 2 + so3[1, 0] ** 2) 114 | pitch_poss = [atan2(-so3[2, 0], denom), atan2(-so3[2, 0], -denom)] 115 | 116 | R = euler_to_so3((roll, pitch_poss[0], yaw)) 117 | 118 | if (so3 - R).sum() < MATRIX_MATCH_TOLERANCE: 119 | return np.matrix([roll, pitch_poss[0], yaw]) 120 | else: 121 | R = euler_to_so3((roll, pitch_poss[1], yaw)) 122 | if (so3 - R).sum() > MATRIX_MATCH_TOLERANCE: 123 | raise ValueError("Could not find valid pitch angle") 124 | return np.matrix([roll, pitch_poss[1], yaw]) 125 | 126 | 127 | def so3_to_quaternion(so3): 128 | """Converts an SO3 rotation matrix to a quaternion 129 | 130 | Args: 131 | so3: 3x3 rotation matrix 132 | 133 | Returns: 134 | numpy.ndarray: quaternion [w, x, y, z] 135 | 136 | Raises: 137 | ValueError: if so3 is not 3x3 138 | """ 139 | if so3.shape != (3, 3): 140 | raise ValueError("SO3 matrix must be 3x3") 141 | 142 | R_xx = so3[0, 0] 143 | R_xy = so3[0, 1] 144 | R_xz = so3[0, 2] 145 | R_yx = so3[1, 0] 146 | R_yy = so3[1, 1] 147 | R_yz = so3[1, 2] 148 | R_zx = so3[2, 0] 149 | R_zy = so3[2, 1] 150 | R_zz = so3[2, 2] 151 | 152 | try: 153 | w = sqrt(so3.trace() + 1) / 2 154 | except(ValueError): 155 | # w is non-real 156 | w = 0 157 | 158 | x = sqrt(1 + R_xx - R_yy - R_zz) / 2 159 | y = sqrt(1 + R_yy - R_xx - R_zz) / 2 160 | z = sqrt(1 + R_zz - R_yy - R_xx) / 2 161 | 162 | max_index = max(range(4), key=[w, x, y, z].__getitem__) 163 | 164 | if max_index == 0: 165 | x = (R_zy - R_yz) / (4 * w) 166 | y = (R_xz - R_zx) / (4 * w) 167 | z = (R_yx - R_xy) / (4 * w) 168 | elif max_index == 1: 169 | w = (R_zy - R_yz) / (4 * x) 170 | y = (R_xy + R_yx) / (4 * x) 171 | z = (R_zx + R_xz) / (4 * x) 172 | elif max_index == 2: 173 | w = (R_xz - R_zx) / (4 * y) 174 | x = (R_xy + R_yx) / (4 * y) 175 | z = (R_yz + R_zy) / (4 * y) 176 | elif max_index == 3: 177 | w = (R_yx - R_xy) / (4 * z) 178 | x = (R_zx + R_xz) / (4 * z) 179 | y = (R_yz + R_zy) / (4 * z) 180 | 181 | return np.array([w, x, y, z]) 182 | 183 | 184 | def se3_to_components(se3): 185 | """Converts an SE3 rotation matrix to linear translation and Euler angles 186 | 187 | Args: 188 | se3: 4x4 transformation matrix 189 | 190 | Returns: 191 | numpy.matrixlib.defmatrix.matrix: list of [x, y, z, roll, pitch, yaw] 192 | 193 | Raises: 194 | ValueError: if se3 is not 4x4 195 | 
ValueError: if a valid Euler parametrisation cannot be found 196 | 197 | """ 198 | if se3.shape != (4, 4): 199 | raise ValueError("SE3 transform must be a 4x4 matrix") 200 | xyzrpy = np.empty(6) 201 | xyzrpy[0:3] = se3[0:3, 3].transpose() 202 | xyzrpy[3:6] = so3_to_euler(se3[0:3, 0:3]) 203 | return xyzrpy 204 | -------------------------------------------------------------------------------- /eval_kitti/readme.txt: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # THE KITTI VISION BENCHMARK SUITE: VISUAL ODOMETRY / SLAM BENCHMARK # 3 | # Andreas Geiger Philip Lenz Raquel Urtasun # 4 | # Karlsruhe Institute of Technology # 5 | # Toyota Technological Institute at Chicago # 6 | # www.cvlibs.net # 7 | ########################################################################### 8 | 9 | This file describes the KITTI visual odometry / SLAM benchmark package. 10 | Accurate ground truth (<10cm) is provided by a GPS/IMU system with RTK 11 | float/integer corrections enabled. In order to enable a fair comparison of 12 | all methods, only ground truth for the sequences 00-10 is made publicly 13 | available. The remaining sequences (11-21) serve as evaluation sequences. 14 | 15 | NOTE: WHEN SUBMITTING RESULTS, PLEASE STORE THEM IN THE SAME DATA FORMAT IN 16 | WHICH THE GROUND TRUTH DATA IS PROVIDED (SEE 'POSES' BELOW), USING THE 17 | FILE NAMES 11.txt TO 21.txt. CREATE A ZIP ARCHIVE OF THEM AND STORE YOUR 18 | RESULTS IN ITS ROOT FOLDER. 19 | 20 | File description: 21 | ================= 22 | 23 | Folder 'sequences': 24 | 25 | Each folder within the folder 'sequences' contains a single sequence, where 26 | the left and right images are stored in the sub-folders image_0 and 27 | image_1, respectively. The images are provided as greyscale PNG images and 28 | can be loaded with MATLAB or libpng++. All images have been undistorted and 29 | rectified. Sequences 0-10 can be used for training, while results must be 30 | provided for the test sequences 11-21. 31 | 32 | Additionally we provide the velodyne point clouds for point-cloud-based 33 | methods. To save space, all scans have been stored as Nx4 float matrix into 34 | a binary file using the following code: 35 | 36 | stream = fopen (dst_file.c_str(),"wb"); 37 | fwrite(data,sizeof(float),4*num,stream); 38 | fclose(stream); 39 | 40 | Here, data contains 4*num values, where the first 3 values correspond to 41 | x,y and z, and the last value is the reflectance information. All scans 42 | are stored row-aligned, meaning that the first 4 values correspond to the 43 | first measurement. 
Since each scan might potentially have a different 44 | number of points, this must be determined from the file size when reading 45 | the file, where 1e6 is a good enough upper bound on the number of values: 46 | 47 | // allocate 4 MB buffer (only ~130*4*4 KB are needed) 48 | int32_t num = 1000000; 49 | float *data = (float*)malloc(num*sizeof(float)); 50 | 51 | // pointers 52 | float *px = data+0; 53 | float *py = data+1; 54 | float *pz = data+2; 55 | float *pr = data+3; 56 | 57 | // load point cloud 58 | FILE *stream; 59 | stream = fopen (currFilenameBinary.c_str(),"rb"); 60 | num = fread(data,sizeof(float),num,stream)/4; 61 | for (int32_t i=0; i<num; i++) { 62 | point_cloud.points.push_back(tPoint(*px,*py,*pz,*pr)); 63 | px+=4; py+=4; pz+=4; pr+=4; 64 | } 65 | fclose(stream); -------------------------------------------------------------------------------- /wgan/tflib/ops/linear.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import tflib as lib 45 | 46 | import numpy as np 47 | import tensorflow as tf 48 | 49 | _default_weightnorm = False 50 | def enable_default_weightnorm(): 51 | global _default_weightnorm 52 | _default_weightnorm = True 53 | 54 | def disable_default_weightnorm(): 55 | global _default_weightnorm 56 | _default_weightnorm = False 57 | 58 | _weights_stdev = None 59 | def set_weights_stdev(weights_stdev): 60 | global _weights_stdev 61 | _weights_stdev = weights_stdev 62 | 63 | def unset_weights_stdev(): 64 | global _weights_stdev 65 | _weights_stdev = None 66 | 67 | def Linear( 68 | name, 69 | input_dim, 70 | output_dim, 71 | inputs, 72 | biases=True, 73 | initialization=None, 74 | weightnorm=None, 75 | gain=1.
76 | ): 77 | """ 78 | initialization: None, `lecun`, 'glorot', `he`, 'glorot_he', `orthogonal`, `("uniform", range)` 79 | """ 80 | with tf.name_scope(name) as scope: 81 | 82 | def uniform(stdev, size): 83 | if _weights_stdev is not None: 84 | stdev = _weights_stdev 85 | return np.random.uniform( 86 | low=-stdev * np.sqrt(3), 87 | high=stdev * np.sqrt(3), 88 | size=size 89 | ).astype('float32') 90 | 91 | if initialization == 'lecun':# and input_dim != output_dim): 92 | # disabling orth. init for now because it's too slow 93 | weight_values = uniform( 94 | np.sqrt(1./input_dim), 95 | (input_dim, output_dim) 96 | ) 97 | 98 | elif initialization == 'glorot' or (initialization == None): 99 | 100 | weight_values = uniform( 101 | np.sqrt(2./(input_dim+output_dim)), 102 | (input_dim, output_dim) 103 | ) 104 | 105 | elif initialization == 'he': 106 | 107 | weight_values = uniform( 108 | np.sqrt(2./input_dim), 109 | (input_dim, output_dim) 110 | ) 111 | 112 | elif initialization == 'glorot_he': 113 | 114 | weight_values = uniform( 115 | np.sqrt(4./(input_dim+output_dim)), 116 | (input_dim, output_dim) 117 | ) 118 | 119 | elif initialization == 'orthogonal' or \ 120 | (initialization == None and input_dim == output_dim): 121 | 122 | # From lasagne 123 | def sample(shape): 124 | if len(shape) < 2: 125 | raise RuntimeError("Only shapes of length 2 or more are " 126 | "supported.") 127 | flat_shape = (shape[0], np.prod(shape[1:])) 128 | # TODO: why normal and not uniform? 129 | a = np.random.normal(0.0, 1.0, flat_shape) 130 | u, _, v = np.linalg.svd(a, full_matrices=False) 131 | # pick the one with the correct shape 132 | q = u if u.shape == flat_shape else v 133 | q = q.reshape(shape) 134 | return q.astype('float32') 135 | weight_values = sample((input_dim, output_dim)) 136 | 137 | elif initialization[0] == 'uniform': 138 | 139 | weight_values = np.random.uniform( 140 | low=-initialization[1], 141 | high=initialization[1], 142 | size=(input_dim, output_dim) 143 | ).astype('float32') 144 | 145 | else: 146 | 147 | raise Exception('Invalid initialization!') 148 | 149 | weight_values *= gain 150 | 151 | weight = lib.param( 152 | name + '.W', 153 | weight_values 154 | ) 155 | 156 | if weightnorm==None: 157 | weightnorm = _default_weightnorm 158 | if weightnorm: 159 | norm_values = np.sqrt(np.sum(np.square(weight_values), axis=0)) 160 | # norm_values = np.linalg.norm(weight_values, axis=0) 161 | 162 | target_norms = lib.param( 163 | name + '.g', 164 | norm_values 165 | ) 166 | 167 | with tf.name_scope('weightnorm') as scope: 168 | norms = tf.sqrt(tf.reduce_sum(tf.square(weight), reduction_indices=[0])) 169 | weight = weight * (target_norms / norms) 170 | 171 | # if 'Discriminator' in name: 172 | # print "WARNING weight constraint on {}".format(name) 173 | # weight = tf.nn.softsign(10.*weight)*.1 174 | 175 | if inputs.get_shape().ndims == 2: 176 | result = tf.matmul(inputs, weight) 177 | else: 178 | reshaped_inputs = tf.reshape(inputs, [-1, input_dim]) 179 | result = tf.matmul(reshaped_inputs, weight) 180 | result = tf.reshape(result, tf.stack(tf.unstack(tf.shape(inputs))[:-1] + [output_dim])) 181 | 182 | if biases: 183 | result = tf.nn.bias_add( 184 | result, 185 | lib.param( 186 | name + '.b', 187 | np.zeros((output_dim,), dtype='float32') 188 | ) 189 | ) 190 | 191 | return result -------------------------------------------------------------------------------- /adapt_images.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 
3 | # 4 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # This file is licensed under the Creative Commons 8 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 9 | # To view a copy of this license, visit 10 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 11 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 12 | # 13 | 14 | import argparse 15 | import os 16 | import re 17 | import csv 18 | #from datetime import datetime as dt 19 | from image import load_image, crop_image, scale_image, savez_compressed 20 | from camera_model import CameraModel 21 | from transform import build_se3_transform, build_intrinsic_matrix 22 | from array_utils import save_as_list, list_to_array, save_txt 23 | import numpy as np 24 | 25 | 26 | def process_image(img, crop=None, scale=None): 27 | return resize_image(img, crop, scale) 28 | 29 | 30 | def resize_image(img, crop=None, scale=None): 31 | resolution = get_resolution(img) 32 | if crop: 33 | img = crop_image(img, crop[0], crop[1]) 34 | if scale: 35 | img = scale_image(img, scale[0], scale[1]) 36 | return img, resolution 37 | 38 | 39 | def get_resolution(img): 40 | resolution = [img.shape[1], img.shape[0]] 41 | return resolution 42 | 43 | 44 | def get_intrinsics_parameters(focal_length, principal_point, resolution, crop=None, scale=None): 45 | if crop: 46 | resolution = crop 47 | principal_point = [x / 2. for x in crop] ## FIXME see this same method in adapt_images_kitti 48 | if scale: 49 | focal_length = [float(scale[i])/resolution[i] * focal_length[i] for i in range(len(focal_length))] 50 | principal_point = [x / 2. for x in scale] 51 | return focal_length, principal_point 52 | 53 | 54 | def main(): 55 | args = get_arguments() 56 | 57 | camera = re.search('(stereo|mono_(left|right|rear))', args.dir).group(0) 58 | 59 | timestamps_path = os.path.join(os.path.join(args.dir, os.pardir, camera + '.timestamps')) 60 | if not os.path.isfile(timestamps_path): 61 | timestamps_path = os.path.join(args.dir, os.pardir, os.pardir, camera + '.timestamps') 62 | if not os.path.isfile(timestamps_path): 63 | raise IOError("Could not find timestamps file") 64 | 65 | model = None 66 | if args.models_dir: 67 | model = CameraModel(args.models_dir, args.dir) 68 | 69 | output_dir = os.curdir 70 | if args.output_dir: 71 | output_dir = args.output_dir 72 | if not os.path.isdir(output_dir): 73 | raise IOError(output_dir + " is not an existing folder") 74 | 75 | result_list = [] 76 | count = 0 77 | dictionary = {} 78 | t_records = [] 79 | p_records = [] 80 | angles_records = [] 81 | intrinsic_matrix = None 82 | 83 | with open(args.poses_file) as vo_file: 84 | vo_reader = csv.reader(vo_file) 85 | headers = next(vo_file) 86 | for row in vo_reader: 87 | src_image_name = row[0] 88 | dst_image_name = row[1] 89 | src_image_filename = os.path.join(args.dir, src_image_name + '.png') 90 | dst_image_filename = os.path.join(args.dir, dst_image_name + '.png') 91 | if not os.path.isfile(src_image_filename) or not os.path.isfile(dst_image_filename): 92 | continue 93 | if dst_image_name not in dictionary: 94 | img, orig_resolution = process_image(load_image(dst_image_filename, model), args.crop, args.scale) 95 | dictionary[dst_image_name] = count 96 | count = count + 1 97 | result_list.append(list(img)) 98 | if src_image_name not in dictionary: 99 | img, orig_resolution = process_image(load_image(src_image_filename, model), args.crop, args.scale) 100 | dictionary[src_image_name] = count 101 | count
= count + 1 102 | result_list.append(list(img)) 103 | 104 | focal_length, principal_point = get_intrinsics_parameters(model.get_focal_length(), model.get_principal_point(), orig_resolution, args.crop, args.scale) 105 | src_image_idx = dictionary[src_image_name] 106 | dst_image_idx = dictionary[dst_image_name] 107 | xyzrpy = [float(v) for v in row[2:8]] 108 | rel_pose = build_se3_transform(xyzrpy) 109 | t_matrix = rel_pose[0:3] # 3x4 matrix 110 | intrinsic_matrix = build_intrinsic_matrix(focal_length, principal_point) 111 | p_matrix = intrinsic_matrix * t_matrix 112 | t_records.append((t_matrix, src_image_idx, dst_image_idx)) 113 | p_records.append((p_matrix, src_image_idx, dst_image_idx)) 114 | angles_records.append((xyzrpy, src_image_idx, dst_image_idx)) 115 | 116 | transf = np.array(t_records, dtype=[('T',('float64',(3,4))),('src_idx', 'int32'),('dst_idx', 'int32')]) 117 | proy = np.array(p_records, dtype=[('P',('float64',(3,4))),('src_idx', 'int32'),('dst_idx', 'int32')]) 118 | angles = np.array(angles_records, dtype=[('ang',('float64',6)),('src_idx', 'int32'),('dst_idx', 'int32')]) 119 | # Save this only once because it is constant for the whole dataset (or it should be) 120 | if intrinsic_matrix is not None: 121 | save_txt(os.path.join(output_dir,"intrinsic_matrix"), intrinsic_matrix) 122 | save_txt(os.path.join(output_dir,"intrinsic_parameters"), [focal_length, principal_point]) 123 | #path = os.path.normpath(args.dir) 124 | #folders = path.split(os.sep) 125 | #compressed_file_path = os.path.join(output_dir, folders[-3]) 126 | result = list_to_array(result_list) 127 | save_txt(os.path.join(output_dir, 'images_shape'), result.shape, fmt='%i') 128 | print(result.shape) 129 | compressed_file_path = os.path.join(output_dir, 'images') 130 | savez_compressed(compressed_file_path, result) 131 | savez_compressed(os.path.join(output_dir, 't'),transf) 132 | savez_compressed(os.path.join(output_dir, 'p'),proy) 133 | savez_compressed(os.path.join(output_dir, 'angles'),angles) 134 | 135 | 136 | def get_arguments(): 137 | parser = argparse.ArgumentParser(description='Adapt images from a given directory') 138 | parser.add_argument('dir', type=str, help='Directory containing images.') 139 | parser.add_argument('poses_file', type=str, help='File containing VO poses') 140 | parser.add_argument('--models_dir', type=str, default=None, 141 | help='(optional) Directory containing camera model. If supplied, images will be undistorted before processing') 142 | parser.add_argument('--crop', nargs=2, default=None, type=int, metavar=('WIDTH', 'HEIGHT'), 143 | help='(optional) If supplied, images will be cropped to WIDTH x HEIGHT') 144 | parser.add_argument('--scale', nargs=2, default=None, type=int, metavar=('WIDTH', 'HEIGHT'), 145 | help='(optional) If supplied, images will be scaled to WIDTH x HEIGHT') 146 | parser.add_argument('--output_dir', type=str, default=None, help='(optional) Output directory') 147 | # parser.add_argument('image_name', type=str, help='Image name.') 148 | args = parser.parse_args() 149 | return args 150 | 151 | 152 | if __name__ == "__main__": 153 | main() 154 | -------------------------------------------------------------------------------- /camera_model.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo.
3 | # This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk 4 | # (see original license below) 5 | # 6 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 7 | # For more information see 8 | # 9 | # This file is licensed under the Creative Commons 10 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 11 | # To view a copy of this license, visit 12 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 13 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 14 | # 15 | 16 | ################################################################################ 17 | # 18 | # Copyright (c) 2017 University of Oxford 19 | # Authors: 20 | # Geoff Pascoe (gmp@robots.ox.ac.uk) 21 | # 22 | # This work is licensed under the Creative Commons 23 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 24 | # To view a copy of this license, visit 25 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 26 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 27 | # 28 | ################################################################################ 29 | 30 | import re 31 | import os 32 | import numpy as np 33 | import scipy.interpolate as interp 34 | from scipy.ndimage import map_coordinates 35 | 36 | 37 | class CameraModel: 38 | """Provides intrinsic parameters and undistortion LUT for a camera. 39 | 40 | Attributes: 41 | camera (str): Name of the camera. 42 | camera sensor (str): Name of the sensor on the camera for multi-sensor cameras. 43 | focal_length (tuple[float]): Focal length of the camera in horizontal and vertical axis, in pixels. 44 | principal_point (tuple[float]): Principal point of camera for pinhole projection model, in pixels. 45 | G_camera_image (:obj: `numpy.matrixlib.defmatrix.matrix`): Transform from image frame to camera frame. 46 | bilinear_lut (:obj: `numpy.ndarray`): Look-up table for undistortion of images, mapping pixels in an undistorted 47 | image to pixels in the distorted image 48 | 49 | """ 50 | 51 | def __init__(self, models_dir, images_dir): 52 | """Loads a camera model from disk. 53 | 54 | Args: 55 | models_dir (str): directory containing camera model files. 56 | images_dir (str): directory containing images for which to read camera model. 57 | 58 | """ 59 | self.camera = None 60 | self.camera_sensor = None 61 | self.focal_length = None 62 | self.principal_point = None 63 | self.G_camera_image = None 64 | self.bilinear_lut = None 65 | 66 | self.__load_intrinsics(models_dir, images_dir) 67 | self.__load_lut(models_dir, images_dir) 68 | 69 | def project(self, xyz, image_size): 70 | """Projects a pointcloud into the camera using a pinhole camera model. 71 | 72 | Args: 73 | xyz (:obj: `numpy.ndarray`): 3xn array, where each column is (x, y, z) point relative to camera frame. 74 | image_size (tuple[int]): dimensions of image in pixels 75 | 76 | Returns: 77 | numpy.ndarray: 2xm array of points, where each column is the (u, v) pixel coordinates of a point in pixels. 78 | numpy.array: array of depth values for points in image. 79 | 80 | Note: 81 | Number of output points m will be less than or equal to number of input points n, as points that do not 82 | project into the image are discarded. 
83 | 84 | """ 85 | if xyz.shape[0] == 3: 86 | xyz = np.vstack((xyz, np.ones((1, xyz.shape[1])))) 87 | xyzw = np.linalg.solve(self.G_camera_image, xyz) 88 | 89 | # Find which points lie in front of the camera 90 | in_front = [i for i in range(0, xyzw.shape[1]) if xyzw[2, i] >= 0] 91 | xyzw = xyzw[:, in_front] 92 | 93 | uv = np.vstack((self.focal_length[0] * xyzw[0, :] / xyzw[2, :] + self.principal_point[0], 94 | self.focal_length[1] * xyzw[1, :] / xyzw[2, :] + self.principal_point[1])) 95 | 96 | in_img = [i for i in range(0, uv.shape[1]) 97 | if 0.5 <= uv[0, i] <= image_size[1] and 0.5 <= uv[1, i] <= image_size[0]] 98 | 99 | return uv[:, in_img], np.ravel(xyzw[2, in_img]) 100 | 101 | def undistort(self, image): 102 | """Undistorts an image. 103 | 104 | Args: 105 | image (:obj: `numpy.ndarray`): A distorted image. Must be demosaiced - i.e. must be a 3-channel RGB image. 106 | 107 | Returns: 108 | numpy.ndarray: Undistorted version of image. 109 | 110 | Raises: 111 | ValueError: if image size does not match camera model. 112 | ValueError: if image only has a single channel. 113 | 114 | """ 115 | if image.shape[0] * image.shape[1] != self.bilinear_lut.shape[0]: 116 | raise ValueError('Incorrect image size for camera model') 117 | 118 | lut = self.bilinear_lut[:, 1::-1].T.reshape((2, image.shape[0], image.shape[1])) 119 | 120 | if len(image.shape) == 1: 121 | raise ValueError('Undistortion function only works with multi-channel images') 122 | 123 | undistorted = np.rollaxis(np.array([map_coordinates(image[:, :, channel], lut, order=1) 124 | for channel in range(0, image.shape[2])]), 0, 3) 125 | 126 | return undistorted.astype(image.dtype) 127 | 128 | def __get_model_name(self, images_dir): 129 | self.camera = re.search('(stereo|mono_(left|right|rear))', images_dir).group(0) 130 | if self.camera == 'stereo': 131 | self.camera_sensor = re.search('(left|centre|right)', images_dir).group(0) 132 | if self.camera_sensor == 'left': 133 | return 'stereo_wide_left' 134 | elif self.camera_sensor == 'right': 135 | return 'stereo_wide_right' 136 | elif self.camera_sensor == 'centre': 137 | return 'stereo_narrow_left' 138 | else: 139 | raise RuntimeError('Unknown camera model for given directory: ' + images_dir) 140 | else: 141 | return self.camera 142 | 143 | def __load_intrinsics(self, models_dir, images_dir): 144 | model_name = self.__get_model_name(images_dir) 145 | intrinsics_path = os.path.join(models_dir, model_name + '.txt') 146 | 147 | with open(intrinsics_path) as intrinsics_file: 148 | vals = [float(x) for x in next(intrinsics_file).split()] 149 | self.focal_length = (vals[0], vals[1]) 150 | self.principal_point = (vals[2], vals[3]) 151 | 152 | G_camera_image = [] 153 | for line in intrinsics_file: 154 | G_camera_image.append([float(x) for x in line.split()]) 155 | self.G_camera_image = np.array(G_camera_image) 156 | 157 | def __load_lut(self, models_dir, images_dir): 158 | model_name = self.__get_model_name(images_dir) 159 | lut_path = os.path.join(models_dir, model_name + '_distortion_lut.bin') 160 | 161 | lut = np.fromfile(lut_path, np.double) 162 | lut = lut.reshape([2, lut.size // 2]) 163 | self.bilinear_lut = lut.transpose() 164 | 165 | def get_focal_length(self): 166 | return self.focal_length 167 | def get_principal_point(self): 168 | return self.principal_point 169 | 170 | -------------------------------------------------------------------------------- /vgg_trainable/test/plotHelpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding:
utf-8 -*- 2 | # 3 | # This file is part of wganvo. 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | import numpy as np 23 | import matplotlib.pyplot as plt 24 | from mpl_toolkits.mplot3d import Axes3D 25 | 26 | 27 | ################################################################################ 28 | # PLOT HELPERS 29 | ################################################################################ 30 | 31 | def plotVsTime1(time, xs, title="", xlabel=None, ylabel=None): 32 | fig = plt.figure() 33 | ax = fig.add_subplot(111) 34 | 35 | p, = ax.plot(time, xs) 36 | 37 | ax.set_xlabel(xlabel) 38 | ax.set_ylabel(ylabel) 39 | 40 | plt.grid() 41 | 42 | fig.suptitle(title) 43 | 44 | 45 | def plotVsTime3(time, xs, ys, zs, title=""): 46 | fig, (ax_x, ax_y, ax_z) = plt.subplots(3, sharex=True, sharey=True) 47 | 48 | px, = ax_x.plot(time, xs) 49 | py, = ax_y.plot(time, ys) 50 | pz, = ax_z.plot(time, zs) 51 | 52 | # Fine-tune figure; make subplots close to each other and hide x ticks for 53 | # all but bottom plot. 54 | # fig.subplots_adjust(hspace=0) 55 | plt.setp([a.get_xticklabels() for a in fig.axes[:-1]], visible=False) 56 | 57 | ax_z.set_xlabel("time (s)") 58 | 59 | ax_x.grid(True) 60 | ax_y.grid(True) 61 | ax_z.grid(True) 62 | 63 | fig.suptitle(title) 64 | 65 | 66 | def plotVsTime4(time, xs, ys, zs, title=""): 67 | fig, (ax_x, ax_y, ax_z, ax_avg) = plt.subplots(4, sharex=True, sharey=True) 68 | 69 | px, = ax_x.plot(time, xs) 70 | py, = ax_y.plot(time, ys) 71 | pz, = ax_z.plot(time, zs) 72 | p_avg, = ax_avg.plot(time, np.linalg.norm(np.array([xs, ys, zs]), axis=0)) 73 | 74 | # Fine-tune figure; make subplots close to each other and hide x ticks for 75 | # all but bottom plot. 
76 | fig.subplots_adjust(hspace=0) 77 | plt.setp([a.get_xticklabels() for a in fig.axes[:-1]], visible=False) 78 | 79 | ax_avg.set_xlabel("time (s)") 80 | 81 | plt.grid() 82 | 83 | fig.suptitle(title) 84 | 85 | fig.tight_layout() 86 | 87 | 88 | def plotPath3D(X, Y, Z, title): 89 | fig = plt.figure() 90 | 91 | ax = fig.add_subplot(111, projection='3d') 92 | 93 | ax.plot(X, Y, Z) 94 | 95 | # draw a point 96 | # ~ ax.scatter([0],[0],[0],color="black",s=1) 97 | 98 | # draw cube 99 | # ~ r = [-1, 1] 100 | # ~ for s, e in combinations(np.array(list(product(r,r,r))), 2): 101 | # ~ if np.sum(np.abs(s-e)) == r[1]-r[0]: 102 | # ~ ax.plot3D(*zip(s,e), color="black") 103 | 104 | # simulate equal aspect ratio 105 | # ~ """ 106 | max_range = np.array([X.max() - X.min(), Y.max() - Y.min(), Z.max() - Z.min()]).max() / 2.0 107 | mean_x = X.mean() 108 | mean_y = Y.mean() 109 | mean_z = Z.mean() 110 | ax.set_xlim(mean_x - max_range, mean_x + max_range) 111 | ax.set_ylim(mean_y - max_range, mean_y + max_range) 112 | ax.set_zlim(mean_z - max_range, mean_z + max_range) 113 | # ~ """ 114 | # Set axis labels 115 | 116 | xLabel = ax.set_xlabel('x') 117 | yLabel = ax.set_ylabel('y') 118 | zLabel = ax.set_zlabel('z') 119 | 120 | ax.set_title(title) 121 | 122 | 123 | def plotPaths3D(paths, labels, colors, title=None): 124 | """ 125 | """ 126 | 127 | assert (len(paths) == len(labels)) 128 | assert (len(paths) <= len(colors)) 129 | 130 | fig = plt.figure() 131 | 132 | ax = fig.add_subplot(111, projection='3d') 133 | 134 | for (X, Y, Z), label, color in zip(paths, labels, colors): 135 | ax.plot(X, Y, Z, label=label, color=color) 136 | 137 | # draw a point 138 | # ~ ax.scatter([0],[0],[0],color="black",s=1) 139 | 140 | # draw cube 141 | # ~ r = [-1, 1] 142 | # ~ for s, e in combinations(np.array(list(product(r,r,r))), 2): 143 | # ~ if np.sum(np.abs(s-e)) == r[1]-r[0]: 144 | # ~ ax.plot3D(*zip(s,e), color="black") 145 | 146 | # simulate equal aspect ratio 147 | # assume first path is ground truth 148 | # """ 149 | max_range = np.array([paths[0][0].max() - paths[0][0].min(), paths[0][1].max() - paths[0][1].min(), 150 | paths[0][2].max() - paths[0][2].min()]).max() / 2.0 151 | mean_x = paths[0][0].mean() 152 | mean_y = paths[0][1].mean() 153 | mean_z = paths[0][2].mean() 154 | ax.set_xlim(mean_x - max_range, mean_x + max_range) 155 | ax.set_ylim(mean_y - max_range, mean_y + max_range) 156 | ax.set_zlim(mean_z - max_range, mean_z + max_range) 157 | # """ 158 | # Set axis labels 159 | 160 | xLabel = ax.set_xlabel('x (m)') 161 | yLabel = ax.set_ylabel('y (m)') 162 | zLabel = ax.set_zlabel('z (m)') 163 | 164 | handles, labels = ax.get_legend_handles_labels() 165 | ax.legend(handles, labels) 166 | 167 | if title: 168 | ax.set_title(title) 169 | 170 | 171 | def plotPaths2D(paths, labels, colors, xlabel=None, ylabel=None, cloud_file=None, grid=None, save_filename=None): 172 | """ 173 | """ 174 | 175 | assert (len(paths) == len(labels)) 176 | assert (len(paths) <= len(colors)) 177 | 178 | # Plot the 2D trajectory (coordinate z = 0) 179 | fig = plt.figure() 180 | ax = fig.add_subplot(111) 181 | 182 | if (cloud_file): 183 | cloud = np.loadtxt(cloud_file) 184 | ax.scatter(cloud[:, 0], cloud[:, 1], s=1, color='0.1') 185 | 186 | for (X, Y), label, color in zip(paths, labels, colors): 187 | ax.plot(X, Y, label=label, color=color, linewidth=1.5) 188 | ax.plot(X[0], Y[0], marker='v', alpha=1, markersize=8, color=color) 189 | 190 | # enable for KITTI 00 dataset 191 | # ~ plt.xlim(-10, 50) 192 | # ~ plt.ylim(0, 30) 193 | 194 | 
plt.gca().set_aspect('equal', adjustable='box') 195 | 196 | if xlabel: 197 | ax.set_xlabel(xlabel) 198 | 199 | if ylabel: 200 | ax.set_ylabel(ylabel) 201 | 202 | handles, labels = ax.get_legend_handles_labels() 203 | ax.legend(handles, labels, ncol=2, loc='center', bbox_to_anchor=(0.5, 1.2)) 204 | 205 | if grid: 206 | ax.grid(True) 207 | 208 | 209 | def plotLoops3D(paths, loops, title=None, time_unit=None): 210 | fig = plt.figure() 211 | 212 | ax = fig.add_subplot(111, projection='3d') 213 | 214 | for X, Y, Z, color in paths: 215 | ax.plot(X, Y, Z, color=color) 216 | 217 | for X, Y, Z, color in loops: 218 | ax.plot(X, Y, Z, color=color) 219 | 220 | # simulate equal aspect ratio 221 | # assume first path is ground truth 222 | # """ 223 | max_range = np.array([paths[0][0].max() - paths[0][0].min(), paths[0][1].max() - paths[0][1].min(), 224 | paths[0][2].max() - paths[0][2].min()]).max() / 2.0 225 | mean_x = paths[0][0].mean() 226 | mean_y = paths[0][1].mean() 227 | mean_z = paths[0][2].mean() 228 | ax.set_xlim(mean_x - max_range, mean_x + max_range) 229 | ax.set_ylim(mean_y - max_range, mean_y + max_range) 230 | ax.set_zlim(mean_z - max_range, mean_z + max_range) 231 | # """ 232 | # Set axis labels 233 | 234 | xLabel = ax.set_xlabel('x (m)') 235 | yLabel = ax.set_ylabel('y (m)') 236 | if time_unit: 237 | zLabel = ax.set_zlabel('time (' + time_unit + ')') 238 | else: 239 | zLabel = ax.set_zlabel('time (seconds)') 240 | -------------------------------------------------------------------------------- /Dependencies.md: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | In this document we list all the pieces of code included by wganvo which are not property of the authors of wganvo. 3 | 4 | ## Code in `wgan` folder 5 | Source code in `wgan/` is a modified version of [Improved Training of Wasserstein GANs 6 | ](https://github.com/igul222/improved_wgan_training/). These files are MIT-licensed. 7 | ``` 8 | MIT License 9 | 10 | Copyright (c) 2017 Ishaan Gulrajani 11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a copy 13 | of this software and associated documentation files (the "Software"), to deal 14 | in the Software without restriction, including without limitation the rights 15 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 16 | copies of the Software, and to permit persons to whom the Software is 17 | furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in all 20 | copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 28 | SOFTWARE. 29 | ``` 30 | 31 | ## Code in `vgg_trainable` folder (except `vgg.py`) 32 | The following files placed in `vgg_trainable` belong to the authors of [Tensorflow](https://github.com/tensorflow/tensorflow) 33 | and they are released under Apache License 2.0.
34 | * `vgg_trainable/input_data.py` 35 | * `vgg_trainable/main.py` 36 | * `vgg_trainable/model.py` 37 | 38 | ``` 39 | Copyright 2015 The TensorFlow Authors. All Rights Reserved. 40 | 41 | Licensed under the Apache License, Version 2.0 (the "License"); 42 | you may not use this file except in compliance with the License. 43 | You may obtain a copy of the License at 44 | 45 | http://www.apache.org/licenses/LICENSE-2.0 46 | 47 | Unless required by applicable law or agreed to in writing, software 48 | distributed under the License is distributed on an "AS IS" BASIS, 49 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 50 | See the License for the specific language governing permissions and 51 | limitations under the License. 52 | ``` 53 | 54 | ## Code in `eval_kitti` folder 55 | Source code in `eval_kitti` belongs to the authors of KITTI. Some files are part of libviso2, which is released under GPL License. 56 | ``` 57 | Copyright 2011. All rights reserved. 58 | Institute of Measurement and Control Systems 59 | Karlsruhe Institute of Technology, Germany 60 | 61 | This file is part of libviso2. 62 | Authors: Andreas Geiger 63 | 64 | libviso2 is free software; you can redistribute it and/or modify it under the 65 | terms of the GNU General Public License as published by the Free Software 66 | Foundation; either version 2 of the License, or any later version. 67 | 68 | libviso2 is distributed in the hope that it will be useful, but WITHOUT ANY 69 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 70 | PARTICULAR PURPOSE. See the GNU General Public License for more details. 71 | 72 | You should have received a copy of the GNU General Public License along with 73 | libviso2; if not, write to the Free Software Foundation, Inc., 51 Franklin 74 | Street, Fifth Floor, Boston, MA 02110-1301, USA 75 | ``` 76 | Files that are not explicitly licensed are released under 77 | [Creative Commons Attribution-NonCommercial-ShareAlike 3.0](http://creativecommons.org/licenses/by-nc-sa/3.0/), 78 | (according to their [website](http://www.cvlibs.net/datasets/kitti/)). 79 | 80 | ## `transformations.py` from C. Gohlke 81 | `transformations.py` file is BSD licensed. 82 | ``` 83 | Copyright (c) 2006-2018, Christoph Gohlke 84 | Copyright (c) 2006-2018, The Regents of the University of California 85 | Produced at the Laboratory for Fluorescence Dynamics 86 | All rights reserved. 87 | 88 | Redistribution and use in source and binary forms, with or without 89 | modification, are permitted provided that the following conditions are met: 90 | 91 | * Redistributions of source code must retain the above copyright 92 | notice, this list of conditions and the following disclaimer. 93 | * Redistributions in binary form must reproduce the above copyright 94 | notice, this list of conditions and the following disclaimer in the 95 | documentation and/or other materials provided with the distribution. 96 | * Neither the name of the copyright holders nor the names of any 97 | contributors may be used to endorse or promote products derived 98 | from this software without specific prior written permission. 99 | 100 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 101 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 102 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 103 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 104 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 105 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 106 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 107 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 108 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 109 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 110 | POSSIBILITY OF SUCH DAMAGE. 111 | ``` 112 | 113 | ## `tfquaternions.py` 114 | `tfquaternions.py` is based on a file from [tf-quaternion](https://github.com/PhilJd/tf-quaternion) (Apache 2.0 License). 115 | ``` 116 | Copyright Philipp Jund (jundp@cs.uni-freiburg.de) 2017. All Rights Reserved. 117 | 118 | Licensed under the Apache License, Version 2.0 (the "License"); 119 | you may not use this file except in compliance with the License. 120 | You may obtain a copy of the License at 121 | 122 | http://www.apache.org/licenses/LICENSE-2.0 123 | 124 | Unless required by applicable law or agreed to in writing, software 125 | distributed under the License is distributed on an "AS IS" BASIS, 126 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 127 | See the License for the specific language governing permissions and 128 | limitations under the License. 129 | ``` 130 | 131 | ## Files from Robotcar Dataset 132 | The following files are part of [robotcar-dataset-sdk](https://github.com/ori-mrg/robotcar-dataset-sdk) and they are released under 133 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 License (CC BY-NC-SA 4.0). 134 | * `build_pointcloud.py` 135 | * `camera_model.py` 136 | * `image.py` 137 | * `interpolate_poses.py` 138 | * `play_images.py` 139 | * `project_laser_into_camera.py` 140 | * `transform.py` 141 | 142 | ``` 143 | Copyright (c) 2017 University of Oxford 144 | Authors: 145 | Geoff Pascoe (gmp@robots.ox.ac.uk) 146 | 147 | This work is licensed under the Creative Commons 148 | Attribution-NonCommercial-ShareAlike 4.0 International License. 149 | To view a copy of this license, visit 150 | http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 151 | Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 152 | 153 | ``` 154 | Some of them have been modified by the authors of wganvo. 155 | 156 | ## Files from `evo` library 157 | The following files are part of [evo](https://github.com/MichaelGrupp/evo) and they are released under the GPL License. 158 | * `geometry.py` 159 | * `trajectory.py` 160 | * `lie_algebra.py` 161 | 162 | ``` 163 | author: Michael Grupp 164 | 165 | evo is free software: you can redistribute it and/or modify 166 | it under the terms of the GNU General Public License as published by 167 | the Free Software Foundation, either version 3 of the License, or 168 | (at your option) any later version. 169 | 170 | evo is distributed in the hope that it will be useful, 171 | but WITHOUT ANY WARRANTY; without even the implied warranty of 172 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 173 | GNU General Public License for more details. 174 | 175 | You should have received a copy of the GNU General Public License 176 | along with evo. If not, see . 177 | ``` 178 | 179 | ## `vgg_trainable/vgg.py` 180 | This file is based on a file from https://github.com/machrisaa/tensorflow-vgg.
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | WGANVO: Monocular Visual Odometry based on Generative Adversarial Networks 2 | ============================= 3 | Visual Odometry is one of the most essential techniques for robot localization. 4 | In this work we propose the use of Generative Adversarial Networks to estimate the pose, taking images from a monocular camera as input. We present WGANVO, a Deep Learning based monocular Visual Odometry method. In particular, a neural network is trained to regress a pose estimate from an image pair. The training is performed using a semi-supervised approach, combining the unsupervised GAN technique with labeled data. Unlike geometry-based monocular methods, the proposed method can recover the absolute scale of the observed scene without prior knowledge or extra information, as it can infer it from the training stage. The resulting system is evaluated on the well-known KITTI dataset, where it is shown to work in real time, and the accuracy obtained is encouraging for the further development of Deep Learning based methods. 5 | 6 | ### Paper 7 | * **WGANVO: odometría visual monocular basada en redes adversarias generativas**, Javier Cremona, Lucas C. Uzal, Taihú Pire, Revista Iberoamericana de Automática e Informática industrial, [S.l.], Dec. 2021. ISSN 1697-7920. Available at: [pdf](https://polipapers.upv.es/index.php/RIAI/article/view/16113). DOI: https://doi.org/10.4995/riai.2022.16113. 8 | 9 | * **WGANVO: Monocular Visual Odometry based on Generative Adversarial Networks**, Javier Cremona, Lucas C. Uzal, Taihú Pire, arXiv [pdf](https://arxiv.org/abs/2007.13704). 10 | 11 | ### How to cite 12 | ``` 13 | @article{cremona2021wganvo, 14 | author = {Javier Alejandro Cremona and Lucas Uzal and Taihú Pire}, 15 | title = {WGANVO: odometría visual monocular basada en redes adversarias generativas}, 16 | journal = {Revista Iberoamericana de Automática e Informática industrial}, 17 | volume = {19}, 18 | number = {2}, 19 | year = {2021}, 20 | keywords = {Localización; Redes Neuronales; Robots Móviles}, 21 | abstract = {Los sistemas tradicionales de odometría visual (VO), directos o basados en características visuales, son susceptibles de cometer errores de correspondencia entre imágenes. Además, las configuraciones monoculares sólo son capaces de estimar la localización sujeto a un factor de escala, lo que hace imposible su uso inmediato en aplicaciones de robótica o realidad virtual. Recientemente, varios problemas de Visión por Computadora han sido abordados con éxito por algoritmos de Aprendizaje Profundo. En este trabajo presentamos un sistema de odometría visual monocular basado en Aprendizaje Profundo llamado WGANVO. Específicamente, entrenamos una red neuronal basada en GAN para regresionar una estimación de movimiento. El modelo resultante recibe un par de imágenes y estima el movimiento relativo entre ellas. Entrenamos la red neuronal utilizando un enfoque semi-supervisado. A diferencia de los sistemas monoculares tradicionales basados en geometría, nuestro método basado en Deep Learning es capaz de estimar la escala absoluta de la escena sin información extra ni conocimiento previo. Evaluamos WGANVO en el conocido conjunto de datos KITTI.
Demostramos que nuestro sistema funciona en tiempo real y la precisión obtenida alienta a seguir desarrollando sistemas de localización basados en Aprendizaje Profundo.}, 22 | issn = {1697-7920}, 23 | doi = {10.4995/riai.2022.16113}, 24 | url = {https://polipapers.upv.es/index.php/RIAI/article/view/16113} 25 | } 26 | ``` 27 | 28 | ## Video 1 29 | 30 | Demo 1 31 | 32 | 33 | ## Video 2 34 | 35 | Demo 2 36 | 37 | 38 | # License 39 | Our work is released under a [GPLv3 license](License-gpl.txt), except for some files. 40 | The scripts that are used to pre-process the images are released under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](License-CCBYNCSA4.txt) (see [LICENSE.txt](LICENSE.txt)). For a list of dependencies (and associated licenses), please see [Dependencies.md](Dependencies.md). 41 | 42 | # Requirements 43 | * NVIDIA GPU 44 | * Python 2.7 and pip (image pre-processing) 45 | 46 | # Dependencies 47 | ## Docker and docker-compose 48 | 1. Install Docker and docker-compose 49 | 50 | 2. Install nvidia-docker: 51 | ``` 52 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID) 53 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - 54 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 55 | sudo apt-get update && sudo apt-get install -y nvidia-docker2 56 | sudo systemctl restart docker 57 | ``` 58 | 59 | ## Dependencies for pre-processing KITTI images 60 | Run: 61 | ``` 62 | pip install -r requirements.txt 63 | ``` 64 | 65 | 66 | # Installation 67 | 1. Clone the repository 68 | 2. Run: 69 | ``` 70 | cd wganvo 71 | sudo make image 72 | sudo make start 73 | ``` 74 | 75 | # KITTI 76 | ## Image pre-processing 77 | In order to reduce the resolution of the images, we pre-process KITTI images using a Python script. 78 | **This step will be optional in future versions.** 79 | 80 | 1. Download the KITTI odometry dataset 81 | 2. For each of the KITTI sequences, simply run: 82 | ``` 83 | python adapt_images_kitti.py <path_to_sequence> --crop 500 375 --scale 128 96 --output_dir <output_dir> 84 | ``` 85 | 86 | As a result, several files are generated; these files, in this specific format, are required to train the network. Future versions will no longer require this specific folder structure to be used. 87 |
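To sanity-check the generated files before training, the archives can be inspected with NumPy. A minimal sketch, assuming one sequence's output was saved under `train_images/00/` as in the folder layout shown in the Training section below; the array keys inside each archive are listed rather than assumed:
```
import numpy as np

# List the arrays stored in each archive produced by the pre-processing step.
# The path is an example; adjust it to wherever the output was saved.
for name in ("images.npz", "t.npz"):
    with np.load("train_images/00/" + name) as archive:
        for key in archive.files:
            print("{}: {} {} {}".format(name, key, archive[key].shape, archive[key].dtype))
```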
88 | ## Training 89 | Input must be provided in a specific folder structure. **This step will be optional in future versions.** 90 | 91 | For example, if we want to train the network with sequences 00, 01 and 03 as input, we pre-process the images with `adapt_images_kitti.py` and then save the output files in this way: 92 | ``` 93 | train_images/ 94 | ├── 00 95 | │   ├── images.npz 96 | │   ├── t.npz 97 | │   ├── images_shape.txt 98 | │   └── ... 99 | ├── 01 100 | │   ├── images.npz 101 | │   ├── t.npz 102 | │   ├── images_shape.txt 103 | │   └── ... 104 | └── 03 105 | ├── images.npz 106 | ├── t.npz 107 | ├── images_shape.txt 108 | └── ... 109 | 110 | ``` 111 | **Note**: Folder names (`train_images`, `00`, `01`, `03`) are not required to be the same as the ones in this example. 112 | 113 | Then, you must repeat this step to generate the images used for the adversarial training and for testing the network. After that, copy everything into the `images-dir` folder. This folder will be mounted as a volume in the Docker container. For example, you may end up with this structure: 114 | ``` 115 | images-dir/ 116 | ├── train_images/ 117 | │   ├── 00 118 | │   ├── 01 119 | │   └── 03 120 | ├── train_gan_images/ 121 | │   ├── 06 122 | │   ├── 07 123 | │   └── 08 124 | └── test_images/ 125 | └── 04 126 | ``` 127 | **Note**: Try to have at least 2 folders in `train_images`. 128 | 129 | Open a shell in the Docker container: 130 | ``` 131 | make shell 132 | ``` 133 | Alternatively, you can run `docker run -it --rm --runtime=nvidia -v $(pwd)/images-dir:/var/kitti wganvo_wganvo:latest` or, in newer versions of Docker, `docker run -it --rm --gpus all -v $(pwd)/images-dir:/var/kitti wganvo_wganvo:latest`. The exact command may differ depending on your machine's operating system and NVIDIA GPU. See [this link](https://towardsdatascience.com/how-to-properly-use-the-gpu-within-a-docker-container-4c699c78c6d1). 134 | 135 | The main script is `wgan/wgan_improved.py`. In the container's shell you can run this script to train the network: 136 | ``` 137 | python wgan/wgan_improved.py /var/kitti/train_images /var/kitti/test_images /var/kitti/train_gan_images --batch_size <batch_size> 138 | ``` 139 | The command `python wgan/wgan_improved.py -h` will display all the options that can be configured. It is important to set `--log_dir`. 140 | 141 | ## Testing 142 | In order to test the resulting network, `vgg_trainable/test/test_model.py` can be used. Run: 143 | ``` 144 | python vgg_trainable/test/test_model.py <model_name> /var/kitti/test_images/ --batch_size <batch_size> 145 | ``` 146 | where `<batch_size>` is the batch size used to train the network and `<model_name>` is the name of the model that was saved in the log directory (the path was set using `--log_dir`). The name of the model is the name of the `<model_name>.meta` file (supply it to `test_model.py` without the `.meta` suffix). 147 | 148 | 149 |
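The relative motions produced by the network can also be chained into an absolute trajectory for further analysis, mirroring what `get_absolute_poses` in `eval_utils.py` (included later in this repository) does. A minimal NumPy sketch, assuming the estimates are stacked as an `(N, 3, 4)` array of KITTI-style `[R|t]` matrices; the function name is ours, not part of the codebase:
```
import numpy as np

def accumulate_relative_poses(relative_poses):
    """Chain an (N, 3, 4) stack of relative [R|t] transforms into absolute poses."""
    current = np.eye(4)
    absolute = np.empty_like(relative_poses)
    for i, rel in enumerate(relative_poses):
        step = np.eye(4)
        step[0:3, :] = rel            # promote to a homogeneous 4x4 transform
        current = current.dot(step)   # compose with the trajectory so far
        absolute[i] = current[0:3, :]
    return absolute
```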
-------------------------------------------------------------------------------- /interpolate_poses.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk 4 | # (see original license below) 5 | # 6 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 7 | # For more information see 8 | # 9 | # This file is licensed under the Creative Commons 10 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 11 | # To view a copy of this license, visit 12 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 13 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 14 | # 15 | 16 | ################################################################################ 17 | # 18 | # Copyright (c) 2017 University of Oxford 19 | # Authors: 20 | # Geoff Pascoe (gmp@robots.ox.ac.uk) 21 | # 22 | # This work is licensed under the Creative Commons 23 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 24 | # To view a copy of this license, visit 25 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 26 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 27 | # 28 | ################################################################################ 29 | 30 | import bisect 31 | import csv 32 | import numpy as np 33 | import numpy.matlib as ml 34 | from transform import * 35 | 36 | 37 | def interpolate_vo_poses(vo_path, pose_timestamps, origin_timestamp): 38 | """Interpolate poses from visual odometry. 39 | 40 | Args: 41 | vo_path (str): path to file containing relative poses from visual odometry. 42 | pose_timestamps (list[int]): UNIX timestamps at which interpolated poses are required. 43 | origin_timestamp (int): UNIX timestamp of origin frame. Poses will be reported relative to this frame. 44 | 45 | Returns: 46 | list[numpy.matrixlib.defmatrix.matrix]: SE3 matrix representing interpolated pose for each requested timestamp. 47 | 48 | """ 49 | with open(vo_path) as vo_file: 50 | vo_reader = csv.reader(vo_file) 51 | headers = next(vo_file) 52 | 53 | vo_timestamps = [0] 54 | abs_poses = [ml.identity(4)] 55 | 56 | lower_timestamp = min(min(pose_timestamps), origin_timestamp) 57 | upper_timestamp = max(max(pose_timestamps), origin_timestamp) 58 | 59 | for row in vo_reader: 60 | timestamp = int(row[0]) 61 | if timestamp < lower_timestamp: 62 | vo_timestamps[0] = timestamp 63 | continue 64 | 65 | vo_timestamps.append(timestamp) 66 | 67 | xyzrpy = [float(v) for v in row[2:8]] 68 | rel_pose = build_se3_transform(xyzrpy) 69 | abs_pose = abs_poses[-1] * rel_pose 70 | abs_poses.append(abs_pose) 71 | 72 | if timestamp >= upper_timestamp: 73 | break 74 | 75 | return interpolate_poses(vo_timestamps, abs_poses, pose_timestamps, origin_timestamp) 76 | 77 | 78 | def interpolate_ins_poses(ins_path, pose_timestamps, origin_timestamp): 79 | """Interpolate poses from INS. 80 | 81 | Args: 82 | ins_path (str): path to file containing poses from INS. 83 | pose_timestamps (list[int]): UNIX timestamps at which interpolated poses are required. 84 | origin_timestamp (int): UNIX timestamp of origin frame. Poses will be reported relative to this frame. 85 | 86 | Returns: 87 | list[numpy.matrixlib.defmatrix.matrix]: SE3 matrix representing interpolated pose for each requested timestamp. 88 | 89 | """ 90 | with open(ins_path) as ins_file: 91 | ins_reader = csv.reader(ins_file) 92 | headers = next(ins_file) 93 | 94 | ins_timestamps = [0] 95 | abs_poses = [ml.identity(4)] 96 | 97 | upper_timestamp = max(max(pose_timestamps), origin_timestamp) 98 | 99 | for row in ins_reader: 100 | timestamp = int(row[0]) 101 | ins_timestamps.append(timestamp) 102 | 103 | xyzrpy = [float(v) for v in row[2:8]] 104 | abs_pose = build_se3_transform(xyzrpy) 105 | abs_poses.append(abs_pose) 106 | 107 | if timestamp >= upper_timestamp: 108 | break 109 | 110 | ins_timestamps = ins_timestamps[1:] 111 | abs_poses = abs_poses[1:] 112 | 113 | return interpolate_poses(ins_timestamps, abs_poses, pose_timestamps, origin_timestamp) 114 | 115 | 116 | def interpolate_poses(pose_timestamps, abs_poses, requested_timestamps, origin_timestamp): 117 | """Interpolate between absolute poses. 118 | 119 | Args: 120 | pose_timestamps (list[int]): Timestamps of supplied poses. Must be in ascending order. 121 | abs_poses (list[numpy.matrixlib.defmatrix.matrix]): SE3 matrices representing poses at the timestamps specified. 122 | requested_timestamps (list[int]): Timestamps for which interpolated timestamps are required. 123 | origin_timestamp (int): UNIX timestamp of origin frame. Poses will be reported relative to this frame. 124 | 125 | Returns: 126 | list[numpy.matrixlib.defmatrix.matrix]: SE3 matrix representing interpolated pose for each requested timestamp.
127 | 128 | Raises: 129 | ValueError: if pose_timestamps and abs_poses are not the same length 130 | ValueError: if pose_timestamps is not in ascending order 131 | 132 | """ 133 | requested_timestamps.insert(0, origin_timestamp) 134 | requested_timestamps = np.array(requested_timestamps) 135 | pose_timestamps = np.array(pose_timestamps) 136 | 137 | if len(pose_timestamps) != len(abs_poses): 138 | raise ValueError('Must supply same number of timestamps as poses') 139 | 140 | abs_quaternions = np.zeros((4, len(abs_poses))) 141 | abs_positions = np.zeros((3, len(abs_poses))) 142 | for i, pose in enumerate(abs_poses): 143 | if i > 0 and pose_timestamps[i-1] >= pose_timestamps[i]: 144 | raise ValueError('Pose timestamps must be in ascending order') 145 | 146 | abs_quaternions[:, i] = so3_to_quaternion(pose[0:3, 0:3]) 147 | abs_positions[:, i] = np.ravel(pose[0:3, 3]) 148 | 149 | upper_indices = [bisect.bisect(pose_timestamps, pt) for pt in requested_timestamps] 150 | lower_indices = [u - 1 for u in upper_indices] 151 | 152 | if max(upper_indices) >= len(pose_timestamps): 153 | upper_indices = [min(i, len(pose_timestamps) - 1) for i in upper_indices] 154 | 155 | fractions = (requested_timestamps - pose_timestamps[lower_indices]) / \ 156 | (pose_timestamps[upper_indices] - pose_timestamps[lower_indices]) 157 | 158 | quaternions_lower = abs_quaternions[:, lower_indices] 159 | quaternions_upper = abs_quaternions[:, upper_indices] 160 | 161 | d_array = (quaternions_lower * quaternions_upper).sum(0) 162 | 163 | linear_interp_indices = np.nonzero(d_array >= 1) 164 | sin_interp_indices = np.nonzero(d_array < 1) 165 | 166 | scale0_array = np.zeros(d_array.shape) 167 | scale1_array = np.zeros(d_array.shape) 168 | 169 | scale0_array[linear_interp_indices] = 1 - fractions[linear_interp_indices] 170 | scale1_array[linear_interp_indices] = fractions[linear_interp_indices] 171 | 172 | theta_array = np.arccos(np.abs(d_array[sin_interp_indices])) 173 | 174 | scale0_array[sin_interp_indices] = \ 175 | np.sin((1 - fractions[sin_interp_indices]) * theta_array) / np.sin(theta_array) 176 | scale1_array[sin_interp_indices] = \ 177 | np.sin(fractions[sin_interp_indices] * theta_array) / np.sin(theta_array) 178 | 179 | negative_d_indices = np.nonzero(d_array < 0) 180 | scale1_array[negative_d_indices] = -scale1_array[negative_d_indices] 181 | 182 | quaternions_interp = np.tile(scale0_array, (4, 1)) * quaternions_lower \ 183 | + np.tile(scale1_array, (4, 1)) * quaternions_upper 184 | 185 | positions_lower = abs_positions[:, lower_indices] 186 | positions_upper = abs_positions[:, upper_indices] 187 | 188 | positions_interp = np.multiply(np.tile((1 - fractions), (3, 1)), positions_lower) \ 189 | + np.multiply(np.tile(fractions, (3, 1)), positions_upper) 190 | 191 | poses_mat = ml.zeros((4, 4 * len(requested_timestamps))) 192 | 193 | poses_mat[0, 0::4] = 1 - 2 * np.square(quaternions_interp[2, :]) - \ 194 | 2 * np.square(quaternions_interp[3, :]) 195 | poses_mat[0, 1::4] = 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[2, :]) - \ 196 | 2 * np.multiply(quaternions_interp[3, :], quaternions_interp[0, :]) 197 | poses_mat[0, 2::4] = 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[3, :]) + \ 198 | 2 * np.multiply(quaternions_interp[2, :], quaternions_interp[0, :]) 199 | 200 | poses_mat[1, 0::4] = 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[2, :]) \ 201 | + 2 * np.multiply(quaternions_interp[3, :], quaternions_interp[0, :]) 202 | poses_mat[1, 1::4] = 1 - 2 * 
np.square(quaternions_interp[1, :]) \ 203 | - 2 * np.square(quaternions_interp[3, :]) 204 | poses_mat[1, 2::4] = 2 * np.multiply(quaternions_interp[2, :], quaternions_interp[3, :]) - \ 205 | 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[0, :]) 206 | 207 | poses_mat[2, 0::4] = 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[3, :]) - \ 208 | 2 * np.multiply(quaternions_interp[2, :], quaternions_interp[0, :]) 209 | poses_mat[2, 1::4] = 2 * np.multiply(quaternions_interp[2, :], quaternions_interp[3, :]) + \ 210 | 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[0, :]) 211 | poses_mat[2, 2::4] = 1 - 2 * np.square(quaternions_interp[1, :]) - \ 212 | 2 * np.square(quaternions_interp[2, :]) 213 | 214 | poses_mat[0:3, 3::4] = positions_interp 215 | poses_mat[3, 3::4] = 1 216 | 217 | poses_mat = np.linalg.solve(poses_mat[0:4, 0:4], poses_mat) 218 | 219 | poses_out = [0] * (len(requested_timestamps) - 1) 220 | for i in range(1, len(requested_timestamps)): 221 | poses_out[i - 1] = poses_mat[0:4, i * 4:(i + 1) * 4] 222 | 223 | return poses_out 224 | -------------------------------------------------------------------------------- /eval_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # 4 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # wganvo is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # wganvo is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with wganvo. If not, see <https://www.gnu.org/licenses/>.
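# Evaluation helpers for wganvo: infer_relative_poses runs the trained network
# to obtain relative pose estimates, get_absolute_poses chains them into an
# absolute trajectory, and get_traslation_error / plot_frames_vs_abs_distance
# compute translation-error statistics for the estimated trajectories.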
19 | 20 | import sys, os, inspect 21 | 22 | import numpy as np 23 | import transformations 24 | import random 25 | import matplotlib 26 | 27 | matplotlib.use('Agg') 28 | import matplotlib.pyplot as plt 29 | import trajectory 30 | 31 | 32 | def infer_relative_poses(sess, dataset, batch_size, images_placeholder, outputs, 33 | targets_placeholder, train_mode=None): 34 | steps_per_epoch = dataset.num_examples // batch_size 35 | num_examples = steps_per_epoch * batch_size 36 | relative_poses_prediction = np.empty((num_examples, 3, 4)) 37 | relative_poses_target = np.empty((num_examples, 3, 4)) 38 | standardize_targets = False 39 | # rmse, mse, norm_mse = do_evaluation(sess,outputs,images_placeholder, targets_placeholder, dataset, batch_size, True) 40 | for step in xrange(steps_per_epoch): 41 | feed_dict = fill_feed_dict(dataset, images_placeholder, targets_placeholder, feed_with_batch=True, 42 | batch_size=batch_size, shuffle=False, standardize_targets=standardize_targets) 43 | if train_mode is not None: 44 | feed_dict[train_mode] = False 45 | prediction_batch, target_batch = sess.run([outputs, targets_placeholder], feed_dict=feed_dict) 46 | batch_relative_poses_pred = get_transformation_matrices(dataset, batch_size, 47 | prediction_batch, 48 | standardize_targets) 49 | batch_relative_poses_target = get_transformation_matrices(dataset, batch_size, 50 | target_batch, 51 | standardize_targets) 52 | init = batch_size * step 53 | end = batch_size * (step + 1) 54 | relative_poses_prediction[init:end] = batch_relative_poses_pred 55 | relative_poses_target[init:end] = batch_relative_poses_target 56 | if train_mode is not None: 57 | print("Train Mode: " + str(sess.run(train_mode, feed_dict))) 58 | return relative_poses_prediction, relative_poses_target 59 | 60 | 61 | def get_absolute_poses(relative_poses, inv=False): 62 | current = np.matrix(np.identity(4)) 63 | num_examples = relative_poses.shape[0] 64 | absolute_poses = np.empty(shape=relative_poses.shape) 65 | for i in xrange(num_examples): 66 | T = np.matrix(np.identity(4)) 67 | T[0:3, :] = relative_poses[i] 68 | if inv: 69 | T = np.linalg.inv(T) 70 | current = current * T 71 | absolute_poses[i] = current[0:3, :] 72 | return absolute_poses 73 | 74 | 75 | def get_transformation_matrices(dataset, batch_size, batch, 76 | standardize_targets): 77 | transformation_matrices = np.empty((batch_size, 3, 4)) 78 | # poses_target = np.empty((batch_size, 3, 4)) 79 | for i in xrange(batch_size): 80 | transformation = batch[i] 81 | # Original scale 82 | if standardize_targets: 83 | transformation = transformation * dataset.targets_std + dataset.targets_mean 84 | 85 | # prediction = prediction.reshape(3,4) 86 | # pred_transformation = inverse_intrinsic_matrix * prediction 87 | # u,_ = linalg.polar(pred_transformation[0:3,0:3]) 88 | # pred_transf_correction = np.empty((3,4)) 89 | # pred_transf_correction[0:3, 0:3] = u 90 | # pred_transf_correction[0:3, 3] = pred_transformation[0:3,3].transpose() 91 | 92 | # target = target_batch[i] 93 | # if standardize_targets: 94 | # target = target * dataset.targets_std + dataset.targets_mean 95 | # target = target.reshape(3,4) 96 | # target_transformation = inverse_intrinsic_matrix * target 97 | # poses_prediction[i] = pred_transf_correction.reshape(12) 98 | # poses_target[i] = target_transformation.reshape(12) 99 | 100 | transformation_matrices[i] = vector_to_transformation_mtx(transformation) 101 | # poses_target[i] = x_q_to_mtx(target) 102 | 103 | return transformation_matrices 104 | 105 | 106 | def 
vector_to_transformation_mtx(xq): # xq holds 3 translation components followed by a quaternion; returns the corresponding 3x4 [R|t] matrix 107 | mtx = transformations.quaternion_matrix(xq[3:]) 108 | mtx[0:3, 3] = xq[0:3] 109 | out = mtx[0:3, :] 110 | return out # .reshape(12) 111 | 112 | 113 | def fill_feed_dict(data_set, images_pl, labels_pl, points_pl=None, feed_with_batch=False, batch_size=None, shuffle=True, 114 | standardize_targets=False, fake_data=False): 115 | """Fills the feed_dict for training the given step or for evaluating the entire dataset. 116 | A feed_dict takes the form of: 117 | feed_dict = { 118 | <placeholder>: <tensor of values to be passed for placeholder>, 119 | .... 120 | } 121 | Args: 122 | data_set: The set of images and labels, from input_data.read_data_sets() 123 | images_pl: The images placeholder, from placeholder_inputs(). 124 | labels_pl: The labels placeholder, from placeholder_inputs(). 125 | Returns: 126 | feed_dict: The feed dictionary mapping from placeholders to values. 127 | """ 128 | # Create the feed_dict for the placeholders filled with the next 129 | # `batch size` examples. 130 | if (feed_with_batch): 131 | if (batch_size is None): 132 | raise ValueError("batch_size not specified") 133 | images_feed, labels_feed, points = data_set.next_batch(batch_size, 134 | fake_data, 135 | shuffle=shuffle, 136 | standardize_targets=standardize_targets) 137 | # Create the feed_dict for the placeholders filled with the entire dataset 138 | else: 139 | images_feed = data_set.images 140 | labels_feed = data_set.labels 141 | points = data_set.points 142 | 143 | feed_dict = { 144 | images_pl: images_feed, 145 | labels_pl: labels_feed, 146 | #points_pl: points, 147 | } 148 | if points_pl is not None: 149 | feed_dict[points_pl] = points 150 | return feed_dict 151 | 152 | 153 | def plot_frames_vs_abs_distance(relative_poses_prediction, relative_poses_target, dataset, output_dir, save_txt=False, 154 | plot=False, samples=30): 155 | groups = dataset.groups 156 | datasets_idxs = {} 157 | for i, _ in enumerate(relative_poses_prediction): 158 | group = str(groups[i]) 159 | if group in datasets_idxs: 160 | datasets_idxs[group].append(i) 161 | else: 162 | datasets_idxs[group] = [i] 163 | # acc_rmse_tr = 0. 164 | # acc_rmse_rot = 0.
165 | X_axis = [] 166 | Y_axis = [] 167 | for grp, idxs in datasets_idxs.iteritems(): 168 | relative_prediction = relative_poses_prediction[idxs] 169 | relative_target = relative_poses_target[idxs] 170 | max_num_of_frames = len(relative_prediction) 171 | assert max_num_of_frames == len(relative_target) 172 | # Get SAMPLES sub-trajectories from sequence 173 | for i in xrange(samples): 174 | # Random sub-trajectory 175 | N = random.randint(1, max_num_of_frames) 176 | start = random.randint(0, max_num_of_frames - N) 177 | traslation_error = get_traslation_error(relative_prediction[start:start + N], 178 | relative_target[start:start + N]) 179 | assert len(traslation_error) == N 180 | d = traslation_error[-1] 181 | X_axis.append(N) 182 | Y_axis.append(d) 183 | if save_txt: 184 | np.savetxt(os.path.join(output_dir, 'abs_poses_target_{}.txt'.format(grp)), 185 | get_absolute_poses(relative_target).reshape(-1, 12)) 186 | np.savetxt(os.path.join(output_dir, 'abs_poses_prediction_{}.txt'.format(grp)), 187 | get_absolute_poses(relative_prediction).reshape(-1, 12)) 188 | # print("Num of frames") 189 | # print(N) 190 | # print("d") 191 | # print(d) 192 | 193 | # if save_txt: 194 | # np.savetxt(os.path.join(output_dir, 'orig_relative_target.txt'), relative_poses_target.reshape(-1, 12)) 195 | # np.savetxt(os.path.join(output_dir, 'orig_relative_prediction.txt'), relative_poses_prediction.reshape(-1, 12)) 196 | # rmse_tr, rmse_rot = calc_trajectory_rmse(relative_poses_prediction[idxs], relative_poses_target[idxs]) 197 | # print('*' * 50) 198 | # print(grp, len(idxs)) 199 | # print(rmse_tr, rmse_rot) 200 | # acc_rmse_tr += rmse_tr 201 | # acc_rmse_rot += rmse_rot 202 | if plot: 203 | fig, ax = plt.subplots() 204 | ax.plot(X_axis, Y_axis, 'r.') 205 | fig.savefig(os.path.join(output_dir, 'f_vs_d.png')) 206 | return X_axis, Y_axis 207 | # return acc_rmse_tr / len(datasets_idxs), acc_rmse_rot / len(datasets_idxs) 208 | 209 | 210 | def get_traslation_error(relative_poses_prediction, relative_poses_target): 211 | absolute_poses_prediction = get_absolute_poses(relative_poses_prediction).reshape(-1, 12) 212 | absolute_poses_target = get_absolute_poses(relative_poses_target).reshape(-1, 12) 213 | poses_prediction = se3_pose_list(absolute_poses_prediction) 214 | poses_target = se3_pose_list(absolute_poses_target) 215 | poses_prediction = trajectory.PosePath3D(poses_se3=poses_prediction) 216 | poses_target = trajectory.PosePath3D(poses_se3=poses_target) 217 | E_tr = poses_prediction.positions_xyz - poses_target.positions_xyz 218 | traslation_error = [np.linalg.norm(E_i) for E_i in E_tr] 219 | return traslation_error 220 | 221 | 222 | def se3_pose_list(kitti_format): 223 | return [np.array([[r[0], r[1], r[2], r[3]], 224 | [r[4], r[5], r[6], r[7]], 225 | [r[8], r[9], r[10], r[11]], 226 | [0, 0, 0, 1]]) for r in kitti_format] 227 | 228 | 229 | def our_metric_evaluation(relative_prediction, relative_target, test_dataset, curr_fold_log_path, 230 | save_txt): 231 | frames, abs_distance = plot_frames_vs_abs_distance(relative_prediction, relative_target, test_dataset, 232 | curr_fold_log_path, save_txt=save_txt) 233 | frames = np.array(frames) 234 | abs_distance = np.array(abs_distance) 235 | te_eval = np.mean(np.square(np.log(abs_distance) / np.log(frames + 1))) 236 | return te_eval 237 | -------------------------------------------------------------------------------- /vgg_trainable/vgg.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file was originally part of 
https://github.com/machrisaa/tensorflow-vgg 3 | # 4 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # wganvo is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # wganvo is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with wganvo. If not, see <https://www.gnu.org/licenses/>. 19 | # 20 | 21 | import tensorflow as tf 22 | 23 | import numpy as np 24 | from functools import reduce 25 | 26 | 27 | # VGG_MEAN = [103.939, 116.779, 123.68] 28 | 29 | 30 | class Vgg19: 31 | """ 32 | A trainable version of VGG19. 33 | """ 34 | 35 | def __init__(self, width, height, vgg19_npy_path=None, trainable=True, dropout=0.5, activation_function="relu"): 36 | if vgg19_npy_path is not None: 37 | self.data_dict = np.load(vgg19_npy_path, encoding='latin1').item() 38 | else: 39 | self.data_dict = None 40 | 41 | self.var_dict = {} 42 | self.trainable = trainable 43 | self.dropout = dropout 44 | self.activation_function = activation_function 45 | self.width = width 46 | self.height = height 47 | 48 | def build(self, images, train_mode=None, pooling_type="max"): 49 | """ 50 | Load variables from npy to build the VGG. 51 | :param images: [batch, height, width, 1] (usually a placeholder) 52 | :param train_mode: a bool tensor, usually a placeholder: if True, dropout will be turned on 53 | """ 54 | 55 | self.conv1_1 = self.conv_layer(images, 2, 64, "conv1_1") 56 | self.conv1_2 = self.conv_layer(self.conv1_1, 64, 64, "conv1_2") 57 | self.pool1 = self.pooling(self.conv1_2, 'pool1', pooling_type=pooling_type) 58 | 59 | self.conv2_1 = self.conv_layer(self.pool1, 64, 128, "conv2_1") 60 | self.conv2_2 = self.conv_layer(self.conv2_1, 128, 128, "conv2_2") 61 | self.pool2 = self.pooling(self.conv2_2, 'pool2', pooling_type=pooling_type) 62 | 63 | self.conv3_1 = self.conv_layer(self.pool2, 128, 256, "conv3_1") 64 | self.conv3_2 = self.conv_layer(self.conv3_1, 256, 256, "conv3_2") 65 | self.conv3_3 = self.conv_layer(self.conv3_2, 256, 256, "conv3_3") 66 | self.conv3_4 = self.conv_layer(self.conv3_3, 256, 256, "conv3_4") 67 | self.pool3 = self.pooling(self.conv3_4, 'pool3', pooling_type=pooling_type) 68 | 69 | self.conv4_1 = self.conv_layer(self.pool3, 256, 512, "conv4_1") 70 | self.conv4_2 = self.conv_layer(self.conv4_1, 512, 512, "conv4_2") 71 | self.conv4_3 = self.conv_layer(self.conv4_2, 512, 512, "conv4_3") 72 | self.conv4_4 = self.conv_layer(self.conv4_3, 512, 512, "conv4_4") 73 | self.pool4 = self.pooling(self.conv4_4, 'pool4', pooling_type=pooling_type) 74 | 75 | self.conv5_1 = self.conv_layer(self.pool4, 512, 512, "conv5_1") 76 | self.conv5_2 = self.conv_layer(self.conv5_1, 512, 512, "conv5_2") 77 | self.conv5_3 = self.conv_layer(self.conv5_2, 512, 512, "conv5_3") 78 | self.conv5_4 = self.conv_layer(self.conv5_3, 512, 512, "conv5_4") 79 | self.pool5 = self.pooling(self.conv5_4, 'pool5', pooling_type=pooling_type) 80 | 81 | fc_in_size = ((self.width // (2 ** 5)) * (self.height // (2 ** 5))) * 512 # (the conv layers keep the width and height, and the max pools halve them. There are 5 max pools)
82 | self.fc6 = self.fc_layer(self.pool5, fc_in_size, 4096, "fc6") 83 | self.relu6 = self.activation_function_tensor(self.fc6, act_function=self.activation_function)#tf.nn.relu(self.fc6) 84 | if train_mode is not None: 85 | print("Train Mode placeholder") 86 | self.relu6 = tf.cond(train_mode, lambda: tf.nn.dropout(self.relu6, self.dropout), lambda: self.relu6) 87 | elif self.trainable: 88 | print("Not Train Mode placeholder") 89 | self.relu6 = tf.nn.dropout(self.relu6, self.dropout) 90 | 91 | self.fc7 = self.fc_layer(self.relu6, 4096, 4096, "fc7") 92 | self.relu7 = self.activation_function_tensor(self.fc7, act_function=self.activation_function)#tf.nn.relu(self.fc7) 93 | if train_mode is not None: 94 | self.relu7 = tf.cond(train_mode, lambda: tf.nn.dropout(self.relu7, self.dropout), lambda: self.relu7) 95 | elif self.trainable: 96 | self.relu7 = tf.nn.dropout(self.relu7, self.dropout) 97 | 98 | self.fc8 = self.fc_layer(self.relu7, 4096, 7, "fc8") # the head outputs 7 values: 3 translation components followed by a quaternion, which is normalized below so fc8 contains a valid rotation 99 | quaternions = self.fc8[:, 3:7] 100 | quaternions_norm = tf.norm(quaternions, axis=1) 101 | unit_quaternions = quaternions / tf.reshape(quaternions_norm, (-1, 1)) 102 | self.fc8 = tf.concat([self.fc8[:, :3], unit_quaternions], 1) 103 | #self.prob = tf.nn.softmax(self.fc8, name="prob") 104 | 105 | self.data_dict = None 106 | return self.fc8 107 | 108 | def build_pruned_vgg(self, images, train_mode=None): 109 | """ 110 | Load variables from npy to build the VGG. 111 | :param images: [batch, height, width, 1] (usually a placeholder) 112 | :param train_mode: a bool tensor, usually a placeholder: if True, dropout will be turned on 113 | """ 114 | 115 | self.conv1_1 = self.conv_layer(images, 2, 64, "conv1_1") 116 | self.conv1_2 = self.conv_layer(self.conv1_1, 64, 64, "conv1_2") 117 | self.pool1 = self.pooling(self.conv1_2, 'pool1') 118 | 119 | self.conv2_1 = self.conv_layer(self.pool1, 64, 128, "conv2_1") 120 | self.conv2_2 = self.conv_layer(self.conv2_1, 128, 128, "conv2_2") 121 | self.pool2 = self.pooling(self.conv2_2, 'pool2') 122 | 123 | self.conv3_1 = self.conv_layer(self.pool2, 128, 256, "conv3_1") 124 | self.conv3_2 = self.conv_layer(self.conv3_1, 256, 256, "conv3_2") 125 | self.pool3 = self.pooling(self.conv3_2, 'pool3') 126 | 127 | self.conv4_1 = self.conv_layer(self.pool3, 256, 512, "conv4_1") 128 | self.conv4_2 = self.conv_layer(self.conv4_1, 512, 512, "conv4_2") 129 | self.pool4 = self.pooling(self.conv4_2, 'pool4') 130 | 131 | self.conv5_1 = self.conv_layer(self.pool4, 512, 512, "conv5_1") 132 | self.conv5_2 = self.conv_layer(self.conv5_1, 512, 512, "conv5_2") 133 | self.pool5 = self.pooling(self.conv5_2, 'pool5') 134 | 135 | fc_in_size = ((self.width // (2 ** 5)) * (self.height // (2 ** 5))) * 512 # (the conv layers keep the width and height, and the max pools halve them. There are 5 max pools)
136 | self.fc_in = tf.reshape(self.pool5, [-1, fc_in_size]) 137 | if train_mode is not None: 138 | self.fc_in = tf.cond(train_mode, lambda: tf.nn.dropout(self.fc_in, self.dropout), lambda: self.fc_in) 139 | elif self.trainable: 140 | self.fc_in = tf.nn.dropout(self.fc_in, self.dropout) 141 | 142 | self.output = self.fc_layer(self.fc_in, fc_in_size, 12, "fc8") 143 | self.data_dict = None 144 | return self.output 145 | 146 | def build_non_deep_nn(self, images): 147 | self.conv1_1 = self.conv_layer(images, 2, 32, "conv1_1") 148 | self.conv1_2 = self.conv_layer(self.conv1_1, 32, 32, "conv1_2") 149 | self.pool1 = self.max_pool(self.conv1_2, 'pool1') 150 | fc_in_size = ((self.width // 2) * (self.height // 2)) * 32 151 | print(fc_in_size) 152 | self.fc = self.fc_layer(self.pool1, fc_in_size, 12, "fc") 153 | self.data_dict = None 154 | return self.fc 155 | 156 | def pooling(self, bottom, name, pooling_type="max"): 157 | if pooling_type == "avg": 158 | return self.avg_pool(bottom, name) 159 | return self.max_pool(bottom, name) 160 | 161 | def avg_pool(self, bottom, name): 162 | print("Using avg pool") 163 | return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 164 | 165 | def max_pool(self, bottom, name): 166 | print("Using max pool") 167 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 168 | 169 | def activation_function_tensor(self, features, act_function="relu"): 170 | if act_function == "leaky_relu": 171 | print("Using leaky relu") 172 | return tf.nn.leaky_relu(features) 173 | print("Using relu") 174 | return tf.nn.relu(features) 175 | 176 | def conv_layer(self, bottom, in_channels, out_channels, name): 177 | with tf.variable_scope(name): 178 | filt, conv_biases = self.get_conv_var(3, in_channels, out_channels, name) 179 | 180 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 181 | bias = tf.nn.bias_add(conv, conv_biases) 182 | act_funct = self.activation_function_tensor(bias, act_function=self.activation_function) 183 | 184 | return act_funct 185 | 186 | def fc_layer(self, bottom, in_size, out_size, name): 187 | with tf.variable_scope(name): 188 | weights, biases = self.get_fc_var(in_size, out_size, name) 189 | 190 | x = tf.reshape(bottom, [-1, in_size]) 191 | fc = tf.nn.bias_add(tf.matmul(x, weights), biases) 192 | 193 | return fc 194 | 195 | def get_conv_var(self, filter_size, in_channels, out_channels, name): 196 | initializer = tf.contrib.layers.xavier_initializer() 197 | initial_value = initializer([filter_size, filter_size, in_channels, out_channels]) 198 | # initial_value = tf.truncated_normal([filter_size, filter_size, in_channels, out_channels], 0.0, 0.001) 199 | filters = self.get_var(initial_value, name, 0, name + "_filters") 200 | 201 | initial_value = initializer([out_channels]) 202 | # initial_value = tf.truncated_normal([out_channels], .0, .001) 203 | biases = self.get_var(initial_value, name, 1, name + "_biases") 204 | 205 | return filters, biases 206 | 207 | def get_fc_var(self, in_size, out_size, name): 208 | initializer = tf.contrib.layers.xavier_initializer() 209 | # initial_value = tf.truncated_normal([in_size, out_size], 0.0, 0.001) 210 | initial_value = initializer([in_size, out_size]) 211 | weights = self.get_var(initial_value, name, 0, name + "_weights") 212 | 213 | # initial_value = tf.truncated_normal([out_size], .0, .001) 214 | initial_value = initializer([out_size]) 215 | biases = self.get_var(initial_value, name, 1, name + "_biases") 216
| 217 | return weights, biases 218 | 219 | def get_var(self, initial_value, name, idx, var_name): 220 | if self.data_dict is not None and name in self.data_dict: 221 | value = self.data_dict[name][idx] 222 | else: 223 | value = initial_value 224 | 225 | if self.trainable: 226 | var = tf.Variable(value, name=var_name) 227 | else: 228 | var = tf.constant(value, dtype=tf.float32, name=var_name) 229 | 230 | self.var_dict[(name, idx)] = var 231 | 232 | # print var_name, var.get_shape().as_list() 233 | assert var.get_shape() == initial_value.get_shape() 234 | 235 | return var 236 | 237 | def save_npy(self, sess, npy_path="./vgg19-save.npy"): 238 | assert isinstance(sess, tf.Session) 239 | 240 | data_dict = {} 241 | 242 | for (name, idx), var in list(self.var_dict.items()): 243 | var_out = sess.run(var) 244 | if name not in data_dict: 245 | data_dict[name] = {} 246 | data_dict[name][idx] = var_out 247 | 248 | np.save(npy_path, data_dict) 249 | print(("file saved", npy_path)) 250 | return npy_path 251 | 252 | def get_var_count(self): 253 | count = 0 254 | for v in list(self.var_dict.values()): 255 | count += reduce(lambda x, y: x * y, v.get_shape().as_list()) 256 | return count 257 | --------------------------------------------------------------------------------