├── __init__.py
├── vgg_trainable
│   ├── __init__.py
│   ├── test
│   │   ├── __init__.py
│   │   ├── parse_sptam
│   │   ├── show_clip.py
│   │   ├── plot_points.py
│   │   ├── relative_to_kitti_format.py
│   │   ├── plot_traj.py
│   │   ├── test_model.py
│   │   ├── show_traj_kitti.py
│   │   └── plotHelpers.py
│   └── vgg.py
├── wgan
│   └── tflib
│       ├── ops
│       │   ├── __init__.py
│       │   ├── layernorm.py
│       │   ├── cond_batchnorm.py
│       │   ├── deconv2d.py
│       │   ├── conv1d.py
│       │   ├── conv2d.py
│       │   ├── batchnorm.py
│       │   └── linear.py
│       ├── small_imagenet.py
│       ├── cifar10.py
│       ├── plot.py
│       ├── save_images.py
│       ├── mnist.py
│       ├── inception_score.py
│       └── __init__.py
├── .dockerignore
├── _config.yml
├── images-dir
│   └── README.md
├── requirements.txt
├── docker-compose.yml
├── Dockerfile
├── eval_kitti
│   ├── README
│   ├── mail.h
│   ├── matrix.h
│   └── readme.txt
├── LICENSE.txt
├── Makefile
├── array_utils.py
├── .gitignore
├── outliers.py
├── play_images.py
├── image.py
├── download.sh
├── geometry.py
├── project_laser_into_camera.py
├── triangulate.py
├── lie_algebra.py
├── transform.py
├── adapt_images.py
├── camera_model.py
├── Dependencies.md
├── README.md
├── interpolate_poses.py
└── eval_utils.py

--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/vgg_trainable/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/wgan/tflib/ops/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
images-dir/
--------------------------------------------------------------------------------
/vgg_trainable/test/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
theme: jekyll-theme-slate
--------------------------------------------------------------------------------
/images-dir/README.md:
--------------------------------------------------------------------------------
Directory to store the images and the model to run
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.16.3
scipy==1.1.0
pillow
imageio==2.5
colour-science
colour-demosaicing
matplotlib
opencv-python==3.1.0.0
--------------------------------------------------------------------------------
/vgg_trainable/test/parse_sptam:
--------------------------------------------------------------------------------
#cat $1 | grep 'TRACKED_FRAME_POSE' | sed 's/ /,/g' | while read -r line; do arr=$(echo "$line" | sed 's/,/ /g'); arr=($arr); echo ${arr[@]:2:12}; done
cat $1 | grep 'TRACKED_FRAME_POSE' | while read -r line; do arr=($line); echo ${arr[@]:2:12}; done
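# Usage sketch (hedged; the log file name below is a placeholder): each
# TRACKED_FRAME_POSE line of an S-PTAM log yields the 12 values of a 3x4 pose:
#   ./parse_sptam sptam_output.log > relative_poses.txt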
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: '2.3'
services:
  wganvo:
    container_name: wganvo-docker
    entrypoint: /bin/bash
    stdin_open: true # With these two lines, after docker-compose up -d (or make start) the container
    tty: true        # keeps running; otherwise, it exited immediately
    build: .
    runtime: nvidia
    volumes:
      - .:/app
      - ./images-dir:/var/kitti
    #volumes:
    #  - /var
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM tensorflow/tensorflow:1.8.0-devel-gpu
WORKDIR /app
COPY . /app
RUN ln -s /usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so \
    /usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so.1 && \
    apt-get update && \
    apt-get install -y python-tk && \
    pip install --trusted-host pypi.python.org -r requirements.txt

ENV LD_LIBRARY_PATH="/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/:${LD_LIBRARY_PATH}"
#RUN mkdir -p /var/kitti-images/
#VOLUME /var/kitti-images/
#CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/eval_kitti/README:
--------------------------------------------------------------------------------
To run:
In the root dir, set up the following structure:
data/odometry/poses/ -> the "ground truth" for sequences 11-21 goes here, obtained e.g. with SPTAM, or with some method to compare against. Files are named xx.txt, where xx is the sequence number (11-21)
results/poses/data/ -> our estimate goes here. Files are named the same way.

Compile with:
g++ -O3 -DNDEBUG -o evaluate_odometry evaluate_odometry.cpp matrix.cpp

Run with:
./evaluate_odometry poses

(poses is the name of the subfolder of results)
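Each line of an xx.txt file follows the KITTI odometry format: a 3x4 pose
matrix flattened row-major into 12 space-separated floats. For example, the
identity pose is:
1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0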
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
wganvo is released under a GPLv3 license (see License-gpl.txt), except for some files.
The scripts that are used to pre-process the images are released under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (see License-CCBYNCSA4.txt).
These scripts are:
* adapt_images.py
* adapt_images_kitti.py
* build_pointcloud.py
* camera_model.py
* image.py
* interpolate_poses.py
* play_images.py
* project_laser_into_camera.py
* transform.py

Please see Dependencies.md for a list of all the included code and library dependencies which are not property of the authors of wganvo.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
help:
	@echo "help -- print this help"
	@echo "start -- start docker stack"
	@echo "stop -- stop docker stack"
	@echo "ps -- show status"
	@echo "top -- displays the running processes"
	@echo "clean -- clean all artifacts"
	@echo "shell -- run bash inside docker"
	@echo "image -- create my docker image"

start:
	docker-compose up -d

stop:
	docker-compose stop

ps:
	docker-compose ps

top:
	docker-compose top

clean: stop
	docker-compose rm --force -v

shell:
	docker exec -it wganvo-docker bash

image:
	docker-compose build

.PHONY: help start stop ps top clean shell image

--------------------------------------------------------------------------------
/array_utils.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.

import numpy as np

def save_txt(name, array, fmt='%1.6f'):
    np.savetxt(name + '.txt', array, delimiter=' ', fmt=fmt)

def load(name):
    return np.loadtxt(name, delimiter=' ')

def save_as_list(name, array, fmt='%1.6f'):
    np.savetxt(name + '.txt', array.ravel(), delimiter=' ', fmt=fmt)

def list_to_array(list):
    return np.array(list)

def save_npy(name, arr):
    np.save(name, arr)

def load_npy(name):
    return np.load(name)
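# Usage sketch (hedged; file names are illustrative):
#   a = np.arange(6).reshape(2, 3).astype(np.float64)
#   save_txt('poses', a)      # writes poses.txt, 6-decimal floats, space-separated
#   b = load('poses.txt')     # reads it back as a 2x3 array
#   save_as_list('flat', a)   # writes the 6 values flattened to flat.txt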
--------------------------------------------------------------------------------
/vgg_trainable/test/show_clip.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

import numpy as np
import matplotlib.pyplot as plt
from scipy import linalg
import argparse
import sys, os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)
from input_data import read_data_sets, DataSet

def show(images):
    artist = plt.imshow(images[0], cmap='gray')
    for img in images:
        artist.set_data(img)
        plt.xticks([])
        plt.yticks([])
        plt.pause(0.01)

def main():
    images,_,_,_,_ = read_data_sets(FLAGS.img_file)
    show(images[...,1])


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'img_file',
        type=str,
        help='Images file'
    )
    FLAGS, unparsed = parser.parse_known_args()
    main()
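# Usage sketch (hedged; the data file path is a placeholder for a dataset
# produced by the preprocessing scripts and readable by read_data_sets):
#   python show_clip.py images-dir/dataset_file
# This plays back one channel of every stored image pair as a grayscale clip.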
--------------------------------------------------------------------------------
/vgg_trainable/test/plot_points.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

import tensorflow as tf
import numpy as np
import argparse
import sys, os
import matplotlib.pyplot as plt

def plot(X_axis, Y_axis, xlabel, ylabel):
    fig, ax = plt.subplots()
    ax.plot(X_axis, Y_axis, 'r.')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    #plt.show()
    return fig, ax
#

def main(_):
    points = np.loadtxt(FLAGS.file, delimiter=' ')
    X_axis = points[:, 0]
    Y_axis = points[:, 1]
    fig, ax = plot(X_axis, Y_axis, "frames", "distance(m)")
    fig.savefig(os.path.join(FLAGS.output_dir, 'frames_vs_dist.png'))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'file',
        type=str,
        help='File'
    )
    parser.add_argument(
        '--output_dir',
        type=str,
        default=os.getcwd(),
        help='Output Dir'
    )
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# NetBeans Project
nbproject/
.idea/

# Temporary Files
*~


.idea/


# Generated Images
*.jpg

*.npz
*.png
*.txt

# Working directory
/images-dir
--------------------------------------------------------------------------------
/vgg_trainable/test/relative_to_kitti_format.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

import os, sys, inspect

currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
gparentdir = os.path.dirname(parentdir)
sys.path.insert(0, gparentdir)
import eval_utils
from plot_traj import read, plot
import argparse
import numpy as np

# Instantiate the parser
parser = argparse.ArgumentParser(description='Convert relative poses to absolute poses in the KITTI format')
parser.add_argument('poses', type=str,
                    help='Poses file')
parser.add_argument("--mode", help="inv = invert the transformation",
                    default='ninv', choices=["inv", "ninv"])

args = parser.parse_args()

data = read(args.poses, delimiter=' ')

current = np.matrix(np.identity(4))
current = current[0:3, :]
num_examples = len(data)
# ts = np.empty((num_examples,12))
i = 0
inv = args.mode == 'inv'
ts = eval_utils.get_absolute_poses(data.reshape(-1, 3, 4), inv=inv)
ts = ts.reshape(-1, 12)
# for t in data:
#     T = np.matrix(np.identity(4))
#     T[0:3,:] = t.reshape(3,4)
#     if args.mode == 'inv':
#         transformation = np.linalg.inv(T)
#     else:
#         transformation = T
#     current = current * transformation
#     ts[i] = current.reshape(12)
#     i += 1
np.savetxt('abs.poses.txt', ts, delimiter=' ')
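# Usage sketch (hedged; the input file name is a placeholder): chain a file of
# relative 3x4 poses (12 values per line) into absolute KITTI-format poses:
#   python relative_to_kitti_format.py rel_poses.txt --mode inv
# The result is written to abs.poses.txt, one flattened 3x4 pose per line.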
--------------------------------------------------------------------------------
/vgg_trainable/test/plot_traj.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

from mpl_toolkits.mplot3d import axes3d
import numpy as np
import matplotlib.pyplot as plt


def read(filename, delimiter=','):
    return np.genfromtxt(filename, delimiter=delimiter)


def plot(array):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')  # 111 means "1x1 grid, first subplot"
    p = ax.plot(array[:, 0], array[:, 1], array[:, 2], label='target')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    plt.legend()
    plt.show()


def main():
    import transformations
    data = read('vo.csv')
    data = data[1:len(data), 2:8]

    current = np.array([0., 0., 0.])  # .transpose()
    # current = np.matrix(np.identity(4))
    num_examples = len(data)
    ts = np.empty((num_examples, 3))
    i = 0
    for t in data:
        # Returns a 4x4 matrix
        # t[3] = roll, t[4] = pitch, t[5] = yaw
        T = transformations.euler_matrix(t[3], t[4], t[5], 'sxyz')
        T[0:3, 3] = t[0:3]
        current = t[0:3] + current  # np.linalg.inv(T) *current #np.linalg.inv(T) * current
        ts[i] = current  # [0:3,3].transpose()
        # poses[i] = current[0:3,:].reshape(12)
        i += 1

    plot(ts)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/outliers.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.

import numpy as np
import matplotlib.pyplot as plt


def reject_outliers(data, m=2.):
    mask = mask_outliers(data, m)
    return data[mask]


def mask_outliers(data, m):
    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    s = d / mdev if mdev else 0.
    return s < m
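# Example (hedged sketch): keep values whose distance to the median is below
# m times the median absolute deviation:
#   data = np.array([1., 1.1, 0.9, 10.])
#   reject_outliers(data, m=2.)   # -> array([1. , 1.1, 0.9])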
# data: Nx3xP
def print_points(data):
    plt.subplot(131)
    plt.plot(data[:, 0, :])
    plt.subplot(132)
    plt.plot(data[:, 1, :])
    plt.subplot(133)
    plt.plot(data[:, 2, :])
    plt.show()


def load(path):
    return np.load(path)

# def fix_array(m, K, N):
#     new_m = np.empty((m.shape[0], m.shape[1], 25))
#     for idx, points in enumerate(m):
#         points_h = np.ones((4, 150))
#         points_h[0:3, :] = points
#         x1 = np.matmul(K, points_h)
#         x1 /= x1[2]
#         c_mask = center_crop_mask(x1)
#         points = points[:, c_mask]
#         front_mask = in_front_of_cam_mask(points, 0.)
#         points = points[:, front_mask]
#         replace = points.shape[1] <= N
#         random_selection = np.random.choice(points.shape[1], N, replace=replace)
#         points = points[:3, random_selection]
#         new_m[idx] = points
#     return new_m

if __name__ == "__main__":
    m = load('/home/jcremona/output/02/points.npy')
    print_points(m)
    # m.shape -> Nx3xP
    # new_m = fix_array(m, K, N)
    #np.save('/home/jcremona/output/09/points.npy', new_m)

    # Take some example
    X = m[432]
    for i in range(3):
        # Each axis (X,Y,Z) is filtered by mask_outliers
        mask = mask_outliers(X[i], 1000)
        X = X[:, mask]
    print(X.shape)
--------------------------------------------------------------------------------
/eval_kitti/mail.h:
--------------------------------------------------------------------------------
/*
 * This file is part of wganvo.
 * This file belongs to the authors of KITTI (http://www.cvlibs.net/datasets/kitti/eval_odometry.php)
 *
 * Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
 * For more information see
 *
 * wganvo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * wganvo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with wganvo. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef MAIL_H
#define MAIL_H

#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <string>

class Mail {

public:

  Mail (std::string email = "",std::string from = "noreply@cvlibs.net",std::string subject = "KITTI Evaluation Benchmark") {
    if (email.compare("")) {
      char cmd[2048];
      sprintf(cmd,"/usr/lib/sendmail -t -f noreply@cvlibs.net");
      mail = popen(cmd,"w");
      fprintf(mail,"To: %s\n", email.c_str());
      fprintf(mail,"From: %s\n", from.c_str());
      fprintf(mail,"Subject: %s\n", subject.c_str());
      fprintf(mail,"\n\n");
    } else {
      mail = 0;
    }
  }

  ~Mail() {
    if (mail) {
      pclose(mail);
    }
  }

  void msg (const char *format, ...) {
    va_list args;
    va_start(args,format);
    if (mail) {
      vfprintf(mail,format,args);
      fprintf(mail,"\n");
    }
    vprintf(format,args);
    printf("\n");
    va_end(args);
  }

  void msg (std::string str) {
    if (mail) {
      fprintf(mail,"%s\n",str.c_str());
    }
    printf("%s\n",str.c_str());
  }

  void finalize (bool success,std::string benchmark,std::string result_sha="",std::string user_sha="") {
    if (success) {
      msg("Your evaluation results are available at:");
      msg("http://www.cvlibs.net/datasets/kitti/user_submit_check_login.php?benchmark=%s&user=%s&result=%s",benchmark.c_str(),user_sha.c_str(), result_sha.c_str());
    } else {
      msg("An error occurred while processing your results.");
      msg("Please make sure that the data in your zip archive has the right format!");
    }
  }

private:

  FILE *mail;

};

#endif
--------------------------------------------------------------------------------
/wgan/tflib/ops/layernorm.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below).
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

# MIT License
#
# Copyright (c) 2017 Ishaan Gulrajani
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import tflib as lib

import numpy as np
import tensorflow as tf

def Layernorm(name, norm_axes, inputs):
    mean, var = tf.nn.moments(inputs, norm_axes, keep_dims=True)
    # Assume the 'neurons' axis is the first of norm_axes. This is the case for fully-connected and BCHW conv layers.
    n_neurons = inputs.get_shape().as_list()[norm_axes[0]]

    offset = lib.param(name+'.offset', np.zeros(n_neurons, dtype='float32'))
    scale = lib.param(name+'.scale', np.ones(n_neurons, dtype='float32'))

    # Add broadcasting dims to offset and scale (e.g. BCHW conv data)
    offset = tf.reshape(offset, [-1] + [1 for i in xrange(len(norm_axes)-1)])
    scale = tf.reshape(scale, [-1] + [1 for i in xrange(len(norm_axes)-1)])

    result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5)

    return result
--------------------------------------------------------------------------------
/wgan/tflib/ops/cond_batchnorm.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below).
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

# MIT License
#
# Copyright (c) 2017 Ishaan Gulrajani
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import tflib as lib

import numpy as np
import tensorflow as tf

def Batchnorm(name, axes, inputs, is_training=None, stats_iter=None, update_moving_stats=True, fused=True, labels=None, n_labels=None):
    """conditional batchnorm (dumoulin et al 2016) for BCHW conv filtermaps"""
    if axes != [0,2,3]:
        raise Exception('unsupported')
    mean, var = tf.nn.moments(inputs, axes, keep_dims=True)
    shape = mean.get_shape().as_list() # shape is [1,n,1,1]
    offset_m = lib.param(name+'.offset', np.zeros([n_labels,shape[1]], dtype='float32'))
    scale_m = lib.param(name+'.scale', np.ones([n_labels,shape[1]], dtype='float32'))
    offset = tf.nn.embedding_lookup(offset_m, labels)
    scale = tf.nn.embedding_lookup(scale_m, labels)
    result = tf.nn.batch_normalization(inputs, mean, var, offset[:,:,None,None], scale[:,:,None,None], 1e-5)
    return result
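# Usage sketch (hedged; the name, tensors and label count are illustrative):
# condition the normalization of BCHW feature maps on integer class labels,
# so each label gets its own offset/scale pair:
#   out = Batchnorm('G.BN1', [0, 2, 3], x, labels=y, n_labels=10)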
--------------------------------------------------------------------------------
/play_images.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk
# (see original license below)
#
# Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# This file is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#

################################################################################
#
# Copyright (c) 2017 University of Oxford
# Authors:
#  Geoff Pascoe (gmp@robots.ox.ac.uk)
#
# This work is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#
################################################################################

import argparse
import os
import re
import matplotlib.pyplot as plt
from datetime import datetime as dt
from image import load_image
from camera_model import CameraModel

parser = argparse.ArgumentParser(description='Play back images from a given directory')

parser.add_argument('dir', type=str, help='Directory containing images.')
parser.add_argument('--models_dir', type=str, default=None, help='(optional) Directory containing camera model. If supplied, images will be undistorted before display')
parser.add_argument('--scale', type=float, default=1.0, help='(optional) factor by which to scale images before display')

args = parser.parse_args()

camera = re.search('(stereo|mono_(left|right|rear))', args.dir).group(0)

timestamps_path = os.path.join(os.path.join(args.dir, os.pardir, camera + '.timestamps'))
if not os.path.isfile(timestamps_path):
    timestamps_path = os.path.join(args.dir, os.pardir, os.pardir, camera + '.timestamps')
    if not os.path.isfile(timestamps_path):
        raise IOError("Could not find timestamps file")

model = None
if args.models_dir:
    model = CameraModel(args.models_dir, args.dir)

current_chunk = 0
timestamps_file = open(timestamps_path)
for line in timestamps_file:
    tokens = line.split()
    datetime = dt.utcfromtimestamp(int(tokens[0])/1000000)
    chunk = int(tokens[1])

    filename = os.path.join(args.dir, tokens[0] + '.png')
    if not os.path.isfile(filename):
        if chunk != current_chunk:
            print("Chunk " + str(chunk) + " not found")
            current_chunk = chunk
        continue

    current_chunk = chunk

    img = load_image(filename, model)
    plt.imshow(img)
    plt.xlabel(datetime)
    plt.xticks([])
    plt.yticks([])
    plt.pause(0.01)
--------------------------------------------------------------------------------
/image.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk
# (see original license below)
#
# Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# This file is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#

################################################################################
#
# Copyright (c) 2017 University of Oxford
# Authors:
#  Geoff Pascoe (gmp@robots.ox.ac.uk)
#
# This work is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#
###############################################################################

import re
import numpy as np
from scipy.misc import imresize, imsave, imread
from PIL import Image
from colour_demosaicing import demosaicing_CFA_Bayer_bilinear as demosaic

BAYER_STEREO = 'gbrg'
BAYER_MONO = 'rggb'


def load_image(image_path, model=None):
    """Loads and rectifies an image from file.

    Args:
        image_path (str): path to an image from the dataset.
        model (camera_model.CameraModel): if supplied, model will be used to undistort image.

    Returns:
        numpy.ndarray: demosaiced and optionally undistorted image

    """
    if model:
        camera = model.camera
    else:
        camera = re.search('(stereo|mono_(left|right|rear))', image_path).group(0)
    if camera == 'stereo':
        pattern = BAYER_STEREO
    else:
        pattern = BAYER_MONO

    if model:
        img = demosaic(Image.open(image_path), pattern)
        img = model.undistort(img)
        img = rgb_2_grey(img)
    else:
        img = non_demosaic_load(image_path)
    assert isinstance(img, np.ndarray) and img.dtype == np.uint8 and img.flags.contiguous
    return img

def non_demosaic_load(image_path):
    return imread(image_path)


def crop_image(num_array, cropx, cropy):
    y = num_array.shape[0]
    x = num_array.shape[1]
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    return num_array[starty:starty + cropy, startx:startx+cropx]

def scale_image(num_array, sizex, sizey):
    return imresize(num_array, (sizey,sizex))

def save_image(num_array, path):
    imsave(path, num_array)

def savez_compressed(path, array):
    np.savez_compressed(path, array)

def rgb_2_grey(img):
    return np.dot(img[...,:3],[0.299, 0.587, 0.114]).astype(img.dtype)
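# Usage sketch (hedged): the crop/scale sizes below are the defaults set in
# download.sh (880x660 center crop, then 128x96 rescale):
#   img = crop_image(img, 880, 660)
#   img = scale_image(img, 128, 96)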
--------------------------------------------------------------------------------
/wgan/tflib/small_imagenet.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below).
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

# MIT License
#
# Copyright (c) 2017 Ishaan Gulrajani
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
import scipy.misc
import time

def make_generator(path, n_files, batch_size):
    epoch_count = [1]
    def get_epoch():
        images = np.zeros((batch_size, 3, 64, 64), dtype='int32')
        files = range(n_files)
        random_state = np.random.RandomState(epoch_count[0])
        random_state.shuffle(files)
        epoch_count[0] += 1
        for n, i in enumerate(files):
            image = scipy.misc.imread("{}/{}.png".format(path, str(i+1).zfill(len(str(n_files)))))
            images[n % batch_size] = image.transpose(2,0,1)
            if n > 0 and n % batch_size == 0:
                yield (images,)
    return get_epoch

def load(batch_size, data_dir='/home/ishaan/data/imagenet64'):
    return (
        make_generator(data_dir+'/train_64x64', 1281149, batch_size),
        make_generator(data_dir+'/valid_64x64', 49999, batch_size)
    )

if __name__ == '__main__':
    train_gen, valid_gen = load(64)
    t0 = time.time()
    for i, batch in enumerate(train_gen(), start=1):
        print "{}\t{}".format(str(time.time() - t0), batch[0][0,0,0,0])
        if i == 1000:
            break
        t0 = time.time()
--------------------------------------------------------------------------------
/wgan/tflib/cifar10.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below).
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

# MIT License
#
# Copyright (c) 2017 Ishaan Gulrajani
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np

import os
import urllib
import gzip
import cPickle as pickle

def unpickle(file):
    fo = open(file, 'rb')
    dict = pickle.load(fo)
    fo.close()
    return dict['data'], dict['labels']

def cifar_generator(filenames, batch_size, data_dir):
    all_data = []
    all_labels = []
    for filename in filenames:
        data, labels = unpickle(data_dir + '/' + filename)
        all_data.append(data)
        all_labels.append(labels)

    images = np.concatenate(all_data, axis=0)
    labels = np.concatenate(all_labels, axis=0)

    def get_epoch():
        rng_state = np.random.get_state()
        np.random.shuffle(images)
        np.random.set_state(rng_state)
        np.random.shuffle(labels)

        for i in xrange(len(images) / batch_size):
            yield (images[i*batch_size:(i+1)*batch_size], labels[i*batch_size:(i+1)*batch_size])

    return get_epoch


def load(batch_size, data_dir):
    return (
        cifar_generator(['data_batch_1','data_batch_2','data_batch_3','data_batch_4','data_batch_5'], batch_size, data_dir),
        cifar_generator(['test_batch'], batch_size, data_dir)
    )
--------------------------------------------------------------------------------
/wgan/tflib/plot.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below).
#
# Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.
#

# MIT License
#
# Copyright (c) 2017 Ishaan Gulrajani
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np

import matplotlib

matplotlib.use('Agg')
import matplotlib.pyplot as plt
import os
import collections
import time
import cPickle as pickle

_since_beginning = collections.defaultdict(lambda: {})
_since_last_flush = collections.defaultdict(lambda: {})

_iter = [0]


def tick():
    _iter[0] += 1

def get_global_iter():
    return _iter[0]


def plot(name, value):
    _since_last_flush[name][_iter[0]] = value


def flush(log_dir):
    prints = []

    for name, vals in _since_last_flush.items():
        prints.append("{}\t{}".format(name, np.mean(vals.values())))
        _since_beginning[name].update(vals)

        x_vals = np.sort(_since_beginning[name].keys())
        y_vals = [_since_beginning[name][x] for x in x_vals]

        plt.clf()
        plt.plot(x_vals, y_vals)
        plt.xlabel('iteration')
        plt.ylabel(name)
        file_name = name.replace(' ', '_') + '.jpg'
        plt.savefig(os.path.join(log_dir, file_name))

    print "iter {}\t{}".format(_iter[0], "\t".join(prints))
    _since_last_flush.clear()

    with open(os.path.join(log_dir, 'log.pkl'), 'wb') as f:
        pickle.dump(dict(_since_beginning), f, pickle.HIGHEST_PROTOCOL)
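# Usage sketch (hedged; the metric name and log path are illustrative):
#   plot('train disc cost', 0.37)   # record a scalar at the current iteration
#   tick()                          # advance the global iteration counter
#   flush('/tmp/logs')              # print running means, save curves and log.pkl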
--------------------------------------------------------------------------------
/download.sh:
--------------------------------------------------------------------------------
#!/bin/bash

HOME=~
PROJECT_DIR=$HOME/tesina
WORKSPACE_DIR=$PROJECT_DIR/workspace
SOURCE_DIR=$PROJECT_DIR/tesina
FILENAME_TEMP="index_temp.html"
DATASET_LIST_FILE=$1
LOGIN_URL="http://mrgdatashare.robots.ox.ac.uk/accounts/login/"
MODELS_DIR=$PROJECT_DIR/models
EXTRINSICS_DIR=$PROJECT_DIR/extrinsics
CROP_WIDTH=880
CROP_HEIGHT=660
SCALE_WIDTH=128
SCALE_HEIGHT=96
FILENAME_COOKIE_TEMP=cookies.txt
# TODO why isn't sessionid mandatory?
#COOKIE_HEADER="Cookie: _ga=GA1.3.147376171.1502830394; _gat=1; _gid=GA1.3.370211867.1505683919; sessionid=gcef24ow0h95wjezgrqrkudsas1hp5x1"

# TODO handle authentication errors
printf "Username: "
read USERNAME
stty -echo
printf "Password: "
read PASSWORD
stty echo
printf "\n"

output_dir=$WORKSPACE_DIR
processing_dataset=false

function download_file { # 1 = url, 2 = user, 3 = pass, 4 = filename_path
    wget --save-cookies $FILENAME_COOKIE_TEMP --server-response -q -O - $1 > $FILENAME_TEMP
    csrf_middleware_token=$(sed -n "/csrfmiddlewaretoken/s/.*name='csrfmiddlewaretoken'\s\+value='\([^']\+\).*/\1/p" $FILENAME_TEMP)
    next_redirect=$(sed -n '/next/s/.*name="next"\s\+value="\([^"]\+\).*/\1/p' $FILENAME_TEMP)
    next_redirect_encoded=$(php -r "echo urlencode(\"$next_redirect\");")
    # TODO why isn't referer header mandatory?
    #referer_header="Referer: ${referer_encoded}"
    post_data="csrfmiddlewaretoken="$csrf_middleware_token"&username="$2"&password="$3"&next="$next_redirect_encoded
    wget --load-cookies $FILENAME_COOKIE_TEMP --post-data="${post_data}" "${LOGIN_URL}" -O $4
}

while read url_dataset; do
    filename=$(basename $url_dataset)
    dirname=$(dirname $url_dataset)
    directory_name="${filename%.*}"
    filename_path="${output_dir}/${filename}"
    download_file $url_dataset $USERNAME $PASSWORD $filename_path
    output_dataset_directory="${output_dir}/${directory_name}"
    mkdir -p $output_dataset_directory
    tar xopf "${filename_path}" -C $output_dataset_directory
    tar_output=$?
    if [ "$tar_output" -eq 0 ]; then
        rm "${filename_path}"
    fi
    if [ "$processing_dataset" = true ] ; then
        #wait $processing_dataset_pid
        processing_dataset=false
    fi
    IFS='_' read -ra FOLDERS <<< "$filename"
    dataset_image_directory="${output_dataset_directory}/${FOLDERS[0]}/${FOLDERS[1]}/${FOLDERS[2]}"
    vo_filename="${FOLDERS[0]}_vo.tar"
    vo_filename_path="${output_dir}/${vo_filename}"
    url_vo_file="${dirname}/$vo_filename"
    download_file $url_vo_file $USERNAME $PASSWORD $vo_filename_path
    output_vo_directory="${output_dataset_directory}/vo"
    mkdir -p $output_vo_directory
    tar xopf $vo_filename_path -C $output_vo_directory
    tar_vo_output=$?
    if [ "$tar_vo_output" -eq 0 ]; then
        rm "${vo_filename_path}"
    fi
    poses_file="${output_vo_directory}/${FOLDERS[0]}/vo/vo.csv"
    python "${SOURCE_DIR}/adapt_images.py" "$dataset_image_directory" "$poses_file" --models_dir "${MODELS_DIR}" --crop "${CROP_WIDTH}" "${CROP_HEIGHT}" --scale "${SCALE_WIDTH}" "${SCALE_HEIGHT}" --output_dir "${output_dataset_directory}" # &
    # TODO comment this line when running in background
    if [ "$tar_output" -eq 0 ]; then
        rm -rf "${dataset_image_directory}"
    fi
    processing_dataset_pid=$!
    processing_dataset=true

    rm $FILENAME_TEMP
done <$DATASET_LIST_FILE
rm $FILENAME_COOKIE_TEMP

--------------------------------------------------------------------------------
/geometry.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from evo (github.com/MichaelGrupp/evo) (see original license below)
#
# Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# wganvo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wganvo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with wganvo. If not, see <http://www.gnu.org/licenses/>.

# Provides generic geometry algorithms.
# author: Michael Grupp
#
# This file is part of evo (github.com/MichaelGrupp/evo).
#
# evo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# evo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with evo. If not, see <http://www.gnu.org/licenses/>.

import numpy as np


class GeometryException(Exception):
    pass


def umeyama_alignment(x, y, with_scale=False):
    """
    Computes the least squares solution parameters of an Sim(m) matrix
    that minimizes the distance between a set of registered points.
    Umeyama, Shinji: Least-squares estimation of transformation parameters
    between two point patterns. IEEE PAMI, 1991
    :param x: mxn matrix of points, m = dimension, n = nr. of data points
    :param y: mxn matrix of points, m = dimension, n = nr. of data points
    :param with_scale: set to True to align also the scale (default: 1.0 scale)
    :return: r, t, c - rotation matrix, translation vector and scale factor
    """
    if x.shape != y.shape:
        raise GeometryException("data matrices must have the same shape")

    # m = dimension, n = nr. of data points
    m, n = x.shape

    # means, eq. 34 and 35
    mean_x = x.mean(axis=1)
    mean_y = y.mean(axis=1)

    # variance, eq. 36
    # "transpose" for column subtraction
    sigma_x = 1.0/n * (np.linalg.norm(x - mean_x[:, np.newaxis])**2)

    # covariance matrix, eq. 38
    outer_sum = np.zeros((m, m))
    for i in range(n):
        outer_sum += np.outer((y[:, i] - mean_y), (x[:, i] - mean_x))
    cov_xy = np.multiply(1.0/n, outer_sum)

    # SVD (text betw. eq. 38 and 39)
    u, d, v = np.linalg.svd(cov_xy)

    # S matrix, eq. 43
    s = np.eye(m)
    if np.linalg.det(u) * np.linalg.det(v) < 0.0:
        # Ensure a RHS coordinate system (Kabsch algorithm).
        s[m-1, m-1] = -1

    # rotation, eq. 40
    r = u.dot(s).dot(v)

    # scale & translation, eq. 42 and 41
    c = 1/sigma_x * np.trace(np.diag(d).dot(s)) if with_scale else 1.0
    t = mean_y - np.multiply(c, r.dot(mean_x))

    return r, t, c


def arc_len(x):
    """
    :param x: nxm array of points, m=dimension
    :return: the (discrete approximated) arc-length of the point sequence
    """
    return np.sum(np.linalg.norm(x[:-1] - x[1:], axis=1))
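# Usage sketch (hedged; array names are illustrative): align an estimated
# trajectory est to a reference gt, both 3xN arrays of positions:
#   r, t, c = umeyama_alignment(est, gt, with_scale=True)
#   est_aligned = c * r.dot(est) + t[:, np.newaxis]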
--------------------------------------------------------------------------------
/project_laser_into_camera.py:
--------------------------------------------------------------------------------
#
# This file is part of wganvo.
# This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk
# (see original license below)
#
# Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET)
# For more information see
#
# This file is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#

################################################################################
#
# Copyright (c) 2017 University of Oxford
# Authors:
#  Geoff Pascoe (gmp@robots.ox.ac.uk)
#
# This work is licensed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#
################################################################################

import os
import re
import numpy as np
import matplotlib.pyplot as plt
import argparse

from build_pointcloud import build_pointcloud
from transform import build_se3_transform
from image import load_image
from camera_model import CameraModel

parser = argparse.ArgumentParser(description='Project LIDAR data into camera image')
parser.add_argument('--image_dir', type=str, help='Directory containing images')
parser.add_argument('--laser_dir', type=str, help='Directory containing LIDAR scans')
parser.add_argument('--poses_file', type=str, help='File containing either INS or VO poses')
parser.add_argument('--models_dir', type=str, help='Directory containing camera models')
parser.add_argument('--extrinsics_dir', type=str, help='Directory containing sensor extrinsics')
parser.add_argument('--image_idx', type=int, help='Index of image to display')

args = parser.parse_args()

model = CameraModel(args.models_dir, args.image_dir)

extrinsics_path = os.path.join(args.extrinsics_dir, model.camera + '.txt')
with open(extrinsics_path) as extrinsics_file:
    extrinsics = [float(x) for x in next(extrinsics_file).split(' ')]

G_camera_vehicle = build_se3_transform(extrinsics)
G_camera_posesource = None

poses_type = re.search('(vo|ins)\.csv', args.poses_file).group(1)
if poses_type == 'ins':
    with open(os.path.join(args.extrinsics_dir, 'ins.txt')) as extrinsics_file:
        extrinsics = next(extrinsics_file)
        G_camera_posesource = G_camera_vehicle * build_se3_transform([float(x) for x in extrinsics.split(' ')])
else:
    # VO frame and vehicle frame are the same
    G_camera_posesource = G_camera_vehicle


timestamps_path = os.path.join(args.image_dir, os.pardir, model.camera + '.timestamps')
if not os.path.isfile(timestamps_path):
    timestamps_path = os.path.join(args.image_dir, os.pardir, os.pardir, model.camera + '.timestamps')

timestamp = 0
with open(timestamps_path) as timestamps_file:
    for i, line in enumerate(timestamps_file):
        if i == args.image_idx:
            timestamp = int(line.split(' ')[0])

pointcloud, reflectance = build_pointcloud(args.laser_dir, args.poses_file, args.extrinsics_dir,
                                           timestamp - 1e7, timestamp + 1e7, timestamp)

pointcloud = np.dot(G_camera_posesource, pointcloud)

image_path = os.path.join(args.image_dir, str(timestamp) + '.png')
image = load_image(image_path, model)

uv, depth = model.project(pointcloud, image.shape)

plt.imshow(image)
plt.hold(True)
plt.scatter(np.ravel(uv[0, :]), np.ravel(uv[1, :]), s=2, c=depth, edgecolors='none', cmap='jet')
plt.xlim(0, image.shape[1])
plt.ylim(image.shape[0], 0)
plt.xticks([])
plt.yticks([])
plt.show()
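# Example invocation (hedged; all paths are placeholders, the flags are the
# ones defined above):
#   python project_laser_into_camera.py --image_dir data/stereo/centre \
#       --laser_dir data/ldmrs --poses_file data/vo/vo.csv \
#       --models_dir models --extrinsics_dir extrinsics --image_idx 100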
-------------------------------------------------------------------------------- /wgan/tflib/save_images.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 
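For reference, `build_grid`/`save_images` below expect a batch `X` of flattened samples `(N, H*W)`, grayscale images `(N, H, W)`, or channel-first images `(N, C, H, W)`, with float values in `[0, 1]` (rescaled to uint8) or already-converted uint8 values. A minimal sketch of a call; the shapes and output path are illustrative only, and it assumes the `wgan` directory is on `PYTHONPATH`:

    import numpy as np
    from tflib.save_images import save_images

    samples = np.random.rand(64, 28, 28).astype('float32')  # 64 grayscale images in [0, 1]
    save_images(samples, '/tmp/samples.png')  # packs them into a single 8x8 grid image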
43 | 44 | """ 45 | Image grid saver, based on color_grid_vis from github.com/Newmu 46 | """ 47 | 48 | import numpy as np 49 | import os 50 | from scipy.misc import imsave 51 | 52 | 53 | def save_images(X, save_path): 54 | img = build_grid(X) 55 | imsave(save_path, img) 56 | 57 | 58 | def build_grid(X): 59 | # [0, 1] -> [0,255] 60 | if isinstance(X.flatten()[0], np.floating): 61 | X = (255.99 * X).astype('uint8') 62 | n_samples = X.shape[0] 63 | rows = int(np.sqrt(n_samples)) 64 | while n_samples % rows != 0: 65 | rows -= 1 66 | nh, nw = rows, n_samples / rows 67 | if X.ndim == 2: 68 | X = np.reshape(X, (X.shape[0], int(np.sqrt(X.shape[1])), int(np.sqrt(X.shape[1])))) 69 | if X.ndim == 4: 70 | # BCHW -> BHWC 71 | X = X.transpose(0, 2, 3, 1) 72 | h, w = X[0].shape[:2] 73 | img = np.zeros((h * nh, w * nw, X.shape[3])) 74 | elif X.ndim == 3: 75 | h, w = X[0].shape[:2] 76 | img = np.zeros((h * nh, w * nw)) 77 | for n, x in enumerate(X): 78 | j = n / nw 79 | i = n % nw 80 | img[j * h:j * h + h, i * w:i * w + w] = x 81 | return img 82 | 83 | 84 | # Saves two grid images, one per element of the image pairs 85 | def save_pair_images_grid(X, save_path, iteration, prefix='samples'): 86 | grid = build_grid(X) 87 | assert grid.ndim == 3 and grid.shape[2] == 2 88 | 89 | grid = grid.transpose(2, 0, 1) 90 | imsave_pair(grid, save_path, iteration, prefix) 91 | 92 | 93 | # Saves the two images of a single pair individually 94 | def save_pair_images(X, save_path, iteration, prefix='samples'): 95 | # BCHW 96 | assert X.ndim == 4 and X.shape[1] == 2 97 | 98 | idx = 0 #randrange(X.shape[0]) 99 | pair = X[idx] 100 | imsave_pair(pair, save_path, iteration, prefix) 101 | 102 | 103 | def imsave_pair(pair, save_path, iteration, prefix): 104 | img_name = prefix + '_{}_{}.png' 105 | imsave(os.path.join(save_path, img_name.format(iteration, 0)), pair[0, ...]) 106 | imsave(os.path.join(save_path, img_name.format(iteration, 1)), pair[1, ...]) 107 | -------------------------------------------------------------------------------- /wgan/tflib/mnist.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see <http://www.gnu.org/licenses/>.
20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import numpy 45 | 46 | import os 47 | import urllib 48 | import gzip 49 | import cPickle as pickle 50 | 51 | def mnist_generator(data, batch_size, n_labelled, limit=None): 52 | images, targets = data 53 | 54 | rng_state = numpy.random.get_state() 55 | numpy.random.shuffle(images) 56 | numpy.random.set_state(rng_state) 57 | numpy.random.shuffle(targets) 58 | if limit is not None: 59 | print "WARNING ONLY FIRST {} MNIST DIGITS".format(limit) 60 | images = images.astype('float32')[:limit] 61 | targets = targets.astype('int32')[:limit] 62 | if n_labelled is not None: 63 | labelled = numpy.zeros(len(images), dtype='int32') 64 | labelled[:n_labelled] = 1 65 | 66 | def get_epoch(): 67 | rng_state = numpy.random.get_state() 68 | numpy.random.shuffle(images) 69 | numpy.random.set_state(rng_state) 70 | numpy.random.shuffle(targets) 71 | 72 | if n_labelled is not None: 73 | numpy.random.set_state(rng_state) 74 | numpy.random.shuffle(labelled) 75 | 76 | image_batches = images.reshape(-1, batch_size, 784) 77 | target_batches = targets.reshape(-1, batch_size) 78 | 79 | if n_labelled is not None: 80 | labelled_batches = labelled.reshape(-1, batch_size) 81 | 82 | for i in xrange(len(image_batches)): 83 | yield (numpy.copy(image_batches[i]), numpy.copy(target_batches[i]), numpy.copy(labelled_batches[i])) 84 | 85 | else: 86 | 87 | for i in xrange(len(image_batches)): 88 | yield (numpy.copy(image_batches[i]), numpy.copy(target_batches[i])) 89 | 90 | return get_epoch 91 | 92 | def load(batch_size, test_batch_size, n_labelled=None): 93 | filepath = '/tmp/mnist.pkl.gz' 94 | url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz' 95 | 96 | if not os.path.isfile(filepath): 97 | print "Couldn't find MNIST dataset in /tmp, downloading..."
98 | urllib.urlretrieve(url, filepath) 99 | 100 | with gzip.open('/tmp/mnist.pkl.gz', 'rb') as f: 101 | train_data, dev_data, test_data = pickle.load(f) 102 | 103 | return ( 104 | mnist_generator(train_data, batch_size, n_labelled), 105 | mnist_generator(dev_data, test_batch_size, n_labelled), 106 | mnist_generator(test_data, test_batch_size, n_labelled) 107 | ) -------------------------------------------------------------------------------- /triangulate.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # 4 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # wganvo is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # wganvo is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with wganvo. If not, see . 19 | # 20 | 21 | import cv2 22 | import numpy as np 23 | from matplotlib import pyplot as plt 24 | 25 | 26 | # Input: stereo images 27 | # Output: pts1 -> 2xN array of points from left image 28 | # pts2 -> 2xN array of points from right image 29 | def matcher(img1, img2): 30 | sift = cv2.xfeatures2d.SIFT_create() 31 | # find the keypoints and descriptors with SIFT 32 | kp1, des1 = sift.detectAndCompute(img1, None) 33 | kp2, des2 = sift.detectAndCompute(img2, None) 34 | # Brute Force Matcher parameters 35 | bf = cv2.BFMatcher(crossCheck=True) 36 | matches = bf.knnMatch(des1, des2, k=1) 37 | # FLANN parameters 38 | # FLANN_INDEX_KDTREE = 0 39 | # index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5) 40 | # search_params = dict(checks=50) 41 | # flann = cv2.FlannBasedMatcher(index_params,search_params) 42 | # matches = flann.knnMatch(des1,des2,k=2) 43 | good = [] 44 | pts1 = [] 45 | pts2 = [] 46 | 47 | for match in matches: 48 | if match: 49 | # print('%d -> %d: %f' % (match[0].queryIdx, match[0].trainIdx, match[0].distance)) 50 | m = match[0] 51 | good.append(m) 52 | pts2.append(kp2[m.trainIdx].pt) 53 | pts1.append(kp1[m.queryIdx].pt) 54 | 55 | pts1 = np.int32(pts1) 56 | pts2 = np.int32(pts2) 57 | F, mask = cv2.findFundamentalMat(pts1, pts2, cv2.FM_LMEDS) 58 | 59 | # We select only inlier points 60 | pts1 = pts1[mask.ravel() == 1] 61 | pts2 = pts2[mask.ravel() == 1] 62 | # draw_params = dict(matchColor=(0, 255, 0), 63 | # singlePointColor=(255, 0, 0), 64 | # matchesMask=mask.ravel().tolist(), 65 | # flags=0) 66 | # img3 = cv2.drawMatches(img1, kp1, img2, kp2, good, None, **draw_params) 67 | # plt.imshow(img3), plt.show() 68 | 69 | pts1 = pts1.transpose().astype(np.float32) 70 | pts2 = pts2.transpose().astype(np.float32) 71 | 72 | same_line_mask = pts1[1, :] == pts2[1, :] 73 | # threshold = (pts1[1, :] + 1) == pts2[1, :] 74 | # threshold |= (pts2[1, :] + 1) == pts1[1, :] 75 | # same_line_mask |= threshold 76 | pts1 = pts1[:, same_line_mask] 77 | pts2 = pts2[:, same_line_mask] 78 | 79 | return pts1, pts2 80 | 81 | 82 | # Input: P1 -> projection matrix 83 | # P2 -> projection matrix 84 | # x1 -> 2xN array of points 85 | # x2 -> 2xN array of points 86 | # Output: X -> 4xN array of 3D 
points (homogeneous coordinates) 87 | def triangulatePoints(P1, P2, x1, x2): 88 | X = cv2.triangulatePoints(P1, P2, x1, x2) 89 | return X / X[3] 90 | 91 | # folder = '/home/jcremona/data/03/' 92 | # filename = '000000.png' 93 | # 94 | # img1 = cv2.imread(folder + 'image_0/' + filename,0) #queryimage # left image 95 | # img2 = cv2.imread(folder + 'image_1/' + filename,0) #trainimage # right image 96 | # 97 | # # KITTI Seq 3 left camera calibration 98 | # K0 = np.matrix([[7.215377000000e+02, 0.000000000000e+00, 6.095593000000e+02, 0.000000000000e+00], 99 | # [0.000000000000e+00, 7.215377000000e+02, 1.728540000000e+02, 0.000000000000e+00], 100 | # [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 0.000000000000e+00]]) 101 | # 102 | # # KITTI Seq 3 right camera calibration 103 | # K1 = np.matrix([[7.215377000000e+02, 0.000000000000e+00, 6.095593000000e+02, -3.875744000000e+02], 104 | # [0.000000000000e+00, 7.215377000000e+02, 1.728540000000e+02, 0.000000000000e+00], 105 | # [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 0.000000000000e+00]]) 106 | # 107 | # # KITTI Seq 3 1st frame pose 108 | # Rt0 = np.matrix([[1.000000e+00, -1.822835e-10, 5.241111e-10, -5.551115e-17], 109 | # [-1.822835e-10, 9.999999e-01, -5.072855e-10, -3.330669e-16], 110 | # [5.241111e-10, -5.072855e-10, 9.999999e-01, 2.220446e-16], 111 | # [0.,0.,0.,1.]]) 112 | -------------------------------------------------------------------------------- /vgg_trainable/test/test_model.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # 4 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # wganvo is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # wganvo is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with wganvo. If not, see . 
19 | # 20 | 21 | import tensorflow as tf 22 | import numpy as np 23 | from scipy import linalg 24 | import argparse 25 | import sys, os, inspect 26 | import time 27 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 28 | parentdir = os.path.dirname(currentdir) 29 | sys.path.insert(0, parentdir) 30 | from input_data import read_data_sets, DataSet 31 | from eval_utils import infer_relative_poses, get_absolute_poses, plot_frames_vs_abs_distance 32 | 33 | # import transformations 34 | 35 | DEFAULT_INTRINSIC_FILE_NAME = "intrinsic_matrix.txt" 36 | 37 | 38 | def test_model(model_name, data_dir, output_dir, batch_size): 39 | sess = tf.Session() 40 | saver = tf.train.import_meta_graph(model_name + ".meta") 41 | # print(model_name) 42 | # inverse_intrinsic_matrix = np.linalg.inv(intrinsic_matrix) 43 | saver.restore(sess, model_name) # tf.train.latest_checkpoint('./')) 44 | graph = tf.get_default_graph() 45 | outputs = graph.get_tensor_by_name("outputs:0") 46 | targets_placeholder = graph.get_tensor_by_name("targets_placeholder:0") 47 | images_placeholder = graph.get_tensor_by_name("images_placeholder:0") 48 | train_mode = graph.get_tensor_by_name("train_mode:0") # FIXME this could raise an exception 49 | start_loading_time = time.time() 50 | images, targets, _, groups, _ = read_data_sets(data_dir) 51 | start_infer_time = time.time() 52 | dataset = DataSet(images, targets, groups, fake_data=False) 53 | relative_poses_prediction, relative_poses_target = infer_relative_poses(sess, dataset, batch_size, 54 | images_placeholder, 55 | outputs, 56 | targets_placeholder, train_mode) 57 | end_time = time.time() 58 | print("Inference time: {}".format(end_time - start_infer_time)) 59 | print("Load Images + Inference Time: {}".format(end_time - start_loading_time)) 60 | print("Images in the seq: {}".format(relative_poses_prediction.shape[0])) 61 | frames, abs_distance = plot_frames_vs_abs_distance(relative_poses_prediction, relative_poses_target, dataset, 62 | output_dir, save_txt=True, plot=True) 63 | points = np.array(zip(frames, abs_distance)) 64 | np.savetxt(os.path.join(output_dir, "frames_vs_abs_distance.txt"), points) 65 | np.savetxt(os.path.join(output_dir, "relative_poses_prediction.txt"), relative_poses_prediction.reshape(-1, 12), 66 | delimiter=' ') 67 | np.savetxt(os.path.join(output_dir, "relative_poses_target.txt"), relative_poses_target.reshape(-1, 12), 68 | delimiter=' ') 69 | absolute_poses_prediction = get_absolute_poses(relative_poses_prediction) 70 | absolute_poses_target = get_absolute_poses(relative_poses_target) 71 | np.savetxt(os.path.join(output_dir, "absolute_poses_prediction.txt"), 72 | absolute_poses_prediction.reshape(-1, 12), delimiter=' ') 73 | np.savetxt(os.path.join(output_dir, "absolute_poses_target.txt"), absolute_poses_target.reshape(-1, 12), 74 | delimiter=' ') 75 | 76 | 77 | def main(_): 78 | # intrinsic_matrix = np.matrix(np.loadtxt(FLAGS.intrinsics_path, delimiter=' ')) 79 | test_model(FLAGS.model_name, FLAGS.data_dir, FLAGS.output_dir, FLAGS.batch_size) 80 | 81 | 82 | if __name__ == '__main__': 83 | parser = argparse.ArgumentParser() 84 | parser.add_argument( 85 | 'model_name', 86 | type=str, 87 | help='Model name' 88 | ) 89 | parser.add_argument( 90 | 'data_dir', 91 | type=str, 92 | help='Directory containing the input data' 93 | ) 94 | # parser.add_argument( 95 | # '--intrinsics_path', 96 | # type=str, 97 | # default=os.path.join(os.getcwd(), DEFAULT_INTRINSIC_FILE_NAME), 98 | # help='Intrinsic matrix path' 99 | # ) 100 | parser.add_argument( 101
| '--output_dir', 102 | type=str, 103 | default=os.getcwd(), 104 | help='Output dir' 105 | ) 106 | parser.add_argument( 107 | '--batch_size', 108 | type=int, 109 | default=100, 110 | help='Batch size' 111 | ) 112 | 113 | FLAGS, unparsed = parser.parse_known_args() 114 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 115 | -------------------------------------------------------------------------------- /vgg_trainable/test/show_traj_kitti.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # 4 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # wganvo is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # wganvo is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with wganvo. If not, see . 19 | # 20 | 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | from mpl_toolkits.mplot3d import Axes3D 24 | import matplotlib.animation as animation 25 | from scipy import linalg 26 | import argparse 27 | import sys, os, inspect 28 | 29 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 30 | parentdir = os.path.dirname(currentdir) 31 | sys.path.insert(0,parentdir) 32 | from input_data import read_data_sets, DataSet 33 | from matplotlib import gridspec 34 | 35 | # Example: 36 | # python show_traj_kitti.py ~/KITTI/ gt_pose_00.txt --poses_pred orb_slam2_pose_00.txt wganvo_pose_00.txt --labels ORB-SLAM2 WGANVO 37 | # Labels and filenames must be the same length 38 | 39 | def get_cmap(n, name='hsv'): 40 | '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct 41 | RGB color; the keyword argument name must be a standard mpl colormap name.''' 42 | return plt.cm.get_cmap(name, n) 43 | 44 | def show(images, poses, pred_poses, labels, points=None): 45 | fig = plt.figure() 46 | gs = gridspec.GridSpec(2, 1, height_ratios=[1, 2]) 47 | ax1 = fig.add_subplot(gs[0]) 48 | artist = ax1.imshow(images[0], cmap='gray') 49 | ax1.axis('off') 50 | ax2 = fig.add_subplot(gs[1], projection='3d') 51 | 52 | x = [] 53 | y = [] 54 | z = [] 55 | lin = ax2.plot(x, y, z, label='Ground Truth', linewidth=1.1)[0] 56 | data_ = [poses] 57 | lines_ = [lin] 58 | for i, label in zip(pred_poses, labels): 59 | lin_pred = ax2.plot(x, y, z, label=label, linewidth=1.1)[0] 60 | data_.append(i) 61 | lines_.append(lin_pred) 62 | 63 | 64 | cloud = ax2.scatter(x,y,z, c="red", s=0.1) 65 | # data_.append(points) 66 | # lines_.append(cloud) 67 | ax2.legend() 68 | ax2.set_xlabel('X (m)') 69 | ax2.set_ylabel('Y (m)') 70 | ax2.set_zlabel('Z (m)') 71 | 72 | max_range = np.array([poses[:,0].max() - poses[:,0].min(), poses[:,1].max() - poses[:,1].min(), 73 | poses[:,2].max() - poses[:,2].min()]).max() / 2.0 74 | mean_x = poses[:,0].mean() 75 | mean_y = poses[:,1].mean() 76 | mean_z = poses[:,2].mean() 77 | 78 | ax2.set_xlim(mean_x - max_range, mean_x + max_range) 79 | ax2.set_ylim(mean_y - max_range, mean_y + max_range) 80 | ax2.set_zlim(mean_z - max_range, mean_z + max_range) 81 | 82 | 
def update(num, img, datalines, lines, scatter, points): 83 | artist.set_data(img[num]) 84 | idx = num + 1 85 | for lin, data in zip(lines, datalines): 86 | lin.set_xdata(data[:idx,0]) 87 | lin.set_ydata(data[:idx,1]) 88 | lin.set_3d_properties(data[:idx,2]) 89 | #scatter._offsets3d = (points[num,0], points[num,1], points[num,2]) 90 | return lines 91 | 92 | #assert len(images) == len(poses) 93 | # FIXME originally this was frames = len(pred_poses) 94 | ani = animation.FuncAnimation(fig, update, frames=len(poses), fargs=(images,data_,lines_, cloud, points), 95 | interval=10, blit=False) 96 | plt.show() 97 | 98 | def main(): 99 | images,_,_,_, points = read_data_sets(FLAGS.img_file) 100 | poses = np.loadtxt(FLAGS.poses, delimiter=" ") 101 | assert len(FLAGS.poses_pred) == len(FLAGS.labels), "Num. of pose files and num. of labels must be the same" 102 | poses_pred = [] 103 | for pose_file in FLAGS.poses_pred: 104 | pose_pred = np.loadtxt(pose_file, delimiter=" ") 105 | pose_pred = pose_pred.reshape((-1, 3, 4)) 106 | pose_pred = pose_pred[:, 0:3, 3] 107 | poses_pred.append(pose_pred) 108 | 109 | # points = None 110 | # if FLAGS.points != None: 111 | # points = np.load(FLAGS.points) 112 | 113 | last = images[-1][..., 1] 114 | last = last.reshape((-1,last.shape[0],last.shape[1])) 115 | im = np.append(images[..., 0], last,axis=0) 116 | print(len(im)) 117 | print(len(poses)) 118 | #assert len(im) == len(poses) 119 | 120 | poses = poses.reshape((-1,3,4)) 121 | poses = poses[:, 0:3, 3] 122 | # print(poses.shape) 123 | #print(poses_pred.shape) 124 | # print(points.shape) 125 | show(im, poses, poses_pred, FLAGS.labels) 126 | 127 | 128 | if __name__ == '__main__': 129 | parser = argparse.ArgumentParser() 130 | parser.add_argument( 131 | 'img_file', 132 | type=str, 133 | help='Images file' 134 | ) 135 | parser.add_argument( 136 | 'poses', 137 | type=str, 138 | help='Ground truth poses file' 139 | ) 140 | parser.add_argument( 141 | '--poses_pred', 142 | nargs="*", 143 | help='Predicted poses files' 144 | ) 145 | parser.add_argument( 146 | '--labels', 147 | nargs="*", 148 | help='Labels/Legends to be used in the plot' 149 | ) 150 | parser.add_argument( 151 | '--points', 152 | type=str, 153 | help='3D Points' 154 | ) 155 | FLAGS, unparsed = parser.parse_known_args() 156 | main() 157 | 158 | 159 | -------------------------------------------------------------------------------- /wgan/tflib/ops/deconv2d.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see <http://www.gnu.org/licenses/>.
20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import tflib as lib 45 | 46 | import numpy as np 47 | import tensorflow as tf 48 | 49 | _default_weightnorm = False 50 | def enable_default_weightnorm(): 51 | global _default_weightnorm 52 | _default_weightnorm = True 53 | 54 | _weights_stdev = None 55 | def set_weights_stdev(weights_stdev): 56 | global _weights_stdev 57 | _weights_stdev = weights_stdev 58 | 59 | def unset_weights_stdev(): 60 | global _weights_stdev 61 | _weights_stdev = None 62 | 63 | def Deconv2D( 64 | name, 65 | input_dim, 66 | output_dim, 67 | filter_size, 68 | inputs, 69 | he_init=True, 70 | weightnorm=None, 71 | biases=True, 72 | gain=1., 73 | mask_type=None, 74 | ): 75 | """ 76 | inputs: tensor of shape (batch size, height, width, input_dim) 77 | returns: tensor of shape (batch size, 2*height, 2*width, output_dim) 78 | """ 79 | with tf.name_scope(name) as scope: 80 | 81 | if mask_type != None: 82 | raise Exception('Unsupported configuration') 83 | 84 | def uniform(stdev, size): 85 | return np.random.uniform( 86 | low=-stdev * np.sqrt(3), 87 | high=stdev * np.sqrt(3), 88 | size=size 89 | ).astype('float32') 90 | 91 | stride = 2 92 | fan_in = input_dim * filter_size**2 / (stride**2) 93 | fan_out = output_dim * filter_size**2 94 | 95 | if he_init: 96 | filters_stdev = np.sqrt(4./(fan_in+fan_out)) 97 | else: # Normalized init (Glorot & Bengio) 98 | filters_stdev = np.sqrt(2./(fan_in+fan_out)) 99 | 100 | 101 | if _weights_stdev is not None: 102 | filter_values = uniform( 103 | _weights_stdev, 104 | (filter_size, filter_size, output_dim, input_dim) 105 | ) 106 | else: 107 | filter_values = uniform( 108 | filters_stdev, 109 | (filter_size, filter_size, output_dim, input_dim) 110 | ) 111 | 112 | filter_values *= gain 113 | 114 | filters = lib.param( 115 | name+'.Filters', 116 | filter_values 117 | ) 118 | 119 | if weightnorm==None: 120 | weightnorm = _default_weightnorm 121 | if weightnorm: 122 | norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1,3))) 123 | target_norms = lib.param( 124 | name + '.g', 125 | norm_values 126 | ) 127 | with tf.name_scope('weightnorm') as scope: 128 | norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1,3])) 129 | filters = filters * tf.expand_dims(target_norms / norms, 1) 130 | 131 | 132 | inputs = tf.transpose(inputs, [0,2,3,1], 
name='NCHW_to_NHWC') 133 | 134 | input_shape = tf.shape(inputs) 135 | try: # tf pre-1.0 (top) vs 1.0 (bottom) 136 | output_shape = tf.pack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim]) 137 | except Exception as e: 138 | output_shape = tf.stack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim]) 139 | 140 | result = tf.nn.conv2d_transpose( 141 | value=inputs, 142 | filter=filters, 143 | output_shape=output_shape, 144 | strides=[1, 2, 2, 1], 145 | padding='SAME' 146 | ) 147 | 148 | if biases: 149 | _biases = lib.param( 150 | name+'.Biases', 151 | np.zeros(output_dim, dtype='float32') 152 | ) 153 | result = tf.nn.bias_add(result, _biases) 154 | 155 | result = tf.transpose(result, [0,3,1,2], name='NHWC_to_NCHW') 156 | 157 | 158 | return result 159 | -------------------------------------------------------------------------------- /wgan/tflib/ops/conv1d.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 
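`Conv1D` below follows the same pattern as the other ops in this package: filters are created through `lib.param`, so a second call with the same `name` reuses the same weights. A minimal usage sketch under the repo's TF 1.x API; the tensor shapes and the op name are illustrative only, and it assumes the `wgan` directory is on `PYTHONPATH`:

    import tensorflow as tf
    from tflib.ops.conv1d import Conv1D

    # NCW layout, as documented below: batch of 8 sequences, 16 channels, width 128.
    inputs = tf.placeholder(tf.float32, [8, 16, 128])
    out = Conv1D('Discriminator.Conv1', input_dim=16, output_dim=32,
                 filter_size=5, inputs=inputs, stride=1)
    # out: (8, 32, 128); calling Conv1D again with the same name shares the filters.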
43 | 44 | import tflib as lib 45 | 46 | import numpy as np 47 | import tensorflow as tf 48 | 49 | _default_weightnorm = False 50 | def enable_default_weightnorm(): 51 | global _default_weightnorm 52 | _default_weightnorm = True 53 | 54 | def Conv1D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.): 55 | """ 56 | inputs: tensor of shape (batch size, num channels, width) 57 | mask_type: one of None, 'a', 'b' 58 | 59 | returns: tensor of shape (batch size, num channels, width) 60 | """ 61 | with tf.name_scope(name) as scope: 62 | 63 | if mask_type is not None: 64 | mask_type, mask_n_channels = mask_type 65 | 66 | mask = np.ones( 67 | (filter_size, input_dim, output_dim), 68 | dtype='float32' 69 | ) 70 | center = filter_size // 2 71 | 72 | # Mask out future locations 73 | # filter shape is (width, input channels, output channels) 74 | mask[center+1:, :, :] = 0. 75 | 76 | # Mask out future channels 77 | for i in xrange(mask_n_channels): 78 | for j in xrange(mask_n_channels): 79 | if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j): 80 | mask[ 81 | center, 82 | i::mask_n_channels, 83 | j::mask_n_channels 84 | ] = 0. 85 | 86 | 87 | def uniform(stdev, size): 88 | return np.random.uniform( 89 | low=-stdev * np.sqrt(3), 90 | high=stdev * np.sqrt(3), 91 | size=size 92 | ).astype('float32') 93 | 94 | fan_in = input_dim * filter_size 95 | fan_out = output_dim * filter_size / stride 96 | 97 | if mask_type is not None: # only approximately correct 98 | fan_in /= 2. 99 | fan_out /= 2. 100 | 101 | if he_init: 102 | filters_stdev = np.sqrt(4./(fan_in+fan_out)) 103 | else: # Normalized init (Glorot & Bengio) 104 | filters_stdev = np.sqrt(2./(fan_in+fan_out)) 105 | 106 | filter_values = uniform( 107 | filters_stdev, 108 | (filter_size, input_dim, output_dim) 109 | ) 110 | # print "WARNING IGNORING GAIN" 111 | filter_values *= gain 112 | 113 | filters = lib.param(name+'.Filters', filter_values) 114 | 115 | if weightnorm==None: 116 | weightnorm = _default_weightnorm 117 | if weightnorm: 118 | norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1))) 119 | target_norms = lib.param( 120 | name + '.g', 121 | norm_values 122 | ) 123 | with tf.name_scope('weightnorm') as scope: 124 | norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1])) 125 | filters = filters * (target_norms / norms) 126 | 127 | if mask_type is not None: 128 | with tf.name_scope('filter_mask'): 129 | filters = filters * mask 130 | 131 | result = tf.nn.conv1d( 132 | value=inputs, 133 | filters=filters, 134 | stride=stride, 135 | padding='SAME', 136 | data_format='NCHW' 137 | ) 138 | 139 | if biases: 140 | _biases = lib.param( 141 | name+'.Biases', 142 | np.zeros([output_dim], dtype='float32') 143 | ) 144 | 145 | # result = result + _biases 146 | 147 | result = tf.expand_dims(result, 3) 148 | result = tf.nn.bias_add(result, _biases, data_format='NCHW') 149 | result = tf.squeeze(result) 150 | 151 | return result 152 | -------------------------------------------------------------------------------- /wgan/tflib/inception_score.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 
4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | # From https://github.com/openai/improved-gan/blob/master/inception_score/model.py 44 | # Code derived from tensorflow/tensorflow/models/image/imagenet/classify_image.py 45 | from __future__ import absolute_import 46 | from __future__ import division 47 | from __future__ import print_function 48 | 49 | import os.path 50 | import sys 51 | import tarfile 52 | 53 | import numpy as np 54 | from six.moves import urllib 55 | import tensorflow as tf 56 | import glob 57 | import scipy.misc 58 | import math 59 | import sys 60 | 61 | MODEL_DIR = '/tmp/imagenet' 62 | DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' 63 | softmax = None 64 | 65 | # Call this function with list of images. Each of elements should be a 66 | # numpy array with values ranging from 0 to 255. 
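# A sketch of a typical call (the random uint8 images here are placeholders;
# the score is only meaningful for real samples):
#   images = [np.uint8(np.random.rand(32, 32, 3) * 255) for _ in range(100)]
#   mean_score, std_score = get_inception_score(images, splits=10)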
67 | def get_inception_score(images, splits=10): 68 | assert(type(images) == list) 69 | assert(type(images[0]) == np.ndarray) 70 | assert(len(images[0].shape) == 3) 71 | assert(np.max(images[0]) > 10) 72 | assert(np.min(images[0]) >= 0.0) 73 | inps = [] 74 | for img in images: 75 | img = img.astype(np.float32) 76 | inps.append(np.expand_dims(img, 0)) 77 | bs = 100 78 | with tf.Session() as sess: 79 | preds = [] 80 | n_batches = int(math.ceil(float(len(inps)) / float(bs))) 81 | for i in range(n_batches): 82 | # sys.stdout.write(".") 83 | # sys.stdout.flush() 84 | inp = inps[(i * bs):min((i + 1) * bs, len(inps))] 85 | inp = np.concatenate(inp, 0) 86 | pred = sess.run(softmax, {'ExpandDims:0': inp}) 87 | preds.append(pred) 88 | preds = np.concatenate(preds, 0) 89 | scores = [] 90 | for i in range(splits): 91 | part = preds[(i * preds.shape[0] // splits):((i + 1) * preds.shape[0] // splits), :] 92 | kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0))) 93 | kl = np.mean(np.sum(kl, 1)) 94 | scores.append(np.exp(kl)) 95 | return np.mean(scores), np.std(scores) 96 | 97 | # This function is called automatically. 98 | def _init_inception(): 99 | global softmax 100 | if not os.path.exists(MODEL_DIR): 101 | os.makedirs(MODEL_DIR) 102 | filename = DATA_URL.split('/')[-1] 103 | filepath = os.path.join(MODEL_DIR, filename) 104 | if not os.path.exists(filepath): 105 | def _progress(count, block_size, total_size): 106 | sys.stdout.write('\r>> Downloading %s %.1f%%' % ( 107 | filename, float(count * block_size) / float(total_size) * 100.0)) 108 | sys.stdout.flush() 109 | filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress) 110 | print() 111 | statinfo = os.stat(filepath) 112 | print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.') 113 | tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR) 114 | with tf.gfile.FastGFile(os.path.join( 115 | MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f: 116 | graph_def = tf.GraphDef() 117 | graph_def.ParseFromString(f.read()) 118 | _ = tf.import_graph_def(graph_def, name='') 119 | # Works with an arbitrary minibatch size. 120 | with tf.Session() as sess: 121 | pool3 = sess.graph.get_tensor_by_name('pool_3:0') 122 | ops = pool3.graph.get_operations() 123 | for op_idx, op in enumerate(ops): 124 | for o in op.outputs: 125 | shape = o.get_shape() 126 | shape = [s.value for s in shape] 127 | new_shape = [] 128 | for j, s in enumerate(shape): 129 | if s == 1 and j == 0: 130 | new_shape.append(None) 131 | else: 132 | new_shape.append(s) 133 | o._shape = tf.TensorShape(new_shape) 134 | w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1] 135 | logits = tf.matmul(tf.squeeze(pool3), w) 136 | softmax = tf.nn.softmax(logits) 137 | 138 | if softmax is None: 139 | _init_inception() 140 | -------------------------------------------------------------------------------- /wgan/tflib/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 
12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see <http://www.gnu.org/licenses/>. 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import numpy as np 45 | import tensorflow as tf 46 | 47 | import locale 48 | 49 | locale.setlocale(locale.LC_ALL, '') 50 | 51 | _params = {} 52 | _param_aliases = {} 53 | def param(name, *args, **kwargs): 54 | """ 55 | A wrapper for `tf.Variable` which enables parameter sharing in models. 56 | 57 | Creates and returns TensorFlow variables just like `tf.Variable`, 58 | except that if you try to create a param with the same name as a 59 | previously-created one, `param(...)` will just return the old one instead of 60 | making a new one. 61 | 62 | This constructor also adds a `param` attribute to the variables it 63 | creates, so that you can easily search a graph for all params. 64 | """ 65 | 66 | if name not in _params: 67 | kwargs['name'] = name 68 | param = tf.Variable(*args, **kwargs) 69 | param.param = True 70 | _params[name] = param 71 | result = _params[name] 72 | i = 0 73 | while result in _param_aliases: 74 | # print 'following alias {}: {} to {}'.format(i, result, _param_aliases[result]) 75 | i += 1 76 | result = _param_aliases[result] 77 | return result 78 | 79 | def params_with_name(name): 80 | return [p for n,p in _params.items() if name in n] 81 | 82 | def delete_all_params(): 83 | _params.clear() 84 | 85 | def alias_params(replace_dict): 86 | for old,new in replace_dict.items(): 87 | # print "aliasing {} to {}".format(old,new) 88 | _param_aliases[old] = new 89 | 90 | def delete_param_aliases(): 91 | _param_aliases.clear() 92 | 93 | # def search(node, critereon): 94 | # """ 95 | # Traverse the Theano graph starting at `node` and return a list of all nodes 96 | # which match the `critereon` function.
When optimizing a cost function, you 97 | # can use this to get a list of all of the trainable params in the graph, like 98 | # so: 99 | 100 | # `lib.search(cost, lambda x: hasattr(x, "param"))` 101 | # """ 102 | 103 | # def _search(node, critereon, visited): 104 | # if node in visited: 105 | # return [] 106 | # visited.add(node) 107 | 108 | # results = [] 109 | # if isinstance(node, T.Apply): 110 | # for inp in node.inputs: 111 | # results += _search(inp, critereon, visited) 112 | # else: # Variable node 113 | # if critereon(node): 114 | # results.append(node) 115 | # if node.owner is not None: 116 | # results += _search(node.owner, critereon, visited) 117 | # return results 118 | 119 | # return _search(node, critereon, set()) 120 | 121 | # def print_params_info(params): 122 | # """Print information about the parameters in the given param set.""" 123 | 124 | # params = sorted(params, key=lambda p: p.name) 125 | # values = [p.get_value(borrow=True) for p in params] 126 | # shapes = [p.shape for p in values] 127 | # print "Params for cost:" 128 | # for param, value, shape in zip(params, values, shapes): 129 | # print "\t{0} ({1})".format( 130 | # param.name, 131 | # ",".join([str(x) for x in shape]) 132 | # ) 133 | 134 | # total_param_count = 0 135 | # for shape in shapes: 136 | # param_count = 1 137 | # for dim in shape: 138 | # param_count *= dim 139 | # total_param_count += param_count 140 | # print "Total parameter count: {0}".format( 141 | # locale.format("%d", total_param_count, grouping=True) 142 | # ) 143 | 144 | def print_model_settings(locals_): 145 | print "Uppercase local vars:" 146 | all_vars = [(k,v) for (k,v) in locals_.items() if (k.isupper() and k!='T' and k!='SETTINGS' and k!='ALL_SETTINGS')] 147 | all_vars = sorted(all_vars, key=lambda x: x[0]) 148 | for var_name, var_value in all_vars: 149 | print "\t{}: {}".format(var_name, var_value) 150 | 151 | 152 | def print_model_settings_dict(settings): 153 | print "Settings dict:" 154 | all_vars = [(k,v) for (k,v) in settings.items()] 155 | all_vars = sorted(all_vars, key=lambda x: x[0]) 156 | for var_name, var_value in all_vars: 157 | print "\t{}: {}".format(var_name, var_value) -------------------------------------------------------------------------------- /wgan/tflib/ops/conv2d.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 
20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import tflib as lib 45 | 46 | import numpy as np 47 | import tensorflow as tf 48 | 49 | _default_weightnorm = False 50 | def enable_default_weightnorm(): 51 | global _default_weightnorm 52 | _default_weightnorm = True 53 | 54 | _weights_stdev = None 55 | def set_weights_stdev(weights_stdev): 56 | global _weights_stdev 57 | _weights_stdev = weights_stdev 58 | 59 | def unset_weights_stdev(): 60 | global _weights_stdev 61 | _weights_stdev = None 62 | 63 | def Conv2D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.): 64 | """ 65 | inputs: tensor of shape (batch size, num channels, height, width) 66 | mask_type: one of None, 'a', 'b' 67 | 68 | returns: tensor of shape (batch size, num channels, height, width) 69 | """ 70 | with tf.name_scope(name) as scope: 71 | 72 | if mask_type is not None: 73 | mask_type, mask_n_channels = mask_type 74 | 75 | mask = np.ones( 76 | (filter_size, filter_size, input_dim, output_dim), 77 | dtype='float32' 78 | ) 79 | center = filter_size // 2 80 | 81 | # Mask out future locations 82 | # filter shape is (height, width, input channels, output channels) 83 | mask[center+1:, :, :, :] = 0. 84 | mask[center, center+1:, :, :] = 0. 85 | 86 | # Mask out future channels 87 | for i in xrange(mask_n_channels): 88 | for j in xrange(mask_n_channels): 89 | if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j): 90 | mask[ 91 | center, 92 | center, 93 | i::mask_n_channels, 94 | j::mask_n_channels 95 | ] = 0. 96 | 97 | 98 | def uniform(stdev, size): 99 | return np.random.uniform( 100 | low=-stdev * np.sqrt(3), 101 | high=stdev * np.sqrt(3), 102 | size=size 103 | ).astype('float32') 104 | 105 | fan_in = input_dim * filter_size**2 106 | fan_out = output_dim * filter_size**2 / (stride**2) 107 | 108 | if mask_type is not None: # only approximately correct 109 | fan_in /= 2. 110 | fan_out /= 2. 
111 | 112 | if he_init: 113 | filters_stdev = np.sqrt(4./(fan_in+fan_out)) 114 | else: # Normalized init (Glorot & Bengio) 115 | filters_stdev = np.sqrt(2./(fan_in+fan_out)) 116 | 117 | if _weights_stdev is not None: 118 | filter_values = uniform( 119 | _weights_stdev, 120 | (filter_size, filter_size, input_dim, output_dim) 121 | ) 122 | else: 123 | filter_values = uniform( 124 | filters_stdev, 125 | (filter_size, filter_size, input_dim, output_dim) 126 | ) 127 | 128 | # print "WARNING IGNORING GAIN" 129 | filter_values *= gain 130 | 131 | filters = lib.param(name+'.Filters', filter_values) 132 | 133 | if weightnorm==None: 134 | weightnorm = _default_weightnorm 135 | if weightnorm: 136 | norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1,2))) 137 | target_norms = lib.param( 138 | name + '.g', 139 | norm_values 140 | ) 141 | with tf.name_scope('weightnorm') as scope: 142 | norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1,2])) 143 | filters = filters * (target_norms / norms) 144 | 145 | if mask_type is not None: 146 | with tf.name_scope('filter_mask'): 147 | filters = filters * mask 148 | 149 | result = tf.nn.conv2d( 150 | input=inputs, 151 | filter=filters, 152 | strides=[1, 1, stride, stride], 153 | padding='SAME', 154 | data_format='NCHW' 155 | ) 156 | 157 | if biases: 158 | _biases = lib.param( 159 | name+'.Biases', 160 | np.zeros(output_dim, dtype='float32') 161 | ) 162 | 163 | result = tf.nn.bias_add(result, _biases, data_format='NCHW') 164 | 165 | 166 | return result 167 | -------------------------------------------------------------------------------- /eval_kitti/matrix.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of wganvo. 3 | * This file belongs to the authors of KITTI (http://www.cvlibs.net/datasets/kitti/eval_odometry.php) 4 | * (see original license below) 5 | * 6 | * Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 7 | * For more information see 8 | * 9 | * wganvo is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation, either version 3 of the License, or 12 | * (at your option) any later version. 13 | * 14 | * wganvo is distributed in the hope that it will be useful, 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * GNU General Public License for more details. 18 | * 19 | * You should have received a copy of the GNU General Public License 20 | * along with wganvo. If not, see . 21 | */ 22 | /* 23 | Copyright 2011. All rights reserved. 24 | Institute of Measurement and Control Systems 25 | Karlsruhe Institute of Technology, Germany 26 | 27 | This file is part of libviso2. 28 | Authors: Andreas Geiger 29 | 30 | libviso2 is free software; you can redistribute it and/or modify it under the 31 | terms of the GNU General Public License as published by the Free Software 32 | Foundation; either version 2 of the License, or any later version. 33 | 34 | libviso2 is distributed in the hope that it will be useful, but WITHOUT ANY 35 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 36 | PARTICULAR PURPOSE. See the GNU General Public License for more details. 
37 | 38 | You should have received a copy of the GNU General Public License along with 39 | libviso2; if not, write to the Free Software Foundation, Inc., 51 Franklin 40 | Street, Fifth Floor, Boston, MA 02110-1301, USA 41 | */ 42 | 43 | #ifndef MATRIX_H 44 | #define MATRIX_H 45 | 46 | #include <stdio.h> 47 | #include <string.h> 48 | #include <math.h> 49 | #include <iostream> 50 | #include <vector> 51 | 52 | #ifndef _MSC_VER 53 | #include <stdint.h> 54 | #else 55 | typedef __int8 int8_t; 56 | typedef __int16 int16_t; 57 | typedef __int32 int32_t; 58 | typedef __int64 int64_t; 59 | typedef unsigned __int8 uint8_t; 60 | typedef unsigned __int16 uint16_t; 61 | typedef unsigned __int32 uint32_t; 62 | typedef unsigned __int64 uint64_t; 63 | #endif 64 | 65 | #define endll endl << endl // double end line definition 66 | 67 | typedef double FLOAT; // double precision 68 | //typedef float FLOAT; // single precision 69 | 70 | class Matrix { 71 | 72 | public: 73 | 74 | // constructor / deconstructor 75 | Matrix (); // init empty 0x0 matrix 76 | Matrix (const int32_t m,const int32_t n); // init empty mxn matrix 77 | Matrix (const int32_t m,const int32_t n,const FLOAT* val_); // init mxn matrix with values from array 'val' 78 | Matrix (const Matrix &M); // creates deepcopy of M 79 | ~Matrix (); 80 | 81 | // assignment operator, copies contents of M 82 | Matrix& operator= (const Matrix &M); 83 | 84 | // copies submatrix of M into array 'val', default values copy whole row/column/matrix 85 | void getData(FLOAT* val_,int32_t i1=0,int32_t j1=0,int32_t i2=-1,int32_t j2=-1); 86 | 87 | // set or get submatrices of current matrix 88 | Matrix getMat(int32_t i1,int32_t j1,int32_t i2=-1,int32_t j2=-1); 89 | void setMat(const Matrix &M,const int32_t i,const int32_t j); 90 | 91 | // set sub-matrix to scalar (default 0), -1 as end replaces whole row/column/matrix 92 | void setVal(FLOAT s,int32_t i1=0,int32_t j1=0,int32_t i2=-1,int32_t j2=-1); 93 | 94 | // set (part of) diagonal to scalar, -1 as end replaces whole diagonal 95 | void setDiag(FLOAT s,int32_t i1=0,int32_t i2=-1); 96 | 97 | // clear matrix 98 | void zero(); 99 | 100 | // extract columns with given index 101 | Matrix extractCols (std::vector<int> idx); 102 | 103 | // create identity matrix 104 | static Matrix eye (const int32_t m); 105 | void eye (); 106 | 107 | // create diagonal matrix with nx1 or 1xn matrix M as elements 108 | static Matrix diag(const Matrix &M); 109 | 110 | // returns the m-by-n matrix whose elements are taken column-wise from M 111 | static Matrix reshape(const Matrix &M,int32_t m,int32_t n); 112 | 113 | // create 3x3 rotation matrices (convention: http://en.wikipedia.org/wiki/Rotation_matrix) 114 | static Matrix rotMatX(const FLOAT &angle); 115 | static Matrix rotMatY(const FLOAT &angle); 116 | static Matrix rotMatZ(const FLOAT &angle); 117 | 118 | // simple arithmetic operations 119 | Matrix operator+ (const Matrix &M); // add matrix 120 | Matrix operator- (const Matrix &M); // subtract matrix 121 | Matrix operator* (const Matrix &M); // multiply with matrix 122 | Matrix operator* (const FLOAT &s); // multiply with scalar 123 | Matrix operator/ (const Matrix &M); // divide elementwise by matrix (or vector) 124 | Matrix operator/ (const FLOAT &s); // divide by scalar 125 | Matrix operator- (); // negative matrix 126 | Matrix operator~ (); // transpose 127 | FLOAT l2norm (); // euclidean norm (vectors) / frobenius norm (matrices) 128 | FLOAT mean (); // mean of all elements in matrix 129 | 130 | // complex arithmetic operations 131 | static Matrix cross (const Matrix &a, const Matrix &b); // cross
product of two vectors 132 | static Matrix inv (const Matrix &M); // invert matrix M 133 | bool inv (); // invert this matrix 134 | FLOAT det (); // returns determinant of matrix 135 | bool solve (const Matrix &M,FLOAT eps=1e-20); // solve linear system M*x=B, replaces *this and M 136 | bool lu(int32_t *idx, FLOAT &d, FLOAT eps=1e-20); // replace *this by lower upper decomposition 137 | void svd(Matrix &U,Matrix &W,Matrix &V); // singular value decomposition *this = U*diag(W)*V^T 138 | 139 | // print matrix to stream 140 | friend std::ostream& operator<< (std::ostream& out,const Matrix& M); 141 | 142 | // direct data access 143 | FLOAT **val; 144 | int32_t m,n; 145 | 146 | private: 147 | 148 | void allocateMemory (const int32_t m_,const int32_t n_); 149 | void releaseMemory (); 150 | inline FLOAT pythag(FLOAT a,FLOAT b); 151 | 152 | }; 153 | 154 | #endif // MATRIX_H 155 | -------------------------------------------------------------------------------- /lie_algebra.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF8 -*- 2 | # 3 | # This file is part of wganvo. 4 | # This file is based on a file from evo (github.com/MichaelGrupp/evo) (see original license below) 5 | # 6 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 7 | # For more information see 8 | # 9 | # wganvo is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # wganvo is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with wganvo. If not, see . 21 | 22 | # Provides functions for Lie group calculations. 23 | # author: Michael Grupp 24 | # 25 | # This file is part of evo (github.com/MichaelGrupp/evo). 26 | # 27 | # evo is free software: you can redistribute it and/or modify 28 | # it under the terms of the GNU General Public License as published by 29 | # the Free Software Foundation, either version 3 of the License, or 30 | # (at your option) any later version. 31 | # 32 | # evo is distributed in the hope that it will be useful, 33 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 34 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 35 | # GNU General Public License for more details. 36 | # 37 | # You should have received a copy of the GNU General Public License 38 | # along with evo. If not, see . 39 | 40 | import numpy as np 41 | import scipy.linalg as sl 42 | 43 | import transformations as tr 44 | 45 | 46 | class LieAlgebraException(Exception): 47 | pass 48 | 49 | 50 | def hat(v): 51 | """ 52 | :param v: 3x1 vector 53 | :return: 3x3 skew symmetric matrix 54 | """ 55 | return np.array([[0.0, -v[2], v[1]], 56 | [v[2], 0.0, -v[0]], 57 | [-v[1], v[0], 0.0]]) 58 | 59 | 60 | def vee(m): 61 | """ 62 | :param m: 3x3 skew symmetric matrix 63 | :return: 3x1 vector 64 | """ 65 | return np.array([-m[1, 2], m[0, 2], -m[0, 1]]) 66 | 67 | 68 | def so3_exp(axis, angle): 69 | """ 70 | Computes an SO(3) matrix from an axis/angle representation. 71 | Code source: http://stackoverflow.com/a/25709323 72 | :param axis: 3x1 rotation axis (unit vector!) 
73 | :param angle: radians 74 | :return: SO(3) rotation matrix (matrix exponential of so(3)) 75 | """ 76 | return sl.expm(np.cross(np.eye(3), axis / np.linalg.norm(axis) * angle)) 77 | 78 | 79 | def so3_log(r, return_angle_only=True, return_skew=False): 80 | """ 81 | :param r: SO(3) rotation matrix 82 | :param return_angle_only: return only the angle (default) 83 | :param return_skew: return skew symmetric Lie algebra element 84 | :return: axis/angle 85 | or if skew: 86 | 3x3 skew symmetric logarithmic map in so(3) (Ma, Soatto eq. 2.8) 87 | """ 88 | if not is_so3(r): 89 | raise LieAlgebraException("matrix is not a valid SO(3) group element") 90 | if return_angle_only and not return_skew: 91 | return np.arccos(min(1, max(-1, (np.trace(r) - 1) / 2))) 92 | angle, axis, _ = tr.rotation_from_matrix(se3(r, [0, 0, 0])) 93 | if return_skew: 94 | return hat(axis * angle) 95 | else: 96 | return axis, angle 97 | 98 | 99 | def se3(r=np.eye(3), t=np.array([0, 0, 0])): 100 | """ 101 | :param r: SO(3) rotation matrix 102 | :param t: 3x1 translation vector 103 | :return: SE(3) transformation matrix 104 | """ 105 | se3 = np.eye(4) 106 | se3[:3, :3] = r 107 | se3[:3, 3] = t 108 | return se3 109 | 110 | 111 | def sim3(r, t, s): 112 | """ 113 | :param r: SO(3) rotation matrix 114 | :param t: 3x1 translation vector 115 | :param s: positive, non-zero scale factor 116 | :return: Sim(3) similarity transformation matrix 117 | """ 118 | sim3 = np.eye(4) 119 | sim3[:3, :3] = s * r 120 | sim3[:3, 3] = t 121 | return sim3 122 | 123 | 124 | def so3_from_se3(p): 125 | """ 126 | :param p: absolute SE(3) pose 127 | :return: the SO(3) rotation matrix in p 128 | """ 129 | return p[:3, :3] 130 | 131 | 132 | def se3_inverse(p): 133 | """ 134 | :param p: absolute SE(3) pose 135 | :return: the inverted pose 136 | """ 137 | r_inv = p[:3, :3].transpose() 138 | t_inv = -r_inv.dot(p[:3, 3]) 139 | return se3(r_inv, t_inv) 140 | 141 | 142 | def is_so3(r): 143 | """ 144 | :param r: a 3x3 matrix 145 | :return: True if r is in the SO(3) group 146 | """ 147 | # Check the determinant. 148 | det_valid = np.isclose(np.linalg.det(r), [1.0], atol=1e-6) 149 | # Check if the transpose is the inverse. 
150 | inv_valid = np.allclose(r.transpose().dot(r), np.eye(3), atol=1e-6) 151 | return det_valid and inv_valid 152 | 153 | 154 | def is_se3(p): 155 | """ 156 | :param p: a 4x4 matrix 157 | :return: True if p is in the SE(3) group 158 | """ 159 | rot_valid = is_so3(p[:3, :3]) 160 | lower_valid = np.equal(p[3, :], np.array([0.0, 0.0, 0.0, 1.0])).all() 161 | return rot_valid and lower_valid 162 | 163 | 164 | def is_sim3(p, s): 165 | """ 166 | :param p: a 4x4 matrix 167 | :param s: expected scale factor 168 | :return: True if p is in the Sim(3) group with scale s 169 | """ 170 | rot = p[:3, :3] 171 | rot_unscaled = np.multiply(rot, 1.0 / s) 172 | rot_valid = is_so3(rot_unscaled) 173 | lower_valid = np.equal(p[3, :], np.array([0.0, 0.0, 0.0, 1.0])).all() 174 | return rot_valid and lower_valid 175 | 176 | 177 | def relative_so3(r1, r2): 178 | """ 179 | :param r1, r2: SO(3) matrices 180 | :return: the relative rotation r1^{⁻1} * r2 181 | """ 182 | return np.dot(r1.transpose(), r2) 183 | 184 | 185 | def relative_se3(p1, p2): 186 | """ 187 | :param p1, p2: SE(3) matrices 188 | :return: the relative transformation p1^{⁻1} * p2 189 | """ 190 | return np.dot(se3_inverse(p1), p2) 191 | 192 | 193 | def random_so3(): 194 | """ 195 | :return: a random SO(3) matrix (for debugging) 196 | """ 197 | return tr.random_rotation_matrix()[:3, :3] 198 | 199 | 200 | def random_se3(): 201 | """ 202 | :return: a random SE(3) matrix (for debugging) 203 | """ 204 | r = random_so3() 205 | t = tr.random_vector(3) 206 | return se3(r, t) 207 | -------------------------------------------------------------------------------- /wgan/tflib/ops/batchnorm.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 
35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import tflib as lib 45 | 46 | import numpy as np 47 | import tensorflow as tf 48 | 49 | def Batchnorm(name, axes, inputs, is_training=None, stats_iter=None, update_moving_stats=True, fused=True): 50 | if ((axes == [0,2,3]) or (axes == [0,2])) and fused==True: 51 | if axes==[0,2]: 52 | inputs = tf.expand_dims(inputs, 3) 53 | # Old (working but pretty slow) implementation: 54 | ########## 55 | 56 | # inputs = tf.transpose(inputs, [0,2,3,1]) 57 | 58 | # mean, var = tf.nn.moments(inputs, [0,1,2], keep_dims=False) 59 | # offset = lib.param(name+'.offset', np.zeros(mean.get_shape()[-1], dtype='float32')) 60 | # scale = lib.param(name+'.scale', np.ones(var.get_shape()[-1], dtype='float32')) 61 | # result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-4) 62 | 63 | # return tf.transpose(result, [0,3,1,2]) 64 | 65 | # New (super fast but untested) implementation: 66 | offset = lib.param(name+'.offset', np.zeros(inputs.get_shape()[1], dtype='float32')) 67 | scale = lib.param(name+'.scale', np.ones(inputs.get_shape()[1], dtype='float32')) 68 | 69 | moving_mean = lib.param(name+'.moving_mean', np.zeros(inputs.get_shape()[1], dtype='float32'), trainable=False) 70 | moving_variance = lib.param(name+'.moving_variance', np.ones(inputs.get_shape()[1], dtype='float32'), trainable=False) 71 | 72 | def _fused_batch_norm_training(): 73 | return tf.nn.fused_batch_norm(inputs, scale, offset, epsilon=1e-5, data_format='NCHW') 74 | def _fused_batch_norm_inference(): 75 | # Version which blends in the current item's statistics 76 | batch_size = tf.cast(tf.shape(inputs)[0], 'float32') 77 | mean, var = tf.nn.moments(inputs, [2,3], keep_dims=True) 78 | mean = ((1./batch_size)*mean) + (((batch_size-1.)/batch_size)*moving_mean)[None,:,None,None] 79 | var = ((1./batch_size)*var) + (((batch_size-1.)/batch_size)*moving_variance)[None,:,None,None] 80 | return tf.nn.batch_normalization(inputs, mean, var, offset[None,:,None,None], scale[None,:,None,None], 1e-5), mean, var 81 | 82 | # Standard version 83 | # return tf.nn.fused_batch_norm( 84 | # inputs, 85 | # scale, 86 | # offset, 87 | # epsilon=1e-2, 88 | # mean=moving_mean, 89 | # variance=moving_variance, 90 | # is_training=False, 91 | # data_format='NCHW' 92 | # ) 93 | 94 | if is_training is None: 95 | outputs, batch_mean, batch_var = _fused_batch_norm_training() 96 | else: 97 | outputs, batch_mean, batch_var = tf.cond(is_training, 98 | _fused_batch_norm_training, 99 | _fused_batch_norm_inference) 100 | if update_moving_stats: 101 | no_updates = lambda: outputs 102 | def _force_updates(): 103 | """Internal function forces updates moving_vars if is_training.""" 104 | float_stats_iter = tf.cast(stats_iter, tf.float32) 105 | 106 | update_moving_mean = tf.assign(moving_mean, ((float_stats_iter/(float_stats_iter+1))*moving_mean) + ((1/(float_stats_iter+1))*batch_mean)) 107 | update_moving_variance = tf.assign(moving_variance, ((float_stats_iter/(float_stats_iter+1))*moving_variance) + ((1/(float_stats_iter+1))*batch_var)) 108 | 109 | with 
tf.control_dependencies([update_moving_mean, update_moving_variance]): 110 | return tf.identity(outputs) 111 | outputs = tf.cond(is_training, _force_updates, no_updates) 112 | 113 | if axes == [0,2]: 114 | return outputs[:,:,:,0] # collapse last dim 115 | else: 116 | return outputs 117 | else: 118 | # raise Exception('old BN') 119 | # TODO we can probably use nn.fused_batch_norm here too for speedup 120 | mean, var = tf.nn.moments(inputs, axes, keep_dims=True) 121 | shape = mean.get_shape().as_list() 122 | if 0 not in axes: 123 | print "WARNING ({}): didn't find 0 in axes, but not using separate BN params for each item in batch".format(name) 124 | shape[0] = 1 125 | offset = lib.param(name+'.offset', np.zeros(shape, dtype='float32')) 126 | scale = lib.param(name+'.scale', np.ones(shape, dtype='float32')) 127 | result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5) 128 | 129 | 130 | return result 131 | -------------------------------------------------------------------------------- /transform.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk 4 | # (see original license below) 5 | # 6 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 7 | # For more information see 8 | # 9 | # This file is licensed under the Creative Commons 10 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 11 | # To view a copy of this license, visit 12 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 13 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 14 | # 15 | 16 | ################################################################################ 17 | # 18 | # Copyright (c) 2017 University of Oxford 19 | # Authors: 20 | # Geoff Pascoe (gmp@robots.ox.ac.uk) 21 | # 22 | # This work is licensed under the Creative Commons 23 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 24 | # To view a copy of this license, visit 25 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 26 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 27 | # 28 | ################################################################################ 29 | 30 | import numpy as np 31 | import numpy.matlib as matlib 32 | from math import sin, cos, atan2, sqrt 33 | 34 | MATRIX_MATCH_TOLERANCE = 1e-4 35 | 36 | def build_intrinsic_matrix(focal_length, principal_point): 37 | intrinsic_matrix = matlib.identity(3) 38 | intrinsic_matrix[0:2, 2] = np.matrix(principal_point).transpose() 39 | intrinsic_matrix[0,0] = focal_length[0] 40 | intrinsic_matrix[1,1] = focal_length[1] 41 | return intrinsic_matrix 42 | 43 | 44 | def build_se3_transform(xyzrpy): 45 | """Creates an SE3 transform from translation and Euler angles. 46 | 47 | Args: 48 | xyzrpy (list[float]): translation and Euler angles for transform. Must have six components. 49 | 50 | Returns: 51 | numpy.matrixlib.defmatrix.matrix: SE3 homogeneous transformation matrix 52 | 53 | Raises: 54 | ValueError: if `len(xyzrpy) != 6` 55 | 56 | """ 57 | if len(xyzrpy) != 6: 58 | raise ValueError("Must supply 6 values to build transform") 59 | 60 | se3 = matlib.identity(4) 61 | se3[0:3, 0:3] = euler_to_so3(xyzrpy[3:6]) 62 | se3[0:3, 3] = np.matrix(xyzrpy[0:3]).transpose() 63 | return se3 64 | 65 | 66 | def euler_to_so3(rpy): 67 | """Converts Euler angles to an SO3 rotation matrix. 
68 | 69 | Args: 70 | rpy (list[float]): Euler angles (in radians). Must have three components. 71 | 72 | Returns: 73 | numpy.matrixlib.defmatrix.matrix: 3x3 SO3 rotation matrix 74 | 75 | Raises: 76 | ValueError: if `len(rpy) != 3`. 77 | 78 | """ 79 | if len(rpy) != 3: 80 | raise ValueError("Euler angles must have three components") 81 | 82 | R_x = np.matrix([[1, 0, 0], 83 | [0, cos(rpy[0]), -sin(rpy[0])], 84 | [0, sin(rpy[0]), cos(rpy[0])]]) 85 | R_y = np.matrix([[cos(rpy[1]), 0, sin(rpy[1])], 86 | [0, 1, 0], 87 | [-sin(rpy[1]), 0, cos(rpy[1])]]) 88 | R_z = np.matrix([[cos(rpy[2]), -sin(rpy[2]), 0], 89 | [sin(rpy[2]), cos(rpy[2]), 0], 90 | [0, 0, 1]]) 91 | R_zyx = R_z * R_y * R_x 92 | return R_zyx 93 | 94 | 95 | def so3_to_euler(so3): 96 | """Converts an SO3 rotation matrix to Euler angles 97 | 98 | Args: 99 | so3: 3x3 rotation matrix 100 | 101 | Returns: 102 | numpy.matrixlib.defmatrix.matrix: list of Euler angles (size 3) 103 | 104 | Raises: 105 | ValueError: if so3 is not 3x3 106 | ValueError: if a valid Euler parametrisation cannot be found 107 | 108 | """ 109 | if so3.shape != (3, 3): 110 | raise ValueError("SO3 matrix must be 3x3") 111 | roll = atan2(so3[2, 1], so3[2, 2]) 112 | yaw = atan2(so3[1, 0], so3[0, 0]) 113 | denom = sqrt(so3[0, 0] ** 2 + so3[1, 0] ** 2) 114 | pitch_poss = [atan2(-so3[2, 0], denom), atan2(-so3[2, 0], -denom)] 115 | 116 | R = euler_to_so3((roll, pitch_poss[0], yaw)) 117 | 118 | if (so3 - R).sum() < MATRIX_MATCH_TOLERANCE: 119 | return np.matrix([roll, pitch_poss[0], yaw]) 120 | else: 121 | R = euler_to_so3((roll, pitch_poss[1], yaw)) 122 | if (so3 - R).sum() > MATRIX_MATCH_TOLERANCE: 123 | raise ValueError("Could not find valid pitch angle") 124 | return np.matrix([roll, pitch_poss[1], yaw]) 125 | 126 | 127 | def so3_to_quaternion(so3): 128 | """Converts an SO3 rotation matrix to a quaternion 129 | 130 | Args: 131 | so3: 3x3 rotation matrix 132 | 133 | Returns: 134 | numpy.ndarray: quaternion [w, x, y, z] 135 | 136 | Raises: 137 | ValueError: if so3 is not 3x3 138 | """ 139 | if so3.shape != (3, 3): 140 | raise ValueError("SO3 matrix must be 3x3") 141 | 142 | R_xx = so3[0, 0] 143 | R_xy = so3[0, 1] 144 | R_xz = so3[0, 2] 145 | R_yx = so3[1, 0] 146 | R_yy = so3[1, 1] 147 | R_yz = so3[1, 2] 148 | R_zx = so3[2, 0] 149 | R_zy = so3[2, 1] 150 | R_zz = so3[2, 2] 151 | 152 | try: 153 | w = sqrt(so3.trace() + 1) / 2 154 | except(ValueError): 155 | # w is non-real 156 | w = 0 157 | 158 | x = sqrt(1 + R_xx - R_yy - R_zz) / 2 159 | y = sqrt(1 + R_yy - R_xx - R_zz) / 2 160 | z = sqrt(1 + R_zz - R_yy - R_xx) / 2 161 | 162 | max_index = max(range(4), key=[w, x, y, z].__getitem__) 163 | 164 | if max_index == 0: 165 | x = (R_zy - R_yz) / (4 * w) 166 | y = (R_xz - R_zx) / (4 * w) 167 | z = (R_yx - R_xy) / (4 * w) 168 | elif max_index == 1: 169 | w = (R_zy - R_yz) / (4 * x) 170 | y = (R_xy + R_yx) / (4 * x) 171 | z = (R_zx + R_xz) / (4 * x) 172 | elif max_index == 2: 173 | w = (R_xz - R_zx) / (4 * y) 174 | x = (R_xy + R_yx) / (4 * y) 175 | z = (R_yz + R_zy) / (4 * y) 176 | elif max_index == 3: 177 | w = (R_yx - R_xy) / (4 * z) 178 | x = (R_zx + R_xz) / (4 * z) 179 | y = (R_yz + R_zy) / (4 * z) 180 | 181 | return np.array([w, x, y, z]) 182 | 183 | 184 | def se3_to_components(se3): 185 | """Converts an SE3 rotation matrix to linear translation and Euler angles 186 | 187 | Args: 188 | se3: 4x4 transformation matrix 189 | 190 | Returns: 191 | numpy.matrixlib.defmatrix.matrix: list of [x, y, z, roll, pitch, yaw] 192 | 193 | Raises: 194 | ValueError: if se3 is not 4x4 195 | 
ValueError: if a valid Euler parametrisation cannot be found 196 | 197 | """ 198 | if se3.shape != (4, 4): 199 | raise ValueError("SE3 transform must be a 4x4 matrix") 200 | xyzrpy = np.empty(6) 201 | xyzrpy[0:3] = se3[0:3, 3].transpose() 202 | xyzrpy[3:6] = so3_to_euler(se3[0:3, 0:3]) 203 | return xyzrpy 204 | -------------------------------------------------------------------------------- /eval_kitti/readme.txt: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # THE KITTI VISION BENCHMARK SUITE: VISUAL ODOMETRY / SLAM BENCHMARK # 3 | # Andreas Geiger Philip Lenz Raquel Urtasun # 4 | # Karlsruhe Institute of Technology # 5 | # Toyota Technological Institute at Chicago # 6 | # www.cvlibs.net # 7 | ########################################################################### 8 | 9 | This file describes the KITTI visual odometry / SLAM benchmark package. 10 | Accurate ground truth (<10cm) is provided by a GPS/IMU system with RTK 11 | float/integer corrections enabled. In order to enable a fair comparison of 12 | all methods, only ground truth for the sequences 00-10 is made publicly 13 | available. The remaining sequences (11-21) serve as evaluation sequences. 14 | 15 | NOTE: WHEN SUBMITTING RESULTS, PLEASE STORE THEM IN THE SAME DATA FORMAT IN 16 | WHICH THE GROUND TRUTH DATA IS PROVIDED (SEE 'POSES' BELOW), USING THE 17 | FILE NAMES 11.txt TO 21.txt. CREATE A ZIP ARCHIVE OF THEM AND STORE YOUR 18 | RESULTS IN ITS ROOT FOLDER. 19 | 20 | File description: 21 | ================= 22 | 23 | Folder 'sequences': 24 | 25 | Each folder within the folder 'sequences' contains a single sequence, where 26 | the left and right images are stored in the sub-folders image_0 and 27 | image_1, respectively. The images are provided as greyscale PNG images and 28 | can be loaded with MATLAB or libpng++. All images have been undistorted and 29 | rectified. Sequences 0-10 can be used for training, while results must be 30 | provided for the test sequences 11-21. 31 | 32 | Additionally we provide the velodyne point clouds for point-cloud-based 33 | methods. To save space, all scans have been stored as Nx4 float matrix into 34 | a binary file using the following code: 35 | 36 | stream = fopen (dst_file.c_str(),"wb"); 37 | fwrite(data,sizeof(float),4*num,stream); 38 | fclose(stream); 39 | 40 | Here, data contains 4*num values, where the first 3 values correspond to 41 | x,y and z, and the last value is the reflectance information. All scans 42 | are stored row-aligned, meaning that the first 4 values correspond to the 43 | first measurement. 
Since each scan might potentially have a different 44 | number of points, this must be determined from the file size when reading 45 | the file, where 1e6 is a good enough upper bound on the number of values: 46 | 47 | // allocate 4 MB buffer (only ~130*4*4 KB are needed) 48 | int32_t num = 1000000; 49 | float *data = (float*)malloc(num*sizeof(float)); 50 | 51 | // pointers 52 | float *px = data+0; 53 | float *py = data+1; 54 | float *pz = data+2; 55 | float *pr = data+3; 56 | 57 | // load point cloud 58 | FILE *stream; 59 | stream = fopen (currFilenameBinary.c_str(),"rb"); 60 | num = fread(data,sizeof(float),num,stream)/4; 61 | for (int32_t i=0; i<num; i++) { 62 | point_cloud.points.push_back(tPoint(*px,*py,*pz,*pr)); 63 | px+=4; py+=4; pz+=4; pr+=4; 64 | } 65 | fclose(stream); -------------------------------------------------------------------------------- /wgan/tflib/ops/linear.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/igul222/improved_wgan_training (see original license below). 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | # MIT License 23 | # 24 | # Copyright (c) 2017 Ishaan Gulrajani 25 | # 26 | # Permission is hereby granted, free of charge, to any person obtaining a copy 27 | # of this software and associated documentation files (the "Software"), to deal 28 | # in the Software without restriction, including without limitation the rights 29 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | # copies of the Software, and to permit persons to whom the Software is 31 | # furnished to do so, subject to the following conditions: 32 | # 33 | # The above copyright notice and this permission notice shall be included in all 34 | # copies or substantial portions of the Software. 35 | # 36 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | # SOFTWARE. 43 | 44 | import tflib as lib 45 | 46 | import numpy as np 47 | import tensorflow as tf 48 | 49 | _default_weightnorm = False 50 | def enable_default_weightnorm(): 51 | global _default_weightnorm 52 | _default_weightnorm = True 53 | 54 | def disable_default_weightnorm(): 55 | global _default_weightnorm 56 | _default_weightnorm = False 57 | 58 | _weights_stdev = None 59 | def set_weights_stdev(weights_stdev): 60 | global _weights_stdev 61 | _weights_stdev = weights_stdev 62 | 63 | def unset_weights_stdev(): 64 | global _weights_stdev 65 | _weights_stdev = None 66 | 67 | def Linear( 68 | name, 69 | input_dim, 70 | output_dim, 71 | inputs, 72 | biases=True, 73 | initialization=None, 74 | weightnorm=None, 75 | gain=1.
76 | ): 77 | """ 78 | initialization: None, `lecun`, 'glorot', `he`, 'glorot_he', `orthogonal`, `("uniform", range)` 79 | """ 80 | with tf.name_scope(name) as scope: 81 | 82 | def uniform(stdev, size): 83 | if _weights_stdev is not None: 84 | stdev = _weights_stdev 85 | return np.random.uniform( 86 | low=-stdev * np.sqrt(3), 87 | high=stdev * np.sqrt(3), 88 | size=size 89 | ).astype('float32') 90 | 91 | if initialization == 'lecun':# and input_dim != output_dim): 92 | # disabling orth. init for now because it's too slow 93 | weight_values = uniform( 94 | np.sqrt(1./input_dim), 95 | (input_dim, output_dim) 96 | ) 97 | 98 | elif initialization == 'glorot' or (initialization == None): 99 | 100 | weight_values = uniform( 101 | np.sqrt(2./(input_dim+output_dim)), 102 | (input_dim, output_dim) 103 | ) 104 | 105 | elif initialization == 'he': 106 | 107 | weight_values = uniform( 108 | np.sqrt(2./input_dim), 109 | (input_dim, output_dim) 110 | ) 111 | 112 | elif initialization == 'glorot_he': 113 | 114 | weight_values = uniform( 115 | np.sqrt(4./(input_dim+output_dim)), 116 | (input_dim, output_dim) 117 | ) 118 | 119 | elif initialization == 'orthogonal' or \ 120 | (initialization == None and input_dim == output_dim): 121 | 122 | # From lasagne 123 | def sample(shape): 124 | if len(shape) < 2: 125 | raise RuntimeError("Only shapes of length 2 or more are " 126 | "supported.") 127 | flat_shape = (shape[0], np.prod(shape[1:])) 128 | # TODO: why normal and not uniform? 129 | a = np.random.normal(0.0, 1.0, flat_shape) 130 | u, _, v = np.linalg.svd(a, full_matrices=False) 131 | # pick the one with the correct shape 132 | q = u if u.shape == flat_shape else v 133 | q = q.reshape(shape) 134 | return q.astype('float32') 135 | weight_values = sample((input_dim, output_dim)) 136 | 137 | elif initialization[0] == 'uniform': 138 | 139 | weight_values = np.random.uniform( 140 | low=-initialization[1], 141 | high=initialization[1], 142 | size=(input_dim, output_dim) 143 | ).astype('float32') 144 | 145 | else: 146 | 147 | raise Exception('Invalid initialization!') 148 | 149 | weight_values *= gain 150 | 151 | weight = lib.param( 152 | name + '.W', 153 | weight_values 154 | ) 155 | 156 | if weightnorm==None: 157 | weightnorm = _default_weightnorm 158 | if weightnorm: 159 | norm_values = np.sqrt(np.sum(np.square(weight_values), axis=0)) 160 | # norm_values = np.linalg.norm(weight_values, axis=0) 161 | 162 | target_norms = lib.param( 163 | name + '.g', 164 | norm_values 165 | ) 166 | 167 | with tf.name_scope('weightnorm') as scope: 168 | norms = tf.sqrt(tf.reduce_sum(tf.square(weight), reduction_indices=[0])) 169 | weight = weight * (target_norms / norms) 170 | 171 | # if 'Discriminator' in name: 172 | # print "WARNING weight constraint on {}".format(name) 173 | # weight = tf.nn.softsign(10.*weight)*.1 174 | 175 | if inputs.get_shape().ndims == 2: 176 | result = tf.matmul(inputs, weight) 177 | else: 178 | reshaped_inputs = tf.reshape(inputs, [-1, input_dim]) 179 | result = tf.matmul(reshaped_inputs, weight) 180 | result = tf.reshape(result, tf.stack(tf.unstack(tf.shape(inputs))[:-1] + [output_dim])) 181 | 182 | if biases: 183 | result = tf.nn.bias_add( 184 | result, 185 | lib.param( 186 | name + '.b', 187 | np.zeros((output_dim,), dtype='float32') 188 | ) 189 | ) 190 | 191 | return result -------------------------------------------------------------------------------- /adapt_images.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 
3 | # 4 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # This file is licensed under the Creative Commons 8 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 9 | # To view a copy of this license, visit 10 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 11 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 12 | # 13 | 14 | import argparse 15 | import os 16 | import re 17 | import csv 18 | #from datetime import datetime as dt 19 | from image import load_image, crop_image, scale_image, savez_compressed 20 | from camera_model import CameraModel 21 | from transform import build_se3_transform, build_intrinsic_matrix 22 | from array_utils import save_as_list, list_to_array, save_txt 23 | import numpy as np 24 | 25 | 26 | def process_image(img, crop=None, scale=None): 27 | return resize_image(img, crop, scale) 28 | 29 | 30 | def resize_image(img, crop=None, scale=None): 31 | resolution = get_resolution(img) 32 | if crop: 33 | img = crop_image(img, crop[0], crop[1]) 34 | if scale: 35 | img = scale_image(img, scale[0], scale[1]) 36 | return img, resolution 37 | 38 | 39 | def get_resolution(img): 40 | resolution = [img.shape[1], img.shape[0]] 41 | return resolution 42 | 43 | 44 | def get_intrinsics_parameters(focal_length, principal_point, resolution, crop=None, scale=None): 45 | if crop: 46 | resolution = crop 47 | principal_point = [x / 2. for x in crop] ## FIXME see this same method in adapt_images_kitti 48 | if scale: 49 | focal_length = [float(scale[i])/resolution[i] * focal_length[i] for i in range(len(focal_length))] 50 | principal_point = [x / 2. for x in scale] 51 | return focal_length, principal_point 52 | 53 | 54 | def main(): 55 | args = get_arguments() 56 | 57 | camera = re.search('(stereo|mono_(left|right|rear))', args.dir).group(0) 58 | 59 | timestamps_path = os.path.join(os.path.join(args.dir, os.pardir, camera + '.timestamps')) 60 | if not os.path.isfile(timestamps_path): 61 | timestamps_path = os.path.join(args.dir, os.pardir, os.pardir, camera + '.timestamps') 62 | if not os.path.isfile(timestamps_path): 63 | raise IOError("Could not find timestamps file") 64 | 65 | model = None 66 | if args.models_dir: 67 | model = CameraModel(args.models_dir, args.dir) 68 | 69 | output_dir = os.curdir 70 | if args.output_dir: 71 | output_dir = args.output_dir 72 | if not os.path.isdir(output_dir): 73 | raise IOError(output_dir + " is not an existing folder") 74 | 75 | result_list = [] 76 | count = 0 77 | dictionary = {} 78 | t_records = [] 79 | p_records = [] 80 | angles_records = [] 81 | intrinsic_matrix = None 82 | 83 | with open(args.poses_file) as vo_file: 84 | vo_reader = csv.reader(vo_file) 85 | headers = next(vo_file) 86 | for row in vo_reader: 87 | src_image_name = row[0] 88 | dst_image_name = row[1] 89 | src_image_filename = os.path.join(args.dir, src_image_name + '.png') 90 | dst_image_filename = os.path.join(args.dir, dst_image_name + '.png') 91 | if not os.path.isfile(src_image_filename) or not os.path.isfile(dst_image_filename): 92 | continue 93 | if dst_image_name not in dictionary: 94 | img, orig_resolution = process_image(load_image(dst_image_filename, model), args.crop, args.scale) 95 | dictionary[dst_image_name] = count 96 | count = count + 1 97 | result_list.append(list(img)) 98 | if src_image_name not in dictionary: 99 | img, orig_resolution = process_image(load_image(src_image_filename, model), args.crop, args.scale) 100 | dictionary[src_image_name] = count 101 | count
= count + 1 102 | result_list.append(list(img)) 103 | 104 | focal_length, principal_point = get_intrinsics_parameters(model.get_focal_length(), model.get_principal_point(), orig_resolution, args.crop, args.scale) 105 | src_image_idx = dictionary[src_image_name] 106 | dst_image_idx = dictionary[dst_image_name] 107 | xyzrpy = [float(v) for v in row[2:8]] 108 | rel_pose = build_se3_transform(xyzrpy) 109 | t_matrix = rel_pose[0:3] # 3x4 matrix 110 | intrinsic_matrix = build_intrinsic_matrix(focal_length, principal_point) 111 | p_matrix = intrinsic_matrix * t_matrix 112 | t_records.append((t_matrix, src_image_idx, dst_image_idx)) 113 | p_records.append((p_matrix, src_image_idx, dst_image_idx)) 114 | angles_records.append((xyzrpy, src_image_idx, dst_image_idx)) 115 | 116 | transf = np.array(t_records, dtype=[('T',('float64',(3,4))),('src_idx', 'int32'),('dst_idx', 'int32')]) 117 | proy = np.array(p_records, dtype=[('P',('float64',(3,4))),('src_idx', 'int32'),('dst_idx', 'int32')]) 118 | angles = np.array(angles_records, dtype=[('ang',('float64',6)),('src_idx', 'int32'),('dst_idx', 'int32')]) 119 | # Save this only once because it is constant for the whole dataset (or it should be) 120 | if intrinsic_matrix is not None: 121 | save_txt(os.path.join(output_dir,"intrinsic_matrix"), intrinsic_matrix) 122 | save_txt(os.path.join(output_dir,"intrinsic_parameters"), [focal_length, principal_point]) 123 | #path = os.path.normpath(args.dir) 124 | #folders = path.split(os.sep) 125 | #compressed_file_path = os.path.join(output_dir, folders[-3]) 126 | result = list_to_array(result_list) 127 | save_txt(os.path.join(output_dir, 'images_shape'), result.shape, fmt='%i') 128 | print(result.shape) 129 | compressed_file_path = os.path.join(output_dir, 'images') 130 | savez_compressed(compressed_file_path, result) 131 | savez_compressed(os.path.join(output_dir, 't'),transf) 132 | savez_compressed(os.path.join(output_dir, 'p'),proy) 133 | savez_compressed(os.path.join(output_dir, 'angles'),angles) 134 | 135 | 136 | def get_arguments(): 137 | parser = argparse.ArgumentParser(description='Adapt images from a given directory') 138 | parser.add_argument('dir', type=str, help='Directory containing images.') 139 | parser.add_argument('poses_file', type=str, help='File containing VO poses') 140 | parser.add_argument('--models_dir', type=str, default=None, 141 | help='(optional) Directory containing camera model. If supplied, images will be undistorted before processing') 142 | parser.add_argument('--crop', nargs=2, default=None, type=int, metavar=('WIDTH', 'HEIGHT'), 143 | help='(optional) If supplied, images will be cropped to WIDTH x HEIGHT') 144 | parser.add_argument('--scale', nargs=2, default=None, type=int, metavar=('WIDTH', 'HEIGHT'), 145 | help='(optional) If supplied, images will be scaled to WIDTH x HEIGHT') 146 | parser.add_argument('--output_dir', type=str, default=None, help='(optional) Output directory') 147 | # parser.add_argument('image_name', type=str, help='Image name.') 148 | args = parser.parse_args() 149 | return args 150 | 151 | 152 | if __name__ == "__main__": 153 | main() 154 | -------------------------------------------------------------------------------- /camera_model.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo.
3 | # This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk 4 | # (see original license below) 5 | # 6 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 7 | # For more information see 8 | # 9 | # This file is licensed under the Creative Commons 10 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 11 | # To view a copy of this license, visit 12 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 13 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 14 | # 15 | 16 | ################################################################################ 17 | # 18 | # Copyright (c) 2017 University of Oxford 19 | # Authors: 20 | # Geoff Pascoe (gmp@robots.ox.ac.uk) 21 | # 22 | # This work is licensed under the Creative Commons 23 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 24 | # To view a copy of this license, visit 25 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 26 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 27 | # 28 | ################################################################################ 29 | 30 | import re 31 | import os 32 | import numpy as np 33 | import scipy.interpolate as interp 34 | from scipy.ndimage import map_coordinates 35 | 36 | 37 | class CameraModel: 38 | """Provides intrinsic parameters and undistortion LUT for a camera. 39 | 40 | Attributes: 41 | camera (str): Name of the camera. 42 | camera sensor (str): Name of the sensor on the camera for multi-sensor cameras. 43 | focal_length (tuple[float]): Focal length of the camera in horizontal and vertical axis, in pixels. 44 | principal_point (tuple[float]): Principal point of camera for pinhole projection model, in pixels. 45 | G_camera_image (:obj: `numpy.matrixlib.defmatrix.matrix`): Transform from image frame to camera frame. 46 | bilinear_lut (:obj: `numpy.ndarray`): Look-up table for undistortion of images, mapping pixels in an undistorted 47 | image to pixels in the distorted image 48 | 49 | """ 50 | 51 | def __init__(self, models_dir, images_dir): 52 | """Loads a camera model from disk. 53 | 54 | Args: 55 | models_dir (str): directory containing camera model files. 56 | images_dir (str): directory containing images for which to read camera model. 57 | 58 | """ 59 | self.camera = None 60 | self.camera_sensor = None 61 | self.focal_length = None 62 | self.principal_point = None 63 | self.G_camera_image = None 64 | self.bilinear_lut = None 65 | 66 | self.__load_intrinsics(models_dir, images_dir) 67 | self.__load_lut(models_dir, images_dir) 68 | 69 | def project(self, xyz, image_size): 70 | """Projects a pointcloud into the camera using a pinhole camera model. 71 | 72 | Args: 73 | xyz (:obj: `numpy.ndarray`): 3xn array, where each column is (x, y, z) point relative to camera frame. 74 | image_size (tuple[int]): dimensions of image in pixels 75 | 76 | Returns: 77 | numpy.ndarray: 2xm array of points, where each column is the (u, v) pixel coordinates of a point in pixels. 78 | numpy.array: array of depth values for points in image. 79 | 80 | Note: 81 | Number of output points m will be less than or equal to number of input points n, as points that do not 82 | project into the image are discarded. 
83 | 84 | """ 85 | if xyz.shape[0] == 3: 86 | xyz = np.vstack((xyz, np.ones((1, xyz.shape[1])))) 87 | xyzw = np.linalg.solve(self.G_camera_image, xyz) 88 | 89 | # Find which points lie in front of the camera 90 | in_front = [i for i in range(0, xyzw.shape[1]) if xyzw[2, i] >= 0] 91 | xyzw = xyzw[:, in_front] 92 | 93 | uv = np.vstack((self.focal_length[0] * xyzw[0, :] / xyzw[2, :] + self.principal_point[0], 94 | self.focal_length[1] * xyzw[1, :] / xyzw[2, :] + self.principal_point[1])) 95 | 96 | in_img = [i for i in range(0, uv.shape[1]) 97 | if 0.5 <= uv[0, i] <= image_size[1] and 0.5 <= uv[1, i] <= image_size[0]] 98 | 99 | return uv[:, in_img], np.ravel(xyzw[2, in_img]) 100 | 101 | def undistort(self, image): 102 | """Undistorts an image. 103 | 104 | Args: 105 | image (:obj: `numpy.ndarray`): A distorted image. Must be demosaiced - i.e. must be a 3-channel RGB image. 106 | 107 | Returns: 108 | numpy.ndarray: Undistorted version of image. 109 | 110 | Raises: 111 | ValueError: if image size does not match camera model. 112 | ValueError: if image only has a single channel. 113 | 114 | """ 115 | if image.shape[0] * image.shape[1] != self.bilinear_lut.shape[0]: 116 | raise ValueError('Incorrect image size for camera model') 117 | 118 | lut = self.bilinear_lut[:, 1::-1].T.reshape((2, image.shape[0], image.shape[1])) 119 | 120 | if len(image.shape) == 1: 121 | raise ValueError('Undistortion function only works with multi-channel images') 122 | 123 | undistorted = np.rollaxis(np.array([map_coordinates(image[:, :, channel], lut, order=1) 124 | for channel in range(0, image.shape[2])]), 0, 3) 125 | 126 | return undistorted.astype(image.dtype) 127 | 128 | def __get_model_name(self, images_dir): 129 | self.camera = re.search('(stereo|mono_(left|right|rear))', images_dir).group(0) 130 | if self.camera == 'stereo': 131 | self.camera_sensor = re.search('(left|centre|right)', images_dir).group(0) 132 | if self.camera_sensor == 'left': 133 | return 'stereo_wide_left' 134 | elif self.camera_sensor == 'right': 135 | return 'stereo_wide_right' 136 | elif self.camera_sensor == 'centre': 137 | return 'stereo_narrow_left' 138 | else: 139 | raise RuntimeError('Unknown camera model for given directory: ' + images_dir) 140 | else: 141 | return self.camera 142 | 143 | def __load_intrinsics(self, models_dir, images_dir): 144 | model_name = self.__get_model_name(images_dir) 145 | intrinsics_path = os.path.join(models_dir, model_name + '.txt') 146 | 147 | with open(intrinsics_path) as intrinsics_file: 148 | vals = [float(x) for x in next(intrinsics_file).split()] 149 | self.focal_length = (vals[0], vals[1]) 150 | self.principal_point = (vals[2], vals[3]) 151 | 152 | G_camera_image = [] 153 | for line in intrinsics_file: 154 | G_camera_image.append([float(x) for x in line.split()]) 155 | self.G_camera_image = np.array(G_camera_image) 156 | 157 | def __load_lut(self, models_dir, images_dir): 158 | model_name = self.__get_model_name(images_dir) 159 | lut_path = os.path.join(models_dir, model_name + '_distortion_lut.bin') 160 | 161 | lut = np.fromfile(lut_path, np.double) 162 | lut = lut.reshape([2, lut.size // 2]) 163 | self.bilinear_lut = lut.transpose() 164 | 165 | def get_focal_length(self): 166 | return self.focal_length 167 | def get_principal_point(self): 168 | return self.principal_point 169 | 170 | -------------------------------------------------------------------------------- /vgg_trainable/test/plotHelpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding:
utf-8 -*- 2 | # 3 | # This file is part of wganvo. 4 | # 5 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 6 | # For more information see 7 | # 8 | # wganvo is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # wganvo is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with wganvo. If not, see . 20 | # 21 | 22 | import numpy as np 23 | import matplotlib.pyplot as plt 24 | from mpl_toolkits.mplot3d import Axes3D 25 | 26 | 27 | ################################################################################ 28 | # PLOT HELPERS 29 | ################################################################################ 30 | 31 | def plotVsTime1(time, xs, title="", xlabel=None, ylabel=None): 32 | fig = plt.figure() 33 | ax = fig.add_subplot(111) 34 | 35 | p, = ax.plot(time, xs) 36 | 37 | ax.set_xlabel(xlabel) 38 | ax.set_ylabel(ylabel) 39 | 40 | plt.grid() 41 | 42 | fig.suptitle(title) 43 | 44 | 45 | def plotVsTime3(time, xs, ys, zs, title=""): 46 | fig, (ax_x, ax_y, ax_z) = plt.subplots(3, sharex=True, sharey=True) 47 | 48 | px, = ax_x.plot(time, xs) 49 | py, = ax_y.plot(time, ys) 50 | pz, = ax_z.plot(time, zs) 51 | 52 | # Fine-tune figure; make subplots close to each other and hide x ticks for 53 | # all but bottom plot. 54 | # fig.subplots_adjust(hspace=0) 55 | plt.setp([a.get_xticklabels() for a in fig.axes[:-1]], visible=False) 56 | 57 | ax_z.set_xlabel("time (s)") 58 | 59 | ax_x.grid(True) 60 | ax_y.grid(True) 61 | ax_z.grid(True) 62 | 63 | fig.suptitle(title) 64 | 65 | 66 | def plotVsTime4(time, xs, ys, zs, title=""): 67 | fig, (ax_x, ax_y, ax_z, ax_avg) = plt.subplots(4, sharex=True, sharey=True) 68 | 69 | px, = ax_x.plot(time, xs) 70 | py, = ax_y.plot(time, ys) 71 | pz, = ax_z.plot(time, zs) 72 | p_avg, = ax_avg.plot(time, np.linalg.norm(np.array([xs, ys, zs]), axis=0)) 73 | 74 | # Fine-tune figure; make subplots close to each other and hide x ticks for 75 | # all but bottom plot. 
76 | fig.subplots_adjust(hspace=0) 77 | plt.setp([a.get_xticklabels() for a in fig.axes[:-1]], visible=False) 78 | 79 | ax_avg.set_xlabel("time (s)") 80 | 81 | plt.grid() 82 | 83 | fig.suptitle(title) 84 | 85 | fig.tight_layout() 86 | 87 | 88 | def plotPath3D(X, Y, Z, title): 89 | fig = plt.figure() 90 | 91 | ax = fig.add_subplot(111, projection='3d') 92 | 93 | ax.plot(X, Y, Z) 94 | 95 | # draw a point 96 | # ~ ax.scatter([0],[0],[0],color="black",s=1) 97 | 98 | # draw cube 99 | # ~ r = [-1, 1] 100 | # ~ for s, e in combinations(np.array(list(product(r,r,r))), 2): 101 | # ~ if np.sum(np.abs(s-e)) == r[1]-r[0]: 102 | # ~ ax.plot3D(*zip(s,e), color="black") 103 | 104 | # simulate equal aspect ratio 105 | # ~ """ 106 | max_range = np.array([X.max() - X.min(), Y.max() - Y.min(), Z.max() - Z.min()]).max() / 2.0 107 | mean_x = X.mean() 108 | mean_y = Y.mean() 109 | mean_z = Z.mean() 110 | ax.set_xlim(mean_x - max_range, mean_x + max_range) 111 | ax.set_ylim(mean_y - max_range, mean_y + max_range) 112 | ax.set_zlim(mean_z - max_range, mean_z + max_range) 113 | # ~ """ 114 | # Set axis labels 115 | 116 | xLabel = ax.set_xlabel('x') 117 | yLabel = ax.set_ylabel('y') 118 | zLabel = ax.set_zlabel('z') 119 | 120 | ax.set_title(title) 121 | 122 | 123 | def plotPaths3D(paths, labels, colors, title=None): 124 | """ 125 | """ 126 | 127 | assert (len(paths) == len(labels)) 128 | assert (len(paths) <= len(colors)) 129 | 130 | fig = plt.figure() 131 | 132 | ax = fig.add_subplot(111, projection='3d') 133 | 134 | for (X, Y, Z), label, color in zip(paths, labels, colors): 135 | ax.plot(X, Y, Z, label=label, color=color) 136 | 137 | # draw a point 138 | # ~ ax.scatter([0],[0],[0],color="black",s=1) 139 | 140 | # draw cube 141 | # ~ r = [-1, 1] 142 | # ~ for s, e in combinations(np.array(list(product(r,r,r))), 2): 143 | # ~ if np.sum(np.abs(s-e)) == r[1]-r[0]: 144 | # ~ ax.plot3D(*zip(s,e), color="black") 145 | 146 | # simulate equal aspect ratio 147 | # assume first path is ground truth 148 | # """ 149 | max_range = np.array([paths[0][0].max() - paths[0][0].min(), paths[0][1].max() - paths[0][1].min(), 150 | paths[0][2].max() - paths[0][2].min()]).max() / 2.0 151 | mean_x = paths[0][0].mean() 152 | mean_y = paths[0][1].mean() 153 | mean_z = paths[0][2].mean() 154 | ax.set_xlim(mean_x - max_range, mean_x + max_range) 155 | ax.set_ylim(mean_y - max_range, mean_y + max_range) 156 | ax.set_zlim(mean_z - max_range, mean_z + max_range) 157 | # """ 158 | # Set axis labels 159 | 160 | xLabel = ax.set_xlabel('x (m)') 161 | yLabel = ax.set_ylabel('y (m)') 162 | zLabel = ax.set_zlabel('z (m)') 163 | 164 | handles, labels = ax.get_legend_handles_labels() 165 | ax.legend(handles, labels) 166 | 167 | if title: 168 | ax.set_title(title) 169 | 170 | 171 | def plotPaths2D(paths, labels, colors, xlabel=None, ylabel=None, cloud_file=None, grid=None, save_filename=None): 172 | """ 173 | """ 174 | 175 | assert (len(paths) == len(labels)) 176 | assert (len(paths) <= len(colors)) 177 | 178 | # Plot the 2D trajectory (coordinate z = 0) 179 | fig = plt.figure() 180 | ax = fig.add_subplot(111) 181 | 182 | if (cloud_file): 183 | cloud = np.loadtxt(cloud_file) 184 | ax.scatter(cloud[:, 0], cloud[:, 1], s=1, color='0.1') 185 | 186 | for (X, Y), label, color in zip(paths, labels, colors): 187 | ax.plot(X, Y, label=label, color=color, linewidth=1.5) 188 | ax.plot(X[0], Y[0], marker='v', alpha=1, markersize=8, color=color) 189 | 190 | # enable for KITTI 00 dataset 191 | # ~ plt.xlim(-10, 50) 192 | # ~ plt.ylim(0, 30) 193 | 194 | 
plt.gca().set_aspect('equal', adjustable='box') 195 | 196 | if xlabel: 197 | ax.set_xlabel(xlabel) 198 | 199 | if ylabel: 200 | ax.set_ylabel(ylabel) 201 | 202 | handles, labels = ax.get_legend_handles_labels() 203 | ax.legend(handles, labels, ncol=2, loc='center', bbox_to_anchor=(0.5, 1.2)) 204 | 205 | if grid: 206 | ax.grid(True) 207 | 208 | 209 | def plotLoops3D(paths, loops, title=None, time_unit=None): 210 | fig = plt.figure() 211 | 212 | ax = fig.add_subplot(111, projection='3d') 213 | 214 | for X, Y, Z, color in paths: 215 | ax.plot(X, Y, Z, color=color) 216 | 217 | for X, Y, Z, color in loops: 218 | ax.plot(X, Y, Z, color=color) 219 | 220 | # simulate equal aspect ratio 221 | # assume first path is ground truth 222 | # """ 223 | max_range = np.array([paths[0][0].max() - paths[0][0].min(), paths[0][1].max() - paths[0][1].min(), 224 | paths[0][2].max() - paths[0][2].min()]).max() / 2.0 225 | mean_x = paths[0][0].mean() 226 | mean_y = paths[0][1].mean() 227 | mean_z = paths[0][2].mean() 228 | ax.set_xlim(mean_x - max_range, mean_x + max_range) 229 | ax.set_ylim(mean_y - max_range, mean_y + max_range) 230 | ax.set_zlim(mean_z - max_range, mean_z + max_range) 231 | # """ 232 | # Set axis labels 233 | 234 | xLabel = ax.set_xlabel('x (m)') 235 | yLabel = ax.set_ylabel('y (m)') 236 | if time_unit: 237 | zLabel = ax.set_zlabel('time (' + time_unit + ')') 238 | else: 239 | zLabel = ax.set_zlabel('time (seconds)') 240 | -------------------------------------------------------------------------------- /Dependencies.md: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | In this document we list all the pieces of code included by wganvo which are not property of the authors of wganvo. 3 | 4 | ## Code in `wgan` folder 5 | Source code in `wgan/` is a modified version of [Improved Training of Wasserstein GANs 6 | ](https://github.com/igul222/improved_wgan_training/). These files are MIT-licensed. 7 | ``` 8 | MIT License 9 | 10 | Copyright (c) 2017 Ishaan Gulrajani 11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a copy 13 | of this software and associated documentation files (the "Software"), to deal 14 | in the Software without restriction, including without limitation the rights 15 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 16 | copies of the Software, and to permit persons to whom the Software is 17 | furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in all 20 | copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 28 | SOFTWARE. 29 | ``` 30 | 31 | ## Code in `vgg_trainable` folder (except `vgg.py`) 32 | The following files placed in `vgg_trainable` belong to the authors of [Tensorflow](https://github.com/tensorflow/tensorflow) 33 | and they are released under Apache License 2.0.
34 | * `vgg_trainable/input_data.py` 35 | * `vgg_trainable/main.py` 36 | * `vgg_trainable/model.py` 37 | 38 | ``` 39 | Copyright 2015 The TensorFlow Authors. All Rights Reserved. 40 | 41 | Licensed under the Apache License, Version 2.0 (the "License"); 42 | you may not use this file except in compliance with the License. 43 | You may obtain a copy of the License at 44 | 45 | http://www.apache.org/licenses/LICENSE-2.0 46 | 47 | Unless required by applicable law or agreed to in writing, software 48 | distributed under the License is distributed on an "AS IS" BASIS, 49 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 50 | See the License for the specific language governing permissions and 51 | limitations under the License. 52 | ``` 53 | 54 | ## Code in `eval_kitti` folder 55 | Source code in `eval_kitti` belongs to the authors of KITTI. Some files are part of libviso2, which is released under GPL License. 56 | ``` 57 | Copyright 2011. All rights reserved. 58 | Institute of Measurement and Control Systems 59 | Karlsruhe Institute of Technology, Germany 60 | 61 | This file is part of libviso2. 62 | Authors: Andreas Geiger 63 | 64 | libviso2 is free software; you can redistribute it and/or modify it under the 65 | terms of the GNU General Public License as published by the Free Software 66 | Foundation; either version 2 of the License, or any later version. 67 | 68 | libviso2 is distributed in the hope that it will be useful, but WITHOUT ANY 69 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 70 | PARTICULAR PURPOSE. See the GNU General Public License for more details. 71 | 72 | You should have received a copy of the GNU General Public License along with 73 | libviso2; if not, write to the Free Software Foundation, Inc., 51 Franklin 74 | Street, Fifth Floor, Boston, MA 02110-1301, USA 75 | ``` 76 | Files that are not explicitly licensed are released under 77 | [Creative Commons Attribution-NonCommercial-ShareAlike 3.0](http://creativecommons.org/licenses/by-nc-sa/3.0/), 78 | (according to their [website](http://www.cvlibs.net/datasets/kitti/)). 79 | 80 | ## `transformations.py` from C. Gohlke 81 | `transformations.py` file is BSD licensed. 82 | ``` 83 | Copyright (c) 2006-2018, Christoph Gohlke 84 | Copyright (c) 2006-2018, The Regents of the University of California 85 | Produced at the Laboratory for Fluorescence Dynamics 86 | All rights reserved. 87 | 88 | Redistribution and use in source and binary forms, with or without 89 | modification, are permitted provided that the following conditions are met: 90 | 91 | * Redistributions of source code must retain the above copyright 92 | notice, this list of conditions and the following disclaimer. 93 | * Redistributions in binary form must reproduce the above copyright 94 | notice, this list of conditions and the following disclaimer in the 95 | documentation and/or other materials provided with the distribution. 96 | * Neither the name of the copyright holders nor the names of any 97 | contributors may be used to endorse or promote products derived 98 | from this software without specific prior written permission. 99 | 100 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 101 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 102 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 103 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 104 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 105 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 106 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 107 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 108 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 109 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 110 | POSSIBILITY OF SUCH DAMAGE. 111 | ``` 112 | 113 | ## `tfquaternions.py` 114 | `tfquaternions.py` is based on a file from [tf-quaternion](https://github.com/PhilJd/tf-quaternion) (Apache 2.0 License). 115 | ``` 116 | Copyright Philipp Jund (jundp@cs.uni-freiburg.de) 2017. All Rights Reserved. 117 | 118 | Licensed under the Apache License, Version 2.0 (the "License"); 119 | you may not use this file except in compliance with the License. 120 | You may obtain a copy of the License at 121 | 122 | http://www.apache.org/licenses/LICENSE-2.0 123 | 124 | Unless required by applicable law or agreed to in writing, software 125 | distributed under the License is distributed on an "AS IS" BASIS, 126 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 127 | See the License for the specific language governing permissions and 128 | limitations under the License. 129 | ``` 130 | 131 | ## Files from Robotcar Dataset 132 | The following files are part of [robotcar-dataset-sdk](https://github.com/ori-mrg/robotcar-dataset-sdk) and they are released under 133 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 License (CC BY-NC-SA 4.0). 134 | * `build_pointcloud.py` 135 | * `camera_model.py` 136 | * `image.py` 137 | * `interpolate_poses.py` 138 | * `play_images.py` 139 | * `project_laser_into_camera.py` 140 | * `transform.py` 141 | 142 | ``` 143 | Copyright (c) 2017 University of Oxford 144 | Authors: 145 | Geoff Pascoe (gmp@robots.ox.ac.uk) 146 | 147 | This work is licensed under the Creative Commons 148 | Attribution-NonCommercial-ShareAlike 4.0 International License. 149 | To view a copy of this license, visit 150 | http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 151 | Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 152 | 153 | ``` 154 | Some of them have been modified by the authors of wganvo. 155 | 156 | ## Files from `evo` library 157 | The following files are part of [evo](https://github.com/MichaelGrupp/evo) and they are released under the GPL License. 158 | * `geometry.py` 159 | * `trajectory.py` 160 | * `lie_algebra.py` 161 | 162 | ``` 163 | author: Michael Grupp 164 | 165 | evo is free software: you can redistribute it and/or modify 166 | it under the terms of the GNU General Public License as published by 167 | the Free Software Foundation, either version 3 of the License, or 168 | (at your option) any later version. 169 | 170 | evo is distributed in the hope that it will be useful, 171 | but WITHOUT ANY WARRANTY; without even the implied warranty of 172 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 173 | GNU General Public License for more details. 174 | 175 | You should have received a copy of the GNU General Public License 176 | along with evo. If not, see . 177 | ``` 178 | 179 | ## `vgg_trainable/vgg.py` 180 | This file is based on a file from https://github.com/machrisaa/tensorflow-vgg.
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | WGANVO: Monocular Visual Odometry based on Generative Adversarial Networks 2 | ============================= 3 | Visual Odometry is one of the most essential techniques for robot localization. 4 | In this work we propose the use of Generative Adversarial Networks to estimate the pose, taking images from a monocular camera as input. We present WGANVO, a Deep Learning based monocular Visual Odometry method. In particular, a neural network is trained to regress a pose estimate from an image pair. The training is performed using a semi-supervised approach, combining the unsupervised GAN technique with labeled data. Unlike geometry-based monocular methods, the proposed method can recover the absolute scale of the observed scene without prior knowledge or extra information, as it can infer it from the training stage. The resulting system is evaluated on the well-known KITTI dataset, where it is shown to work in real time, and the accuracy obtained is encouraging for the further development of Deep Learning based methods. 5 | 6 | ### Paper 7 | * **WGANVO: odometría visual monocular basada en redes adversarias generativas**, Javier Cremona, Lucas C. Uzal, Taihú Pire, Revista Iberoamericana de Automática e Informática industrial, [S.l.], Dec. 2021. ISSN 1697-7920. Available at: [pdf](https://polipapers.upv.es/index.php/RIAI/article/view/16113). DOI: https://doi.org/10.4995/riai.2022.16113. 8 | 9 | * **WGANVO: Monocular Visual Odometry based on Generative Adversarial Networks**, Javier Cremona, Lucas C. Uzal, Taihú Pire, arXiv [pdf](https://arxiv.org/abs/2007.13704). 10 | 11 | ### How to cite 12 | ``` 13 | @article{cremona2021wganvo, 14 | author = {Javier Alejandro Cremona and Lucas Uzal and Taihú Pire}, 15 | title = {WGANVO: odometría visual monocular basada en redes adversarias generativas}, 16 | journal = {Revista Iberoamericana de Automática e Informática industrial}, 17 | volume = {19}, 18 | number = {2}, 19 | year = {2021}, 20 | keywords = {Localización; Redes Neuronales; Robots Móviles}, 21 | abstract = {Los sistemas tradicionales de odometría visual (VO), directos o basados en características visuales, son susceptibles de cometer errores de correspondencia entre imágenes. Además, las configuraciones monoculares sólo son capaces de estimar la localización sujeto a un factor de escala, lo que hace imposible su uso inmediato en aplicaciones de robótica o realidad virtual. Recientemente, varios problemas de Visión por Computadora han sido abordados con éxito por algoritmos de Aprendizaje Profundo. En este trabajo presentamos un sistema de odometría visual monocular basado en Aprendizaje Profundo llamado WGANVO. Específicamente, entrenamos una red neuronal basada en GAN para regresionar una estimación de movimiento. El modelo resultante recibe un par de imágenes y estima el movimiento relativo entre ellas. Entrenamos la red neuronal utilizando un enfoque semi-supervisado. A diferencia de los sistemas monoculares tradicionales basados en geometría, nuestro método basado en Deep Learning es capaz de estimar la escala absoluta de la escena sin información extra ni conocimiento previo. Evaluamos WGANVO en el conocido conjunto de datos KITTI.
Demostramos que nuestro sistema funciona en tiempo real y la precisión obtenida alienta a seguir desarrollando sistemas de localización basados en Aprendizaje Profundo.}, 22 | issn = {1697-7920}, 23 | doi = {10.4995/riai.2022.16113}, 24 | url = {https://polipapers.upv.es/index.php/RIAI/article/view/16113} 25 | } 26 | ``` 27 | 28 | ## Video 1 29 | 30 | Demo 1 31 | 32 | 33 | ## Video 2 34 | 35 | Demo 2 36 | 37 | 38 | # License 39 | Our work is released under a [GPLv3 license](License-gpl.txt), except for some files. 40 | The scripts that are used to pre-process the images are released under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](License-CCBYNCSA4.txt) (see [LICENSE.txt](LICENSE.txt)). For a list of dependencies (and associated licenses), please see [Dependencies.md](Dependencies.md). 41 | 42 | # Requirements 43 | * NVIDIA GPU 44 | * Python 2.7 and pip (image pre-processing) 45 | 46 | # Dependencies 47 | ## Docker and docker-compose 48 | 1. Install Docker and docker-compose 49 | 50 | 2. Install nvidia-docker: 51 | ``` 52 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID) 53 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - 54 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 55 | sudo apt-get update && sudo apt-get install -y nvidia-docker2 56 | sudo systemctl restart docker 57 | ``` 58 | 59 | ## Dependencies for pre-processing KITTI images 60 | Run: 61 | ``` 62 | pip install -r requirements.txt 63 | ``` 64 | 65 | 66 | # Installation 67 | 1. Clone the repository 68 | 2. Run: 69 | ``` 70 | cd wganvo 71 | sudo make image 72 | sudo make start 73 | ``` 74 | 75 | # KITTI 76 | ## Image pre-processing 77 | In order to reduce the resolution of the images, we pre-process KITTI images using a Python script. 78 | **This step will be optional in future versions.** 79 | 80 | 1. Download the KITTI odometry dataset 81 | 2. For each of the KITTI sequences, simply run: 82 | ``` 83 | python adapt_images_kitti.py <path_to_sequence> --crop 500 375 --scale 128 96 --output_dir <output_dir> 84 | ``` 85 | 86 | As a result, several files are generated; these files, in this specific format, are required to train the network. Future versions will no longer require this specific folder structure to be used. 87 |
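To sanity-check the generated files before training, the archives can be inspected with NumPy. A minimal sketch, assuming one sequence's output was saved under `train_images/00/` as in the folder layout shown in the Training section below; the array keys inside each archive are listed rather than assumed:
```
import numpy as np

# List the arrays stored in each archive produced by the pre-processing step.
# The path is an example; adjust it to wherever the output was saved.
for name in ("images.npz", "t.npz"):
    with np.load("train_images/00/" + name) as archive:
        for key in archive.files:
            print("{}: {} {} {}".format(name, key, archive[key].shape, archive[key].dtype))
```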
88 | ## Training 89 | Input must be provided in a specific folder structure. **This step will be optional in future versions.** 90 | 91 | For example, if we want to train the network with sequences 00, 01 and 03 as input, we pre-process the images with `adapt_images_kitti.py` and then save the output files in this way: 92 | ``` 93 | train_images/ 94 | ├── 00 95 | │   ├── images.npz 96 | │   ├── t.npz 97 | │   ├── images_shape.txt 98 | │   └── ... 99 | ├── 01 100 | │   ├── images.npz 101 | │   ├── t.npz 102 | │   ├── images_shape.txt 103 | │   └── ... 104 | └── 03 105 | ├── images.npz 106 | ├── t.npz 107 | ├── images_shape.txt 108 | └── ... 109 | 110 | ``` 111 | **Note**: Folder names (`train_images`, `00`, `01`, `03`) are not required to be the same as the ones in this example. 112 | 113 | Then, you must repeat this step to generate the images used for the adversarial training and for testing the network. After that, copy everything into the `images-dir` folder. This folder will be mounted as a volume in the Docker container. For example, you may end up with this structure: 114 | ``` 115 | images-dir/ 116 | ├── train_images/ 117 | │   ├── 00 118 | │   ├── 01 119 | │   └── 03 120 | ├── train_gan_images/ 121 | │   ├── 06 122 | │   ├── 07 123 | │   └── 08 124 | └── test_images/ 125 | └── 04 126 | ``` 127 | **Note**: Try to have at least 2 folders in `train_images`. 128 | 129 | Open a shell in the Docker container: 130 | ``` 131 | make shell 132 | ``` 133 | Alternatively, you can run `docker run -it --rm --runtime=nvidia -v $(pwd)/images-dir:/var/kitti wganvo_wganvo:latest` or, in newer versions of Docker, `docker run -it --rm --gpus all -v $(pwd)/images-dir:/var/kitti wganvo_wganvo:latest`. The exact command may differ depending on your machine's operating system and NVIDIA GPU. See [this link](https://towardsdatascience.com/how-to-properly-use-the-gpu-within-a-docker-container-4c699c78c6d1). 134 | 135 | The main script is `wgan/wgan_improved.py`. In the container's shell you can run this script to train the network: 136 | ``` 137 | python wgan/wgan_improved.py /var/kitti/train_images /var/kitti/test_images /var/kitti/train_gan_images --batch_size <batch_size> 138 | ``` 139 | The command `python wgan/wgan_improved.py -h` will display all the options that can be configured. It is important to set `--log_dir`. 140 | 141 | ## Testing 142 | In order to test the resulting network, `vgg_trainable/test/test_model.py` can be used. Run: 143 | ``` 144 | python vgg_trainable/test/test_model.py <model_name> /var/kitti/test_images/ --batch_size <batch_size> 145 | ``` 146 | where `<batch_size>` is the batch size used to train the network and `<model_name>` is the name of the model that was saved in the log directory (the path was set using `--log_dir`). The name of the model is the name of the `<model_name>.meta` file (supply it to `test_model.py` without the `.meta` suffix). 147 | 148 | 149 |
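The relative motions produced by the network can also be chained into an absolute trajectory for further analysis, mirroring what `get_absolute_poses` in `eval_utils.py` (included later in this repository) does. A minimal NumPy sketch, assuming the estimates are stacked as an `(N, 3, 4)` array of KITTI-style `[R|t]` matrices; the function name is ours, not part of the codebase:
```
import numpy as np

def accumulate_relative_poses(relative_poses):
    """Chain an (N, 3, 4) stack of relative [R|t] transforms into absolute poses."""
    current = np.eye(4)
    absolute = np.empty_like(relative_poses)
    for i, rel in enumerate(relative_poses):
        step = np.eye(4)
        step[0:3, :] = rel            # promote to a homogeneous 4x4 transform
        current = current.dot(step)   # compose with the trajectory so far
        absolute[i] = current[0:3, :]
    return absolute
```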
-------------------------------------------------------------------------------- /interpolate_poses.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # This file is based on a file from https://github.com/ori-mrg/robotcar-dataset-sdk 4 | # (see original license below) 5 | # 6 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 7 | # For more information see 8 | # 9 | # This file is licensed under the Creative Commons 10 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 11 | # To view a copy of this license, visit 12 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 13 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 14 | # 15 | 16 | ################################################################################ 17 | # 18 | # Copyright (c) 2017 University of Oxford 19 | # Authors: 20 | # Geoff Pascoe (gmp@robots.ox.ac.uk) 21 | # 22 | # This work is licensed under the Creative Commons 23 | # Attribution-NonCommercial-ShareAlike 4.0 International License. 24 | # To view a copy of this license, visit 25 | # http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to 26 | # Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 27 | # 28 | ################################################################################ 29 | 30 | import bisect 31 | import csv 32 | import numpy as np 33 | import numpy.matlib as ml 34 | from transform import * 35 | 36 | 37 | def interpolate_vo_poses(vo_path, pose_timestamps, origin_timestamp): 38 | """Interpolate poses from visual odometry. 39 | 40 | Args: 41 | vo_path (str): path to file containing relative poses from visual odometry. 42 | pose_timestamps (list[int]): UNIX timestamps at which interpolated poses are required. 43 | origin_timestamp (int): UNIX timestamp of origin frame. Poses will be reported relative to this frame. 44 | 45 | Returns: 46 | list[numpy.matrixlib.defmatrix.matrix]: SE3 matrix representing interpolated pose for each requested timestamp. 47 | 48 | """ 49 | with open(vo_path) as vo_file: 50 | vo_reader = csv.reader(vo_file) 51 | headers = next(vo_file) 52 | 53 | vo_timestamps = [0] 54 | abs_poses = [ml.identity(4)] 55 | 56 | lower_timestamp = min(min(pose_timestamps), origin_timestamp) 57 | upper_timestamp = max(max(pose_timestamps), origin_timestamp) 58 | 59 | for row in vo_reader: 60 | timestamp = int(row[0]) 61 | if timestamp < lower_timestamp: 62 | vo_timestamps[0] = timestamp 63 | continue 64 | 65 | vo_timestamps.append(timestamp) 66 | 67 | xyzrpy = [float(v) for v in row[2:8]] 68 | rel_pose = build_se3_transform(xyzrpy) 69 | abs_pose = abs_poses[-1] * rel_pose 70 | abs_poses.append(abs_pose) 71 | 72 | if timestamp >= upper_timestamp: 73 | break 74 | 75 | return interpolate_poses(vo_timestamps, abs_poses, pose_timestamps, origin_timestamp) 76 | 77 | 78 | def interpolate_ins_poses(ins_path, pose_timestamps, origin_timestamp): 79 | """Interpolate poses from INS. 80 | 81 | Args: 82 | ins_path (str): path to file containing poses from INS. 83 | pose_timestamps (list[int]): UNIX timestamps at which interpolated poses are required. 84 | origin_timestamp (int): UNIX timestamp of origin frame. Poses will be reported relative to this frame. 85 | 86 | Returns: 87 | list[numpy.matrixlib.defmatrix.matrix]: SE3 matrix representing interpolated pose for each requested timestamp. 88 | 89 | """ 90 | with open(ins_path) as ins_file: 91 | ins_reader = csv.reader(ins_file) 92 | headers = next(ins_file) 93 | 94 | ins_timestamps = [0] 95 | abs_poses = [ml.identity(4)] 96 | 97 | upper_timestamp = max(max(pose_timestamps), origin_timestamp) 98 | 99 | for row in ins_reader: 100 | timestamp = int(row[0]) 101 | ins_timestamps.append(timestamp) 102 | 103 | xyzrpy = [float(v) for v in row[2:8]] 104 | abs_pose = build_se3_transform(xyzrpy) 105 | abs_poses.append(abs_pose) 106 | 107 | if timestamp >= upper_timestamp: 108 | break 109 | 110 | ins_timestamps = ins_timestamps[1:] 111 | abs_poses = abs_poses[1:] 112 | 113 | return interpolate_poses(ins_timestamps, abs_poses, pose_timestamps, origin_timestamp) 114 | 115 | 116 | def interpolate_poses(pose_timestamps, abs_poses, requested_timestamps, origin_timestamp): 117 | """Interpolate between absolute poses. 118 | 119 | Args: 120 | pose_timestamps (list[int]): Timestamps of supplied poses. Must be in ascending order. 121 | abs_poses (list[numpy.matrixlib.defmatrix.matrix]): SE3 matrices representing poses at the timestamps specified. 122 | requested_timestamps (list[int]): Timestamps for which interpolated timestamps are required. 123 | origin_timestamp (int): UNIX timestamp of origin frame. Poses will be reported relative to this frame. 124 | 125 | Returns: 126 | list[numpy.matrixlib.defmatrix.matrix]: SE3 matrix representing interpolated pose for each requested timestamp.
127 | 128 | Raises: 129 | ValueError: if pose_timestamps and abs_poses are not the same length 130 | ValueError: if pose_timestamps is not in ascending order 131 | 132 | """ 133 | requested_timestamps.insert(0, origin_timestamp) 134 | requested_timestamps = np.array(requested_timestamps) 135 | pose_timestamps = np.array(pose_timestamps) 136 | 137 | if len(pose_timestamps) != len(abs_poses): 138 | raise ValueError('Must supply same number of timestamps as poses') 139 | 140 | abs_quaternions = np.zeros((4, len(abs_poses))) 141 | abs_positions = np.zeros((3, len(abs_poses))) 142 | for i, pose in enumerate(abs_poses): 143 | if i > 0 and pose_timestamps[i-1] >= pose_timestamps[i]: 144 | raise ValueError('Pose timestamps must be in ascending order') 145 | 146 | abs_quaternions[:, i] = so3_to_quaternion(pose[0:3, 0:3]) 147 | abs_positions[:, i] = np.ravel(pose[0:3, 3]) 148 | 149 | upper_indices = [bisect.bisect(pose_timestamps, pt) for pt in requested_timestamps] 150 | lower_indices = [u - 1 for u in upper_indices] 151 | 152 | if max(upper_indices) >= len(pose_timestamps): 153 | upper_indices = [min(i, len(pose_timestamps) - 1) for i in upper_indices] 154 | 155 | fractions = (requested_timestamps - pose_timestamps[lower_indices]) / \ 156 | (pose_timestamps[upper_indices] - pose_timestamps[lower_indices]) 157 | 158 | quaternions_lower = abs_quaternions[:, lower_indices] 159 | quaternions_upper = abs_quaternions[:, upper_indices] 160 | 161 | d_array = (quaternions_lower * quaternions_upper).sum(0) 162 | 163 | linear_interp_indices = np.nonzero(d_array >= 1) 164 | sin_interp_indices = np.nonzero(d_array < 1) 165 | 166 | scale0_array = np.zeros(d_array.shape) 167 | scale1_array = np.zeros(d_array.shape) 168 | 169 | scale0_array[linear_interp_indices] = 1 - fractions[linear_interp_indices] 170 | scale1_array[linear_interp_indices] = fractions[linear_interp_indices] 171 | 172 | theta_array = np.arccos(np.abs(d_array[sin_interp_indices])) 173 | 174 | scale0_array[sin_interp_indices] = \ 175 | np.sin((1 - fractions[sin_interp_indices]) * theta_array) / np.sin(theta_array) 176 | scale1_array[sin_interp_indices] = \ 177 | np.sin(fractions[sin_interp_indices] * theta_array) / np.sin(theta_array) 178 | 179 | negative_d_indices = np.nonzero(d_array < 0) 180 | scale1_array[negative_d_indices] = -scale1_array[negative_d_indices] 181 | 182 | quaternions_interp = np.tile(scale0_array, (4, 1)) * quaternions_lower \ 183 | + np.tile(scale1_array, (4, 1)) * quaternions_upper 184 | 185 | positions_lower = abs_positions[:, lower_indices] 186 | positions_upper = abs_positions[:, upper_indices] 187 | 188 | positions_interp = np.multiply(np.tile((1 - fractions), (3, 1)), positions_lower) \ 189 | + np.multiply(np.tile(fractions, (3, 1)), positions_upper) 190 | 191 | poses_mat = ml.zeros((4, 4 * len(requested_timestamps))) 192 | 193 | poses_mat[0, 0::4] = 1 - 2 * np.square(quaternions_interp[2, :]) - \ 194 | 2 * np.square(quaternions_interp[3, :]) 195 | poses_mat[0, 1::4] = 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[2, :]) - \ 196 | 2 * np.multiply(quaternions_interp[3, :], quaternions_interp[0, :]) 197 | poses_mat[0, 2::4] = 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[3, :]) + \ 198 | 2 * np.multiply(quaternions_interp[2, :], quaternions_interp[0, :]) 199 | 200 | poses_mat[1, 0::4] = 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[2, :]) \ 201 | + 2 * np.multiply(quaternions_interp[3, :], quaternions_interp[0, :]) 202 | poses_mat[1, 1::4] = 1 - 2 * 
np.square(quaternions_interp[1, :]) \ 203 | - 2 * np.square(quaternions_interp[3, :]) 204 | poses_mat[1, 2::4] = 2 * np.multiply(quaternions_interp[2, :], quaternions_interp[3, :]) - \ 205 | 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[0, :]) 206 | 207 | poses_mat[2, 0::4] = 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[3, :]) - \ 208 | 2 * np.multiply(quaternions_interp[2, :], quaternions_interp[0, :]) 209 | poses_mat[2, 1::4] = 2 * np.multiply(quaternions_interp[2, :], quaternions_interp[3, :]) + \ 210 | 2 * np.multiply(quaternions_interp[1, :], quaternions_interp[0, :]) 211 | poses_mat[2, 2::4] = 1 - 2 * np.square(quaternions_interp[1, :]) - \ 212 | 2 * np.square(quaternions_interp[2, :]) 213 | 214 | poses_mat[0:3, 3::4] = positions_interp 215 | poses_mat[3, 3::4] = 1 216 | 217 | poses_mat = np.linalg.solve(poses_mat[0:4, 0:4], poses_mat) 218 | 219 | poses_out = [0] * (len(requested_timestamps) - 1) 220 | for i in range(1, len(requested_timestamps)): 221 | poses_out[i - 1] = poses_mat[0:4, i * 4:(i + 1) * 4] 222 | 223 | return poses_out 224 | -------------------------------------------------------------------------------- /eval_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of wganvo. 3 | # 4 | # Copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # wganvo is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # wganvo is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with wganvo. If not, see <https://www.gnu.org/licenses/>.
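# Evaluation helpers for wganvo: infer_relative_poses runs the trained network
# to obtain relative pose estimates, get_absolute_poses chains them into an
# absolute trajectory, and get_traslation_error / plot_frames_vs_abs_distance
# compute translation-error statistics for the estimated trajectories.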
19 | 20 | import sys, os, inspect 21 | 22 | import numpy as np 23 | import transformations 24 | import random 25 | import matplotlib 26 | 27 | matplotlib.use('Agg') 28 | import matplotlib.pyplot as plt 29 | import trajectory 30 | 31 | 32 | def infer_relative_poses(sess, dataset, batch_size, images_placeholder, outputs, 33 | targets_placeholder, train_mode=None): 34 | steps_per_epoch = dataset.num_examples // batch_size 35 | num_examples = steps_per_epoch * batch_size 36 | relative_poses_prediction = np.empty((num_examples, 3, 4)) 37 | relative_poses_target = np.empty((num_examples, 3, 4)) 38 | standardize_targets = False 39 | # rmse, mse, norm_mse = do_evaluation(sess,outputs,images_placeholder, targets_placeholder, dataset, batch_size, True) 40 | for step in xrange(steps_per_epoch): 41 | feed_dict = fill_feed_dict(dataset, images_placeholder, targets_placeholder, feed_with_batch=True, 42 | batch_size=batch_size, shuffle=False, standardize_targets=standardize_targets) 43 | if train_mode is not None: 44 | feed_dict[train_mode] = False 45 | prediction_batch, target_batch = sess.run([outputs, targets_placeholder], feed_dict=feed_dict) 46 | batch_relative_poses_pred = get_transformation_matrices(dataset, batch_size, 47 | prediction_batch, 48 | standardize_targets) 49 | batch_relative_poses_target = get_transformation_matrices(dataset, batch_size, 50 | target_batch, 51 | standardize_targets) 52 | init = batch_size * step 53 | end = batch_size * (step + 1) 54 | relative_poses_prediction[init:end] = batch_relative_poses_pred 55 | relative_poses_target[init:end] = batch_relative_poses_target 56 | if train_mode is not None: 57 | print("Train Mode: " + str(sess.run(train_mode, feed_dict))) 58 | return relative_poses_prediction, relative_poses_target 59 | 60 | 61 | def get_absolute_poses(relative_poses, inv=False): 62 | current = np.matrix(np.identity(4)) 63 | num_examples = relative_poses.shape[0] 64 | absolute_poses = np.empty(shape=relative_poses.shape) 65 | for i in xrange(num_examples): 66 | T = np.matrix(np.identity(4)) 67 | T[0:3, :] = relative_poses[i] 68 | if inv: 69 | T = np.linalg.inv(T) 70 | current = current * T 71 | absolute_poses[i] = current[0:3, :] 72 | return absolute_poses 73 | 74 | 75 | def get_transformation_matrices(dataset, batch_size, batch, 76 | standardize_targets): 77 | transformation_matrices = np.empty((batch_size, 3, 4)) 78 | # poses_target = np.empty((batch_size, 3, 4)) 79 | for i in xrange(batch_size): 80 | transformation = batch[i] 81 | # Original scale 82 | if standardize_targets: 83 | transformation = transformation * dataset.targets_std + dataset.targets_mean 84 | 85 | # prediction = prediction.reshape(3,4) 86 | # pred_transformation = inverse_intrinsic_matrix * prediction 87 | # u,_ = linalg.polar(pred_transformation[0:3,0:3]) 88 | # pred_transf_correction = np.empty((3,4)) 89 | # pred_transf_correction[0:3, 0:3] = u 90 | # pred_transf_correction[0:3, 3] = pred_transformation[0:3,3].transpose() 91 | 92 | # target = target_batch[i] 93 | # if standardize_targets: 94 | # target = target * dataset.targets_std + dataset.targets_mean 95 | # target = target.reshape(3,4) 96 | # target_transformation = inverse_intrinsic_matrix * target 97 | # poses_prediction[i] = pred_transf_correction.reshape(12) 98 | # poses_target[i] = target_transformation.reshape(12) 99 | 100 | transformation_matrices[i] = vector_to_transformation_mtx(transformation) 101 | # poses_target[i] = x_q_to_mtx(target) 102 | 103 | return transformation_matrices 104 | 105 | 106 | def 
vector_to_transformation_mtx(xq): # xq holds 3 translation components followed by a quaternion; returns the corresponding 3x4 [R|t] matrix 107 | mtx = transformations.quaternion_matrix(xq[3:]) 108 | mtx[0:3, 3] = xq[0:3] 109 | out = mtx[0:3, :] 110 | return out # .reshape(12) 111 | 112 | 113 | def fill_feed_dict(data_set, images_pl, labels_pl, points_pl=None, feed_with_batch=False, batch_size=None, shuffle=True, 114 | standardize_targets=False, fake_data=False): 115 | """Fills the feed_dict for training the given step or for evaluating the entire dataset. 116 | A feed_dict takes the form of: 117 | feed_dict = { 118 | <placeholder>: <tensor of values to be passed for placeholder>, 119 | .... 120 | } 121 | Args: 122 | data_set: The set of images and labels, from input_data.read_data_sets() 123 | images_pl: The images placeholder, from placeholder_inputs(). 124 | labels_pl: The labels placeholder, from placeholder_inputs(). 125 | Returns: 126 | feed_dict: The feed dictionary mapping from placeholders to values. 127 | """ 128 | # Create the feed_dict for the placeholders filled with the next 129 | # `batch size` examples. 130 | if (feed_with_batch): 131 | if (batch_size is None): 132 | raise ValueError("batch_size not specified") 133 | images_feed, labels_feed, points = data_set.next_batch(batch_size, 134 | fake_data, 135 | shuffle=shuffle, 136 | standardize_targets=standardize_targets) 137 | # Create the feed_dict for the placeholders filled with the entire dataset 138 | else: 139 | images_feed = data_set.images 140 | labels_feed = data_set.labels 141 | points = data_set.points 142 | 143 | feed_dict = { 144 | images_pl: images_feed, 145 | labels_pl: labels_feed, 146 | #points_pl: points, 147 | } 148 | if points_pl is not None: 149 | feed_dict[points_pl] = points 150 | return feed_dict 151 | 152 | 153 | def plot_frames_vs_abs_distance(relative_poses_prediction, relative_poses_target, dataset, output_dir, save_txt=False, 154 | plot=False, samples=30): 155 | groups = dataset.groups 156 | datasets_idxs = {} 157 | for i, _ in enumerate(relative_poses_prediction): 158 | group = str(groups[i]) 159 | if group in datasets_idxs: 160 | datasets_idxs[group].append(i) 161 | else: 162 | datasets_idxs[group] = [i] 163 | # acc_rmse_tr = 0. 164 | # acc_rmse_rot = 0.
165 | X_axis = [] 166 | Y_axis = [] 167 | for grp, idxs in datasets_idxs.iteritems(): 168 | relative_prediction = relative_poses_prediction[idxs] 169 | relative_target = relative_poses_target[idxs] 170 | max_num_of_frames = len(relative_prediction) 171 | assert max_num_of_frames == len(relative_target) 172 | # Get SAMPLES sub-trajectories from sequence 173 | for i in xrange(samples): 174 | # Random sub-trajectory 175 | N = random.randint(1, max_num_of_frames) 176 | start = random.randint(0, max_num_of_frames - N) 177 | traslation_error = get_traslation_error(relative_prediction[start:start + N], 178 | relative_target[start:start + N]) 179 | assert len(traslation_error) == N 180 | d = traslation_error[-1] 181 | X_axis.append(N) 182 | Y_axis.append(d) 183 | if save_txt: 184 | np.savetxt(os.path.join(output_dir, 'abs_poses_target_{}.txt'.format(grp)), 185 | get_absolute_poses(relative_target).reshape(-1, 12)) 186 | np.savetxt(os.path.join(output_dir, 'abs_poses_prediction_{}.txt'.format(grp)), 187 | get_absolute_poses(relative_prediction).reshape(-1, 12)) 188 | # print("Num of frames") 189 | # print(N) 190 | # print("d") 191 | # print(d) 192 | 193 | # if save_txt: 194 | # np.savetxt(os.path.join(output_dir, 'orig_relative_target.txt'), relative_poses_target.reshape(-1, 12)) 195 | # np.savetxt(os.path.join(output_dir, 'orig_relative_prediction.txt'), relative_poses_prediction.reshape(-1, 12)) 196 | # rmse_tr, rmse_rot = calc_trajectory_rmse(relative_poses_prediction[idxs], relative_poses_target[idxs]) 197 | # print('*' * 50) 198 | # print(grp, len(idxs)) 199 | # print(rmse_tr, rmse_rot) 200 | # acc_rmse_tr += rmse_tr 201 | # acc_rmse_rot += rmse_rot 202 | if plot: 203 | fig, ax = plt.subplots() 204 | ax.plot(X_axis, Y_axis, 'r.') 205 | fig.savefig(os.path.join(output_dir, 'f_vs_d.png')) 206 | return X_axis, Y_axis 207 | # return acc_rmse_tr / len(datasets_idxs), acc_rmse_rot / len(datasets_idxs) 208 | 209 | 210 | def get_traslation_error(relative_poses_prediction, relative_poses_target): 211 | absolute_poses_prediction = get_absolute_poses(relative_poses_prediction).reshape(-1, 12) 212 | absolute_poses_target = get_absolute_poses(relative_poses_target).reshape(-1, 12) 213 | poses_prediction = se3_pose_list(absolute_poses_prediction) 214 | poses_target = se3_pose_list(absolute_poses_target) 215 | poses_prediction = trajectory.PosePath3D(poses_se3=poses_prediction) 216 | poses_target = trajectory.PosePath3D(poses_se3=poses_target) 217 | E_tr = poses_prediction.positions_xyz - poses_target.positions_xyz 218 | traslation_error = [np.linalg.norm(E_i) for E_i in E_tr] 219 | return traslation_error 220 | 221 | 222 | def se3_pose_list(kitti_format): 223 | return [np.array([[r[0], r[1], r[2], r[3]], 224 | [r[4], r[5], r[6], r[7]], 225 | [r[8], r[9], r[10], r[11]], 226 | [0, 0, 0, 1]]) for r in kitti_format] 227 | 228 | 229 | def our_metric_evaluation(relative_prediction, relative_target, test_dataset, curr_fold_log_path, 230 | save_txt): 231 | frames, abs_distance = plot_frames_vs_abs_distance(relative_prediction, relative_target, test_dataset, 232 | curr_fold_log_path, save_txt=save_txt) 233 | frames = np.array(frames) 234 | abs_distance = np.array(abs_distance) 235 | te_eval = np.mean(np.square(np.log(abs_distance) / np.log(frames + 1))) 236 | return te_eval 237 | -------------------------------------------------------------------------------- /vgg_trainable/vgg.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file was originally part of 
https://github.com/machrisaa/tensorflow-vgg 3 | # 4 | # Modifications copyright (C) 2019 Javier Cremona (CIFASIS-CONICET) 5 | # For more information see 6 | # 7 | # wganvo is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # wganvo is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with wganvo. If not, see <https://www.gnu.org/licenses/>. 19 | # 20 | 21 | import tensorflow as tf 22 | 23 | import numpy as np 24 | from functools import reduce 25 | 26 | 27 | # VGG_MEAN = [103.939, 116.779, 123.68] 28 | 29 | 30 | class Vgg19: 31 | """ 32 | A trainable version of VGG19. 33 | """ 34 | 35 | def __init__(self, width, height, vgg19_npy_path=None, trainable=True, dropout=0.5, activation_function="relu"): 36 | if vgg19_npy_path is not None: 37 | self.data_dict = np.load(vgg19_npy_path, encoding='latin1').item() 38 | else: 39 | self.data_dict = None 40 | 41 | self.var_dict = {} 42 | self.trainable = trainable 43 | self.dropout = dropout 44 | self.activation_function = activation_function 45 | self.width = width 46 | self.height = height 47 | 48 | def build(self, images, train_mode=None, pooling_type="max"): 49 | """ 50 | Load variables from npy to build the VGG. 51 | :param images: [batch, height, width, 1] (usually a placeholder) 52 | :param train_mode: a bool tensor, usually a placeholder: if True, dropout will be turned on 53 | """ 54 | 55 | self.conv1_1 = self.conv_layer(images, 2, 64, "conv1_1") 56 | self.conv1_2 = self.conv_layer(self.conv1_1, 64, 64, "conv1_2") 57 | self.pool1 = self.pooling(self.conv1_2, 'pool1', pooling_type=pooling_type) 58 | 59 | self.conv2_1 = self.conv_layer(self.pool1, 64, 128, "conv2_1") 60 | self.conv2_2 = self.conv_layer(self.conv2_1, 128, 128, "conv2_2") 61 | self.pool2 = self.pooling(self.conv2_2, 'pool2', pooling_type=pooling_type) 62 | 63 | self.conv3_1 = self.conv_layer(self.pool2, 128, 256, "conv3_1") 64 | self.conv3_2 = self.conv_layer(self.conv3_1, 256, 256, "conv3_2") 65 | self.conv3_3 = self.conv_layer(self.conv3_2, 256, 256, "conv3_3") 66 | self.conv3_4 = self.conv_layer(self.conv3_3, 256, 256, "conv3_4") 67 | self.pool3 = self.pooling(self.conv3_4, 'pool3', pooling_type=pooling_type) 68 | 69 | self.conv4_1 = self.conv_layer(self.pool3, 256, 512, "conv4_1") 70 | self.conv4_2 = self.conv_layer(self.conv4_1, 512, 512, "conv4_2") 71 | self.conv4_3 = self.conv_layer(self.conv4_2, 512, 512, "conv4_3") 72 | self.conv4_4 = self.conv_layer(self.conv4_3, 512, 512, "conv4_4") 73 | self.pool4 = self.pooling(self.conv4_4, 'pool4', pooling_type=pooling_type) 74 | 75 | self.conv5_1 = self.conv_layer(self.pool4, 512, 512, "conv5_1") 76 | self.conv5_2 = self.conv_layer(self.conv5_1, 512, 512, "conv5_2") 77 | self.conv5_3 = self.conv_layer(self.conv5_2, 512, 512, "conv5_3") 78 | self.conv5_4 = self.conv_layer(self.conv5_3, 512, 512, "conv5_4") 79 | self.pool5 = self.pooling(self.conv5_4, 'pool5', pooling_type=pooling_type) 80 | 81 | fc_in_size = ((self.width // (2 ** 5)) * (self.height // (2 ** 5))) * 512 # (the conv layers keep the width and height, and the max pools halve them. There are 5 max pools)
82 | self.fc6 = self.fc_layer(self.pool5, fc_in_size, 4096, "fc6") 83 | self.relu6 = self.activation_function_tensor(self.fc6, act_function=self.activation_function)#tf.nn.relu(self.fc6) 84 | if train_mode is not None: 85 | print("Train Mode placeholder") 86 | self.relu6 = tf.cond(train_mode, lambda: tf.nn.dropout(self.relu6, self.dropout), lambda: self.relu6) 87 | elif self.trainable: 88 | print("Not Train Mode placeholder") 89 | self.relu6 = tf.nn.dropout(self.relu6, self.dropout) 90 | 91 | self.fc7 = self.fc_layer(self.relu6, 4096, 4096, "fc7") 92 | self.relu7 = self.activation_function_tensor(self.fc7, act_function=self.activation_function)#tf.nn.relu(self.fc7) 93 | if train_mode is not None: 94 | self.relu7 = tf.cond(train_mode, lambda: tf.nn.dropout(self.relu7, self.dropout), lambda: self.relu7) 95 | elif self.trainable: 96 | self.relu7 = tf.nn.dropout(self.relu7, self.dropout) 97 | 98 | self.fc8 = self.fc_layer(self.relu7, 4096, 7, "fc8") # the head outputs 7 values: 3 translation components followed by a quaternion, which is normalized below so fc8 contains a valid rotation 99 | quaternions = self.fc8[:, 3:7] 100 | quaternions_norm = tf.norm(quaternions, axis=1) 101 | unit_quaternions = quaternions / tf.reshape(quaternions_norm, (-1, 1)) 102 | self.fc8 = tf.concat([self.fc8[:, :3], unit_quaternions], 1) 103 | #self.prob = tf.nn.softmax(self.fc8, name="prob") 104 | 105 | self.data_dict = None 106 | return self.fc8 107 | 108 | def build_pruned_vgg(self, images, train_mode=None): 109 | """ 110 | Load variables from npy to build the VGG. 111 | :param images: [batch, height, width, 1] (usually a placeholder) 112 | :param train_mode: a bool tensor, usually a placeholder: if True, dropout will be turned on 113 | """ 114 | 115 | self.conv1_1 = self.conv_layer(images, 2, 64, "conv1_1") 116 | self.conv1_2 = self.conv_layer(self.conv1_1, 64, 64, "conv1_2") 117 | self.pool1 = self.pooling(self.conv1_2, 'pool1') 118 | 119 | self.conv2_1 = self.conv_layer(self.pool1, 64, 128, "conv2_1") 120 | self.conv2_2 = self.conv_layer(self.conv2_1, 128, 128, "conv2_2") 121 | self.pool2 = self.pooling(self.conv2_2, 'pool2') 122 | 123 | self.conv3_1 = self.conv_layer(self.pool2, 128, 256, "conv3_1") 124 | self.conv3_2 = self.conv_layer(self.conv3_1, 256, 256, "conv3_2") 125 | self.pool3 = self.pooling(self.conv3_2, 'pool3') 126 | 127 | self.conv4_1 = self.conv_layer(self.pool3, 256, 512, "conv4_1") 128 | self.conv4_2 = self.conv_layer(self.conv4_1, 512, 512, "conv4_2") 129 | self.pool4 = self.pooling(self.conv4_2, 'pool4') 130 | 131 | self.conv5_1 = self.conv_layer(self.pool4, 512, 512, "conv5_1") 132 | self.conv5_2 = self.conv_layer(self.conv5_1, 512, 512, "conv5_2") 133 | self.pool5 = self.pooling(self.conv5_2, 'pool5') 134 | 135 | fc_in_size = ((self.width // (2 ** 5)) * (self.height // (2 ** 5))) * 512 # (the conv layers keep the width and height, and the max pools halve them. There are 5 max pools)
136 | self.fc_in = tf.reshape(self.pool5, [-1, fc_in_size]) 137 | if train_mode is not None: 138 | self.fc_in = tf.cond(train_mode, lambda: tf.nn.dropout(self.fc_in, self.dropout), lambda: self.fc_in) 139 | elif self.trainable: 140 | self.fc_in = tf.nn.dropout(self.fc_in, self.dropout) 141 | 142 | self.output = self.fc_layer(self.fc_in, fc_in_size, 12, "fc8") 143 | self.data_dict = None 144 | return self.output 145 | 146 | def build_non_deep_nn(self, images): 147 | self.conv1_1 = self.conv_layer(images, 2, 32, "conv1_1") 148 | self.conv1_2 = self.conv_layer(self.conv1_1, 32, 32, "conv1_2") 149 | self.pool1 = self.max_pool(self.conv1_2, 'pool1') 150 | fc_in_size = ((self.width // 2) * (self.height // 2)) * 32 151 | print(fc_in_size) 152 | self.fc = self.fc_layer(self.pool1, fc_in_size, 12, "fc") 153 | self.data_dict = None 154 | return self.fc 155 | 156 | def pooling(self, bottom, name, pooling_type="max"): 157 | if pooling_type == "avg": 158 | return self.avg_pool(bottom, name) 159 | return self.max_pool(bottom, name) 160 | 161 | def avg_pool(self, bottom, name): 162 | print("Using avg pool") 163 | return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 164 | 165 | def max_pool(self, bottom, name): 166 | print("Using max pool") 167 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 168 | 169 | def activation_function_tensor(self, features, act_function="relu"): 170 | if act_function == "leaky_relu": 171 | print("Using leaky relu") 172 | return tf.nn.leaky_relu(features) 173 | print("Using relu") 174 | return tf.nn.relu(features) 175 | 176 | def conv_layer(self, bottom, in_channels, out_channels, name): 177 | with tf.variable_scope(name): 178 | filt, conv_biases = self.get_conv_var(3, in_channels, out_channels, name) 179 | 180 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 181 | bias = tf.nn.bias_add(conv, conv_biases) 182 | act_funct = self.activation_function_tensor(bias, act_function=self.activation_function) 183 | 184 | return act_funct 185 | 186 | def fc_layer(self, bottom, in_size, out_size, name): 187 | with tf.variable_scope(name): 188 | weights, biases = self.get_fc_var(in_size, out_size, name) 189 | 190 | x = tf.reshape(bottom, [-1, in_size]) 191 | fc = tf.nn.bias_add(tf.matmul(x, weights), biases) 192 | 193 | return fc 194 | 195 | def get_conv_var(self, filter_size, in_channels, out_channels, name): 196 | initializer = tf.contrib.layers.xavier_initializer() 197 | initial_value = initializer([filter_size, filter_size, in_channels, out_channels]) 198 | # initial_value = tf.truncated_normal([filter_size, filter_size, in_channels, out_channels], 0.0, 0.001) 199 | filters = self.get_var(initial_value, name, 0, name + "_filters") 200 | 201 | initial_value = initializer([out_channels]) 202 | # initial_value = tf.truncated_normal([out_channels], .0, .001) 203 | biases = self.get_var(initial_value, name, 1, name + "_biases") 204 | 205 | return filters, biases 206 | 207 | def get_fc_var(self, in_size, out_size, name): 208 | initializer = tf.contrib.layers.xavier_initializer() 209 | # initial_value = tf.truncated_normal([in_size, out_size], 0.0, 0.001) 210 | initial_value = initializer([in_size, out_size]) 211 | weights = self.get_var(initial_value, name, 0, name + "_weights") 212 | 213 | # initial_value = tf.truncated_normal([out_size], .0, .001) 214 | initial_value = initializer([out_size]) 215 | biases = self.get_var(initial_value, name, 1, name + "_biases") 216
| 217 | return weights, biases 218 | 219 | def get_var(self, initial_value, name, idx, var_name): 220 | if self.data_dict is not None and name in self.data_dict: 221 | value = self.data_dict[name][idx] 222 | else: 223 | value = initial_value 224 | 225 | if self.trainable: 226 | var = tf.Variable(value, name=var_name) 227 | else: 228 | var = tf.constant(value, dtype=tf.float32, name=var_name) 229 | 230 | self.var_dict[(name, idx)] = var 231 | 232 | # print var_name, var.get_shape().as_list() 233 | assert var.get_shape() == initial_value.get_shape() 234 | 235 | return var 236 | 237 | def save_npy(self, sess, npy_path="./vgg19-save.npy"): 238 | assert isinstance(sess, tf.Session) 239 | 240 | data_dict = {} 241 | 242 | for (name, idx), var in list(self.var_dict.items()): 243 | var_out = sess.run(var) 244 | if name not in data_dict: 245 | data_dict[name] = {} 246 | data_dict[name][idx] = var_out 247 | 248 | np.save(npy_path, data_dict) 249 | print(("file saved", npy_path)) 250 | return npy_path 251 | 252 | def get_var_count(self): 253 | count = 0 254 | for v in list(self.var_dict.values()): 255 | count += reduce(lambda x, y: x * y, v.get_shape().as_list()) 256 | return count 257 | --------------------------------------------------------------------------------