├── .gitignore ├── .gitmodules ├── LICENSE.md ├── README.md ├── assets └── images │ ├── stereolabel.jpg │ └── valve.jpg ├── config ├── calibration.yaml ├── cups.json └── valve.json ├── notebooks ├── DataloadingTest.ipynb ├── ModelBench.ipynb ├── OverfittingTest.ipynb └── keypoint_debug.ipynb ├── perception ├── __init__.py ├── constants.py ├── corner_net_lite │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── conda_packagelist.txt │ ├── configs │ │ ├── CornerNet-multi_scale.json │ │ ├── CornerNet.json │ │ ├── CornerNet_Saccade.json │ │ └── CornerNet_Squeeze.json │ ├── core │ │ ├── __init__.py │ │ ├── base.py │ │ ├── config.py │ │ ├── dbs │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── coco.py │ │ │ └── detection.py │ │ ├── detectors.py │ │ ├── external │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ ├── __init__.py │ │ │ ├── bbox.cpython-37m-x86_64-linux-gnu.so │ │ │ ├── bbox.pyx │ │ │ ├── nms.cpython-37m-x86_64-linux-gnu.so │ │ │ ├── nms.pyx │ │ │ └── setup.py │ │ ├── models │ │ │ ├── CornerNet.py │ │ │ ├── CornerNet_Saccade.py │ │ │ ├── CornerNet_Squeeze.py │ │ │ ├── __init__.py │ │ │ └── py_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── _cpools │ │ │ │ ├── .gitignore │ │ │ │ ├── __init__.py │ │ │ │ ├── setup.py │ │ │ │ └── src │ │ │ │ │ ├── bottom_pool.cpp │ │ │ │ │ ├── left_pool.cpp │ │ │ │ │ ├── right_pool.cpp │ │ │ │ │ └── top_pool.cpp │ │ │ │ ├── data_parallel.py │ │ │ │ ├── losses.py │ │ │ │ ├── modules.py │ │ │ │ ├── scatter_gather.py │ │ │ │ └── utils.py │ │ ├── nnet │ │ │ ├── __init__.py │ │ │ └── py_factory.py │ │ ├── paths.py │ │ ├── sample │ │ │ ├── __init__.py │ │ │ ├── cornernet.py │ │ │ ├── cornernet_saccade.py │ │ │ └── utils.py │ │ ├── test │ │ │ ├── __init__.py │ │ │ ├── cornernet.py │ │ │ └── cornernet_saccade.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── timer.py │ │ │ └── tqdm.py │ │ └── vis_utils.py │ ├── demo.jpg │ ├── demo.py │ ├── evaluate.py │ └── train.py ├── datasets │ ├── __init__.py │ ├── utils.py │ └── video.py ├── loss.py ├── models.py ├── pipeline.py └── utils │ ├── __init__.py │ ├── camera_utils.py │ ├── clustering_utils.py │ ├── linalg.py │ ├── ros.py │ └── timer.py ├── requirements.txt ├── scripts ├── collect_bags.py ├── constants.py ├── encode_bag.py ├── eval_model.py ├── label.py ├── make_video.sh ├── package_model.py ├── show_keypoints.py └── train.py ├── setup.py └── test ├── __init__.py ├── test_pipeline.py ├── test_video_dataset.py └── utils ├── __init__.py └── test_ros.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .pyc 3 | *.egg-info 4 | 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "perception/corner_net_lite"] 2 | path = perception/corner_net_lite 3 | url = https://github.com/princeton-vl/CornerNet-Lite 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject 
to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Object Keypoint Tracking 2 | 3 | This repository contains a toolkit for collecting, labeling and tracking object keypoints. Object keypoints are semantic points in an object's coordinate frame. 4 | 5 | The project allows collecting images from multiple viewpoints using a robot with a wrist-mounted camera. These image sequences can then be labeled using an easy-to-use user interface, StereoLabel. 6 | 7 | ![StereoLabel keypoint labeling](assets/images/stereolabel.jpg) 8 | 9 | Once the images are labeled, a model can be learned to detect keypoints in the images and compute 3D keypoints in the camera's coordinate frame. 10 | 11 | ## Installation 12 | 13 | External Dependencies: 14 | - [HUD](https://github.com/ethz-asl/hud) 15 | - ROS melodic/noetic 16 | 17 | Install HUD. Then install dependencies with `pip install -r requirements.txt` and finally install the package using `pip3 install -e .`. 18 | 19 | ## Usage 20 | 21 | Here we describe the process we used to arrive at our labeled datasets and learned models. 22 | 23 | ### Calibration and setup 24 | 25 | First, calibrate your camera and obtain a hand-eye calibration. Calibrating the camera can be done using [Kalibr](https://github.com/ethz-asl/kalibr). Hand-eye calibration can be done with the [ethz-asl/hand_eye_calibration](https://github.com/ethz-asl/hand_eye_calibration) or [easy_handeye](https://github.com/IFL-CAMP/easy_handeye) packages. 26 | 27 | The software currently assumes that the Kalibr `pinhole-equi` camera model was used when calibrating the camera. 28 | 29 | Kalibr will spit out a yaml file like the one at `config/calibration.yaml`. This should be passed in as the `--calibration` argument for `label.py` and other scripts. 30 | 31 | Once you have obtained the hand-eye calibration, configure your robot description so that the tf tree can correctly transform poses from the base frame to the camera optical frame. 32 | 33 | ### Collecting data 34 | 35 | The script `scripts/collect_bags.py` is a helper program to assist in collecting data. It will use [rosbag](http://wiki.ros.org/rosbag) to record the camera topics and transform messages. 36 | 37 | Run it with `python3 scripts/collect_bags.py --out <output_directory>`. 38 | 39 | Press enter to start recording a new sequence. Recording will start after a 5-second grace period, after which the topics will be recorded for 30 seconds. During the 30 seconds, slowly guide the robot arm to different viewpoints observing your target objects. 40 | 41 | ### Encoding data 42 | 43 | Since rosbag is not a very convenient or efficient format for our purposes, we encode the data into a format that is easier to work with and uses less disk space. This is done using the script `scripts/encode_bag.py`.
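The bags contain the stereo image topics listed in `config/calibration.yaml` (e.g. `/zedm/zed_node/left_raw/image_raw_color`) together with the recorded transforms. As a rough, hypothetical illustration of what the encoding step has to consume — not the repository's actual `encode_bag.py` implementation — reading the recorded images back out of a bag with `rosbag` and `cv_bridge` looks something like this:

```python
# Hypothetical sketch, not scripts/encode_bag.py: dump the left camera images
# from one recorded bag to disk. The topic name is taken from
# config/calibration.yaml; the bag filename is made up for illustration.
import cv2
import rosbag
from cv_bridge import CvBridge

bridge = CvBridge()
with rosbag.Bag('sequence_00.bag') as bag:
    topic = '/zedm/zed_node/left_raw/image_raw_color'
    for i, (_, msg, _) in enumerate(bag.read_messages(topics=[topic])):
        # Convert the sensor_msgs/Image message to an OpenCV BGR array.
        image = bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
        cv2.imwrite('frame_{:06d}.png'.format(i), image)
```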
44 | 45 | Run it with `python3 scripts/encode_bag.py --bags <bag_directory> --out <output_directory> --calibration <calibration_file>`. 46 | 47 | ### Labeling data 48 | 49 | ![Valve](assets/images/valve.jpg) 50 | 51 | First decide how many keypoints you will use for your object class and what their configuration is. Write a keypoint configuration file, like `config/valve.json` and `config/cups.json`. For example, in the case of our valve above, we define four different keypoints, which are of two types. The first type is the center keypoint type and the second is the spoke keypoint type. For our valve, there are three spokes, so we write our keypoint configuration as: 52 | ``` 53 | { "keypoint_config": [1, 3] } 54 | ``` 55 | What this means is that there will first be one keypoint of the first type and then three keypoints of the next type. Save this file for later. 56 | 57 | StereoLabel can be launched with `python3 scripts/label.py <data_directory>`. To label keypoints, click on the keypoints in the same order in each image. Make sure to label the points consistently with the keypoint configuration that you defined, so that the keypoints end up on the right heatmaps downstream. 58 | 59 | If you have multiple objects in the scene, it is important that you annotate one object at a time, sticking to the keypoint order, as the tool assumes that one object's keypoints follow each other. The number of keypoints you label should equal the number of objects times the total number of keypoints per object. 60 | 61 | Once you have labeled an equal number of points on the left and right image, the points will be backprojected, so that you can make sure that everything is correctly configured and that you didn't accidentally label the points in the wrong order. The points are saved at the same time to a file `keypoints.json` in each scene's directory. 62 | 63 | Here are some keyboard actions the tool supports: 64 | - Press `a` to replace the left frame with a random frame from the current sequence. 65 | - Press `b` to replace the right frame with a random frame from the current sequence. 66 | - Press `` to go to the next sequence, after you have labeled the current one. 67 | 68 | Switching frames is especially useful if, for example, a keypoint is occluded in one viewpoint and is hard to annotate accurately. 69 | 70 | Once the points have been saved and backprojected, you can freely press `a` and `b` to swap out the frames for different ones in the sequence. The tool will project the 3D points back into 2D onto the new frames, so you can check that the keypoints project nicely onto each frame. If they don't, you likely misclicked, the viewpoints are too close to each other, there is an issue with your intrinsics or hand-eye calibration, or the camera poses are inaccurate for some other reason. 71 | 72 | ### Checking the data 73 | 74 | Once all your sequences have been labeled, you can check that the labels are correct on all frames using `python scripts/show_keypoints.py <data_directory>`, which will play the images one by one and show the backprojected points. 75 | 76 | ### Learning a model 77 | 78 | First, download the weights for the CornerNet backbone model. This can be done from the [CornerNet repository](https://github.com/princeton-vl/CornerNet-Lite). We use the CornerNet-Squeeze model. Place the file at `models/corner_net.pkl`. 79 | 80 | You can train a model with `python scripts/train.py --train <training_data_directory> --val <validation_data_directory>`, where `--train` points to the directory containing your training scenes and `--val` points to the directory containing your validation scenes.
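Before launching a long training run, it can help to sanity-check that the data pipeline produces sensible heatmap targets for your keypoint configuration. `notebooks/DataloadingTest.ipynb` does this interactively; a minimal sketch of the same check is shown below. The data paths are placeholders, and the snippet assumes it is run from a location where `scripts.train` is importable, as the notebooks do.

```python
# Minimal sketch adapted from notebooks/DataloadingTest.ipynb: load one batch
# and print the tensor shapes. Paths are placeholders, not real datasets.
import json
from argparse import Namespace

from scripts.train import DataModule

with open('config/cups.json', 'rt') as f:
    keypoint_config = json.load(f)

module = DataModule(
    Namespace(train='/path/to/cups_train', val='/path/to/cups_test',
              batch_size=1, workers=1, pool=32),
    keypoint_config=keypoint_config)
module.setup('fit')

frame, target, depth, centers = next(iter(module.train_dataloader()))
print(frame.shape, target.shape, depth.shape, centers.shape)
```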
81 | 82 | Once done, you can package a model with `python scripts/package_model.py --model lightning_logs/version_x/checkpoints/<checkpoint>.ckpt --out model.pt`. 83 | 84 | You can then run and check the metrics on a test set using `python scripts/eval_model.py --model model.pt --keypoints <keypoint_config>`. 85 | 86 | ## General tips 87 | 88 | Here are some general tips that might be of use: 89 | - Collect data at something like 4-5 fps. Generally, frames that are super close to each other aren't that useful and you don't really need every single frame. That is, configure your camera node to only publish image messages at that rate. 90 | - Increase the publishing rate of your `robot_state_publisher` node to something like 100 or 200. 91 | - Move your robot slowly when collecting the data such that the time synchronization between your camera and robot is not that big of a problem. 92 | - Keep the scenes reasonable. 93 | - Collect data in all the operating conditions in which you will want to detect keypoints. 94 | 95 | 96 | -------------------------------------------------------------------------------- /assets/images/stereolabel.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/assets/images/stereolabel.jpg -------------------------------------------------------------------------------- /assets/images/valve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/assets/images/valve.jpg -------------------------------------------------------------------------------- /config/calibration.yaml: -------------------------------------------------------------------------------- 1 | cam0: 2 | cam_overlaps: [1] 3 | camera_model: pinhole 4 | distortion_coeffs: [0.14655632604598726, 0.12297180523259119, -0.5214959677970255, 5 | 0.5139323931960924] 6 | distortion_model: equidistant 7 | intrinsics: [698.556012835607, 699.1907430278537, 641.0077159827421, 368.1644084321484] 8 | resolution: [1280, 720] 9 | rostopic: /zedm/zed_node/left_raw/image_raw_color 10 | cam1: 11 | T_cn_cnm1: 12 | - [0.9999956816225143, 0.00029784085839669114, 0.002923700933251551, -0.062421684917401604] 13 | - [-0.0002810221198796635, 0.9999834217813691, -0.005751277161941048, -0.0002341856117885678] 14 | - [-0.002925365428825057, 0.005750430701121079, 0.9999791871753434, -5.759928320471004e-05] 15 | - [0.0, 0.0, 0.0, 1.0] 16 | cam_overlaps: [0] 17 | camera_model: pinhole 18 | distortion_coeffs: [0.22834286859634897, -0.5718130159249208, 1.6170727444831785, 19 | -1.6613629469156743] 20 | distortion_model: equidistant 21 | intrinsics: [693.93451021037, 694.5577956988693, 640.5646956100311, 364.74017968589965] 22 | resolution: [1280, 720] 23 | rostopic: /zedm/zed_node/right_raw/image_raw_color 24 | -------------------------------------------------------------------------------- /config/cups.json: -------------------------------------------------------------------------------- 1 | { 2 | "keypoint_config": [1, 1, 1] 3 | } 4 | -------------------------------------------------------------------------------- /config/valve.json: -------------------------------------------------------------------------------- 1 | { 2 | "keypoint_config": [1, 3] 3 | } 4 | -------------------------------------------------------------------------------- /notebooks/DataloadingTest.ipynb:
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "visible-treaty", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from matplotlib import pyplot as plt\n", 11 | "import numpy as np\n", 12 | "from argparse import Namespace\n", 13 | "import json" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "id": "modern-third", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "from scripts.train import DataModule, _to_image" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "id": "preceding-valuable", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "with open('../config/cups.json', 'rt') as f:\n", 34 | " keypoint_config = json.load(f)\n", 35 | "module = DataModule(Namespace(train='/home/ken/data/cups_train/', val='/home/ken/data/cups_test/', batch_size=1, workers=1, pool=32), keypoint_config=keypoint_config)\n", 36 | "module.setup('fit')\n", 37 | "\n", 38 | "train_loader = module.train_dataloader()\n", 39 | "train_iterator = iter(train_loader)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "killing-bullet", 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "import cv2\n", 50 | "def resize(target, width=640, height=360):\n", 51 | " return cv2.resize(target, (width, height))\n", 52 | " " 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "soviet-ceramic", 59 | "metadata": { 60 | "tags": [] 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "frame, target, depth, centers = next(train_iterator)\n", 65 | "\n", 66 | "plt.figure(figsize=(14, 8))\n", 67 | "image = _to_image(frame[0].numpy())\n", 68 | "for i in range(2):\n", 69 | " for j in range(2):\n", 70 | " axis = plt.subplot2grid((2, 2), loc=(i, j))\n", 71 | " axis.imshow(image)\n", 72 | " axis.imshow(resize(target[0, i * 2 + j].numpy()), alpha=0.5)\n", 73 | " plt.axis('off')\n", 74 | "plt.tight_layout()\n", 75 | "pass" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "descending-resource", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "plt.figure(figsize=(14, 8))\n", 86 | "for i in range(2):\n", 87 | " for j in range(2):\n", 88 | " axis = plt.subplot2grid((2, 2), loc=(i, j))\n", 89 | " axis.imshow(image)\n", 90 | " axis.imshow(resize(target[0, i * 2 + j].numpy()), alpha=0.5)\n", 91 | " axis.imshow(resize((np.abs(centers[0].numpy()) > 1e-1).any(axis=0).astype(np.float32)), alpha=0.5, vmin=0.0, vmax=1.0)\n", 92 | " plt.axis('off')\n", 93 | "plt.tight_layout()\n", 94 | "pass" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "id": "loved-swing", 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "vectors = centers.numpy()[0]\n", 105 | "indices = np.zeros((2, 180, 320), dtype=np.uint16)\n", 106 | "for i in range(180):\n", 107 | " for j in range(320):\n", 108 | " indices[:, i, j] = [j, i]\n", 109 | "plt.figure(figsize=(12, 10))\n", 110 | "plt.quiver(vectors[0], vectors[1], units='xy', scale_units='xy', scale=1.0)\n", 111 | "plt.show()\n" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "id": "disciplinary-parker", 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "pixel_indices = np.zeros((2, 180, 320), dtype=np.float32)\n", 122 | "for i in range(180):\n", 123 | " for j in 
range(320):\n", 124 | " pixel_indices[:, i, j] = [j + 0.5, i + 0.5]" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "id": "micro-abuse", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "norms = np.linalg.norm(vectors, axis=0)\n", 135 | "where_non_zero = target[0].sum(axis=0) > 0.5\n", 136 | "p_centers = np.zeros((2, pixel_indices.shape[1], pixel_indices.shape[2]), dtype=np.float32)\n", 137 | "p_centers[:, where_non_zero] = pixel_indices[:, where_non_zero] + vectors[:, where_non_zero]" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "id": "narrative-stanford", 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "dotted_image = cv2.resize(image.copy(), (320, 180))\n", 148 | "for point in p_centers[:, where_non_zero].transpose():\n", 149 | " cv2.circle(dotted_image, (point[0], point[1]), 2, (255, 0, 0), -1)\n", 150 | "plt.imshow(dotted_image)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "id": "central-contemporary", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "assert p_centers[:, where_non_zero].shape[1] > 3\n", 161 | "center_points = np.unique(p_centers[:, where_non_zero].round().astype(np.int32), axis=1)\n", 162 | "assert center_points.shape[1] >= 1 and center_points.shape[1] < 5" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "id": "01c4b265-c08f-41c6-9970-fe0a4c340379", 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "p_centers[:, where_non_zero].shape" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "id": "ffa0aa22-b817-4bf3-a59b-e7eb57ce6ad4", 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [] 182 | } 183 | ], 184 | "metadata": { 185 | "kernelspec": { 186 | "display_name": "vision", 187 | "language": "python", 188 | "name": "vision" 189 | }, 190 | "language_info": { 191 | "codemirror_mode": { 192 | "name": "ipython", 193 | "version": 3 194 | }, 195 | "file_extension": ".py", 196 | "mimetype": "text/x-python", 197 | "name": "python", 198 | "nbconvert_exporter": "python", 199 | "pygments_lexer": "ipython3", 200 | "version": "3.7.10" 201 | } 202 | }, 203 | "nbformat": 4, 204 | "nbformat_minor": 5 205 | } 206 | -------------------------------------------------------------------------------- /notebooks/ModelBench.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "clear-microwave", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from matplotlib import pyplot as plt\n", 11 | "import numpy as np\n", 12 | "from argparse import Namespace\n", 13 | "import json\n", 14 | "import torch" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "id": "first-eight", 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "from scripts.train import DataModule, _to_image, KeypointModule" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "monthly-prince", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "with open('../config/cups.json', 'rt') as f:\n", 35 | " keypoint_config = json.load(f)\n", 36 | "module = DataModule(Namespace(train='/home/ken/data/cups_train/', val='/home/ken/data/cups_test/', batch_size=1, workers=1, pool=32), keypoint_config=keypoint_config)\n", 37 | 
"module.setup('fit')\n", 38 | "\n", 39 | "dataloader = module.val_dataloader()\n", 40 | "print(dataloader)\n", 41 | "train_iterator = iter(dataloader)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "considered-parish", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "import cv2\n", 52 | "def resize(target, width=320, height=180):\n", 53 | " return cv2.resize(target, (width, height))\n", 54 | " " 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "id": "composed-charleston", 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "module = KeypointModule.load_from_checkpoint('../lightning_logs/version_0/checkpoints/epoch=15-step=33567.ckpt', keypoint_config=keypoint_config)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "mineral-vacuum", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "model = module.model.eval()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "id": "available-cancellation", 81 | "metadata": { 82 | "tags": [] 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "frame, target, depth, centers = next(train_iterator)\n", 87 | "\n", 88 | "plt.figure(figsize=(14, 8))\n", 89 | "image = _to_image(frame[0].numpy())\n", 90 | "for i in range(2):\n", 91 | " for j in range(2):\n", 92 | " axis = plt.subplot2grid((2, 2), loc=(i, j))\n", 93 | " axis.imshow(resize(image))\n", 94 | " axis.imshow(resize(target[0, i * 2 + j].numpy()), alpha=0.7)\n", 95 | " plt.axis('off')\n", 96 | "plt.tight_layout()\n", 97 | "pass" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "crazy-bookmark", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "with torch.no_grad():\n", 108 | " heatmap_p, depth_p, centers_p = model(frame)\n", 109 | " heatmap_p = torch.sigmoid(heatmap_p)\n", 110 | "\n", 111 | "plt.figure(figsize=(14, 8))\n", 112 | "image = _to_image(frame[0].numpy())\n", 113 | "for i in range(2):\n", 114 | " for j in range(2):\n", 115 | " axis = plt.subplot2grid((2, 2), loc=(i, j))\n", 116 | " axis.imshow(resize(image))\n", 117 | " axis.imshow(resize(heatmap_p[0, i * 2 + j].detach().numpy()), alpha=0.7, vmin=0.0, vmax=1.0)\n", 118 | " plt.axis('off')\n", 119 | "plt.tight_layout()\n", 120 | "pass" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "id": "742832fc-bb90-4de5-8360-1af36bb7f8d4", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "with torch.no_grad():\n", 131 | " heatmap_p, depth_p, centers_p = model(frame)\n", 132 | "\n", 133 | "plt.figure(figsize=(14, 8))\n", 134 | "image = _to_image(frame[0].numpy())\n", 135 | "plt.imshow(resize(depth_p[0, 3].detach().numpy()), alpha=0.7, vmin=0.0, vmax=2.0)\n", 136 | "plt.axis('off')\n", 137 | "plt.tight_layout()\n", 138 | "pass" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "sacred-cotton", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "pixel_indices = np.zeros((2, 180, 320), dtype=np.float32)\n", 149 | "for i in range(180):\n", 150 | " for j in range(320):\n", 151 | " pixel_indices[:, i, j] = [j + 0.5, i + 0.5]" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "id": "indonesian-steam", 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "vectors = centers.numpy()[0]\n", 162 | "norms = np.linalg.norm(vectors, axis=0)\n", 163 | 
"where_non_zero = np.abs(norms) > 1e-1\n", 164 | "gt_centers = np.zeros((2, pixel_indices.shape[1], pixel_indices.shape[2]), dtype=np.float32)\n", 165 | "gt_centers[:, where_non_zero] = pixel_indices[:, where_non_zero] + vectors[:, where_non_zero]\n", 166 | "\n", 167 | "where_heatmap_non_zero = heatmap_p[0].numpy().sum(axis=0) > 0.1\n", 168 | "p_centers = np.zeros((2, pixel_indices.shape[1], pixel_indices.shape[2]), dtype=np.float32)\n", 169 | "p_centers[:, where_heatmap_non_zero] = pixel_indices[:, where_heatmap_non_zero] + centers_p[0].detach().numpy()[:, where_heatmap_non_zero]\n", 170 | "\n", 171 | "figure = plt.figure(figsize=(10, 5))\n", 172 | "dotted_image = cv2.resize(image.copy(), (320, 180))\n", 173 | "for point in gt_centers[:, where_non_zero].transpose(): \n", 174 | " cv2.circle(dotted_image, (point[0], point[1]), 2, (255, 0, 0), -1)\n", 175 | " \n", 176 | "dotted_image_pred = cv2.resize(image.copy(), (320, 180))\n", 177 | "for point in p_centers[:, where_non_zero].transpose(): \n", 178 | " cv2.circle(dotted_image_pred, (point[0], point[1]), 2, (255, 0, 0), -1)\n", 179 | "\n", 180 | "axis = plt.subplot2grid((1, 2), loc=(0, 0))\n", 181 | "axis.imshow(dotted_image)\n", 182 | "plt.axis('off')\n", 183 | "\n", 184 | "axis = plt.subplot2grid((1, 2), loc=(0, 1))\n", 185 | "axis.imshow(dotted_image_pred)\n", 186 | "plt.axis('off')\n", 187 | "pass" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "id": "hindu-tuesday", 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "id": "essential-receiver", 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "id": "marine-alliance", 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [] 213 | } 214 | ], 215 | "metadata": { 216 | "kernelspec": { 217 | "display_name": "vision", 218 | "language": "python", 219 | "name": "vision" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.7.10" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 5 236 | } 237 | -------------------------------------------------------------------------------- /notebooks/OverfittingTest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "quantitative-muslim", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from matplotlib import pyplot as plt\n", 13 | "import numpy as np\n", 14 | "from argparse import Namespace\n", 15 | "import json\n", 16 | "import torch" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "amazing-millennium", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "from scripts.train import DataModule, _to_image, KeypointModule\n", 27 | "from perception.loss import KeypointLoss" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "id": "protective-myanmar", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "with open('../config/cups.json', 'rt') as f:\n", 38 | " keypoint_config = json.load(f)\n", 39 | "module = 
DataModule(Namespace(train='/home/ken/data/cups_train/', val='/home/ken/data/cups_test/', batch_size=2, workers=1, pool=256), keypoint_config=keypoint_config)\n", 40 | "module.setup('fit')\n", 41 | "\n", 42 | "train_loader = module.train_dataloader()\n", 43 | "train_iterator = iter(train_loader)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "id": "square-college", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "print(len(train_loader))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "sought-collins", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "from tqdm import tqdm\n", 64 | "for _ in tqdm(range(10)):\n", 65 | " next(train_iterator)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "private-bloom", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "import cv2\n", 76 | "def resize(target, width=640, height=360):\n", 77 | " return cv2.resize(target, (width, height), interpolation=cv2.INTER_LINEAR)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "typical-positive", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "module = KeypointModule(keypoint_config=keypoint_config)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "brilliant-jonathan", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "model = module.model" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "equipped-caution", 104 | "metadata": { 105 | "tags": [] 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "frame, target, depth, centers = next(train_iterator)\n", 110 | "\n", 111 | "def to_p(target):\n", 112 | " return target\n", 113 | "\n", 114 | "plt.figure(figsize=(7, 4 * frame.shape[0]))\n", 115 | "rows = frame.shape[0]\n", 116 | "for i in range(rows):\n", 117 | " axis = plt.subplot2grid((rows, 1), loc=(i, 0))\n", 118 | " axis.imshow(_to_image(frame[i].numpy()))\n", 119 | " axis.imshow(resize(to_p(target[i]).sum(dim=0).numpy()), alpha=0.7, vmin=0.0, vmax=1.0)\n", 120 | " plt.axis('off')\n", 121 | "plt.tight_layout()\n", 122 | "pass" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "id": "ultimate-northern", 129 | "metadata": { 130 | "tags": [] 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "optimizer = torch.optim.Adam(lr=3e-4, params=model.parameters())\n", 135 | "loss_fn = KeypointLoss(keypoint_config['keypoint_config'])\n", 136 | "model.train()\n", 137 | "progress = tqdm(range(100))\n", 138 | "for i in progress:\n", 139 | " p_heatmaps, p_depth, p_centers = model(frame)\n", 140 | " loss_value, _ = loss_fn(p_heatmaps, target, p_depth, depth, p_centers, centers)\n", 141 | " progress.set_postfix({'loss': loss_value.item()})\n", 142 | " loss_value.backward()\n", 143 | " optimizer.step()\n", 144 | " optimizer.zero_grad()\n", 145 | " " 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "id": "possible-banking", 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "from torch.nn import functional as F\n", 156 | "print(F.binary_cross_entropy_with_logits(p_heatmaps, target, reduction='none').sum(dim=[1,2,3]).mean() * loss_fn.heatmap_weight)\n", 157 | "print(loss_fn.reduce(F.l1_loss(p_depth[depth > 0.05], depth[depth > 0.05], reduction='none')))\n", 158 | "print(loss_fn.reduction)\n", 159 | 
"print(loss_fn.focal_loss(p_heatmaps, target).max())\n", 160 | "print(target.max())\n", 161 | "print(torch.sigmoid(p_heatmaps).max())" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "ordinary-arthur", 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "with torch.no_grad():\n", 172 | " model.eval()\n", 173 | " heatmap_p, depth_p, centers_p = model(frame)\n", 174 | " heatmap_p = torch.sigmoid(heatmap_p)\n", 175 | "plt.figure(figsize=(7, 4 * frame.shape[0]))\n", 176 | "rows = frame.shape[0]\n", 177 | "for i in range(rows):\n", 178 | " axis = plt.subplot2grid((rows, 1), loc=(i, 0))\n", 179 | " axis.imshow(_to_image(frame[i].numpy()))\n", 180 | " axis.imshow(resize(heatmap_p[i].sum(dim=0).numpy()), alpha=0.7, vmin=0.0, vmax=1.0)\n", 181 | " plt.axis('off')\n", 182 | "pass" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "id": "extreme-pacific", 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "pixel_indices = np.zeros((2, 180, 320), dtype=np.float32)\n", 193 | "for i in range(180):\n", 194 | " for j in range(320):\n", 195 | " pixel_indices[:, i, j] = [j + 0.5, i + 0.5]" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "id": "favorite-organ", 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "vectors = centers.numpy()[0]\n", 206 | "norms = np.linalg.norm(vectors, axis=0)\n", 207 | "where_non_zero = np.abs(norms) > 1e-2\n", 208 | "gt_centers = np.zeros((2, pixel_indices.shape[1], pixel_indices.shape[2]), dtype=np.float32)\n", 209 | "gt_centers = pixel_indices + vectors\n", 210 | "\n", 211 | "where_heatmap_non_zero = target[0].numpy().sum(axis=0) > 0.1\n", 212 | "\n", 213 | "p_centers = np.zeros((2, pixel_indices.shape[1], pixel_indices.shape[2]), dtype=np.float32)\n", 214 | "p_centers = pixel_indices + centers_p[0].detach().numpy()\n", 215 | "\n", 216 | "figure = plt.figure(figsize=(10, 5))\n", 217 | "dotted_image = cv2.resize(_to_image(frame[0].numpy().copy()), (320, 180))\n", 218 | "for point in gt_centers[:, where_non_zero].transpose(): \n", 219 | " cv2.circle(dotted_image, (point[0], point[1]), 2, (255, 0, 0), -1)\n", 220 | " \n", 221 | "dotted_image_pred = cv2.resize(_to_image(frame[0].numpy().copy()), (320, 180))\n", 222 | "where_pred_non_zero = heatmap_p[0].sum(dim=0) > 0.25\n", 223 | "for point in p_centers[:, where_pred_non_zero].transpose(): \n", 224 | " cv2.circle(dotted_image_pred, (point[0], point[1]), 2, (255, 0, 0), -1)\n", 225 | "\n", 226 | "axis = plt.subplot2grid((1, 2), loc=(0, 0))\n", 227 | "axis.imshow(dotted_image)\n", 228 | "plt.axis('off')\n", 229 | "\n", 230 | "axis = plt.subplot2grid((1, 2), loc=(0, 1))\n", 231 | "axis.imshow(dotted_image_pred)\n", 232 | "plt.axis('off')\n", 233 | "pass" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "id": "rapid-seating", 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "id": "spoken-oxide", 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "id": "liked-ensemble", 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [] 259 | } 260 | ], 261 | "metadata": { 262 | "kernelspec": { 263 | "display_name": "vision", 264 | "language": "python", 265 | "name": "vision" 266 | }, 267 | "language_info": { 268 | "codemirror_mode": { 269 | 
"name": "ipython", 270 | "version": 3 271 | }, 272 | "file_extension": ".py", 273 | "mimetype": "text/x-python", 274 | "name": "python", 275 | "nbconvert_exporter": "python", 276 | "pygments_lexer": "ipython3", 277 | "version": "3.7.10" 278 | } 279 | }, 280 | "nbformat": 4, 281 | "nbformat_minor": 5 282 | } 283 | -------------------------------------------------------------------------------- /notebooks/keypoint_debug.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "expected-tunisia", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from matplotlib import pyplot as plt\n", 11 | "import numpy as np\n", 12 | "%matplotlib widget" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "id": "extensive-candidate", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "points_3d = np.array([[ 0., 0., 0. ],\n", 23 | " [-0.03138264, -0.09922726, 0.00946505],\n", 24 | " [-0.07037588, 0.07883613, 0.0089331 ],\n", 25 | " [ 0.10787677, 0.02588217, 0.00836965]])\n", 26 | "keypoints_2d = np.array([[603.39123535, 301.11212158],\n", 27 | " [640.75494385, 396.78601074],\n", 28 | " [556.61352539, 291.11450195],\n", 29 | " [697.81604004, 276.12750244]])\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "id": "alleged-clerk", 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "application/vnd.jupyter.widget-view+json": { 41 | "model_id": "77ed4fe527794923953ac0c2b3680b47", 42 | "version_major": 2, 43 | "version_minor": 0 44 | }, 45 | "text/plain": [ 46 | "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" 47 | ] 48 | }, 49 | "metadata": {}, 50 | "output_type": "display_data" 51 | }, 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "(-0.15, 0.15)" 56 | ] 57 | }, 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "fig = plt.figure()\n", 65 | "ax = fig.add_subplot(projection='3d')\n", 66 | "ax.scatter(points_3d[:, 0], points_3d[:, 1], points_3d[:, 2], c=np.linspace(0, 1, 4), cmap='summer')\n", 67 | "ax.set_xlim(-0.15, 0.15)\n", 68 | "ax.set_ylim(-0.15, 0.15)\n", 69 | "ax.set_zlim(-0.15, 0.15)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "id": "settled-township", 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "application/vnd.jupyter.widget-view+json": { 81 | "model_id": "326ba677018f4daf96840f6b2f74e98b", 82 | "version_major": 2, 83 | "version_minor": 0 84 | }, 85 | "text/plain": [ 86 | "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" 87 | ] 88 | }, 89 | "metadata": {}, 90 | "output_type": "display_data" 91 | }, 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "(0.0, 720.0)" 96 | ] 97 | }, 98 | "execution_count": 4, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "figure = plt.figure()\n", 105 | "plt.scatter(keypoints_2d[:, 0], keypoints_2d[:, 1], c=np.arange(4), cmap='summer')\n", 106 | "plt.xlim(0, 1280)\n", 107 | "plt.ylim(0, 720)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 5, 113 | "id": "religious-wells", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "import cv2\n", 118 | "K = np.array([[697.87732212 , 0. 
, 648.08562626],\n", 119 | " [ 0., 697.28594061, 371.49958099],\n", 120 | " [ 0., 0.,1. ]]) / 8.0\n", 121 | "D = np.array([-1.74610270e-01, 2.75427408e-02, 6.24873971e-05, 9.10956548e-05])" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 21, 127 | "id": "sealed-cambridge", 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "(True,\n", 134 | " array([[ 0.75168222, -0.27787168, 0.5981314 , -0.0314786 ],\n", 135 | " [-0.45333637, -0.87638764, 0.16257562, -0.06893673],\n", 136 | " [ 0.47901981, -0.39335992, -0.78473435, 0.89141784],\n", 137 | " [ 0. , 0. , 0. , 1. ]]))" 138 | ] 139 | }, 140 | "execution_count": 21, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "success, rotvec, tvec = cv2.solvePnP(points_3d, keypoints_2d, K, np.zeros(4), flags=cv2.SOLVEPNP_EPNP)\n", 147 | "T = np.eye(4)\n", 148 | "R, _ = cv2.Rodrigues(rotvec)\n", 149 | "T[:3, :3] = R\n", 150 | "T[:3, 3] = tvec[:, 0]\n", 151 | "success, T" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 22, 157 | "id": "regional-gossip", 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "array([[-0.0314786 , -0.06893673, 0.89141784, 1. ],\n", 164 | " [-0.02183458, 0.03379049, 0.90798941, 1. ],\n", 165 | " [-0.10094206, -0.10467149, 0.81968532, 1. ],\n", 166 | " [ 0.04742468, -0.1391633 , 0.92634399, 1. ]])" 167 | ] 168 | }, 169 | "execution_count": 22, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "predicted_points = (T @ np.concatenate([points_3d, np.ones((4, 1))], axis=1)[:,:, None])[:, :, 0]\n", 176 | "predicted_points" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 24, 182 | "id": "valid-folks", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "application/vnd.jupyter.widget-view+json": { 188 | "model_id": "dc3df26192f6474da4c095a2f249e203", 189 | "version_major": 2, 190 | "version_minor": 0 191 | }, 192 | "text/plain": [ 193 | "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" 194 | ] 195 | }, 196 | "metadata": {}, 197 | "output_type": "display_data" 198 | }, 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "[[623.44151511 317.57581015 1. ]\n", 204 | " [631.30364683 397.44882302 1. ]\n", 205 | " [562.14389836 282.4581459 1. ]\n", 206 | " [683.81383124 266.74733235 1. 
]]\n" 207 | ] 208 | }, 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "(0.0, 720.0)" 213 | ] 214 | }, 215 | "execution_count": 24, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "fig = plt.figure()\n", 222 | "reprojected = (K @ predicted_points[:, :3, None])[:, :, 0]\n", 223 | "reprojected = reprojected / reprojected[:, 2:3]\n", 224 | "print(reprojected)\n", 225 | "\n", 226 | "plt.scatter(keypoints_2d[:, 0], keypoints_2d[:, 1], c=np.linspace(0, 1, keypoints_2d.shape[0]), cmap='spring')\n", 227 | "plt.scatter(reprojected[:, 0], reprojected[:, 1], c=np.linspace(0, 1, reprojected.shape[0]), cmap='spring')\n", 228 | "plt.xlim(0, 1280)\n", 229 | "plt.ylim(0, 720)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 9, 235 | "id": "formal-marketplace", 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "array([[525.72722885, 262.88184879, 0.83918927],\n", 242 | " [564.64292418, 344.10913791, 0.88081192],\n", 243 | " [450.11823921, 233.91501349, 0.82930932],\n", 244 | " [543.28804458, 205.62918175, 0.78049184]])" 245 | ] 246 | }, 247 | "execution_count": 9, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "(K @ predicted_points[:, :3, None])[:, :, 0]" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 10, 259 | "id": "permanent-assessment", 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "text/plain": [ 265 | "array([[-0.02599206, -0.07009551, 0.83918927, 1. ],\n", 266 | " [-0.0088821 , 0.02421945, 0.88081192, 1. ],\n", 267 | " [-0.1251584 , -0.10637394, 0.82930932, 1. ],\n", 268 | " [ 0.05368064, -0.1209306 , 0.78049184, 1. ]])" 269 | ] 270 | }, 271 | "execution_count": 10, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "predicted_points" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "id": "mature-corpus", 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [] 287 | } 288 | ], 289 | "metadata": { 290 | "kernelspec": { 291 | "display_name": "vision", 292 | "language": "python", 293 | "name": "vision" 294 | }, 295 | "language_info": { 296 | "codemirror_mode": { 297 | "name": "ipython", 298 | "version": 3 299 | }, 300 | "file_extension": ".py", 301 | "mimetype": "text/x-python", 302 | "name": "python", 303 | "nbconvert_exporter": "python", 304 | "pygments_lexer": "ipython3", 305 | "version": "3.7.10" 306 | } 307 | }, 308 | "nbformat": 4, 309 | "nbformat_minor": 5 310 | } 311 | -------------------------------------------------------------------------------- /perception/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import utils 2 | -------------------------------------------------------------------------------- /perception/constants.py: -------------------------------------------------------------------------------- 1 | import hud 2 | import numpy as np 3 | 4 | def _to_camera_matrix(proj): 5 | return np.array([[proj[0], 0., proj[2]], 6 | [0., proj[1], proj[3]], 7 | [0., 0., 1.]], dtype=np.float64) 8 | 9 | KEYPOINT_FILENAME = 'keypoints.json' 10 | IMAGE_HEIGHT = 720 11 | IMAGE_WIDTH = 1280 12 | IMAGE_RECT = hud.Rect(0, 0, IMAGE_WIDTH, IMAGE_HEIGHT) 13 | image_size = (int(IMAGE_RECT.width), int(IMAGE_RECT.height)) 14 | KEYPOINT_COLOR = np.array([1.0, 0.0, 0.0, 1.0]) 15 | 16 | -------------------------------------------------------------------------------- /perception/corner_net_lite/.gitignore: -------------------------------------------------------------------------------- 1 | loss/ 2 | data/ 3 | cache/ 4 | tf_cache/ 5 | debug/ 6 | results/ 7 | 8 | misc/outputs 9 | 10 | evaluation/evaluate_object 11 | evaluation/analyze_object 12 | 13 | nnet/__pycache__/ 14 | 15 | *.swp 16 | 17 | *.pyc 18 | *.o* 19 | -------------------------------------------------------------------------------- /perception/corner_net_lite/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Princeton University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /perception/corner_net_lite/README.md: -------------------------------------------------------------------------------- 1 | # CornerNet-Lite: Training, Evaluation and Testing Code 2 | Code for reproducing results in the following paper: 3 | 4 | [**CornerNet-Lite: Efficient Keypoint Based Object Detection**](https://arxiv.org/abs/1904.08900) 5 | Hei Law, Yun Teng, Olga Russakovsky, Jia Deng 6 | *arXiv:1904.08900* 7 | 8 | ## Getting Started 9 | ### Software Requirement 10 | - Python 3.7 11 | - PyTorch 1.0.0 12 | - CUDA 10 13 | - GCC 4.9.2 or above 14 | 15 | ### Installing Dependencies 16 | Please first install [Anaconda](https://anaconda.org) and create an Anaconda environment using the provided package list `conda_packagelist.txt`. 17 | ``` 18 | conda create --name CornerNet_Lite --file conda_packagelist.txt --channel pytorch 19 | ``` 20 | 21 | After you create the environment, please activate it. 22 | ``` 23 | source activate CornerNet_Lite 24 | ``` 25 | 26 | ### Compiling Corner Pooling Layers 27 | Compile the C++ implementation of the corner pooling layers. (GCC4.9.2 or above is required.) 28 | ``` 29 | cd /core/models/py_utils/_cpools/ 30 | python setup.py install --user 31 | ``` 32 | 33 | ### Compiling NMS 34 | Compile the NMS code which are originally from [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/nms/cpu_nms.pyx) and [Soft-NMS](https://github.com/bharatsingh430/soft-nms/blob/master/lib/nms/cpu_nms.pyx). 35 | ``` 36 | cd /core/external 37 | make 38 | ``` 39 | 40 | ### Downloading Models 41 | In this repo, we provide models for the following detectors: 42 | - [CornerNet-Saccade](https://drive.google.com/file/d/1MQDyPRI0HgDHxHToudHqQ-2m8TVBciaa/view?usp=sharing) 43 | - [CornerNet-Squeeze](https://drive.google.com/file/d/1qM8BBYCLUBcZx_UmLT0qMXNTh-Yshp4X/view?usp=sharing) 44 | - [CornerNet](https://drive.google.com/file/d/1e8At_iZWyXQgLlMwHkB83kN-AN85Uff1/view?usp=sharing) 45 | 46 | Put the CornerNet-Saccade model under `/cache/nnet/CornerNet_Saccade/`, CornerNet-Squeeze model under `/cache/nnet/CornerNet_Squeeze/` and CornerNet model under `/cache/nnet/CornerNet/`. (\* Note we use underscore instead of dash in both the directory names for CornerNet-Saccade and CornerNet-Squeeze.) 47 | 48 | Note: The CornerNet model is the same as the one in the original [CornerNet repo](https://github.com/princeton-vl/CornerNet). We just ported it to this new repo. 49 | 50 | ### Running the Demo Script 51 | After downloading the models, you should be able to use the detectors on your own images. We provide a demo script `demo.py` to test if the repo is installed correctly. 52 | ``` 53 | python demo.py 54 | ``` 55 | This script applies CornerNet-Saccade to `demo.jpg` and writes the results to `demo_out.jpg`. 56 | 57 | In the demo script, the default detector is CornerNet-Saccade. You can modify the demo script to test different detectors. For example, if you want to test CornerNet-Squeeze: 58 | ```python 59 | #!/usr/bin/env python 60 | 61 | import cv2 62 | from core.detectors import CornerNet_Squeeze 63 | from core.vis_utils import draw_bboxes 64 | 65 | detector = CornerNet_Squeeze() 66 | image = cv2.imread("demo.jpg") 67 | 68 | bboxes = detector(image) 69 | image = draw_bboxes(image, bboxes) 70 | cv2.imwrite("demo_out.jpg", image) 71 | ``` 72 | 73 | ### Using CornerNet-Lite in Your Project 74 | It is also easy to use CornerNet-Lite in your project. 
You will need to change the directory name from `CornerNet-Lite` to `CornerNet_Lite`. Otherwise, you won't be able to import CornerNet-Lite. 75 | ``` 76 | Your project 77 | │ README.md 78 | │ ... 79 | │ foo.py 80 | │ 81 | └───CornerNet_Lite 82 | │ 83 | └───directory1 84 | │ 85 | └───... 86 | ``` 87 | 88 | In `foo.py`, you can easily import CornerNet-Saccade by adding: 89 | ```python 90 | from CornerNet_Lite import CornerNet_Saccade 91 | 92 | def foo(): 93 | cornernet = CornerNet_Saccade() 94 | # CornerNet_Saccade is ready to use 95 | 96 | image = cv2.imread('/path/to/your/image') 97 | bboxes = cornernet(image) 98 | ``` 99 | 100 | If you want to train or evaluate the detectors on COCO, please move on to the following steps. 101 | 102 | ## Training and Evaluation 103 | 104 | ### Installing MS COCO APIs 105 | ``` 106 | mkdir -p /data 107 | cd /data 108 | git clone git@github.com:cocodataset/cocoapi.git coco 109 | cd /data/coco/PythonAPI 110 | make install 111 | ``` 112 | 113 | ### Downloading MS COCO Data 114 | - Download the training/validation split we use in our paper from [here](https://drive.google.com/file/d/1dop4188xo5lXDkGtOZUzy2SHOD_COXz4/view?usp=sharing) (originally from [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn/tree/master/data)) 115 | - Unzip the file and place `annotations` under `/data/coco` 116 | - Download the images (2014 Train, 2014 Val, 2017 Test) from [here](http://cocodataset.org/#download) 117 | - Create 3 directories, `trainval2014`, `minival2014` and `testdev2017`, under `/data/coco/images/` 118 | - Copy the training/validation/testing images to the corresponding directories according to the annotation files 119 | 120 | To train and evaluate a network, you will need to create a configuration file, which defines the hyperparameters, and a model file, which defines the network architecture. The configuration file should be in JSON format and placed in `/configs/`. Each configuration file should have a corresponding model file in `/core/models/`. i.e. If there is a `.json` in `/configs/`, there should be a `.py` in `/core/models/`. There is only one exception which we will mention later. 121 | 122 | ### Training and Evaluating a Model 123 | To train a model: 124 | ``` 125 | python train.py 126 | ``` 127 | 128 | We provide the configuration files and the model files for CornerNet-Saccade, CornerNet-Squeeze and CornerNet in this repo. Please check the configuration files in `/configs/`. 129 | 130 | To train CornerNet-Saccade: 131 | ``` 132 | python train.py CornerNet_Saccade 133 | ``` 134 | Please adjust the batch size in `CornerNet_Saccade.json` to accommodate the number of GPUs that are available to you. 135 | 136 | To evaluate the trained model: 137 | ``` 138 | python evaluate.py CornerNet_Saccade --testiter 500000 --split 139 | ``` 140 | 141 | If you want to test different hyperparameters during evaluation and do not want to overwrite the original configuration file, you can do so by creating a configuration file with a suffix (`-.json`). There is no need to create `-.py` in `/core/models/`. 142 | 143 | To use the new configuration file: 144 | ``` 145 | python evaluate.py --testiter --split --suffix 146 | ``` 147 | 148 | We also include a configuration file for CornerNet under multi-scale setting, which is `CornerNet-multi_scale.json`, in this repo. 
149 | 150 | To use the multi-scale configuration file: 151 | ``` 152 | python evaluate.py CornerNet --testiter --split --suffix multi_scale 153 | -------------------------------------------------------------------------------- /perception/corner_net_lite/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.detectors import CornerNet, CornerNet_Squeeze, CornerNet_Saccade 2 | from .core.vis_utils import draw_bboxes 3 | -------------------------------------------------------------------------------- /perception/corner_net_lite/conda_packagelist.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | blas=1.0=mkl 5 | bzip2=1.0.6=h14c3975_5 6 | ca-certificates=2018.12.5=0 7 | cairo=1.14.12=h8948797_3 8 | certifi=2018.11.29=py37_0 9 | cffi=1.11.5=py37he75722e_1 10 | cuda100=1.0=0 11 | cycler=0.10.0=py37_0 12 | cython=0.28.5=py37hf484d3e_0 13 | dbus=1.13.2=h714fa37_1 14 | expat=2.2.6=he6710b0_0 15 | ffmpeg=4.0=hcdf2ecd_0 16 | fontconfig=2.13.0=h9420a91_0 17 | freeglut=3.0.0=hf484d3e_5 18 | freetype=2.9.1=h8a8886c_1 19 | glib=2.56.2=hd408876_0 20 | graphite2=1.3.12=h23475e2_2 21 | gst-plugins-base=1.14.0=hbbd80ab_1 22 | gstreamer=1.14.0=hb453b48_1 23 | harfbuzz=1.8.8=hffaf4a1_0 24 | hdf5=1.10.2=hba1933b_1 25 | icu=58.2=h9c2bf20_1 26 | intel-openmp=2019.0=118 27 | jasper=2.0.14=h07fcdf6_1 28 | jpeg=9b=h024ee3a_2 29 | kiwisolver=1.0.1=py37hf484d3e_0 30 | libedit=3.1.20170329=h6b74fdf_2 31 | libffi=3.2.1=hd88cf55_4 32 | libgcc-ng=8.2.0=hdf63c60_1 33 | libgfortran-ng=7.3.0=hdf63c60_0 34 | libglu=9.0.0=hf484d3e_1 35 | libopencv=3.4.2=hb342d67_1 36 | libopus=1.2.1=hb9ed12e_0 37 | libpng=1.6.35=hbc83047_0 38 | libstdcxx-ng=8.2.0=hdf63c60_1 39 | libtiff=4.0.9=he85c1e1_2 40 | libuuid=1.0.3=h1bed415_2 41 | libvpx=1.7.0=h439df22_0 42 | libxcb=1.13=h1bed415_1 43 | libxml2=2.9.8=h26e45fe_1 44 | matplotlib=3.0.2=py37h5429711_0 45 | mkl=2018.0.3=1 46 | mkl_fft=1.0.6=py37h7dd41cf_0 47 | mkl_random=1.0.1=py37h4414c95_1 48 | ncurses=6.1=hf484d3e_0 49 | ninja=1.8.2=py37h6bb024c_1 50 | numpy=1.15.4=py37h1d66e8a_0 51 | numpy-base=1.15.4=py37h81de0dd_0 52 | olefile=0.46=py37_0 53 | opencv=3.4.2=py37h6fd60c2_1 54 | openssl=1.1.1a=h7b6447c_0 55 | pcre=8.42=h439df22_0 56 | pillow=5.2.0=py37heded4f4_0 57 | pip=10.0.1=py37_0 58 | pixman=0.34.0=hceecf20_3 59 | py-opencv=3.4.2=py37hb342d67_1 60 | pycparser=2.18=py37_1 61 | pyparsing=2.2.0=py37_1 62 | pyqt=5.9.2=py37h05f1152_2 63 | python=3.7.1=h0371630_3 64 | python-dateutil=2.7.3=py37_0 65 | pytorch=1.0.0=py3.7_cuda10.0.130_cudnn7.4.1_1 66 | pytz=2018.5=py37_0 67 | qt=5.9.7=h5867ecd_1 68 | readline=7.0=h7b6447c_5 69 | scikit-learn=0.19.1=py37hedc7406_0 70 | scipy=1.1.0=py37hfa4b5c9_1 71 | setuptools=40.2.0=py37_0 72 | sip=4.19.8=py37hf484d3e_0 73 | six=1.11.0=py37_1 74 | sqlite=3.25.3=h7b6447c_0 75 | tk=8.6.8=hbc83047_0 76 | torchvision=0.2.1=py37_1 77 | tornado=5.1=py37h14c3975_0 78 | tqdm=4.25.0=py37h28b3542_0 79 | wheel=0.31.1=py37_0 80 | xz=5.2.4=h14c3975_4 81 | zlib=1.2.11=ha838bed_2 82 | -------------------------------------------------------------------------------- /perception/corner_net_lite/configs/CornerNet-multi_scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "COCO", 4 | "batch_size": 49, 5 | "sampling_function": "cornernet", 6 | 7 | "train_split": "trainval", 8 | "val_split": 
"minival", 9 | 10 | "learning_rate": 0.00025, 11 | "decay_rate": 10, 12 | 13 | "val_iter": 100, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 5, 17 | 18 | "max_iter": 500000, 19 | "stepsize": 450000, 20 | "snapshot": 5000, 21 | 22 | "chunk_sizes": [4, 5, 5, 5, 5, 5, 5, 5, 5, 5], 23 | 24 | "data_dir": "./data" 25 | }, 26 | 27 | "db": { 28 | "rand_scale_min": 0.6, 29 | "rand_scale_max": 1.4, 30 | "rand_scale_step": 0.1, 31 | "rand_scales": null, 32 | 33 | "rand_crop": true, 34 | "rand_color": true, 35 | 36 | "border": 128, 37 | "gaussian_bump": true, 38 | 39 | "input_size": [511, 511], 40 | "output_sizes": [[128, 128]], 41 | 42 | "test_scales": [0.5, 0.75, 1, 1.25, 1.5], 43 | 44 | "top_k": 100, 45 | "categories": 80, 46 | "ae_threshold": 0.5, 47 | "nms_threshold": 0.5, 48 | 49 | "merge_bbox": true, 50 | "weight_exp": 10, 51 | 52 | "max_per_image": 100 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /perception/corner_net_lite/configs/CornerNet.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "COCO", 4 | "batch_size": 49, 5 | "sampling_function": "cornernet", 6 | 7 | "train_split": "trainval", 8 | "val_split": "minival", 9 | 10 | "learning_rate": 0.00025, 11 | "decay_rate": 10, 12 | 13 | "val_iter": 100, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 5, 17 | 18 | "max_iter": 500000, 19 | "stepsize": 450000, 20 | "snapshot": 5000, 21 | 22 | "chunk_sizes": [4, 5, 5, 5, 5, 5, 5, 5, 5, 5], 23 | 24 | "data_dir": "./data" 25 | }, 26 | 27 | "db": { 28 | "rand_scale_min": 0.6, 29 | "rand_scale_max": 1.4, 30 | "rand_scale_step": 0.1, 31 | "rand_scales": null, 32 | 33 | "rand_crop": true, 34 | "rand_color": true, 35 | 36 | "border": 128, 37 | "gaussian_bump": true, 38 | "gaussian_iou": 0.3, 39 | 40 | "input_size": [511, 511], 41 | "output_sizes": [[128, 128]], 42 | 43 | "test_scales": [1], 44 | 45 | "top_k": 100, 46 | "categories": 80, 47 | "ae_threshold": 0.5, 48 | "nms_threshold": 0.5, 49 | 50 | "max_per_image": 100 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /perception/corner_net_lite/configs/CornerNet_Saccade.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "COCO", 4 | "batch_size": 48, 5 | "sampling_function": "cornernet_saccade", 6 | 7 | "train_split": "trainval", 8 | "val_split": "minival", 9 | 10 | "learning_rate": 0.00025, 11 | "decay_rate": 10, 12 | 13 | "val_iter": 100, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 5, 17 | 18 | "max_iter": 500000, 19 | "stepsize": 450000, 20 | "snapshot": 5000, 21 | 22 | "chunk_sizes": [12, 12, 12, 12] 23 | }, 24 | 25 | "db": { 26 | "rand_scale_min": 0.5, 27 | "rand_scale_max": 1.1, 28 | "rand_scale_step": 0.1, 29 | "rand_scales": null, 30 | 31 | "rand_full_crop": true, 32 | "gaussian_bump": true, 33 | "gaussian_iou": 0.5, 34 | 35 | "min_scale": 16, 36 | "view_sizes": [], 37 | 38 | "height_mult": 31, 39 | "width_mult": 31, 40 | 41 | "input_size": [255, 255], 42 | "output_sizes": [[64, 64]], 43 | 44 | "att_max_crops": 30, 45 | "att_scales": [[1, 2, 4]], 46 | "att_thresholds": [0.3], 47 | 48 | "top_k": 12, 49 | "num_dets": 12, 50 | "categories": 80, 51 | "ae_threshold": 0.3, 52 | "nms_threshold": 0.5, 53 | 54 | "max_per_image": 100 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /perception/corner_net_lite/configs/CornerNet_Squeeze.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "COCO", 4 | "batch_size": 55, 5 | "sampling_function": "cornernet", 6 | 7 | "train_split": "trainval", 8 | "val_split": "minival", 9 | 10 | "learning_rate": 0.00025, 11 | "decay_rate": 10, 12 | 13 | "val_iter": 100, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 5, 17 | 18 | "max_iter": 500000, 19 | "stepsize": 450000, 20 | "snapshot": 5000, 21 | 22 | "chunk_sizes": [13, 14, 14, 14], 23 | 24 | "data_dir": "./data" 25 | }, 26 | 27 | "db": { 28 | "rand_scale_min": 0.6, 29 | "rand_scale_max": 1.4, 30 | "rand_scale_step": 0.1, 31 | "rand_scales": null, 32 | 33 | "rand_crop": true, 34 | "rand_color": true, 35 | 36 | "border": 128, 37 | "gaussian_bump": true, 38 | "gaussian_iou": 0.3, 39 | 40 | "input_size": [511, 511], 41 | "output_sizes": [[64, 64]], 42 | 43 | "test_scales": [1], 44 | "test_flipped": false, 45 | 46 | "top_k": 20, 47 | "num_dets": 100, 48 | "categories": 80, 49 | "ae_threshold": 0.5, 50 | "nms_threshold": 0.5, 51 | 52 | "max_per_image": 100 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/__init__.py -------------------------------------------------------------------------------- /perception/corner_net_lite/core/base.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from .nnet.py_factory import NetworkFactory 4 | 5 | class Base(object): 6 | def __init__(self, db, nnet, func, model=None): 7 | super(Base, self).__init__() 8 | 9 | self._db = db 10 | self._nnet = nnet 11 | self._func = func 12 | 13 | if model is not None: 14 | self._nnet.load_pretrained_params(model) 15 | 16 | self._nnet.cuda() 17 | self._nnet.eval_mode() 18 | 19 | def _inference(self, image, *args, **kwargs): 20 | return self._func(self._db, self._nnet, image.copy(), *args, **kwargs) 21 | 22 | def __call__(self, image, *args, **kwargs): 23 | categories = self._db.configs["categories"] 24 | bboxes = self._inference(image, *args, **kwargs) 25 | return {self._db.cls2name(j): bboxes[j] for j in range(1, categories + 1)} 26 | 27 | def load_cfg(cfg_file): 28 | with open(cfg_file, "r") as f: 29 | cfg = json.load(f) 30 | 31 | cfg_sys = cfg["system"] 32 | cfg_db = cfg["db"] 33 | return cfg_sys, cfg_db 34 | 35 | def load_nnet(cfg_sys, model): 36 | return NetworkFactory(cfg_sys, model) 37 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | class SystemConfig(object): 5 | def __init__(self): 6 | self._configs = {} 7 | self._configs["dataset"] = None 8 | self._configs["sampling_function"] = "coco_detection" 9 | 10 | # Training Config 11 | self._configs["display"] = 5 12 | self._configs["snapshot"] = 400 13 | self._configs["stepsize"] = 5000 14 | self._configs["learning_rate"] = 0.001 15 | self._configs["decay_rate"] = 10 16 | self._configs["max_iter"] = 100000 17 | self._configs["val_iter"] = 20 18 | self._configs["batch_size"] = 1 19 | self._configs["snapshot_name"] = None 20 | self._configs["prefetch_size"] = 100 21 | self._configs["pretrain"] 
= None 22 | self._configs["opt_algo"] = "adam" 23 | self._configs["chunk_sizes"] = None 24 | 25 | # Directories 26 | self._configs["data_dir"] = "./data" 27 | self._configs["cache_dir"] = "./cache" 28 | self._configs["config_dir"] = "./config" 29 | self._configs["result_dir"] = "./results" 30 | 31 | # Split 32 | self._configs["train_split"] = "training" 33 | self._configs["val_split"] = "validation" 34 | self._configs["test_split"] = "testdev" 35 | 36 | # Rng 37 | self._configs["data_rng"] = np.random.RandomState(123) 38 | self._configs["nnet_rng"] = np.random.RandomState(317) 39 | 40 | @property 41 | def chunk_sizes(self): 42 | return self._configs["chunk_sizes"] 43 | 44 | @property 45 | def train_split(self): 46 | return self._configs["train_split"] 47 | 48 | @property 49 | def val_split(self): 50 | return self._configs["val_split"] 51 | 52 | @property 53 | def test_split(self): 54 | return self._configs["test_split"] 55 | 56 | @property 57 | def full(self): 58 | return self._configs 59 | 60 | @property 61 | def sampling_function(self): 62 | return self._configs["sampling_function"] 63 | 64 | @property 65 | def data_rng(self): 66 | return self._configs["data_rng"] 67 | 68 | @property 69 | def nnet_rng(self): 70 | return self._configs["nnet_rng"] 71 | 72 | @property 73 | def opt_algo(self): 74 | return self._configs["opt_algo"] 75 | 76 | @property 77 | def prefetch_size(self): 78 | return self._configs["prefetch_size"] 79 | 80 | @property 81 | def pretrain(self): 82 | return self._configs["pretrain"] 83 | 84 | @property 85 | def result_dir(self): 86 | result_dir = os.path.join(self._configs["result_dir"], self.snapshot_name) 87 | if not os.path.exists(result_dir): 88 | os.makedirs(result_dir) 89 | return result_dir 90 | 91 | @property 92 | def dataset(self): 93 | return self._configs["dataset"] 94 | 95 | @property 96 | def snapshot_name(self): 97 | return self._configs["snapshot_name"] 98 | 99 | @property 100 | def snapshot_dir(self): 101 | snapshot_dir = os.path.join(self.cache_dir, "nnet", self.snapshot_name) 102 | 103 | if not os.path.exists(snapshot_dir): 104 | os.makedirs(snapshot_dir) 105 | return snapshot_dir 106 | 107 | @property 108 | def snapshot_file(self): 109 | snapshot_file = os.path.join(self.snapshot_dir, self.snapshot_name + "_{}.pkl") 110 | return snapshot_file 111 | 112 | @property 113 | def config_dir(self): 114 | return self._configs["config_dir"] 115 | 116 | @property 117 | def batch_size(self): 118 | return self._configs["batch_size"] 119 | 120 | @property 121 | def max_iter(self): 122 | return self._configs["max_iter"] 123 | 124 | @property 125 | def learning_rate(self): 126 | return self._configs["learning_rate"] 127 | 128 | @property 129 | def decay_rate(self): 130 | return self._configs["decay_rate"] 131 | 132 | @property 133 | def stepsize(self): 134 | return self._configs["stepsize"] 135 | 136 | @property 137 | def snapshot(self): 138 | return self._configs["snapshot"] 139 | 140 | @property 141 | def display(self): 142 | return self._configs["display"] 143 | 144 | @property 145 | def val_iter(self): 146 | return self._configs["val_iter"] 147 | 148 | @property 149 | def data_dir(self): 150 | return self._configs["data_dir"] 151 | 152 | @property 153 | def cache_dir(self): 154 | if not os.path.exists(self._configs["cache_dir"]): 155 | os.makedirs(self._configs["cache_dir"]) 156 | return self._configs["cache_dir"] 157 | 158 | def update_config(self, new): 159 | for key in new: 160 | if key in self._configs: 161 | self._configs[key] = new[key] 162 | return self 
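Note that `update_config` only overwrites keys that already exist in the defaults above, so unrecognised entries in a JSON file are silently ignored. Below is a minimal sketch of how the files under `configs/` are split by `load_cfg` (from `core/base.py`) and merged into a `SystemConfig`; the path is illustrative and it assumes the package and its compiled pooling extensions are importable:

```
# Illustrative only: mirrors what core/detectors.py does when constructing a model.
from perception.corner_net_lite.core.base import load_cfg
from perception.corner_net_lite.core.config import SystemConfig

# load_cfg returns the "system" and "db" sections of the JSON file.
cfg_sys, cfg_db = load_cfg("perception/corner_net_lite/configs/CornerNet.json")

# Only keys already present in the defaults are overwritten.
sys_cfg = SystemConfig().update_config(cfg_sys)

print(sys_cfg.batch_size)     # 49, taken from the JSON
print(sys_cfg.learning_rate)  # 0.00025
```

The same pattern is used by `core/detectors.py` further below to build the `CornerNet`, `CornerNet_Squeeze` and `CornerNet_Saccade` wrappers.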
163 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/dbs/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco import COCO 2 | 3 | datasets = { 4 | "COCO": COCO 5 | } 6 | 7 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/dbs/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | class BASE(object): 5 | def __init__(self): 6 | self._split = None 7 | self._db_inds = [] 8 | self._image_ids = [] 9 | 10 | self._mean = np.zeros((3, ), dtype=np.float32) 11 | self._std = np.ones((3, ), dtype=np.float32) 12 | self._eig_val = np.ones((3, ), dtype=np.float32) 13 | self._eig_vec = np.zeros((3, 3), dtype=np.float32) 14 | 15 | self._configs = {} 16 | self._configs["data_aug"] = True 17 | 18 | self._data_rng = None 19 | 20 | @property 21 | def configs(self): 22 | return self._configs 23 | 24 | @property 25 | def mean(self): 26 | return self._mean 27 | 28 | @property 29 | def std(self): 30 | return self._std 31 | 32 | @property 33 | def eig_val(self): 34 | return self._eig_val 35 | 36 | @property 37 | def eig_vec(self): 38 | return self._eig_vec 39 | 40 | @property 41 | def db_inds(self): 42 | return self._db_inds 43 | 44 | @property 45 | def split(self): 46 | return self._split 47 | 48 | def update_config(self, new): 49 | for key in new: 50 | if key in self._configs: 51 | self._configs[key] = new[key] 52 | 53 | def image_ids(self, ind): 54 | return self._image_ids[ind] 55 | 56 | def image_path(self, ind): 57 | pass 58 | 59 | def write_result(self, ind, all_bboxes, all_scores): 60 | pass 61 | 62 | def evaluate(self, name): 63 | pass 64 | 65 | def shuffle_inds(self, quiet=False): 66 | if self._data_rng is None: 67 | self._data_rng = np.random.RandomState(os.getpid()) 68 | 69 | if not quiet: 70 | print("shuffling indices...") 71 | rand_perm = self._data_rng.permutation(len(self._db_inds)) 72 | self._db_inds = self._db_inds[rand_perm] 73 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/dbs/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | from .detection import DETECTION 6 | from ..paths import get_file_path 7 | 8 | # COCO bounding boxes are 0-indexed 9 | 10 | class COCO(DETECTION): 11 | def __init__(self, db_config, split=None, sys_config=None): 12 | assert split is None or sys_config is not None 13 | super(COCO, self).__init__(db_config) 14 | 15 | self._mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32) 16 | self._std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32) 17 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32) 18 | self._eig_vec = np.array([ 19 | [-0.58752847, -0.69563484, 0.41340352], 20 | [-0.5832747, 0.00994535, -0.81221408], 21 | [-0.56089297, 0.71832671, 0.41158938] 22 | ], dtype=np.float32) 23 | 24 | self._coco_cls_ids = [ 25 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 26 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 27 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 28 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 29 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 30 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 31 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 32 | 82, 84, 85, 86, 87, 88, 89, 90 33 | ] 34 | 35 | 
self._coco_cls_names = [ 36 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 37 | 'bus', 'train', 'truck', 'boat', 'traffic light', 38 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 39 | 'bird', 'cat', 'dog', 'horse','sheep', 'cow', 'elephant', 40 | 'bear', 'zebra','giraffe', 'backpack', 'umbrella', 41 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 42 | 'snowboard','sports ball', 'kite', 'baseball bat', 43 | 'baseball glove', 'skateboard', 'surfboard', 44 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 45 | 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 46 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 47 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 48 | 'bed', 'dining table', 'toilet', 'tv', 'laptop', 49 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 50 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 51 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 52 | 'toothbrush' 53 | ] 54 | 55 | self._cls2coco = {ind + 1: coco_id for ind, coco_id in enumerate(self._coco_cls_ids)} 56 | self._coco2cls = {coco_id: cls_id for cls_id, coco_id in self._cls2coco.items()} 57 | self._coco2name = {cls_id: cls_name for cls_id, cls_name in zip(self._coco_cls_ids, self._coco_cls_names)} 58 | self._name2coco = {cls_name: cls_id for cls_name, cls_id in self._coco2name.items()} 59 | 60 | if split is not None: 61 | coco_dir = os.path.join(sys_config.data_dir, "coco") 62 | 63 | self._split = { 64 | "trainval": "trainval2014", 65 | "minival": "minival2014", 66 | "testdev": "testdev2017" 67 | }[split] 68 | self._data_dir = os.path.join(coco_dir, "images", self._split) 69 | self._anno_file = os.path.join(coco_dir, "annotations", "instances_{}.json".format(self._split)) 70 | 71 | self._detections, self._eval_ids = self._load_coco_annos() 72 | self._image_ids = list(self._detections.keys()) 73 | self._db_inds = np.arange(len(self._image_ids)) 74 | 75 | def _load_coco_annos(self): 76 | from pycocotools.coco import COCO 77 | 78 | coco = COCO(self._anno_file) 79 | self._coco = coco 80 | 81 | class_ids = coco.getCatIds() 82 | image_ids = coco.getImgIds() 83 | 84 | eval_ids = {} 85 | detections = {} 86 | for image_id in image_ids: 87 | image = coco.loadImgs(image_id)[0] 88 | dets = [] 89 | 90 | eval_ids[image["file_name"]] = image_id 91 | for class_id in class_ids: 92 | annotation_ids = coco.getAnnIds(imgIds=image["id"], catIds=class_id) 93 | annotations = coco.loadAnns(annotation_ids) 94 | category = self._coco2cls[class_id] 95 | for annotation in annotations: 96 | det = annotation["bbox"] + [category] 97 | det[2] += det[0] 98 | det[3] += det[1] 99 | dets.append(det) 100 | 101 | file_name = image["file_name"] 102 | if len(dets) == 0: 103 | detections[file_name] = np.zeros((0, 5), dtype=np.float32) 104 | else: 105 | detections[file_name] = np.array(dets, dtype=np.float32) 106 | return detections, eval_ids 107 | 108 | def image_path(self, ind): 109 | if self._data_dir is None: 110 | raise ValueError("Data directory is not set") 111 | 112 | db_ind = self._db_inds[ind] 113 | file_name = self._image_ids[db_ind] 114 | return os.path.join(self._data_dir, file_name) 115 | 116 | def detections(self, ind): 117 | db_ind = self._db_inds[ind] 118 | file_name = self._image_ids[db_ind] 119 | return self._detections[file_name].copy() 120 | 121 | def cls2name(self, cls): 122 | coco = self._cls2coco[cls] 123 | return self._coco2name[coco] 124 | 125 | def _to_float(self, x): 126 | return float("{:.2f}".format(x)) 127 | 128 | def convert_to_coco(self, 
all_bboxes): 129 | detections = [] 130 | for image_id in all_bboxes: 131 | coco_id = self._eval_ids[image_id] 132 | for cls_ind in all_bboxes[image_id]: 133 | category_id = self._cls2coco[cls_ind] 134 | for bbox in all_bboxes[image_id][cls_ind]: 135 | bbox[2] -= bbox[0] 136 | bbox[3] -= bbox[1] 137 | 138 | score = bbox[4] 139 | bbox = list(map(self._to_float, bbox[0:4])) 140 | 141 | detection = { 142 | "image_id": coco_id, 143 | "category_id": category_id, 144 | "bbox": bbox, 145 | "score": float("{:.2f}".format(score)) 146 | } 147 | 148 | detections.append(detection) 149 | return detections 150 | 151 | def evaluate(self, result_json, cls_ids, image_ids): 152 | from pycocotools.cocoeval import COCOeval 153 | 154 | if self._split == "testdev": 155 | return None 156 | 157 | coco = self._coco 158 | 159 | eval_ids = [self._eval_ids[image_id] for image_id in image_ids] 160 | cat_ids = [self._cls2coco[cls_id] for cls_id in cls_ids] 161 | 162 | coco_dets = coco.loadRes(result_json) 163 | coco_eval = COCOeval(coco, coco_dets, "bbox") 164 | coco_eval.params.imgIds = eval_ids 165 | coco_eval.params.catIds = cat_ids 166 | coco_eval.evaluate() 167 | coco_eval.accumulate() 168 | coco_eval.summarize() 169 | return coco_eval.stats[0], coco_eval.stats[12:] 170 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/dbs/detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .base import BASE 4 | 5 | class DETECTION(BASE): 6 | def __init__(self, db_config): 7 | super(DETECTION, self).__init__() 8 | 9 | # Configs for training 10 | self._configs["categories"] = 80 11 | self._configs["rand_scales"] = [1] 12 | self._configs["rand_scale_min"] = 0.8 13 | self._configs["rand_scale_max"] = 1.4 14 | self._configs["rand_scale_step"] = 0.2 15 | 16 | # Configs for both training and testing 17 | self._configs["input_size"] = [383, 383] 18 | self._configs["output_sizes"] = [[96, 96], [48, 48], [24, 24], [12, 12]] 19 | 20 | self._configs["score_threshold"] = 0.05 21 | self._configs["nms_threshold"] = 0.7 22 | self._configs["max_per_set"] = 40 23 | self._configs["max_per_image"] = 100 24 | self._configs["top_k"] = 20 25 | self._configs["ae_threshold"] = 1 26 | self._configs["nms_kernel"] = 3 27 | self._configs["num_dets"] = 1000 28 | 29 | self._configs["nms_algorithm"] = "exp_soft_nms" 30 | self._configs["weight_exp"] = 8 31 | self._configs["merge_bbox"] = False 32 | 33 | self._configs["data_aug"] = True 34 | self._configs["lighting"] = True 35 | 36 | self._configs["border"] = 64 37 | self._configs["gaussian_bump"] = False 38 | self._configs["gaussian_iou"] = 0.7 39 | self._configs["gaussian_radius"] = -1 40 | self._configs["rand_crop"] = False 41 | self._configs["rand_color"] = False 42 | self._configs["rand_center"] = True 43 | 44 | self._configs["init_sizes"] = [192, 255] 45 | self._configs["view_sizes"] = [] 46 | 47 | self._configs["min_scale"] = 16 48 | self._configs["max_scale"] = 32 49 | 50 | self._configs["att_sizes"] = [[16, 16], [32, 32], [64, 64]] 51 | self._configs["att_ranges"] = [[96, 256], [32, 96], [0, 32]] 52 | self._configs["att_ratios"] = [16, 8, 4] 53 | self._configs["att_scales"] = [1, 1.5, 2] 54 | self._configs["att_thresholds"] = [0.3, 0.3, 0.3, 0.3] 55 | self._configs["att_nms_ks"] = [3, 3, 3] 56 | self._configs["att_max_crops"] = 8 57 | self._configs["ref_dets"] = True 58 | 59 | # Configs for testing 60 | self._configs["test_scales"] = [1] 61 | 
self._configs["test_flipped"] = True 62 | 63 | self.update_config(db_config) 64 | 65 | if self._configs["rand_scales"] is None: 66 | self._configs["rand_scales"] = np.arange( 67 | self._configs["rand_scale_min"], 68 | self._configs["rand_scale_max"], 69 | self._configs["rand_scale_step"] 70 | ) 71 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/detectors.py: -------------------------------------------------------------------------------- 1 | from .base import Base, load_cfg, load_nnet 2 | from .paths import get_file_path 3 | from .config import SystemConfig 4 | from .dbs.coco import COCO 5 | 6 | class CornerNet(Base): 7 | def __init__(self): 8 | from .test.cornernet import cornernet_inference 9 | from .models.CornerNet import model 10 | 11 | cfg_path = get_file_path("..", "configs", "CornerNet.json") 12 | model_path = get_file_path("..", "cache", "nnet", "CornerNet", "CornerNet_500000.pkl") 13 | 14 | cfg_sys, cfg_db = load_cfg(cfg_path) 15 | sys_cfg = SystemConfig().update_config(cfg_sys) 16 | coco = COCO(cfg_db) 17 | 18 | cornernet = load_nnet(sys_cfg, model()) 19 | super(CornerNet, self).__init__(coco, cornernet, cornernet_inference, model=model_path) 20 | 21 | class CornerNet_Squeeze(Base): 22 | def __init__(self): 23 | from .test.cornernet import cornernet_inference 24 | from .models.CornerNet_Squeeze import model 25 | 26 | cfg_path = get_file_path("..", "configs", "CornerNet_Squeeze.json") 27 | model_path = get_file_path("..", "cache", "nnet", "CornerNet_Squeeze", "CornerNet_Squeeze_500000.pkl") 28 | 29 | cfg_sys, cfg_db = load_cfg(cfg_path) 30 | sys_cfg = SystemConfig().update_config(cfg_sys) 31 | coco = COCO(cfg_db) 32 | 33 | cornernet = load_nnet(sys_cfg, model()) 34 | super(CornerNet_Squeeze, self).__init__(coco, cornernet, cornernet_inference, model=model_path) 35 | 36 | class CornerNet_Saccade(Base): 37 | def __init__(self): 38 | from .test.cornernet_saccade import cornernet_saccade_inference 39 | from .models.CornerNet_Saccade import model 40 | 41 | cfg_path = get_file_path("..", "configs", "CornerNet_Saccade.json") 42 | model_path = get_file_path("..", "cache", "nnet", "CornerNet_Saccade", "CornerNet_Saccade_500000.pkl") 43 | 44 | cfg_sys, cfg_db = load_cfg(cfg_path) 45 | sys_cfg = SystemConfig().update_config(cfg_sys) 46 | coco = COCO(cfg_db) 47 | 48 | cornernet = load_nnet(sys_cfg, model()) 49 | super(CornerNet_Saccade, self).__init__(coco, cornernet, cornernet_saccade_inference, model=model_path) 50 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/external/__init__.py -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/bbox.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/external/bbox.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/nms.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/external/nms.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "bbox", 9 | ["bbox.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ), 12 | Extension( 13 | "nms", 14 | ["nms.pyx"], 15 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 16 | ) 17 | ] 18 | 19 | setup( 20 | name="coco", 21 | ext_modules=cythonize(extensions), 22 | include_dirs=[numpy.get_include()] 23 | ) 24 | 
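The `bbox_overlaps` kernel above computes pairwise IoU between two box sets using the inclusive `+ 1` pixel convention, and the extension is compiled in place via the `Makefile` shown earlier (`python setup.py build_ext --inplace`). Purely as an illustration of the same arithmetic, and not part of the build, a vectorised NumPy equivalent would look roughly like this:

```
import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """Pairwise IoU between (N, 4) and (K, 4) boxes, inclusive pixel coordinates."""
    box_area = ((boxes[:, 2] - boxes[:, 0] + 1) *
                (boxes[:, 3] - boxes[:, 1] + 1))                       # (N,)
    query_area = ((query_boxes[:, 2] - query_boxes[:, 0] + 1) *
                  (query_boxes[:, 3] - query_boxes[:, 1] + 1))         # (K,)
    # Intersection widths and heights, clipped at zero for disjoint boxes.
    iw = (np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) -
          np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1).clip(min=0)
    ih = (np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) -
          np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1).clip(min=0)
    inter = iw * ih                                                    # (N, K)
    union = box_area[:, None] + query_area[None, :] - inter
    return inter / union
```

The compiled Cython versions are kept for speed during evaluation; the sketch only mirrors their arithmetic.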
-------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/CornerNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool 5 | 6 | from .py_utils.utils import convolution, residual, corner_pool 7 | from .py_utils.losses import CornerNet_Loss 8 | from .py_utils.modules import hg_module, hg, hg_net 9 | 10 | def make_pool_layer(dim): 11 | return nn.Sequential() 12 | 13 | def make_hg_layer(inp_dim, out_dim, modules): 14 | layers = [residual(inp_dim, out_dim, stride=2)] 15 | layers += [residual(out_dim, out_dim) for _ in range(1, modules)] 16 | return nn.Sequential(*layers) 17 | 18 | class model(hg_net): 19 | def _pred_mod(self, dim): 20 | return nn.Sequential( 21 | convolution(3, 256, 256, with_bn=False), 22 | nn.Conv2d(256, dim, (1, 1)) 23 | ) 24 | 25 | def _merge_mod(self): 26 | return nn.Sequential( 27 | nn.Conv2d(256, 256, (1, 1), bias=False), 28 | nn.BatchNorm2d(256) 29 | ) 30 | 31 | def __init__(self): 32 | stacks = 2 33 | pre = nn.Sequential( 34 | convolution(7, 3, 128, stride=2), 35 | residual(128, 256, stride=2) 36 | ) 37 | hg_mods = nn.ModuleList([ 38 | hg_module( 39 | 5, [256, 256, 384, 384, 384, 512], [2, 2, 2, 2, 2, 4], 40 | make_pool_layer=make_pool_layer, 41 | make_hg_layer=make_hg_layer 42 | ) for _ in range(stacks) 43 | ]) 44 | cnvs = nn.ModuleList([convolution(3, 256, 256) for _ in range(stacks)]) 45 | inters = nn.ModuleList([residual(256, 256) for _ in range(stacks - 1)]) 46 | cnvs_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 47 | inters_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 48 | 49 | hgs = hg(pre, hg_mods, cnvs, inters, cnvs_, inters_) 50 | 51 | tl_modules = nn.ModuleList([corner_pool(256, TopPool, LeftPool) for _ in range(stacks)]) 52 | br_modules = nn.ModuleList([corner_pool(256, BottomPool, RightPool) for _ in range(stacks)]) 53 | 54 | tl_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 55 | br_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 56 | for tl_heat, br_heat in zip(tl_heats, br_heats): 57 | torch.nn.init.constant_(tl_heat[-1].bias, -2.19) 58 | torch.nn.init.constant_(br_heat[-1].bias, -2.19) 59 | 60 | tl_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 61 | br_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 62 | 63 | tl_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 64 | br_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 65 | 66 | super(model, self).__init__( 67 | hgs, tl_modules, br_modules, tl_heats, br_heats, 68 | tl_tags, br_tags, tl_offs, br_offs 69 | ) 70 | 71 | self.loss = CornerNet_Loss(pull_weight=1e-1, push_weight=1e-1) 72 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/CornerNet_Saccade.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool 5 | 6 | from .py_utils.utils import convolution, residual, corner_pool 7 | from .py_utils.losses import CornerNet_Saccade_Loss 8 | from .py_utils.modules import saccade_net, saccade_module, saccade 9 | 10 | def make_pool_layer(dim): 11 | return nn.Sequential() 12 | 13 | def make_hg_layer(inp_dim, out_dim, modules): 14 | layers = [residual(inp_dim, 
out_dim, stride=2)] 15 | layers += [residual(out_dim, out_dim) for _ in range(1, modules)] 16 | return nn.Sequential(*layers) 17 | 18 | class model(saccade_net): 19 | def _pred_mod(self, dim): 20 | return nn.Sequential( 21 | convolution(3, 256, 256, with_bn=False), 22 | nn.Conv2d(256, dim, (1, 1)) 23 | ) 24 | 25 | def _merge_mod(self): 26 | return nn.Sequential( 27 | nn.Conv2d(256, 256, (1, 1), bias=False), 28 | nn.BatchNorm2d(256) 29 | ) 30 | 31 | def __init__(self): 32 | stacks = 3 33 | pre = nn.Sequential( 34 | convolution(7, 3, 128, stride=2), 35 | residual(128, 256, stride=2) 36 | ) 37 | hg_mods = nn.ModuleList([ 38 | saccade_module( 39 | 3, [256, 384, 384, 512], [1, 1, 1, 1], 40 | make_pool_layer=make_pool_layer, 41 | make_hg_layer=make_hg_layer 42 | ) for _ in range(stacks) 43 | ]) 44 | cnvs = nn.ModuleList([convolution(3, 256, 256) for _ in range(stacks)]) 45 | inters = nn.ModuleList([residual(256, 256) for _ in range(stacks - 1)]) 46 | cnvs_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 47 | inters_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 48 | 49 | att_mods = nn.ModuleList([ 50 | nn.ModuleList([ 51 | nn.Sequential( 52 | convolution(3, 384, 256, with_bn=False), 53 | nn.Conv2d(256, 1, (1, 1)) 54 | ), 55 | nn.Sequential( 56 | convolution(3, 384, 256, with_bn=False), 57 | nn.Conv2d(256, 1, (1, 1)) 58 | ), 59 | nn.Sequential( 60 | convolution(3, 256, 256, with_bn=False), 61 | nn.Conv2d(256, 1, (1, 1)) 62 | ) 63 | ]) for _ in range(stacks) 64 | ]) 65 | for att_mod in att_mods: 66 | for att in att_mod: 67 | torch.nn.init.constant_(att[-1].bias, -2.19) 68 | 69 | hgs = saccade(pre, hg_mods, cnvs, inters, cnvs_, inters_) 70 | 71 | tl_modules = nn.ModuleList([corner_pool(256, TopPool, LeftPool) for _ in range(stacks)]) 72 | br_modules = nn.ModuleList([corner_pool(256, BottomPool, RightPool) for _ in range(stacks)]) 73 | 74 | tl_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 75 | br_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 76 | for tl_heat, br_heat in zip(tl_heats, br_heats): 77 | torch.nn.init.constant_(tl_heat[-1].bias, -2.19) 78 | torch.nn.init.constant_(br_heat[-1].bias, -2.19) 79 | 80 | tl_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 81 | br_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 82 | 83 | tl_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 84 | br_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 85 | 86 | super(model, self).__init__( 87 | hgs, tl_modules, br_modules, tl_heats, br_heats, 88 | tl_tags, br_tags, tl_offs, br_offs, att_mods 89 | ) 90 | 91 | self.loss = CornerNet_Saccade_Loss(pull_weight=1e-1, push_weight=1e-1) 92 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/CornerNet_Squeeze.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool 5 | 6 | from .py_utils.utils import convolution, corner_pool, residual 7 | from .py_utils.losses import CornerNet_Loss 8 | from .py_utils.modules import hg_module, hg, hg_net 9 | 10 | class fire_module(nn.Module): 11 | def __init__(self, inp_dim, out_dim, sr=2, stride=1): 12 | super(fire_module, self).__init__() 13 | self.conv1 = nn.Conv2d(inp_dim, out_dim // sr, kernel_size=1, stride=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(out_dim // sr) 15 | self.conv_1x1 = 
nn.Conv2d(out_dim // sr, out_dim // 2, kernel_size=1, stride=stride, bias=False) 16 | self.conv_3x3 = nn.Conv2d(out_dim // sr, out_dim // 2, kernel_size=3, padding=1, 17 | stride=stride, groups=out_dim // sr, bias=False) 18 | self.bn2 = nn.BatchNorm2d(out_dim) 19 | self.skip = (stride == 1 and inp_dim == out_dim) 20 | self.relu = nn.ReLU(inplace=True) 21 | 22 | def forward(self, x): 23 | conv1 = self.conv1(x) 24 | bn1 = self.bn1(conv1) 25 | conv2 = torch.cat((self.conv_1x1(bn1), self.conv_3x3(bn1)), 1) 26 | bn2 = self.bn2(conv2) 27 | if self.skip: 28 | return self.relu(bn2 + x) 29 | else: 30 | return self.relu(bn2) 31 | 32 | def make_pool_layer(dim): 33 | return nn.Sequential() 34 | 35 | def make_unpool_layer(dim): 36 | return nn.ConvTranspose2d(dim, dim, kernel_size=4, stride=2, padding=1) 37 | 38 | def make_layer(inp_dim, out_dim, modules): 39 | layers = [fire_module(inp_dim, out_dim)] 40 | layers += [fire_module(out_dim, out_dim) for _ in range(1, modules)] 41 | return nn.Sequential(*layers) 42 | 43 | def make_layer_revr(inp_dim, out_dim, modules): 44 | layers = [fire_module(inp_dim, inp_dim) for _ in range(modules - 1)] 45 | layers += [fire_module(inp_dim, out_dim)] 46 | return nn.Sequential(*layers) 47 | 48 | def make_hg_layer(inp_dim, out_dim, modules): 49 | layers = [fire_module(inp_dim, out_dim, stride=2)] 50 | layers += [fire_module(out_dim, out_dim) for _ in range(1, modules)] 51 | return nn.Sequential(*layers) 52 | 53 | class model(hg_net): 54 | def _pred_mod(self, dim): 55 | return nn.Sequential( 56 | convolution(1, 256, 256, with_bn=False), 57 | nn.Conv2d(256, dim, (1, 1)) 58 | ) 59 | 60 | def _merge_mod(self): 61 | return nn.Sequential( 62 | nn.Conv2d(256, 256, (1, 1), bias=False), 63 | nn.BatchNorm2d(256) 64 | ) 65 | 66 | def __init__(self): 67 | stacks = 2 68 | pre = nn.Sequential( 69 | convolution(7, 3, 128, stride=2), 70 | residual(128, 256, stride=2), 71 | residual(256, 256, stride=2) 72 | ) 73 | hg_mods = nn.ModuleList([ 74 | hg_module( 75 | 4, [256, 256, 384, 384, 512], [2, 2, 2, 2, 4], 76 | make_pool_layer=make_pool_layer, 77 | make_unpool_layer=make_unpool_layer, 78 | make_up_layer=make_layer, 79 | make_low_layer=make_layer, 80 | make_hg_layer_revr=make_layer_revr, 81 | make_hg_layer=make_hg_layer 82 | ) for _ in range(stacks) 83 | ]) 84 | cnvs = nn.ModuleList([convolution(3, 256, 256) for _ in range(stacks)]) 85 | inters = nn.ModuleList([residual(256, 256) for _ in range(stacks - 1)]) 86 | cnvs_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 87 | inters_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 88 | 89 | hgs = hg(pre, hg_mods, cnvs, inters, cnvs_, inters_) 90 | 91 | tl_modules = nn.ModuleList([corner_pool(256, TopPool, LeftPool) for _ in range(stacks)]) 92 | br_modules = nn.ModuleList([corner_pool(256, BottomPool, RightPool) for _ in range(stacks)]) 93 | 94 | tl_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 95 | br_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 96 | for tl_heat, br_heat in zip(tl_heats, br_heats): 97 | torch.nn.init.constant_(tl_heat[-1].bias, -2.19) 98 | torch.nn.init.constant_(br_heat[-1].bias, -2.19) 99 | 100 | tl_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 101 | br_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 102 | 103 | tl_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 104 | br_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 105 | 106 | super(model, self).__init__( 107 | hgs, tl_modules, 
br_modules, tl_heats, br_heats, 108 | tl_tags, br_tags, tl_offs, br_offs 109 | ) 110 | 111 | self.loss = CornerNet_Loss(pull_weight=1e-1, push_weight=1e-1) 112 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/models/__init__.py -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from ._cpools import TopPool, BottomPool, LeftPool, RightPool 2 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | cpools.egg-info/ 3 | dist/ 4 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | from torch.autograd import Function 5 | 6 | import top_pool, bottom_pool, left_pool, right_pool 7 | 8 | class TopPoolFunction(Function): 9 | @staticmethod 10 | def forward(ctx, input): 11 | output = top_pool.forward(input)[0] 12 | ctx.save_for_backward(input) 13 | return output 14 | 15 | @staticmethod 16 | def backward(ctx, grad_output): 17 | input = ctx.saved_variables[0] 18 | output = top_pool.backward(input, grad_output)[0] 19 | return output 20 | 21 | class BottomPoolFunction(Function): 22 | @staticmethod 23 | def forward(ctx, input): 24 | output = bottom_pool.forward(input)[0] 25 | ctx.save_for_backward(input) 26 | return output 27 | 28 | @staticmethod 29 | def backward(ctx, grad_output): 30 | input = ctx.saved_variables[0] 31 | output = bottom_pool.backward(input, grad_output)[0] 32 | return output 33 | 34 | class LeftPoolFunction(Function): 35 | @staticmethod 36 | def forward(ctx, input): 37 | output = left_pool.forward(input)[0] 38 | ctx.save_for_backward(input) 39 | return output 40 | 41 | @staticmethod 42 | def backward(ctx, grad_output): 43 | input = ctx.saved_variables[0] 44 | output = left_pool.backward(input, grad_output)[0] 45 | return output 46 | 47 | class RightPoolFunction(Function): 48 | @staticmethod 49 | def forward(ctx, input): 50 | output = right_pool.forward(input)[0] 51 | ctx.save_for_backward(input) 52 | return output 53 | 54 | @staticmethod 55 | def backward(ctx, grad_output): 56 | input = ctx.saved_variables[0] 57 | output = right_pool.backward(input, grad_output)[0] 58 | return output 59 | 60 | class TopPool(nn.Module): 61 | def forward(self, x): 62 | return TopPoolFunction.apply(x) 63 | 64 | class BottomPool(nn.Module): 65 | def forward(self, x): 66 | return BottomPoolFunction.apply(x) 67 | 68 | class LeftPool(nn.Module): 69 | def forward(self, x): 70 | return LeftPoolFunction.apply(x) 71 | 72 | class RightPool(nn.Module): 73 | def forward(self, x): 74 | return RightPoolFunction.apply(x) 75 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CppExtension 3 | 4 | setup( 5 | name="cpools", 6 | ext_modules=[ 7 | CppExtension("top_pool", ["src/top_pool.cpp"]), 8 | CppExtension("bottom_pool", ["src/bottom_pool.cpp"]), 9 | CppExtension("left_pool", ["src/left_pool.cpp"]), 10 | CppExtension("right_pool", ["src/right_pool.cpp"]) 11 | ], 12 | cmdclass={ 13 | "build_ext": BuildExtension 14 | } 15 | ) 16 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/src/bottom_pool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | std::vector pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get height 12 | int64_t height = input.size(2); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < height; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 2, ind, height); 18 | at::Tensor cur_temp = at::slice(output, 2, ind, height); 19 | at::Tensor next_temp = at::slice(output, 2, 0, height-ind); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(2, 0); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(0); 46 | 47 | auto output_temp = output.select(2, 0); 48 | auto grad_output_temp = grad_output.select(2, 0); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(2); 52 | auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 0; ind < height - 1; ++ind) { 55 | input_temp = input.select(2, ind + 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, ind + 1); 61 | 62 | grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2); 63 | output.scatter_add_(2, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Bottom Pool Forward", 74 | py::call_guard() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Bottom Pool Backward", 78 | py::call_guard() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/src/left_pool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | std::vector pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get width 
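// The doubling-offset loop below computes a suffix maximum along the width axis
// in O(log W) in-place slice operations: afterwards output[..., j] holds
// max(input[..., j:]), which is the "left" corner pooling operation.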
12 | int64_t width = input.size(3); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < width; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 3, 0, width-ind); 18 | at::Tensor cur_temp = at::slice(output, 3, 0, width-ind); 19 | at::Tensor next_temp = at::slice(output, 3, ind, width); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(3, width - 1); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(width - 1); 46 | 47 | auto output_temp = output.select(3, width - 1); 48 | auto grad_output_temp = grad_output.select(3, width - 1); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(3); 52 | auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 1; ind < width; ++ind) { 55 | input_temp = input.select(3, width - ind - 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, width - ind - 1); 61 | 62 | grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3); 63 | output.scatter_add_(3, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Left Pool Forward", 74 | py::call_guard() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Left Pool Backward", 78 | py::call_guard() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/src/right_pool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | std::vector pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get width 12 | int64_t width = input.size(3); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < width; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 3, ind, width); 18 | at::Tensor cur_temp = at::slice(output, 3, ind, width); 19 | at::Tensor next_temp = at::slice(output, 3, 0, width-ind); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | at::Tensor output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, height}, 
at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(3, 0); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(0); 46 | 47 | auto output_temp = output.select(3, 0); 48 | auto grad_output_temp = grad_output.select(3, 0); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(3); 52 | auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 0; ind < width - 1; ++ind) { 55 | input_temp = input.select(3, ind + 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, ind + 1); 61 | 62 | grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3); 63 | output.scatter_add_(3, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Right Pool Forward", 74 | py::call_guard() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Right Pool Backward", 78 | py::call_guard() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/src/top_pool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | std::vector top_pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get height 12 | int64_t height = input.size(2); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < height; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 2, 0, height-ind); 18 | at::Tensor cur_temp = at::slice(output, 2, 0, height-ind); 19 | at::Tensor next_temp = at::slice(output, 2, ind, height); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector top_pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(2, height - 1); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(height - 1); 46 | 47 | auto output_temp = output.select(2, height - 1); 48 | auto grad_output_temp = grad_output.select(2, height - 1); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(2); 52 | auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 1; ind < height; ++ind) { 55 | input_temp = input.select(2, height - ind - 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, height - ind - 1); 61 | 62 | grad_output_temp = grad_output.select(2, 
height - ind - 1).unsqueeze(2); 63 | output.scatter_add_(2, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &top_pool_forward, "Top Pool Forward", 74 | py::call_guard() 75 | ); 76 | m.def( 77 | "backward", &top_pool_backward, "Top Pool Backward", 78 | py::call_guard() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | from .scatter_gather import scatter_kwargs 8 | 9 | class DataParallel(Module): 10 | r"""Implements data parallelism at the module level. 11 | 12 | This container parallelizes the application of the given module by 13 | splitting the input across the specified devices by chunking in the batch 14 | dimension. In the forward pass, the module is replicated on each device, 15 | and each replica handles a portion of the input. During the backwards 16 | pass, gradients from each replica are summed into the original module. 17 | 18 | The batch size should be larger than the number of GPUs used. It should 19 | also be an integer multiple of the number of GPUs so that each chunk is the 20 | same size (so that each GPU processes the same number of samples). 21 | 22 | See also: :ref:`cuda-nn-dataparallel-instead` 23 | 24 | Arbitrary positional and keyword inputs are allowed to be passed into 25 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 26 | specified (default 0). Primitive types will be broadcasted, but all 27 | other types will be a shallow copy and can be corrupted if written to in 28 | the model's forward pass. 
29 | 30 | Args: 31 | module: module to be parallelized 32 | device_ids: CUDA devices (default: all devices) 33 | output_device: device location of output (default: device_ids[0]) 34 | 35 | Example:: 36 | 37 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 38 | >>> output = net(input_var) 39 | """ 40 | 41 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 42 | 43 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 44 | super(DataParallel, self).__init__() 45 | 46 | if not torch.cuda.is_available(): 47 | self.module = module 48 | self.device_ids = [] 49 | return 50 | 51 | if device_ids is None: 52 | device_ids = list(range(torch.cuda.device_count())) 53 | if output_device is None: 54 | output_device = device_ids[0] 55 | self.dim = dim 56 | self.module = module 57 | self.device_ids = device_ids 58 | self.chunk_sizes = chunk_sizes 59 | self.output_device = output_device 60 | if len(self.device_ids) == 1: 61 | self.module.cuda(device_ids[0]) 62 | 63 | def forward(self, *inputs, **kwargs): 64 | if not self.device_ids: 65 | return self.module(*inputs, **kwargs) 66 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 67 | if len(self.device_ids) == 1: 68 | return self.module(*inputs[0], **kwargs[0]) 69 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 70 | outputs = self.parallel_apply(replicas, inputs, kwargs) 71 | return self.gather(outputs, self.output_device) 72 | 73 | def replicate(self, module, device_ids): 74 | return replicate(module, device_ids) 75 | 76 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 77 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 78 | 79 | def parallel_apply(self, replicas, inputs, kwargs): 80 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 81 | 82 | def gather(self, outputs, output_device): 83 | return gather(outputs, output_device, dim=self.dim) 84 | 85 | 86 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 87 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 88 | 89 | This is the functional version of the DataParallel module. 90 | 91 | Args: 92 | module: the module to evaluate in parallel 93 | inputs: inputs to the module 94 | device_ids: GPU ids on which to replicate module 95 | output_device: GPU location of the output Use -1 to indicate the CPU. 
96 | (default: device_ids[0]) 97 | Returns: 98 | a Variable containing the result of module(input) located on 99 | output_device 100 | """ 101 | if not isinstance(inputs, tuple): 102 | inputs = (inputs,) 103 | 104 | if device_ids is None: 105 | device_ids = list(range(torch.cuda.device_count())) 106 | 107 | if output_device is None: 108 | output_device = device_ids[0] 109 | 110 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 111 | if len(device_ids) == 1: 112 | return module(*inputs[0], **module_kwargs[0]) 113 | used_device_ids = device_ids[:len(inputs)] 114 | replicas = replicate(module, used_device_ids) 115 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 116 | return gather(outputs, output_device, dim) 117 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .utils import _tranpose_and_gather_feat 5 | 6 | def _sigmoid(x): 7 | return torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 8 | 9 | def _ae_loss(tag0, tag1, mask): 10 | num = mask.sum(dim=1, keepdim=True).float() 11 | tag0 = tag0.squeeze() 12 | tag1 = tag1.squeeze() 13 | 14 | tag_mean = (tag0 + tag1) / 2 15 | 16 | tag0 = torch.pow(tag0 - tag_mean, 2) / (num + 1e-4) 17 | tag0 = tag0[mask].sum() 18 | tag1 = torch.pow(tag1 - tag_mean, 2) / (num + 1e-4) 19 | tag1 = tag1[mask].sum() 20 | pull = tag0 + tag1 21 | 22 | mask = mask.unsqueeze(1) + mask.unsqueeze(2) 23 | mask = mask.eq(2) 24 | num = num.unsqueeze(2) 25 | num2 = (num - 1) * num 26 | dist = tag_mean.unsqueeze(1) - tag_mean.unsqueeze(2) 27 | dist = 1 - torch.abs(dist) 28 | dist = nn.functional.relu(dist, inplace=True) 29 | dist = dist - 1 / (num + 1e-4) 30 | dist = dist / (num2 + 1e-4) 31 | dist = dist[mask] 32 | push = dist.sum() 33 | return pull, push 34 | 35 | def _off_loss(off, gt_off, mask): 36 | num = mask.float().sum() 37 | mask = mask.unsqueeze(2).expand_as(gt_off) 38 | 39 | off = off[mask] 40 | gt_off = gt_off[mask] 41 | 42 | off_loss = nn.functional.smooth_l1_loss(off, gt_off, reduction="sum") 43 | off_loss = off_loss / (num + 1e-4) 44 | return off_loss 45 | 46 | def _focal_loss_mask(preds, gt, mask): 47 | pos_inds = gt.eq(1) 48 | neg_inds = gt.lt(1) 49 | 50 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 51 | 52 | pos_mask = mask[pos_inds] 53 | neg_mask = mask[neg_inds] 54 | 55 | loss = 0 56 | for pred in preds: 57 | pos_pred = pred[pos_inds] 58 | neg_pred = pred[neg_inds] 59 | 60 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * pos_mask 61 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights * neg_mask 62 | 63 | num_pos = pos_inds.float().sum() 64 | pos_loss = pos_loss.sum() 65 | neg_loss = neg_loss.sum() 66 | 67 | if pos_pred.nelement() == 0: 68 | loss = loss - neg_loss 69 | else: 70 | loss = loss - (pos_loss + neg_loss) / num_pos 71 | return loss 72 | 73 | def _focal_loss(preds, gt): 74 | pos_inds = gt.eq(1) 75 | neg_inds = gt.lt(1) 76 | 77 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 78 | 79 | loss = 0 80 | for pred in preds: 81 | pos_pred = pred[pos_inds] 82 | neg_pred = pred[neg_inds] 83 | 84 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 85 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 86 | 87 | num_pos = pos_inds.float().sum() 88 | pos_loss = pos_loss.sum() 89 | neg_loss = neg_loss.sum() 90 | 91 
| if pos_pred.nelement() == 0: 92 | loss = loss - neg_loss 93 | else: 94 | loss = loss - (pos_loss + neg_loss) / num_pos 95 | return loss 96 | 97 | class CornerNet_Saccade_Loss(nn.Module): 98 | def __init__(self, pull_weight=1, push_weight=1, off_weight=1, focal_loss=_focal_loss_mask): 99 | super(CornerNet_Saccade_Loss, self).__init__() 100 | 101 | self.pull_weight = pull_weight 102 | self.push_weight = push_weight 103 | self.off_weight = off_weight 104 | self.focal_loss = focal_loss 105 | self.ae_loss = _ae_loss 106 | self.off_loss = _off_loss 107 | 108 | def forward(self, outs, targets): 109 | tl_heats = outs[0] 110 | br_heats = outs[1] 111 | tl_tags = outs[2] 112 | br_tags = outs[3] 113 | tl_offs = outs[4] 114 | br_offs = outs[5] 115 | atts = outs[6] 116 | 117 | gt_tl_heat = targets[0] 118 | gt_br_heat = targets[1] 119 | gt_mask = targets[2] 120 | gt_tl_off = targets[3] 121 | gt_br_off = targets[4] 122 | gt_tl_ind = targets[5] 123 | gt_br_ind = targets[6] 124 | gt_tl_valid = targets[7] 125 | gt_br_valid = targets[8] 126 | gt_atts = targets[9] 127 | 128 | # focal loss 129 | focal_loss = 0 130 | 131 | tl_heats = [_sigmoid(t) for t in tl_heats] 132 | br_heats = [_sigmoid(b) for b in br_heats] 133 | 134 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat, gt_tl_valid) 135 | focal_loss += self.focal_loss(br_heats, gt_br_heat, gt_br_valid) 136 | 137 | atts = [[_sigmoid(a) for a in att] for att in atts] 138 | atts = [[att[ind] for att in atts] for ind in range(len(gt_atts))] 139 | 140 | att_loss = 0 141 | for att, gt_att in zip(atts, gt_atts): 142 | att_loss += _focal_loss(att, gt_att) / max(len(att), 1) 143 | 144 | # tag loss 145 | pull_loss = 0 146 | push_loss = 0 147 | tl_tags = [_tranpose_and_gather_feat(tl_tag, gt_tl_ind) for tl_tag in tl_tags] 148 | br_tags = [_tranpose_and_gather_feat(br_tag, gt_br_ind) for br_tag in br_tags] 149 | for tl_tag, br_tag in zip(tl_tags, br_tags): 150 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask) 151 | pull_loss += pull 152 | push_loss += push 153 | pull_loss = self.pull_weight * pull_loss 154 | push_loss = self.push_weight * push_loss 155 | 156 | off_loss = 0 157 | tl_offs = [_tranpose_and_gather_feat(tl_off, gt_tl_ind) for tl_off in tl_offs] 158 | br_offs = [_tranpose_and_gather_feat(br_off, gt_br_ind) for br_off in br_offs] 159 | for tl_off, br_off in zip(tl_offs, br_offs): 160 | off_loss += self.off_loss(tl_off, gt_tl_off, gt_mask) 161 | off_loss += self.off_loss(br_off, gt_br_off, gt_mask) 162 | off_loss = self.off_weight * off_loss 163 | 164 | loss = (focal_loss + att_loss + pull_loss + push_loss + off_loss) / max(len(tl_heats), 1) 165 | return loss.unsqueeze(0) 166 | 167 | class CornerNet_Loss(nn.Module): 168 | def __init__(self, pull_weight=1, push_weight=1, off_weight=1, focal_loss=_focal_loss): 169 | super(CornerNet_Loss, self).__init__() 170 | 171 | self.pull_weight = pull_weight 172 | self.push_weight = push_weight 173 | self.off_weight = off_weight 174 | self.focal_loss = focal_loss 175 | self.ae_loss = _ae_loss 176 | self.off_loss = _off_loss 177 | 178 | def forward(self, outs, targets): 179 | tl_heats = outs[0] 180 | br_heats = outs[1] 181 | tl_tags = outs[2] 182 | br_tags = outs[3] 183 | tl_offs = outs[4] 184 | br_offs = outs[5] 185 | 186 | gt_tl_heat = targets[0] 187 | gt_br_heat = targets[1] 188 | gt_mask = targets[2] 189 | gt_tl_off = targets[3] 190 | gt_br_off = targets[4] 191 | gt_tl_ind = targets[5] 192 | gt_br_ind = targets[6] 193 | 194 | # focal loss 195 | focal_loss = 0 196 | 197 | tl_heats = [_sigmoid(t) for t in tl_heats] 
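        # _sigmoid clamps the heatmap activations to [1e-4, 1 - 1e-4] so the
        # log terms inside the focal loss below never become infinite.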
198 | br_heats = [_sigmoid(b) for b in br_heats] 199 | 200 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat) 201 | focal_loss += self.focal_loss(br_heats, gt_br_heat) 202 | 203 | # tag loss 204 | pull_loss = 0 205 | push_loss = 0 206 | tl_tags = [_tranpose_and_gather_feat(tl_tag, gt_tl_ind) for tl_tag in tl_tags] 207 | br_tags = [_tranpose_and_gather_feat(br_tag, gt_br_ind) for br_tag in br_tags] 208 | for tl_tag, br_tag in zip(tl_tags, br_tags): 209 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask) 210 | pull_loss += pull 211 | push_loss += push 212 | pull_loss = self.pull_weight * pull_loss 213 | push_loss = self.push_weight * push_loss 214 | 215 | off_loss = 0 216 | tl_offs = [_tranpose_and_gather_feat(tl_off, gt_tl_ind) for tl_off in tl_offs] 217 | br_offs = [_tranpose_and_gather_feat(br_off, gt_br_ind) for br_off in br_offs] 218 | for tl_off, br_off in zip(tl_offs, br_offs): 219 | off_loss += self.off_loss(tl_off, gt_tl_off, gt_mask) 220 | off_loss += self.off_loss(br_off, gt_br_off, gt_mask) 221 | off_loss = self.off_weight * off_loss 222 | 223 | loss = (focal_loss + pull_loss + push_loss + off_loss) / max(len(tl_heats), 1) 224 | return loss.unsqueeze(0) 225 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 
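        # Containers are scattered recursively and re-zipped so that every GPU
        # receives a structure of the same shape; any other object is simply
        # repeated once per target GPU by the fallback branch below.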
17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def _gather_feat(feat, ind, mask=None): 5 | dim = feat.size(2) 6 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 7 | feat = feat.gather(1, ind) 8 | if mask is not None: 9 | mask = mask.unsqueeze(2).expand_as(feat) 10 | feat = feat[mask] 11 | feat = feat.view(-1, dim) 12 | return feat 13 | 14 | def _nms(heat, kernel=1): 15 | pad = (kernel - 1) // 2 16 | 17 | hmax = nn.functional.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 18 | keep = (hmax == heat).float() 19 | return heat * keep 20 | 21 | def _tranpose_and_gather_feat(feat, ind): 22 | feat = feat.permute(0, 2, 3, 1).contiguous() 23 | feat = feat.view(feat.size(0), -1, feat.size(3)) 24 | feat = _gather_feat(feat, ind) 25 | return feat 26 | 27 | def _topk(scores, K=20): 28 | batch, cat, height, width = scores.size() 29 | 30 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K) 31 | 32 | topk_clses = (topk_inds / (height * width)).int() 33 | 34 | topk_inds = topk_inds % (height * width) 35 | topk_ys = (topk_inds / width).int().float() 36 | topk_xs = (topk_inds % width).int().float() 37 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 38 | 39 | def _decode( 40 | tl_heat, br_heat, tl_tag, br_tag, tl_regr, br_regr, 41 | K=100, kernel=1, ae_threshold=1, num_dets=1000, no_border=False 42 | ): 43 | batch, cat, height, width = tl_heat.size() 44 | 45 | tl_heat = torch.sigmoid(tl_heat) 46 | br_heat = torch.sigmoid(br_heat) 47 | 48 | # perform nms on heatmaps 49 | tl_heat = _nms(tl_heat, kernel=kernel) 50 | br_heat = _nms(br_heat, kernel=kernel) 51 | 52 | tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = _topk(tl_heat, K=K) 53 | br_scores, br_inds, br_clses, br_ys, br_xs = _topk(br_heat, K=K) 54 | 55 | tl_ys = tl_ys.view(batch, K, 1).expand(batch, K, K) 56 | tl_xs = tl_xs.view(batch, K, 1).expand(batch, K, K) 57 | br_ys = br_ys.view(batch, 1, K).expand(batch, K, K) 58 | br_xs = br_xs.view(batch, 1, K).expand(batch, K, K) 59 | 60 | if no_border: 61 | tl_ys_binds = (tl_ys == 0) 62 | tl_xs_binds = (tl_xs == 0) 63 | br_ys_binds = (br_ys == height - 1) 64 | br_xs_binds = (br_xs == width - 1) 65 | 66 | if tl_regr is not None and br_regr is not None: 67 | tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds) 68 | tl_regr = tl_regr.view(batch, K, 1, 2) 69 | br_regr = _tranpose_and_gather_feat(br_regr, br_inds) 70 | 
br_regr = br_regr.view(batch, 1, K, 2) 71 | 72 | tl_xs = tl_xs + tl_regr[..., 0] 73 | tl_ys = tl_ys + tl_regr[..., 1] 74 | br_xs = br_xs + br_regr[..., 0] 75 | br_ys = br_ys + br_regr[..., 1] 76 | 77 | # all possible boxes based on top k corners (ignoring class) 78 | bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) 79 | 80 | tl_tag = _tranpose_and_gather_feat(tl_tag, tl_inds) 81 | tl_tag = tl_tag.view(batch, K, 1) 82 | br_tag = _tranpose_and_gather_feat(br_tag, br_inds) 83 | br_tag = br_tag.view(batch, 1, K) 84 | dists = torch.abs(tl_tag - br_tag) 85 | 86 | tl_scores = tl_scores.view(batch, K, 1).expand(batch, K, K) 87 | br_scores = br_scores.view(batch, 1, K).expand(batch, K, K) 88 | scores = (tl_scores + br_scores) / 2 89 | 90 | # reject boxes based on classes 91 | tl_clses = tl_clses.view(batch, K, 1).expand(batch, K, K) 92 | br_clses = br_clses.view(batch, 1, K).expand(batch, K, K) 93 | cls_inds = (tl_clses != br_clses) 94 | 95 | # reject boxes based on distances 96 | dist_inds = (dists > ae_threshold) 97 | 98 | # reject boxes based on widths and heights 99 | width_inds = (br_xs < tl_xs) 100 | height_inds = (br_ys < tl_ys) 101 | 102 | if no_border: 103 | scores[tl_ys_binds] = -1 104 | scores[tl_xs_binds] = -1 105 | scores[br_ys_binds] = -1 106 | scores[br_xs_binds] = -1 107 | 108 | scores[cls_inds] = -1 109 | scores[dist_inds] = -1 110 | scores[width_inds] = -1 111 | scores[height_inds] = -1 112 | 113 | scores = scores.view(batch, -1) 114 | scores, inds = torch.topk(scores, num_dets) 115 | scores = scores.unsqueeze(2) 116 | 117 | bboxes = bboxes.view(batch, -1, 4) 118 | bboxes = _gather_feat(bboxes, inds) 119 | 120 | clses = tl_clses.contiguous().view(batch, -1, 1) 121 | clses = _gather_feat(clses, inds).float() 122 | 123 | tl_scores = tl_scores.contiguous().view(batch, -1, 1) 124 | tl_scores = _gather_feat(tl_scores, inds).float() 125 | br_scores = br_scores.contiguous().view(batch, -1, 1) 126 | br_scores = _gather_feat(br_scores, inds).float() 127 | 128 | detections = torch.cat([bboxes, scores, tl_scores, br_scores, clses], dim=2) 129 | return detections 130 | 131 | class upsample(nn.Module): 132 | def __init__(self, scale_factor): 133 | super(upsample, self).__init__() 134 | self.scale_factor = scale_factor 135 | 136 | def forward(self, x): 137 | return nn.functional.interpolate(x, scale_factor=self.scale_factor) 138 | 139 | class merge(nn.Module): 140 | def forward(self, x, y): 141 | return x + y 142 | 143 | class convolution(nn.Module): 144 | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True): 145 | super(convolution, self).__init__() 146 | 147 | pad = (k - 1) // 2 148 | self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn) 149 | self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential() 150 | self.relu = nn.ReLU(inplace=True) 151 | 152 | def forward(self, x): 153 | conv = self.conv(x) 154 | bn = self.bn(conv) 155 | relu = self.relu(bn) 156 | return relu 157 | 158 | class residual(nn.Module): 159 | def __init__(self, inp_dim, out_dim, k=3, stride=1): 160 | super(residual, self).__init__() 161 | p = (k - 1) // 2 162 | 163 | self.conv1 = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(p, p), stride=(stride, stride), bias=False) 164 | self.bn1 = nn.BatchNorm2d(out_dim) 165 | self.relu1 = nn.ReLU(inplace=True) 166 | 167 | self.conv2 = nn.Conv2d(out_dim, out_dim, (k, k), padding=(p, p), bias=False) 168 | self.bn2 = nn.BatchNorm2d(out_dim) 169 | 170 | self.skip = nn.Sequential( 171 | nn.Conv2d(inp_dim, 
out_dim, (1, 1), stride=(stride, stride), bias=False), 172 | nn.BatchNorm2d(out_dim) 173 | ) if stride != 1 or inp_dim != out_dim else nn.Sequential() 174 | self.relu = nn.ReLU(inplace=True) 175 | 176 | def forward(self, x): 177 | conv1 = self.conv1(x) 178 | bn1 = self.bn1(conv1) 179 | relu1 = self.relu1(bn1) 180 | 181 | conv2 = self.conv2(relu1) 182 | bn2 = self.bn2(conv2) 183 | 184 | skip = self.skip(x) 185 | return self.relu(bn2 + skip) 186 | 187 | class corner_pool(nn.Module): 188 | def __init__(self, dim, pool1, pool2): 189 | super(corner_pool, self).__init__() 190 | self._init_layers(dim, pool1, pool2) 191 | 192 | def _init_layers(self, dim, pool1, pool2): 193 | self.p1_conv1 = convolution(3, dim, 128) 194 | self.p2_conv1 = convolution(3, dim, 128) 195 | 196 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 197 | self.p_bn1 = nn.BatchNorm2d(dim) 198 | 199 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 200 | self.bn1 = nn.BatchNorm2d(dim) 201 | self.relu1 = nn.ReLU(inplace=True) 202 | 203 | self.conv2 = convolution(3, dim, dim) 204 | 205 | self.pool1 = pool1() 206 | self.pool2 = pool2() 207 | 208 | def forward(self, x): 209 | # pool 1 210 | p1_conv1 = self.p1_conv1(x) 211 | pool1 = self.pool1(p1_conv1) 212 | 213 | # pool 2 214 | p2_conv1 = self.p2_conv1(x) 215 | pool2 = self.pool2(p2_conv1) 216 | 217 | # pool 1 + pool 2 218 | p_conv1 = self.p_conv1(pool1 + pool2) 219 | p_bn1 = self.p_bn1(p_conv1) 220 | 221 | conv1 = self.conv1(x) 222 | bn1 = self.bn1(conv1) 223 | relu1 = self.relu1(p_bn1 + bn1) 224 | 225 | conv2 = self.conv2(relu1) 226 | return conv2 227 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/nnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/nnet/__init__.py -------------------------------------------------------------------------------- /perception/corner_net_lite/core/nnet/py_factory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import pickle 4 | import importlib 5 | import torch.nn as nn 6 | 7 | from ..models.py_utils.data_parallel import DataParallel 8 | 9 | torch.manual_seed(317) 10 | 11 | class Network(nn.Module): 12 | def __init__(self, model, loss): 13 | super(Network, self).__init__() 14 | 15 | self.model = model 16 | self.loss = loss 17 | 18 | def forward(self, xs, ys, **kwargs): 19 | preds = self.model(*xs, **kwargs) 20 | loss = self.loss(preds, ys, **kwargs) 21 | return loss 22 | 23 | # for model backward compatibility 24 | # previously model was wrapped by DataParallel module 25 | class DummyModule(nn.Module): 26 | def __init__(self, model): 27 | super(DummyModule, self).__init__() 28 | self.module = model 29 | 30 | def forward(self, *xs, **kwargs): 31 | return self.module(*xs, **kwargs) 32 | 33 | class NetworkFactory(object): 34 | def __init__(self, system_config, model, distributed=False, gpu=None): 35 | super(NetworkFactory, self).__init__() 36 | 37 | self.system_config = system_config 38 | 39 | self.gpu = gpu 40 | self.model = DummyModule(model) 41 | self.loss = model.loss 42 | self.network = Network(self.model, self.loss) 43 | 44 | if distributed: 45 | from apex.parallel import DistributedDataParallel, convert_syncbn_model 46 | torch.cuda.set_device(gpu) 47 | self.network = self.network.cuda(gpu) 48 | 
self.network = convert_syncbn_model(self.network) 49 | self.network = DistributedDataParallel(self.network) 50 | else: 51 | self.network = DataParallel(self.network, chunk_sizes=system_config.chunk_sizes) 52 | 53 | total_params = 0 54 | for params in self.model.parameters(): 55 | num_params = 1 56 | for x in params.size(): 57 | num_params *= x 58 | total_params += num_params 59 | print("total parameters: {}".format(total_params)) 60 | 61 | if system_config.opt_algo == "adam": 62 | self.optimizer = torch.optim.Adam( 63 | filter(lambda p: p.requires_grad, self.model.parameters()) 64 | ) 65 | elif system_config.opt_algo == "sgd": 66 | self.optimizer = torch.optim.SGD( 67 | filter(lambda p: p.requires_grad, self.model.parameters()), 68 | lr=system_config.learning_rate, 69 | momentum=0.9, weight_decay=0.0001 70 | ) 71 | else: 72 | raise ValueError("unknown optimizer") 73 | 74 | def cuda(self): 75 | self.model.cuda() 76 | 77 | def train_mode(self): 78 | self.network.train() 79 | 80 | def eval_mode(self): 81 | self.network.eval() 82 | 83 | def _t_cuda(self, xs): 84 | if type(xs) is list: 85 | return [x.cuda(self.gpu, non_blocking=True) for x in xs] 86 | return xs.cuda(self.gpu, non_blocking=True) 87 | 88 | def train(self, xs, ys, **kwargs): 89 | xs = [self._t_cuda(x) for x in xs] 90 | ys = [self._t_cuda(y) for y in ys] 91 | 92 | self.optimizer.zero_grad() 93 | loss = self.network(xs, ys) 94 | loss = loss.mean() 95 | loss.backward() 96 | self.optimizer.step() 97 | 98 | return loss 99 | 100 | def validate(self, xs, ys, **kwargs): 101 | with torch.no_grad(): 102 | xs = [self._t_cuda(x) for x in xs] 103 | ys = [self._t_cuda(y) for y in ys] 104 | 105 | loss = self.network(xs, ys) 106 | loss = loss.mean() 107 | return loss 108 | 109 | def test(self, xs, **kwargs): 110 | with torch.no_grad(): 111 | xs = [self._t_cuda(x) for x in xs] 112 | return self.model(*xs, **kwargs) 113 | 114 | def set_lr(self, lr): 115 | print("setting learning rate to: {}".format(lr)) 116 | for param_group in self.optimizer.param_groups: 117 | param_group["lr"] = lr 118 | 119 | def load_pretrained_params(self, pretrained_model): 120 | print("loading from {}".format(pretrained_model)) 121 | with open(pretrained_model, "rb") as f: 122 | params = torch.load(f) 123 | self.model.load_state_dict(params) 124 | 125 | def load_params(self, iteration): 126 | cache_file = self.system_config.snapshot_file.format(iteration) 127 | print("loading model from {}".format(cache_file)) 128 | with open(cache_file, "rb") as f: 129 | params = torch.load(f) 130 | self.model.load_state_dict(params) 131 | 132 | def save_params(self, iteration): 133 | cache_file = self.system_config.snapshot_file.format(iteration) 134 | print("saving model to {}".format(cache_file)) 135 | with open(cache_file, "wb") as f: 136 | params = self.model.state_dict() 137 | torch.save(params, f) 138 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/paths.py: -------------------------------------------------------------------------------- 1 | import pkg_resources 2 | 3 | _package_name = __name__ 4 | 5 | def get_file_path(*paths): 6 | path = "/".join(paths) 7 | return pkg_resources.resource_filename(_package_name, path) 8 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/sample/__init__.py: -------------------------------------------------------------------------------- 1 | from .cornernet import cornernet 2 | from .cornernet_saccade import 
cornernet_saccade 3 | 4 | def data_sampling_func(sys_configs, db, k_ind, data_aug=True, debug=False): 5 | return globals()[sys_configs.sampling_function](sys_configs, db, k_ind, data_aug, debug) 6 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/sample/cornernet.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import math 3 | import numpy as np 4 | import torch 5 | 6 | from .utils import random_crop, draw_gaussian, gaussian_radius, normalize_, color_jittering_, lighting_ 7 | 8 | def _resize_image(image, detections, size): 9 | detections = detections.copy() 10 | height, width = image.shape[0:2] 11 | new_height, new_width = size 12 | 13 | image = cv2.resize(image, (new_width, new_height)) 14 | 15 | height_ratio = new_height / height 16 | width_ratio = new_width / width 17 | detections[:, 0:4:2] *= width_ratio 18 | detections[:, 1:4:2] *= height_ratio 19 | return image, detections 20 | 21 | def _clip_detections(image, detections): 22 | detections = detections.copy() 23 | height, width = image.shape[0:2] 24 | 25 | detections[:, 0:4:2] = np.clip(detections[:, 0:4:2], 0, width - 1) 26 | detections[:, 1:4:2] = np.clip(detections[:, 1:4:2], 0, height - 1) 27 | keep_inds = ((detections[:, 2] - detections[:, 0]) > 0) & \ 28 | ((detections[:, 3] - detections[:, 1]) > 0) 29 | detections = detections[keep_inds] 30 | return detections 31 | 32 | def cornernet(system_configs, db, k_ind, data_aug, debug): 33 | data_rng = system_configs.data_rng 34 | batch_size = system_configs.batch_size 35 | 36 | categories = db.configs["categories"] 37 | input_size = db.configs["input_size"] 38 | output_size = db.configs["output_sizes"][0] 39 | 40 | border = db.configs["border"] 41 | lighting = db.configs["lighting"] 42 | rand_crop = db.configs["rand_crop"] 43 | rand_color = db.configs["rand_color"] 44 | rand_scales = db.configs["rand_scales"] 45 | gaussian_bump = db.configs["gaussian_bump"] 46 | gaussian_iou = db.configs["gaussian_iou"] 47 | gaussian_rad = db.configs["gaussian_radius"] 48 | 49 | max_tag_len = 128 50 | 51 | # allocating memory 52 | images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32) 53 | tl_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) 54 | br_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) 55 | tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) 56 | br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) 57 | tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) 58 | br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) 59 | tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8) 60 | tag_lens = np.zeros((batch_size, ), dtype=np.int32) 61 | 62 | db_size = db.db_inds.size 63 | for b_ind in range(batch_size): 64 | if not debug and k_ind == 0: 65 | db.shuffle_inds() 66 | 67 | db_ind = db.db_inds[k_ind] 68 | k_ind = (k_ind + 1) % db_size 69 | 70 | # reading image 71 | image_path = db.image_path(db_ind) 72 | image = cv2.imread(image_path) 73 | 74 | # reading detections 75 | detections = db.detections(db_ind) 76 | 77 | # cropping an image randomly 78 | if not debug and rand_crop: 79 | image, detections = random_crop(image, detections, rand_scales, input_size, border=border) 80 | 81 | image, detections = _resize_image(image, detections, input_size) 82 | detections = _clip_detections(image, detections) 83 | 84 | 
width_ratio = output_size[1] / input_size[1] 85 | height_ratio = output_size[0] / input_size[0] 86 | 87 | # flipping an image randomly 88 | if not debug and np.random.uniform() > 0.5: 89 | image[:] = image[:, ::-1, :] 90 | width = image.shape[1] 91 | detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1 92 | 93 | if not debug: 94 | image = image.astype(np.float32) / 255. 95 | if rand_color: 96 | color_jittering_(data_rng, image) 97 | if lighting: 98 | lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec) 99 | normalize_(image, db.mean, db.std) 100 | images[b_ind] = image.transpose((2, 0, 1)) 101 | 102 | for ind, detection in enumerate(detections): 103 | category = int(detection[-1]) - 1 104 | 105 | xtl, ytl = detection[0], detection[1] 106 | xbr, ybr = detection[2], detection[3] 107 | 108 | fxtl = (xtl * width_ratio) 109 | fytl = (ytl * height_ratio) 110 | fxbr = (xbr * width_ratio) 111 | fybr = (ybr * height_ratio) 112 | 113 | xtl = int(fxtl) 114 | ytl = int(fytl) 115 | xbr = int(fxbr) 116 | ybr = int(fybr) 117 | 118 | if gaussian_bump: 119 | width = detection[2] - detection[0] 120 | height = detection[3] - detection[1] 121 | 122 | width = math.ceil(width * width_ratio) 123 | height = math.ceil(height * height_ratio) 124 | 125 | if gaussian_rad == -1: 126 | radius = gaussian_radius((height, width), gaussian_iou) 127 | radius = max(0, int(radius)) 128 | else: 129 | radius = gaussian_rad 130 | 131 | draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius) 132 | draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius) 133 | else: 134 | tl_heatmaps[b_ind, category, ytl, xtl] = 1 135 | br_heatmaps[b_ind, category, ybr, xbr] = 1 136 | 137 | tag_ind = tag_lens[b_ind] 138 | tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl] 139 | br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr] 140 | tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl 141 | br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr 142 | tag_lens[b_ind] += 1 143 | 144 | for b_ind in range(batch_size): 145 | tag_len = tag_lens[b_ind] 146 | tag_masks[b_ind, :tag_len] = 1 147 | 148 | images = torch.from_numpy(images) 149 | tl_heatmaps = torch.from_numpy(tl_heatmaps) 150 | br_heatmaps = torch.from_numpy(br_heatmaps) 151 | tl_regrs = torch.from_numpy(tl_regrs) 152 | br_regrs = torch.from_numpy(br_regrs) 153 | tl_tags = torch.from_numpy(tl_tags) 154 | br_tags = torch.from_numpy(br_tags) 155 | tag_masks = torch.from_numpy(tag_masks) 156 | 157 | return { 158 | "xs": [images], 159 | "ys": [tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs, tl_tags, br_tags] 160 | }, k_ind 161 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/sample/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | 5 | def grayscale(image): 6 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 7 | 8 | def normalize_(image, mean, std): 9 | image -= mean 10 | image /= std 11 | 12 | def lighting_(data_rng, image, alphastd, eigval, eigvec): 13 | alpha = data_rng.normal(scale=alphastd, size=(3, )) 14 | image += np.dot(eigvec, eigval * alpha) 15 | 16 | def blend_(alpha, image1, image2): 17 | image1 *= alpha 18 | image2 *= (1 - alpha) 19 | image1 += image2 20 | 21 | def saturation_(data_rng, image, gs, gs_mean, var): 22 | alpha = 1. + data_rng.uniform(low=-var, high=var) 23 | blend_(alpha, image, gs[:, :, None]) 24 | 25 | def brightness_(data_rng, image, gs, gs_mean, var): 26 | alpha = 1. 
+ data_rng.uniform(low=-var, high=var) 27 | image *= alpha 28 | 29 | def contrast_(data_rng, image, gs, gs_mean, var): 30 | alpha = 1. + data_rng.uniform(low=-var, high=var) 31 | blend_(alpha, image, gs_mean) 32 | 33 | def color_jittering_(data_rng, image): 34 | functions = [brightness_, contrast_, saturation_] 35 | random.shuffle(functions) 36 | 37 | gs = grayscale(image) 38 | gs_mean = gs.mean() 39 | for f in functions: 40 | f(data_rng, image, gs, gs_mean, 0.4) 41 | 42 | def gaussian2D(shape, sigma=1): 43 | m, n = [(ss - 1.) / 2. for ss in shape] 44 | y, x = np.ogrid[-m:m+1,-n:n+1] 45 | 46 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 47 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 48 | return h 49 | 50 | def draw_gaussian(heatmap, center, radius, k=1): 51 | diameter = 2 * radius + 1 52 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 53 | 54 | x, y = center 55 | 56 | height, width = heatmap.shape[0:2] 57 | 58 | left, right = min(x, radius), min(width - x, radius + 1) 59 | top, bottom = min(y, radius), min(height - y, radius + 1) 60 | 61 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 62 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 63 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 64 | 65 | def gaussian_radius(det_size, min_overlap): 66 | height, width = det_size 67 | 68 | a1 = 1 69 | b1 = (height + width) 70 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 71 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 72 | r1 = (b1 - sq1) / (2 * a1) 73 | 74 | a2 = 4 75 | b2 = 2 * (height + width) 76 | c2 = (1 - min_overlap) * width * height 77 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 78 | r2 = (b2 - sq2) / (2 * a2) 79 | 80 | a3 = 4 * min_overlap 81 | b3 = -2 * min_overlap * (height + width) 82 | c3 = (min_overlap - 1) * width * height 83 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 84 | r3 = (b3 + sq3) / (2 * a3) 85 | return min(r1, r2, r3) 86 | 87 | def _get_border(border, size): 88 | i = 1 89 | while size - border // i <= border // i: 90 | i *= 2 91 | return border // i 92 | 93 | def random_crop(image, detections, random_scales, view_size, border=64): 94 | view_height, view_width = view_size 95 | image_height, image_width = image.shape[0:2] 96 | 97 | scale = np.random.choice(random_scales) 98 | height = int(view_height * scale) 99 | width = int(view_width * scale) 100 | 101 | cropped_image = np.zeros((height, width, 3), dtype=image.dtype) 102 | 103 | w_border = _get_border(border, image_width) 104 | h_border = _get_border(border, image_height) 105 | 106 | ctx = np.random.randint(low=w_border, high=image_width - w_border) 107 | cty = np.random.randint(low=h_border, high=image_height - h_border) 108 | 109 | x0, x1 = max(ctx - width // 2, 0), min(ctx + width // 2, image_width) 110 | y0, y1 = max(cty - height // 2, 0), min(cty + height // 2, image_height) 111 | 112 | left_w, right_w = ctx - x0, x1 - ctx 113 | top_h, bottom_h = cty - y0, y1 - cty 114 | 115 | # crop image 116 | cropped_ctx, cropped_cty = width // 2, height // 2 117 | x_slice = slice(cropped_ctx - left_w, cropped_ctx + right_w) 118 | y_slice = slice(cropped_cty - top_h, cropped_cty + bottom_h) 119 | cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :] 120 | 121 | # crop detections 122 | cropped_detections = detections.copy() 123 | cropped_detections[:, 0:4:2] -= x0 124 | cropped_detections[:, 1:4:2] -= y0 125 | cropped_detections[:, 0:4:2] += cropped_ctx - left_w 126 | cropped_detections[:, 1:4:2] += cropped_cty - top_h 127 | 
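    # Detections are only shifted into the crop's coordinate frame here; boxes that
    # now fall outside the crop are clipped (and empty ones dropped) later by the
    # sampler's _clip_detections.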
128 | return cropped_image, cropped_detections 129 | 130 | def crop_image(image, center, size, output_size=None): 131 | if output_size == None: 132 | output_size = size 133 | 134 | cty, ctx = center 135 | height, width = size 136 | o_height, o_width = output_size 137 | im_height, im_width = image.shape[0:2] 138 | cropped_image = np.zeros((o_height, o_width, 3), dtype=image.dtype) 139 | 140 | x0, x1 = max(0, ctx - width // 2), min(ctx + width // 2, im_width) 141 | y0, y1 = max(0, cty - height // 2), min(cty + height // 2, im_height) 142 | 143 | left, right = ctx - x0, x1 - ctx 144 | top, bottom = cty - y0, y1 - cty 145 | 146 | cropped_cty, cropped_ctx = o_height // 2, o_width // 2 147 | y_slice = slice(cropped_cty - top, cropped_cty + bottom) 148 | x_slice = slice(cropped_ctx - left, cropped_ctx + right) 149 | cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :] 150 | 151 | border = np.array([ 152 | cropped_cty - top, 153 | cropped_cty + bottom, 154 | cropped_ctx - left, 155 | cropped_ctx + right 156 | ], dtype=np.float32) 157 | 158 | offset = np.array([ 159 | cty - o_height // 2, 160 | ctx - o_width // 2 161 | ]) 162 | 163 | return cropped_image, border, offset 164 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/test/__init__.py: -------------------------------------------------------------------------------- 1 | from .cornernet import cornernet 2 | from .cornernet_saccade import cornernet_saccade 3 | 4 | def test_func(sys_config, db, nnet, result_dir, debug=False): 5 | return globals()[sys_config.sampling_function](db, nnet, result_dir, debug=debug) 6 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/test/cornernet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import json 4 | import numpy as np 5 | import torch 6 | 7 | from tqdm import tqdm 8 | 9 | from ..utils import Timer 10 | from ..vis_utils import draw_bboxes 11 | from ..sample.utils import crop_image 12 | from ..external.nms import soft_nms, soft_nms_merge 13 | 14 | def rescale_dets_(detections, ratios, borders, sizes): 15 | xs, ys = detections[..., 0:4:2], detections[..., 1:4:2] 16 | xs /= ratios[:, 1][:, None, None] 17 | ys /= ratios[:, 0][:, None, None] 18 | xs -= borders[:, 2][:, None, None] 19 | ys -= borders[:, 0][:, None, None] 20 | np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs) 21 | np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys) 22 | 23 | def decode(nnet, images, K, ae_threshold=0.5, kernel=3, num_dets=1000): 24 | detections = nnet.test([images], ae_threshold=ae_threshold, test=True, K=K, kernel=kernel, num_dets=num_dets)[0] 25 | return detections.data.cpu().numpy() 26 | 27 | def cornernet(db, nnet, result_dir, debug=False, decode_func=decode): 28 | debug_dir = os.path.join(result_dir, "debug") 29 | if not os.path.exists(debug_dir): 30 | os.makedirs(debug_dir) 31 | 32 | if db.split != "trainval2014": 33 | db_inds = db.db_inds[:100] if debug else db.db_inds 34 | else: 35 | db_inds = db.db_inds[:100] if debug else db.db_inds[:5000] 36 | 37 | num_images = db_inds.size 38 | categories = db.configs["categories"] 39 | 40 | timer = Timer() 41 | top_bboxes = {} 42 | for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"): 43 | db_ind = db_inds[ind] 44 | 45 | image_id = db.image_ids(db_ind) 46 | image_path = db.image_path(db_ind) 47 | image = cv2.imread(image_path) 48 | 49 | timer.tic() 50 | 
top_bboxes[image_id] = cornernet_inference(db, nnet, image) 51 | timer.toc() 52 | 53 | if debug: 54 | image_path = db.image_path(db_ind) 55 | image = cv2.imread(image_path) 56 | bboxes = { 57 | db.cls2name(j): top_bboxes[image_id][j] 58 | for j in range(1, categories + 1) 59 | } 60 | image = draw_bboxes(image, bboxes) 61 | debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind)) 62 | cv2.imwrite(debug_file, image) 63 | print('average time: {}'.format(timer.average_time)) 64 | 65 | result_json = os.path.join(result_dir, "results.json") 66 | detections = db.convert_to_coco(top_bboxes) 67 | with open(result_json, "w") as f: 68 | json.dump(detections, f) 69 | 70 | cls_ids = list(range(1, categories + 1)) 71 | image_ids = [db.image_ids(ind) for ind in db_inds] 72 | db.evaluate(result_json, cls_ids, image_ids) 73 | return 0 74 | 75 | def cornernet_inference(db, nnet, image, decode_func=decode): 76 | K = db.configs["top_k"] 77 | ae_threshold = db.configs["ae_threshold"] 78 | nms_kernel = db.configs["nms_kernel"] 79 | num_dets = db.configs["num_dets"] 80 | test_flipped = db.configs["test_flipped"] 81 | 82 | input_size = db.configs["input_size"] 83 | output_size = db.configs["output_sizes"][0] 84 | 85 | scales = db.configs["test_scales"] 86 | weight_exp = db.configs["weight_exp"] 87 | merge_bbox = db.configs["merge_bbox"] 88 | categories = db.configs["categories"] 89 | nms_threshold = db.configs["nms_threshold"] 90 | max_per_image = db.configs["max_per_image"] 91 | nms_algorithm = { 92 | "nms": 0, 93 | "linear_soft_nms": 1, 94 | "exp_soft_nms": 2 95 | }[db.configs["nms_algorithm"]] 96 | 97 | height, width = image.shape[0:2] 98 | 99 | height_scale = (input_size[0] + 1) // output_size[0] 100 | width_scale = (input_size[1] + 1) // output_size[1] 101 | 102 | im_mean = torch.cuda.FloatTensor(db.mean).reshape(1, 3, 1, 1) 103 | im_std = torch.cuda.FloatTensor(db.std).reshape(1, 3, 1, 1) 104 | 105 | detections = [] 106 | for scale in scales: 107 | new_height = int(height * scale) 108 | new_width = int(width * scale) 109 | new_center = np.array([new_height // 2, new_width // 2]) 110 | 111 | inp_height = new_height | 127 112 | inp_width = new_width | 127 113 | 114 | images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) 115 | ratios = np.zeros((1, 2), dtype=np.float32) 116 | borders = np.zeros((1, 4), dtype=np.float32) 117 | sizes = np.zeros((1, 2), dtype=np.float32) 118 | 119 | out_height, out_width = (inp_height + 1) // height_scale, (inp_width + 1) // width_scale 120 | height_ratio = out_height / inp_height 121 | width_ratio = out_width / inp_width 122 | 123 | resized_image = cv2.resize(image, (new_width, new_height)) 124 | resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width]) 125 | 126 | resized_image = resized_image / 255. 
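        # Pixels are scaled to [0, 1] here; mean/std normalization happens further
        # down on the GPU, after the optional horizontally flipped copy is stacked in.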
127 | 128 | images[0] = resized_image.transpose((2, 0, 1)) 129 | borders[0] = border 130 | sizes[0] = [int(height * scale), int(width * scale)] 131 | ratios[0] = [height_ratio, width_ratio] 132 | 133 | if test_flipped: 134 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 135 | images = torch.from_numpy(images).cuda() 136 | images -= im_mean 137 | images /= im_std 138 | 139 | dets = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel, num_dets=num_dets) 140 | if test_flipped: 141 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] 142 | dets = dets.reshape(1, -1, 8) 143 | 144 | rescale_dets_(dets, ratios, borders, sizes) 145 | dets[:, :, 0:4] /= scale 146 | detections.append(dets) 147 | 148 | detections = np.concatenate(detections, axis=1) 149 | 150 | classes = detections[..., -1] 151 | classes = classes[0] 152 | detections = detections[0] 153 | 154 | # reject detections with negative scores 155 | keep_inds = (detections[:, 4] > -1) 156 | detections = detections[keep_inds] 157 | classes = classes[keep_inds] 158 | 159 | top_bboxes = {} 160 | for j in range(categories): 161 | keep_inds = (classes == j) 162 | top_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32) 163 | if merge_bbox: 164 | soft_nms_merge(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp) 165 | else: 166 | soft_nms(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm) 167 | top_bboxes[j + 1] = top_bboxes[j + 1][:, 0:5] 168 | 169 | scores = np.hstack([top_bboxes[j][:, -1] for j in range(1, categories + 1)]) 170 | if len(scores) > max_per_image: 171 | kth = len(scores) - max_per_image 172 | thresh = np.partition(scores, kth)[kth] 173 | for j in range(1, categories + 1): 174 | keep_inds = (top_bboxes[j][:, -1] >= thresh) 175 | top_bboxes[j] = top_bboxes[j][keep_inds] 176 | return top_bboxes 177 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .tqdm import stdout_to_tqdm 2 | from .timer import Timer 3 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | class Timer(object): 4 | """A simple timer.""" 5 | def __init__(self): 6 | self.total_time = 0. 7 | self.calls = 0 8 | self.start_time = 0. 9 | self.diff = 0. 10 | self.average_time = 0. 
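        # Typical usage: call tic() just before the timed operation and toc() just
        # after; toc() returns the running average, or the last interval when
        # called with average=False.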
11 | 12 | def tic(self): 13 | # using time.time instead of time.clock because time time.clock 14 | # does not normalize for multithreading 15 | self.start_time = time.time() 16 | 17 | def toc(self, average=True): 18 | self.diff = time.time() - self.start_time 19 | self.total_time += self.diff 20 | self.calls += 1 21 | self.average_time = self.total_time / self.calls 22 | if average: 23 | return self.average_time 24 | else: 25 | return self.diff 26 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/utils/tqdm.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import contextlib 4 | 5 | from tqdm import tqdm 6 | 7 | class TqdmFile(object): 8 | dummy_file = None 9 | def __init__(self, dummy_file): 10 | self.dummy_file = dummy_file 11 | 12 | def write(self, x): 13 | if len(x.rstrip()) > 0: 14 | tqdm.write(x, file=self.dummy_file) 15 | 16 | @contextlib.contextmanager 17 | def stdout_to_tqdm(): 18 | save_stdout = sys.stdout 19 | try: 20 | sys.stdout = TqdmFile(sys.stdout) 21 | yield save_stdout 22 | except Exception as exc: 23 | raise exc 24 | finally: 25 | sys.stdout = save_stdout 26 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/vis_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | def draw_bboxes(image, bboxes, font_size=0.5, thresh=0.5, colors=None): 5 | """Draws bounding boxes on an image. 6 | 7 | Args: 8 | image: An image in OpenCV format 9 | bboxes: A dictionary representing bounding boxes of different object 10 | categories, where the keys are the names of the categories and the 11 | values are the bounding boxes. The bounding boxes of category should be 12 | stored in a 2D NumPy array, where each row is a bounding box (x1, y1, 13 | x2, y2, score). 14 | font_size: (Optional) Font size of the category names. 15 | thresh: (Optional) Only bounding boxes with scores above the threshold 16 | will be drawn. 17 | colors: (Optional) Color of bounding boxes for each category. If it is 18 | not provided, this function will use random color for each category. 19 | 20 | Returns: 21 | An image with bounding boxes. 
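
    Example (illustrative only; the category name and box values are made up):
        bboxes = {"valve": np.array([[20, 30, 120, 160, 0.9]], dtype=np.float32)}
        image = draw_bboxes(image, bboxes, thresh=0.5)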
22 | """ 23 | 24 | image = image.copy() 25 | for cat_name in bboxes: 26 | keep_inds = bboxes[cat_name][:, -1] > thresh 27 | cat_size = cv2.getTextSize(cat_name, cv2.FONT_HERSHEY_SIMPLEX, font_size, 2)[0] 28 | 29 | if colors is None: 30 | color = np.random.random((3, )) * 0.6 + 0.4 31 | color = (color * 255).astype(np.int32).tolist() 32 | else: 33 | color = colors[cat_name] 34 | 35 | for bbox in bboxes[cat_name][keep_inds]: 36 | bbox = bbox[0:4].astype(np.int32) 37 | if bbox[1] - cat_size[1] - 2 < 0: 38 | cv2.rectangle(image, 39 | (bbox[0], bbox[1] + 2), 40 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2), 41 | color, -1 42 | ) 43 | cv2.putText(image, cat_name, 44 | (bbox[0], bbox[1] + cat_size[1] + 2), 45 | cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), thickness=1 46 | ) 47 | else: 48 | cv2.rectangle(image, 49 | (bbox[0], bbox[1] - cat_size[1] - 2), 50 | (bbox[0] + cat_size[0], bbox[1] - 2), 51 | color, -1 52 | ) 53 | cv2.putText(image, cat_name, 54 | (bbox[0], bbox[1] - 2), 55 | cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), thickness=1 56 | ) 57 | cv2.rectangle(image, 58 | (bbox[0], bbox[1]), 59 | (bbox[2], bbox[3]), 60 | color, 2 61 | ) 62 | return image 63 | -------------------------------------------------------------------------------- /perception/corner_net_lite/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/demo.jpg -------------------------------------------------------------------------------- /perception/corner_net_lite/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cv2 4 | from core.detectors import CornerNet_Saccade 5 | from core.vis_utils import draw_bboxes 6 | 7 | detector = CornerNet_Saccade() 8 | image = cv2.imread("demo.jpg") 9 | 10 | bboxes = detector(image) 11 | image = draw_bboxes(image, bboxes) 12 | cv2.imwrite("demo_out.jpg", image) 13 | -------------------------------------------------------------------------------- /perception/corner_net_lite/evaluate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import json 4 | import torch 5 | import pprint 6 | import argparse 7 | import importlib 8 | 9 | from core.dbs import datasets 10 | from core.test import test_func 11 | from core.config import SystemConfig 12 | from core.nnet.py_factory import NetworkFactory 13 | 14 | torch.backends.cudnn.benchmark = False 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description="Evaluation Script") 18 | parser.add_argument("cfg_file", help="config file", type=str) 19 | parser.add_argument("--testiter", dest="testiter", 20 | help="test at iteration i", 21 | default=None, type=int) 22 | parser.add_argument("--split", dest="split", 23 | help="which split to use", 24 | default="validation", type=str) 25 | parser.add_argument("--suffix", dest="suffix", default=None, type=str) 26 | parser.add_argument("--debug", action="store_true") 27 | 28 | args = parser.parse_args() 29 | return args 30 | 31 | def make_dirs(directories): 32 | for directory in directories: 33 | if not os.path.exists(directory): 34 | os.makedirs(directory) 35 | 36 | def test(db, system_config, model, args): 37 | split = args.split 38 | testiter = args.testiter 39 | debug = args.debug 40 | suffix = args.suffix 41 | 42 | result_dir = system_config.result_dir 43 | 
result_dir = os.path.join(result_dir, str(testiter), split) 44 | 45 | if suffix is not None: 46 | result_dir = os.path.join(result_dir, suffix) 47 | 48 | make_dirs([result_dir]) 49 | 50 | test_iter = system_config.max_iter if testiter is None else testiter 51 | print("loading parameters at iteration: {}".format(test_iter)) 52 | 53 | print("building neural network...") 54 | nnet = NetworkFactory(system_config, model) 55 | print("loading parameters...") 56 | nnet.load_params(test_iter) 57 | 58 | nnet.cuda() 59 | nnet.eval_mode() 60 | test_func(system_config, db, nnet, result_dir, debug=debug) 61 | 62 | def main(args): 63 | if args.suffix is None: 64 | cfg_file = os.path.join("./configs", args.cfg_file + ".json") 65 | else: 66 | cfg_file = os.path.join("./configs", args.cfg_file + "-{}.json".format(args.suffix)) 67 | print("cfg_file: {}".format(cfg_file)) 68 | 69 | with open(cfg_file, "r") as f: 70 | config = json.load(f) 71 | 72 | config["system"]["snapshot_name"] = args.cfg_file 73 | system_config = SystemConfig().update_config(config["system"]) 74 | 75 | model_file = "core.models.{}".format(args.cfg_file) 76 | model_file = importlib.import_module(model_file) 77 | model = model_file.model() 78 | 79 | train_split = system_config.train_split 80 | val_split = system_config.val_split 81 | test_split = system_config.test_split 82 | 83 | split = { 84 | "training": train_split, 85 | "validation": val_split, 86 | "testing": test_split 87 | }[args.split] 88 | 89 | print("loading all datasets...") 90 | dataset = system_config.dataset 91 | print("split: {}".format(split)) 92 | testing_db = datasets[dataset](config["db"], split=split, sys_config=system_config) 93 | 94 | print("system config...") 95 | pprint.pprint(system_config.full) 96 | 97 | print("db config...") 98 | pprint.pprint(testing_db.configs) 99 | 100 | test(testing_db, system_config, model, args) 101 | 102 | if __name__ == "__main__": 103 | args = parse_args() 104 | main(args) 105 | -------------------------------------------------------------------------------- /perception/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/datasets/__init__.py -------------------------------------------------------------------------------- /perception/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import random 3 | from torch.utils import data 4 | 5 | class RoundRobin(data.IterableDataset): 6 | """ 7 | This class will sample iterable datasets in a round robin fashion ad-ininitum. 8 | When a dataset runs out of juice, it will simply reset it. 
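    Iteration therefore never terminates on its own; an exhausted child iterator is
    simply re-created in place. Example (with hypothetical datasets):

        mixed = RoundRobin([front_camera_clips, side_camera_clips])
        for example in mixed:  # alternates between the two, forever
            ...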
9 | """ 10 | def __init__(self, datasets): 11 | self.datasets = datasets 12 | self.dataset_count = len(datasets) 13 | 14 | def __iter__(self): 15 | datasets = [iter(d) for d in self.datasets] 16 | i = 0 17 | while True: 18 | current_dataset = datasets[i] 19 | try: 20 | example = next(current_dataset) 21 | i = (i + 1) % self.dataset_count 22 | yield example 23 | except StopIteration as e: 24 | datasets[i] = iter(self.datasets[i]) 25 | continue 26 | 27 | class Chain(data.IterableDataset): 28 | def __init__(self, datasets, shuffle=True, infinite=False): 29 | self.shuffle = shuffle 30 | self.datasets = datasets 31 | self.infinite = infinite 32 | 33 | def __iter__(self): 34 | datasets = self.datasets 35 | if self.shuffle: 36 | random.shuffle(datasets) 37 | if self.infinite: 38 | for dataset in itertools.cycle(self.datasets): 39 | for item in dataset: 40 | try: 41 | yield item 42 | except StopIteration: 43 | continue 44 | else: 45 | for dataset in self.datasets: 46 | for item in dataset: 47 | yield item 48 | 49 | def __len__(self): 50 | return sum(len(d) for d in self.datasets) 51 | 52 | class SamplingPool(data.IterableDataset): 53 | """ 54 | Maintains a pool of N examples and samples randomly from that pool. 55 | Useful for mixing different iterable datasets together. 56 | """ 57 | def __init__(self, dataset, n=1000): 58 | self.dataset = dataset 59 | self.n = n 60 | 61 | def __iter__(self): 62 | pool = [] 63 | iterator = iter(self.dataset) 64 | for _ in range(self.n): 65 | try: 66 | pool.append(next(iterator)) 67 | except StopIteration: 68 | break 69 | 70 | while True: 71 | try: 72 | new_example = next(iterator) 73 | except StopIteration as e: 74 | break 75 | 76 | random_index = random.randint(0, len(pool)-1) 77 | yield pool[random_index] 78 | pool[random_index] = new_example 79 | 80 | # If the dataset is exhausted, empty the pool. 81 | while len(pool) > 0: 82 | random_index = random.randint(0, len(pool)-1) 83 | yield pool[random_index] 84 | del pool[random_index] 85 | 86 | def __len__(self): 87 | return len(self.dataset) 88 | 89 | -------------------------------------------------------------------------------- /perception/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.loss import _Loss 3 | from torch.nn import functional as F 4 | 5 | class KeypointLoss(_Loss): 6 | def __init__(self, keypoint_config, depth_weight=10.0, center_weight=1.0, size_average=None, reduce=None, reduction='mean'): 7 | super().__init__(size_average, reduce, reduction) 8 | self.keypoint_config = keypoint_config 9 | self.n_keypoint_maps = len(keypoint_config) + 1 # Add one for center map. 
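        # The total loss computed in forward() is: heatmap BCE + depth_weight *
        # masked L1 depth loss + center_weight * masked smooth-L1 center loss,
        # summed over the network's intermediate outputs and averaged over the batch.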
10 | self.depth_weight = depth_weight 11 | self.center_weight = center_weight 12 | if reduction == 'mean': 13 | self.reduce = torch.mean 14 | elif reduction == 'sum': 15 | self.reduce = torch.sum 16 | else: 17 | raise NotImplementedError("Unknown reduction method {reduction}, try 'mean' or 'sum'.") 18 | 19 | def forward(self, p_heatmaps, gt_heatmaps, p_depth, gt_depth, p_centers, gt_centers): 20 | """ 21 | predictions: N x D x H x W prediction tensor 22 | gt: N x D x H x W 23 | """ 24 | heatmap_loss = 0.0 25 | depth_loss = 0.0 26 | center_loss = 0.0 27 | heatmap_losses = [] 28 | N = float(gt_heatmaps.shape[0]) 29 | depth_losses = [] 30 | center_losses = [] 31 | for p_hm, p_d, p_center in zip(p_heatmaps, p_depth, p_centers): 32 | loss = F.binary_cross_entropy_with_logits(p_hm, gt_heatmaps, reduction='none').sum(dim=[1,2,3]).mean() 33 | heatmap_loss += loss 34 | heatmap_losses.append(loss) 35 | 36 | where_heat = gt_heatmaps > 0.01 37 | 38 | depth_l1 = F.l1_loss(p_d[where_heat], gt_depth[where_heat], reduction='sum') 39 | depth_loss += depth_l1 / N 40 | depth_losses.append(depth_l1) 41 | 42 | where_heat = where_heat[:, 1:, None].expand(-1, -1, 2, -1, -1) 43 | center_l1 = F.smooth_l1_loss(p_center[where_heat], gt_centers[where_heat], reduction='sum') 44 | center_loss += center_l1 / N 45 | center_losses.append(center_l1) 46 | 47 | loss = heatmap_loss + self.depth_weight * depth_loss + self.center_weight * center_loss 48 | return loss, heatmap_losses, depth_losses, center_losses 49 | 50 | -------------------------------------------------------------------------------- /perception/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch import nn 4 | from torch.nn import functional as F 5 | from perception.corner_net_lite.core.models import CornerNet_Squeeze 6 | from perception.corner_net_lite.core.models.py_utils.utils import convolution 7 | from perception.corner_net_lite.core.base import load_nnet, load_cfg 8 | from perception.corner_net_lite.core.config import SystemConfig 9 | from perception.corner_net_lite.core.nnet.py_factory import NetworkFactory 10 | import timm 11 | 12 | 13 | def prediction_module(int_features, features_out): 14 | return nn.Sequential( 15 | convolution(1, 256, int_features, with_bn=True), 16 | convolution(1, int_features, 32, with_bn=True), 17 | nn.Conv2d(32, features_out, (1, 1), bias=True) 18 | ) 19 | 20 | class HeatmapHead(nn.Module): 21 | def __init__(self, features, heatmaps): 22 | super().__init__() 23 | self.output_head1 = prediction_module(features, heatmaps) 24 | self.output_head2 = prediction_module(features, heatmaps) 25 | self.output_head1[-1].bias.data.fill_(0.01/0.99) 26 | self.output_head2[-1].bias.data.fill_(0.01/0.99) 27 | 28 | def forward(self, heatmaps): 29 | return self.output_head1(heatmaps[0]), self.output_head2(heatmaps[1]) 30 | 31 | class DepthHead(nn.Module): 32 | def __init__(self, features, heatmaps): 33 | super().__init__() 34 | self.output_head1 = prediction_module(features, heatmaps) 35 | self.output_head2 = prediction_module(features, heatmaps) 36 | 37 | def forward(self, x): 38 | out1 = self.output_head1(x[0]) 39 | out2 = self.output_head2(x[1]) 40 | return out1, out2 41 | 42 | class CenterHead(nn.Module): 43 | def __init__(self, features, heatmaps): 44 | super().__init__() 45 | self.outputs = heatmaps - 1 46 | self.output_head1 = prediction_module(features, self.outputs * 2) 47 | self.output_head2 = prediction_module(features, self.outputs * 2) 48 | 
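    # One 2-channel (x, y) regression map is predicted for every heatmap channel
    # except the object-center one, hence `self.outputs * 2` output channels;
    # forward() reshapes the result to (N, outputs, 2, H, W) to match the
    # gt_centers tensor used by KeypointLoss.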
49 |     def forward(self, x):
50 |         N, C, H, W = x[1].shape
51 |         out1 = self.output_head1(x[0])
52 |         out2 = self.output_head2(x[1])
53 |         return out1.reshape(N, self.outputs, 2, H, W), out2.reshape(N, self.outputs, 2, H, W)
54 | 
55 | def nms(x, size=5):
56 |     hmax = nn.functional.max_pool2d(x, (size, size), padding=size // 2, stride=1)
57 |     keep = (x == hmax).to(x.dtype)
58 |     return x * keep
59 | 
60 | class KeypointNet(nn.Module):
61 |     def __init__(self, output_size, features=128, heatmaps_out=2, dropout=0.1):
62 |         super().__init__()
63 |         self.backbone = self._build_hourglass()
64 |         self.heatmap_head = HeatmapHead(features, heatmaps_out)
65 |         self.depth_head = DepthHead(features, heatmaps_out)
66 |         self.center_head = CenterHead(features, heatmaps_out)
67 |         self.dropout = nn.Dropout(p=dropout)
68 | 
69 |     def _build_hourglass(self):
70 |         corner_net = CornerNet_Squeeze.model()
71 |         config, _ = load_cfg("./perception/corner_net_lite/configs/CornerNet_Squeeze.json")
72 |         sys_cfg = SystemConfig().update_config(config)
73 |         net = load_nnet(sys_cfg, corner_net)
74 |         if torch.cuda.is_available():
75 |             net.load_pretrained_params('./models/corner_net.pkl')
76 |         else:
77 |             print('Cuda not available. Will not load pretrained params')
78 |         return net.model.module.hg
79 | 
80 |     def forward(self, x):
81 |         features = [self.dropout(f) for f in self.backbone(x)]
82 |         heatmaps_out = self.heatmap_head(features)
83 |         depth_out = self.depth_head(features)
84 |         centers_out = self.center_head(features)
85 |         return heatmaps_out, depth_out, centers_out
86 | 
87 | 
--------------------------------------------------------------------------------
/perception/utils/__init__.py:
--------------------------------------------------------------------------------
1 | import time
2 | 
3 | class Rate:
4 |     def __init__(self, rate):
5 |         self.rate = rate
6 |         self.last_sleep = 0.0  # Far in the past, so the first call to sleep() returns immediately.
7 |         self.time_per_step = 1.0 / float(rate)
8 | 
9 |     def sleep(self):
10 |         now = time.time()
11 |         time_since_last = now - self.last_sleep
12 |         to_sleep = max(self.time_per_step - time_since_last, 0.0)
13 |         time.sleep(to_sleep)
14 |         self.last_sleep = now + to_sleep  # Measure the next interval from when this sleep ends.
15 | 
--------------------------------------------------------------------------------
/perception/utils/camera_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import yaml
5 | from .
import linalg 6 | 7 | class PinholeCamera: 8 | def __init__(self, K, D, image_size): 9 | # Camera matrix 10 | self.K = K 11 | self.Kinv = np.linalg.inv(K) 12 | # Distortion parameters 13 | self.D = D 14 | # height, width 15 | self.image_size = np.array(image_size) 16 | assert np.abs(K[0, 2] * 2.0 - image_size[1]) < 0.05 * image_size[1] 17 | 18 | def scale(self, scale): 19 | K = scale_camera_matrix(self.K, np.ones(2) * scale) 20 | return FisheyeCamera(K, self.D, self.image_size * scale) 21 | 22 | def cut(self, offset): 23 | cx = self.K[0, 2] - offset[0] 24 | cy = self.K[1, 2] - offset[1] 25 | K = self.K.copy() 26 | K[0, 2] = cx 27 | K[1, 2] = cy 28 | image_size = self.image_size - 2.0 * offset[::-1] 29 | return FisheyeCamera(K, self.D, image_size) 30 | 31 | def unproject(self, xys, zs): 32 | xs = np.concatenate([xys, np.ones((xys.shape[0], 1))], axis=1) 33 | X = (self.Kinv @ xs[:, :, None])[:, :, 0] * zs[:, None] 34 | return X 35 | 36 | def in_frame(self, x): 37 | """ 38 | x: N x 2 array of points in image frame 39 | returns: N array of boolean values 40 | """ 41 | under = (x <= 0.0).any(axis=1) 42 | over = (x >= self.image_size).any(axis=1) 43 | return np.bitwise_or(under, over) == False 44 | 45 | class RadTanPinholeCamera(PinholeCamera): 46 | def project(self, X, T_CW=np.eye(4)): 47 | """ 48 | X: N x 3 points in world frame as define by T_CW 49 | returns: N x 2 points in image coordinates. 50 | """ 51 | R, _ = cv2.Rodrigues(T_CW[:3, :3]) 52 | x, _ = cv2.projectPoints(X[:, None, :], R, T_CW[:3, 3], self.K, self.D) 53 | x = x[:, 0] 54 | return x 55 | 56 | def undistort(self, xy): 57 | """ 58 | xy: N x 2 image points 59 | returns: N x 2 undistorted image points. 60 | """ 61 | return cv2.undistortPoints(xy[:, None, :], self.K, self.D, 62 | P=self.K)[:, 0, :] 63 | 64 | class FisheyeCamera(PinholeCamera): 65 | def project(self, X, T_CW=np.eye(4)): 66 | """ 67 | X: N x 3 points in world frame as define by T_CW 68 | returns: N x 2 points in image coordinates. 69 | """ 70 | R, _ = cv2.Rodrigues(T_CW[:3, :3]) 71 | x, _ = cv2.fisheye.projectPoints(X[:, None, :], R, T_CW[:3, 3], self.K, self.D) 72 | x = x[:, 0] 73 | return x 74 | 75 | def undistort(self, xy): 76 | """ 77 | xy: N x 2 image points 78 | returns: N x 2 undistorted image points. 
79 | """ 80 | return cv2.fisheye.undistortPoints(xy[:, None, :], self.K, self.D, 81 | P=self.K)[:, 0, :] 82 | 83 | 84 | class StereoCamera: 85 | def __init__(self, left_camera, right_camera, T_RL): 86 | self.left_camera = left_camera 87 | self.right_camera = right_camera 88 | self.T_RL = T_RL 89 | self.T_LR = linalg.inv_transform(T_RL) 90 | self.F = fundamental_matrix(T_RL, self.left_camera.K, self.right_camera.K) 91 | 92 | def triangulate(self, left_keypoints, right_keypoints): 93 | left_keypoints = left_keypoints[:, None, :].astype(np.float32) 94 | right_keypoints = right_keypoints[:, None, :].astype(np.float32) 95 | undistorted_left = cv2.fisheye.undistortPoints(left_keypoints, self.left_camera.K, self.left_camera.D, 96 | P=self.left_camera.K)[:, 0, :] 97 | undistorted_right = cv2.fisheye.undistortPoints(right_keypoints, self.right_camera.K, self.right_camera.D, 98 | P=self.right_camera.K)[:, 0, :] 99 | 100 | corrected_left, corrected_right = cv2.correctMatches(self.F, undistorted_left[None], undistorted_right[None]) 101 | corrected_left, corrected_right = corrected_left[0], corrected_right[0] 102 | 103 | P1 = self.left_camera.K @ np.eye(3, 4) 104 | P2 = self.right_camera.K @ self.T_RL[:3] 105 | p_LK = cv2.triangulatePoints( 106 | P1, P2, corrected_left.T, corrected_right.T 107 | ).T # N x 4 108 | p_LK = p_LK[:, :3] / p_LK[:, 3:4] 109 | 110 | return p_LK 111 | 112 | @classmethod 113 | def from_file(cls, calibration_file): 114 | camera = load_calibration_params(calibration_file) 115 | left_camera = FisheyeCamera(camera['K'], camera['D'], camera['image_size']) 116 | right_camera = FisheyeCamera(camera['Kp'], camera['Dp'], camera['image_size']) 117 | return cls(left_camera, right_camera, camera['T_RL']) 118 | 119 | def camera_matrix(intrinsics): 120 | fx, fy, cx, cy = intrinsics 121 | return np.array([[fx, 0., cx], 122 | [0., fy, cy], 123 | [0., 0., 1.]]) 124 | 125 | def projection_matrix(camera_matrix, T_CW): 126 | """ 127 | camera_matrix: 3 x 3 camera calibration matrix. 128 | T_CW: 4x4 matrix transform from global to camera frame. 
129 | """ 130 | return camera_matrix @ T_CW[:3, :] 131 | 132 | def from_calibration(calibration_file): 133 | with open(calibration_file, 'rt') as f: 134 | calibration = yaml.load(f.read(), Loader=yaml.SafeLoader) 135 | camera = calibration['cam0'] 136 | 137 | K = camera_matrix(camera['intrinsics']) 138 | D = np.array(camera['distortion_coeffs']) 139 | if camera['distortion_model'] == 'equidistant' and camera['camera_model'] == 'pinhole': 140 | return FisheyeCamera(K, D, camera['resolution'][::-1]) 141 | elif camera['distortion_model'] == 'radtan' and camera['camera_model'] == 'pinhole': 142 | return RadTanPinholeCamera(K, D, camera['resolution'][::-1]) 143 | else: 144 | raise ValueError(f"Unrecognized calibration type {camera['distortion_model']}.") 145 | 146 | def load_calibration_params(calibration_file): 147 | with open(calibration_file, 'rt') as f: 148 | calibration = yaml.load(f.read(), Loader=yaml.SafeLoader) 149 | 150 | left = calibration['cam0'] 151 | K = camera_matrix(left['intrinsics']) 152 | right = calibration['cam1'] 153 | Kp = camera_matrix(right['intrinsics']) 154 | D = np.array(calibration['cam0']['distortion_coeffs']) 155 | Dp = np.array(calibration['cam1']['distortion_coeffs']) 156 | 157 | T_RL = np.array(calibration['cam1']['T_cn_cnm1']) 158 | T_LR = np.eye(4) 159 | T_LR[:3, :3] = T_RL[:3, :3].transpose() 160 | T_LR[:3, 3] = -T_LR[:3, :3] @ T_RL[:3, 3] 161 | image_size = calibration['cam1']['resolution'][::-1] 162 | return { 163 | 'K': K, 164 | 'Kp': Kp, 165 | 'D': D, 166 | 'Dp': Dp, 167 | 'T_LR': T_LR, 168 | 'T_RL': T_RL, 169 | 'image_size': image_size 170 | } 171 | 172 | def scale_camera_matrix(K, scaling_factor): 173 | """ 174 | K: 3 x 3 camera matrix 175 | scaling_factor: array of length 2, x and y scaling factor. 176 | """ 177 | out = K.copy() 178 | out[0, 0] = K[0, 0] * scaling_factor[0] 179 | out[1, 1] = K[1, 1] * scaling_factor[1] 180 | out[0, 2] = K[0, 2] * scaling_factor[0] 181 | out[1, 2] = K[1, 2] * scaling_factor[1] 182 | return out 183 | 184 | def fundamental_matrix(T_RL, K, Kp): 185 | R = T_RL[:3, :3] 186 | t = T_RL[:3, 3] 187 | 188 | C = linalg.skew_matrix(K @ R.T @ t) 189 | return np.linalg.inv(Kp).T @ R @ K.T @ C 190 | 191 | -------------------------------------------------------------------------------- /perception/utils/clustering_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import cluster 3 | 4 | 5 | class KeypointClustering: 6 | def __init__(self, bandwidth): 7 | self.clustering = cluster.MeanShift(bandwidth=bandwidth, cluster_all=True, bin_seeding=True, 8 | min_bin_freq=1) 9 | self.past_clusters = None 10 | 11 | def __call__(self, indices): 12 | """ 13 | indices: N x D numpy array of image coordinates to be clustered. 14 | returns: C x D keypoint estimates. C is the amount of clusters found. 
15 | """ 16 | self.clustering.fit(indices) 17 | return self.clustering.cluster_centers_, self.clustering.labels_ 18 | 19 | -------------------------------------------------------------------------------- /perception/utils/linalg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial.transform import Rotation 3 | 4 | def skew_matrix(v): 5 | return np.array([[0.0, -v[2], v[1]], 6 | [v[2], 0.0, -v[0]], 7 | [-v[1], v[0], 0.0]], dtype=v.dtype) 8 | 9 | def inv_transform(T): 10 | out = np.eye(4, dtype=T.dtype) 11 | out[:3, :3] = T[:3, :3].T 12 | out[:3, 3] = -out[:3,:3] @ T[:3, 3] 13 | return out 14 | 15 | def transform_points(T, points): 16 | """ 17 | T: 4 x 4 numpy matrix 18 | points: ... x 3 numpy matrix 19 | """ 20 | return (T[:3, :3] @ points[..., None])[..., 0] + T[:3, 3] 21 | 22 | def angle_between(R1, R2): 23 | return Rotation.from_matrix(R1.T @ R2).as_euler('xyz', degrees=False) 24 | -------------------------------------------------------------------------------- /perception/utils/ros.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial.transform import Rotation 3 | from geometry_msgs import msg as geometry_msgs 4 | 5 | def message_to_transform(message): 6 | T = np.eye(4) 7 | t = message.transform.translation 8 | r = message.transform.rotation 9 | R = Rotation.from_quat([r.x, r.y, r.z, r.w]) 10 | T[:3, 3] = np.array([t.x, t.y, t.z]) 11 | T[:3, :3] = R.as_matrix() 12 | return T 13 | 14 | def transform_to_message(T, parent_frame, child_frame, timestamp): 15 | msg = geometry_msgs.TransformStamped() 16 | msg.header.stamp = timestamp 17 | msg.header.frame_id = parent_frame 18 | msg.child_frame_id = child_frame 19 | msg.transform.translation.x = T[0, 3] 20 | msg.transform.translation.y = T[1, 3] 21 | msg.transform.translation.z = T[2, 3] 22 | quat = Rotation.from_matrix(T[:3, :3]).as_quat() 23 | msg.transform.rotation.x = quat[0] 24 | msg.transform.rotation.y = quat[1] 25 | msg.transform.rotation.z = quat[2] 26 | msg.transform.rotation.w = quat[3] 27 | return msg 28 | 29 | def transform_to_pose(T, frame, timestamp): 30 | msg = geometry_msgs.PoseStamped() 31 | msg.header.stamp = timestamp 32 | msg.header.frame_id = frame 33 | msg.pose.position.x = T[0, 3] 34 | msg.pose.position.y = T[1, 3] 35 | msg.pose.position.z = T[2, 3] 36 | q = Rotation.from_matrix(T[:3, :3]).as_quat() 37 | msg.pose.orientation.x = q[0] 38 | msg.pose.orientation.y = q[1] 39 | msg.pose.orientation.z = q[2] 40 | msg.pose.orientation.w = q[3] 41 | return msg 42 | -------------------------------------------------------------------------------- /perception/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | from datetime import datetime 4 | 5 | class Timing: 6 | def __init__(self): 7 | self.times = {} 8 | self.finished = {} 9 | 10 | def start(self, tag): 11 | assert tag not in self.times, f"{tag} already started" 12 | start = datetime.now() 13 | self.times[tag] = start 14 | 15 | def end(self, tag): 16 | start = self.times[tag] 17 | values = self.finished.get(tag, []) 18 | diff = datetime.now() - start 19 | values.append(diff.total_seconds()) 20 | self.finished[tag] = values 21 | del self.times[tag] 22 | 23 | def print(self): 24 | space = " " * 9 25 | header = f"|\ttag{space}\t|\tavg\t|\tvar\t|" 26 | print(header) 27 | print("-" * (len(header) + 16)) 28 | for tag, values in 
self.finished.items(): 29 | avg = np.mean(values) 30 | name = tag[:12] + " " * (12 - len(tag)) 31 | std = np.std(values) 32 | print(f"|\t{name}\t|\t{avg}\t|\t{std}\t|") 33 | 34 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | h5py==3.1.0 2 | scikit-video==1.1.11 3 | albumentations 4 | efficientnet_pytorch 5 | pytorch-lightning==1.2.1 6 | scikit-learn==0.24.1 7 | timm==0.4.5 8 | rich 9 | numba 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/collect_bags.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import curses 3 | import os 4 | import subprocess 5 | import time 6 | 7 | TOPICS_TO_RECORD = [ 8 | '/tf_static', 9 | '/tf', 10 | '/zedm/zed_node/left_raw/camera_info', 11 | '/zedm/zed_node/left_raw/image_raw_color', 12 | '/zedm/zed_node/right_raw/camera_info', 13 | '/zedm/zed_node/right_raw/image_raw_color', 14 | '/joint_states' 15 | ] 16 | 17 | def read_args(): 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--out', '-o', type=str, default="~/data/bags") 20 | return parser.parse_args() 21 | 22 | WAITING = "Waiting for command." 23 | STARTING = "Starting to record bag." 24 | RECORDING = "Recording bag..." 25 | 26 | class Program: 27 | def __init__(self, screen, flags): 28 | self.screen = screen 29 | self.flags = flags 30 | self._stdout = [] 31 | self.status_line = WAITING 32 | self._inventory() 33 | self._refresh_screen() 34 | 35 | def _inventory(self): 36 | files = sorted(os.listdir(self.flags.out)) 37 | self.current_file = 0 38 | self._recorded_bags = [] 39 | for f in files: 40 | filepath = os.path.join(self.flags.out, f) 41 | if '.bag' in f: 42 | self._recorded_bags.append(filepath) 43 | self.current_file += 1 44 | 45 | def _refresh_screen(self): 46 | self.screen.clear() 47 | self.screen.addstr(0, 0, self.status_line) 48 | for i, filepath in enumerate(self._recorded_bags): 49 | bagname = os.path.basename(filepath) 50 | self.screen.addstr(i + 2, 0, bagname) 51 | 52 | (height, width) = self.screen.getmaxyx() 53 | for i, line in enumerate(self._stdout[-20:]): 54 | self.screen.addstr(height // 2 + i, 0, line) 55 | self.screen.refresh() 56 | 57 | def _add_bag(self, filepath): 58 | self._recorded_bags.append(filepath) 59 | self.current_file += 1 60 | 61 | def _read_stdout(self, process): 62 | text = process.stdout.decode('utf-8') 63 | for line in text.split('\n'): 64 | self._stdout.append(line) 65 | 66 | def _record_bag(self): 67 | self.status_line = STARTING 68 | self._refresh_screen() 69 | time.sleep(5) 70 | filename = '{:03}.bag'.format(self.current_file) 71 | filepath = os.path.join(self.flags.out, filename) 72 | self.status_line = RECORDING 73 | self._refresh_screen() 74 | try: 75 | process = subprocess.run(['rosbag', 'record', '--buffsize=0', '--chunksize=524288', '--output-name', filepath, '--duration', '30'] + TOPICS_TO_RECORD, 76 | stdout=subprocess.PIPE, check=True) 77 | except subprocess.CalledProcessError as e: 78 | print(e) 79 | exit() 80 | 81 | self._read_stdout(process) 82 | self._add_bag(filepath) 83 | self.status_line = WAITING 84 | self._refresh_screen() 85 | 86 | def run(self): 87 | while True: 88 | keypress = self.screen.getkey() 89 | if keypress == 'q': 90 | curses.endwin() 91 | return 92 | elif keypress == '\n': 93 | self._record_bag() 94 | 95 | def main(screen): 96 | curses.noecho() 97 | flags = 
read_args() 98 | flags.out = os.path.expanduser(flags.out) 99 | 100 | os.makedirs(flags.out, exist_ok=True) 101 | 102 | program = Program(screen, flags) 103 | program.run() 104 | 105 | 106 | if __name__ == "__main__": 107 | curses.wrapper(main) 108 | 109 | 110 | -------------------------------------------------------------------------------- /scripts/constants.py: -------------------------------------------------------------------------------- 1 | import hud 2 | import numpy as np 3 | 4 | def _to_camera_matrix(proj): 5 | return np.array([[proj[0], 0., proj[2]], 6 | [0., proj[1], proj[3]], 7 | [0., 0., 1.]], dtype=np.float64) 8 | 9 | KEYPOINT_FILENAME = 'keypoints.json' 10 | IMAGE_HEIGHT = 720 11 | IMAGE_WIDTH = 1280 12 | IMAGE_RECT = hud.Rect(0, 0, IMAGE_WIDTH, IMAGE_HEIGHT) 13 | image_size = (int(IMAGE_RECT.width), int(IMAGE_RECT.height)) 14 | KEYPOINT_COLOR = np.array([1.0, 0.0, 0.0, 1.0]) 15 | 16 | -------------------------------------------------------------------------------- /scripts/encode_bag.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import rospy 4 | import shutil 5 | import rosbag 6 | import subprocess 7 | import numpy as np 8 | import tf2_py as tf2 9 | import h5py 10 | import skvideo.io 11 | from time import time 12 | from argparse import ArgumentParser 13 | from PIL import Image 14 | from cv_bridge import CvBridge 15 | from geometry_msgs import msg 16 | from perception.utils import ros as ros_utils 17 | 18 | def read_args(): 19 | parser = ArgumentParser() 20 | parser.add_argument('--bags', required=True, help="Path to directory containing rosbags.") 21 | parser.add_argument('--out', '-o', required=True, help="Where to write output files.") 22 | parser.add_argument('--skip', default=0, type=int, help="Skip the first n bags.") 23 | parser.add_argument('--until', default=None, type=int, help="Encode until the nth bag.") 24 | parser.add_argument('--topics', nargs="+", required=True, help="Which topics to encode into the stream.") 25 | parser.add_argument('--frames', nargs='+', required=True, help="The coordinate frames corresponding to the optical frames of each camera topic given to the --topics argument.") 26 | parser.add_argument('--base-frame', default='panda_link0', help="The name of the base tf frame.") 27 | return parser.parse_args() 28 | 29 | bridge = CvBridge() 30 | 31 | def _write_images(folder, data): 32 | for item in data: 33 | image = bridge.imgmsg_to_cv2(item['message'], desired_encoding='rgb8') 34 | image = Image.fromarray(image) 35 | image.save('/tmp/encode_bags_tmp/{}/{:05}.png'.format(folder, item['i'])) 36 | print('Writing /tmp/encode_bags_tmp/{}/{:05}.png'.format(folder, item['i']), end='\r') 37 | print("") 38 | 39 | def _encode_full_video(data, filepath): 40 | writer = skvideo.io.FFmpegWriter(filepath, outputdict={ 41 | '-vcodec': 'libx264', 42 | '-crf': '0', 43 | '-preset': 'fast', 44 | '-framerate': '30' 45 | }) 46 | try: 47 | for i, item in enumerate(data): 48 | print(f"Encoding frame {i:06}", end='\r') 49 | frame = bridge.imgmsg_to_cv2(item['message'], desired_encoding='rgb8') 50 | writer.writeFrame(frame) 51 | finally: 52 | writer.close() 53 | 54 | def _encode_preview(video_file, preview_file): 55 | subprocess.run(['ffmpeg', '-i', video_file, '-c:a', 'copy', 56 | '-framerate', '30', '-c:v', 'libx264', '-crf', '24', '-vf', 'scale=1280:-1', 57 | '-preset', 'fast', '-y', preview_file]) 58 | 59 | class Runner: 60 | def __init__(self): 61 | self.flags = read_args() 62 | 
self._find_bags() 63 | 64 | def _find_bags(self): 65 | filenames = os.listdir(self.flags.bags) 66 | self._bags = [] 67 | for filename in filenames: 68 | path = os.path.join(self.flags.bags, filename) 69 | if '.bag' in path: 70 | self._bags.append(path) 71 | self._bags.sort() 72 | 73 | def _read_poses(self, out_folder, bag): 74 | print("Reading poses") 75 | tf_tree = tf2.BufferCore(rospy.Duration(360000.0)) 76 | for topic, message, t in bag.read_messages(topics=["/tf", "/tf_static"]): 77 | for tf_message in message.transforms: 78 | if topic == '/tf_static': 79 | tf_tree.set_transform_static(tf_message, f"bag/{topic}") 80 | else: 81 | tf_tree.set_transform(tf_message, f'bag/{topic}') 82 | 83 | return tf_tree 84 | 85 | def _gather_images(self, bag): 86 | image_messages = [] 87 | for topic in self.flags.topics: 88 | for _, message, t in bag.read_messages(topics=topic): 89 | i = len(image_messages) 90 | print("image {:05} time: {}".format(i, t), end="\r") 91 | image_messages.append({ 92 | 'message': message, 93 | 'i': i, 94 | 't': t.to_sec() 95 | }) 96 | 97 | return image_messages 98 | 99 | def _gather_poses(self, tf_tree, image_messages): 100 | print("Looking up poses") 101 | pose_data = [] 102 | i = 0 103 | for frame in self.flags.frames: 104 | for item in image_messages: 105 | try: 106 | # Reminder: ^{B}T^{A} = T_BA = lookup_transform(source_frame=A, target_frame=B) 107 | T_BC = ros_utils.message_to_transform(tf_tree.lookup_transform_core(target_frame=self.flags.base_frame, 108 | source_frame=frame, time=item['message'].header.stamp)) 109 | item['camera_pose'] = T_BC 110 | item['i'] = i # Override index as some frames might have been skipped. 111 | pose_data.append(item) 112 | i += 1 113 | except tf2.ExtrapolationException: 114 | print("Extrapolation exception. 
Skipping entry {}.".format(i)) 115 | 116 | return pose_data 117 | 118 | def _create_out_folder(self, bag_name): 119 | out_folder = os.path.join(self.flags.out, bag_name.split(os.path.extsep)[0]) 120 | os.makedirs(out_folder, exist_ok=True) 121 | return out_folder 122 | 123 | def _write_poses(self, out_file, poses): 124 | transforms = out_file.create_dataset('camera_transform', (len(poses), 4, 4), dtype=np.float64) 125 | for i, pose in enumerate(poses): 126 | transforms[i] = pose['camera_pose'] 127 | 128 | def _encode_video(self, bag_name, frame_data): 129 | out_folder = os.path.join(self.flags.out, bag_name.split(os.path.extsep)[0]) 130 | 131 | out_file = os.path.join(out_folder, 'frames.mp4') 132 | preview = os.path.join(out_folder, 'frames_preview.mp4') 133 | print("Encoding video {}".format(bag_name)) 134 | 135 | _encode_full_video(frame_data, out_file) 136 | _encode_preview(out_file, preview) 137 | 138 | def main(self): 139 | for path in self._bags[self.flags.skip:self.flags.until]: 140 | with rosbag.Bag(path, 'r') as bag: 141 | bag_name = os.path.basename(path) 142 | out_folder = self._create_out_folder(bag_name) 143 | filename = os.path.join(out_folder, 'data.hdf5') 144 | 145 | with h5py.File(filename, 'w') as h5_file: 146 | tf_tree = self._read_poses(out_folder, bag) 147 | image_frames = self._gather_images(bag) 148 | poses = self._gather_poses(tf_tree, image_frames) 149 | self._write_poses(h5_file, poses) 150 | self._encode_video(bag_name, poses) 151 | 152 | print(f"Done with bag {bag_name}.") 153 | 154 | def __enter__(self): 155 | os.makedirs('/tmp/encode_bags_tmp', exist_ok=True) 156 | os.makedirs(self.flags.out, exist_ok=True) 157 | return self 158 | 159 | def __exit__(self, *args): 160 | shutil.rmtree('/tmp/encode_bags_tmp') 161 | 162 | if __name__ == '__main__': 163 | with Runner() as runner: 164 | runner.main() 165 | 166 | -------------------------------------------------------------------------------- /scripts/make_video.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | ffmpeg -i "$1/%06d.jpg" -r 60 -y -c:v libx264 -vf scale=1280:360 -crf 25 "$1/out.mp4" 5 | 6 | -------------------------------------------------------------------------------- /scripts/package_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | import json 5 | from train import KeypointModule 6 | import yaml 7 | from pathlib import Path 8 | 9 | def read_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--model', type=str) 12 | parser.add_argument('--out', type=str, required=True) 13 | return parser.parse_args() 14 | 15 | def load_hparams(path): 16 | version_dir = Path(path).parent.parent.absolute() 17 | with open(os.path.join(version_dir, 'hparams.yaml'), 'rt') as f: 18 | params = yaml.load(f.read(), Loader=yaml.SafeLoader) 19 | return params 20 | 21 | class Model(torch.nn.Module): 22 | def __init__(self, flags, hparams): 23 | super().__init__() 24 | self.model = KeypointModule.load_from_checkpoint(flags.model, **hparams).model 25 | 26 | def forward(self, x): 27 | heatmap, depth, centers = self.model(x) 28 | return torch.sigmoid(heatmap[-1]), depth[-1], centers[-1] 29 | 30 | def main(): 31 | flags = read_args() 32 | hparams = load_hparams(flags.model) 33 | model = Model(flags, hparams).eval().cuda() 34 | 35 | dummy_input = torch.randn(2, 3, 511, 511).cuda() 36 | input_names = ["frames"] 37 | output_names = ["out"] 38 | 39 | 
with torch.no_grad(): 40 | traced = torch.jit.trace(model, dummy_input) 41 | 42 | traced.save(flags.out) 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | -------------------------------------------------------------------------------- /scripts/show_keypoints.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import json 4 | import time 5 | import hud 6 | import h5py 7 | import numpy as np 8 | import cv2 9 | import yaml 10 | import random 11 | from skvideo import io as video_io 12 | from perception import constants 13 | from perception.utils import camera_utils, Rate, linalg 14 | 15 | def read_args(): 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('base_dir', help="Which directory to encoded video directories in.") 18 | parser.add_argument('--calibration', default='config/calibration.yaml', help="Calibration yaml file.") 19 | parser.add_argument('--rate', '-r', default=30, help="Frames per second.") 20 | parser.add_argument('--seed', type=int, default=0) 21 | return parser.parse_args() 22 | 23 | KEYPOINT_FILENAME = 'keypoints.json' 24 | 25 | class ViewModel: 26 | def __init__(self, flags, directory): 27 | self.flags = flags 28 | self._read_keypoints(directory) 29 | self._load_video(directory) 30 | self._load_metadata(directory) 31 | self.current_frame = 0 32 | 33 | def _read_keypoints(self, base_dir): 34 | filepath = os.path.join(base_dir, KEYPOINT_FILENAME) 35 | with open(filepath, 'r') as f: 36 | contents = json.loads(f.read()) 37 | self.world_points = [np.array(p) for p in contents['3d_points']] 38 | 39 | def _load_video(self, base_dir): 40 | self.video = video_io.vreader(os.path.join(base_dir, 'frames.mp4')) 41 | 42 | def _load_metadata(self, base_dir): 43 | self.hdf = h5py.File(os.path.join(base_dir, 'data.hdf5'), 'r') 44 | self.num_frames = self.hdf['camera_transform'].shape[0] 45 | 46 | self.camera = camera_utils.from_calibration(self.flags.calibration) 47 | 48 | def close(self): 49 | self.hdf.close() 50 | self.video.close() 51 | 52 | def __iter__(self): 53 | return self 54 | 55 | def __next__(self): 56 | if self.current_frame >= self.num_frames: 57 | raise StopIteration() 58 | T_WC = self.hdf['camera_transform'][self.current_frame] 59 | T_CW = linalg.inv_transform(T_WC) 60 | R_c, _ = cv2.Rodrigues(T_CW[:3, :3]) 61 | frame_points = [] 62 | for p_WK in self.world_points: 63 | p_c = self.camera.project(p_WK[None, :3], T_CW) 64 | p_c = p_c.ravel() 65 | 66 | frame_points.append( 67 | hud.utils.to_normalized_device_coordinates( 68 | hud.Point(p_c[0], p_c[1]), 69 | constants.IMAGE_RECT)) 70 | 71 | 72 | frame = next(self.video) 73 | 74 | self.current_frame += 1 75 | return frame, frame_points 76 | 77 | def _transform_point(self, T_WC, point): 78 | T_CW = np.linalg.inv(T_WC) 79 | return T_CW @ point 80 | 81 | 82 | class PointVisualizer: 83 | def __init__(self, flags): 84 | self.flags = flags 85 | self.paused = False 86 | self.next = False 87 | self.done = False 88 | self.window = hud.AppWindow("Keypoints", 640, 360) 89 | self._create_views() 90 | 91 | def _create_views(self): 92 | self.image_pane = hud.ImagePane() 93 | self.image_points = hud.PointLayer([]) 94 | z_stack = hud.ZStack() 95 | z_stack.add_view(self.image_pane) 96 | z_stack.add_view(self.image_points) 97 | 98 | self.window.set_view(z_stack) 99 | self.window.add_key_handler(self._key_callback) 100 | 101 | def _key_callback(self, event): 102 | if event.key == 'Q': 103 | self.done = True 104 | elif event.key == ' ': 105 | self.paused 
= not self.paused 106 | elif event.key == '\x00': 107 | self.next = True 108 | 109 | def run(self): 110 | random.seed(self.flags.seed) 111 | rate = Rate(self.flags.rate) 112 | if os.path.isfile(os.path.join(self.flags.base_dir, 'keypoints.json')): 113 | directories = [os.path.basename(self.flags.base_dir)] 114 | base_dir = os.path.dirname(self.flags.base_dir) 115 | else: 116 | directories = os.listdir(self.flags.base_dir) 117 | base_dir = self.flags.base_dir 118 | random.shuffle(directories) 119 | for directory in directories: 120 | try: 121 | view_model = ViewModel(self.flags, os.path.join(base_dir, directory)) 122 | print(f"Sequence {directory}") 123 | for frame, points in view_model: 124 | print(f"Current frame {view_model.current_frame}, num frames: {view_model.num_frames}" + 5 * " ", end="\r") 125 | self.image_pane.set_texture(frame) 126 | self.image_points.set_points(points, constants.KEYPOINT_COLOR[None].repeat(len(points), 0)) 127 | if not self.window.update() or self.done: 128 | return 129 | self.window.poll_events() 130 | while self.paused: 131 | self.window.poll_events() 132 | rate.sleep() 133 | if self.next: 134 | self.next = False 135 | break 136 | rate.sleep() 137 | finally: 138 | view_model.close() 139 | 140 | def main(): 141 | flags = read_args() 142 | 143 | app = PointVisualizer(flags) 144 | app.run() 145 | 146 | if __name__ == "__main__": 147 | main() 148 | -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | import numpy as np 5 | import json 6 | import random 7 | from matplotlib import pyplot as plt 8 | from albumentations.augmentations import transforms 9 | from perception.models import nms 10 | import albumentations as A 11 | from torch.utils.data import DataLoader 12 | from perception.loss import KeypointLoss 13 | from perception.datasets.video import SceneDataset 14 | from perception.models import KeypointNet 15 | import pytorch_lightning as pl 16 | 17 | def read_args(): 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--workers', '-w', type=int, default=8, help="How many workers to use in data loader.") 20 | parser.add_argument('--train', type=str, required=True, help="Path to training dataset.") 21 | parser.add_argument('--val', type=str, required=True, help="Path to validation dataset.") 22 | parser.add_argument('--gpus', type=int, default=1) 23 | parser.add_argument('--fp16', action='store_true', help="Use half-precision.") 24 | parser.add_argument('--pool', default=1000, type=int, help="How many examples to use in shuffle pool") 25 | parser.add_argument('--keypoints', default="config/cups.json", help="Keypoint configuration file.") 26 | parser.add_argument('--batch-size', default=8, type=int) 27 | parser.add_argument('--weight-decay', default=0.01, type=float) 28 | parser.add_argument('--features', default=128, type=int, help="Intermediate features in network.") 29 | parser.add_argument('--center-weight', default=1.0, help="Weight for center loss vs. 
heatmap loss.") 30 | parser.add_argument('--lr', default=4e-3, type=float, help="Learning rate.") 31 | parser.add_argument('--dropout', default=0.1, type=float) 32 | parser.add_argument('--resume', default=None) 33 | return parser.parse_args() 34 | 35 | def _to_image(image): 36 | image = image.transpose([1, 2, 0]) 37 | image = image * np.array([0.25, 0.25, 0.25]) 38 | image = image + np.array([0.5, 0.5, 0.5]) 39 | return np.clip((image * 255.0).round(), 0.0, 255.0).astype(np.uint8) 40 | 41 | def _init_worker(worker_id): 42 | random.seed(worker_id) 43 | np.random.seed(worker_id) 44 | 45 | class KeypointModule(pl.LightningModule): 46 | def __init__(self, keypoint_config, lr=3e-4, features=128, dropout=0.1, weight_decay=0.01, center_weight=10.0): 47 | super().__init__() 48 | self.lr = lr 49 | self.weight_decay = weight_decay 50 | self.keypoint_config = keypoint_config 51 | self._load_model(features, dropout) 52 | self.loss = KeypointLoss(keypoint_config['keypoint_config'], center_weight=center_weight) 53 | self.save_hyperparameters() 54 | 55 | def _load_model(self, features, dropout): 56 | self.model = KeypointNet([180, 320], features=features, dropout=dropout, heatmaps_out=len(self.keypoint_config["keypoint_config"]) + 1) 57 | 58 | def forward(self, frame, *args, **kwargs): 59 | return self.model(frame, *args, **kwargs) 60 | 61 | def training_step(self, batch, batch_idx): 62 | frame, target, depth, gt_centers = batch 63 | heatmaps, p_depth, p_centers = self(frame) 64 | 65 | loss, heatmap_losses, depth_losses, center_losses = self.loss(heatmaps, target, p_depth, depth, p_centers, gt_centers) 66 | 67 | self.log('train_loss', loss) 68 | self.log('heatmap_loss1', heatmap_losses[0]) 69 | self.log('heatmap_loss2', heatmap_losses[1]) 70 | self.log('depth_loss1', depth_losses[0]) 71 | self.log('depth_loss2', depth_losses[1]) 72 | self.log('center_loss1', center_losses[0]) 73 | self.log('center_loss2', center_losses[1]) 74 | 75 | return loss 76 | 77 | def validation_step(self, batch, batch_idx): 78 | frame, target, depth, gt_centers, _, keypoints = batch 79 | heatmaps, p_depth, p_centers = self(frame) 80 | 81 | loss = self._validation_loss(heatmaps, target, keypoints) 82 | val_loss, heatmap_losses, depth_losses, center_losses = self.loss(heatmaps, target, p_depth, depth, p_centers, gt_centers) 83 | 84 | self.log('val_loss', loss) 85 | self.log('total_heatmap_loss', val_loss) 86 | self.log('val_heatmap_loss1', heatmap_losses[0]) 87 | self.log('val_heatmap_loss2', heatmap_losses[1]) 88 | self.log('val_depth_loss1', depth_losses[0]) 89 | self.log('val_depth_loss2', depth_losses[1]) 90 | self.log('val_center_loss1', center_losses[0]) 91 | self.log('val_center_loss2', center_losses[1]) 92 | 93 | return loss 94 | 95 | def configure_optimizers(self): 96 | optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay) 97 | schedule = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10, verbose=True) 98 | return { 99 | 'scheduler': schedule, 100 | 'interval': 'epoch', 101 | 'frequency': 1, 102 | 'monitor': 'train_loss', 103 | 'optimizer': optimizer 104 | } 105 | 106 | def _validation_loss(self, p_heatmaps, gt_heatmap, keypoints): 107 | # heatmaps: N x K x H x W 108 | # target: N x n_objects x K x 2 109 | p_heatmap = torch.sigmoid(p_heatmaps[-1]) 110 | return torch.nn.functional.l1_loss(p_heatmap, gt_heatmap) 111 | 112 | def _build_datasets(sequences, **kwargs): 113 | datasets = [] 114 | for sequence in sequences: 115 | dataset = SceneDataset(sequence, 
**kwargs) 116 | datasets.append(dataset) 117 | return datasets 118 | 119 | class DataModule(pl.LightningDataModule): 120 | def __init__(self, flags, keypoint_config): 121 | super().__init__() 122 | self.keypoint_config = keypoint_config 123 | datasets = [] 124 | train_directories = os.listdir(flags.train) 125 | train_sequences = sorted([os.path.join(flags.train, d) for d in train_directories]) 126 | val_directories = os.listdir(flags.val) 127 | val_sequences = sorted([os.path.join(flags.val, d) for d in val_directories]) 128 | self.flags = flags 129 | self.train_sequences = train_sequences 130 | self.val_sequences = val_sequences 131 | 132 | def setup(self, stage): 133 | if stage == 'fit': 134 | train_datasets = [] 135 | train_datasets += _build_datasets(self.train_sequences, keypoint_config=self.keypoint_config, augment=True, augment_color=True) 136 | val_datasets = _build_datasets(self.val_sequences, keypoint_config=self.keypoint_config, augment=False, include_pose=True) 137 | train = torch.utils.data.ChainDataset(train_datasets) 138 | self.train = torch.utils.data.BufferedShuffleDataset(train, self.flags.pool) 139 | self.val = torch.utils.data.ChainDataset(val_datasets) 140 | else: 141 | raise NotImplementedError() 142 | 143 | def train_dataloader(self): 144 | return DataLoader(self.train, batch_size=self.flags.batch_size, num_workers=self.flags.workers, 145 | worker_init_fn=_init_worker, 146 | persistent_workers=self.flags.workers > 0) 147 | 148 | def val_dataloader(self): 149 | return DataLoader(self.val, batch_size=self.flags.batch_size * 2, num_workers=self.flags.workers) 150 | 151 | def main(): 152 | flags = read_args() 153 | with open(flags.keypoints) as f: 154 | keypoint_config = json.load(f) 155 | data_module = DataModule(flags, keypoint_config) 156 | if flags.resume is None: 157 | module = KeypointModule(keypoint_config, 158 | lr=flags.lr, 159 | center_weight=flags.center_weight, 160 | features=flags.features, 161 | dropout=flags.dropout, 162 | weight_decay=flags.weight_decay) 163 | else: 164 | module = KeypointModule.load_from_checkpoint(flags.resume, 165 | lr=flags.lr, 166 | center_weight=flags.center_weight, 167 | dropout=flags.dropout, 168 | weight_decay=flags.weight_decay) 169 | 170 | from pytorch_lightning.callbacks import ModelCheckpoint 171 | checkpoint_cb = ModelCheckpoint(monitor='val_loss', 172 | save_top_k=1) 173 | trainer = pl.Trainer( 174 | callbacks=[checkpoint_cb], 175 | gpus=flags.gpus, 176 | reload_dataloaders_every_epoch=False, 177 | precision=16 if flags.fp16 else 32) 178 | 179 | trainer.fit(module, data_module) 180 | 181 | if __name__ == "__main__": 182 | main() 183 | 184 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name="perception", # Replace with your own username 5 | version="0.0.1", 6 | author="Kenneth Blomqvist", 7 | author_email="hello@keke.dev", 8 | description="A collection of utilities for doing robotic perception in Python and ROS.", 9 | url="https://github.com/kekeblom/perception", 10 | packages=setuptools.find_packages(), 11 | classifiers=[ 12 | "Programming Language :: Python :: 3", 13 | "License :: MIT License" 14 | ], 15 | python_requires='>=3.6', 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/test/__init__.py -------------------------------------------------------------------------------- /test/test_video_dataset.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | import numpy as np 4 | from perception.datasets.video import StereoVideoDataset, _compute_kernel 5 | 6 | class VideoDatasetTest(unittest.TestCase): 7 | def test_add_kernel(self): 8 | kernel = _compute_kernel(50, 25) 9 | target = np.zeros((120, 160), dtype=np.float32) 10 | StereoVideoDataset.kernel = kernel 11 | StereoVideoDataset.kernel_center = 25 12 | StereoVideoDataset.kernel_size = 50 13 | StereoVideoDataset._add_kernel(target, np.array([[80., 60.]])) 14 | self.assertEqual(target.max(), kernel[25, 25]) 15 | self.assertEqual(target[60, 80], target.max()) 16 | 17 | target = np.zeros((120, 160), dtype=np.float32) 18 | StereoVideoDataset._add_kernel(target, np.array([[1., 1.]])) 19 | self.assertEqual(target.max(), kernel[25, 25]) 20 | self.assertEqual(target[1, 1], target.max()) 21 | self.assertGreater(target.max(), 1e-3) 22 | 23 | # Past the end along x-axis. 24 | target = np.zeros((120, 160), dtype=np.float32) 25 | StereoVideoDataset._add_kernel(target, np.array([[165., 60.]])) 26 | self.assertNotEqual(target.max(), kernel[25, 25]) 27 | self.assertEqual(target[60, 159], target.max()) 28 | 29 | # Past end along both axes 30 | target = np.zeros((120, 160), dtype=np.float32) 31 | StereoVideoDataset._add_kernel(target, np.array([[165., 130.]])) 32 | self.assertEqual(target[119, 159], target.max()) 33 | 34 | # Before beginning. 35 | target = np.zeros((120, 160), dtype=np.float32) 36 | StereoVideoDataset._add_kernel(target, np.array([[-10., -130.]])) 37 | self.assertEqual(target[0, 1], target.max()) 38 | 39 | target = np.zeros((720, 1280), dtype=np.float32) 40 | StereoVideoDataset.kernel_size = 50 41 | StereoVideoDataset.kernel_center = 25 42 | StereoVideoDataset.width = 1280 43 | StereoVideoDataset.height = 720 44 | StereoVideoDataset._add_kernel(target, np.array([[456.02, 34.744]])) 45 | self.assertGreater(target.max(), 1e-3) 46 | 47 | target = np.zeros((360, 640), dtype=np.float32) 48 | StereoVideoDataset._add_kernel(target, np.array([[353.5, 153.8]])) 49 | self.assertEqual(target[154, 354], target.max()) 50 | 51 | 52 | 53 | if __name__ == "__main__": 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /test/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/test/utils/__init__.py -------------------------------------------------------------------------------- /test/utils/test_ros.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | import rospy 4 | import rostest 5 | import numpy as np 6 | from scipy.spatial.transform import Rotation 7 | from perception.utils import ros as ros_utils 8 | 9 | class RosUtilsTestCase(unittest.TestCase): 10 | @classmethod 11 | def setUpClass(cls): 12 | cls.node = rospy.init_node('test_ros_utils') 13 | 14 | def test_identity(self): 15 | T = np.eye(4) 16 | message = ros_utils.transform_to_message(T, 'parent', 'child', rospy.Time.now()) 17 | T_out = ros_utils.message_to_transform(message) 18 | np.testing.assert_allclose(T_out, T) 19 | 20 
| def test_random_rotation(self): 21 | T = np.eye(4) 22 | T[:3, :3] = Rotation.random().as_matrix() 23 | message = ros_utils.transform_to_message(T, 'parent', 'child', rospy.Time.now()) 24 | T_out = ros_utils.message_to_transform(message) 25 | np.testing.assert_allclose(T_out, T) 26 | 27 | def test_random_rotation_with_translation(self): 28 | T = np.eye(4) 29 | T[:3, :3] = Rotation.random().as_matrix() 30 | T[:3, 3] = np.random.uniform(-1, 1, size=3) 31 | message = ros_utils.transform_to_message(T, 'parent', 'child', rospy.Time.now()) 32 | T_out = ros_utils.message_to_transform(message) 33 | np.testing.assert_allclose(T_out, T) 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | 39 | --------------------------------------------------------------------------------
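The camera helpers in `perception/utils/camera_utils.py` compose with `perception/utils/linalg.py` into a simple project/undistort/unproject round trip. The sketch below is illustrative only: it assumes the package has been installed with `pip3 install -e .` and is run from the repository root so that `config/calibration.yaml` resolves to a Kalibr `cam0` calibration; the camera pose and 3D point are made up.

```python
import numpy as np
from perception.utils import camera_utils, linalg

# Load the cam0 model from the Kalibr calibration file.
camera = camera_utils.from_calibration('config/calibration.yaml')

# A made-up world-to-camera transform T_CW: camera half a metre behind the origin.
T_CW = np.eye(4)
T_CW[:3, 3] = np.array([0.0, 0.0, 0.5])
T_WC = linalg.inv_transform(T_CW)

p_W = np.array([[0.05, -0.02, 1.0]])               # one 3D point in the world frame
xy = camera.project(p_W, T_CW)                     # distorted pixel coordinates, N x 2
z = linalg.transform_points(T_CW, p_W)[:, 2]       # depth of the point in the camera frame
p_C = camera.unproject(camera.undistort(xy), z)    # back to 3D, now in the camera frame
assert np.allclose(linalg.transform_points(T_WC, p_C), p_W, atol=1e-4)
```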
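For the stereo path, a fully self-contained sketch with a synthetic rig avoids any dependency on a calibration file; the intrinsics, baseline and test point below are made up, and the distortion coefficients are set to zero for simplicity.

```python
import numpy as np
from perception.utils.camera_utils import FisheyeCamera, StereoCamera, camera_matrix

# Synthetic stereo rig: identical pinhole-equi cameras, 6 cm baseline, no distortion.
K = camera_matrix([400.0, 400.0, 320.0, 180.0])
D = np.zeros(4)
image_size = [360, 640]                      # height, width
T_RL = np.eye(4)
T_RL[0, 3] = -0.06                           # left-to-right transform for a camera 6 cm to the right

stereo = StereoCamera(FisheyeCamera(K, D, image_size),
                      FisheyeCamera(K, D, image_size), T_RL)

# Project a known 3D point into both views, then triangulate it back.
p_L = np.array([[0.05, -0.02, 1.0]])                       # point in the left camera frame
left_kp = stereo.left_camera.project(p_L)                  # T_CW defaults to identity
right_kp = stereo.right_camera.project(p_L, stereo.T_RL)
p_rec = stereo.triangulate(left_kp, right_kp)
assert np.allclose(p_rec, p_L, atol=1e-3)
```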