├── .gitignore ├── .gitmodules ├── LICENSE.md ├── README.md ├── assets └── images │ ├── stereolabel.jpg │ └── valve.jpg ├── config ├── calibration.yaml ├── cups.json └── valve.json ├── notebooks ├── DataloadingTest.ipynb ├── ModelBench.ipynb ├── OverfittingTest.ipynb └── keypoint_debug.ipynb ├── perception ├── __init__.py ├── constants.py ├── corner_net_lite │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── conda_packagelist.txt │ ├── configs │ │ ├── CornerNet-multi_scale.json │ │ ├── CornerNet.json │ │ ├── CornerNet_Saccade.json │ │ └── CornerNet_Squeeze.json │ ├── core │ │ ├── __init__.py │ │ ├── base.py │ │ ├── config.py │ │ ├── dbs │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── coco.py │ │ │ └── detection.py │ │ ├── detectors.py │ │ ├── external │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ ├── __init__.py │ │ │ ├── bbox.cpython-37m-x86_64-linux-gnu.so │ │ │ ├── bbox.pyx │ │ │ ├── nms.cpython-37m-x86_64-linux-gnu.so │ │ │ ├── nms.pyx │ │ │ └── setup.py │ │ ├── models │ │ │ ├── CornerNet.py │ │ │ ├── CornerNet_Saccade.py │ │ │ ├── CornerNet_Squeeze.py │ │ │ ├── __init__.py │ │ │ └── py_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── _cpools │ │ │ │ ├── .gitignore │ │ │ │ ├── __init__.py │ │ │ │ ├── setup.py │ │ │ │ └── src │ │ │ │ │ ├── bottom_pool.cpp │ │ │ │ │ ├── left_pool.cpp │ │ │ │ │ ├── right_pool.cpp │ │ │ │ │ └── top_pool.cpp │ │ │ │ ├── data_parallel.py │ │ │ │ ├── losses.py │ │ │ │ ├── modules.py │ │ │ │ ├── scatter_gather.py │ │ │ │ └── utils.py │ │ ├── nnet │ │ │ ├── __init__.py │ │ │ └── py_factory.py │ │ ├── paths.py │ │ ├── sample │ │ │ ├── __init__.py │ │ │ ├── cornernet.py │ │ │ ├── cornernet_saccade.py │ │ │ └── utils.py │ │ ├── test │ │ │ ├── __init__.py │ │ │ ├── cornernet.py │ │ │ └── cornernet_saccade.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── timer.py │ │ │ └── tqdm.py │ │ └── vis_utils.py │ ├── demo.jpg │ ├── demo.py │ ├── evaluate.py │ └── train.py ├── datasets │ ├── __init__.py │ ├── utils.py │ └── video.py ├── loss.py ├── models.py ├── pipeline.py └── utils │ ├── __init__.py │ ├── camera_utils.py │ ├── clustering_utils.py │ ├── linalg.py │ ├── ros.py │ └── timer.py ├── requirements.txt ├── scripts ├── collect_bags.py ├── constants.py ├── encode_bag.py ├── eval_model.py ├── label.py ├── make_video.sh ├── package_model.py ├── show_keypoints.py └── train.py ├── setup.py └── test ├── __init__.py ├── test_pipeline.py ├── test_video_dataset.py └── utils ├── __init__.py └── test_ros.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .pyc 3 | *.egg-info 4 | 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "perception/corner_net_lite"] 2 | path = perception/corner_net_lite 3 | url = https://github.com/princeton-vl/CornerNet-Lite 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject 
to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Object Keypoint Tracking 2 | 3 | This repository contains a toolkit for collecting, labeling and tracking object keypoints. Object keypoints are semantic points in an object's coordinate frame. 4 | 5 | The project allows collecting images from multiple viewpoints using a robot with a wrist-mounted camera. These image sequences can then be labeled using an easy-to-use user interface, StereoLabel. 6 | 7 | ![StereoLabel keypoint labeling](assets/images/stereolabel.jpg) 8 | 9 | Once the images are labeled, a model can be learned to detect keypoints in the images and compute 3D keypoints in the camera's coordinate frame. 10 | 11 | ## Installation 12 | 13 | External Dependencies: 14 | - [HUD](https://github.com/ethz-asl/hud) 15 | - ROS melodic/noetic 16 | 17 | Install HUD. Then install dependencies with `pip install -r requirements.txt` and finally install the package using `pip3 install -e .`. 18 | 19 | ## Usage 20 | 21 | Here we describe the process we used to arrive at our labeled datasets and learned models. 22 | 23 | ### Calibration and setup 24 | 25 | First, calibrate your camera and obtain a hand-eye calibration. Calibrating the camera can be done using [Kalibr](https://github.com/ethz-asl/kalibr). Hand-eye calibration can be done with the [ethz-asl/hand_eye_calibration](https://github.com/ethz-asl/hand_eye_calibration) or [easy_handeye](https://github.com/IFL-CAMP/easy_handeye) packages. 26 | 27 | The software currently assumes that the Kalibr `pinhole-equi` camera model was used when calibrating the camera. 28 | 29 | Kalibr will spit out a yaml file like the one at `config/calibration.yaml`. This should be passed in as the `--calibration` argument for `label.py` and other scripts. 30 | 31 | Once you have obtained the hand-eye calibration, configure your robot description so that the tf tree can correctly transform poses from the base frame to the camera optical frame. 32 | 33 | ### Collecting data 34 | 35 | The script `scripts/collect_bags.py` is a helper program to assist in collecting data. It will use [rosbag](http://wiki.ros.org/rosbag) to record the camera topics and transform messages. 36 | 37 | Run it with `python3 scripts/collect_bags.py --out <output_directory>`. 38 | 39 | Press enter to start recording a new sequence. Recording will start after a 5-second grace period, after which the topics will be recorded for 30 seconds. During the 30 seconds, slowly guide the robot arm to different viewpoints observing your target objects. 40 | 41 | ### Encoding data 42 | 43 | Since rosbag is not a very convenient or efficient format for our purposes, we encode the data into a format that is easier to work with and uses less disk space. This is done using the script `scripts/encode_bag.py`.
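The bags contain the stereo image topics listed in `config/calibration.yaml` (e.g. `/zedm/zed_node/left_raw/image_raw_color`) together with the recorded transforms. As a rough, hypothetical illustration of what the encoding step has to consume — not the repository's actual `encode_bag.py` implementation — reading the recorded images back out of a bag with `rosbag` and `cv_bridge` looks something like this:

```python
# Hypothetical sketch, not scripts/encode_bag.py: dump the left camera images
# from one recorded bag to disk. The topic name is taken from
# config/calibration.yaml; the bag filename is made up for illustration.
import cv2
import rosbag
from cv_bridge import CvBridge

bridge = CvBridge()
with rosbag.Bag('sequence_00.bag') as bag:
    topic = '/zedm/zed_node/left_raw/image_raw_color'
    for i, (_, msg, _) in enumerate(bag.read_messages(topics=[topic])):
        # Convert the sensor_msgs/Image message to an OpenCV BGR array.
        image = bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
        cv2.imwrite('frame_{:06d}.png'.format(i), image)
```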
44 | 45 | Run it with `python3 scripts/encode_bag.py --bags <bag_directory> --out <output_directory> --calibration <calibration_file>`. 46 | 47 | ### Labeling data 48 | 49 | ![Valve](assets/images/valve.jpg) 50 | 51 | First decide how many keypoints you will use for your object class and what their configuration is. Write a keypoint configuration file, like `config/valve.json` and `config/cups.json`. For example, in the case of our valve above, we define four different keypoints, which are of two types. The first type is the center keypoint type and the second is the spoke keypoint type. For our valve, there are three spokes, so we write our keypoint configuration as: 52 | ``` 53 | { "keypoint_config": [1, 3] } 54 | ``` 55 | What this means is that there will first be one keypoint of the first type and then three keypoints of the next type. Save this file for later. 56 | 57 | StereoLabel can be launched with `python3 scripts/label.py <data_directory>`. To label keypoints, click on the keypoints in the same order in each image. Make sure to label the points consistently with the keypoint configuration that you defined, so that the keypoints end up on the right heatmaps downstream. 58 | 59 | If you have multiple objects in the scene, it is important that you annotate one object at a time, sticking to the keypoint order, as the tool assumes that one object's keypoints follow each other. The number of keypoints you label should equal the number of objects times the total number of keypoints per object. 60 | 61 | Once you have labeled an equal number of points on the left and right image, the points will be backprojected, so that you can make sure that everything is correctly configured and that you didn't accidentally label the points in the wrong order. The points are saved at the same time to a file `keypoints.json` in each scene's directory. 62 | 63 | Here are some keyboard actions the tool supports: 64 | - Press `a` to replace the left frame with a random frame from the current sequence. 65 | - Press `b` to replace the right frame with a random frame from the current sequence. 66 | - Press `` to go to the next sequence, after you have labeled the current one. 67 | 68 | Switching frames is especially useful if, for example, a keypoint is occluded in one viewpoint and is hard to annotate accurately. 69 | 70 | Once the points have been saved and backprojected, you can freely press `a` and `b` to swap out the frames for different ones in the sequence. The tool will project the 3D points back into 2D onto the new frames, so you can check that the keypoints project nicely onto each frame. If they don't, you likely misclicked, the viewpoints are too close to each other, there is an issue with your intrinsics or hand-eye calibration, or the camera poses are inaccurate for some other reason. 71 | 72 | ### Checking the data 73 | 74 | Once all your sequences have been labeled, you can check that the labels are correct on all frames using `python scripts/show_keypoints.py <data_directory>`, which will play the images one by one and show the backprojected points. 75 | 76 | ### Learning a model 77 | 78 | First, download the weights for the CornerNet backbone model. This can be done from the [CornerNet repository](https://github.com/princeton-vl/CornerNet-Lite). We use the CornerNet-Squeeze model. Place the file at `models/corner_net.pkl`. 79 | 80 | You can train a model with `python scripts/train.py --train <training_data_directory> --val <validation_data_directory>`, where `--train` points to the directory containing your training scenes and `--val` points to the directory containing your validation scenes.
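Before launching a long training run, it can help to sanity-check that the data pipeline produces sensible heatmap targets for your keypoint configuration. `notebooks/DataloadingTest.ipynb` does this interactively; a minimal sketch of the same check is shown below. The data paths are placeholders, and the snippet assumes it is run from a location where `scripts.train` is importable, as the notebooks do.

```python
# Minimal sketch adapted from notebooks/DataloadingTest.ipynb: load one batch
# and print the tensor shapes. Paths are placeholders, not real datasets.
import json
from argparse import Namespace

from scripts.train import DataModule

with open('config/cups.json', 'rt') as f:
    keypoint_config = json.load(f)

module = DataModule(
    Namespace(train='/path/to/cups_train', val='/path/to/cups_test',
              batch_size=1, workers=1, pool=32),
    keypoint_config=keypoint_config)
module.setup('fit')

frame, target, depth, centers = next(iter(module.train_dataloader()))
print(frame.shape, target.shape, depth.shape, centers.shape)
```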
81 | 82 | Once done, you can package a model with `python scripts/package_model.py --model lightning_logs/version_x/checkpoints/<checkpoint>.ckpt --out model.pt`. 83 | 84 | You can then run and check the metrics on a test set using `python scripts/eval_model.py --model model.pt --keypoints <keypoint_config>`. 85 | 86 | ## General tips 87 | 88 | Here are some general tips that might be of use: 89 | - Collect data at something like 4-5 fps. Generally, frames that are super close to each other aren't that useful and you don't really need every single frame. That is, configure your camera node to only publish image messages at that rate. 90 | - Increase the publishing rate of your `robot_state_publisher` node to something like 100 or 200. 91 | - Move your robot slowly when collecting the data such that the time synchronization between your camera and robot is not that big of a problem. 92 | - Keep the scenes reasonable. 93 | - Collect data in all the operating conditions in which you will want to detect keypoints. 94 | 95 | 96 | -------------------------------------------------------------------------------- /assets/images/stereolabel.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/assets/images/stereolabel.jpg -------------------------------------------------------------------------------- /assets/images/valve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/assets/images/valve.jpg -------------------------------------------------------------------------------- /config/calibration.yaml: -------------------------------------------------------------------------------- 1 | cam0: 2 | cam_overlaps: [1] 3 | camera_model: pinhole 4 | distortion_coeffs: [0.14655632604598726, 0.12297180523259119, -0.5214959677970255, 5 | 0.5139323931960924] 6 | distortion_model: equidistant 7 | intrinsics: [698.556012835607, 699.1907430278537, 641.0077159827421, 368.1644084321484] 8 | resolution: [1280, 720] 9 | rostopic: /zedm/zed_node/left_raw/image_raw_color 10 | cam1: 11 | T_cn_cnm1: 12 | - [0.9999956816225143, 0.00029784085839669114, 0.002923700933251551, -0.062421684917401604] 13 | - [-0.0002810221198796635, 0.9999834217813691, -0.005751277161941048, -0.0002341856117885678] 14 | - [-0.002925365428825057, 0.005750430701121079, 0.9999791871753434, -5.759928320471004e-05] 15 | - [0.0, 0.0, 0.0, 1.0] 16 | cam_overlaps: [0] 17 | camera_model: pinhole 18 | distortion_coeffs: [0.22834286859634897, -0.5718130159249208, 1.6170727444831785, 19 | -1.6613629469156743] 20 | distortion_model: equidistant 21 | intrinsics: [693.93451021037, 694.5577956988693, 640.5646956100311, 364.74017968589965] 22 | resolution: [1280, 720] 23 | rostopic: /zedm/zed_node/right_raw/image_raw_color 24 | -------------------------------------------------------------------------------- /config/cups.json: -------------------------------------------------------------------------------- 1 | { 2 | "keypoint_config": [1, 1, 1] 3 | } 4 | -------------------------------------------------------------------------------- /config/valve.json: -------------------------------------------------------------------------------- 1 | { 2 | "keypoint_config": [1, 3] 3 | } 4 | -------------------------------------------------------------------------------- /notebooks/DataloadingTest.ipynb:
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "visible-treaty", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from matplotlib import pyplot as plt\n", 11 | "import numpy as np\n", 12 | "from argparse import Namespace\n", 13 | "import json" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "id": "modern-third", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "from scripts.train import DataModule, _to_image" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "id": "preceding-valuable", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "with open('../config/cups.json', 'rt') as f:\n", 34 | " keypoint_config = json.load(f)\n", 35 | "module = DataModule(Namespace(train='/home/ken/data/cups_train/', val='/home/ken/data/cups_test/', batch_size=1, workers=1, pool=32), keypoint_config=keypoint_config)\n", 36 | "module.setup('fit')\n", 37 | "\n", 38 | "train_loader = module.train_dataloader()\n", 39 | "train_iterator = iter(train_loader)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "killing-bullet", 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "import cv2\n", 50 | "def resize(target, width=640, height=360):\n", 51 | " return cv2.resize(target, (width, height))\n", 52 | " " 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "soviet-ceramic", 59 | "metadata": { 60 | "tags": [] 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "frame, target, depth, centers = next(train_iterator)\n", 65 | "\n", 66 | "plt.figure(figsize=(14, 8))\n", 67 | "image = _to_image(frame[0].numpy())\n", 68 | "for i in range(2):\n", 69 | " for j in range(2):\n", 70 | " axis = plt.subplot2grid((2, 2), loc=(i, j))\n", 71 | " axis.imshow(image)\n", 72 | " axis.imshow(resize(target[0, i * 2 + j].numpy()), alpha=0.5)\n", 73 | " plt.axis('off')\n", 74 | "plt.tight_layout()\n", 75 | "pass" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "descending-resource", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "plt.figure(figsize=(14, 8))\n", 86 | "for i in range(2):\n", 87 | " for j in range(2):\n", 88 | " axis = plt.subplot2grid((2, 2), loc=(i, j))\n", 89 | " axis.imshow(image)\n", 90 | " axis.imshow(resize(target[0, i * 2 + j].numpy()), alpha=0.5)\n", 91 | " axis.imshow(resize((np.abs(centers[0].numpy()) > 1e-1).any(axis=0).astype(np.float32)), alpha=0.5, vmin=0.0, vmax=1.0)\n", 92 | " plt.axis('off')\n", 93 | "plt.tight_layout()\n", 94 | "pass" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "id": "loved-swing", 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "vectors = centers.numpy()[0]\n", 105 | "indices = np.zeros((2, 180, 320), dtype=np.uint16)\n", 106 | "for i in range(180):\n", 107 | " for j in range(320):\n", 108 | " indices[:, i, j] = [j, i]\n", 109 | "plt.figure(figsize=(12, 10))\n", 110 | "plt.quiver(vectors[0], vectors[1], units='xy', scale_units='xy', scale=1.0)\n", 111 | "plt.show()\n" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "id": "disciplinary-parker", 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "pixel_indices = np.zeros((2, 180, 320), dtype=np.float32)\n", 122 | "for i in range(180):\n", 123 | " for j in 
range(320):\n", 124 | " pixel_indices[:, i, j] = [j + 0.5, i + 0.5]" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "id": "micro-abuse", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "norms = np.linalg.norm(vectors, axis=0)\n", 135 | "where_non_zero = target[0].sum(axis=0) > 0.5\n", 136 | "p_centers = np.zeros((2, pixel_indices.shape[1], pixel_indices.shape[2]), dtype=np.float32)\n", 137 | "p_centers[:, where_non_zero] = pixel_indices[:, where_non_zero] + vectors[:, where_non_zero]" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "id": "narrative-stanford", 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "dotted_image = cv2.resize(image.copy(), (320, 180))\n", 148 | "for point in p_centers[:, where_non_zero].transpose():\n", 149 | " cv2.circle(dotted_image, (point[0], point[1]), 2, (255, 0, 0), -1)\n", 150 | "plt.imshow(dotted_image)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "id": "central-contemporary", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "assert p_centers[:, where_non_zero].shape[1] > 3\n", 161 | "center_points = np.unique(p_centers[:, where_non_zero].round().astype(np.int32), axis=1)\n", 162 | "assert center_points.shape[1] >= 1 and center_points.shape[1] < 5" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "id": "01c4b265-c08f-41c6-9970-fe0a4c340379", 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "p_centers[:, where_non_zero].shape" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "id": "ffa0aa22-b817-4bf3-a59b-e7eb57ce6ad4", 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [] 182 | } 183 | ], 184 | "metadata": { 185 | "kernelspec": { 186 | "display_name": "vision", 187 | "language": "python", 188 | "name": "vision" 189 | }, 190 | "language_info": { 191 | "codemirror_mode": { 192 | "name": "ipython", 193 | "version": 3 194 | }, 195 | "file_extension": ".py", 196 | "mimetype": "text/x-python", 197 | "name": "python", 198 | "nbconvert_exporter": "python", 199 | "pygments_lexer": "ipython3", 200 | "version": "3.7.10" 201 | } 202 | }, 203 | "nbformat": 4, 204 | "nbformat_minor": 5 205 | } 206 | -------------------------------------------------------------------------------- /notebooks/ModelBench.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "clear-microwave", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from matplotlib import pyplot as plt\n", 11 | "import numpy as np\n", 12 | "from argparse import Namespace\n", 13 | "import json\n", 14 | "import torch" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "id": "first-eight", 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "from scripts.train import DataModule, _to_image, KeypointModule" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "monthly-prince", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "with open('../config/cups.json', 'rt') as f:\n", 35 | " keypoint_config = json.load(f)\n", 36 | "module = DataModule(Namespace(train='/home/ken/data/cups_train/', val='/home/ken/data/cups_test/', batch_size=1, workers=1, pool=32), keypoint_config=keypoint_config)\n", 37 | 
"module.setup('fit')\n", 38 | "\n", 39 | "dataloader = module.val_dataloader()\n", 40 | "print(dataloader)\n", 41 | "train_iterator = iter(dataloader)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "considered-parish", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "import cv2\n", 52 | "def resize(target, width=320, height=180):\n", 53 | " return cv2.resize(target, (width, height))\n", 54 | " " 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "id": "composed-charleston", 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "module = KeypointModule.load_from_checkpoint('../lightning_logs/version_0/checkpoints/epoch=15-step=33567.ckpt', keypoint_config=keypoint_config)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "mineral-vacuum", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "model = module.model.eval()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "id": "available-cancellation", 81 | "metadata": { 82 | "tags": [] 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "frame, target, depth, centers = next(train_iterator)\n", 87 | "\n", 88 | "plt.figure(figsize=(14, 8))\n", 89 | "image = _to_image(frame[0].numpy())\n", 90 | "for i in range(2):\n", 91 | " for j in range(2):\n", 92 | " axis = plt.subplot2grid((2, 2), loc=(i, j))\n", 93 | " axis.imshow(resize(image))\n", 94 | " axis.imshow(resize(target[0, i * 2 + j].numpy()), alpha=0.7)\n", 95 | " plt.axis('off')\n", 96 | "plt.tight_layout()\n", 97 | "pass" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "crazy-bookmark", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "with torch.no_grad():\n", 108 | " heatmap_p, depth_p, centers_p = model(frame)\n", 109 | " heatmap_p = torch.sigmoid(heatmap_p)\n", 110 | "\n", 111 | "plt.figure(figsize=(14, 8))\n", 112 | "image = _to_image(frame[0].numpy())\n", 113 | "for i in range(2):\n", 114 | " for j in range(2):\n", 115 | " axis = plt.subplot2grid((2, 2), loc=(i, j))\n", 116 | " axis.imshow(resize(image))\n", 117 | " axis.imshow(resize(heatmap_p[0, i * 2 + j].detach().numpy()), alpha=0.7, vmin=0.0, vmax=1.0)\n", 118 | " plt.axis('off')\n", 119 | "plt.tight_layout()\n", 120 | "pass" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "id": "742832fc-bb90-4de5-8360-1af36bb7f8d4", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "with torch.no_grad():\n", 131 | " heatmap_p, depth_p, centers_p = model(frame)\n", 132 | "\n", 133 | "plt.figure(figsize=(14, 8))\n", 134 | "image = _to_image(frame[0].numpy())\n", 135 | "plt.imshow(resize(depth_p[0, 3].detach().numpy()), alpha=0.7, vmin=0.0, vmax=2.0)\n", 136 | "plt.axis('off')\n", 137 | "plt.tight_layout()\n", 138 | "pass" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "sacred-cotton", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "pixel_indices = np.zeros((2, 180, 320), dtype=np.float32)\n", 149 | "for i in range(180):\n", 150 | " for j in range(320):\n", 151 | " pixel_indices[:, i, j] = [j + 0.5, i + 0.5]" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "id": "indonesian-steam", 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "vectors = centers.numpy()[0]\n", 162 | "norms = np.linalg.norm(vectors, axis=0)\n", 163 | 
"where_non_zero = np.abs(norms) > 1e-1\n", 164 | "gt_centers = np.zeros((2, pixel_indices.shape[1], pixel_indices.shape[2]), dtype=np.float32)\n", 165 | "gt_centers[:, where_non_zero] = pixel_indices[:, where_non_zero] + vectors[:, where_non_zero]\n", 166 | "\n", 167 | "where_heatmap_non_zero = heatmap_p[0].numpy().sum(axis=0) > 0.1\n", 168 | "p_centers = np.zeros((2, pixel_indices.shape[1], pixel_indices.shape[2]), dtype=np.float32)\n", 169 | "p_centers[:, where_heatmap_non_zero] = pixel_indices[:, where_heatmap_non_zero] + centers_p[0].detach().numpy()[:, where_heatmap_non_zero]\n", 170 | "\n", 171 | "figure = plt.figure(figsize=(10, 5))\n", 172 | "dotted_image = cv2.resize(image.copy(), (320, 180))\n", 173 | "for point in gt_centers[:, where_non_zero].transpose(): \n", 174 | " cv2.circle(dotted_image, (point[0], point[1]), 2, (255, 0, 0), -1)\n", 175 | " \n", 176 | "dotted_image_pred = cv2.resize(image.copy(), (320, 180))\n", 177 | "for point in p_centers[:, where_non_zero].transpose(): \n", 178 | " cv2.circle(dotted_image_pred, (point[0], point[1]), 2, (255, 0, 0), -1)\n", 179 | "\n", 180 | "axis = plt.subplot2grid((1, 2), loc=(0, 0))\n", 181 | "axis.imshow(dotted_image)\n", 182 | "plt.axis('off')\n", 183 | "\n", 184 | "axis = plt.subplot2grid((1, 2), loc=(0, 1))\n", 185 | "axis.imshow(dotted_image_pred)\n", 186 | "plt.axis('off')\n", 187 | "pass" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "id": "hindu-tuesday", 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "id": "essential-receiver", 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "id": "marine-alliance", 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [] 213 | } 214 | ], 215 | "metadata": { 216 | "kernelspec": { 217 | "display_name": "vision", 218 | "language": "python", 219 | "name": "vision" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.7.10" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 5 236 | } 237 | -------------------------------------------------------------------------------- /notebooks/OverfittingTest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "quantitative-muslim", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from matplotlib import pyplot as plt\n", 13 | "import numpy as np\n", 14 | "from argparse import Namespace\n", 15 | "import json\n", 16 | "import torch" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "amazing-millennium", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "from scripts.train import DataModule, _to_image, KeypointModule\n", 27 | "from perception.loss import KeypointLoss" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "id": "protective-myanmar", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "with open('../config/cups.json', 'rt') as f:\n", 38 | " keypoint_config = json.load(f)\n", 39 | "module = 
DataModule(Namespace(train='/home/ken/data/cups_train/', val='/home/ken/data/cups_test/', batch_size=2, workers=1, pool=256), keypoint_config=keypoint_config)\n", 40 | "module.setup('fit')\n", 41 | "\n", 42 | "train_loader = module.train_dataloader()\n", 43 | "train_iterator = iter(train_loader)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "id": "square-college", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "print(len(train_loader))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "sought-collins", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "from tqdm import tqdm\n", 64 | "for _ in tqdm(range(10)):\n", 65 | " next(train_iterator)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "private-bloom", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "import cv2\n", 76 | "def resize(target, width=640, height=360):\n", 77 | " return cv2.resize(target, (width, height), interpolation=cv2.INTER_LINEAR)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "typical-positive", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "module = KeypointModule(keypoint_config=keypoint_config)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "brilliant-jonathan", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "model = module.model" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "equipped-caution", 104 | "metadata": { 105 | "tags": [] 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "frame, target, depth, centers = next(train_iterator)\n", 110 | "\n", 111 | "def to_p(target):\n", 112 | " return target\n", 113 | "\n", 114 | "plt.figure(figsize=(7, 4 * frame.shape[0]))\n", 115 | "rows = frame.shape[0]\n", 116 | "for i in range(rows):\n", 117 | " axis = plt.subplot2grid((rows, 1), loc=(i, 0))\n", 118 | " axis.imshow(_to_image(frame[i].numpy()))\n", 119 | " axis.imshow(resize(to_p(target[i]).sum(dim=0).numpy()), alpha=0.7, vmin=0.0, vmax=1.0)\n", 120 | " plt.axis('off')\n", 121 | "plt.tight_layout()\n", 122 | "pass" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "id": "ultimate-northern", 129 | "metadata": { 130 | "tags": [] 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "optimizer = torch.optim.Adam(lr=3e-4, params=model.parameters())\n", 135 | "loss_fn = KeypointLoss(keypoint_config['keypoint_config'])\n", 136 | "model.train()\n", 137 | "progress = tqdm(range(100))\n", 138 | "for i in progress:\n", 139 | " p_heatmaps, p_depth, p_centers = model(frame)\n", 140 | " loss_value, _ = loss_fn(p_heatmaps, target, p_depth, depth, p_centers, centers)\n", 141 | " progress.set_postfix({'loss': loss_value.item()})\n", 142 | " loss_value.backward()\n", 143 | " optimizer.step()\n", 144 | " optimizer.zero_grad()\n", 145 | " " 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "id": "possible-banking", 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "from torch.nn import functional as F\n", 156 | "print(F.binary_cross_entropy_with_logits(p_heatmaps, target, reduction='none').sum(dim=[1,2,3]).mean() * loss_fn.heatmap_weight)\n", 157 | "print(loss_fn.reduce(F.l1_loss(p_depth[depth > 0.05], depth[depth > 0.05], reduction='none')))\n", 158 | "print(loss_fn.reduction)\n", 159 | 
"print(loss_fn.focal_loss(p_heatmaps, target).max())\n", 160 | "print(target.max())\n", 161 | "print(torch.sigmoid(p_heatmaps).max())" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "ordinary-arthur", 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "with torch.no_grad():\n", 172 | " model.eval()\n", 173 | " heatmap_p, depth_p, centers_p = model(frame)\n", 174 | " heatmap_p = torch.sigmoid(heatmap_p)\n", 175 | "plt.figure(figsize=(7, 4 * frame.shape[0]))\n", 176 | "rows = frame.shape[0]\n", 177 | "for i in range(rows):\n", 178 | " axis = plt.subplot2grid((rows, 1), loc=(i, 0))\n", 179 | " axis.imshow(_to_image(frame[i].numpy()))\n", 180 | " axis.imshow(resize(heatmap_p[i].sum(dim=0).numpy()), alpha=0.7, vmin=0.0, vmax=1.0)\n", 181 | " plt.axis('off')\n", 182 | "pass" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "id": "extreme-pacific", 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "pixel_indices = np.zeros((2, 180, 320), dtype=np.float32)\n", 193 | "for i in range(180):\n", 194 | " for j in range(320):\n", 195 | " pixel_indices[:, i, j] = [j + 0.5, i + 0.5]" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "id": "favorite-organ", 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "vectors = centers.numpy()[0]\n", 206 | "norms = np.linalg.norm(vectors, axis=0)\n", 207 | "where_non_zero = np.abs(norms) > 1e-2\n", 208 | "gt_centers = np.zeros((2, pixel_indices.shape[1], pixel_indices.shape[2]), dtype=np.float32)\n", 209 | "gt_centers = pixel_indices + vectors\n", 210 | "\n", 211 | "where_heatmap_non_zero = target[0].numpy().sum(axis=0) > 0.1\n", 212 | "\n", 213 | "p_centers = np.zeros((2, pixel_indices.shape[1], pixel_indices.shape[2]), dtype=np.float32)\n", 214 | "p_centers = pixel_indices + centers_p[0].detach().numpy()\n", 215 | "\n", 216 | "figure = plt.figure(figsize=(10, 5))\n", 217 | "dotted_image = cv2.resize(_to_image(frame[0].numpy().copy()), (320, 180))\n", 218 | "for point in gt_centers[:, where_non_zero].transpose(): \n", 219 | " cv2.circle(dotted_image, (point[0], point[1]), 2, (255, 0, 0), -1)\n", 220 | " \n", 221 | "dotted_image_pred = cv2.resize(_to_image(frame[0].numpy().copy()), (320, 180))\n", 222 | "where_pred_non_zero = heatmap_p[0].sum(dim=0) > 0.25\n", 223 | "for point in p_centers[:, where_pred_non_zero].transpose(): \n", 224 | " cv2.circle(dotted_image_pred, (point[0], point[1]), 2, (255, 0, 0), -1)\n", 225 | "\n", 226 | "axis = plt.subplot2grid((1, 2), loc=(0, 0))\n", 227 | "axis.imshow(dotted_image)\n", 228 | "plt.axis('off')\n", 229 | "\n", 230 | "axis = plt.subplot2grid((1, 2), loc=(0, 1))\n", 231 | "axis.imshow(dotted_image_pred)\n", 232 | "plt.axis('off')\n", 233 | "pass" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "id": "rapid-seating", 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "id": "spoken-oxide", 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "id": "liked-ensemble", 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [] 259 | } 260 | ], 261 | "metadata": { 262 | "kernelspec": { 263 | "display_name": "vision", 264 | "language": "python", 265 | "name": "vision" 266 | }, 267 | "language_info": { 268 | "codemirror_mode": { 269 | 
"name": "ipython", 270 | "version": 3 271 | }, 272 | "file_extension": ".py", 273 | "mimetype": "text/x-python", 274 | "name": "python", 275 | "nbconvert_exporter": "python", 276 | "pygments_lexer": "ipython3", 277 | "version": "3.7.10" 278 | } 279 | }, 280 | "nbformat": 4, 281 | "nbformat_minor": 5 282 | } 283 | -------------------------------------------------------------------------------- /notebooks/keypoint_debug.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "expected-tunisia", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from matplotlib import pyplot as plt\n", 11 | "import numpy as np\n", 12 | "%matplotlib widget" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "id": "extensive-candidate", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "points_3d = np.array([[ 0., 0., 0. ],\n", 23 | " [-0.03138264, -0.09922726, 0.00946505],\n", 24 | " [-0.07037588, 0.07883613, 0.0089331 ],\n", 25 | " [ 0.10787677, 0.02588217, 0.00836965]])\n", 26 | "keypoints_2d = np.array([[603.39123535, 301.11212158],\n", 27 | " [640.75494385, 396.78601074],\n", 28 | " [556.61352539, 291.11450195],\n", 29 | " [697.81604004, 276.12750244]])\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "id": "alleged-clerk", 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "application/vnd.jupyter.widget-view+json": { 41 | "model_id": "77ed4fe527794923953ac0c2b3680b47", 42 | "version_major": 2, 43 | "version_minor": 0 44 | }, 45 | "text/plain": [ 46 | "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" 47 | ] 48 | }, 49 | "metadata": {}, 50 | "output_type": "display_data" 51 | }, 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "(-0.15, 0.15)" 56 | ] 57 | }, 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "fig = plt.figure()\n", 65 | "ax = fig.add_subplot(projection='3d')\n", 66 | "ax.scatter(points_3d[:, 0], points_3d[:, 1], points_3d[:, 2], c=np.linspace(0, 1, 4), cmap='summer')\n", 67 | "ax.set_xlim(-0.15, 0.15)\n", 68 | "ax.set_ylim(-0.15, 0.15)\n", 69 | "ax.set_zlim(-0.15, 0.15)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "id": "settled-township", 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "application/vnd.jupyter.widget-view+json": { 81 | "model_id": "326ba677018f4daf96840f6b2f74e98b", 82 | "version_major": 2, 83 | "version_minor": 0 84 | }, 85 | "text/plain": [ 86 | "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" 87 | ] 88 | }, 89 | "metadata": {}, 90 | "output_type": "display_data" 91 | }, 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "(0.0, 720.0)" 96 | ] 97 | }, 98 | "execution_count": 4, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "figure = plt.figure()\n", 105 | "plt.scatter(keypoints_2d[:, 0], keypoints_2d[:, 1], c=np.arange(4), cmap='summer')\n", 106 | "plt.xlim(0, 1280)\n", 107 | "plt.ylim(0, 720)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 5, 113 | "id": "religious-wells", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "import cv2\n", 118 | "K = np.array([[697.87732212 , 0. 
, 648.08562626],\n", 119 | " [ 0., 697.28594061, 371.49958099],\n", 120 | " [ 0., 0.,1. ]]) / 8.0\n", 121 | "D = np.array([-1.74610270e-01, 2.75427408e-02, 6.24873971e-05, 9.10956548e-05])" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 21, 127 | "id": "sealed-cambridge", 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "(True,\n", 134 | " array([[ 0.75168222, -0.27787168, 0.5981314 , -0.0314786 ],\n", 135 | " [-0.45333637, -0.87638764, 0.16257562, -0.06893673],\n", 136 | " [ 0.47901981, -0.39335992, -0.78473435, 0.89141784],\n", 137 | " [ 0. , 0. , 0. , 1. ]]))" 138 | ] 139 | }, 140 | "execution_count": 21, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "success, rotvec, tvec = cv2.solvePnP(points_3d, keypoints_2d, K, np.zeros(4), flags=cv2.SOLVEPNP_EPNP)\n", 147 | "T = np.eye(4)\n", 148 | "R, _ = cv2.Rodrigues(rotvec)\n", 149 | "T[:3, :3] = R\n", 150 | "T[:3, 3] = tvec[:, 0]\n", 151 | "success, T" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 22, 157 | "id": "regional-gossip", 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "array([[-0.0314786 , -0.06893673, 0.89141784, 1. ],\n", 164 | " [-0.02183458, 0.03379049, 0.90798941, 1. ],\n", 165 | " [-0.10094206, -0.10467149, 0.81968532, 1. ],\n", 166 | " [ 0.04742468, -0.1391633 , 0.92634399, 1. ]])" 167 | ] 168 | }, 169 | "execution_count": 22, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "predicted_points = (T @ np.concatenate([points_3d, np.ones((4, 1))], axis=1)[:,:, None])[:, :, 0]\n", 176 | "predicted_points" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 24, 182 | "id": "valid-folks", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "application/vnd.jupyter.widget-view+json": { 188 | "model_id": "dc3df26192f6474da4c095a2f249e203", 189 | "version_major": 2, 190 | "version_minor": 0 191 | }, 192 | "text/plain": [ 193 | "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" 194 | ] 195 | }, 196 | "metadata": {}, 197 | "output_type": "display_data" 198 | }, 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "[[623.44151511 317.57581015 1. ]\n", 204 | " [631.30364683 397.44882302 1. ]\n", 205 | " [562.14389836 282.4581459 1. ]\n", 206 | " [683.81383124 266.74733235 1. 
]]\n" 207 | ] 208 | }, 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "(0.0, 720.0)" 213 | ] 214 | }, 215 | "execution_count": 24, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "fig = plt.figure()\n", 222 | "reprojected = (K @ predicted_points[:, :3, None])[:, :, 0]\n", 223 | "reprojected = reprojected / reprojected[:, 2:3]\n", 224 | "print(reprojected)\n", 225 | "\n", 226 | "plt.scatter(keypoints_2d[:, 0], keypoints_2d[:, 1], c=np.linspace(0, 1, keypoints_2d.shape[0]), cmap='spring')\n", 227 | "plt.scatter(reprojected[:, 0], reprojected[:, 1], c=np.linspace(0, 1, reprojected.shape[0]), cmap='spring')\n", 228 | "plt.xlim(0, 1280)\n", 229 | "plt.ylim(0, 720)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 9, 235 | "id": "formal-marketplace", 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "array([[525.72722885, 262.88184879, 0.83918927],\n", 242 | " [564.64292418, 344.10913791, 0.88081192],\n", 243 | " [450.11823921, 233.91501349, 0.82930932],\n", 244 | " [543.28804458, 205.62918175, 0.78049184]])" 245 | ] 246 | }, 247 | "execution_count": 9, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "(K @ predicted_points[:, :3, None])[:, :, 0]" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 10, 259 | "id": "permanent-assessment", 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "text/plain": [ 265 | "array([[-0.02599206, -0.07009551, 0.83918927, 1. ],\n", 266 | " [-0.0088821 , 0.02421945, 0.88081192, 1. ],\n", 267 | " [-0.1251584 , -0.10637394, 0.82930932, 1. ],\n", 268 | " [ 0.05368064, -0.1209306 , 0.78049184, 1. ]])" 269 | ] 270 | }, 271 | "execution_count": 10, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "predicted_points" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "id": "mature-corpus", 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [] 287 | } 288 | ], 289 | "metadata": { 290 | "kernelspec": { 291 | "display_name": "vision", 292 | "language": "python", 293 | "name": "vision" 294 | }, 295 | "language_info": { 296 | "codemirror_mode": { 297 | "name": "ipython", 298 | "version": 3 299 | }, 300 | "file_extension": ".py", 301 | "mimetype": "text/x-python", 302 | "name": "python", 303 | "nbconvert_exporter": "python", 304 | "pygments_lexer": "ipython3", 305 | "version": "3.7.10" 306 | } 307 | }, 308 | "nbformat": 4, 309 | "nbformat_minor": 5 310 | } 311 | -------------------------------------------------------------------------------- /perception/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import utils 2 | -------------------------------------------------------------------------------- /perception/constants.py: -------------------------------------------------------------------------------- 1 | import hud 2 | import numpy as np 3 | 4 | def _to_camera_matrix(proj): 5 | return np.array([[proj[0], 0., proj[2]], 6 | [0., proj[1], proj[3]], 7 | [0., 0., 1.]], dtype=np.float64) 8 | 9 | KEYPOINT_FILENAME = 'keypoints.json' 10 | IMAGE_HEIGHT = 720 11 | IMAGE_WIDTH = 1280 12 | IMAGE_RECT = hud.Rect(0, 0, IMAGE_WIDTH, IMAGE_HEIGHT) 13 | image_size = (int(IMAGE_RECT.width), int(IMAGE_RECT.height)) 14 | KEYPOINT_COLOR = np.array([1.0, 0.0, 0.0, 1.0]) 15 | 16 | -------------------------------------------------------------------------------- /perception/corner_net_lite/.gitignore: -------------------------------------------------------------------------------- 1 | loss/ 2 | data/ 3 | cache/ 4 | tf_cache/ 5 | debug/ 6 | results/ 7 | 8 | misc/outputs 9 | 10 | evaluation/evaluate_object 11 | evaluation/analyze_object 12 | 13 | nnet/__pycache__/ 14 | 15 | *.swp 16 | 17 | *.pyc 18 | *.o* 19 | -------------------------------------------------------------------------------- /perception/corner_net_lite/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Princeton University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /perception/corner_net_lite/README.md: -------------------------------------------------------------------------------- 1 | # CornerNet-Lite: Training, Evaluation and Testing Code 2 | Code for reproducing results in the following paper: 3 | 4 | [**CornerNet-Lite: Efficient Keypoint Based Object Detection**](https://arxiv.org/abs/1904.08900) 5 | Hei Law, Yun Teng, Olga Russakovsky, Jia Deng 6 | *arXiv:1904.08900* 7 | 8 | ## Getting Started 9 | ### Software Requirement 10 | - Python 3.7 11 | - PyTorch 1.0.0 12 | - CUDA 10 13 | - GCC 4.9.2 or above 14 | 15 | ### Installing Dependencies 16 | Please first install [Anaconda](https://anaconda.org) and create an Anaconda environment using the provided package list `conda_packagelist.txt`. 17 | ``` 18 | conda create --name CornerNet_Lite --file conda_packagelist.txt --channel pytorch 19 | ``` 20 | 21 | After you create the environment, please activate it. 22 | ``` 23 | source activate CornerNet_Lite 24 | ``` 25 | 26 | ### Compiling Corner Pooling Layers 27 | Compile the C++ implementation of the corner pooling layers. (GCC4.9.2 or above is required.) 28 | ``` 29 | cd /core/models/py_utils/_cpools/ 30 | python setup.py install --user 31 | ``` 32 | 33 | ### Compiling NMS 34 | Compile the NMS code which are originally from [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/nms/cpu_nms.pyx) and [Soft-NMS](https://github.com/bharatsingh430/soft-nms/blob/master/lib/nms/cpu_nms.pyx). 35 | ``` 36 | cd /core/external 37 | make 38 | ``` 39 | 40 | ### Downloading Models 41 | In this repo, we provide models for the following detectors: 42 | - [CornerNet-Saccade](https://drive.google.com/file/d/1MQDyPRI0HgDHxHToudHqQ-2m8TVBciaa/view?usp=sharing) 43 | - [CornerNet-Squeeze](https://drive.google.com/file/d/1qM8BBYCLUBcZx_UmLT0qMXNTh-Yshp4X/view?usp=sharing) 44 | - [CornerNet](https://drive.google.com/file/d/1e8At_iZWyXQgLlMwHkB83kN-AN85Uff1/view?usp=sharing) 45 | 46 | Put the CornerNet-Saccade model under `/cache/nnet/CornerNet_Saccade/`, CornerNet-Squeeze model under `/cache/nnet/CornerNet_Squeeze/` and CornerNet model under `/cache/nnet/CornerNet/`. (\* Note we use underscore instead of dash in both the directory names for CornerNet-Saccade and CornerNet-Squeeze.) 47 | 48 | Note: The CornerNet model is the same as the one in the original [CornerNet repo](https://github.com/princeton-vl/CornerNet). We just ported it to this new repo. 49 | 50 | ### Running the Demo Script 51 | After downloading the models, you should be able to use the detectors on your own images. We provide a demo script `demo.py` to test if the repo is installed correctly. 52 | ``` 53 | python demo.py 54 | ``` 55 | This script applies CornerNet-Saccade to `demo.jpg` and writes the results to `demo_out.jpg`. 56 | 57 | In the demo script, the default detector is CornerNet-Saccade. You can modify the demo script to test different detectors. For example, if you want to test CornerNet-Squeeze: 58 | ```python 59 | #!/usr/bin/env python 60 | 61 | import cv2 62 | from core.detectors import CornerNet_Squeeze 63 | from core.vis_utils import draw_bboxes 64 | 65 | detector = CornerNet_Squeeze() 66 | image = cv2.imread("demo.jpg") 67 | 68 | bboxes = detector(image) 69 | image = draw_bboxes(image, bboxes) 70 | cv2.imwrite("demo_out.jpg", image) 71 | ``` 72 | 73 | ### Using CornerNet-Lite in Your Project 74 | It is also easy to use CornerNet-Lite in your project. 
You will need to change the directory name from `CornerNet-Lite` to `CornerNet_Lite`. Otherwise, you won't be able to import CornerNet-Lite. 75 | ``` 76 | Your project 77 | │ README.md 78 | │ ... 79 | │ foo.py 80 | │ 81 | └───CornerNet_Lite 82 | │ 83 | └───directory1 84 | │ 85 | └───... 86 | ``` 87 | 88 | In `foo.py`, you can easily import CornerNet-Saccade by adding: 89 | ```python 90 | from CornerNet_Lite import CornerNet_Saccade 91 | 92 | def foo(): 93 | cornernet = CornerNet_Saccade() 94 | # CornerNet_Saccade is ready to use 95 | 96 | image = cv2.imread('/path/to/your/image') 97 | bboxes = cornernet(image) 98 | ``` 99 | 100 | If you want to train or evaluate the detectors on COCO, please move on to the following steps. 101 | 102 | ## Training and Evaluation 103 | 104 | ### Installing MS COCO APIs 105 | ``` 106 | mkdir -p /data 107 | cd /data 108 | git clone git@github.com:cocodataset/cocoapi.git coco 109 | cd /data/coco/PythonAPI 110 | make install 111 | ``` 112 | 113 | ### Downloading MS COCO Data 114 | - Download the training/validation split we use in our paper from [here](https://drive.google.com/file/d/1dop4188xo5lXDkGtOZUzy2SHOD_COXz4/view?usp=sharing) (originally from [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn/tree/master/data)) 115 | - Unzip the file and place `annotations` under `/data/coco` 116 | - Download the images (2014 Train, 2014 Val, 2017 Test) from [here](http://cocodataset.org/#download) 117 | - Create 3 directories, `trainval2014`, `minival2014` and `testdev2017`, under `/data/coco/images/` 118 | - Copy the training/validation/testing images to the corresponding directories according to the annotation files 119 | 120 | To train and evaluate a network, you will need to create a configuration file, which defines the hyperparameters, and a model file, which defines the network architecture. The configuration file should be in JSON format and placed in `/configs/`. Each configuration file should have a corresponding model file in `/core/models/`. i.e. If there is a `.json` in `/configs/`, there should be a `.py` in `/core/models/`. There is only one exception which we will mention later. 121 | 122 | ### Training and Evaluating a Model 123 | To train a model: 124 | ``` 125 | python train.py 126 | ``` 127 | 128 | We provide the configuration files and the model files for CornerNet-Saccade, CornerNet-Squeeze and CornerNet in this repo. Please check the configuration files in `/configs/`. 129 | 130 | To train CornerNet-Saccade: 131 | ``` 132 | python train.py CornerNet_Saccade 133 | ``` 134 | Please adjust the batch size in `CornerNet_Saccade.json` to accommodate the number of GPUs that are available to you. 135 | 136 | To evaluate the trained model: 137 | ``` 138 | python evaluate.py CornerNet_Saccade --testiter 500000 --split 139 | ``` 140 | 141 | If you want to test different hyperparameters during evaluation and do not want to overwrite the original configuration file, you can do so by creating a configuration file with a suffix (`-.json`). There is no need to create `-.py` in `/core/models/`. 142 | 143 | To use the new configuration file: 144 | ``` 145 | python evaluate.py --testiter --split --suffix 146 | ``` 147 | 148 | We also include a configuration file for CornerNet under multi-scale setting, which is `CornerNet-multi_scale.json`, in this repo. 
149 | 150 | To use the multi-scale configuration file: 151 | ``` 152 | python evaluate.py CornerNet --testiter --split --suffix multi_scale 153 | -------------------------------------------------------------------------------- /perception/corner_net_lite/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.detectors import CornerNet, CornerNet_Squeeze, CornerNet_Saccade 2 | from .core.vis_utils import draw_bboxes 3 | -------------------------------------------------------------------------------- /perception/corner_net_lite/conda_packagelist.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | blas=1.0=mkl 5 | bzip2=1.0.6=h14c3975_5 6 | ca-certificates=2018.12.5=0 7 | cairo=1.14.12=h8948797_3 8 | certifi=2018.11.29=py37_0 9 | cffi=1.11.5=py37he75722e_1 10 | cuda100=1.0=0 11 | cycler=0.10.0=py37_0 12 | cython=0.28.5=py37hf484d3e_0 13 | dbus=1.13.2=h714fa37_1 14 | expat=2.2.6=he6710b0_0 15 | ffmpeg=4.0=hcdf2ecd_0 16 | fontconfig=2.13.0=h9420a91_0 17 | freeglut=3.0.0=hf484d3e_5 18 | freetype=2.9.1=h8a8886c_1 19 | glib=2.56.2=hd408876_0 20 | graphite2=1.3.12=h23475e2_2 21 | gst-plugins-base=1.14.0=hbbd80ab_1 22 | gstreamer=1.14.0=hb453b48_1 23 | harfbuzz=1.8.8=hffaf4a1_0 24 | hdf5=1.10.2=hba1933b_1 25 | icu=58.2=h9c2bf20_1 26 | intel-openmp=2019.0=118 27 | jasper=2.0.14=h07fcdf6_1 28 | jpeg=9b=h024ee3a_2 29 | kiwisolver=1.0.1=py37hf484d3e_0 30 | libedit=3.1.20170329=h6b74fdf_2 31 | libffi=3.2.1=hd88cf55_4 32 | libgcc-ng=8.2.0=hdf63c60_1 33 | libgfortran-ng=7.3.0=hdf63c60_0 34 | libglu=9.0.0=hf484d3e_1 35 | libopencv=3.4.2=hb342d67_1 36 | libopus=1.2.1=hb9ed12e_0 37 | libpng=1.6.35=hbc83047_0 38 | libstdcxx-ng=8.2.0=hdf63c60_1 39 | libtiff=4.0.9=he85c1e1_2 40 | libuuid=1.0.3=h1bed415_2 41 | libvpx=1.7.0=h439df22_0 42 | libxcb=1.13=h1bed415_1 43 | libxml2=2.9.8=h26e45fe_1 44 | matplotlib=3.0.2=py37h5429711_0 45 | mkl=2018.0.3=1 46 | mkl_fft=1.0.6=py37h7dd41cf_0 47 | mkl_random=1.0.1=py37h4414c95_1 48 | ncurses=6.1=hf484d3e_0 49 | ninja=1.8.2=py37h6bb024c_1 50 | numpy=1.15.4=py37h1d66e8a_0 51 | numpy-base=1.15.4=py37h81de0dd_0 52 | olefile=0.46=py37_0 53 | opencv=3.4.2=py37h6fd60c2_1 54 | openssl=1.1.1a=h7b6447c_0 55 | pcre=8.42=h439df22_0 56 | pillow=5.2.0=py37heded4f4_0 57 | pip=10.0.1=py37_0 58 | pixman=0.34.0=hceecf20_3 59 | py-opencv=3.4.2=py37hb342d67_1 60 | pycparser=2.18=py37_1 61 | pyparsing=2.2.0=py37_1 62 | pyqt=5.9.2=py37h05f1152_2 63 | python=3.7.1=h0371630_3 64 | python-dateutil=2.7.3=py37_0 65 | pytorch=1.0.0=py3.7_cuda10.0.130_cudnn7.4.1_1 66 | pytz=2018.5=py37_0 67 | qt=5.9.7=h5867ecd_1 68 | readline=7.0=h7b6447c_5 69 | scikit-learn=0.19.1=py37hedc7406_0 70 | scipy=1.1.0=py37hfa4b5c9_1 71 | setuptools=40.2.0=py37_0 72 | sip=4.19.8=py37hf484d3e_0 73 | six=1.11.0=py37_1 74 | sqlite=3.25.3=h7b6447c_0 75 | tk=8.6.8=hbc83047_0 76 | torchvision=0.2.1=py37_1 77 | tornado=5.1=py37h14c3975_0 78 | tqdm=4.25.0=py37h28b3542_0 79 | wheel=0.31.1=py37_0 80 | xz=5.2.4=h14c3975_4 81 | zlib=1.2.11=ha838bed_2 82 | -------------------------------------------------------------------------------- /perception/corner_net_lite/configs/CornerNet-multi_scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "COCO", 4 | "batch_size": 49, 5 | "sampling_function": "cornernet", 6 | 7 | "train_split": "trainval", 8 | "val_split": 
"minival", 9 | 10 | "learning_rate": 0.00025, 11 | "decay_rate": 10, 12 | 13 | "val_iter": 100, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 5, 17 | 18 | "max_iter": 500000, 19 | "stepsize": 450000, 20 | "snapshot": 5000, 21 | 22 | "chunk_sizes": [4, 5, 5, 5, 5, 5, 5, 5, 5, 5], 23 | 24 | "data_dir": "./data" 25 | }, 26 | 27 | "db": { 28 | "rand_scale_min": 0.6, 29 | "rand_scale_max": 1.4, 30 | "rand_scale_step": 0.1, 31 | "rand_scales": null, 32 | 33 | "rand_crop": true, 34 | "rand_color": true, 35 | 36 | "border": 128, 37 | "gaussian_bump": true, 38 | 39 | "input_size": [511, 511], 40 | "output_sizes": [[128, 128]], 41 | 42 | "test_scales": [0.5, 0.75, 1, 1.25, 1.5], 43 | 44 | "top_k": 100, 45 | "categories": 80, 46 | "ae_threshold": 0.5, 47 | "nms_threshold": 0.5, 48 | 49 | "merge_bbox": true, 50 | "weight_exp": 10, 51 | 52 | "max_per_image": 100 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /perception/corner_net_lite/configs/CornerNet.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "COCO", 4 | "batch_size": 49, 5 | "sampling_function": "cornernet", 6 | 7 | "train_split": "trainval", 8 | "val_split": "minival", 9 | 10 | "learning_rate": 0.00025, 11 | "decay_rate": 10, 12 | 13 | "val_iter": 100, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 5, 17 | 18 | "max_iter": 500000, 19 | "stepsize": 450000, 20 | "snapshot": 5000, 21 | 22 | "chunk_sizes": [4, 5, 5, 5, 5, 5, 5, 5, 5, 5], 23 | 24 | "data_dir": "./data" 25 | }, 26 | 27 | "db": { 28 | "rand_scale_min": 0.6, 29 | "rand_scale_max": 1.4, 30 | "rand_scale_step": 0.1, 31 | "rand_scales": null, 32 | 33 | "rand_crop": true, 34 | "rand_color": true, 35 | 36 | "border": 128, 37 | "gaussian_bump": true, 38 | "gaussian_iou": 0.3, 39 | 40 | "input_size": [511, 511], 41 | "output_sizes": [[128, 128]], 42 | 43 | "test_scales": [1], 44 | 45 | "top_k": 100, 46 | "categories": 80, 47 | "ae_threshold": 0.5, 48 | "nms_threshold": 0.5, 49 | 50 | "max_per_image": 100 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /perception/corner_net_lite/configs/CornerNet_Saccade.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "COCO", 4 | "batch_size": 48, 5 | "sampling_function": "cornernet_saccade", 6 | 7 | "train_split": "trainval", 8 | "val_split": "minival", 9 | 10 | "learning_rate": 0.00025, 11 | "decay_rate": 10, 12 | 13 | "val_iter": 100, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 5, 17 | 18 | "max_iter": 500000, 19 | "stepsize": 450000, 20 | "snapshot": 5000, 21 | 22 | "chunk_sizes": [12, 12, 12, 12] 23 | }, 24 | 25 | "db": { 26 | "rand_scale_min": 0.5, 27 | "rand_scale_max": 1.1, 28 | "rand_scale_step": 0.1, 29 | "rand_scales": null, 30 | 31 | "rand_full_crop": true, 32 | "gaussian_bump": true, 33 | "gaussian_iou": 0.5, 34 | 35 | "min_scale": 16, 36 | "view_sizes": [], 37 | 38 | "height_mult": 31, 39 | "width_mult": 31, 40 | 41 | "input_size": [255, 255], 42 | "output_sizes": [[64, 64]], 43 | 44 | "att_max_crops": 30, 45 | "att_scales": [[1, 2, 4]], 46 | "att_thresholds": [0.3], 47 | 48 | "top_k": 12, 49 | "num_dets": 12, 50 | "categories": 80, 51 | "ae_threshold": 0.3, 52 | "nms_threshold": 0.5, 53 | 54 | "max_per_image": 100 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /perception/corner_net_lite/configs/CornerNet_Squeeze.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "COCO", 4 | "batch_size": 55, 5 | "sampling_function": "cornernet", 6 | 7 | "train_split": "trainval", 8 | "val_split": "minival", 9 | 10 | "learning_rate": 0.00025, 11 | "decay_rate": 10, 12 | 13 | "val_iter": 100, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 5, 17 | 18 | "max_iter": 500000, 19 | "stepsize": 450000, 20 | "snapshot": 5000, 21 | 22 | "chunk_sizes": [13, 14, 14, 14], 23 | 24 | "data_dir": "./data" 25 | }, 26 | 27 | "db": { 28 | "rand_scale_min": 0.6, 29 | "rand_scale_max": 1.4, 30 | "rand_scale_step": 0.1, 31 | "rand_scales": null, 32 | 33 | "rand_crop": true, 34 | "rand_color": true, 35 | 36 | "border": 128, 37 | "gaussian_bump": true, 38 | "gaussian_iou": 0.3, 39 | 40 | "input_size": [511, 511], 41 | "output_sizes": [[64, 64]], 42 | 43 | "test_scales": [1], 44 | "test_flipped": false, 45 | 46 | "top_k": 20, 47 | "num_dets": 100, 48 | "categories": 80, 49 | "ae_threshold": 0.5, 50 | "nms_threshold": 0.5, 51 | 52 | "max_per_image": 100 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/__init__.py -------------------------------------------------------------------------------- /perception/corner_net_lite/core/base.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from .nnet.py_factory import NetworkFactory 4 | 5 | class Base(object): 6 | def __init__(self, db, nnet, func, model=None): 7 | super(Base, self).__init__() 8 | 9 | self._db = db 10 | self._nnet = nnet 11 | self._func = func 12 | 13 | if model is not None: 14 | self._nnet.load_pretrained_params(model) 15 | 16 | self._nnet.cuda() 17 | self._nnet.eval_mode() 18 | 19 | def _inference(self, image, *args, **kwargs): 20 | return self._func(self._db, self._nnet, image.copy(), *args, **kwargs) 21 | 22 | def __call__(self, image, *args, **kwargs): 23 | categories = self._db.configs["categories"] 24 | bboxes = self._inference(image, *args, **kwargs) 25 | return {self._db.cls2name(j): bboxes[j] for j in range(1, categories + 1)} 26 | 27 | def load_cfg(cfg_file): 28 | with open(cfg_file, "r") as f: 29 | cfg = json.load(f) 30 | 31 | cfg_sys = cfg["system"] 32 | cfg_db = cfg["db"] 33 | return cfg_sys, cfg_db 34 | 35 | def load_nnet(cfg_sys, model): 36 | return NetworkFactory(cfg_sys, model) 37 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | class SystemConfig(object): 5 | def __init__(self): 6 | self._configs = {} 7 | self._configs["dataset"] = None 8 | self._configs["sampling_function"] = "coco_detection" 9 | 10 | # Training Config 11 | self._configs["display"] = 5 12 | self._configs["snapshot"] = 400 13 | self._configs["stepsize"] = 5000 14 | self._configs["learning_rate"] = 0.001 15 | self._configs["decay_rate"] = 10 16 | self._configs["max_iter"] = 100000 17 | self._configs["val_iter"] = 20 18 | self._configs["batch_size"] = 1 19 | self._configs["snapshot_name"] = None 20 | self._configs["prefetch_size"] = 100 21 | self._configs["pretrain"] 
= None 22 | self._configs["opt_algo"] = "adam" 23 | self._configs["chunk_sizes"] = None 24 | 25 | # Directories 26 | self._configs["data_dir"] = "./data" 27 | self._configs["cache_dir"] = "./cache" 28 | self._configs["config_dir"] = "./config" 29 | self._configs["result_dir"] = "./results" 30 | 31 | # Split 32 | self._configs["train_split"] = "training" 33 | self._configs["val_split"] = "validation" 34 | self._configs["test_split"] = "testdev" 35 | 36 | # Rng 37 | self._configs["data_rng"] = np.random.RandomState(123) 38 | self._configs["nnet_rng"] = np.random.RandomState(317) 39 | 40 | @property 41 | def chunk_sizes(self): 42 | return self._configs["chunk_sizes"] 43 | 44 | @property 45 | def train_split(self): 46 | return self._configs["train_split"] 47 | 48 | @property 49 | def val_split(self): 50 | return self._configs["val_split"] 51 | 52 | @property 53 | def test_split(self): 54 | return self._configs["test_split"] 55 | 56 | @property 57 | def full(self): 58 | return self._configs 59 | 60 | @property 61 | def sampling_function(self): 62 | return self._configs["sampling_function"] 63 | 64 | @property 65 | def data_rng(self): 66 | return self._configs["data_rng"] 67 | 68 | @property 69 | def nnet_rng(self): 70 | return self._configs["nnet_rng"] 71 | 72 | @property 73 | def opt_algo(self): 74 | return self._configs["opt_algo"] 75 | 76 | @property 77 | def prefetch_size(self): 78 | return self._configs["prefetch_size"] 79 | 80 | @property 81 | def pretrain(self): 82 | return self._configs["pretrain"] 83 | 84 | @property 85 | def result_dir(self): 86 | result_dir = os.path.join(self._configs["result_dir"], self.snapshot_name) 87 | if not os.path.exists(result_dir): 88 | os.makedirs(result_dir) 89 | return result_dir 90 | 91 | @property 92 | def dataset(self): 93 | return self._configs["dataset"] 94 | 95 | @property 96 | def snapshot_name(self): 97 | return self._configs["snapshot_name"] 98 | 99 | @property 100 | def snapshot_dir(self): 101 | snapshot_dir = os.path.join(self.cache_dir, "nnet", self.snapshot_name) 102 | 103 | if not os.path.exists(snapshot_dir): 104 | os.makedirs(snapshot_dir) 105 | return snapshot_dir 106 | 107 | @property 108 | def snapshot_file(self): 109 | snapshot_file = os.path.join(self.snapshot_dir, self.snapshot_name + "_{}.pkl") 110 | return snapshot_file 111 | 112 | @property 113 | def config_dir(self): 114 | return self._configs["config_dir"] 115 | 116 | @property 117 | def batch_size(self): 118 | return self._configs["batch_size"] 119 | 120 | @property 121 | def max_iter(self): 122 | return self._configs["max_iter"] 123 | 124 | @property 125 | def learning_rate(self): 126 | return self._configs["learning_rate"] 127 | 128 | @property 129 | def decay_rate(self): 130 | return self._configs["decay_rate"] 131 | 132 | @property 133 | def stepsize(self): 134 | return self._configs["stepsize"] 135 | 136 | @property 137 | def snapshot(self): 138 | return self._configs["snapshot"] 139 | 140 | @property 141 | def display(self): 142 | return self._configs["display"] 143 | 144 | @property 145 | def val_iter(self): 146 | return self._configs["val_iter"] 147 | 148 | @property 149 | def data_dir(self): 150 | return self._configs["data_dir"] 151 | 152 | @property 153 | def cache_dir(self): 154 | if not os.path.exists(self._configs["cache_dir"]): 155 | os.makedirs(self._configs["cache_dir"]) 156 | return self._configs["cache_dir"] 157 | 158 | def update_config(self, new): 159 | for key in new: 160 | if key in self._configs: 161 | self._configs[key] = new[key] 162 | return self 
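Note that `update_config` only overwrites keys that already exist in the defaults above, so unrecognised entries in a JSON file are silently ignored. Below is a minimal sketch of how the files under `configs/` are split by `load_cfg` (from `core/base.py`) and merged into a `SystemConfig`; the path is illustrative and it assumes the package and its compiled pooling extensions are importable:

```
# Illustrative only: mirrors what core/detectors.py does when constructing a model.
from perception.corner_net_lite.core.base import load_cfg
from perception.corner_net_lite.core.config import SystemConfig

# load_cfg returns the "system" and "db" sections of the JSON file.
cfg_sys, cfg_db = load_cfg("perception/corner_net_lite/configs/CornerNet.json")

# Only keys already present in the defaults are overwritten.
sys_cfg = SystemConfig().update_config(cfg_sys)

print(sys_cfg.batch_size)     # 49, taken from the JSON
print(sys_cfg.learning_rate)  # 0.00025
```

The same pattern is used by `core/detectors.py` further below to build the `CornerNet`, `CornerNet_Squeeze` and `CornerNet_Saccade` wrappers.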
163 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/dbs/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco import COCO 2 | 3 | datasets = { 4 | "COCO": COCO 5 | } 6 | 7 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/dbs/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | class BASE(object): 5 | def __init__(self): 6 | self._split = None 7 | self._db_inds = [] 8 | self._image_ids = [] 9 | 10 | self._mean = np.zeros((3, ), dtype=np.float32) 11 | self._std = np.ones((3, ), dtype=np.float32) 12 | self._eig_val = np.ones((3, ), dtype=np.float32) 13 | self._eig_vec = np.zeros((3, 3), dtype=np.float32) 14 | 15 | self._configs = {} 16 | self._configs["data_aug"] = True 17 | 18 | self._data_rng = None 19 | 20 | @property 21 | def configs(self): 22 | return self._configs 23 | 24 | @property 25 | def mean(self): 26 | return self._mean 27 | 28 | @property 29 | def std(self): 30 | return self._std 31 | 32 | @property 33 | def eig_val(self): 34 | return self._eig_val 35 | 36 | @property 37 | def eig_vec(self): 38 | return self._eig_vec 39 | 40 | @property 41 | def db_inds(self): 42 | return self._db_inds 43 | 44 | @property 45 | def split(self): 46 | return self._split 47 | 48 | def update_config(self, new): 49 | for key in new: 50 | if key in self._configs: 51 | self._configs[key] = new[key] 52 | 53 | def image_ids(self, ind): 54 | return self._image_ids[ind] 55 | 56 | def image_path(self, ind): 57 | pass 58 | 59 | def write_result(self, ind, all_bboxes, all_scores): 60 | pass 61 | 62 | def evaluate(self, name): 63 | pass 64 | 65 | def shuffle_inds(self, quiet=False): 66 | if self._data_rng is None: 67 | self._data_rng = np.random.RandomState(os.getpid()) 68 | 69 | if not quiet: 70 | print("shuffling indices...") 71 | rand_perm = self._data_rng.permutation(len(self._db_inds)) 72 | self._db_inds = self._db_inds[rand_perm] 73 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/dbs/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | from .detection import DETECTION 6 | from ..paths import get_file_path 7 | 8 | # COCO bounding boxes are 0-indexed 9 | 10 | class COCO(DETECTION): 11 | def __init__(self, db_config, split=None, sys_config=None): 12 | assert split is None or sys_config is not None 13 | super(COCO, self).__init__(db_config) 14 | 15 | self._mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32) 16 | self._std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32) 17 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32) 18 | self._eig_vec = np.array([ 19 | [-0.58752847, -0.69563484, 0.41340352], 20 | [-0.5832747, 0.00994535, -0.81221408], 21 | [-0.56089297, 0.71832671, 0.41158938] 22 | ], dtype=np.float32) 23 | 24 | self._coco_cls_ids = [ 25 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 26 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 27 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 28 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 29 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 30 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 31 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 32 | 82, 84, 85, 86, 87, 88, 89, 90 33 | ] 34 | 35 | 
self._coco_cls_names = [ 36 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 37 | 'bus', 'train', 'truck', 'boat', 'traffic light', 38 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 39 | 'bird', 'cat', 'dog', 'horse','sheep', 'cow', 'elephant', 40 | 'bear', 'zebra','giraffe', 'backpack', 'umbrella', 41 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 42 | 'snowboard','sports ball', 'kite', 'baseball bat', 43 | 'baseball glove', 'skateboard', 'surfboard', 44 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 45 | 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 46 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 47 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 48 | 'bed', 'dining table', 'toilet', 'tv', 'laptop', 49 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 50 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 51 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 52 | 'toothbrush' 53 | ] 54 | 55 | self._cls2coco = {ind + 1: coco_id for ind, coco_id in enumerate(self._coco_cls_ids)} 56 | self._coco2cls = {coco_id: cls_id for cls_id, coco_id in self._cls2coco.items()} 57 | self._coco2name = {cls_id: cls_name for cls_id, cls_name in zip(self._coco_cls_ids, self._coco_cls_names)} 58 | self._name2coco = {cls_name: cls_id for cls_name, cls_id in self._coco2name.items()} 59 | 60 | if split is not None: 61 | coco_dir = os.path.join(sys_config.data_dir, "coco") 62 | 63 | self._split = { 64 | "trainval": "trainval2014", 65 | "minival": "minival2014", 66 | "testdev": "testdev2017" 67 | }[split] 68 | self._data_dir = os.path.join(coco_dir, "images", self._split) 69 | self._anno_file = os.path.join(coco_dir, "annotations", "instances_{}.json".format(self._split)) 70 | 71 | self._detections, self._eval_ids = self._load_coco_annos() 72 | self._image_ids = list(self._detections.keys()) 73 | self._db_inds = np.arange(len(self._image_ids)) 74 | 75 | def _load_coco_annos(self): 76 | from pycocotools.coco import COCO 77 | 78 | coco = COCO(self._anno_file) 79 | self._coco = coco 80 | 81 | class_ids = coco.getCatIds() 82 | image_ids = coco.getImgIds() 83 | 84 | eval_ids = {} 85 | detections = {} 86 | for image_id in image_ids: 87 | image = coco.loadImgs(image_id)[0] 88 | dets = [] 89 | 90 | eval_ids[image["file_name"]] = image_id 91 | for class_id in class_ids: 92 | annotation_ids = coco.getAnnIds(imgIds=image["id"], catIds=class_id) 93 | annotations = coco.loadAnns(annotation_ids) 94 | category = self._coco2cls[class_id] 95 | for annotation in annotations: 96 | det = annotation["bbox"] + [category] 97 | det[2] += det[0] 98 | det[3] += det[1] 99 | dets.append(det) 100 | 101 | file_name = image["file_name"] 102 | if len(dets) == 0: 103 | detections[file_name] = np.zeros((0, 5), dtype=np.float32) 104 | else: 105 | detections[file_name] = np.array(dets, dtype=np.float32) 106 | return detections, eval_ids 107 | 108 | def image_path(self, ind): 109 | if self._data_dir is None: 110 | raise ValueError("Data directory is not set") 111 | 112 | db_ind = self._db_inds[ind] 113 | file_name = self._image_ids[db_ind] 114 | return os.path.join(self._data_dir, file_name) 115 | 116 | def detections(self, ind): 117 | db_ind = self._db_inds[ind] 118 | file_name = self._image_ids[db_ind] 119 | return self._detections[file_name].copy() 120 | 121 | def cls2name(self, cls): 122 | coco = self._cls2coco[cls] 123 | return self._coco2name[coco] 124 | 125 | def _to_float(self, x): 126 | return float("{:.2f}".format(x)) 127 | 128 | def convert_to_coco(self, 
all_bboxes): 129 | detections = [] 130 | for image_id in all_bboxes: 131 | coco_id = self._eval_ids[image_id] 132 | for cls_ind in all_bboxes[image_id]: 133 | category_id = self._cls2coco[cls_ind] 134 | for bbox in all_bboxes[image_id][cls_ind]: 135 | bbox[2] -= bbox[0] 136 | bbox[3] -= bbox[1] 137 | 138 | score = bbox[4] 139 | bbox = list(map(self._to_float, bbox[0:4])) 140 | 141 | detection = { 142 | "image_id": coco_id, 143 | "category_id": category_id, 144 | "bbox": bbox, 145 | "score": float("{:.2f}".format(score)) 146 | } 147 | 148 | detections.append(detection) 149 | return detections 150 | 151 | def evaluate(self, result_json, cls_ids, image_ids): 152 | from pycocotools.cocoeval import COCOeval 153 | 154 | if self._split == "testdev": 155 | return None 156 | 157 | coco = self._coco 158 | 159 | eval_ids = [self._eval_ids[image_id] for image_id in image_ids] 160 | cat_ids = [self._cls2coco[cls_id] for cls_id in cls_ids] 161 | 162 | coco_dets = coco.loadRes(result_json) 163 | coco_eval = COCOeval(coco, coco_dets, "bbox") 164 | coco_eval.params.imgIds = eval_ids 165 | coco_eval.params.catIds = cat_ids 166 | coco_eval.evaluate() 167 | coco_eval.accumulate() 168 | coco_eval.summarize() 169 | return coco_eval.stats[0], coco_eval.stats[12:] 170 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/dbs/detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .base import BASE 4 | 5 | class DETECTION(BASE): 6 | def __init__(self, db_config): 7 | super(DETECTION, self).__init__() 8 | 9 | # Configs for training 10 | self._configs["categories"] = 80 11 | self._configs["rand_scales"] = [1] 12 | self._configs["rand_scale_min"] = 0.8 13 | self._configs["rand_scale_max"] = 1.4 14 | self._configs["rand_scale_step"] = 0.2 15 | 16 | # Configs for both training and testing 17 | self._configs["input_size"] = [383, 383] 18 | self._configs["output_sizes"] = [[96, 96], [48, 48], [24, 24], [12, 12]] 19 | 20 | self._configs["score_threshold"] = 0.05 21 | self._configs["nms_threshold"] = 0.7 22 | self._configs["max_per_set"] = 40 23 | self._configs["max_per_image"] = 100 24 | self._configs["top_k"] = 20 25 | self._configs["ae_threshold"] = 1 26 | self._configs["nms_kernel"] = 3 27 | self._configs["num_dets"] = 1000 28 | 29 | self._configs["nms_algorithm"] = "exp_soft_nms" 30 | self._configs["weight_exp"] = 8 31 | self._configs["merge_bbox"] = False 32 | 33 | self._configs["data_aug"] = True 34 | self._configs["lighting"] = True 35 | 36 | self._configs["border"] = 64 37 | self._configs["gaussian_bump"] = False 38 | self._configs["gaussian_iou"] = 0.7 39 | self._configs["gaussian_radius"] = -1 40 | self._configs["rand_crop"] = False 41 | self._configs["rand_color"] = False 42 | self._configs["rand_center"] = True 43 | 44 | self._configs["init_sizes"] = [192, 255] 45 | self._configs["view_sizes"] = [] 46 | 47 | self._configs["min_scale"] = 16 48 | self._configs["max_scale"] = 32 49 | 50 | self._configs["att_sizes"] = [[16, 16], [32, 32], [64, 64]] 51 | self._configs["att_ranges"] = [[96, 256], [32, 96], [0, 32]] 52 | self._configs["att_ratios"] = [16, 8, 4] 53 | self._configs["att_scales"] = [1, 1.5, 2] 54 | self._configs["att_thresholds"] = [0.3, 0.3, 0.3, 0.3] 55 | self._configs["att_nms_ks"] = [3, 3, 3] 56 | self._configs["att_max_crops"] = 8 57 | self._configs["ref_dets"] = True 58 | 59 | # Configs for testing 60 | self._configs["test_scales"] = [1] 61 | 
self._configs["test_flipped"] = True 62 | 63 | self.update_config(db_config) 64 | 65 | if self._configs["rand_scales"] is None: 66 | self._configs["rand_scales"] = np.arange( 67 | self._configs["rand_scale_min"], 68 | self._configs["rand_scale_max"], 69 | self._configs["rand_scale_step"] 70 | ) 71 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/detectors.py: -------------------------------------------------------------------------------- 1 | from .base import Base, load_cfg, load_nnet 2 | from .paths import get_file_path 3 | from .config import SystemConfig 4 | from .dbs.coco import COCO 5 | 6 | class CornerNet(Base): 7 | def __init__(self): 8 | from .test.cornernet import cornernet_inference 9 | from .models.CornerNet import model 10 | 11 | cfg_path = get_file_path("..", "configs", "CornerNet.json") 12 | model_path = get_file_path("..", "cache", "nnet", "CornerNet", "CornerNet_500000.pkl") 13 | 14 | cfg_sys, cfg_db = load_cfg(cfg_path) 15 | sys_cfg = SystemConfig().update_config(cfg_sys) 16 | coco = COCO(cfg_db) 17 | 18 | cornernet = load_nnet(sys_cfg, model()) 19 | super(CornerNet, self).__init__(coco, cornernet, cornernet_inference, model=model_path) 20 | 21 | class CornerNet_Squeeze(Base): 22 | def __init__(self): 23 | from .test.cornernet import cornernet_inference 24 | from .models.CornerNet_Squeeze import model 25 | 26 | cfg_path = get_file_path("..", "configs", "CornerNet_Squeeze.json") 27 | model_path = get_file_path("..", "cache", "nnet", "CornerNet_Squeeze", "CornerNet_Squeeze_500000.pkl") 28 | 29 | cfg_sys, cfg_db = load_cfg(cfg_path) 30 | sys_cfg = SystemConfig().update_config(cfg_sys) 31 | coco = COCO(cfg_db) 32 | 33 | cornernet = load_nnet(sys_cfg, model()) 34 | super(CornerNet_Squeeze, self).__init__(coco, cornernet, cornernet_inference, model=model_path) 35 | 36 | class CornerNet_Saccade(Base): 37 | def __init__(self): 38 | from .test.cornernet_saccade import cornernet_saccade_inference 39 | from .models.CornerNet_Saccade import model 40 | 41 | cfg_path = get_file_path("..", "configs", "CornerNet_Saccade.json") 42 | model_path = get_file_path("..", "cache", "nnet", "CornerNet_Saccade", "CornerNet_Saccade_500000.pkl") 43 | 44 | cfg_sys, cfg_db = load_cfg(cfg_path) 45 | sys_cfg = SystemConfig().update_config(cfg_sys) 46 | coco = COCO(cfg_db) 47 | 48 | cornernet = load_nnet(sys_cfg, model()) 49 | super(CornerNet_Saccade, self).__init__(coco, cornernet, cornernet_saccade_inference, model=model_path) 50 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/external/__init__.py -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/bbox.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/external/bbox.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/nms.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/external/nms.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /perception/corner_net_lite/core/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "bbox", 9 | ["bbox.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ), 12 | Extension( 13 | "nms", 14 | ["nms.pyx"], 15 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 16 | ) 17 | ] 18 | 19 | setup( 20 | name="coco", 21 | ext_modules=cythonize(extensions), 22 | include_dirs=[numpy.get_include()] 23 | ) 24 | 
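The `bbox_overlaps` kernel above computes pairwise IoU between two box sets using the inclusive `+ 1` pixel convention, and the extension is compiled in place via the `Makefile` shown earlier (`python setup.py build_ext --inplace`). Purely as an illustration of the same arithmetic, and not part of the build, a vectorised NumPy equivalent would look roughly like this:

```
import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """Pairwise IoU between (N, 4) and (K, 4) boxes, inclusive pixel coordinates."""
    box_area = ((boxes[:, 2] - boxes[:, 0] + 1) *
                (boxes[:, 3] - boxes[:, 1] + 1))                       # (N,)
    query_area = ((query_boxes[:, 2] - query_boxes[:, 0] + 1) *
                  (query_boxes[:, 3] - query_boxes[:, 1] + 1))         # (K,)
    # Intersection widths and heights, clipped at zero for disjoint boxes.
    iw = (np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) -
          np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1).clip(min=0)
    ih = (np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) -
          np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1).clip(min=0)
    inter = iw * ih                                                    # (N, K)
    union = box_area[:, None] + query_area[None, :] - inter
    return inter / union
```

The compiled Cython versions are kept for speed during evaluation; the sketch only mirrors their arithmetic.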
-------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/CornerNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool 5 | 6 | from .py_utils.utils import convolution, residual, corner_pool 7 | from .py_utils.losses import CornerNet_Loss 8 | from .py_utils.modules import hg_module, hg, hg_net 9 | 10 | def make_pool_layer(dim): 11 | return nn.Sequential() 12 | 13 | def make_hg_layer(inp_dim, out_dim, modules): 14 | layers = [residual(inp_dim, out_dim, stride=2)] 15 | layers += [residual(out_dim, out_dim) for _ in range(1, modules)] 16 | return nn.Sequential(*layers) 17 | 18 | class model(hg_net): 19 | def _pred_mod(self, dim): 20 | return nn.Sequential( 21 | convolution(3, 256, 256, with_bn=False), 22 | nn.Conv2d(256, dim, (1, 1)) 23 | ) 24 | 25 | def _merge_mod(self): 26 | return nn.Sequential( 27 | nn.Conv2d(256, 256, (1, 1), bias=False), 28 | nn.BatchNorm2d(256) 29 | ) 30 | 31 | def __init__(self): 32 | stacks = 2 33 | pre = nn.Sequential( 34 | convolution(7, 3, 128, stride=2), 35 | residual(128, 256, stride=2) 36 | ) 37 | hg_mods = nn.ModuleList([ 38 | hg_module( 39 | 5, [256, 256, 384, 384, 384, 512], [2, 2, 2, 2, 2, 4], 40 | make_pool_layer=make_pool_layer, 41 | make_hg_layer=make_hg_layer 42 | ) for _ in range(stacks) 43 | ]) 44 | cnvs = nn.ModuleList([convolution(3, 256, 256) for _ in range(stacks)]) 45 | inters = nn.ModuleList([residual(256, 256) for _ in range(stacks - 1)]) 46 | cnvs_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 47 | inters_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 48 | 49 | hgs = hg(pre, hg_mods, cnvs, inters, cnvs_, inters_) 50 | 51 | tl_modules = nn.ModuleList([corner_pool(256, TopPool, LeftPool) for _ in range(stacks)]) 52 | br_modules = nn.ModuleList([corner_pool(256, BottomPool, RightPool) for _ in range(stacks)]) 53 | 54 | tl_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 55 | br_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 56 | for tl_heat, br_heat in zip(tl_heats, br_heats): 57 | torch.nn.init.constant_(tl_heat[-1].bias, -2.19) 58 | torch.nn.init.constant_(br_heat[-1].bias, -2.19) 59 | 60 | tl_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 61 | br_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 62 | 63 | tl_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 64 | br_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 65 | 66 | super(model, self).__init__( 67 | hgs, tl_modules, br_modules, tl_heats, br_heats, 68 | tl_tags, br_tags, tl_offs, br_offs 69 | ) 70 | 71 | self.loss = CornerNet_Loss(pull_weight=1e-1, push_weight=1e-1) 72 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/CornerNet_Saccade.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool 5 | 6 | from .py_utils.utils import convolution, residual, corner_pool 7 | from .py_utils.losses import CornerNet_Saccade_Loss 8 | from .py_utils.modules import saccade_net, saccade_module, saccade 9 | 10 | def make_pool_layer(dim): 11 | return nn.Sequential() 12 | 13 | def make_hg_layer(inp_dim, out_dim, modules): 14 | layers = [residual(inp_dim, 
out_dim, stride=2)] 15 | layers += [residual(out_dim, out_dim) for _ in range(1, modules)] 16 | return nn.Sequential(*layers) 17 | 18 | class model(saccade_net): 19 | def _pred_mod(self, dim): 20 | return nn.Sequential( 21 | convolution(3, 256, 256, with_bn=False), 22 | nn.Conv2d(256, dim, (1, 1)) 23 | ) 24 | 25 | def _merge_mod(self): 26 | return nn.Sequential( 27 | nn.Conv2d(256, 256, (1, 1), bias=False), 28 | nn.BatchNorm2d(256) 29 | ) 30 | 31 | def __init__(self): 32 | stacks = 3 33 | pre = nn.Sequential( 34 | convolution(7, 3, 128, stride=2), 35 | residual(128, 256, stride=2) 36 | ) 37 | hg_mods = nn.ModuleList([ 38 | saccade_module( 39 | 3, [256, 384, 384, 512], [1, 1, 1, 1], 40 | make_pool_layer=make_pool_layer, 41 | make_hg_layer=make_hg_layer 42 | ) for _ in range(stacks) 43 | ]) 44 | cnvs = nn.ModuleList([convolution(3, 256, 256) for _ in range(stacks)]) 45 | inters = nn.ModuleList([residual(256, 256) for _ in range(stacks - 1)]) 46 | cnvs_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 47 | inters_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 48 | 49 | att_mods = nn.ModuleList([ 50 | nn.ModuleList([ 51 | nn.Sequential( 52 | convolution(3, 384, 256, with_bn=False), 53 | nn.Conv2d(256, 1, (1, 1)) 54 | ), 55 | nn.Sequential( 56 | convolution(3, 384, 256, with_bn=False), 57 | nn.Conv2d(256, 1, (1, 1)) 58 | ), 59 | nn.Sequential( 60 | convolution(3, 256, 256, with_bn=False), 61 | nn.Conv2d(256, 1, (1, 1)) 62 | ) 63 | ]) for _ in range(stacks) 64 | ]) 65 | for att_mod in att_mods: 66 | for att in att_mod: 67 | torch.nn.init.constant_(att[-1].bias, -2.19) 68 | 69 | hgs = saccade(pre, hg_mods, cnvs, inters, cnvs_, inters_) 70 | 71 | tl_modules = nn.ModuleList([corner_pool(256, TopPool, LeftPool) for _ in range(stacks)]) 72 | br_modules = nn.ModuleList([corner_pool(256, BottomPool, RightPool) for _ in range(stacks)]) 73 | 74 | tl_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 75 | br_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 76 | for tl_heat, br_heat in zip(tl_heats, br_heats): 77 | torch.nn.init.constant_(tl_heat[-1].bias, -2.19) 78 | torch.nn.init.constant_(br_heat[-1].bias, -2.19) 79 | 80 | tl_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 81 | br_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 82 | 83 | tl_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 84 | br_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 85 | 86 | super(model, self).__init__( 87 | hgs, tl_modules, br_modules, tl_heats, br_heats, 88 | tl_tags, br_tags, tl_offs, br_offs, att_mods 89 | ) 90 | 91 | self.loss = CornerNet_Saccade_Loss(pull_weight=1e-1, push_weight=1e-1) 92 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/CornerNet_Squeeze.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool 5 | 6 | from .py_utils.utils import convolution, corner_pool, residual 7 | from .py_utils.losses import CornerNet_Loss 8 | from .py_utils.modules import hg_module, hg, hg_net 9 | 10 | class fire_module(nn.Module): 11 | def __init__(self, inp_dim, out_dim, sr=2, stride=1): 12 | super(fire_module, self).__init__() 13 | self.conv1 = nn.Conv2d(inp_dim, out_dim // sr, kernel_size=1, stride=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(out_dim // sr) 15 | self.conv_1x1 = 
nn.Conv2d(out_dim // sr, out_dim // 2, kernel_size=1, stride=stride, bias=False) 16 | self.conv_3x3 = nn.Conv2d(out_dim // sr, out_dim // 2, kernel_size=3, padding=1, 17 | stride=stride, groups=out_dim // sr, bias=False) 18 | self.bn2 = nn.BatchNorm2d(out_dim) 19 | self.skip = (stride == 1 and inp_dim == out_dim) 20 | self.relu = nn.ReLU(inplace=True) 21 | 22 | def forward(self, x): 23 | conv1 = self.conv1(x) 24 | bn1 = self.bn1(conv1) 25 | conv2 = torch.cat((self.conv_1x1(bn1), self.conv_3x3(bn1)), 1) 26 | bn2 = self.bn2(conv2) 27 | if self.skip: 28 | return self.relu(bn2 + x) 29 | else: 30 | return self.relu(bn2) 31 | 32 | def make_pool_layer(dim): 33 | return nn.Sequential() 34 | 35 | def make_unpool_layer(dim): 36 | return nn.ConvTranspose2d(dim, dim, kernel_size=4, stride=2, padding=1) 37 | 38 | def make_layer(inp_dim, out_dim, modules): 39 | layers = [fire_module(inp_dim, out_dim)] 40 | layers += [fire_module(out_dim, out_dim) for _ in range(1, modules)] 41 | return nn.Sequential(*layers) 42 | 43 | def make_layer_revr(inp_dim, out_dim, modules): 44 | layers = [fire_module(inp_dim, inp_dim) for _ in range(modules - 1)] 45 | layers += [fire_module(inp_dim, out_dim)] 46 | return nn.Sequential(*layers) 47 | 48 | def make_hg_layer(inp_dim, out_dim, modules): 49 | layers = [fire_module(inp_dim, out_dim, stride=2)] 50 | layers += [fire_module(out_dim, out_dim) for _ in range(1, modules)] 51 | return nn.Sequential(*layers) 52 | 53 | class model(hg_net): 54 | def _pred_mod(self, dim): 55 | return nn.Sequential( 56 | convolution(1, 256, 256, with_bn=False), 57 | nn.Conv2d(256, dim, (1, 1)) 58 | ) 59 | 60 | def _merge_mod(self): 61 | return nn.Sequential( 62 | nn.Conv2d(256, 256, (1, 1), bias=False), 63 | nn.BatchNorm2d(256) 64 | ) 65 | 66 | def __init__(self): 67 | stacks = 2 68 | pre = nn.Sequential( 69 | convolution(7, 3, 128, stride=2), 70 | residual(128, 256, stride=2), 71 | residual(256, 256, stride=2) 72 | ) 73 | hg_mods = nn.ModuleList([ 74 | hg_module( 75 | 4, [256, 256, 384, 384, 512], [2, 2, 2, 2, 4], 76 | make_pool_layer=make_pool_layer, 77 | make_unpool_layer=make_unpool_layer, 78 | make_up_layer=make_layer, 79 | make_low_layer=make_layer, 80 | make_hg_layer_revr=make_layer_revr, 81 | make_hg_layer=make_hg_layer 82 | ) for _ in range(stacks) 83 | ]) 84 | cnvs = nn.ModuleList([convolution(3, 256, 256) for _ in range(stacks)]) 85 | inters = nn.ModuleList([residual(256, 256) for _ in range(stacks - 1)]) 86 | cnvs_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 87 | inters_ = nn.ModuleList([self._merge_mod() for _ in range(stacks - 1)]) 88 | 89 | hgs = hg(pre, hg_mods, cnvs, inters, cnvs_, inters_) 90 | 91 | tl_modules = nn.ModuleList([corner_pool(256, TopPool, LeftPool) for _ in range(stacks)]) 92 | br_modules = nn.ModuleList([corner_pool(256, BottomPool, RightPool) for _ in range(stacks)]) 93 | 94 | tl_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 95 | br_heats = nn.ModuleList([self._pred_mod(80) for _ in range(stacks)]) 96 | for tl_heat, br_heat in zip(tl_heats, br_heats): 97 | torch.nn.init.constant_(tl_heat[-1].bias, -2.19) 98 | torch.nn.init.constant_(br_heat[-1].bias, -2.19) 99 | 100 | tl_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 101 | br_tags = nn.ModuleList([self._pred_mod(1) for _ in range(stacks)]) 102 | 103 | tl_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 104 | br_offs = nn.ModuleList([self._pred_mod(2) for _ in range(stacks)]) 105 | 106 | super(model, self).__init__( 107 | hgs, tl_modules, 
br_modules, tl_heats, br_heats, 108 | tl_tags, br_tags, tl_offs, br_offs 109 | ) 110 | 111 | self.loss = CornerNet_Loss(pull_weight=1e-1, push_weight=1e-1) 112 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/models/__init__.py -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from ._cpools import TopPool, BottomPool, LeftPool, RightPool 2 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | cpools.egg-info/ 3 | dist/ 4 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | from torch.autograd import Function 5 | 6 | import top_pool, bottom_pool, left_pool, right_pool 7 | 8 | class TopPoolFunction(Function): 9 | @staticmethod 10 | def forward(ctx, input): 11 | output = top_pool.forward(input)[0] 12 | ctx.save_for_backward(input) 13 | return output 14 | 15 | @staticmethod 16 | def backward(ctx, grad_output): 17 | input = ctx.saved_variables[0] 18 | output = top_pool.backward(input, grad_output)[0] 19 | return output 20 | 21 | class BottomPoolFunction(Function): 22 | @staticmethod 23 | def forward(ctx, input): 24 | output = bottom_pool.forward(input)[0] 25 | ctx.save_for_backward(input) 26 | return output 27 | 28 | @staticmethod 29 | def backward(ctx, grad_output): 30 | input = ctx.saved_variables[0] 31 | output = bottom_pool.backward(input, grad_output)[0] 32 | return output 33 | 34 | class LeftPoolFunction(Function): 35 | @staticmethod 36 | def forward(ctx, input): 37 | output = left_pool.forward(input)[0] 38 | ctx.save_for_backward(input) 39 | return output 40 | 41 | @staticmethod 42 | def backward(ctx, grad_output): 43 | input = ctx.saved_variables[0] 44 | output = left_pool.backward(input, grad_output)[0] 45 | return output 46 | 47 | class RightPoolFunction(Function): 48 | @staticmethod 49 | def forward(ctx, input): 50 | output = right_pool.forward(input)[0] 51 | ctx.save_for_backward(input) 52 | return output 53 | 54 | @staticmethod 55 | def backward(ctx, grad_output): 56 | input = ctx.saved_variables[0] 57 | output = right_pool.backward(input, grad_output)[0] 58 | return output 59 | 60 | class TopPool(nn.Module): 61 | def forward(self, x): 62 | return TopPoolFunction.apply(x) 63 | 64 | class BottomPool(nn.Module): 65 | def forward(self, x): 66 | return BottomPoolFunction.apply(x) 67 | 68 | class LeftPool(nn.Module): 69 | def forward(self, x): 70 | return LeftPoolFunction.apply(x) 71 | 72 | class RightPool(nn.Module): 73 | def forward(self, x): 74 | return RightPoolFunction.apply(x) 75 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CppExtension 3 | 4 | setup( 5 | name="cpools", 6 | ext_modules=[ 7 | CppExtension("top_pool", ["src/top_pool.cpp"]), 8 | CppExtension("bottom_pool", ["src/bottom_pool.cpp"]), 9 | CppExtension("left_pool", ["src/left_pool.cpp"]), 10 | CppExtension("right_pool", ["src/right_pool.cpp"]) 11 | ], 12 | cmdclass={ 13 | "build_ext": BuildExtension 14 | } 15 | ) 16 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/src/bottom_pool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | std::vector pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get height 12 | int64_t height = input.size(2); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < height; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 2, ind, height); 18 | at::Tensor cur_temp = at::slice(output, 2, ind, height); 19 | at::Tensor next_temp = at::slice(output, 2, 0, height-ind); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(2, 0); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(0); 46 | 47 | auto output_temp = output.select(2, 0); 48 | auto grad_output_temp = grad_output.select(2, 0); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(2); 52 | auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 0; ind < height - 1; ++ind) { 55 | input_temp = input.select(2, ind + 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, ind + 1); 61 | 62 | grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2); 63 | output.scatter_add_(2, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Bottom Pool Forward", 74 | py::call_guard() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Bottom Pool Backward", 78 | py::call_guard() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/src/left_pool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | std::vector pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get width 
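// The doubling-offset loop below computes a suffix maximum along the width axis
// in O(log W) in-place slice operations: afterwards output[..., j] holds
// max(input[..., j:]), which is the "left" corner pooling operation.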
12 | int64_t width = input.size(3); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < width; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 3, 0, width-ind); 18 | at::Tensor cur_temp = at::slice(output, 3, 0, width-ind); 19 | at::Tensor next_temp = at::slice(output, 3, ind, width); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(3, width - 1); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(width - 1); 46 | 47 | auto output_temp = output.select(3, width - 1); 48 | auto grad_output_temp = grad_output.select(3, width - 1); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(3); 52 | auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 1; ind < width; ++ind) { 55 | input_temp = input.select(3, width - ind - 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, width - ind - 1); 61 | 62 | grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3); 63 | output.scatter_add_(3, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Left Pool Forward", 74 | py::call_guard() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Left Pool Backward", 78 | py::call_guard() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/src/right_pool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | std::vector pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get width 12 | int64_t width = input.size(3); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < width; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 3, ind, width); 18 | at::Tensor cur_temp = at::slice(output, 3, ind, width); 19 | at::Tensor next_temp = at::slice(output, 3, 0, width-ind); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | at::Tensor output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, height}, 
at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(3, 0); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(0); 46 | 47 | auto output_temp = output.select(3, 0); 48 | auto grad_output_temp = grad_output.select(3, 0); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(3); 52 | auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 0; ind < width - 1; ++ind) { 55 | input_temp = input.select(3, ind + 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, ind + 1); 61 | 62 | grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3); 63 | output.scatter_add_(3, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Right Pool Forward", 74 | py::call_guard() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Right Pool Backward", 78 | py::call_guard() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/_cpools/src/top_pool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | std::vector top_pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get height 12 | int64_t height = input.size(2); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < height; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 2, 0, height-ind); 18 | at::Tensor cur_temp = at::slice(output, 2, 0, height-ind); 19 | at::Tensor next_temp = at::slice(output, 2, ind, height); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector top_pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(2, height - 1); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(height - 1); 46 | 47 | auto output_temp = output.select(2, height - 1); 48 | auto grad_output_temp = grad_output.select(2, height - 1); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(2); 52 | auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 1; ind < height; ++ind) { 55 | input_temp = input.select(2, height - ind - 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, height - ind - 1); 61 | 62 | grad_output_temp = grad_output.select(2, 
height - ind - 1).unsqueeze(2); 63 | output.scatter_add_(2, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &top_pool_forward, "Top Pool Forward", 74 | py::call_guard() 75 | ); 76 | m.def( 77 | "backward", &top_pool_backward, "Top Pool Backward", 78 | py::call_guard() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | from .scatter_gather import scatter_kwargs 8 | 9 | class DataParallel(Module): 10 | r"""Implements data parallelism at the module level. 11 | 12 | This container parallelizes the application of the given module by 13 | splitting the input across the specified devices by chunking in the batch 14 | dimension. In the forward pass, the module is replicated on each device, 15 | and each replica handles a portion of the input. During the backwards 16 | pass, gradients from each replica are summed into the original module. 17 | 18 | The batch size should be larger than the number of GPUs used. It should 19 | also be an integer multiple of the number of GPUs so that each chunk is the 20 | same size (so that each GPU processes the same number of samples). 21 | 22 | See also: :ref:`cuda-nn-dataparallel-instead` 23 | 24 | Arbitrary positional and keyword inputs are allowed to be passed into 25 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 26 | specified (default 0). Primitive types will be broadcasted, but all 27 | other types will be a shallow copy and can be corrupted if written to in 28 | the model's forward pass. 
29 | 30 | Args: 31 | module: module to be parallelized 32 | device_ids: CUDA devices (default: all devices) 33 | output_device: device location of output (default: device_ids[0]) 34 | 35 | Example:: 36 | 37 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 38 | >>> output = net(input_var) 39 | """ 40 | 41 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 42 | 43 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 44 | super(DataParallel, self).__init__() 45 | 46 | if not torch.cuda.is_available(): 47 | self.module = module 48 | self.device_ids = [] 49 | return 50 | 51 | if device_ids is None: 52 | device_ids = list(range(torch.cuda.device_count())) 53 | if output_device is None: 54 | output_device = device_ids[0] 55 | self.dim = dim 56 | self.module = module 57 | self.device_ids = device_ids 58 | self.chunk_sizes = chunk_sizes 59 | self.output_device = output_device 60 | if len(self.device_ids) == 1: 61 | self.module.cuda(device_ids[0]) 62 | 63 | def forward(self, *inputs, **kwargs): 64 | if not self.device_ids: 65 | return self.module(*inputs, **kwargs) 66 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 67 | if len(self.device_ids) == 1: 68 | return self.module(*inputs[0], **kwargs[0]) 69 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 70 | outputs = self.parallel_apply(replicas, inputs, kwargs) 71 | return self.gather(outputs, self.output_device) 72 | 73 | def replicate(self, module, device_ids): 74 | return replicate(module, device_ids) 75 | 76 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 77 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 78 | 79 | def parallel_apply(self, replicas, inputs, kwargs): 80 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 81 | 82 | def gather(self, outputs, output_device): 83 | return gather(outputs, output_device, dim=self.dim) 84 | 85 | 86 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 87 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 88 | 89 | This is the functional version of the DataParallel module. 90 | 91 | Args: 92 | module: the module to evaluate in parallel 93 | inputs: inputs to the module 94 | device_ids: GPU ids on which to replicate module 95 | output_device: GPU location of the output Use -1 to indicate the CPU. 
96 | (default: device_ids[0]) 97 | Returns: 98 | a Variable containing the result of module(input) located on 99 | output_device 100 | """ 101 | if not isinstance(inputs, tuple): 102 | inputs = (inputs,) 103 | 104 | if device_ids is None: 105 | device_ids = list(range(torch.cuda.device_count())) 106 | 107 | if output_device is None: 108 | output_device = device_ids[0] 109 | 110 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 111 | if len(device_ids) == 1: 112 | return module(*inputs[0], **module_kwargs[0]) 113 | used_device_ids = device_ids[:len(inputs)] 114 | replicas = replicate(module, used_device_ids) 115 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 116 | return gather(outputs, output_device, dim) 117 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .utils import _tranpose_and_gather_feat 5 | 6 | def _sigmoid(x): 7 | return torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 8 | 9 | def _ae_loss(tag0, tag1, mask): 10 | num = mask.sum(dim=1, keepdim=True).float() 11 | tag0 = tag0.squeeze() 12 | tag1 = tag1.squeeze() 13 | 14 | tag_mean = (tag0 + tag1) / 2 15 | 16 | tag0 = torch.pow(tag0 - tag_mean, 2) / (num + 1e-4) 17 | tag0 = tag0[mask].sum() 18 | tag1 = torch.pow(tag1 - tag_mean, 2) / (num + 1e-4) 19 | tag1 = tag1[mask].sum() 20 | pull = tag0 + tag1 21 | 22 | mask = mask.unsqueeze(1) + mask.unsqueeze(2) 23 | mask = mask.eq(2) 24 | num = num.unsqueeze(2) 25 | num2 = (num - 1) * num 26 | dist = tag_mean.unsqueeze(1) - tag_mean.unsqueeze(2) 27 | dist = 1 - torch.abs(dist) 28 | dist = nn.functional.relu(dist, inplace=True) 29 | dist = dist - 1 / (num + 1e-4) 30 | dist = dist / (num2 + 1e-4) 31 | dist = dist[mask] 32 | push = dist.sum() 33 | return pull, push 34 | 35 | def _off_loss(off, gt_off, mask): 36 | num = mask.float().sum() 37 | mask = mask.unsqueeze(2).expand_as(gt_off) 38 | 39 | off = off[mask] 40 | gt_off = gt_off[mask] 41 | 42 | off_loss = nn.functional.smooth_l1_loss(off, gt_off, reduction="sum") 43 | off_loss = off_loss / (num + 1e-4) 44 | return off_loss 45 | 46 | def _focal_loss_mask(preds, gt, mask): 47 | pos_inds = gt.eq(1) 48 | neg_inds = gt.lt(1) 49 | 50 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 51 | 52 | pos_mask = mask[pos_inds] 53 | neg_mask = mask[neg_inds] 54 | 55 | loss = 0 56 | for pred in preds: 57 | pos_pred = pred[pos_inds] 58 | neg_pred = pred[neg_inds] 59 | 60 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * pos_mask 61 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights * neg_mask 62 | 63 | num_pos = pos_inds.float().sum() 64 | pos_loss = pos_loss.sum() 65 | neg_loss = neg_loss.sum() 66 | 67 | if pos_pred.nelement() == 0: 68 | loss = loss - neg_loss 69 | else: 70 | loss = loss - (pos_loss + neg_loss) / num_pos 71 | return loss 72 | 73 | def _focal_loss(preds, gt): 74 | pos_inds = gt.eq(1) 75 | neg_inds = gt.lt(1) 76 | 77 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 78 | 79 | loss = 0 80 | for pred in preds: 81 | pos_pred = pred[pos_inds] 82 | neg_pred = pred[neg_inds] 83 | 84 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 85 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 86 | 87 | num_pos = pos_inds.float().sum() 88 | pos_loss = pos_loss.sum() 89 | neg_loss = neg_loss.sum() 90 | 91 
| if pos_pred.nelement() == 0: 92 | loss = loss - neg_loss 93 | else: 94 | loss = loss - (pos_loss + neg_loss) / num_pos 95 | return loss 96 | 97 | class CornerNet_Saccade_Loss(nn.Module): 98 | def __init__(self, pull_weight=1, push_weight=1, off_weight=1, focal_loss=_focal_loss_mask): 99 | super(CornerNet_Saccade_Loss, self).__init__() 100 | 101 | self.pull_weight = pull_weight 102 | self.push_weight = push_weight 103 | self.off_weight = off_weight 104 | self.focal_loss = focal_loss 105 | self.ae_loss = _ae_loss 106 | self.off_loss = _off_loss 107 | 108 | def forward(self, outs, targets): 109 | tl_heats = outs[0] 110 | br_heats = outs[1] 111 | tl_tags = outs[2] 112 | br_tags = outs[3] 113 | tl_offs = outs[4] 114 | br_offs = outs[5] 115 | atts = outs[6] 116 | 117 | gt_tl_heat = targets[0] 118 | gt_br_heat = targets[1] 119 | gt_mask = targets[2] 120 | gt_tl_off = targets[3] 121 | gt_br_off = targets[4] 122 | gt_tl_ind = targets[5] 123 | gt_br_ind = targets[6] 124 | gt_tl_valid = targets[7] 125 | gt_br_valid = targets[8] 126 | gt_atts = targets[9] 127 | 128 | # focal loss 129 | focal_loss = 0 130 | 131 | tl_heats = [_sigmoid(t) for t in tl_heats] 132 | br_heats = [_sigmoid(b) for b in br_heats] 133 | 134 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat, gt_tl_valid) 135 | focal_loss += self.focal_loss(br_heats, gt_br_heat, gt_br_valid) 136 | 137 | atts = [[_sigmoid(a) for a in att] for att in atts] 138 | atts = [[att[ind] for att in atts] for ind in range(len(gt_atts))] 139 | 140 | att_loss = 0 141 | for att, gt_att in zip(atts, gt_atts): 142 | att_loss += _focal_loss(att, gt_att) / max(len(att), 1) 143 | 144 | # tag loss 145 | pull_loss = 0 146 | push_loss = 0 147 | tl_tags = [_tranpose_and_gather_feat(tl_tag, gt_tl_ind) for tl_tag in tl_tags] 148 | br_tags = [_tranpose_and_gather_feat(br_tag, gt_br_ind) for br_tag in br_tags] 149 | for tl_tag, br_tag in zip(tl_tags, br_tags): 150 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask) 151 | pull_loss += pull 152 | push_loss += push 153 | pull_loss = self.pull_weight * pull_loss 154 | push_loss = self.push_weight * push_loss 155 | 156 | off_loss = 0 157 | tl_offs = [_tranpose_and_gather_feat(tl_off, gt_tl_ind) for tl_off in tl_offs] 158 | br_offs = [_tranpose_and_gather_feat(br_off, gt_br_ind) for br_off in br_offs] 159 | for tl_off, br_off in zip(tl_offs, br_offs): 160 | off_loss += self.off_loss(tl_off, gt_tl_off, gt_mask) 161 | off_loss += self.off_loss(br_off, gt_br_off, gt_mask) 162 | off_loss = self.off_weight * off_loss 163 | 164 | loss = (focal_loss + att_loss + pull_loss + push_loss + off_loss) / max(len(tl_heats), 1) 165 | return loss.unsqueeze(0) 166 | 167 | class CornerNet_Loss(nn.Module): 168 | def __init__(self, pull_weight=1, push_weight=1, off_weight=1, focal_loss=_focal_loss): 169 | super(CornerNet_Loss, self).__init__() 170 | 171 | self.pull_weight = pull_weight 172 | self.push_weight = push_weight 173 | self.off_weight = off_weight 174 | self.focal_loss = focal_loss 175 | self.ae_loss = _ae_loss 176 | self.off_loss = _off_loss 177 | 178 | def forward(self, outs, targets): 179 | tl_heats = outs[0] 180 | br_heats = outs[1] 181 | tl_tags = outs[2] 182 | br_tags = outs[3] 183 | tl_offs = outs[4] 184 | br_offs = outs[5] 185 | 186 | gt_tl_heat = targets[0] 187 | gt_br_heat = targets[1] 188 | gt_mask = targets[2] 189 | gt_tl_off = targets[3] 190 | gt_br_off = targets[4] 191 | gt_tl_ind = targets[5] 192 | gt_br_ind = targets[6] 193 | 194 | # focal loss 195 | focal_loss = 0 196 | 197 | tl_heats = [_sigmoid(t) for t in tl_heats] 
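        # _sigmoid clamps the heatmap activations to [1e-4, 1 - 1e-4] so the
        # log terms inside the focal loss below never become infinite.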
198 | br_heats = [_sigmoid(b) for b in br_heats] 199 | 200 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat) 201 | focal_loss += self.focal_loss(br_heats, gt_br_heat) 202 | 203 | # tag loss 204 | pull_loss = 0 205 | push_loss = 0 206 | tl_tags = [_tranpose_and_gather_feat(tl_tag, gt_tl_ind) for tl_tag in tl_tags] 207 | br_tags = [_tranpose_and_gather_feat(br_tag, gt_br_ind) for br_tag in br_tags] 208 | for tl_tag, br_tag in zip(tl_tags, br_tags): 209 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask) 210 | pull_loss += pull 211 | push_loss += push 212 | pull_loss = self.pull_weight * pull_loss 213 | push_loss = self.push_weight * push_loss 214 | 215 | off_loss = 0 216 | tl_offs = [_tranpose_and_gather_feat(tl_off, gt_tl_ind) for tl_off in tl_offs] 217 | br_offs = [_tranpose_and_gather_feat(br_off, gt_br_ind) for br_off in br_offs] 218 | for tl_off, br_off in zip(tl_offs, br_offs): 219 | off_loss += self.off_loss(tl_off, gt_tl_off, gt_mask) 220 | off_loss += self.off_loss(br_off, gt_br_off, gt_mask) 221 | off_loss = self.off_weight * off_loss 222 | 223 | loss = (focal_loss + pull_loss + push_loss + off_loss) / max(len(tl_heats), 1) 224 | return loss.unsqueeze(0) 225 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 
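        # Containers are scattered recursively and re-zipped so that every GPU
        # receives a structure of the same shape; any other object is simply
        # repeated once per target GPU by the fallback branch below.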
17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/models/py_utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def _gather_feat(feat, ind, mask=None): 5 | dim = feat.size(2) 6 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 7 | feat = feat.gather(1, ind) 8 | if mask is not None: 9 | mask = mask.unsqueeze(2).expand_as(feat) 10 | feat = feat[mask] 11 | feat = feat.view(-1, dim) 12 | return feat 13 | 14 | def _nms(heat, kernel=1): 15 | pad = (kernel - 1) // 2 16 | 17 | hmax = nn.functional.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 18 | keep = (hmax == heat).float() 19 | return heat * keep 20 | 21 | def _tranpose_and_gather_feat(feat, ind): 22 | feat = feat.permute(0, 2, 3, 1).contiguous() 23 | feat = feat.view(feat.size(0), -1, feat.size(3)) 24 | feat = _gather_feat(feat, ind) 25 | return feat 26 | 27 | def _topk(scores, K=20): 28 | batch, cat, height, width = scores.size() 29 | 30 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K) 31 | 32 | topk_clses = (topk_inds / (height * width)).int() 33 | 34 | topk_inds = topk_inds % (height * width) 35 | topk_ys = (topk_inds / width).int().float() 36 | topk_xs = (topk_inds % width).int().float() 37 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 38 | 39 | def _decode( 40 | tl_heat, br_heat, tl_tag, br_tag, tl_regr, br_regr, 41 | K=100, kernel=1, ae_threshold=1, num_dets=1000, no_border=False 42 | ): 43 | batch, cat, height, width = tl_heat.size() 44 | 45 | tl_heat = torch.sigmoid(tl_heat) 46 | br_heat = torch.sigmoid(br_heat) 47 | 48 | # perform nms on heatmaps 49 | tl_heat = _nms(tl_heat, kernel=kernel) 50 | br_heat = _nms(br_heat, kernel=kernel) 51 | 52 | tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = _topk(tl_heat, K=K) 53 | br_scores, br_inds, br_clses, br_ys, br_xs = _topk(br_heat, K=K) 54 | 55 | tl_ys = tl_ys.view(batch, K, 1).expand(batch, K, K) 56 | tl_xs = tl_xs.view(batch, K, 1).expand(batch, K, K) 57 | br_ys = br_ys.view(batch, 1, K).expand(batch, K, K) 58 | br_xs = br_xs.view(batch, 1, K).expand(batch, K, K) 59 | 60 | if no_border: 61 | tl_ys_binds = (tl_ys == 0) 62 | tl_xs_binds = (tl_xs == 0) 63 | br_ys_binds = (br_ys == height - 1) 64 | br_xs_binds = (br_xs == width - 1) 65 | 66 | if tl_regr is not None and br_regr is not None: 67 | tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds) 68 | tl_regr = tl_regr.view(batch, K, 1, 2) 69 | br_regr = _tranpose_and_gather_feat(br_regr, br_inds) 70 | 
br_regr = br_regr.view(batch, 1, K, 2) 71 | 72 | tl_xs = tl_xs + tl_regr[..., 0] 73 | tl_ys = tl_ys + tl_regr[..., 1] 74 | br_xs = br_xs + br_regr[..., 0] 75 | br_ys = br_ys + br_regr[..., 1] 76 | 77 | # all possible boxes based on top k corners (ignoring class) 78 | bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) 79 | 80 | tl_tag = _tranpose_and_gather_feat(tl_tag, tl_inds) 81 | tl_tag = tl_tag.view(batch, K, 1) 82 | br_tag = _tranpose_and_gather_feat(br_tag, br_inds) 83 | br_tag = br_tag.view(batch, 1, K) 84 | dists = torch.abs(tl_tag - br_tag) 85 | 86 | tl_scores = tl_scores.view(batch, K, 1).expand(batch, K, K) 87 | br_scores = br_scores.view(batch, 1, K).expand(batch, K, K) 88 | scores = (tl_scores + br_scores) / 2 89 | 90 | # reject boxes based on classes 91 | tl_clses = tl_clses.view(batch, K, 1).expand(batch, K, K) 92 | br_clses = br_clses.view(batch, 1, K).expand(batch, K, K) 93 | cls_inds = (tl_clses != br_clses) 94 | 95 | # reject boxes based on distances 96 | dist_inds = (dists > ae_threshold) 97 | 98 | # reject boxes based on widths and heights 99 | width_inds = (br_xs < tl_xs) 100 | height_inds = (br_ys < tl_ys) 101 | 102 | if no_border: 103 | scores[tl_ys_binds] = -1 104 | scores[tl_xs_binds] = -1 105 | scores[br_ys_binds] = -1 106 | scores[br_xs_binds] = -1 107 | 108 | scores[cls_inds] = -1 109 | scores[dist_inds] = -1 110 | scores[width_inds] = -1 111 | scores[height_inds] = -1 112 | 113 | scores = scores.view(batch, -1) 114 | scores, inds = torch.topk(scores, num_dets) 115 | scores = scores.unsqueeze(2) 116 | 117 | bboxes = bboxes.view(batch, -1, 4) 118 | bboxes = _gather_feat(bboxes, inds) 119 | 120 | clses = tl_clses.contiguous().view(batch, -1, 1) 121 | clses = _gather_feat(clses, inds).float() 122 | 123 | tl_scores = tl_scores.contiguous().view(batch, -1, 1) 124 | tl_scores = _gather_feat(tl_scores, inds).float() 125 | br_scores = br_scores.contiguous().view(batch, -1, 1) 126 | br_scores = _gather_feat(br_scores, inds).float() 127 | 128 | detections = torch.cat([bboxes, scores, tl_scores, br_scores, clses], dim=2) 129 | return detections 130 | 131 | class upsample(nn.Module): 132 | def __init__(self, scale_factor): 133 | super(upsample, self).__init__() 134 | self.scale_factor = scale_factor 135 | 136 | def forward(self, x): 137 | return nn.functional.interpolate(x, scale_factor=self.scale_factor) 138 | 139 | class merge(nn.Module): 140 | def forward(self, x, y): 141 | return x + y 142 | 143 | class convolution(nn.Module): 144 | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True): 145 | super(convolution, self).__init__() 146 | 147 | pad = (k - 1) // 2 148 | self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn) 149 | self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential() 150 | self.relu = nn.ReLU(inplace=True) 151 | 152 | def forward(self, x): 153 | conv = self.conv(x) 154 | bn = self.bn(conv) 155 | relu = self.relu(bn) 156 | return relu 157 | 158 | class residual(nn.Module): 159 | def __init__(self, inp_dim, out_dim, k=3, stride=1): 160 | super(residual, self).__init__() 161 | p = (k - 1) // 2 162 | 163 | self.conv1 = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(p, p), stride=(stride, stride), bias=False) 164 | self.bn1 = nn.BatchNorm2d(out_dim) 165 | self.relu1 = nn.ReLU(inplace=True) 166 | 167 | self.conv2 = nn.Conv2d(out_dim, out_dim, (k, k), padding=(p, p), bias=False) 168 | self.bn2 = nn.BatchNorm2d(out_dim) 169 | 170 | self.skip = nn.Sequential( 171 | nn.Conv2d(inp_dim, 
out_dim, (1, 1), stride=(stride, stride), bias=False), 172 | nn.BatchNorm2d(out_dim) 173 | ) if stride != 1 or inp_dim != out_dim else nn.Sequential() 174 | self.relu = nn.ReLU(inplace=True) 175 | 176 | def forward(self, x): 177 | conv1 = self.conv1(x) 178 | bn1 = self.bn1(conv1) 179 | relu1 = self.relu1(bn1) 180 | 181 | conv2 = self.conv2(relu1) 182 | bn2 = self.bn2(conv2) 183 | 184 | skip = self.skip(x) 185 | return self.relu(bn2 + skip) 186 | 187 | class corner_pool(nn.Module): 188 | def __init__(self, dim, pool1, pool2): 189 | super(corner_pool, self).__init__() 190 | self._init_layers(dim, pool1, pool2) 191 | 192 | def _init_layers(self, dim, pool1, pool2): 193 | self.p1_conv1 = convolution(3, dim, 128) 194 | self.p2_conv1 = convolution(3, dim, 128) 195 | 196 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 197 | self.p_bn1 = nn.BatchNorm2d(dim) 198 | 199 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 200 | self.bn1 = nn.BatchNorm2d(dim) 201 | self.relu1 = nn.ReLU(inplace=True) 202 | 203 | self.conv2 = convolution(3, dim, dim) 204 | 205 | self.pool1 = pool1() 206 | self.pool2 = pool2() 207 | 208 | def forward(self, x): 209 | # pool 1 210 | p1_conv1 = self.p1_conv1(x) 211 | pool1 = self.pool1(p1_conv1) 212 | 213 | # pool 2 214 | p2_conv1 = self.p2_conv1(x) 215 | pool2 = self.pool2(p2_conv1) 216 | 217 | # pool 1 + pool 2 218 | p_conv1 = self.p_conv1(pool1 + pool2) 219 | p_bn1 = self.p_bn1(p_conv1) 220 | 221 | conv1 = self.conv1(x) 222 | bn1 = self.bn1(conv1) 223 | relu1 = self.relu1(p_bn1 + bn1) 224 | 225 | conv2 = self.conv2(relu1) 226 | return conv2 227 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/nnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/core/nnet/__init__.py -------------------------------------------------------------------------------- /perception/corner_net_lite/core/nnet/py_factory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import pickle 4 | import importlib 5 | import torch.nn as nn 6 | 7 | from ..models.py_utils.data_parallel import DataParallel 8 | 9 | torch.manual_seed(317) 10 | 11 | class Network(nn.Module): 12 | def __init__(self, model, loss): 13 | super(Network, self).__init__() 14 | 15 | self.model = model 16 | self.loss = loss 17 | 18 | def forward(self, xs, ys, **kwargs): 19 | preds = self.model(*xs, **kwargs) 20 | loss = self.loss(preds, ys, **kwargs) 21 | return loss 22 | 23 | # for model backward compatibility 24 | # previously model was wrapped by DataParallel module 25 | class DummyModule(nn.Module): 26 | def __init__(self, model): 27 | super(DummyModule, self).__init__() 28 | self.module = model 29 | 30 | def forward(self, *xs, **kwargs): 31 | return self.module(*xs, **kwargs) 32 | 33 | class NetworkFactory(object): 34 | def __init__(self, system_config, model, distributed=False, gpu=None): 35 | super(NetworkFactory, self).__init__() 36 | 37 | self.system_config = system_config 38 | 39 | self.gpu = gpu 40 | self.model = DummyModule(model) 41 | self.loss = model.loss 42 | self.network = Network(self.model, self.loss) 43 | 44 | if distributed: 45 | from apex.parallel import DistributedDataParallel, convert_syncbn_model 46 | torch.cuda.set_device(gpu) 47 | self.network = self.network.cuda(gpu) 48 | 
self.network = convert_syncbn_model(self.network) 49 | self.network = DistributedDataParallel(self.network) 50 | else: 51 | self.network = DataParallel(self.network, chunk_sizes=system_config.chunk_sizes) 52 | 53 | total_params = 0 54 | for params in self.model.parameters(): 55 | num_params = 1 56 | for x in params.size(): 57 | num_params *= x 58 | total_params += num_params 59 | print("total parameters: {}".format(total_params)) 60 | 61 | if system_config.opt_algo == "adam": 62 | self.optimizer = torch.optim.Adam( 63 | filter(lambda p: p.requires_grad, self.model.parameters()) 64 | ) 65 | elif system_config.opt_algo == "sgd": 66 | self.optimizer = torch.optim.SGD( 67 | filter(lambda p: p.requires_grad, self.model.parameters()), 68 | lr=system_config.learning_rate, 69 | momentum=0.9, weight_decay=0.0001 70 | ) 71 | else: 72 | raise ValueError("unknown optimizer") 73 | 74 | def cuda(self): 75 | self.model.cuda() 76 | 77 | def train_mode(self): 78 | self.network.train() 79 | 80 | def eval_mode(self): 81 | self.network.eval() 82 | 83 | def _t_cuda(self, xs): 84 | if type(xs) is list: 85 | return [x.cuda(self.gpu, non_blocking=True) for x in xs] 86 | return xs.cuda(self.gpu, non_blocking=True) 87 | 88 | def train(self, xs, ys, **kwargs): 89 | xs = [self._t_cuda(x) for x in xs] 90 | ys = [self._t_cuda(y) for y in ys] 91 | 92 | self.optimizer.zero_grad() 93 | loss = self.network(xs, ys) 94 | loss = loss.mean() 95 | loss.backward() 96 | self.optimizer.step() 97 | 98 | return loss 99 | 100 | def validate(self, xs, ys, **kwargs): 101 | with torch.no_grad(): 102 | xs = [self._t_cuda(x) for x in xs] 103 | ys = [self._t_cuda(y) for y in ys] 104 | 105 | loss = self.network(xs, ys) 106 | loss = loss.mean() 107 | return loss 108 | 109 | def test(self, xs, **kwargs): 110 | with torch.no_grad(): 111 | xs = [self._t_cuda(x) for x in xs] 112 | return self.model(*xs, **kwargs) 113 | 114 | def set_lr(self, lr): 115 | print("setting learning rate to: {}".format(lr)) 116 | for param_group in self.optimizer.param_groups: 117 | param_group["lr"] = lr 118 | 119 | def load_pretrained_params(self, pretrained_model): 120 | print("loading from {}".format(pretrained_model)) 121 | with open(pretrained_model, "rb") as f: 122 | params = torch.load(f) 123 | self.model.load_state_dict(params) 124 | 125 | def load_params(self, iteration): 126 | cache_file = self.system_config.snapshot_file.format(iteration) 127 | print("loading model from {}".format(cache_file)) 128 | with open(cache_file, "rb") as f: 129 | params = torch.load(f) 130 | self.model.load_state_dict(params) 131 | 132 | def save_params(self, iteration): 133 | cache_file = self.system_config.snapshot_file.format(iteration) 134 | print("saving model to {}".format(cache_file)) 135 | with open(cache_file, "wb") as f: 136 | params = self.model.state_dict() 137 | torch.save(params, f) 138 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/paths.py: -------------------------------------------------------------------------------- 1 | import pkg_resources 2 | 3 | _package_name = __name__ 4 | 5 | def get_file_path(*paths): 6 | path = "/".join(paths) 7 | return pkg_resources.resource_filename(_package_name, path) 8 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/sample/__init__.py: -------------------------------------------------------------------------------- 1 | from .cornernet import cornernet 2 | from .cornernet_saccade import 
cornernet_saccade 3 | 4 | def data_sampling_func(sys_configs, db, k_ind, data_aug=True, debug=False): 5 | return globals()[sys_configs.sampling_function](sys_configs, db, k_ind, data_aug, debug) 6 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/sample/cornernet.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import math 3 | import numpy as np 4 | import torch 5 | 6 | from .utils import random_crop, draw_gaussian, gaussian_radius, normalize_, color_jittering_, lighting_ 7 | 8 | def _resize_image(image, detections, size): 9 | detections = detections.copy() 10 | height, width = image.shape[0:2] 11 | new_height, new_width = size 12 | 13 | image = cv2.resize(image, (new_width, new_height)) 14 | 15 | height_ratio = new_height / height 16 | width_ratio = new_width / width 17 | detections[:, 0:4:2] *= width_ratio 18 | detections[:, 1:4:2] *= height_ratio 19 | return image, detections 20 | 21 | def _clip_detections(image, detections): 22 | detections = detections.copy() 23 | height, width = image.shape[0:2] 24 | 25 | detections[:, 0:4:2] = np.clip(detections[:, 0:4:2], 0, width - 1) 26 | detections[:, 1:4:2] = np.clip(detections[:, 1:4:2], 0, height - 1) 27 | keep_inds = ((detections[:, 2] - detections[:, 0]) > 0) & \ 28 | ((detections[:, 3] - detections[:, 1]) > 0) 29 | detections = detections[keep_inds] 30 | return detections 31 | 32 | def cornernet(system_configs, db, k_ind, data_aug, debug): 33 | data_rng = system_configs.data_rng 34 | batch_size = system_configs.batch_size 35 | 36 | categories = db.configs["categories"] 37 | input_size = db.configs["input_size"] 38 | output_size = db.configs["output_sizes"][0] 39 | 40 | border = db.configs["border"] 41 | lighting = db.configs["lighting"] 42 | rand_crop = db.configs["rand_crop"] 43 | rand_color = db.configs["rand_color"] 44 | rand_scales = db.configs["rand_scales"] 45 | gaussian_bump = db.configs["gaussian_bump"] 46 | gaussian_iou = db.configs["gaussian_iou"] 47 | gaussian_rad = db.configs["gaussian_radius"] 48 | 49 | max_tag_len = 128 50 | 51 | # allocating memory 52 | images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32) 53 | tl_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) 54 | br_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) 55 | tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) 56 | br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) 57 | tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) 58 | br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) 59 | tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8) 60 | tag_lens = np.zeros((batch_size, ), dtype=np.int32) 61 | 62 | db_size = db.db_inds.size 63 | for b_ind in range(batch_size): 64 | if not debug and k_ind == 0: 65 | db.shuffle_inds() 66 | 67 | db_ind = db.db_inds[k_ind] 68 | k_ind = (k_ind + 1) % db_size 69 | 70 | # reading image 71 | image_path = db.image_path(db_ind) 72 | image = cv2.imread(image_path) 73 | 74 | # reading detections 75 | detections = db.detections(db_ind) 76 | 77 | # cropping an image randomly 78 | if not debug and rand_crop: 79 | image, detections = random_crop(image, detections, rand_scales, input_size, border=border) 80 | 81 | image, detections = _resize_image(image, detections, input_size) 82 | detections = _clip_detections(image, detections) 83 | 84 | 
width_ratio = output_size[1] / input_size[1] 85 | height_ratio = output_size[0] / input_size[0] 86 | 87 | # flipping an image randomly 88 | if not debug and np.random.uniform() > 0.5: 89 | image[:] = image[:, ::-1, :] 90 | width = image.shape[1] 91 | detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1 92 | 93 | if not debug: 94 | image = image.astype(np.float32) / 255. 95 | if rand_color: 96 | color_jittering_(data_rng, image) 97 | if lighting: 98 | lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec) 99 | normalize_(image, db.mean, db.std) 100 | images[b_ind] = image.transpose((2, 0, 1)) 101 | 102 | for ind, detection in enumerate(detections): 103 | category = int(detection[-1]) - 1 104 | 105 | xtl, ytl = detection[0], detection[1] 106 | xbr, ybr = detection[2], detection[3] 107 | 108 | fxtl = (xtl * width_ratio) 109 | fytl = (ytl * height_ratio) 110 | fxbr = (xbr * width_ratio) 111 | fybr = (ybr * height_ratio) 112 | 113 | xtl = int(fxtl) 114 | ytl = int(fytl) 115 | xbr = int(fxbr) 116 | ybr = int(fybr) 117 | 118 | if gaussian_bump: 119 | width = detection[2] - detection[0] 120 | height = detection[3] - detection[1] 121 | 122 | width = math.ceil(width * width_ratio) 123 | height = math.ceil(height * height_ratio) 124 | 125 | if gaussian_rad == -1: 126 | radius = gaussian_radius((height, width), gaussian_iou) 127 | radius = max(0, int(radius)) 128 | else: 129 | radius = gaussian_rad 130 | 131 | draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius) 132 | draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius) 133 | else: 134 | tl_heatmaps[b_ind, category, ytl, xtl] = 1 135 | br_heatmaps[b_ind, category, ybr, xbr] = 1 136 | 137 | tag_ind = tag_lens[b_ind] 138 | tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl] 139 | br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr] 140 | tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl 141 | br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr 142 | tag_lens[b_ind] += 1 143 | 144 | for b_ind in range(batch_size): 145 | tag_len = tag_lens[b_ind] 146 | tag_masks[b_ind, :tag_len] = 1 147 | 148 | images = torch.from_numpy(images) 149 | tl_heatmaps = torch.from_numpy(tl_heatmaps) 150 | br_heatmaps = torch.from_numpy(br_heatmaps) 151 | tl_regrs = torch.from_numpy(tl_regrs) 152 | br_regrs = torch.from_numpy(br_regrs) 153 | tl_tags = torch.from_numpy(tl_tags) 154 | br_tags = torch.from_numpy(br_tags) 155 | tag_masks = torch.from_numpy(tag_masks) 156 | 157 | return { 158 | "xs": [images], 159 | "ys": [tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs, tl_tags, br_tags] 160 | }, k_ind 161 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/sample/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | 5 | def grayscale(image): 6 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 7 | 8 | def normalize_(image, mean, std): 9 | image -= mean 10 | image /= std 11 | 12 | def lighting_(data_rng, image, alphastd, eigval, eigvec): 13 | alpha = data_rng.normal(scale=alphastd, size=(3, )) 14 | image += np.dot(eigvec, eigval * alpha) 15 | 16 | def blend_(alpha, image1, image2): 17 | image1 *= alpha 18 | image2 *= (1 - alpha) 19 | image1 += image2 20 | 21 | def saturation_(data_rng, image, gs, gs_mean, var): 22 | alpha = 1. + data_rng.uniform(low=-var, high=var) 23 | blend_(alpha, image, gs[:, :, None]) 24 | 25 | def brightness_(data_rng, image, gs, gs_mean, var): 26 | alpha = 1. 
+ data_rng.uniform(low=-var, high=var) 27 | image *= alpha 28 | 29 | def contrast_(data_rng, image, gs, gs_mean, var): 30 | alpha = 1. + data_rng.uniform(low=-var, high=var) 31 | blend_(alpha, image, gs_mean) 32 | 33 | def color_jittering_(data_rng, image): 34 | functions = [brightness_, contrast_, saturation_] 35 | random.shuffle(functions) 36 | 37 | gs = grayscale(image) 38 | gs_mean = gs.mean() 39 | for f in functions: 40 | f(data_rng, image, gs, gs_mean, 0.4) 41 | 42 | def gaussian2D(shape, sigma=1): 43 | m, n = [(ss - 1.) / 2. for ss in shape] 44 | y, x = np.ogrid[-m:m+1,-n:n+1] 45 | 46 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 47 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 48 | return h 49 | 50 | def draw_gaussian(heatmap, center, radius, k=1): 51 | diameter = 2 * radius + 1 52 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 53 | 54 | x, y = center 55 | 56 | height, width = heatmap.shape[0:2] 57 | 58 | left, right = min(x, radius), min(width - x, radius + 1) 59 | top, bottom = min(y, radius), min(height - y, radius + 1) 60 | 61 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 62 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 63 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 64 | 65 | def gaussian_radius(det_size, min_overlap): 66 | height, width = det_size 67 | 68 | a1 = 1 69 | b1 = (height + width) 70 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 71 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 72 | r1 = (b1 - sq1) / (2 * a1) 73 | 74 | a2 = 4 75 | b2 = 2 * (height + width) 76 | c2 = (1 - min_overlap) * width * height 77 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 78 | r2 = (b2 - sq2) / (2 * a2) 79 | 80 | a3 = 4 * min_overlap 81 | b3 = -2 * min_overlap * (height + width) 82 | c3 = (min_overlap - 1) * width * height 83 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 84 | r3 = (b3 + sq3) / (2 * a3) 85 | return min(r1, r2, r3) 86 | 87 | def _get_border(border, size): 88 | i = 1 89 | while size - border // i <= border // i: 90 | i *= 2 91 | return border // i 92 | 93 | def random_crop(image, detections, random_scales, view_size, border=64): 94 | view_height, view_width = view_size 95 | image_height, image_width = image.shape[0:2] 96 | 97 | scale = np.random.choice(random_scales) 98 | height = int(view_height * scale) 99 | width = int(view_width * scale) 100 | 101 | cropped_image = np.zeros((height, width, 3), dtype=image.dtype) 102 | 103 | w_border = _get_border(border, image_width) 104 | h_border = _get_border(border, image_height) 105 | 106 | ctx = np.random.randint(low=w_border, high=image_width - w_border) 107 | cty = np.random.randint(low=h_border, high=image_height - h_border) 108 | 109 | x0, x1 = max(ctx - width // 2, 0), min(ctx + width // 2, image_width) 110 | y0, y1 = max(cty - height // 2, 0), min(cty + height // 2, image_height) 111 | 112 | left_w, right_w = ctx - x0, x1 - ctx 113 | top_h, bottom_h = cty - y0, y1 - cty 114 | 115 | # crop image 116 | cropped_ctx, cropped_cty = width // 2, height // 2 117 | x_slice = slice(cropped_ctx - left_w, cropped_ctx + right_w) 118 | y_slice = slice(cropped_cty - top_h, cropped_cty + bottom_h) 119 | cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :] 120 | 121 | # crop detections 122 | cropped_detections = detections.copy() 123 | cropped_detections[:, 0:4:2] -= x0 124 | cropped_detections[:, 1:4:2] -= y0 125 | cropped_detections[:, 0:4:2] += cropped_ctx - left_w 126 | cropped_detections[:, 1:4:2] += cropped_cty - top_h 127 | 
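    # Detections are only shifted into the crop's coordinate frame here; boxes that
    # now fall outside the crop are clipped (and empty ones dropped) later by the
    # sampler's _clip_detections.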
128 | return cropped_image, cropped_detections 129 | 130 | def crop_image(image, center, size, output_size=None): 131 | if output_size == None: 132 | output_size = size 133 | 134 | cty, ctx = center 135 | height, width = size 136 | o_height, o_width = output_size 137 | im_height, im_width = image.shape[0:2] 138 | cropped_image = np.zeros((o_height, o_width, 3), dtype=image.dtype) 139 | 140 | x0, x1 = max(0, ctx - width // 2), min(ctx + width // 2, im_width) 141 | y0, y1 = max(0, cty - height // 2), min(cty + height // 2, im_height) 142 | 143 | left, right = ctx - x0, x1 - ctx 144 | top, bottom = cty - y0, y1 - cty 145 | 146 | cropped_cty, cropped_ctx = o_height // 2, o_width // 2 147 | y_slice = slice(cropped_cty - top, cropped_cty + bottom) 148 | x_slice = slice(cropped_ctx - left, cropped_ctx + right) 149 | cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :] 150 | 151 | border = np.array([ 152 | cropped_cty - top, 153 | cropped_cty + bottom, 154 | cropped_ctx - left, 155 | cropped_ctx + right 156 | ], dtype=np.float32) 157 | 158 | offset = np.array([ 159 | cty - o_height // 2, 160 | ctx - o_width // 2 161 | ]) 162 | 163 | return cropped_image, border, offset 164 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/test/__init__.py: -------------------------------------------------------------------------------- 1 | from .cornernet import cornernet 2 | from .cornernet_saccade import cornernet_saccade 3 | 4 | def test_func(sys_config, db, nnet, result_dir, debug=False): 5 | return globals()[sys_config.sampling_function](db, nnet, result_dir, debug=debug) 6 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/test/cornernet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import json 4 | import numpy as np 5 | import torch 6 | 7 | from tqdm import tqdm 8 | 9 | from ..utils import Timer 10 | from ..vis_utils import draw_bboxes 11 | from ..sample.utils import crop_image 12 | from ..external.nms import soft_nms, soft_nms_merge 13 | 14 | def rescale_dets_(detections, ratios, borders, sizes): 15 | xs, ys = detections[..., 0:4:2], detections[..., 1:4:2] 16 | xs /= ratios[:, 1][:, None, None] 17 | ys /= ratios[:, 0][:, None, None] 18 | xs -= borders[:, 2][:, None, None] 19 | ys -= borders[:, 0][:, None, None] 20 | np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs) 21 | np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys) 22 | 23 | def decode(nnet, images, K, ae_threshold=0.5, kernel=3, num_dets=1000): 24 | detections = nnet.test([images], ae_threshold=ae_threshold, test=True, K=K, kernel=kernel, num_dets=num_dets)[0] 25 | return detections.data.cpu().numpy() 26 | 27 | def cornernet(db, nnet, result_dir, debug=False, decode_func=decode): 28 | debug_dir = os.path.join(result_dir, "debug") 29 | if not os.path.exists(debug_dir): 30 | os.makedirs(debug_dir) 31 | 32 | if db.split != "trainval2014": 33 | db_inds = db.db_inds[:100] if debug else db.db_inds 34 | else: 35 | db_inds = db.db_inds[:100] if debug else db.db_inds[:5000] 36 | 37 | num_images = db_inds.size 38 | categories = db.configs["categories"] 39 | 40 | timer = Timer() 41 | top_bboxes = {} 42 | for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"): 43 | db_ind = db_inds[ind] 44 | 45 | image_id = db.image_ids(db_ind) 46 | image_path = db.image_path(db_ind) 47 | image = cv2.imread(image_path) 48 | 49 | timer.tic() 50 | 
top_bboxes[image_id] = cornernet_inference(db, nnet, image) 51 | timer.toc() 52 | 53 | if debug: 54 | image_path = db.image_path(db_ind) 55 | image = cv2.imread(image_path) 56 | bboxes = { 57 | db.cls2name(j): top_bboxes[image_id][j] 58 | for j in range(1, categories + 1) 59 | } 60 | image = draw_bboxes(image, bboxes) 61 | debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind)) 62 | cv2.imwrite(debug_file, image) 63 | print('average time: {}'.format(timer.average_time)) 64 | 65 | result_json = os.path.join(result_dir, "results.json") 66 | detections = db.convert_to_coco(top_bboxes) 67 | with open(result_json, "w") as f: 68 | json.dump(detections, f) 69 | 70 | cls_ids = list(range(1, categories + 1)) 71 | image_ids = [db.image_ids(ind) for ind in db_inds] 72 | db.evaluate(result_json, cls_ids, image_ids) 73 | return 0 74 | 75 | def cornernet_inference(db, nnet, image, decode_func=decode): 76 | K = db.configs["top_k"] 77 | ae_threshold = db.configs["ae_threshold"] 78 | nms_kernel = db.configs["nms_kernel"] 79 | num_dets = db.configs["num_dets"] 80 | test_flipped = db.configs["test_flipped"] 81 | 82 | input_size = db.configs["input_size"] 83 | output_size = db.configs["output_sizes"][0] 84 | 85 | scales = db.configs["test_scales"] 86 | weight_exp = db.configs["weight_exp"] 87 | merge_bbox = db.configs["merge_bbox"] 88 | categories = db.configs["categories"] 89 | nms_threshold = db.configs["nms_threshold"] 90 | max_per_image = db.configs["max_per_image"] 91 | nms_algorithm = { 92 | "nms": 0, 93 | "linear_soft_nms": 1, 94 | "exp_soft_nms": 2 95 | }[db.configs["nms_algorithm"]] 96 | 97 | height, width = image.shape[0:2] 98 | 99 | height_scale = (input_size[0] + 1) // output_size[0] 100 | width_scale = (input_size[1] + 1) // output_size[1] 101 | 102 | im_mean = torch.cuda.FloatTensor(db.mean).reshape(1, 3, 1, 1) 103 | im_std = torch.cuda.FloatTensor(db.std).reshape(1, 3, 1, 1) 104 | 105 | detections = [] 106 | for scale in scales: 107 | new_height = int(height * scale) 108 | new_width = int(width * scale) 109 | new_center = np.array([new_height // 2, new_width // 2]) 110 | 111 | inp_height = new_height | 127 112 | inp_width = new_width | 127 113 | 114 | images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) 115 | ratios = np.zeros((1, 2), dtype=np.float32) 116 | borders = np.zeros((1, 4), dtype=np.float32) 117 | sizes = np.zeros((1, 2), dtype=np.float32) 118 | 119 | out_height, out_width = (inp_height + 1) // height_scale, (inp_width + 1) // width_scale 120 | height_ratio = out_height / inp_height 121 | width_ratio = out_width / inp_width 122 | 123 | resized_image = cv2.resize(image, (new_width, new_height)) 124 | resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width]) 125 | 126 | resized_image = resized_image / 255. 
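        # Pixels are scaled to [0, 1] here; mean/std normalization happens further
        # down on the GPU, after the optional horizontally flipped copy is stacked in.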
127 | 128 | images[0] = resized_image.transpose((2, 0, 1)) 129 | borders[0] = border 130 | sizes[0] = [int(height * scale), int(width * scale)] 131 | ratios[0] = [height_ratio, width_ratio] 132 | 133 | if test_flipped: 134 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 135 | images = torch.from_numpy(images).cuda() 136 | images -= im_mean 137 | images /= im_std 138 | 139 | dets = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel, num_dets=num_dets) 140 | if test_flipped: 141 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] 142 | dets = dets.reshape(1, -1, 8) 143 | 144 | rescale_dets_(dets, ratios, borders, sizes) 145 | dets[:, :, 0:4] /= scale 146 | detections.append(dets) 147 | 148 | detections = np.concatenate(detections, axis=1) 149 | 150 | classes = detections[..., -1] 151 | classes = classes[0] 152 | detections = detections[0] 153 | 154 | # reject detections with negative scores 155 | keep_inds = (detections[:, 4] > -1) 156 | detections = detections[keep_inds] 157 | classes = classes[keep_inds] 158 | 159 | top_bboxes = {} 160 | for j in range(categories): 161 | keep_inds = (classes == j) 162 | top_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32) 163 | if merge_bbox: 164 | soft_nms_merge(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp) 165 | else: 166 | soft_nms(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm) 167 | top_bboxes[j + 1] = top_bboxes[j + 1][:, 0:5] 168 | 169 | scores = np.hstack([top_bboxes[j][:, -1] for j in range(1, categories + 1)]) 170 | if len(scores) > max_per_image: 171 | kth = len(scores) - max_per_image 172 | thresh = np.partition(scores, kth)[kth] 173 | for j in range(1, categories + 1): 174 | keep_inds = (top_bboxes[j][:, -1] >= thresh) 175 | top_bboxes[j] = top_bboxes[j][keep_inds] 176 | return top_bboxes 177 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .tqdm import stdout_to_tqdm 2 | from .timer import Timer 3 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | class Timer(object): 4 | """A simple timer.""" 5 | def __init__(self): 6 | self.total_time = 0. 7 | self.calls = 0 8 | self.start_time = 0. 9 | self.diff = 0. 10 | self.average_time = 0. 
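        # Typical usage: call tic() just before the timed operation and toc() just
        # after; toc() returns the running average, or the last interval when
        # called with average=False.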
11 | 12 | def tic(self): 13 | # using time.time instead of time.clock because time time.clock 14 | # does not normalize for multithreading 15 | self.start_time = time.time() 16 | 17 | def toc(self, average=True): 18 | self.diff = time.time() - self.start_time 19 | self.total_time += self.diff 20 | self.calls += 1 21 | self.average_time = self.total_time / self.calls 22 | if average: 23 | return self.average_time 24 | else: 25 | return self.diff 26 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/utils/tqdm.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import contextlib 4 | 5 | from tqdm import tqdm 6 | 7 | class TqdmFile(object): 8 | dummy_file = None 9 | def __init__(self, dummy_file): 10 | self.dummy_file = dummy_file 11 | 12 | def write(self, x): 13 | if len(x.rstrip()) > 0: 14 | tqdm.write(x, file=self.dummy_file) 15 | 16 | @contextlib.contextmanager 17 | def stdout_to_tqdm(): 18 | save_stdout = sys.stdout 19 | try: 20 | sys.stdout = TqdmFile(sys.stdout) 21 | yield save_stdout 22 | except Exception as exc: 23 | raise exc 24 | finally: 25 | sys.stdout = save_stdout 26 | -------------------------------------------------------------------------------- /perception/corner_net_lite/core/vis_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | def draw_bboxes(image, bboxes, font_size=0.5, thresh=0.5, colors=None): 5 | """Draws bounding boxes on an image. 6 | 7 | Args: 8 | image: An image in OpenCV format 9 | bboxes: A dictionary representing bounding boxes of different object 10 | categories, where the keys are the names of the categories and the 11 | values are the bounding boxes. The bounding boxes of category should be 12 | stored in a 2D NumPy array, where each row is a bounding box (x1, y1, 13 | x2, y2, score). 14 | font_size: (Optional) Font size of the category names. 15 | thresh: (Optional) Only bounding boxes with scores above the threshold 16 | will be drawn. 17 | colors: (Optional) Color of bounding boxes for each category. If it is 18 | not provided, this function will use random color for each category. 19 | 20 | Returns: 21 | An image with bounding boxes. 
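
    Example (illustrative only; the category name and box values are made up):
        bboxes = {"valve": np.array([[20, 30, 120, 160, 0.9]], dtype=np.float32)}
        image = draw_bboxes(image, bboxes, thresh=0.5)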
22 | """ 23 | 24 | image = image.copy() 25 | for cat_name in bboxes: 26 | keep_inds = bboxes[cat_name][:, -1] > thresh 27 | cat_size = cv2.getTextSize(cat_name, cv2.FONT_HERSHEY_SIMPLEX, font_size, 2)[0] 28 | 29 | if colors is None: 30 | color = np.random.random((3, )) * 0.6 + 0.4 31 | color = (color * 255).astype(np.int32).tolist() 32 | else: 33 | color = colors[cat_name] 34 | 35 | for bbox in bboxes[cat_name][keep_inds]: 36 | bbox = bbox[0:4].astype(np.int32) 37 | if bbox[1] - cat_size[1] - 2 < 0: 38 | cv2.rectangle(image, 39 | (bbox[0], bbox[1] + 2), 40 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2), 41 | color, -1 42 | ) 43 | cv2.putText(image, cat_name, 44 | (bbox[0], bbox[1] + cat_size[1] + 2), 45 | cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), thickness=1 46 | ) 47 | else: 48 | cv2.rectangle(image, 49 | (bbox[0], bbox[1] - cat_size[1] - 2), 50 | (bbox[0] + cat_size[0], bbox[1] - 2), 51 | color, -1 52 | ) 53 | cv2.putText(image, cat_name, 54 | (bbox[0], bbox[1] - 2), 55 | cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), thickness=1 56 | ) 57 | cv2.rectangle(image, 58 | (bbox[0], bbox[1]), 59 | (bbox[2], bbox[3]), 60 | color, 2 61 | ) 62 | return image 63 | -------------------------------------------------------------------------------- /perception/corner_net_lite/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/corner_net_lite/demo.jpg -------------------------------------------------------------------------------- /perception/corner_net_lite/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cv2 4 | from core.detectors import CornerNet_Saccade 5 | from core.vis_utils import draw_bboxes 6 | 7 | detector = CornerNet_Saccade() 8 | image = cv2.imread("demo.jpg") 9 | 10 | bboxes = detector(image) 11 | image = draw_bboxes(image, bboxes) 12 | cv2.imwrite("demo_out.jpg", image) 13 | -------------------------------------------------------------------------------- /perception/corner_net_lite/evaluate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import json 4 | import torch 5 | import pprint 6 | import argparse 7 | import importlib 8 | 9 | from core.dbs import datasets 10 | from core.test import test_func 11 | from core.config import SystemConfig 12 | from core.nnet.py_factory import NetworkFactory 13 | 14 | torch.backends.cudnn.benchmark = False 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description="Evaluation Script") 18 | parser.add_argument("cfg_file", help="config file", type=str) 19 | parser.add_argument("--testiter", dest="testiter", 20 | help="test at iteration i", 21 | default=None, type=int) 22 | parser.add_argument("--split", dest="split", 23 | help="which split to use", 24 | default="validation", type=str) 25 | parser.add_argument("--suffix", dest="suffix", default=None, type=str) 26 | parser.add_argument("--debug", action="store_true") 27 | 28 | args = parser.parse_args() 29 | return args 30 | 31 | def make_dirs(directories): 32 | for directory in directories: 33 | if not os.path.exists(directory): 34 | os.makedirs(directory) 35 | 36 | def test(db, system_config, model, args): 37 | split = args.split 38 | testiter = args.testiter 39 | debug = args.debug 40 | suffix = args.suffix 41 | 42 | result_dir = system_config.result_dir 43 | 
result_dir = os.path.join(result_dir, str(testiter), split) 44 | 45 | if suffix is not None: 46 | result_dir = os.path.join(result_dir, suffix) 47 | 48 | make_dirs([result_dir]) 49 | 50 | test_iter = system_config.max_iter if testiter is None else testiter 51 | print("loading parameters at iteration: {}".format(test_iter)) 52 | 53 | print("building neural network...") 54 | nnet = NetworkFactory(system_config, model) 55 | print("loading parameters...") 56 | nnet.load_params(test_iter) 57 | 58 | nnet.cuda() 59 | nnet.eval_mode() 60 | test_func(system_config, db, nnet, result_dir, debug=debug) 61 | 62 | def main(args): 63 | if args.suffix is None: 64 | cfg_file = os.path.join("./configs", args.cfg_file + ".json") 65 | else: 66 | cfg_file = os.path.join("./configs", args.cfg_file + "-{}.json".format(args.suffix)) 67 | print("cfg_file: {}".format(cfg_file)) 68 | 69 | with open(cfg_file, "r") as f: 70 | config = json.load(f) 71 | 72 | config["system"]["snapshot_name"] = args.cfg_file 73 | system_config = SystemConfig().update_config(config["system"]) 74 | 75 | model_file = "core.models.{}".format(args.cfg_file) 76 | model_file = importlib.import_module(model_file) 77 | model = model_file.model() 78 | 79 | train_split = system_config.train_split 80 | val_split = system_config.val_split 81 | test_split = system_config.test_split 82 | 83 | split = { 84 | "training": train_split, 85 | "validation": val_split, 86 | "testing": test_split 87 | }[args.split] 88 | 89 | print("loading all datasets...") 90 | dataset = system_config.dataset 91 | print("split: {}".format(split)) 92 | testing_db = datasets[dataset](config["db"], split=split, sys_config=system_config) 93 | 94 | print("system config...") 95 | pprint.pprint(system_config.full) 96 | 97 | print("db config...") 98 | pprint.pprint(testing_db.configs) 99 | 100 | test(testing_db, system_config, model, args) 101 | 102 | if __name__ == "__main__": 103 | args = parse_args() 104 | main(args) 105 | -------------------------------------------------------------------------------- /perception/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/perception/datasets/__init__.py -------------------------------------------------------------------------------- /perception/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import random 3 | from torch.utils import data 4 | 5 | class RoundRobin(data.IterableDataset): 6 | """ 7 | This class will sample iterable datasets in a round robin fashion ad-ininitum. 8 | When a dataset runs out of juice, it will simply reset it. 
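    Iteration therefore never terminates on its own; an exhausted child iterator is
    simply re-created in place. Example (with hypothetical datasets):

        mixed = RoundRobin([front_camera_clips, side_camera_clips])
        for example in mixed:  # alternates between the two, forever
            ...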
9 | """ 10 | def __init__(self, datasets): 11 | self.datasets = datasets 12 | self.dataset_count = len(datasets) 13 | 14 | def __iter__(self): 15 | datasets = [iter(d) for d in self.datasets] 16 | i = 0 17 | while True: 18 | current_dataset = datasets[i] 19 | try: 20 | example = next(current_dataset) 21 | i = (i + 1) % self.dataset_count 22 | yield example 23 | except StopIteration as e: 24 | datasets[i] = iter(self.datasets[i]) 25 | continue 26 | 27 | class Chain(data.IterableDataset): 28 | def __init__(self, datasets, shuffle=True, infinite=False): 29 | self.shuffle = shuffle 30 | self.datasets = datasets 31 | self.infinite = infinite 32 | 33 | def __iter__(self): 34 | datasets = self.datasets 35 | if self.shuffle: 36 | random.shuffle(datasets) 37 | if self.infinite: 38 | for dataset in itertools.cycle(self.datasets): 39 | for item in dataset: 40 | try: 41 | yield item 42 | except StopIteration: 43 | continue 44 | else: 45 | for dataset in self.datasets: 46 | for item in dataset: 47 | yield item 48 | 49 | def __len__(self): 50 | return sum(len(d) for d in self.datasets) 51 | 52 | class SamplingPool(data.IterableDataset): 53 | """ 54 | Maintains a pool of N examples and samples randomly from that pool. 55 | Useful for mixing different iterable datasets together. 56 | """ 57 | def __init__(self, dataset, n=1000): 58 | self.dataset = dataset 59 | self.n = n 60 | 61 | def __iter__(self): 62 | pool = [] 63 | iterator = iter(self.dataset) 64 | for _ in range(self.n): 65 | try: 66 | pool.append(next(iterator)) 67 | except StopIteration: 68 | break 69 | 70 | while True: 71 | try: 72 | new_example = next(iterator) 73 | except StopIteration as e: 74 | break 75 | 76 | random_index = random.randint(0, len(pool)-1) 77 | yield pool[random_index] 78 | pool[random_index] = new_example 79 | 80 | # If the dataset is exhausted, empty the pool. 81 | while len(pool) > 0: 82 | random_index = random.randint(0, len(pool)-1) 83 | yield pool[random_index] 84 | del pool[random_index] 85 | 86 | def __len__(self): 87 | return len(self.dataset) 88 | 89 | -------------------------------------------------------------------------------- /perception/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.loss import _Loss 3 | from torch.nn import functional as F 4 | 5 | class KeypointLoss(_Loss): 6 | def __init__(self, keypoint_config, depth_weight=10.0, center_weight=1.0, size_average=None, reduce=None, reduction='mean'): 7 | super().__init__(size_average, reduce, reduction) 8 | self.keypoint_config = keypoint_config 9 | self.n_keypoint_maps = len(keypoint_config) + 1 # Add one for center map. 
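        # The total loss computed in forward() is: heatmap BCE + depth_weight *
        # masked L1 depth loss + center_weight * masked smooth-L1 center loss,
        # summed over the network's intermediate outputs and averaged over the batch.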
10 | self.depth_weight = depth_weight 11 | self.center_weight = center_weight 12 | if reduction == 'mean': 13 | self.reduce = torch.mean 14 | elif reduction == 'sum': 15 | self.reduce = torch.sum 16 | else: 17 | raise NotImplementedError("Unknown reduction method {reduction}, try 'mean' or 'sum'.") 18 | 19 | def forward(self, p_heatmaps, gt_heatmaps, p_depth, gt_depth, p_centers, gt_centers): 20 | """ 21 | predictions: N x D x H x W prediction tensor 22 | gt: N x D x H x W 23 | """ 24 | heatmap_loss = 0.0 25 | depth_loss = 0.0 26 | center_loss = 0.0 27 | heatmap_losses = [] 28 | N = float(gt_heatmaps.shape[0]) 29 | depth_losses = [] 30 | center_losses = [] 31 | for p_hm, p_d, p_center in zip(p_heatmaps, p_depth, p_centers): 32 | loss = F.binary_cross_entropy_with_logits(p_hm, gt_heatmaps, reduction='none').sum(dim=[1,2,3]).mean() 33 | heatmap_loss += loss 34 | heatmap_losses.append(loss) 35 | 36 | where_heat = gt_heatmaps > 0.01 37 | 38 | depth_l1 = F.l1_loss(p_d[where_heat], gt_depth[where_heat], reduction='sum') 39 | depth_loss += depth_l1 / N 40 | depth_losses.append(depth_l1) 41 | 42 | where_heat = where_heat[:, 1:, None].expand(-1, -1, 2, -1, -1) 43 | center_l1 = F.smooth_l1_loss(p_center[where_heat], gt_centers[where_heat], reduction='sum') 44 | center_loss += center_l1 / N 45 | center_losses.append(center_l1) 46 | 47 | loss = heatmap_loss + self.depth_weight * depth_loss + self.center_weight * center_loss 48 | return loss, heatmap_losses, depth_losses, center_losses 49 | 50 | -------------------------------------------------------------------------------- /perception/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch import nn 4 | from torch.nn import functional as F 5 | from perception.corner_net_lite.core.models import CornerNet_Squeeze 6 | from perception.corner_net_lite.core.models.py_utils.utils import convolution 7 | from perception.corner_net_lite.core.base import load_nnet, load_cfg 8 | from perception.corner_net_lite.core.config import SystemConfig 9 | from perception.corner_net_lite.core.nnet.py_factory import NetworkFactory 10 | import timm 11 | 12 | 13 | def prediction_module(int_features, features_out): 14 | return nn.Sequential( 15 | convolution(1, 256, int_features, with_bn=True), 16 | convolution(1, int_features, 32, with_bn=True), 17 | nn.Conv2d(32, features_out, (1, 1), bias=True) 18 | ) 19 | 20 | class HeatmapHead(nn.Module): 21 | def __init__(self, features, heatmaps): 22 | super().__init__() 23 | self.output_head1 = prediction_module(features, heatmaps) 24 | self.output_head2 = prediction_module(features, heatmaps) 25 | self.output_head1[-1].bias.data.fill_(0.01/0.99) 26 | self.output_head2[-1].bias.data.fill_(0.01/0.99) 27 | 28 | def forward(self, heatmaps): 29 | return self.output_head1(heatmaps[0]), self.output_head2(heatmaps[1]) 30 | 31 | class DepthHead(nn.Module): 32 | def __init__(self, features, heatmaps): 33 | super().__init__() 34 | self.output_head1 = prediction_module(features, heatmaps) 35 | self.output_head2 = prediction_module(features, heatmaps) 36 | 37 | def forward(self, x): 38 | out1 = self.output_head1(x[0]) 39 | out2 = self.output_head2(x[1]) 40 | return out1, out2 41 | 42 | class CenterHead(nn.Module): 43 | def __init__(self, features, heatmaps): 44 | super().__init__() 45 | self.outputs = heatmaps - 1 46 | self.output_head1 = prediction_module(features, self.outputs * 2) 47 | self.output_head2 = prediction_module(features, self.outputs * 2) 48 | 
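    # One 2-channel (x, y) regression map is predicted for every heatmap channel
    # except the object-center one, hence `self.outputs * 2` output channels;
    # forward() reshapes the result to (N, outputs, 2, H, W) to match the
    # gt_centers tensor used by KeypointLoss.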
49 |     def forward(self, x):
50 |         N, C, H, W = x[1].shape
51 |         out1 = self.output_head1(x[0])
52 |         out2 = self.output_head2(x[1])
53 |         return out1.reshape(N, self.outputs, 2, H, W), out2.reshape(N, self.outputs, 2, H, W)
54 | 
55 | def nms(x, size=5):
56 |     hmax = nn.functional.max_pool2d(x, (size, size), padding=size // 2, stride=1)
57 |     keep = (x == hmax).to(x.dtype)
58 |     return x * keep
59 | 
60 | class KeypointNet(nn.Module):
61 |     def __init__(self, output_size, features=128, heatmaps_out=2, dropout=0.1):
62 |         super().__init__()
63 |         self.backbone = self._build_hourglass()
64 |         self.heatmap_head = HeatmapHead(features, heatmaps_out)
65 |         self.depth_head = DepthHead(features, heatmaps_out)
66 |         self.center_head = CenterHead(features, heatmaps_out)
67 |         self.dropout = nn.Dropout(p=dropout)
68 | 
69 |     def _build_hourglass(self):
70 |         corner_net = CornerNet_Squeeze.model()
71 |         config, _ = load_cfg("./perception/corner_net_lite/configs/CornerNet_Squeeze.json")
72 |         sys_cfg = SystemConfig().update_config(config)
73 |         net = load_nnet(sys_cfg, corner_net)
74 |         if torch.cuda.is_available():
75 |             net.load_pretrained_params('./models/corner_net.pkl')
76 |         else:
77 |             print('Cuda not available. Will not load pretrained params')
78 |         return net.model.module.hg
79 | 
80 |     def forward(self, x):
81 |         features = [self.dropout(f) for f in self.backbone(x)]
82 |         heatmaps_out = self.heatmap_head(features)
83 |         depth_out = self.depth_head(features)
84 |         centers_out = self.center_head(features)
85 |         return heatmaps_out, depth_out, centers_out
86 | 
87 | 
--------------------------------------------------------------------------------
/perception/utils/__init__.py:
--------------------------------------------------------------------------------
1 | import time
2 | 
3 | class Rate:
4 |     def __init__(self, rate):
5 |         self.rate = rate
6 |         self.last_sleep = 0.0  # Far in the past, so the first call to sleep() returns immediately.
7 |         self.time_per_step = 1.0 / float(rate)
8 | 
9 |     def sleep(self):
10 |         now = time.time()
11 |         time_since_last = now - self.last_sleep
12 |         to_sleep = max(self.time_per_step - time_since_last, 0.0)
13 |         time.sleep(to_sleep)
14 |         self.last_sleep = now + to_sleep  # Measure the next interval from when this sleep ends.
15 | 
--------------------------------------------------------------------------------
/perception/utils/camera_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import yaml
5 | from .
import linalg 6 | 7 | class PinholeCamera: 8 | def __init__(self, K, D, image_size): 9 | # Camera matrix 10 | self.K = K 11 | self.Kinv = np.linalg.inv(K) 12 | # Distortion parameters 13 | self.D = D 14 | # height, width 15 | self.image_size = np.array(image_size) 16 | assert np.abs(K[0, 2] * 2.0 - image_size[1]) < 0.05 * image_size[1] 17 | 18 | def scale(self, scale): 19 | K = scale_camera_matrix(self.K, np.ones(2) * scale) 20 | return FisheyeCamera(K, self.D, self.image_size * scale) 21 | 22 | def cut(self, offset): 23 | cx = self.K[0, 2] - offset[0] 24 | cy = self.K[1, 2] - offset[1] 25 | K = self.K.copy() 26 | K[0, 2] = cx 27 | K[1, 2] = cy 28 | image_size = self.image_size - 2.0 * offset[::-1] 29 | return FisheyeCamera(K, self.D, image_size) 30 | 31 | def unproject(self, xys, zs): 32 | xs = np.concatenate([xys, np.ones((xys.shape[0], 1))], axis=1) 33 | X = (self.Kinv @ xs[:, :, None])[:, :, 0] * zs[:, None] 34 | return X 35 | 36 | def in_frame(self, x): 37 | """ 38 | x: N x 2 array of points in image frame 39 | returns: N array of boolean values 40 | """ 41 | under = (x <= 0.0).any(axis=1) 42 | over = (x >= self.image_size).any(axis=1) 43 | return np.bitwise_or(under, over) == False 44 | 45 | class RadTanPinholeCamera(PinholeCamera): 46 | def project(self, X, T_CW=np.eye(4)): 47 | """ 48 | X: N x 3 points in world frame as define by T_CW 49 | returns: N x 2 points in image coordinates. 50 | """ 51 | R, _ = cv2.Rodrigues(T_CW[:3, :3]) 52 | x, _ = cv2.projectPoints(X[:, None, :], R, T_CW[:3, 3], self.K, self.D) 53 | x = x[:, 0] 54 | return x 55 | 56 | def undistort(self, xy): 57 | """ 58 | xy: N x 2 image points 59 | returns: N x 2 undistorted image points. 60 | """ 61 | return cv2.undistortPoints(xy[:, None, :], self.K, self.D, 62 | P=self.K)[:, 0, :] 63 | 64 | class FisheyeCamera(PinholeCamera): 65 | def project(self, X, T_CW=np.eye(4)): 66 | """ 67 | X: N x 3 points in world frame as define by T_CW 68 | returns: N x 2 points in image coordinates. 69 | """ 70 | R, _ = cv2.Rodrigues(T_CW[:3, :3]) 71 | x, _ = cv2.fisheye.projectPoints(X[:, None, :], R, T_CW[:3, 3], self.K, self.D) 72 | x = x[:, 0] 73 | return x 74 | 75 | def undistort(self, xy): 76 | """ 77 | xy: N x 2 image points 78 | returns: N x 2 undistorted image points. 
79 | """ 80 | return cv2.fisheye.undistortPoints(xy[:, None, :], self.K, self.D, 81 | P=self.K)[:, 0, :] 82 | 83 | 84 | class StereoCamera: 85 | def __init__(self, left_camera, right_camera, T_RL): 86 | self.left_camera = left_camera 87 | self.right_camera = right_camera 88 | self.T_RL = T_RL 89 | self.T_LR = linalg.inv_transform(T_RL) 90 | self.F = fundamental_matrix(T_RL, self.left_camera.K, self.right_camera.K) 91 | 92 | def triangulate(self, left_keypoints, right_keypoints): 93 | left_keypoints = left_keypoints[:, None, :].astype(np.float32) 94 | right_keypoints = right_keypoints[:, None, :].astype(np.float32) 95 | undistorted_left = cv2.fisheye.undistortPoints(left_keypoints, self.left_camera.K, self.left_camera.D, 96 | P=self.left_camera.K)[:, 0, :] 97 | undistorted_right = cv2.fisheye.undistortPoints(right_keypoints, self.right_camera.K, self.right_camera.D, 98 | P=self.right_camera.K)[:, 0, :] 99 | 100 | corrected_left, corrected_right = cv2.correctMatches(self.F, undistorted_left[None], undistorted_right[None]) 101 | corrected_left, corrected_right = corrected_left[0], corrected_right[0] 102 | 103 | P1 = self.left_camera.K @ np.eye(3, 4) 104 | P2 = self.right_camera.K @ self.T_RL[:3] 105 | p_LK = cv2.triangulatePoints( 106 | P1, P2, corrected_left.T, corrected_right.T 107 | ).T # N x 4 108 | p_LK = p_LK[:, :3] / p_LK[:, 3:4] 109 | 110 | return p_LK 111 | 112 | @classmethod 113 | def from_file(cls, calibration_file): 114 | camera = load_calibration_params(calibration_file) 115 | left_camera = FisheyeCamera(camera['K'], camera['D'], camera['image_size']) 116 | right_camera = FisheyeCamera(camera['Kp'], camera['Dp'], camera['image_size']) 117 | return cls(left_camera, right_camera, camera['T_RL']) 118 | 119 | def camera_matrix(intrinsics): 120 | fx, fy, cx, cy = intrinsics 121 | return np.array([[fx, 0., cx], 122 | [0., fy, cy], 123 | [0., 0., 1.]]) 124 | 125 | def projection_matrix(camera_matrix, T_CW): 126 | """ 127 | camera_matrix: 3 x 3 camera calibration matrix. 128 | T_CW: 4x4 matrix transform from global to camera frame. 
129 | """ 130 | return camera_matrix @ T_CW[:3, :] 131 | 132 | def from_calibration(calibration_file): 133 | with open(calibration_file, 'rt') as f: 134 | calibration = yaml.load(f.read(), Loader=yaml.SafeLoader) 135 | camera = calibration['cam0'] 136 | 137 | K = camera_matrix(camera['intrinsics']) 138 | D = np.array(camera['distortion_coeffs']) 139 | if camera['distortion_model'] == 'equidistant' and camera['camera_model'] == 'pinhole': 140 | return FisheyeCamera(K, D, camera['resolution'][::-1]) 141 | elif camera['distortion_model'] == 'radtan' and camera['camera_model'] == 'pinhole': 142 | return RadTanPinholeCamera(K, D, camera['resolution'][::-1]) 143 | else: 144 | raise ValueError(f"Unrecognized calibration type {camera['distortion_model']}.") 145 | 146 | def load_calibration_params(calibration_file): 147 | with open(calibration_file, 'rt') as f: 148 | calibration = yaml.load(f.read(), Loader=yaml.SafeLoader) 149 | 150 | left = calibration['cam0'] 151 | K = camera_matrix(left['intrinsics']) 152 | right = calibration['cam1'] 153 | Kp = camera_matrix(right['intrinsics']) 154 | D = np.array(calibration['cam0']['distortion_coeffs']) 155 | Dp = np.array(calibration['cam1']['distortion_coeffs']) 156 | 157 | T_RL = np.array(calibration['cam1']['T_cn_cnm1']) 158 | T_LR = np.eye(4) 159 | T_LR[:3, :3] = T_RL[:3, :3].transpose() 160 | T_LR[:3, 3] = -T_LR[:3, :3] @ T_RL[:3, 3] 161 | image_size = calibration['cam1']['resolution'][::-1] 162 | return { 163 | 'K': K, 164 | 'Kp': Kp, 165 | 'D': D, 166 | 'Dp': Dp, 167 | 'T_LR': T_LR, 168 | 'T_RL': T_RL, 169 | 'image_size': image_size 170 | } 171 | 172 | def scale_camera_matrix(K, scaling_factor): 173 | """ 174 | K: 3 x 3 camera matrix 175 | scaling_factor: array of length 2, x and y scaling factor. 176 | """ 177 | out = K.copy() 178 | out[0, 0] = K[0, 0] * scaling_factor[0] 179 | out[1, 1] = K[1, 1] * scaling_factor[1] 180 | out[0, 2] = K[0, 2] * scaling_factor[0] 181 | out[1, 2] = K[1, 2] * scaling_factor[1] 182 | return out 183 | 184 | def fundamental_matrix(T_RL, K, Kp): 185 | R = T_RL[:3, :3] 186 | t = T_RL[:3, 3] 187 | 188 | C = linalg.skew_matrix(K @ R.T @ t) 189 | return np.linalg.inv(Kp).T @ R @ K.T @ C 190 | 191 | -------------------------------------------------------------------------------- /perception/utils/clustering_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import cluster 3 | 4 | 5 | class KeypointClustering: 6 | def __init__(self, bandwidth): 7 | self.clustering = cluster.MeanShift(bandwidth=bandwidth, cluster_all=True, bin_seeding=True, 8 | min_bin_freq=1) 9 | self.past_clusters = None 10 | 11 | def __call__(self, indices): 12 | """ 13 | indices: N x D numpy array of image coordinates to be clustered. 14 | returns: C x D keypoint estimates. C is the amount of clusters found. 
15 | """ 16 | self.clustering.fit(indices) 17 | return self.clustering.cluster_centers_, self.clustering.labels_ 18 | 19 | -------------------------------------------------------------------------------- /perception/utils/linalg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial.transform import Rotation 3 | 4 | def skew_matrix(v): 5 | return np.array([[0.0, -v[2], v[1]], 6 | [v[2], 0.0, -v[0]], 7 | [-v[1], v[0], 0.0]], dtype=v.dtype) 8 | 9 | def inv_transform(T): 10 | out = np.eye(4, dtype=T.dtype) 11 | out[:3, :3] = T[:3, :3].T 12 | out[:3, 3] = -out[:3,:3] @ T[:3, 3] 13 | return out 14 | 15 | def transform_points(T, points): 16 | """ 17 | T: 4 x 4 numpy matrix 18 | points: ... x 3 numpy matrix 19 | """ 20 | return (T[:3, :3] @ points[..., None])[..., 0] + T[:3, 3] 21 | 22 | def angle_between(R1, R2): 23 | return Rotation.from_matrix(R1.T @ R2).as_euler('xyz', degrees=False) 24 | -------------------------------------------------------------------------------- /perception/utils/ros.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial.transform import Rotation 3 | from geometry_msgs import msg as geometry_msgs 4 | 5 | def message_to_transform(message): 6 | T = np.eye(4) 7 | t = message.transform.translation 8 | r = message.transform.rotation 9 | R = Rotation.from_quat([r.x, r.y, r.z, r.w]) 10 | T[:3, 3] = np.array([t.x, t.y, t.z]) 11 | T[:3, :3] = R.as_matrix() 12 | return T 13 | 14 | def transform_to_message(T, parent_frame, child_frame, timestamp): 15 | msg = geometry_msgs.TransformStamped() 16 | msg.header.stamp = timestamp 17 | msg.header.frame_id = parent_frame 18 | msg.child_frame_id = child_frame 19 | msg.transform.translation.x = T[0, 3] 20 | msg.transform.translation.y = T[1, 3] 21 | msg.transform.translation.z = T[2, 3] 22 | quat = Rotation.from_matrix(T[:3, :3]).as_quat() 23 | msg.transform.rotation.x = quat[0] 24 | msg.transform.rotation.y = quat[1] 25 | msg.transform.rotation.z = quat[2] 26 | msg.transform.rotation.w = quat[3] 27 | return msg 28 | 29 | def transform_to_pose(T, frame, timestamp): 30 | msg = geometry_msgs.PoseStamped() 31 | msg.header.stamp = timestamp 32 | msg.header.frame_id = frame 33 | msg.pose.position.x = T[0, 3] 34 | msg.pose.position.y = T[1, 3] 35 | msg.pose.position.z = T[2, 3] 36 | q = Rotation.from_matrix(T[:3, :3]).as_quat() 37 | msg.pose.orientation.x = q[0] 38 | msg.pose.orientation.y = q[1] 39 | msg.pose.orientation.z = q[2] 40 | msg.pose.orientation.w = q[3] 41 | return msg 42 | -------------------------------------------------------------------------------- /perception/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | from datetime import datetime 4 | 5 | class Timing: 6 | def __init__(self): 7 | self.times = {} 8 | self.finished = {} 9 | 10 | def start(self, tag): 11 | assert tag not in self.times, f"{tag} already started" 12 | start = datetime.now() 13 | self.times[tag] = start 14 | 15 | def end(self, tag): 16 | start = self.times[tag] 17 | values = self.finished.get(tag, []) 18 | diff = datetime.now() - start 19 | values.append(diff.total_seconds()) 20 | self.finished[tag] = values 21 | del self.times[tag] 22 | 23 | def print(self): 24 | space = " " * 9 25 | header = f"|\ttag{space}\t|\tavg\t|\tvar\t|" 26 | print(header) 27 | print("-" * (len(header) + 16)) 28 | for tag, values in 
self.finished.items(): 29 | avg = np.mean(values) 30 | name = tag[:12] + " " * (12 - len(tag)) 31 | std = np.std(values) 32 | print(f"|\t{name}\t|\t{avg}\t|\t{std}\t|") 33 | 34 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | h5py==3.1.0 2 | scikit-video==1.1.11 3 | albumentations 4 | efficientnet_pytorch 5 | pytorch-lightning==1.2.1 6 | scikit-learn==0.24.1 7 | timm==0.4.5 8 | rich 9 | numba 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/collect_bags.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import curses 3 | import os 4 | import subprocess 5 | import time 6 | 7 | TOPICS_TO_RECORD = [ 8 | '/tf_static', 9 | '/tf', 10 | '/zedm/zed_node/left_raw/camera_info', 11 | '/zedm/zed_node/left_raw/image_raw_color', 12 | '/zedm/zed_node/right_raw/camera_info', 13 | '/zedm/zed_node/right_raw/image_raw_color', 14 | '/joint_states' 15 | ] 16 | 17 | def read_args(): 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--out', '-o', type=str, default="~/data/bags") 20 | return parser.parse_args() 21 | 22 | WAITING = "Waiting for command." 23 | STARTING = "Starting to record bag." 24 | RECORDING = "Recording bag..." 25 | 26 | class Program: 27 | def __init__(self, screen, flags): 28 | self.screen = screen 29 | self.flags = flags 30 | self._stdout = [] 31 | self.status_line = WAITING 32 | self._inventory() 33 | self._refresh_screen() 34 | 35 | def _inventory(self): 36 | files = sorted(os.listdir(self.flags.out)) 37 | self.current_file = 0 38 | self._recorded_bags = [] 39 | for f in files: 40 | filepath = os.path.join(self.flags.out, f) 41 | if '.bag' in f: 42 | self._recorded_bags.append(filepath) 43 | self.current_file += 1 44 | 45 | def _refresh_screen(self): 46 | self.screen.clear() 47 | self.screen.addstr(0, 0, self.status_line) 48 | for i, filepath in enumerate(self._recorded_bags): 49 | bagname = os.path.basename(filepath) 50 | self.screen.addstr(i + 2, 0, bagname) 51 | 52 | (height, width) = self.screen.getmaxyx() 53 | for i, line in enumerate(self._stdout[-20:]): 54 | self.screen.addstr(height // 2 + i, 0, line) 55 | self.screen.refresh() 56 | 57 | def _add_bag(self, filepath): 58 | self._recorded_bags.append(filepath) 59 | self.current_file += 1 60 | 61 | def _read_stdout(self, process): 62 | text = process.stdout.decode('utf-8') 63 | for line in text.split('\n'): 64 | self._stdout.append(line) 65 | 66 | def _record_bag(self): 67 | self.status_line = STARTING 68 | self._refresh_screen() 69 | time.sleep(5) 70 | filename = '{:03}.bag'.format(self.current_file) 71 | filepath = os.path.join(self.flags.out, filename) 72 | self.status_line = RECORDING 73 | self._refresh_screen() 74 | try: 75 | process = subprocess.run(['rosbag', 'record', '--buffsize=0', '--chunksize=524288', '--output-name', filepath, '--duration', '30'] + TOPICS_TO_RECORD, 76 | stdout=subprocess.PIPE, check=True) 77 | except subprocess.CalledProcessError as e: 78 | print(e) 79 | exit() 80 | 81 | self._read_stdout(process) 82 | self._add_bag(filepath) 83 | self.status_line = WAITING 84 | self._refresh_screen() 85 | 86 | def run(self): 87 | while True: 88 | keypress = self.screen.getkey() 89 | if keypress == 'q': 90 | curses.endwin() 91 | return 92 | elif keypress == '\n': 93 | self._record_bag() 94 | 95 | def main(screen): 96 | curses.noecho() 97 | flags = 
read_args() 98 | flags.out = os.path.expanduser(flags.out) 99 | 100 | os.makedirs(flags.out, exist_ok=True) 101 | 102 | program = Program(screen, flags) 103 | program.run() 104 | 105 | 106 | if __name__ == "__main__": 107 | curses.wrapper(main) 108 | 109 | 110 | -------------------------------------------------------------------------------- /scripts/constants.py: -------------------------------------------------------------------------------- 1 | import hud 2 | import numpy as np 3 | 4 | def _to_camera_matrix(proj): 5 | return np.array([[proj[0], 0., proj[2]], 6 | [0., proj[1], proj[3]], 7 | [0., 0., 1.]], dtype=np.float64) 8 | 9 | KEYPOINT_FILENAME = 'keypoints.json' 10 | IMAGE_HEIGHT = 720 11 | IMAGE_WIDTH = 1280 12 | IMAGE_RECT = hud.Rect(0, 0, IMAGE_WIDTH, IMAGE_HEIGHT) 13 | image_size = (int(IMAGE_RECT.width), int(IMAGE_RECT.height)) 14 | KEYPOINT_COLOR = np.array([1.0, 0.0, 0.0, 1.0]) 15 | 16 | -------------------------------------------------------------------------------- /scripts/encode_bag.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import rospy 4 | import shutil 5 | import rosbag 6 | import subprocess 7 | import numpy as np 8 | import tf2_py as tf2 9 | import h5py 10 | import skvideo.io 11 | from time import time 12 | from argparse import ArgumentParser 13 | from PIL import Image 14 | from cv_bridge import CvBridge 15 | from geometry_msgs import msg 16 | from perception.utils import ros as ros_utils 17 | 18 | def read_args(): 19 | parser = ArgumentParser() 20 | parser.add_argument('--bags', required=True, help="Path to directory containing rosbags.") 21 | parser.add_argument('--out', '-o', required=True, help="Where to write output files.") 22 | parser.add_argument('--skip', default=0, type=int, help="Skip the first n bags.") 23 | parser.add_argument('--until', default=None, type=int, help="Encode until the nth bag.") 24 | parser.add_argument('--topics', nargs="+", required=True, help="Which topics to encode into the stream.") 25 | parser.add_argument('--frames', nargs='+', required=True, help="The coordinate frames corresponding to the optical frames of each camera topic given to the --topics argument.") 26 | parser.add_argument('--base-frame', default='panda_link0', help="The name of the base tf frame.") 27 | return parser.parse_args() 28 | 29 | bridge = CvBridge() 30 | 31 | def _write_images(folder, data): 32 | for item in data: 33 | image = bridge.imgmsg_to_cv2(item['message'], desired_encoding='rgb8') 34 | image = Image.fromarray(image) 35 | image.save('/tmp/encode_bags_tmp/{}/{:05}.png'.format(folder, item['i'])) 36 | print('Writing /tmp/encode_bags_tmp/{}/{:05}.png'.format(folder, item['i']), end='\r') 37 | print("") 38 | 39 | def _encode_full_video(data, filepath): 40 | writer = skvideo.io.FFmpegWriter(filepath, outputdict={ 41 | '-vcodec': 'libx264', 42 | '-crf': '0', 43 | '-preset': 'fast', 44 | '-framerate': '30' 45 | }) 46 | try: 47 | for i, item in enumerate(data): 48 | print(f"Encoding frame {i:06}", end='\r') 49 | frame = bridge.imgmsg_to_cv2(item['message'], desired_encoding='rgb8') 50 | writer.writeFrame(frame) 51 | finally: 52 | writer.close() 53 | 54 | def _encode_preview(video_file, preview_file): 55 | subprocess.run(['ffmpeg', '-i', video_file, '-c:a', 'copy', 56 | '-framerate', '30', '-c:v', 'libx264', '-crf', '24', '-vf', 'scale=1280:-1', 57 | '-preset', 'fast', '-y', preview_file]) 58 | 59 | class Runner: 60 | def __init__(self): 61 | self.flags = read_args() 62 | 
self._find_bags() 63 | 64 | def _find_bags(self): 65 | filenames = os.listdir(self.flags.bags) 66 | self._bags = [] 67 | for filename in filenames: 68 | path = os.path.join(self.flags.bags, filename) 69 | if '.bag' in path: 70 | self._bags.append(path) 71 | self._bags.sort() 72 | 73 | def _read_poses(self, out_folder, bag): 74 | print("Reading poses") 75 | tf_tree = tf2.BufferCore(rospy.Duration(360000.0)) 76 | for topic, message, t in bag.read_messages(topics=["/tf", "/tf_static"]): 77 | for tf_message in message.transforms: 78 | if topic == '/tf_static': 79 | tf_tree.set_transform_static(tf_message, f"bag/{topic}") 80 | else: 81 | tf_tree.set_transform(tf_message, f'bag/{topic}') 82 | 83 | return tf_tree 84 | 85 | def _gather_images(self, bag): 86 | image_messages = [] 87 | for topic in self.flags.topics: 88 | for _, message, t in bag.read_messages(topics=topic): 89 | i = len(image_messages) 90 | print("image {:05} time: {}".format(i, t), end="\r") 91 | image_messages.append({ 92 | 'message': message, 93 | 'i': i, 94 | 't': t.to_sec() 95 | }) 96 | 97 | return image_messages 98 | 99 | def _gather_poses(self, tf_tree, image_messages): 100 | print("Looking up poses") 101 | pose_data = [] 102 | i = 0 103 | for frame in self.flags.frames: 104 | for item in image_messages: 105 | try: 106 | # Reminder: ^{B}T^{A} = T_BA = lookup_transform(source_frame=A, target_frame=B) 107 | T_BC = ros_utils.message_to_transform(tf_tree.lookup_transform_core(target_frame=self.flags.base_frame, 108 | source_frame=frame, time=item['message'].header.stamp)) 109 | item['camera_pose'] = T_BC 110 | item['i'] = i # Override index as some frames might have been skipped. 111 | pose_data.append(item) 112 | i += 1 113 | except tf2.ExtrapolationException: 114 | print("Extrapolation exception. 
Skipping entry {}.".format(i)) 115 | 116 | return pose_data 117 | 118 | def _create_out_folder(self, bag_name): 119 | out_folder = os.path.join(self.flags.out, bag_name.split(os.path.extsep)[0]) 120 | os.makedirs(out_folder, exist_ok=True) 121 | return out_folder 122 | 123 | def _write_poses(self, out_file, poses): 124 | transforms = out_file.create_dataset('camera_transform', (len(poses), 4, 4), dtype=np.float64) 125 | for i, pose in enumerate(poses): 126 | transforms[i] = pose['camera_pose'] 127 | 128 | def _encode_video(self, bag_name, frame_data): 129 | out_folder = os.path.join(self.flags.out, bag_name.split(os.path.extsep)[0]) 130 | 131 | out_file = os.path.join(out_folder, 'frames.mp4') 132 | preview = os.path.join(out_folder, 'frames_preview.mp4') 133 | print("Encoding video {}".format(bag_name)) 134 | 135 | _encode_full_video(frame_data, out_file) 136 | _encode_preview(out_file, preview) 137 | 138 | def main(self): 139 | for path in self._bags[self.flags.skip:self.flags.until]: 140 | with rosbag.Bag(path, 'r') as bag: 141 | bag_name = os.path.basename(path) 142 | out_folder = self._create_out_folder(bag_name) 143 | filename = os.path.join(out_folder, 'data.hdf5') 144 | 145 | with h5py.File(filename, 'w') as h5_file: 146 | tf_tree = self._read_poses(out_folder, bag) 147 | image_frames = self._gather_images(bag) 148 | poses = self._gather_poses(tf_tree, image_frames) 149 | self._write_poses(h5_file, poses) 150 | self._encode_video(bag_name, poses) 151 | 152 | print(f"Done with bag {bag_name}.") 153 | 154 | def __enter__(self): 155 | os.makedirs('/tmp/encode_bags_tmp', exist_ok=True) 156 | os.makedirs(self.flags.out, exist_ok=True) 157 | return self 158 | 159 | def __exit__(self, *args): 160 | shutil.rmtree('/tmp/encode_bags_tmp') 161 | 162 | if __name__ == '__main__': 163 | with Runner() as runner: 164 | runner.main() 165 | 166 | -------------------------------------------------------------------------------- /scripts/make_video.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | ffmpeg -i "$1/%06d.jpg" -r 60 -y -c:v libx264 -vf scale=1280:360 -crf 25 "$1/out.mp4" 5 | 6 | -------------------------------------------------------------------------------- /scripts/package_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | import json 5 | from train import KeypointModule 6 | import yaml 7 | from pathlib import Path 8 | 9 | def read_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--model', type=str) 12 | parser.add_argument('--out', type=str, required=True) 13 | return parser.parse_args() 14 | 15 | def load_hparams(path): 16 | version_dir = Path(path).parent.parent.absolute() 17 | with open(os.path.join(version_dir, 'hparams.yaml'), 'rt') as f: 18 | params = yaml.load(f.read(), Loader=yaml.SafeLoader) 19 | return params 20 | 21 | class Model(torch.nn.Module): 22 | def __init__(self, flags, hparams): 23 | super().__init__() 24 | self.model = KeypointModule.load_from_checkpoint(flags.model, **hparams).model 25 | 26 | def forward(self, x): 27 | heatmap, depth, centers = self.model(x) 28 | return torch.sigmoid(heatmap[-1]), depth[-1], centers[-1] 29 | 30 | def main(): 31 | flags = read_args() 32 | hparams = load_hparams(flags.model) 33 | model = Model(flags, hparams).eval().cuda() 34 | 35 | dummy_input = torch.randn(2, 3, 511, 511).cuda() 36 | input_names = ["frames"] 37 | output_names = ["out"] 38 | 39 | 
with torch.no_grad(): 40 | traced = torch.jit.trace(model, dummy_input) 41 | 42 | traced.save(flags.out) 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | -------------------------------------------------------------------------------- /scripts/show_keypoints.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import json 4 | import time 5 | import hud 6 | import h5py 7 | import numpy as np 8 | import cv2 9 | import yaml 10 | import random 11 | from skvideo import io as video_io 12 | from perception import constants 13 | from perception.utils import camera_utils, Rate, linalg 14 | 15 | def read_args(): 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('base_dir', help="Which directory to encoded video directories in.") 18 | parser.add_argument('--calibration', default='config/calibration.yaml', help="Calibration yaml file.") 19 | parser.add_argument('--rate', '-r', default=30, help="Frames per second.") 20 | parser.add_argument('--seed', type=int, default=0) 21 | return parser.parse_args() 22 | 23 | KEYPOINT_FILENAME = 'keypoints.json' 24 | 25 | class ViewModel: 26 | def __init__(self, flags, directory): 27 | self.flags = flags 28 | self._read_keypoints(directory) 29 | self._load_video(directory) 30 | self._load_metadata(directory) 31 | self.current_frame = 0 32 | 33 | def _read_keypoints(self, base_dir): 34 | filepath = os.path.join(base_dir, KEYPOINT_FILENAME) 35 | with open(filepath, 'r') as f: 36 | contents = json.loads(f.read()) 37 | self.world_points = [np.array(p) for p in contents['3d_points']] 38 | 39 | def _load_video(self, base_dir): 40 | self.video = video_io.vreader(os.path.join(base_dir, 'frames.mp4')) 41 | 42 | def _load_metadata(self, base_dir): 43 | self.hdf = h5py.File(os.path.join(base_dir, 'data.hdf5'), 'r') 44 | self.num_frames = self.hdf['camera_transform'].shape[0] 45 | 46 | self.camera = camera_utils.from_calibration(self.flags.calibration) 47 | 48 | def close(self): 49 | self.hdf.close() 50 | self.video.close() 51 | 52 | def __iter__(self): 53 | return self 54 | 55 | def __next__(self): 56 | if self.current_frame >= self.num_frames: 57 | raise StopIteration() 58 | T_WC = self.hdf['camera_transform'][self.current_frame] 59 | T_CW = linalg.inv_transform(T_WC) 60 | R_c, _ = cv2.Rodrigues(T_CW[:3, :3]) 61 | frame_points = [] 62 | for p_WK in self.world_points: 63 | p_c = self.camera.project(p_WK[None, :3], T_CW) 64 | p_c = p_c.ravel() 65 | 66 | frame_points.append( 67 | hud.utils.to_normalized_device_coordinates( 68 | hud.Point(p_c[0], p_c[1]), 69 | constants.IMAGE_RECT)) 70 | 71 | 72 | frame = next(self.video) 73 | 74 | self.current_frame += 1 75 | return frame, frame_points 76 | 77 | def _transform_point(self, T_WC, point): 78 | T_CW = np.linalg.inv(T_WC) 79 | return T_CW @ point 80 | 81 | 82 | class PointVisualizer: 83 | def __init__(self, flags): 84 | self.flags = flags 85 | self.paused = False 86 | self.next = False 87 | self.done = False 88 | self.window = hud.AppWindow("Keypoints", 640, 360) 89 | self._create_views() 90 | 91 | def _create_views(self): 92 | self.image_pane = hud.ImagePane() 93 | self.image_points = hud.PointLayer([]) 94 | z_stack = hud.ZStack() 95 | z_stack.add_view(self.image_pane) 96 | z_stack.add_view(self.image_points) 97 | 98 | self.window.set_view(z_stack) 99 | self.window.add_key_handler(self._key_callback) 100 | 101 | def _key_callback(self, event): 102 | if event.key == 'Q': 103 | self.done = True 104 | elif event.key == ' ': 105 | self.paused 
= not self.paused 106 | elif event.key == '\x00': 107 | self.next = True 108 | 109 | def run(self): 110 | random.seed(self.flags.seed) 111 | rate = Rate(self.flags.rate) 112 | if os.path.isfile(os.path.join(self.flags.base_dir, 'keypoints.json')): 113 | directories = [os.path.basename(self.flags.base_dir)] 114 | base_dir = os.path.dirname(self.flags.base_dir) 115 | else: 116 | directories = os.listdir(self.flags.base_dir) 117 | base_dir = self.flags.base_dir 118 | random.shuffle(directories) 119 | for directory in directories: 120 | try: 121 | view_model = ViewModel(self.flags, os.path.join(base_dir, directory)) 122 | print(f"Sequence {directory}") 123 | for frame, points in view_model: 124 | print(f"Current frame {view_model.current_frame}, num frames: {view_model.num_frames}" + 5 * " ", end="\r") 125 | self.image_pane.set_texture(frame) 126 | self.image_points.set_points(points, constants.KEYPOINT_COLOR[None].repeat(len(points), 0)) 127 | if not self.window.update() or self.done: 128 | return 129 | self.window.poll_events() 130 | while self.paused: 131 | self.window.poll_events() 132 | rate.sleep() 133 | if self.next: 134 | self.next = False 135 | break 136 | rate.sleep() 137 | finally: 138 | view_model.close() 139 | 140 | def main(): 141 | flags = read_args() 142 | 143 | app = PointVisualizer(flags) 144 | app.run() 145 | 146 | if __name__ == "__main__": 147 | main() 148 | -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | import numpy as np 5 | import json 6 | import random 7 | from matplotlib import pyplot as plt 8 | from albumentations.augmentations import transforms 9 | from perception.models import nms 10 | import albumentations as A 11 | from torch.utils.data import DataLoader 12 | from perception.loss import KeypointLoss 13 | from perception.datasets.video import SceneDataset 14 | from perception.models import KeypointNet 15 | import pytorch_lightning as pl 16 | 17 | def read_args(): 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--workers', '-w', type=int, default=8, help="How many workers to use in data loader.") 20 | parser.add_argument('--train', type=str, required=True, help="Path to training dataset.") 21 | parser.add_argument('--val', type=str, required=True, help="Path to validation dataset.") 22 | parser.add_argument('--gpus', type=int, default=1) 23 | parser.add_argument('--fp16', action='store_true', help="Use half-precision.") 24 | parser.add_argument('--pool', default=1000, type=int, help="How many examples to use in shuffle pool") 25 | parser.add_argument('--keypoints', default="config/cups.json", help="Keypoint configuration file.") 26 | parser.add_argument('--batch-size', default=8, type=int) 27 | parser.add_argument('--weight-decay', default=0.01, type=float) 28 | parser.add_argument('--features', default=128, type=int, help="Intermediate features in network.") 29 | parser.add_argument('--center-weight', default=1.0, help="Weight for center loss vs. 
heatmap loss.") 30 | parser.add_argument('--lr', default=4e-3, type=float, help="Learning rate.") 31 | parser.add_argument('--dropout', default=0.1, type=float) 32 | parser.add_argument('--resume', default=None) 33 | return parser.parse_args() 34 | 35 | def _to_image(image): 36 | image = image.transpose([1, 2, 0]) 37 | image = image * np.array([0.25, 0.25, 0.25]) 38 | image = image + np.array([0.5, 0.5, 0.5]) 39 | return np.clip((image * 255.0).round(), 0.0, 255.0).astype(np.uint8) 40 | 41 | def _init_worker(worker_id): 42 | random.seed(worker_id) 43 | np.random.seed(worker_id) 44 | 45 | class KeypointModule(pl.LightningModule): 46 | def __init__(self, keypoint_config, lr=3e-4, features=128, dropout=0.1, weight_decay=0.01, center_weight=10.0): 47 | super().__init__() 48 | self.lr = lr 49 | self.weight_decay = weight_decay 50 | self.keypoint_config = keypoint_config 51 | self._load_model(features, dropout) 52 | self.loss = KeypointLoss(keypoint_config['keypoint_config'], center_weight=center_weight) 53 | self.save_hyperparameters() 54 | 55 | def _load_model(self, features, dropout): 56 | self.model = KeypointNet([180, 320], features=features, dropout=dropout, heatmaps_out=len(self.keypoint_config["keypoint_config"]) + 1) 57 | 58 | def forward(self, frame, *args, **kwargs): 59 | return self.model(frame, *args, **kwargs) 60 | 61 | def training_step(self, batch, batch_idx): 62 | frame, target, depth, gt_centers = batch 63 | heatmaps, p_depth, p_centers = self(frame) 64 | 65 | loss, heatmap_losses, depth_losses, center_losses = self.loss(heatmaps, target, p_depth, depth, p_centers, gt_centers) 66 | 67 | self.log('train_loss', loss) 68 | self.log('heatmap_loss1', heatmap_losses[0]) 69 | self.log('heatmap_loss2', heatmap_losses[1]) 70 | self.log('depth_loss1', depth_losses[0]) 71 | self.log('depth_loss2', depth_losses[1]) 72 | self.log('center_loss1', center_losses[0]) 73 | self.log('center_loss2', center_losses[1]) 74 | 75 | return loss 76 | 77 | def validation_step(self, batch, batch_idx): 78 | frame, target, depth, gt_centers, _, keypoints = batch 79 | heatmaps, p_depth, p_centers = self(frame) 80 | 81 | loss = self._validation_loss(heatmaps, target, keypoints) 82 | val_loss, heatmap_losses, depth_losses, center_losses = self.loss(heatmaps, target, p_depth, depth, p_centers, gt_centers) 83 | 84 | self.log('val_loss', loss) 85 | self.log('total_heatmap_loss', val_loss) 86 | self.log('val_heatmap_loss1', heatmap_losses[0]) 87 | self.log('val_heatmap_loss2', heatmap_losses[1]) 88 | self.log('val_depth_loss1', depth_losses[0]) 89 | self.log('val_depth_loss2', depth_losses[1]) 90 | self.log('val_center_loss1', center_losses[0]) 91 | self.log('val_center_loss2', center_losses[1]) 92 | 93 | return loss 94 | 95 | def configure_optimizers(self): 96 | optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay) 97 | schedule = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10, verbose=True) 98 | return { 99 | 'scheduler': schedule, 100 | 'interval': 'epoch', 101 | 'frequency': 1, 102 | 'monitor': 'train_loss', 103 | 'optimizer': optimizer 104 | } 105 | 106 | def _validation_loss(self, p_heatmaps, gt_heatmap, keypoints): 107 | # heatmaps: N x K x H x W 108 | # target: N x n_objects x K x 2 109 | p_heatmap = torch.sigmoid(p_heatmaps[-1]) 110 | return torch.nn.functional.l1_loss(p_heatmap, gt_heatmap) 111 | 112 | def _build_datasets(sequences, **kwargs): 113 | datasets = [] 114 | for sequence in sequences: 115 | dataset = SceneDataset(sequence, 
**kwargs) 116 | datasets.append(dataset) 117 | return datasets 118 | 119 | class DataModule(pl.LightningDataModule): 120 | def __init__(self, flags, keypoint_config): 121 | super().__init__() 122 | self.keypoint_config = keypoint_config 123 | datasets = [] 124 | train_directories = os.listdir(flags.train) 125 | train_sequences = sorted([os.path.join(flags.train, d) for d in train_directories]) 126 | val_directories = os.listdir(flags.val) 127 | val_sequences = sorted([os.path.join(flags.val, d) for d in val_directories]) 128 | self.flags = flags 129 | self.train_sequences = train_sequences 130 | self.val_sequences = val_sequences 131 | 132 | def setup(self, stage): 133 | if stage == 'fit': 134 | train_datasets = [] 135 | train_datasets += _build_datasets(self.train_sequences, keypoint_config=self.keypoint_config, augment=True, augment_color=True) 136 | val_datasets = _build_datasets(self.val_sequences, keypoint_config=self.keypoint_config, augment=False, include_pose=True) 137 | train = torch.utils.data.ChainDataset(train_datasets) 138 | self.train = torch.utils.data.BufferedShuffleDataset(train, self.flags.pool) 139 | self.val = torch.utils.data.ChainDataset(val_datasets) 140 | else: 141 | raise NotImplementedError() 142 | 143 | def train_dataloader(self): 144 | return DataLoader(self.train, batch_size=self.flags.batch_size, num_workers=self.flags.workers, 145 | worker_init_fn=_init_worker, 146 | persistent_workers=self.flags.workers > 0) 147 | 148 | def val_dataloader(self): 149 | return DataLoader(self.val, batch_size=self.flags.batch_size * 2, num_workers=self.flags.workers) 150 | 151 | def main(): 152 | flags = read_args() 153 | with open(flags.keypoints) as f: 154 | keypoint_config = json.load(f) 155 | data_module = DataModule(flags, keypoint_config) 156 | if flags.resume is None: 157 | module = KeypointModule(keypoint_config, 158 | lr=flags.lr, 159 | center_weight=flags.center_weight, 160 | features=flags.features, 161 | dropout=flags.dropout, 162 | weight_decay=flags.weight_decay) 163 | else: 164 | module = KeypointModule.load_from_checkpoint(flags.resume, 165 | lr=flags.lr, 166 | center_weight=flags.center_weight, 167 | dropout=flags.dropout, 168 | weight_decay=flags.weight_decay) 169 | 170 | from pytorch_lightning.callbacks import ModelCheckpoint 171 | checkpoint_cb = ModelCheckpoint(monitor='val_loss', 172 | save_top_k=1) 173 | trainer = pl.Trainer( 174 | callbacks=[checkpoint_cb], 175 | gpus=flags.gpus, 176 | reload_dataloaders_every_epoch=False, 177 | precision=16 if flags.fp16 else 32) 178 | 179 | trainer.fit(module, data_module) 180 | 181 | if __name__ == "__main__": 182 | main() 183 | 184 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name="perception", # Replace with your own username 5 | version="0.0.1", 6 | author="Kenneth Blomqvist", 7 | author_email="hello@keke.dev", 8 | description="A collection of utilities for doing robotic perception in Python and ROS.", 9 | url="https://github.com/kekeblom/perception", 10 | packages=setuptools.find_packages(), 11 | classifiers=[ 12 | "Programming Language :: Python :: 3", 13 | "License :: MIT License" 14 | ], 15 | python_requires='>=3.6', 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/test/__init__.py -------------------------------------------------------------------------------- /test/test_video_dataset.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | import numpy as np 4 | from perception.datasets.video import StereoVideoDataset, _compute_kernel 5 | 6 | class VideoDatasetTest(unittest.TestCase): 7 | def test_add_kernel(self): 8 | kernel = _compute_kernel(50, 25) 9 | target = np.zeros((120, 160), dtype=np.float32) 10 | StereoVideoDataset.kernel = kernel 11 | StereoVideoDataset.kernel_center = 25 12 | StereoVideoDataset.kernel_size = 50 13 | StereoVideoDataset._add_kernel(target, np.array([[80., 60.]])) 14 | self.assertEqual(target.max(), kernel[25, 25]) 15 | self.assertEqual(target[60, 80], target.max()) 16 | 17 | target = np.zeros((120, 160), dtype=np.float32) 18 | StereoVideoDataset._add_kernel(target, np.array([[1., 1.]])) 19 | self.assertEqual(target.max(), kernel[25, 25]) 20 | self.assertEqual(target[1, 1], target.max()) 21 | self.assertGreater(target.max(), 1e-3) 22 | 23 | # Past the end along x-axis. 24 | target = np.zeros((120, 160), dtype=np.float32) 25 | StereoVideoDataset._add_kernel(target, np.array([[165., 60.]])) 26 | self.assertNotEqual(target.max(), kernel[25, 25]) 27 | self.assertEqual(target[60, 159], target.max()) 28 | 29 | # Past end along both axes 30 | target = np.zeros((120, 160), dtype=np.float32) 31 | StereoVideoDataset._add_kernel(target, np.array([[165., 130.]])) 32 | self.assertEqual(target[119, 159], target.max()) 33 | 34 | # Before beginning. 35 | target = np.zeros((120, 160), dtype=np.float32) 36 | StereoVideoDataset._add_kernel(target, np.array([[-10., -130.]])) 37 | self.assertEqual(target[0, 1], target.max()) 38 | 39 | target = np.zeros((720, 1280), dtype=np.float32) 40 | StereoVideoDataset.kernel_size = 50 41 | StereoVideoDataset.kernel_center = 25 42 | StereoVideoDataset.width = 1280 43 | StereoVideoDataset.height = 720 44 | StereoVideoDataset._add_kernel(target, np.array([[456.02, 34.744]])) 45 | self.assertGreater(target.max(), 1e-3) 46 | 47 | target = np.zeros((360, 640), dtype=np.float32) 48 | StereoVideoDataset._add_kernel(target, np.array([[353.5, 153.8]])) 49 | self.assertEqual(target[154, 354], target.max()) 50 | 51 | 52 | 53 | if __name__ == "__main__": 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /test/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethz-asl/object_keypoints/cc5f386fd5699bfb1ff1687910e212773c2d6ecb/test/utils/__init__.py -------------------------------------------------------------------------------- /test/utils/test_ros.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | import rospy 4 | import rostest 5 | import numpy as np 6 | from scipy.spatial.transform import Rotation 7 | from perception.utils import ros as ros_utils 8 | 9 | class RosUtilsTestCase(unittest.TestCase): 10 | @classmethod 11 | def setUpClass(cls): 12 | cls.node = rospy.init_node('test_ros_utils') 13 | 14 | def test_identity(self): 15 | T = np.eye(4) 16 | message = ros_utils.transform_to_message(T, 'parent', 'child', rospy.Time.now()) 17 | T_out = ros_utils.message_to_transform(message) 18 | np.testing.assert_allclose(T_out, T) 19 | 20 
| def test_random_rotation(self): 21 | T = np.eye(4) 22 | T[:3, :3] = Rotation.random().as_matrix() 23 | message = ros_utils.transform_to_message(T, 'parent', 'child', rospy.Time.now()) 24 | T_out = ros_utils.message_to_transform(message) 25 | np.testing.assert_allclose(T_out, T) 26 | 27 | def test_random_rotation_with_translation(self): 28 | T = np.eye(4) 29 | T[:3, :3] = Rotation.random().as_matrix() 30 | T[:3, 3] = np.random.uniform(-1, 1, size=3) 31 | message = ros_utils.transform_to_message(T, 'parent', 'child', rospy.Time.now()) 32 | T_out = ros_utils.message_to_transform(message) 33 | np.testing.assert_allclose(T_out, T) 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | 39 | --------------------------------------------------------------------------------
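The camera helpers in `perception/utils/camera_utils.py` compose with `perception/utils/linalg.py` into a simple project/undistort/unproject round trip. The sketch below is illustrative only: it assumes the package has been installed with `pip3 install -e .` and is run from the repository root so that `config/calibration.yaml` resolves to a Kalibr `cam0` calibration; the camera pose and 3D point are made up.

```python
import numpy as np
from perception.utils import camera_utils, linalg

# Load the cam0 model from the Kalibr calibration file.
camera = camera_utils.from_calibration('config/calibration.yaml')

# A made-up world-to-camera transform T_CW: camera half a metre behind the origin.
T_CW = np.eye(4)
T_CW[:3, 3] = np.array([0.0, 0.0, 0.5])
T_WC = linalg.inv_transform(T_CW)

p_W = np.array([[0.05, -0.02, 1.0]])               # one 3D point in the world frame
xy = camera.project(p_W, T_CW)                     # distorted pixel coordinates, N x 2
z = linalg.transform_points(T_CW, p_W)[:, 2]       # depth of the point in the camera frame
p_C = camera.unproject(camera.undistort(xy), z)    # back to 3D, now in the camera frame
assert np.allclose(linalg.transform_points(T_WC, p_C), p_W, atol=1e-4)
```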
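For the stereo path, a fully self-contained sketch with a synthetic rig avoids any dependency on a calibration file; the intrinsics, baseline and test point below are made up, and the distortion coefficients are set to zero for simplicity.

```python
import numpy as np
from perception.utils.camera_utils import FisheyeCamera, StereoCamera, camera_matrix

# Synthetic stereo rig: identical pinhole-equi cameras, 6 cm baseline, no distortion.
K = camera_matrix([400.0, 400.0, 320.0, 180.0])
D = np.zeros(4)
image_size = [360, 640]                      # height, width
T_RL = np.eye(4)
T_RL[0, 3] = -0.06                           # left-to-right transform for a camera 6 cm to the right

stereo = StereoCamera(FisheyeCamera(K, D, image_size),
                      FisheyeCamera(K, D, image_size), T_RL)

# Project a known 3D point into both views, then triangulate it back.
p_L = np.array([[0.05, -0.02, 1.0]])                       # point in the left camera frame
left_kp = stereo.left_camera.project(p_L)                  # T_CW defaults to identity
right_kp = stereo.right_camera.project(p_L, stereo.T_RL)
p_rec = stereo.triangulate(left_kp, right_kp)
assert np.allclose(p_rec, p_L, atol=1e-3)
```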