├── .gitignore ├── LICENSE ├── README.md ├── aruco ├── 99-webcam.rules ├── README.md ├── calibrate_charuco.py ├── camera-params │ └── E4298F4E.yml ├── create_charuco_board.py ├── create_markers.py ├── create_trash_receptacle.py ├── detect_markers.py ├── images │ ├── board-corner-side.jpg │ ├── board-corners.jpg │ ├── cube.jpg │ ├── robot-side.jpg │ └── robot-top.jpg ├── printouts │ ├── charuco-board.pdf │ ├── markers-corners.pdf │ ├── markers-cubes.pdf │ ├── markers-robots.pdf │ └── trash-receptacle.pdf ├── server.py ├── simple_client.py └── utils.py ├── assets ├── corner.obj ├── plane.obj.template ├── plane.urdf.template └── robot.urdf ├── config ├── experiments │ ├── base │ │ ├── large_columns.yml │ │ ├── large_divider.yml │ │ ├── small_columns.yml │ │ └── small_empty.yml │ ├── fixed-step-size │ │ ├── large_columns-fixed_step_size.yml │ │ ├── large_divider-fixed_step_size.yml │ │ ├── small_columns-fixed_step_size.yml │ │ └── small_empty-fixed_step_size.yml │ ├── no-partial-rewards │ │ ├── large_columns-no_partial_rewards.yml │ │ ├── large_divider-no_partial_rewards.yml │ │ ├── small_columns-no_partial_rewards.yml │ │ └── small_empty-no_partial_rewards.yml │ ├── shortest-path-ablations │ │ ├── large_columns-no_sp_components.yml │ │ ├── large_columns-no_sp_from_agent.yml │ │ ├── large_columns-no_sp_in_rewards.yml │ │ ├── large_columns-no_sp_movement.yml │ │ ├── large_columns-no_sp_to_receptacle.yml │ │ ├── large_divider-no_sp_components.yml │ │ ├── large_divider-no_sp_from_agent.yml │ │ ├── large_divider-no_sp_in_rewards.yml │ │ ├── large_divider-no_sp_movement.yml │ │ ├── large_divider-no_sp_to_receptacle.yml │ │ ├── small_columns-no_sp_components.yml │ │ ├── small_columns-no_sp_from_agent.yml │ │ ├── small_columns-no_sp_in_rewards.yml │ │ ├── small_columns-no_sp_movement.yml │ │ ├── small_columns-no_sp_to_receptacle.yml │ │ ├── small_empty-no_sp_components.yml │ │ └── small_empty-no_sp_from_agent.yml │ └── steering-commands │ │ ├── 
large_columns-steering_commands-no_sp_components.yml │ │ ├── large_columns-steering_commands.yml │ │ ├── large_divider-steering_commands-no_sp_components.yml │ │ ├── large_divider-steering_commands.yml │ │ ├── small_columns-steering_commands-no_sp_components.yml │ │ ├── small_columns-steering_commands.yml │ │ ├── small_empty-steering_commands-no_sp_components.yml │ │ └── small_empty-steering_commands.yml ├── local │ └── small_empty-local.yml └── templates │ └── small_empty.yml ├── download-pretrained.sh ├── enjoy.py ├── environment.py ├── eval_summary.ipynb ├── evaluate.py ├── models.py ├── policies.py ├── requirements.txt ├── resnet.py ├── spfa ├── .gitignore ├── demo.py ├── setup.py └── src │ └── main.cpp ├── stl ├── blade.stl ├── board-corner.stl └── cube.stl ├── tools_click_agent.py ├── tools_generate_experiments.py ├── train.py ├── utils.py ├── vector_action_executor.py ├── vector_click_agent.py ├── vector_controller.py ├── vector_enjoy.py ├── vector_keep_still.py ├── vector_keyboard_controller.py ├── vector_run_mdns.py └── vector_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | __pycache__ 3 | .ipynb_checkpoints 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jimmy Wu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies 
or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # spatial-action-maps 2 | 3 | **Update:** Please see our new [spatial-intention-maps](https://github.com/jimmyyhwu/spatial-intention-maps) repository, which extends this work to multi-agent settings. It contains many new improvements to the codebase, and while the focus is on multi-agent, it also supports single-agent training. 4 | 5 | --- 6 | 7 | This code release accompanies the following paper: 8 | 9 | ### Spatial Action Maps for Mobile Manipulation 10 | 11 | Jimmy Wu, Xingyuan Sun, Andy Zeng, Shuran Song, Johnny Lee, Szymon Rusinkiewicz, Thomas Funkhouser 12 | 13 | *Robotics: Science and Systems (RSS)*, 2020 14 | 15 | [Project Page](https://spatial-action-maps.cs.princeton.edu) | [PDF](https://spatial-action-maps.cs.princeton.edu/paper.pdf) | [arXiv](https://arxiv.org/abs/2004.09141) | [Video](https://youtu.be/FcbIcU_VnzU) 16 | 17 | **Abstract:** Typical end-to-end formulations for learning robotic navigation involve predicting a small set of steering command actions (e.g., step forward, turn left, turn right, etc.) from images of the current state (e.g., a bird's-eye view of a SLAM reconstruction). Instead, we show that it can be advantageous to learn with dense action representations defined in the same domain as the state. 
In this work, we present "spatial action maps," in which the set of possible actions is represented by a pixel map (aligned with the input image of the current state), where each pixel represents a local navigational endpoint at the corresponding scene location. Using ConvNets to infer spatial action maps from state images, action predictions are thereby spatially anchored on local visual features in the scene, enabling significantly faster learning of complex behaviors for mobile manipulation tasks with reinforcement learning. In our experiments, we task a robot with pushing objects to a goal location, and find that policies learned with spatial action maps achieve much better performance than traditional alternatives. 18 | 19 | ![](https://user-images.githubusercontent.com/6546428/91783724-e44bc400-ebb5-11ea-9b98-bfd90e52a82a.gif) | ![](https://user-images.githubusercontent.com/6546428/91783722-e44bc400-ebb5-11ea-902e-0b0468129231.gif) | ![](https://user-images.githubusercontent.com/6546428/91783719-e31a9700-ebb5-11ea-955f-2fa18a0508c1.gif) | ![](https://user-images.githubusercontent.com/6546428/91783721-e3b32d80-ebb5-11ea-82f5-6711e2e79fa6.gif) 20 | :---: | :---: | :---: | :---: 21 | 22 | ![](https://user-images.githubusercontent.com/6546428/91783715-e1e96a00-ebb5-11ea-85fb-cb9e75f7f5a6.gif) | ![](https://user-images.githubusercontent.com/6546428/91783711-df871000-ebb5-11ea-9e6e-4a09d545b508.gif) | ![](https://user-images.githubusercontent.com/6546428/91783716-e2820080-ebb5-11ea-944c-83e49d16d9bc.gif) 23 | :---: | :---: | :---: 24 | 25 | ## Installation 26 | 27 | We recommend using a [`conda`](https://docs.conda.io/en/latest/miniconda.html) environment for this codebase. 
The following commands will set up a new conda environment with the correct requirements (tested on Ubuntu 18.04.3 LTS): 28 | 29 | ```bash 30 | # Create and activate new conda env 31 | conda create -y -n my-conda-env python=3.7 32 | conda activate my-conda-env 33 | 34 | # Install pytorch and numpy 35 | conda install -y pytorch==1.2.0 torchvision==0.4.0 cudatoolkit=10.0 -c pytorch 36 | conda install -y numpy=1.17.3 37 | 38 | # Install pip requirements 39 | pip install -r requirements.txt 40 | 41 | # Install shortest path module (used in simulation environment) 42 | cd spfa 43 | python setup.py install 44 | ``` 45 | 46 | ## Quickstart 47 | 48 | We provide four pretrained policies, one for each test environment. Use `download-pretrained.sh` to download them: 49 | 50 | ```bash 51 | ./download-pretrained.sh 52 | ``` 53 | 54 | You can then use `enjoy.py` to run a trained policy in the simulation environment. 55 | 56 | For example, to load the pretrained policy for `SmallEmpty`, you can run: 57 | 58 | ```bash 59 | python enjoy.py --config-path logs/20200125T213536-small_empty/config.yml 60 | ``` 61 | 62 | You can also run `enjoy.py` without specifying a config path, and it will find all policies in the `logs` directory and allow you to pick one to run: 63 | 64 | ```bash 65 | python enjoy.py 66 | ``` 67 | 68 | ## Training in the Simulation Environment 69 | 70 | The [`config/experiments`](config/experiments) directory contains template config files for all experiments in the paper. To start a training run, you can give one of the template config files to the `train.py` script. For example, the following will train a policy on the `SmallEmpty` environment: 71 | 72 | ``` 73 | python train.py config/experiments/base/small_empty.yml 74 | ``` 75 | 76 | The training script will create a log directory and checkpoint directory for the new training run inside `logs/` and `checkpoints/`, respectively. 
Inside the log directory, it will also create a new config file called `config.yml`, which stores training run config variables and can be used to resume training or to load a trained policy for evaluation. 77 | 78 | ### Simulation Environment 79 | 80 | To explore the simulation environment using our proposed dense action space (spatial action maps), you can use the `tools_click_agent.py` script, which will allow you to click on the local overhead map to select actions and move around in the environment. 81 | 82 | ```bash 83 | python tools_click_agent.py 84 | ``` 85 | 86 | ### Evaluation 87 | 88 | Trained policies can be evaluated using the `evaluate.py` script, which takes in the config path for the training run. For example, to evaluate the `SmallEmpty` pretrained policy, you can run: 89 | 90 | ``` 91 | python evaluate.py --config-path logs/20200125T213536-small_empty/config.yml 92 | ``` 93 | 94 | This will load the trained policy from the specified training run, and run evaluation on it. The results are saved to an `.npy` file in the `eval` directory. You can then run `jupyter notebook` and navigate to [`eval_summary.ipynb`](eval_summary.ipynb) to load the `.npy` files and generate tables and plots of the results. 95 | 96 | ## Running in the Real Environment 97 | 98 | We train policies in simulation and run them directly on the real robot by mirroring the real environment inside the simulation. To do this, we first use [ArUco](https://docs.opencv.org/4.1.2/d5/dae/tutorial_aruco_detection.html) markers to estimate 2D poses of robots and cubes in the real environment, and then use the estimated poses to update the simulation. Note that setting up the real environment, particularly the marker pose estimation, can take a fair amount of time and effort. 99 | 100 | ### Vector SDK Setup 101 | 102 | If you previously ran `pip install -r requirements.txt` following the installation instructions above, the `anki_vector` library should already be installed. 
Run the following command to set up each robot you plan to use: 103 | 104 | ```bash 105 | python -m anki_vector.configure 106 | ``` 107 | 108 | After the setup is complete, you can open the Vector config file located at `~/.anki_vector/sdk_config.ini` to verify that all of your robots are present. 109 | 110 | You can also run some of the [official examples](https://developer.anki.com/vector/docs/downloads.html#sdk-examples) to verify that the setup procedure worked. For further reference, please see the [Vector SDK documentation](https://developer.anki.com/vector/docs/index.html). 111 | 112 | ### Connecting to the Vector 113 | 114 | The following command will try to connect to all the robots in your Vector config file and keep them still. It will print out a message for each robot it successfully connects to, and can be used to verify that the Vector SDK can connect to all of your robots. 115 | 116 | ```bash 117 | python vector_keep_still.py 118 | ``` 119 | 120 | **Note:** If you get the following error, you will need to make a small fix to the `anki_vector` library. 121 | 122 | ``` 123 | AttributeError: module 'anki_vector.connection' has no attribute 'CONTROL_PRIORITY_LEVEL' 124 | ``` 125 | 126 | Locate the `anki_vector/behavior.py` file inside your installed conda libraries. The full path should be in the error message. At the bottom of `anki_vector/behavior.py`, change `connection.CONTROL_PRIORITY_LEVEL.RESERVE_CONTROL` to `connection.ControlPriorityLevel.RESERVE_CONTROL`. 127 | 128 | --- 129 | 130 | Sometimes the IP addresses of your robots will change. To update the Vector config file with the new IP addresses, you can run the following command: 131 | 132 | ```bash 133 | python vector_run_mdns.py 134 | ``` 135 | 136 | The script uses mDNS to find all Vector robots on the local network, and will automatically update their IP addresses in the Vector config file. It will also print out the hostname, IP address, and MAC address of every robot found. 
Make sure `zeroconf` is installed (`pip install zeroconf`) or mDNS may not work well. Alternatively, you can just open the Vector config file at `~/.anki_vector/sdk_config.ini` in a text editor and manually update the IP addresses. 137 | 138 | ### Controlling the Vector 139 | 140 | The `vector_keyboard_controller.py` script is adapted from the [remote control example](https://github.com/anki/vector-python-sdk/blob/master/examples/apps/remote_control/remote_control.py) in the official SDK, and can be used to verify that you are able to control the robot using the Vector SDK. Use it as follows: 141 | 142 | ```bash 143 | python vector_keyboard_controller.py --robot-index ROBOT_INDEX 144 | ``` 145 | 146 | The `--robot-index` argument specifies the robot you wish to control and refers to the index of the robot in the Vector config file (`~/.anki_vector/sdk_config.ini`). If no robot index is specified, the script will check all robots in the Vector config file and select the first robot it is able to connect to. 147 | 148 | ### 3D Printed Parts 149 | 150 | The real environment setup contains some 3D printed parts. We used the [Sindoh 3DWOX 1](https://www.amazon.com/Sindoh-3DWOX-Printer-New-Model/dp/B07C79C9RB) 3D printer to print them, but other printers should work too. We used PLA filament. All 3D model files are in the [`stl`](stl) directory: 151 | * `cube.stl`: 3D model for the cubes (objects) 152 | * `blade.stl`: 3D model for the bulldozer blade attached to the front of the robot 153 | * `board-corner.stl`: 3D model for the board corners, which are used for pose estimation with ArUco markers 154 | 155 | ### Running Trained Policies on the Real Robot 156 | 157 | First see the [`aruco`](aruco) directory for instructions on setting up pose estimation with ArUco markers. 
158 | 159 | Once the setup is completed, make sure the pose estimation server is started before proceeding: 160 | 161 | ```bash 162 | cd aruco 163 | python server.py 164 | ``` 165 | 166 | --- 167 | 168 | The `vector_click_agent.py` script is analogous to `tools_click_agent.py`, and allows you to click on the local overhead map to control the real robot. The script is also useful for verifying that all components of the real environment setup are working correctly, including pose estimation and robot control. The simulation environment should mirror the real setup with millimeter-level precision. You can start it using the following command: 169 | 170 | ```bash 171 | python vector_click_agent.py --robot-index ROBOT_INDEX 172 | ``` 173 | 174 | If the poses in the simulation do not look correct, you can restart the pose estimation server with the `--debug` flag to enable debug visualizations: 175 | 176 | ```bash 177 | cd aruco 178 | python server.py --debug 179 | ``` 180 | 181 | --- 182 | 183 | Once you have verified that manual control with `vector_click_agent.py` works, you can then run a trained policy using the `vector_enjoy.py` script. 
For example, to load the `SmallEmpty` pretrained policy, you can run: 184 | 185 | ```bash 186 | python vector_enjoy.py --robot-index ROBOT_INDEX --config-path logs/20200125T213536-small_empty/config.yml 187 | ``` 188 | 189 | ## Citation 190 | 191 | If you find this work useful for your research, please consider citing: 192 | 193 | ``` 194 | @inproceedings{wu2020spatial, 195 | title = {Spatial Action Maps for Mobile Manipulation}, 196 | author = {Wu, Jimmy and Sun, Xingyuan and Zeng, Andy and Song, Shuran and Lee, Johnny and Rusinkiewicz, Szymon and Funkhouser, Thomas}, 197 | booktitle = {Proceedings of Robotics: Science and Systems (RSS)}, 198 | year = {2020} 199 | } 200 | ``` 201 | -------------------------------------------------------------------------------- /aruco/99-webcam.rules: -------------------------------------------------------------------------------- 1 | # Logitech C930e 2 | SUBSYSTEMS=="usb", ATTRS{idVendor}=="046d", ATTRS{idProduct}=="0843", MODE="666" 3 | -------------------------------------------------------------------------------- /aruco/README.md: -------------------------------------------------------------------------------- 1 | # Pose Estimation with ArUco Markers 2 | 3 | This directory contains code for setting up a pose estimation server. Using an overhead camera, we detect ArUco markers installed to robots and cubes in the environment to get real-time 2D poses with millimeter-level precision. 4 | 5 | ## Camera Setup 6 | 7 | Please see the main [`README.md`](../README.md) for requirements that should be installed via `pip`. 8 | 9 | We use a [Logitech C930e webcam](https://www.amazon.com/Logitech-C930e-1080P-Video-Webcam/dp/B00CRJWW2G) as our overhead camera. While other cameras may work too, our setup is based on this particular camera. 
If you are using other cameras, the size of the environment may need to be modified to ensure reliable marker detection, especially if the camera FOV is low (the C930e has 90-degree FOV) or the camera image quality is worse. Also note that while the C930e supports 1080p, we run it at 720p. We found that when using 1080p, the latency would be too large, causing the robot controller to overshoot. 10 | 11 | --- 12 | 13 | The code uses the serial number of your camera as an identifier when saving and loading camera parameters. If it cannot get the serial number, the default identifier of `'unknown-camera'` is used. You can run the following command (in this directory) to check whether it can find your camera's serial number, replacing `logitech-c930e` with your camera's name: 14 | 15 | ```bash 16 | python -c "import utils; print(utils.get_camera_identifier('logitech-c930e'))" 17 | ``` 18 | 19 | If you are using a camera that is not the C930e, you can modify `get_usb_device_serial` in `utils.py` to add your camera. 20 | 21 | Additionally, on Ubuntu 18, the following steps may be necessary to enable `pyusb` to access your camera's serial number. Be sure to change the `idVendor` and `idProduct` attributes in `99-webcam.rules` to match those of your camera. 22 | 23 | ```bash 24 | sudo cp 99-webcam.rules /etc/udev/rules.d/99-webcam.rules 25 | sudo udevadm control --reload-rules 26 | sudo service udev restart 27 | sudo udevadm trigger 28 | ``` 29 | 30 | ## Camera Calibration 31 | 32 | The camera needs to be calibrated for accurate marker detection. We use the ChArUco board to calibrate our camera. You can generate a printable ChArUco board using the following command, which outputs a PDF file to [`printouts/charuco-board.pdf`](printouts/charuco-board.pdf). 33 | 34 | ```bash 35 | python create_charuco_board.py 36 | ``` 37 | 38 | After printing, be sure to check that the squares are actually 24 mm wide, as some printers will rescale the document before printing. 
39 | 40 | --- 41 | 42 | To do the calibration, we provide a calibration script based on the [official example](https://github.com/opencv/opencv_contrib/blob/master/modules/aruco/samples/calibrate_camera_charuco.cpp). Run the following command, with additional arguments if you are not using the C930e camera. Try to cover a wide variety of viewpoints. The board should also be kept as flat as possible (by using a clipboard, for example). 43 | 44 | ``` 45 | python calibrate_charuco.py 46 | ``` 47 | 48 | After calibration, the computed camera parameters will be written to a config file located at `camera-params/your-camera-identifier.yml`. 49 | 50 | For more information on camera calibration with ChArUco boards, please see the [official tutorial](https://docs.opencv.org/4.1.2/da/d13/tutorial_aruco_calibration.html). 51 | 52 | ## Marker Installation 53 | 54 | To create printable markers for the robots, cubes, and board corners, run the following command, which will generate several printable marker sheets in [`printouts`](printouts). 55 | 56 | ``` 57 | python create_markers.py 58 | ``` 59 | 60 | We printed the markers on [label paper](https://www.amazon.com/AmazonBasics-Address-Labels-Inkjet-Printers/dp/B074KQRJKN), allowing us to cut the markers out and easily install them. As with the ChArUco board, be sure to measure the printed markers in case the printer rescaled the document. The markers should be 18 mm wide. 61 | 62 | --- 63 | 64 | The `detect_markers.py` script will detect markers visible in the camera image, and is useful for verifying marker IDs and marker orientations. To use it, run the following command: 65 | 66 | ```bash 67 | python detect_markers.py 68 | ``` 69 | 70 | ### Robot Markers 71 | 72 | Since marker detection works best when the markers are installed on a flat surface, we added an [acrylic square](https://www.amazon.com/1-5mm-Clear-Miniature-Bases-Square/dp/B00MNMRFEW) for the robot markers. Marker IDs 0 through 9 are used for the robots. 
See photos below for placement on the robot. The markers should be parallel to the horizontal plane. 73 | 74 | ![](images/robot-top.jpg) | ![](images/robot-side.jpg) 75 | :---: | :---: 76 | 77 | ### Cube Markers 78 | 79 | Marker IDs 10 through 33 are used for the cubes. See photo below for placement. For each cube, we installed the same marker to all six sides. 80 | 81 | ![](images/cube.jpg) 82 | :---: 83 | 84 | ### Corner Markers 85 | 86 | The board corner markers are used as anchors for the corners of the room in image space. There are two sets of board corner markers, one for the cubes at height 44 mm, and one for the robots at height 70 mm. By computing the positions of the robot and cube markers in image space relative to the board corners, we can determine the 2D poses of the robots and cubes. Marker IDs 42 through 45 are used for the robot board corners, and 46 through 49 for the cube board corners. See photos below for placement. 87 | 88 | ![](images/board-corners.jpg) | ![](images/board-corner-side.jpg) 89 | :---: | :---: 90 | 91 | ## Board Setup 92 | 93 | For setting up the rest of the environment, please reference the photos and videos of the setup on the [project page](https://spatial-action-maps.cs.princeton.edu). 94 | 95 | Note that the camera must be close enough to reliably detect all 8 board corner markers. We placed our camera approximately 74.5 cm above the board. 96 | It is very important that all 8 board corner markers are reliably detected. Without accurate estimates of the board corners to anchor on, all pose estimates will be off. 
97 | 98 | You can use the `detect_markers.py` script from before to check whether all of the board corners are detected: 99 | 100 | ```bash 101 | python detect_markers.py 102 | ``` 103 | 104 | ## Pose Estimation 105 | 106 | Once the board setup is complete, you can run the following command to start the pose estimation server: 107 | 108 | ```bash 109 | python server.py 110 | ``` 111 | 112 | Note that the server will not start running until a client connects. We provide `simple_client.py`, which you can use to make sure the server is outputting poses: 113 | 114 | ```bash 115 | python simple_client.py 116 | ``` 117 | 118 | If no pose estimates are showing up in the client, the `--debug` flag can be passed to the server to show visualizations of the marker detections: 119 | 120 | ```bash 121 | python server.py --debug 122 | ``` 123 | 124 | At this point, if the client is showing reasonable pose estimates, then the pose estimation server should be ready for use with the rest of the robot setup. Please see the main [`README.md`](../README.md) for further details. 
125 | -------------------------------------------------------------------------------- /aruco/calibrate_charuco.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/opencv/opencv_contrib/blob/master/modules/aruco/samples/calibrate_camera_charuco.cpp 2 | import argparse 3 | import sys 4 | from pathlib import Path 5 | import cv2 6 | import numpy as np 7 | import utils 8 | 9 | def main(args): 10 | params = utils.get_charuco_board_parameters() 11 | frame_width = args.frame_width 12 | frame_height = args.frame_height 13 | 14 | # Set up webcam 15 | cap = utils.get_video_cap(frame_width, frame_height, args.camera_id) 16 | 17 | # Set up aruco dict and board 18 | aruco_dict = cv2.aruco.Dictionary_get(params['dict_id']) 19 | board = cv2.aruco.CharucoBoard_create(params['squares_x'], params['squares_y'], params['square_length'], params['marker_length'], aruco_dict) 20 | 21 | # Enable corner refinement 22 | detector_params = cv2.aruco.DetectorParameters_create() 23 | detector_params.cornerRefinementMethod = cv2.aruco.CORNER_REFINE_SUBPIX 24 | 25 | # Capture images 26 | all_corners = [] 27 | all_ids = [] 28 | all_imgs = [] 29 | while True: 30 | _, image = cap.read() 31 | if image is None: 32 | continue 33 | 34 | # Detect markers 35 | corners, ids, rejected = cv2.aruco.detectMarkers(image, aruco_dict, parameters=detector_params) 36 | 37 | # Refine markers 38 | corners, ids, _, _ = cv2.aruco.refineDetectedMarkers(image, board, corners, ids, rejected) 39 | 40 | # Interpolate corners 41 | if ids is not None: 42 | _, curr_charuco_corners, curr_charuco_ids = cv2.aruco.interpolateCornersCharuco(corners, ids, image, board) 43 | 44 | # Draw results 45 | image_copy = image.copy() 46 | if ids is not None: 47 | cv2.aruco.drawDetectedMarkers(image_copy, corners) 48 | 49 | if curr_charuco_corners is not None: 50 | cv2.aruco.drawDetectedCornersCharuco(image_copy, curr_charuco_corners, curr_charuco_ids) 51 | 52 | # Display 
and wait for keyboard input 53 | cv2.putText(image_copy, "Press 'c' to add current frame. 'ESC' to finish and calibrate", (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2) 54 | cv2.imshow('out', image_copy) 55 | key = cv2.waitKey(10) & 0xFF 56 | if key == 27: 57 | break 58 | if key == ord('c') and ids is not None and len(ids) > 4: 59 | print('Frame captured') 60 | all_corners.append(corners) 61 | all_ids.append(ids) 62 | all_imgs.append(image) 63 | 64 | cap.release() 65 | cv2.destroyAllWindows() 66 | 67 | if len(all_imgs) < 1: 68 | print('Not enough captures for calibration') 69 | sys.exit() 70 | 71 | # Aruco calibration 72 | all_corners_concatenated = [] 73 | all_ids_concatenated = [] 74 | marker_counter_per_frame = [] 75 | for corners, ids in zip(all_corners, all_ids): 76 | marker_counter_per_frame.append(len(corners)) 77 | all_corners_concatenated.extend(corners) 78 | all_ids_concatenated.extend(ids) 79 | all_corners_concatenated = np.asarray(all_corners_concatenated) 80 | all_ids_concatenated = np.asarray(all_ids_concatenated) 81 | marker_counter_per_frame = np.asarray(marker_counter_per_frame) 82 | rep_error_aruco, camera_matrix, dist_coeffs, _, _ = cv2.aruco.calibrateCameraAruco(all_corners_concatenated, all_ids_concatenated, marker_counter_per_frame, board, (frame_width, frame_height), None, None) 83 | 84 | # Charuco calibration using previous camera parameters 85 | all_charuco_corners = [] 86 | all_charuco_ids = [] 87 | for corners, ids, image in zip(all_corners, all_ids, all_imgs): 88 | _, curr_charuco_corners, curr_charuco_ids = cv2.aruco.interpolateCornersCharuco(corners, ids, image, board, cameraMatrix=camera_matrix, distCoeffs=dist_coeffs) 89 | all_charuco_corners.append(curr_charuco_corners) 90 | all_charuco_ids.append(curr_charuco_ids) 91 | if len(all_charuco_corners) < 4: 92 | print('Not enough corners for calibration') 93 | sys.exit() 94 | rep_error, camera_matrix, dist_coeffs, _, _ = cv2.aruco.calibrateCameraCharuco(all_charuco_corners, 
all_charuco_ids, board, (frame_width, frame_height), camera_matrix, dist_coeffs) 95 | 96 | print('Rep Error:', rep_error) 97 | print('Rep Error Aruco:', rep_error_aruco) 98 | 99 | # Save camera parameters 100 | camera_params_file_path = utils.get_camera_params_file_path(camera_name=args.camera_name) 101 | camera_params_dir = Path(camera_params_file_path).parent 102 | if not camera_params_dir.exists(): 103 | camera_params_dir.mkdir() 104 | fs = cv2.FileStorage(camera_params_file_path, cv2.FILE_STORAGE_WRITE) 105 | fs.write('image_width', frame_width) 106 | fs.write('image_height', frame_height) 107 | fs.write('camera_matrix', camera_matrix) 108 | fs.write('distortion_coefficients', dist_coeffs) 109 | fs.write('avg_reprojection_error', rep_error) 110 | fs.release() 111 | print('Calibration saved to ', camera_params_file_path) 112 | 113 | # Show interpolated charuco corners 114 | for image, ids, charuco_corners, charuco_ids in zip(all_imgs, all_ids, all_charuco_corners, all_charuco_ids): 115 | image_copy = image.copy() 116 | if ids is not None: 117 | cv2.aruco.drawDetectedCornersCharuco(image_copy, charuco_corners, charuco_ids) 118 | cv2.imshow('out', image_copy) 119 | key = cv2.waitKey(0) 120 | if key == 27: # Esc key 121 | break 122 | 123 | parser = argparse.ArgumentParser() 124 | parser.add_argument('--camera-id', type=int, default=0) 125 | parser.add_argument('--camera-name', default='logitech-c930e') 126 | parser.add_argument('--frame-width', type=int, default=1280) 127 | parser.add_argument('--frame-height', type=int, default=720) 128 | main(parser.parse_args()) 129 | -------------------------------------------------------------------------------- /aruco/camera-params/E4298F4E.yml: -------------------------------------------------------------------------------- 1 | %YAML:1.0 2 | --- 3 | image_width: 1280 4 | image_height: 720 5 | camera_matrix: !!opencv-matrix 6 | rows: 3 7 | cols: 3 8 | dt: d 9 | data: [ 7.6411985579711256e+02, 0., 5.9815734338115954e+02, 0., 
"""Render a ChArUco calibration board and lay it out as a letter-size PDF."""
import tempfile
from pathlib import Path
import cv2
from fpdf import FPDF
from PIL import Image
import utils

output_dir = 'printouts'
pdf_name = 'charuco-board.pdf'
orientation = 'L'  # landscape page

# Board geometry and paper geometry from the shared parameter helpers
board_params = utils.get_charuco_board_parameters()
paper_params = utils.get_paper_parameters(orientation)
squares_x = board_params['squares_x']
squares_y = board_params['squares_y']

# Board size, first in pixels then in mm (square_length is in meters)
board_length_pixels = squares_x * board_params['square_length_pixels']
board_width_pixels = squares_y * board_params['square_length_pixels']
board_length_mm = 1000 * squares_x * board_params['square_length']
board_width_mm = 1000 * squares_y * board_params['square_length']

# Draw the board image, scaled up so the print comes out at the paper's ppi
board = cv2.aruco.CharucoBoard_create(
    squares_x, squares_y,
    board_params['square_length'], board_params['marker_length'],
    cv2.aruco.Dictionary_get(board_params['dict_id'])
)
scale_factor = (board_length_mm / paper_params['mm_per_in']) * paper_params['ppi'] / board_length_pixels
board_image = board.draw((int(scale_factor * board_length_pixels), int(scale_factor * board_width_pixels)))

# Place the rendered board centered on a single PDF page
pdf = FPDF(orientation, 'mm', 'letter')
pdf.add_page()
with tempfile.TemporaryDirectory() as tmp_dir_name:
    image_path = str(Path(tmp_dir_name) / 'board.png')
    Image.fromarray(board_image).save(image_path)
    pdf.image(
        image_path,
        x=(paper_params['width_mm'] - board_length_mm) / 2,
        y=(paper_params['height_mm'] - board_width_mm) / 2,
        w=board_length_mm, h=board_width_mm)

# Write the PDF, creating the output directory on first use
output_dir = Path(output_dir)
if not output_dir.exists():
    output_dir.mkdir(parents=True)
pdf.output(output_dir / pdf_name)
"""Render the trash receptacle printout: four corner markers plus a dashed outline, as a PDF."""
import tempfile
from pathlib import Path
import cv2
from fpdf import FPDF
from PIL import Image
import utils

marker_ids = list(range(34, 38))
receptacle_width_mm = 150
padding_mm = 4
line_width = 5
dash_length = 10.15
output_dir = 'printouts'
pdf_name = 'trash-receptacle.pdf'
orientation = 'L'  # landscape page

marker_params = utils.get_marker_parameters()
paper_params = utils.get_paper_parameters(orientation)

marker_length_mm = 1000 * marker_params['marker_length']
scale_factor = (marker_length_mm / paper_params['mm_per_in']) * paper_params['ppi'] / marker_params['marker_length_pixels']
aruco_dict = cv2.aruco.Dictionary_get(marker_params['dict_id'])

# Everything is positioned relative to the page center
page_center_x = paper_params['width_mm'] / 2
page_center_y = paper_params['height_mm'] / 2

pdf = FPDF(orientation, 'mm', 'letter')
pdf.add_page()
pdf.set_line_width(line_width)

# One marker per receptacle corner; signs give the corner's quadrant
corner_signs = [(-1, -1), (1, -1), (1, 1), (-1, 1)]
with tempfile.TemporaryDirectory() as tmp_dir_name:
    for marker_id, (corner_x, corner_y) in zip(marker_ids, corner_signs):
        image_path = str(Path(tmp_dir_name) / '{}.png'.format(marker_id))
        marker_image = cv2.aruco.drawMarker(aruco_dict, marker_id, int(scale_factor * marker_params['marker_length_pixels']))
        Image.fromarray(marker_image).save(image_path)
        # Anchor at the corner; right/bottom markers shift inward by their own size
        x = page_center_x + corner_x * receptacle_width_mm / 2
        if corner_x == 1:
            x -= marker_length_mm
        y = page_center_y + corner_y * receptacle_width_mm / 2
        if corner_y == 1:
            y -= marker_length_mm
        pdf.image(image_path, x=x, y=y, w=marker_length_mm, h=marker_length_mm)

# Dashed outline drawn as four segments that stop short of the corner markers
offset1 = receptacle_width_mm / 2 - marker_length_mm - padding_mm - line_width / 2
offset2 = receptacle_width_mm / 2 - line_width / 2
segments = [
    (-offset1, offset1, -offset2, -offset2),
    (offset2, offset2, -offset1, offset1),
    (offset1, -offset1, offset2, offset2),
    (-offset2, -offset2, offset1, -offset1),
]
for x1, x2, y1, y2 in segments:
    pdf.dashed_line(
        page_center_x + x1, page_center_y + y1,
        page_center_x + x2, page_center_y + y2,
        dash_length=dash_length, space_length=dash_length + 2 * line_width
    )

# Write the PDF, creating the output directory on first use
output_dir = Path(output_dir)
if not output_dir.exists():
    output_dir.mkdir(parents=True)
pdf.output(output_dir / pdf_name)
def main(args):
    """Continuously detect ArUco markers in the webcam feed and display them.

    Args:
        args: Parsed arguments with ``camera_id`` (int) and
            ``camera_name`` (str) attributes.

    Press Esc in the display window to quit.
    """
    # Read camera parameters (image size, intrinsics, distortion)
    camera_params_file_path = utils.get_camera_params_file_path(args.camera_name)
    image_width, image_height, camera_matrix, dist_coeffs = utils.get_camera_params(camera_params_file_path)

    # Set up webcam
    cap = utils.get_video_cap(image_width, image_height, args.camera_id)

    # Set up aruco dict
    params = utils.get_marker_parameters()
    aruco_dict = cv2.aruco.Dictionary_get(params['dict_id'])

    # Enable corner refinement (disabled by default; uncomment to use)
    #detector_params = cv2.aruco.DetectorParameters_create()
    #detector_params.cornerRefinementMethod = cv2.aruco.CORNER_REFINE_SUBPIX

    while True:
        if cv2.waitKey(1) == 27:  # Esc key
            break

        _, image = cap.read()
        if image is None:
            continue

        # Undistort image and detect markers
        image = cv2.undistort(image, camera_matrix, dist_coeffs)
        #corners, ids, _ = cv2.aruco.detectMarkers(image, aruco_dict, parameters=detector_params)
        corners, ids, _ = cv2.aruco.detectMarkers(image, aruco_dict)

        # Show detections (draw on a copy so the raw frame stays untouched)
        image_copy = image.copy()
        if ids is not None:
            cv2.aruco.drawDetectedMarkers(image_copy, corners, ids)
        cv2.imshow('out', image_copy)

    cap.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    # Guard the entry point so importing this module does not open the camera
    # (matches the pattern already used in server.py)
    parser = argparse.ArgumentParser()
    parser.add_argument('--camera-id', type=int, default=0)
    parser.add_argument('--camera-name', default='logitech-c930e')
    main(parser.parse_args())
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jimmyyhwu/spatial-action-maps/a39ff992eb29f37e1a05e2976ac8c73157bd8ab2/aruco/images/board-corners.jpg -------------------------------------------------------------------------------- /aruco/images/cube.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jimmyyhwu/spatial-action-maps/a39ff992eb29f37e1a05e2976ac8c73157bd8ab2/aruco/images/cube.jpg -------------------------------------------------------------------------------- /aruco/images/robot-side.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jimmyyhwu/spatial-action-maps/a39ff992eb29f37e1a05e2976ac8c73157bd8ab2/aruco/images/robot-side.jpg -------------------------------------------------------------------------------- /aruco/images/robot-top.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jimmyyhwu/spatial-action-maps/a39ff992eb29f37e1a05e2976ac8c73157bd8ab2/aruco/images/robot-top.jpg -------------------------------------------------------------------------------- /aruco/printouts/charuco-board.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jimmyyhwu/spatial-action-maps/a39ff992eb29f37e1a05e2976ac8c73157bd8ab2/aruco/printouts/charuco-board.pdf -------------------------------------------------------------------------------- /aruco/printouts/markers-corners.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jimmyyhwu/spatial-action-maps/a39ff992eb29f37e1a05e2976ac8c73157bd8ab2/aruco/printouts/markers-corners.pdf -------------------------------------------------------------------------------- /aruco/printouts/markers-cubes.pdf: 
import argparse
from multiprocessing.connection import Listener
import cv2
import numpy as np
import utils

def main(args):
    """Serve robot and cube pose estimates over a local socket.

    Captures webcam frames, locates each tracking board via its corner
    markers (once), warps the board to a top-down view, detects the markers
    on it, and streams pose estimates to a connected client.

    Args:
        args: Parsed arguments with camera_id, camera_name, room_length (m),
            room_width (m), and debug attributes.
    """
    # Read camera parameters
    camera_params_file_path = utils.get_camera_params_file_path(args.camera_name)
    image_width, image_height, camera_matrix, dist_coeffs = utils.get_camera_params(camera_params_file_path)

    # Set up webcam
    cap = utils.get_video_cap(image_width, image_height, args.camera_id)

    # Board and marker params
    boards = [{'name': 'robots', 'corner_offset_mm': 36}, {'name': 'cubes', 'corner_offset_mm': 12}]
    marker_params = utils.get_marker_parameters()
    room_length_mm = 1000 * args.room_length
    room_width_mm = 1000 * args.room_width
    room_length_pixels = int(room_length_mm * marker_params['pixels_per_mm'])
    room_width_pixels = int(room_width_mm * marker_params['pixels_per_mm'])

    # Set up aruco dicts: each board gets one dictionary restricted to its
    # corner-marker IDs and one restricted to the IDs of the markers it tracks
    for board in boards:
        aruco_dict = cv2.aruco.Dictionary_get(marker_params['dict_id'])
        aruco_dict.bytesList = aruco_dict.bytesList[utils.get_marker_ids('corners_{}'.format(board['name']))]
        board['board_dict'] = aruco_dict
        aruco_dict = cv2.aruco.Dictionary_get(marker_params['dict_id'])
        aruco_dict.bytesList = aruco_dict.bytesList[utils.get_marker_ids(board['name'])]
        board['marker_dict'] = aruco_dict

    # Board warping: dst_points map the room interior onto
    # (0, 0)..(room_length_pixels, room_width_pixels); the physical corner
    # markers sit outside that region by corner_offset_mm
    for board in boards:
        corner_offset_pixels = board['corner_offset_mm'] * marker_params['pixels_per_mm']
        board['src_points'] = None
        board['dst_points'] = [
            [-corner_offset_pixels, -corner_offset_pixels],
            [room_length_pixels + corner_offset_pixels, -corner_offset_pixels],
            [room_length_pixels + corner_offset_pixels, room_width_pixels + corner_offset_pixels],
            [-corner_offset_pixels, room_width_pixels + corner_offset_pixels]
        ]

    # Enable corner refinement
    detector_params = cv2.aruco.DetectorParameters_create()
    detector_params.cornerRefinementMethod = cv2.aruco.CORNER_REFINE_SUBPIX

    # Set up server
    address = ('localhost', 6000)
    listener = Listener(address, authkey=b'secret password')
    conn = None

    def process_image(image):
        # Returns {board_name: {marker_index: {'position', 'heading', ...}}};
        # a board whose corners have not been located yet maps to None
        image = cv2.undistort(image, camera_matrix, dist_coeffs)

        data = {}
        for board in boards:
            board_name = board['name']
            data[board_name] = {}

            if board['src_points'] is None:
                # Detect board markers (assumes board won't move since this is only done once)
                #board_corners, board_indices, _ = cv2.aruco.detectMarkers(image, board['board_dict'], parameters=detector_params)
                board_corners, board_indices, _ = cv2.aruco.detectMarkers(image, board['board_dict'])

                # Show detections
                if args.debug:
                    image_copy = image.copy()
                    if board_indices is not None:
                        cv2.aruco.drawDetectedMarkers(image_copy, board_corners, board_indices)
                    cv2.imshow('{}_board_corners'.format(board_name), image_copy)

                # Ensure board was found
                if board_indices is None or len(board_indices) < 4:
                    data[board_name] = None  # None rather than {} to signify board was not detected
                    continue

                # Sorting by marker index puts the corner centers in dst_points order
                board['src_points'] = []
                for marker_index, corner in sorted(zip(board_indices, board_corners)):
                    board['src_points'].append(corner.squeeze(0).mean(axis=0).tolist())
            else:
                # Warp the board to a top-down view of the room
                M = cv2.getPerspectiveTransform(np.asarray(board['src_points'], dtype=np.float32), np.asarray(board['dst_points'], dtype=np.float32))
                warped_image = cv2.warpPerspective(image, M, (room_length_pixels, room_width_pixels))

                # Detect markers in warped image
                corners, indices, _ = cv2.aruco.detectMarkers(warped_image, board['marker_dict'], parameters=detector_params)

                # Show detections
                if args.debug:
                    image_copy = warped_image.copy()
                    if indices is not None:
                        cv2.aruco.drawDetectedMarkers(image_copy, corners, indices)
                    image_copy = cv2.resize(image_copy, (int(image_copy.shape[1] / 2), int(image_copy.shape[0] / 2)))
                    cv2.imshow(board_name, image_copy)

                if indices is None:
                    continue

                # Compute poses
                board_data = {}
                for marker_index, corner in zip(indices, corners):
                    marker_index = marker_index.item()
                    marker_corners = corner.squeeze(0)
                    marker_center = marker_corners.mean(axis=0)

                    # Compute heading estimates for each of the four marker corners,
                    # rotating each corner's bearing so all four estimates agree
                    diffs = [c - marker_center for c in marker_corners]
                    angles = np.array([np.arctan2(-diff[1], diff[0]) for diff in diffs])
                    angles = angles + np.radians([-135, -45, 45, 135])
                    angles = np.mod(angles + np.pi, 2 * np.pi) - np.pi  # wrap to [-pi, pi)

                    # If multiple markers are detected on same cube, use the marker
                    # on top (which should have the lowest angle_std)
                    angle_std = angles.std()
                    if board_name == 'cubes' and marker_index in board_data and angle_std > board_data[marker_index]['angle_std']:
                        continue

                    # Compute position (meters, room-centered) and heading (radians)
                    position = [
                        (marker_center[0] / marker_params['pixels_per_mm'] - room_length_mm / 2) / 1000,
                        (room_width_mm / 2 - (marker_center[1] / marker_params['pixels_per_mm'])) / 1000
                    ]
                    heading = angles.mean()
                    marker_data = {'position': position, 'heading': heading}
                    if board_name == 'cubes':
                        marker_data['angle_std'] = angle_std
                    board_data[marker_index] = marker_data
                data[board_name] = board_data

        return data

    while True:
        if cv2.waitKey(1) == 27:  # Esc key
            break

        if conn is None:
            print('Waiting for connection...')
            conn = listener.accept()
            print('Connected!')

        _, image = cap.read()
        if image is None:
            continue

        data = process_image(image)
        try:
            conn.send(data)
        except Exception:  # was a bare except: don't swallow KeyboardInterrupt;
            conn = None    # client went away, wait for a new connection

    cap.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--camera-id', type=int, default=0)
    parser.add_argument('--camera-name', default='logitech-c930e')
    parser.add_argument('--room-length', type=float, default=1.0)
    parser.add_argument('--room-width', type=float, default=0.5)
    parser.add_argument('--debug', action='store_true')
    parser.set_defaults(debug=False)
    main(parser.parse_args())
from pathlib import Path

################################################################################
# Board and markers

def get_marker_parameters():
    """Return geometry and dictionary parameters shared by all ArUco markers."""
    import cv2  # deferred so the pure helpers below work without OpenCV installed
    params = {}
    params['dict_id'] = cv2.aruco.DICT_4X4_50
    params['marker_length'] = 0.018  # 18 mm
    params['marker_length_pixels'] = 6
    params['pixels_per_mm'] = 2  # 2 gives much better marker detections than 1
    params['sticker_length_mm'] = {'robots': 25, 'cubes': 28, 'corners': 24}
    return params

def get_charuco_board_parameters():
    """Return marker parameters extended with ChArUco board geometry."""
    params = get_marker_parameters()
    params['squares_x'] = 10
    params['squares_y'] = 7
    params['square_length'] = 0.024  # 24 mm
    # Marker and square pixel scales must agree, so this must come out integral
    square_length_pixels = (params['marker_length_pixels'] / params['marker_length']) * params['square_length']
    assert not square_length_pixels - int(square_length_pixels) > 1e-8
    params['square_length_pixels'] = int(square_length_pixels)
    return params

def get_paper_parameters(orientation='P'):
    """Return letter-size paper dimensions in mm for orientation 'P' or 'L'."""
    width, height, padding = 8.5, 11, 0.5
    if orientation == 'L':
        width, height = height, width
    params = {}
    params['mm_per_in'] = 25.4
    params['width_mm'] = width * params['mm_per_in']
    params['height_mm'] = height * params['mm_per_in']
    params['padding_mm'] = padding * params['mm_per_in']
    params['ppi'] = 600
    return params

def get_marker_ids(marker_type):
    """Return the marker ID range reserved for the given marker type.

    Raises:
        ValueError: if marker_type is not one of the known types.
    """
    if marker_type == 'robots':
        return list(range(10))
    if marker_type == 'cubes':
        return list(range(10, 34))
    if marker_type == 'corners_robots':
        return list(range(42, 46))
    if marker_type == 'corners_cubes':
        return list(range(46, 50))
    if marker_type == 'corners':
        return get_marker_ids('corners_robots') + get_marker_ids('corners_cubes')
    raise ValueError('Unrecognized marker type: {}'.format(marker_type))

################################################################################
# Camera

def get_video_cap(frame_width, frame_height, camera_id=0):
    """Open a webcam capture configured for the given frame size.

    Raises:
        AssertionError: if the camera does not accept the requested size.
    """
    import cv2
    cap = cv2.VideoCapture(camera_id)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)
    cap.set(cv2.CAP_PROP_AUTOFOCUS, 0)  # Might not actually do anything on macOS
    cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)  # Reduces latency
    assert cap.get(cv2.CAP_PROP_FRAME_WIDTH) == frame_width, (cap.get(cv2.CAP_PROP_FRAME_WIDTH), frame_width)
    assert cap.get(cv2.CAP_PROP_FRAME_HEIGHT) == frame_height, (cap.get(cv2.CAP_PROP_FRAME_HEIGHT), frame_height)
    return cap

def get_usb_device_serial(camera_name):
    """Look up the USB serial number string for a known camera model.

    Raises:
        ValueError: if camera_name is not a known camera model.
    """
    # Deferred import: pyusb needs libusb, which may not be present;
    # import usb.util explicitly rather than relying on usb.core pulling it in
    import usb.core
    import usb.util
    if camera_name == 'logitech-c930e':
        id_vendor, id_product = (0x046d, 0x0843)
    else:
        raise ValueError('Unrecognized camera name: {}'.format(camera_name))
    dev = usb.core.find(idVendor=id_vendor, idProduct=id_product)
    return usb.util.get_string(dev, dev.iSerialNumber)

def get_camera_identifier(camera_name):
    """Return the camera's USB serial, or 'unknown-camera' if lookup fails."""
    try:
        return get_usb_device_serial(camera_name)
    except Exception:  # was a bare except: don't swallow KeyboardInterrupt/SystemExit
        return 'unknown-camera'

def get_camera_params_file_path(camera_name='logitech-c930e'):
    """Return the path of the YAML calibration file for the given camera."""
    camera_params_dir = Path('camera-params')
    identifier = get_camera_identifier(camera_name)
    return str(camera_params_dir / '{}.yml'.format(identifier))

def get_camera_params(camera_params_file_path):
    """Load calibration (image size, intrinsics, distortion) from a YAML file.

    Returns:
        (image_width, image_height, camera_matrix, dist_coeffs) where the
        sizes are floats (OpenCV FileStorage stores them as reals) and the
        matrices are numpy arrays.
    """
    import cv2
    assert Path(camera_params_file_path).exists()
    fs = cv2.FileStorage(camera_params_file_path, cv2.FILE_STORAGE_READ)
    image_width = fs.getNode('image_width').real()
    image_height = fs.getNode('image_height').real()
    camera_matrix = fs.getNode('camera_matrix').mat()
    dist_coeffs = fs.getNode('distortion_coefficients').mat()
    fs.release()
    return image_width, image_height, camera_matrix, dist_coeffs
-------------------------------------------------------------------------------- /assets/corner.obj: -------------------------------------------------------------------------------- 1 | # Blender v2.79 (sub 0) OBJ File: '' 2 | # www.blender.org 3 | o Object.002_Mesh.002 4 | v -0.050342 0.050342 -0.050000 5 | v 0.050342 0.050342 -0.050000 6 | v 0.004148 0.031208 -0.050000 7 | v -0.050342 0.050342 0.050000 8 | v 0.050342 0.050342 0.050000 9 | v 0.004148 0.031208 0.050000 10 | vn 0.0000 0.0000 -1.0000 11 | vn 0.0000 1.0000 0.0000 12 | vn 0.3827 -0.9239 0.0000 13 | vn -0.3313 -0.9435 0.0000 14 | vn 0.0000 0.0000 1.0000 15 | s off 16 | f 1//1 2//1 3//1 17 | f 2//2 1//2 4//2 5//2 18 | f 3//3 2//3 5//3 6//3 19 | f 1//4 3//4 6//4 4//4 20 | f 6//5 5//5 4//5 21 | o Object.001_Mesh.001 22 | v -0.050342 0.050342 -0.050000 23 | v 0.004148 0.031208 -0.050000 24 | v -0.031208 -0.004148 -0.050000 25 | v -0.050342 0.050342 0.050000 26 | v 0.004148 0.031208 0.050000 27 | v -0.031208 -0.004148 0.050000 28 | vn 0.0000 0.0000 -1.0000 29 | vn 0.3313 0.9435 0.0000 30 | vn 0.7071 -0.7071 0.0000 31 | vn -0.9435 -0.3313 0.0000 32 | vn 0.0000 0.0000 1.0000 33 | s off 34 | f 7//6 8//6 9//6 35 | f 8//7 7//7 10//7 11//7 36 | f 9//8 8//8 11//8 12//8 37 | f 7//9 9//9 12//9 10//9 38 | f 12//10 11//10 10//10 39 | o Object_Mesh 40 | v -0.050342 0.050342 -0.050000 41 | v -0.031208 -0.004148 -0.050000 42 | v -0.050342 -0.050342 -0.050000 43 | v -0.050342 0.050342 0.050000 44 | v -0.031208 -0.004148 0.050000 45 | v -0.050342 -0.050342 0.050000 46 | vn -0.0000 0.0000 -1.0000 47 | vn 0.9435 0.3313 0.0000 48 | vn 0.9239 -0.3827 0.0000 49 | vn -1.0000 0.0000 0.0000 50 | vn -0.0000 0.0000 1.0000 51 | s off 52 | f 13//11 14//11 15//11 53 | f 14//12 13//12 16//12 17//12 54 | f 15//13 14//13 17//13 18//13 55 | f 13//14 15//14 18//14 16//14 56 | f 18//15 17//15 16//15 57 | -------------------------------------------------------------------------------- /assets/plane.obj.template: 
-------------------------------------------------------------------------------- 1 | # Blender v2.66 (sub 1) OBJ File: '' 2 | # www.blender.org 3 | mtllib plane.mtl 4 | o Plane 5 | v HALFLENGTH.000000 -HALFWIDTH.000000 0.000000 6 | v HALFLENGTH.000000 HALFWIDTH.000000 0.000000 7 | v -HALFLENGTH.000000 HALFWIDTH.000000 0.000000 8 | v -HALFLENGTH.000000 -HALFWIDTH.000000 0.000000 9 | 10 | vt HALFLENGTH.000000 0.000000 11 | vt HALFLENGTH.000000 HALFWIDTH.000000 12 | vt 0.000000 HALFWIDTH.000000 13 | vt 0.000000 0.000000 14 | 15 | usemtl Material 16 | s off 17 | f 1/1 2/2 3/3 18 | f 1/1 3/3 4/4 19 | -------------------------------------------------------------------------------- /assets/plane.urdf.template: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /assets/robot.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /config/experiments/base/large_columns.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_columns 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | 
learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/base/large_divider.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_divider 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_divider 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | 
room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/base/small_columns.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_columns 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 4 24 | obstacle_config: small_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | 
use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/base/small_empty.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_empty 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 4 24 | obstacle_config: small_empty 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/fixed-step-size/large_columns-fixed_step_size.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | 
checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_columns-fixed_step_size 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/fixed-step-size/large_divider-fixed_step_size.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_divider-fixed_step_size 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: 
null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_divider 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/fixed-step-size/small_columns-fixed_step_size.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_columns-fixed_step_size 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 4 24 | obstacle_config: small_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | 
run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/fixed-step-size/small_empty-fixed_step_size.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_empty-fixed_step_size 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 4 24 | obstacle_config: small_empty 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | 
use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/no-partial-rewards/large_columns-no_partial_rewards.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_columns-no_partial_rewards 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_columns 25 | partial_rewards_scale: 0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/no-partial-rewards/large_divider-no_partial_rewards.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | 
checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_divider-no_partial_rewards 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_divider 25 | partial_rewards_scale: 0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/no-partial-rewards/small_columns-no_partial_rewards.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_columns-no_partial_rewards 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | 
inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 4 24 | obstacle_config: small_columns 25 | partial_rewards_scale: 0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/no-partial-rewards/small_empty-no_partial_rewards.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_empty-no_partial_rewards 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 4 24 | obstacle_config: small_empty 25 | partial_rewards_scale: 0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: 
null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/large_columns-no_sp_components.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_columns-no_sp_components 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 3 24 | obstacle_config: large_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: false 41 | 
use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/large_columns-no_sp_from_agent.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_columns-no_sp_from_agent 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 3 24 | obstacle_config: large_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: false 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/large_columns-no_sp_in_rewards.yml: 
-------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_columns-no_sp_in_rewards 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/large_columns-no_sp_movement.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_columns-no_sp_movement 10 | 
exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/large_columns-no_sp_to_receptacle.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_columns-no_sp_to_receptacle 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: 
large_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/large_divider-no_sp_components.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_divider-no_sp_components 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 3 24 | obstacle_config: large_divider 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | 
use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: false 41 | use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/large_divider-no_sp_from_agent.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_divider-no_sp_from_agent 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 3 24 | obstacle_config: large_divider 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: false 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | 
-------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/large_divider-no_sp_in_rewards.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_divider-no_sp_in_rewards 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_divider 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/large_divider-no_sp_movement.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | 
collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_divider-no_sp_movement 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_divider 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/large_divider-no_sp_to_receptacle.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_divider-no_sp_to_receptacle 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: 
logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 4 24 | obstacle_config: large_divider 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/small_columns-no_sp_components.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_columns-no_sp_components 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 3 24 | obstacle_config: small_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 
33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: false 41 | use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/small_columns-no_sp_from_agent.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_columns-no_sp_from_agent 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 3 24 | obstacle_config: small_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: false 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | 
use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/small_columns-no_sp_in_rewards.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_columns-no_sp_in_rewards 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 4 24 | obstacle_config: small_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/small_columns-no_sp_movement.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | 
checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_columns-no_sp_movement 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 4 24 | obstacle_config: small_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/small_columns-no_sp_to_receptacle.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_columns-no_sp_to_receptacle 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 
| inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 4 24 | obstacle_config: small_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/small_empty-no_sp_components.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_empty-no_sp_components 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 3 24 | obstacle_config: small_empty 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | 
random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: false 41 | use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/shortest-path-ablations/small_empty-no_sp_from_agent.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_empty-no_sp_from_agent 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 3 24 | obstacle_config: small_empty 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: 
false 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/steering-commands/large_columns-steering_commands-no_sp_components.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_columns-steering_commands-no_sp_components 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 5 24 | obstacle_config: large_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: steering_commands 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 18 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: true 40 | use_shortest_path_channel: false 41 | use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: true 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- 
/config/experiments/steering-commands/large_columns-steering_commands.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_columns-steering_commands 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 6 24 | obstacle_config: large_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: steering_commands 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 18 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: true 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: true 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/steering-commands/large_divider-steering_commands-no_sp_components.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | 
distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_divider-steering_commands-no_sp_components 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 5 24 | obstacle_config: large_divider 25 | partial_rewards_scale: 2.0 26 | policy_type: steering_commands 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 18 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: true 40 | use_shortest_path_channel: false 41 | use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: true 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/steering-commands/large_divider-steering_commands.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: large_divider-steering_commands 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | 
model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 20 23 | num_input_channels: 6 24 | obstacle_config: large_divider 25 | partial_rewards_scale: 2.0 26 | policy_type: steering_commands 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 1.0 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 18 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: true 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: true 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/steering-commands/small_columns-steering_commands-no_sp_components.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_columns-steering_commands-no_sp_components 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 5 24 | obstacle_config: small_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: steering_commands 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | 
shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 18 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: true 40 | use_shortest_path_channel: false 41 | use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: true 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/steering-commands/small_columns-steering_commands.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_columns-steering_commands 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 6 24 | obstacle_config: small_columns 25 | partial_rewards_scale: 2.0 26 | policy_type: steering_commands 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 18 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: true 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | 
use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: true 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/steering-commands/small_empty-steering_commands-no_sp_components.yml: -------------------------------------------------------------------------------- 1 | batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_empty-steering_commands-no_sp_components 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 5 24 | obstacle_config: small_empty 25 | partial_rewards_scale: 2.0 26 | policy_type: steering_commands 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 18 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: true 38 | use_double_dqn: true 39 | use_position_channel: true 40 | use_shortest_path_channel: false 41 | use_shortest_path_movement: false 42 | use_shortest_path_partial_rewards: false 43 | use_shortest_path_to_receptacle_channel: false 44 | use_steering_commands: true 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/experiments/steering-commands/small_empty-steering_commands.yml: -------------------------------------------------------------------------------- 1 | 
batch_size: 32 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_empty-steering_commands 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: 0.25 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 100 15 | learning_rate: 0.01 16 | learning_starts: 1000 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 6 24 | obstacle_config: small_empty 25 | partial_rewards_scale: 2.0 26 | policy_type: steering_commands 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 10000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 18 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: true 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: true 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/local/small_empty-local.yml: -------------------------------------------------------------------------------- 1 | batch_size: 4 2 | checkpoint_dir: null 3 | checkpoint_freq: 1000 4 | checkpoint_path: null 5 | checkpoints_dir: checkpoints 6 | collision_penalty: 0.25 7 | discount_factor: 0.99 8 | distance_to_receptacle_channel_scale: 0.25 9 | experiment_name: small_empty-local 10 | exploration_timesteps: 6000 11 | final_exploration: 0.01 12 | fixed_step_size: null 13 | grad_norm_clipping: 10 14 | inactivity_cutoff: 5 15 | learning_rate: 0.01 
16 | learning_starts: 10 17 | log_dir: null 18 | logs_dir: logs 19 | ministep_size: 0.25 20 | model_path: null 21 | nonmovement_penalty: 0.25 22 | num_cubes: 10 23 | num_input_channels: 4 24 | obstacle_config: small_empty 25 | partial_rewards_scale: 2.0 26 | policy_type: dense_action_space 27 | position_channel_scale: 0.25 28 | random_seed: null 29 | replay_buffer_size: 1000 30 | room_length: 1.0 31 | room_width: 0.5 32 | run_name: null 33 | shortest_path_channel_scale: 0.25 34 | steering_commands_num_turns: 4 35 | target_update_freq: 1000 36 | total_timesteps: 60000 37 | use_distance_to_receptacle_channel: false 38 | use_double_dqn: true 39 | use_position_channel: false 40 | use_shortest_path_channel: true 41 | use_shortest_path_movement: true 42 | use_shortest_path_partial_rewards: true 43 | use_shortest_path_to_receptacle_channel: true 44 | use_steering_commands: false 45 | weight_decay: 0.0001 46 | -------------------------------------------------------------------------------- /config/templates/small_empty.yml: -------------------------------------------------------------------------------- 1 | # General 2 | experiment_name: small_empty 3 | run_name: 4 | logs_dir: logs 5 | checkpoints_dir: checkpoints 6 | log_dir: 7 | checkpoint_dir: 8 | model_path: 9 | checkpoint_path: 10 | 11 | # Learning 12 | batch_size: 32 13 | learning_rate: 0.01 14 | weight_decay: 0.0001 15 | grad_norm_clipping: 10 16 | checkpoint_freq: 1000 17 | 18 | # DQN 19 | policy_type: dense_action_space 20 | total_timesteps: 60000 21 | exploration_timesteps: 6000 22 | replay_buffer_size: 10000 23 | use_double_dqn: True 24 | discount_factor: 0.99 25 | final_exploration: 0.01 26 | learning_starts: 1000 27 | target_update_freq: 1000 28 | num_input_channels: 4 29 | 30 | # Simulation 31 | room_length: 1.0 32 | room_width: 0.5 33 | num_cubes: 10 34 | obstacle_config: small_empty 35 | use_distance_to_receptacle_channel: False 36 | distance_to_receptacle_channel_scale: 0.25 37 | 
use_shortest_path_to_receptacle_channel: True 38 | use_shortest_path_channel: True 39 | shortest_path_channel_scale: 0.25 40 | use_position_channel: False 41 | position_channel_scale: 0.25 42 | partial_rewards_scale: 2.0 43 | use_shortest_path_partial_rewards: True 44 | collision_penalty: 0.25 45 | nonmovement_penalty: 0.25 46 | use_shortest_path_movement: True 47 | fixed_step_size: 48 | use_steering_commands: False 49 | steering_commands_num_turns: 4 50 | ministep_size: 0.25 51 | inactivity_cutoff: 100 52 | random_seed: 53 | -------------------------------------------------------------------------------- /download-pretrained.sh: -------------------------------------------------------------------------------- 1 | # Downloads four pretrained policies, one for each test environment 2 | wget -c https://spatial-action-maps.cs.princeton.edu/pretrained/logs/20200125T213536-small_empty/config.yml -P logs/20200125T213536-small_empty 3 | wget -c https://spatial-action-maps.cs.princeton.edu/pretrained/checkpoints/20200125T213536-small_empty/model_00061000.pth.tar -P checkpoints/20200125T213536-small_empty 4 | wget -c https://spatial-action-maps.cs.princeton.edu/pretrained/logs/20200126T171505-small_columns/config.yml -P logs/20200126T171505-small_columns 5 | wget -c https://spatial-action-maps.cs.princeton.edu/pretrained/checkpoints/20200126T171505-small_columns/model_00061000.pth.tar -P checkpoints/20200126T171505-small_columns 6 | wget -c https://spatial-action-maps.cs.princeton.edu/pretrained/logs/20200126T165108-large_columns/config.yml -P logs/20200126T165108-large_columns 7 | wget -c https://spatial-action-maps.cs.princeton.edu/pretrained/checkpoints/20200126T165108-large_columns/model_00061000.pth.tar -P checkpoints/20200126T165108-large_columns 8 | wget -c https://spatial-action-maps.cs.princeton.edu/pretrained/logs/20200127T015539-large_divider/config.yml -P logs/20200127T015539-large_divider 9 | wget -c 
https://spatial-action-maps.cs.princeton.edu/pretrained/checkpoints/20200127T015539-large_divider/model_00061000.pth.tar -P checkpoints/20200127T015539-large_divider 10 | -------------------------------------------------------------------------------- /enjoy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import utils 3 | 4 | def main(args): 5 | config_path = args.config_path 6 | if config_path is None: 7 | config_path = utils.select_run() 8 | if config_path is None: 9 | print('Please provide a config path') 10 | return 11 | cfg = utils.read_config(config_path) 12 | env = utils.get_env_from_cfg(cfg, use_gui=True) 13 | policy = utils.get_policy_from_cfg(cfg, env.get_action_space()) 14 | state = env.reset() 15 | while True: 16 | action, _ = policy.step(state) 17 | state, _, done, _ = env.step(action) 18 | if done: 19 | state = env.reset() 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--config-path') 24 | main(parser.parse_args()) 25 | -------------------------------------------------------------------------------- /eval_summary.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pathlib import Path\n", 10 | "\n", 11 | "import ipywidgets as widgets\n", 12 | "\n", 13 | "# Avoid non-compliant Type 3 fonts\n", 14 | "import matplotlib\n", 15 | "matplotlib.rcParams['pdf.fonttype'] = 42 # pylint: disable=wrong-import-position\n", 16 | "\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "from ipywidgets import interact\n", 21 | "from IPython.display import display\n", 22 | "from tqdm import tqdm_notebook as tqdm\n", 23 | "\n", 24 | "import utils" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 
null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "pd.set_option('display.max_rows', None)\n", 34 | "plt.rcParams['figure.figsize'] = (12, 8)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "logs_dir = Path('logs')\n", 44 | "eval_dir = logs_dir.parent / 'eval'\n", 45 | "obstacle_configs = ['small_empty', 'small_columns', 'large_columns', 'large_divider']" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "def load_data(cfg):\n", 55 | " eval_path = eval_dir / '{}.npy'.format(cfg.run_name)\n", 56 | " if eval_path.exists():\n", 57 | " return np.load(eval_path, allow_pickle=True)\n", 58 | " #print('Eval file for {} was not found'.format(cfg.run_name))\n", 59 | " return None" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "def show_table():\n", 69 | " all_data = {}\n", 70 | " for log_dir in tqdm(list(logs_dir.iterdir())):\n", 71 | " # Load eval data for run\n", 72 | " cfg = utils.read_config(str(log_dir / 'config.yml'))\n", 73 | " data = load_data(cfg)\n", 74 | " if data is None:\n", 75 | " continue\n", 76 | "\n", 77 | " # Add mean cubes for run\n", 78 | " if cfg.experiment_name not in all_data:\n", 79 | " all_data[cfg.experiment_name] = []\n", 80 | " mean_cubes = np.mean([episode[-1]['cubes'] for episode in data])\n", 81 | " all_data[cfg.experiment_name].append(mean_cubes)\n", 82 | "\n", 83 | " # Replace runs with mean/std of runs\n", 84 | " for experiment_name, cubes_list in all_data.items():\n", 85 | " all_data[experiment_name] = '{:.2f} ± {:.2f}'.format(np.mean(cubes_list), np.std(cubes_list))\n", 86 | "\n", 87 | " # Display in table\n", 88 | " def f(obstacle_config):\n", 89 | " data = {'cubes': {\n", 90 | " experiment_name: cubes for experiment_name, cubes in 
all_data.items()\n", 91 | " if experiment_name.startswith(obstacle_config)\n", 92 | " }}\n", 93 | " display(pd.DataFrame(data))\n", 94 | " interact(f, obstacle_config=widgets.Dropdown(options=obstacle_configs))" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": { 101 | "scrolled": false 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "show_table()" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "def extend_curves(curves):\n", 115 | " if len(curves) == 0:\n", 116 | " return curves\n", 117 | " max_length = max(len(curve) for curve in curves)\n", 118 | " for i, curve in enumerate(curves):\n", 119 | " curves[i] = np.pad(curve, (0, max_length - len(curve)), 'edge')\n", 120 | " return curves" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "def get_curve_for_run(data, step_size):\n", 130 | " curves = []\n", 131 | " for episode in data:\n", 132 | " cubes = np.asarray([step['cubes'] for step in episode])\n", 133 | " cumulative_distance = np.asarray([step['distance'] for step in episode])\n", 134 | " x = np.arange(0, cumulative_distance[-1] + step_size, step_size)\n", 135 | " xp, fp = cumulative_distance, cubes\n", 136 | " curves.append(np.floor(np.interp(x, xp, fp, left=0)))\n", 137 | " curves = extend_curves(curves)\n", 138 | " return np.mean(curves, axis=0)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "def get_label(experiment_name):\n", 148 | " parts = experiment_name.split('-')\n", 149 | " if len(parts) == 1:\n", 150 | " return 'Ours'\n", 151 | " return {\n", 152 | " 'fixed_step_size': 'Ours, fixed step size',\n", 153 | " 'steering_commands': 'Steering commands',\n", 154 | " 'no_partial_rewards': 'Ours, no 
partial rewards',\n", 155 | " 'no_sp_components': 'Ours, no shortest path components',\n", 156 | " 'no_sp_from_agent': 'Ours, no sp from agent',\n", 157 | " 'no_sp_to_receptacle': 'Ours, no sp to receptacle',\n", 158 | " 'no_sp_movement': 'Ours, no sp movement',\n", 159 | " 'no_sp_in_rewards': 'Ours, no sp in rewards',\n", 160 | " 'steering_commands-no_sp_components': 'Steering commands, no sp',\n", 161 | " }['-'.join(parts[1:])]" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": { 168 | "scrolled": false 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "def show_curves():\n", 173 | " step_size = 0.1\n", 174 | "\n", 175 | " all_curves = {}\n", 176 | " for log_dir in tqdm(list(logs_dir.iterdir())):\n", 177 | " # Load eval data for run\n", 178 | " cfg = utils.read_config(str(log_dir / 'config.yml'))\n", 179 | " data = load_data(cfg)\n", 180 | " if data is None:\n", 181 | " continue\n", 182 | "\n", 183 | " # Add curve for run\n", 184 | " if cfg.experiment_name not in all_curves:\n", 185 | " all_curves[cfg.experiment_name] = []\n", 186 | " all_curves[cfg.experiment_name].append(get_curve_for_run(data, step_size))\n", 187 | "\n", 188 | " def plot_curves(obstacle_config, experiment_names, fontsize=20):\n", 189 | " for experiment_name in experiment_names:\n", 190 | " curves = extend_curves(all_curves[experiment_name])\n", 191 | " x = np.arange(0, (len(curves[0]) - 0.5) * step_size, step_size)\n", 192 | " y_mean = np.mean(curves, axis=0)\n", 193 | " y_std = np.std(curves, axis=0)\n", 194 | " plt.plot(x, y_mean, label=get_label(experiment_name))\n", 195 | " plt.fill_between(x, y_mean - y_std, y_mean + y_std, alpha=0.2)\n", 196 | " plt.xlabel('Distance (m)', fontsize=fontsize)\n", 197 | " plt.ylabel('Num Blocks', fontsize=fontsize)\n", 198 | " if obstacle_config == 'large_divider':\n", 199 | " plt.xlim(0, 120)\n", 200 | " num_cubes = 20 if obstacle_config.startswith('large') else 10\n", 201 | " plt.ylim(0, 
num_cubes)\n", 202 | " plt.xticks(fontsize=fontsize - 2)\n", 203 | " plt.yticks(range(0, num_cubes + 1, 2), fontsize=fontsize - 2)\n", 204 | " plt.legend(fontsize=fontsize - 2, loc='upper left')\n", 205 | "\n", 206 | " def f(obstacle_config, experiment_names, save_to_pdf):\n", 207 | " if len(experiment_names) == 0:\n", 208 | " return\n", 209 | " plot_curves(obstacle_config, experiment_names)\n", 210 | " if save_to_pdf:\n", 211 | " plt.savefig('curves-{}.pdf'.format(obstacle_config), bbox_inches='tight')\n", 212 | " else:\n", 213 | " plt.show()\n", 214 | "\n", 215 | " obstacle_config_dropdown = widgets.Dropdown(options=obstacle_configs)\n", 216 | " experiment_names_select = widgets.SelectMultiple(layout=widgets.Layout(width='50%'))\n", 217 | " save_toggle = widgets.ToggleButton(description='Save to PDF')\n", 218 | " def update_experiment_names_options(*_):\n", 219 | " matching_experiment_names = []\n", 220 | " for experiment_name in all_curves:\n", 221 | " if experiment_name.startswith(obstacle_config_dropdown.value):\n", 222 | " matching_experiment_names.append(experiment_name)\n", 223 | " experiment_names_select.options = matching_experiment_names\n", 224 | " experiment_names_select.rows = len(matching_experiment_names)\n", 225 | " obstacle_config_dropdown.observe(update_experiment_names_options)\n", 226 | "\n", 227 | " interact(f, obstacle_config=obstacle_config_dropdown,\n", 228 | " experiment_names=experiment_names_select, save_to_pdf=save_toggle)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "scrolled": false 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "show_curves()" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [] 248 | } 249 | ], 250 | "metadata": { 251 | "kernelspec": { 252 | "display_name": "Python 3", 253 | "language": "python", 254 | "name": "python3" 255 | }, 256 | "language_info": { 
257 | "codemirror_mode": { 258 | "name": "ipython", 259 | "version": 3 260 | }, 261 | "file_extension": ".py", 262 | "mimetype": "text/x-python", 263 | "name": "python", 264 | "nbconvert_exporter": "python", 265 | "pygments_lexer": "ipython3", 266 | "version": "3.7.7" 267 | } 268 | }, 269 | "nbformat": 4, 270 | "nbformat_minor": 2 271 | } 272 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | # Prevent numpy from using up all cpu 2 | import os 3 | os.environ['MKL_NUM_THREADS'] = '1' # pylint: disable=wrong-import-position 4 | import argparse 5 | from pathlib import Path 6 | import numpy as np 7 | import utils 8 | 9 | def _run_eval(cfg, num_episodes=20): 10 | env = utils.get_env_from_cfg(cfg, random_seed=0) 11 | policy = utils.get_policy_from_cfg(cfg, env.get_action_space(), random_seed=0) 12 | data = [[] for _ in range(num_episodes)] 13 | episode_count = 0 14 | state = env.reset() 15 | while True: 16 | action, _ = policy.step(state) 17 | state, _, done, info = env.step(action) 18 | data[episode_count].append({'distance': info['cumulative_distance'], 'cubes': info['cumulative_cubes']}) 19 | if done: 20 | state = env.reset() 21 | episode_count += 1 22 | print('Completed {}/{} episodes'.format(episode_count, num_episodes)) 23 | if episode_count >= num_episodes: 24 | break 25 | return data 26 | 27 | def main(args): 28 | config_path = args.config_path 29 | if config_path is None: 30 | config_path = utils.select_run() 31 | if config_path is None: 32 | print('Please provide a config path') 33 | return 34 | cfg = utils.read_config(config_path) 35 | 36 | eval_dir = Path(cfg.logs_dir).parent / 'eval' 37 | if not eval_dir.exists(): 38 | eval_dir.mkdir(parents=True, exist_ok=True) 39 | 40 | eval_path = eval_dir / '{}.npy'.format(cfg.run_name) 41 | data = _run_eval(cfg) 42 | np.save(eval_path, data) 43 | print(eval_path) 44 | 45 | if __name__ == 
class SteeringCommandsDQN(Module):
    """DQN head for the steering-commands action space.

    A plain ResNet-18 classifier whose `num_output_channels` logits serve as
    the Q-values, one per discrete steering command.
    """

    def __init__(self, num_input_channels=3, num_output_channels=4):
        super().__init__()
        # ResNet-18 backbone with its final fc layer sized to the action count
        self.resnet18 = resnet.resnet18(num_input_channels=num_input_channels, num_classes=num_output_channels)

    def forward(self, x):
        return self.resnet18(x)

class DenseActionSpaceDQN(Module):
    """Fully-convolutional DQN for the dense (spatial) action space.

    Produces a spatial map of Q-values: ResNet-18 features are reduced with
    1x1 convolutions and bilinearly upsampled 2x twice, so the output is a
    per-location grid rather than a flat vector.
    """

    def __init__(self, num_input_channels=3, num_output_channels=1):
        super().__init__()
        self.resnet18 = resnet.resnet18(num_input_channels=num_input_channels)
        # 1x1 convs progressively reduce channels: 512 -> 128 -> 32 -> num_output_channels
        self.conv1 = Conv2d(512, 128, kernel_size=1, stride=1)
        self.conv2 = Conv2d(128, 32, kernel_size=1, stride=1)
        self.conv3 = Conv2d(32, num_output_channels, kernel_size=1, stride=1)

    def forward(self, x):
        x = self.resnet18.features(x)  # backbone feature map (512 channels)
        x = F.relu(self.conv1(x))
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)  # 2x upsample
        x = F.relu(self.conv2(x))
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)  # 2x upsample
        return self.conv3(x)
    def step(self, state, exploration_eps=None, debug=False):
        """Select an action for `state` with an epsilon-greedy policy.

        Args:
            state: Environment observation; converted to a batched tensor via
                `apply_transform` (presumably an HWC image array -- confirm
                against the environment).
            exploration_eps: Probability of taking a uniformly random action.
                Defaults to `cfg.final_exploration` when None.
            debug: When True, `info['output']` carries the raw network output.

        Returns:
            (action, info): `action` is a flat int index into the action
            space; `info` is a dict, non-empty only when `debug` is True.
        """
        if exploration_eps is None:
            exploration_eps = self.cfg.final_exploration
        state = self.apply_transform(state).to(self.device)
        # No gradients are needed for action selection
        with torch.no_grad():
            output = self.policy_net(state).squeeze(0)
        if random.random() < exploration_eps:
            # Explore: uniform random action
            action = random.randrange(self.action_space)
        else:
            # Exploit: argmax over the flattened network output
            action = output.view(1, -1).max(1)[1].item()
        info = {}
        if debug:
            info['output'] = output.squeeze(0)
        return action, info
def conv3x3(in_planes, out_planes, stride=1):
    """Build a 3x3 convolution with 1-pixel padding and no bias term."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )


def conv1x1(in_planes, out_planes, stride=1):
    """Build a pointwise (1x1) convolution with no bias term."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
class Bottleneck(nn.Module):
    """ResNet bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand (x4).

    Note: unlike BasicBlock in this file (whose BatchNorm lines are commented
    out), this block keeps its BatchNorm layers.
    """
    # Output channels are `planes * expansion`
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        # Optional projection applied to the identity path so shapes match
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        # Residual connection, then final activation
        out += identity
        out = self.relu(out)

        return out
nn.ReLU(inplace=True) 111 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 112 | self.layer1 = self._make_layer(block, 64, layers[0]) 113 | #self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 114 | #self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 115 | #self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 116 | self.layer2 = self._make_layer(block, 128, layers[1]) 117 | self.layer3 = self._make_layer(block, 256, layers[2]) 118 | self.layer4 = self._make_layer(block, 512, layers[3]) 119 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 120 | self.fc = nn.Linear(512 * block.expansion, num_classes) 121 | 122 | for m in self.modules(): 123 | if isinstance(m, nn.Conv2d): 124 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 125 | #elif isinstance(m, nn.BatchNorm2d): 126 | # nn.init.constant_(m.weight, 1) 127 | # nn.init.constant_(m.bias, 0) 128 | 129 | # Zero-initialize the last BN in each residual branch, 130 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
def resnet18(pretrained=False, **kwargs):
    """Build a ResNet-18 model.

    Args:
        pretrained (bool): If True, loads ImageNet pre-trained weights.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
    return net
def resnet50(pretrained=False, **kwargs):
    """Build a ResNet-50 model.

    Args:
        pretrained (bool): If True, loads ImageNet pre-trained weights.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
    return net
import numpy as np
from PIL import Image
import spfa

# Create test map: an all-free grid with a near-full-width horizontal wall
num_rows = 250
num_cols = 350
source_i = 5
source_j = 100
source = (source_i, source_j)
free_space_map = np.ones([num_rows, num_cols], dtype=bool)
free_space_map[200:202, 1:300] = False  # wall rows 200-201, with gaps at both ends

# Run SPFA: dists holds shortest-path distances from the source,
# parents holds flattened predecessor indices (negative where none)
dists, parents = spfa.spfa(free_space_map, source)

# Highlight the path to a target by walking parent links back to the source
parents_ij = np.stack((parents // parents.shape[1], parents % parents.shape[1]), axis=2)
parents_ij[parents < 0, :] = (-1, -1)  # map "no parent" to coordinates (-1, -1)
target_i = 245
target_j = 50
i, j = target_i, target_j
while not (i == source_i and j == source_j):
    i, j = parents_ij[i, j]
    if i + j < 0:
        # Hit a cell with no parent: the target is unreachable from the source
        break
    dists[i][j] = 2 * 255  # mark path cells so they saturate after 0.5x scaling

# Visualize the output (0.5 scaling maps marked path cells to 255)
Image.fromarray((0.5 * dists).astype(np.uint8)).save('output.png')
def has_flag(compiler, flagname):
    """Check whether `compiler` accepts the given command-line flag.

    Compiles a throwaway minimal C++ source file with the flag appended;
    a CompileError means the flag is unsupported.
    """
    import tempfile
    probe_source = 'int main (int argc, char **argv) { return 0; }'
    with tempfile.NamedTemporaryFile('w', suffix='.cpp') as probe:
        probe.write(probe_source)
        try:
            compiler.compile([probe.name], extra_postargs=[flagname])
        except setuptools.distutils.errors.CompileError:
            return False
        return True
52 | """ 53 | flags = ['-std=c++14', '-std=c++11'] 54 | 55 | for flag in flags: 56 | if has_flag(compiler, flag): return flag 57 | 58 | raise RuntimeError('Unsupported compiler -- at least C++11 support ' 59 | 'is needed!') 60 | 61 | 62 | class BuildExt(build_ext): 63 | """A custom build extension for adding compiler-specific options.""" 64 | c_opts = { 65 | 'msvc': ['/EHsc'], 66 | 'unix': [], 67 | } 68 | l_opts = { 69 | 'msvc': [], 70 | 'unix': [], 71 | } 72 | 73 | if sys.platform == 'darwin': 74 | darwin_opts = ['-stdlib=libc++', '-mmacosx-version-min=10.7'] 75 | c_opts['unix'] += darwin_opts 76 | l_opts['unix'] += darwin_opts 77 | 78 | def build_extensions(self): 79 | ct = self.compiler.compiler_type 80 | opts = self.c_opts.get(ct, []) 81 | link_opts = self.l_opts.get(ct, []) 82 | if ct == 'unix': 83 | opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version()) 84 | opts.append(cpp_flag(self.compiler)) 85 | if has_flag(self.compiler, '-fvisibility=hidden'): 86 | opts.append('-fvisibility=hidden') 87 | elif ct == 'msvc': 88 | opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()) 89 | for ext in self.extensions: 90 | ext.extra_compile_args = opts 91 | ext.extra_link_args = link_opts 92 | build_ext.build_extensions(self) 93 | 94 | setup( 95 | name='spfa', 96 | version=__version__, 97 | description='SPFA implemented in C++', 98 | long_description='', 99 | ext_modules=ext_modules, 100 | install_requires=['pybind11>=2.3'], 101 | setup_requires=['pybind11>=2.3'], 102 | cmdclass={'build_ext': BuildExt}, 103 | zip_safe=False, 104 | ) 105 | -------------------------------------------------------------------------------- /spfa/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace py = pybind11; 5 | 6 | inline int ravel(int i, int j, int num_cols) { 7 | return i * num_cols + j; 8 | } 9 | 10 | py::tuple spfa(py::array_t input_map, std::tuple source) { 11 | const 
float eps = 1e-6; 12 | const int num_dirs = 8; 13 | const int dirs[num_dirs][2] = {{-1, -1}, {-1, 0}, {-1, 1}, {0, 1}, {1, 1}, {1, 0}, {1, -1}, {0, -1}}; 14 | const float dir_lengths[num_dirs] = {std::sqrt(2.0f), 1, std::sqrt(2.0f), 1, std::sqrt(2.0f), 1, std::sqrt(2.0f), 1}; 15 | 16 | // Process input map 17 | py::buffer_info map_buf = input_map.request(); 18 | int num_rows = map_buf.shape[0]; 19 | int num_cols = map_buf.shape[1]; 20 | bool* map_ptr = (bool *) map_buf.ptr; 21 | 22 | // Get source coordinates 23 | int source_i = std::get<0>(source); 24 | int source_j = std::get<1>(source); 25 | 26 | int max_num_verts = num_rows * num_cols; 27 | int max_edges_per_vert = num_dirs; 28 | const float inf = 2 * max_num_verts; 29 | int queue_size = num_dirs * num_rows * num_cols; 30 | 31 | // Initialize arrays 32 | int* edges = new int[max_num_verts * max_edges_per_vert](); 33 | int* edge_counts = new int[max_num_verts](); 34 | int* queue = new int[queue_size](); 35 | bool* in_queue = new bool[max_num_verts](); 36 | float* weights = new float[max_num_verts * max_edges_per_vert](); 37 | float* dists = new float[max_num_verts]; 38 | for (int i = 0; i < max_num_verts; ++i) 39 | dists[i] = inf; 40 | int* parents = new int[max_num_verts](); 41 | for (int i = 0; i < max_num_verts; ++i) 42 | parents[i] = -1; 43 | 44 | // Build graph 45 | for (int i = 0; i < num_rows; ++i) { 46 | for (int j = 0; j < num_cols; ++j) { 47 | int v = ravel(i, j, num_cols); 48 | if (!map_ptr[v]) 49 | continue; 50 | for (int k = 0; k < num_dirs; ++k) { 51 | int ip = i + dirs[k][0], jp = j + dirs[k][1]; 52 | if (ip < 0 || jp < 0 || ip >= num_rows || jp >= num_cols) 53 | continue; 54 | int vp = ravel(ip, jp, num_cols); 55 | if (!map_ptr[vp]) 56 | continue; 57 | int e = ravel(v, edge_counts[v], max_edges_per_vert); 58 | edges[e] = vp; 59 | weights[e] = dir_lengths[k]; 60 | edge_counts[v]++; 61 | } 62 | } 63 | } 64 | 65 | // SPFA 66 | int s = ravel(source_i, source_j, num_cols); 67 | int head = 0, tail = 0; 
68 | dists[s] = 0; 69 | queue[++tail] = s; 70 | in_queue[s] = true; 71 | while (head < tail) { 72 | int u = queue[++head]; 73 | in_queue[u] = false; 74 | for (int j = 0; j < edge_counts[u]; ++j) { 75 | int e = ravel(u, j, max_edges_per_vert); 76 | int v = edges[e]; 77 | float new_dist = dists[u] + weights[e]; 78 | if (new_dist < dists[v]) { 79 | parents[v] = u; 80 | dists[v] = new_dist; 81 | if (!in_queue[v]) { 82 | assert(tail < queue_size); 83 | queue[++tail] = v; 84 | in_queue[v] = true; 85 | if (dists[queue[tail]] < dists[queue[head + 1]]) 86 | std::swap(queue[tail], queue[head + 1]); 87 | } 88 | } 89 | } 90 | } 91 | 92 | // Copy output into numpy array 93 | auto output_dists = py::array_t({num_rows, num_cols}); 94 | auto output_parents = py::array_t({num_rows, num_cols}); 95 | py::buffer_info dists_buf = output_dists.request(), parents_buf = output_parents.request(); 96 | float* dists_ptr = (float *) dists_buf.ptr; 97 | int* parents_ptr = (int *) parents_buf.ptr; 98 | for (int i = 0; i < num_rows; ++i) { 99 | for (int j = 0; j < num_cols; ++j) { 100 | int u = ravel(i, j, num_cols); 101 | dists_ptr[u] = (dists[u] < inf - eps) * dists[u]; 102 | parents_ptr[u] = parents[u]; 103 | } 104 | } 105 | 106 | // Free memory 107 | delete[] edges; 108 | delete[] edge_counts; 109 | delete[] queue; 110 | delete[] in_queue; 111 | delete[] weights; 112 | delete[] dists; 113 | delete[] parents; 114 | 115 | return py::make_tuple(output_dists, output_parents); 116 | } 117 | 118 | PYBIND11_MODULE(spfa, m) { 119 | m.doc() = R"pbdoc( 120 | SPFA implemented in C++ 121 | )pbdoc"; 122 | 123 | m.def("spfa", &spfa, R"pbdoc( 124 | spfa 125 | )pbdoc"); 126 | 127 | #ifdef VERSION_INFO 128 | m.attr("__version__") = VERSION_INFO; 129 | #else 130 | m.attr("__version__") = "dev"; 131 | #endif 132 | } 133 | -------------------------------------------------------------------------------- /stl/blade.stl: -------------------------------------------------------------------------------- 
    def update_display(self, state, last_reward, last_ministeps):
        """Render the current state with a reward banner in the OpenCV window."""
        # Small banner showing the last reward and ministep count
        reward_img = utils.get_reward_img(last_reward, last_ministeps, self.reward_img_height, self.env.get_state_width())
        # Reverse the channel axis (presumably RGB -> BGR for cv2.imshow -- confirm
        # against utils.get_state_visualization)
        state_img = utils.get_state_visualization(state)[:, :, ::-1]
        # Stack the banner above the state image
        cv2.imshow(self.window_name, np.concatenate((reward_img, state_img), axis=0))
def main():
    """Run an interactive ClickAgent session in a GUI pybullet environment."""
    # Obstacle config
    obstacle_config = 'small_empty'
    #obstacle_config = 'small_columns'
    #obstacle_config = 'large_columns'
    #obstacle_config = 'large_divider'

    # Room config: large rooms are 1 m wide with 20 cubes, small rooms 0.5 m with 10
    kwargs = {}
    kwargs['room_width'] = 1 if obstacle_config.startswith('large') else 0.5
    kwargs['num_cubes'] = 20 if obstacle_config.startswith('large') else 10
    kwargs['obstacle_config'] = obstacle_config
    #kwargs['random_seed'] = 0

    # Visualization
    kwargs['use_gui'] = True
    kwargs['show_debug_annotations'] = True
    #kwargs['show_occupancy_map'] = True

    # Shortest path components
    #kwargs['use_distance_to_receptacle_channel'] = False
    #kwargs['use_shortest_path_to_receptacle_channel'] = True
    #kwargs['use_shortest_path_channel'] = True
    #kwargs['use_shortest_path_partial_rewards'] = True
    #kwargs['use_shortest_path_movement'] = True

    env = environment.Environment(**kwargs)
    agent = ClickAgent(env)
    try:
        agent.run()
    finally:
        # Always release the simulation, even if run() raises
        env.close()

if __name__ == '__main__':
    main()
def generate_experiment(experiment_name, template_experiment_name, modify_cfg_fn, output_dir, template_dir='config/experiments/base'):
    """Derive a new experiment config from a template and save it.

    Args:
        experiment_name: Name of the new experiment (also the output filename stem).
        template_experiment_name: Filename stem of the template config in `template_dir`.
        modify_cfg_fn: Callable applied to the loaded config to customize it;
            it may change field values but must not add or remove fields.
        output_dir: Directory in which '<experiment_name>.yml' is written.
        template_dir: Directory containing the template config.
    """
    # Ensure output dir exists (exist_ok avoids the exists()/mkdir() race)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Read template config
    cfg = utils.read_config(Path(template_dir) / '{}.yml'.format(template_experiment_name))

    # Apply modifications
    cfg.experiment_name = experiment_name
    num_fields = len(cfg)
    modify_cfg_fn(cfg)
    assert num_fields == len(cfg), experiment_name # New fields should not have been added

    # Save new config
    utils.write_config(cfg, output_dir / '{}.yml'.format(experiment_name))
def modify_cfg_fixed_step_size(cfg): 47 | cfg.fixed_step_size = 0.25 48 | for obstacle_config in obstacle_configs: 49 | experiment_name = '-'.join([obstacle_config, 'fixed_step_size']) 50 | generate_experiment(experiment_name, obstacle_config, modify_cfg_fixed_step_size, 'config/experiments/fixed-step-size') 51 | 52 | # Shortest path ablations 53 | def modify_cfg_sp_ablation(cfg, ablation): 54 | if ablation == 'no_sp_movement': 55 | cfg.use_shortest_path_movement = False 56 | elif ablation == 'no_sp_from_agent': 57 | cfg.num_input_channels -= 1 58 | cfg.use_shortest_path_channel = False 59 | elif ablation == 'no_sp_to_receptacle': 60 | cfg.use_distance_to_receptacle_channel = True 61 | cfg.use_shortest_path_to_receptacle_channel = False 62 | elif ablation == 'no_sp_in_rewards': 63 | cfg.use_shortest_path_partial_rewards = False 64 | elif ablation == 'no_sp_components': 65 | cfg.use_shortest_path_movement = False 66 | cfg.num_input_channels -= 1 67 | cfg.use_shortest_path_channel = False 68 | cfg.use_distance_to_receptacle_channel = True 69 | cfg.use_shortest_path_to_receptacle_channel = False 70 | cfg.use_shortest_path_partial_rewards = False 71 | for obstacle_config in obstacle_configs: 72 | for ablation in ['no_sp_movement', 'no_sp_from_agent', 'no_sp_to_receptacle', 'no_sp_in_rewards', 'no_sp_components']: 73 | if obstacle_config == 'small_empty' and ablation in ['no_sp_in_rewards', 'no_sp_movement', 'no_sp_to_receptacle']: 74 | continue 75 | experiment_name = '-'.join([obstacle_config, ablation]) 76 | generate_experiment(experiment_name, obstacle_config, lambda cfg: modify_cfg_sp_ablation(cfg, ablation), 'config/experiments/shortest-path-ablations') 77 | 78 | # Steering commands with no shortest path components 79 | def modify_cfg_steering_commands_no_sp_components(cfg): 80 | modify_cfg_steering_commands(cfg) 81 | modify_cfg_sp_ablation(cfg, 'no_sp_components') 82 | for obstacle_config in obstacle_configs: 83 | experiment_name = '-'.join([obstacle_config, 
'steering_commands', 'no_sp_components']) 84 | generate_experiment(experiment_name, obstacle_config, modify_cfg_steering_commands_no_sp_components, 'config/experiments/steering-commands') 85 | 86 | # No partial rewards 87 | def modify_cfg_no_partial_rewards(cfg): 88 | cfg.partial_rewards_scale = 0 89 | for obstacle_config in obstacle_configs: 90 | experiment_name = '-'.join([obstacle_config, 'no_partial_rewards']) 91 | generate_experiment(experiment_name, obstacle_config, modify_cfg_no_partial_rewards, 'config/experiments/no-partial-rewards') 92 | 93 | # For development on local machine 94 | def modify_cfg_to_local(cfg): 95 | cfg.batch_size = 4 96 | cfg.replay_buffer_size = 1000 97 | cfg.learning_starts = 10 98 | cfg.inactivity_cutoff = 5 99 | generate_experiment('small_empty-local', 'small_empty', modify_cfg_to_local, 'config/local') 100 | 101 | main() 102 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html 2 | import argparse 3 | import random 4 | import sys 5 | import time 6 | from collections import namedtuple 7 | from pathlib import Path 8 | 9 | # Prevent numpy from using up all cpu 10 | import os 11 | os.environ['MKL_NUM_THREADS'] = '1' # pylint: disable=wrong-import-position 12 | 13 | import torch 14 | import torch.optim as optim 15 | from torch.nn.functional import smooth_l1_loss 16 | from torch.utils.tensorboard import SummaryWriter 17 | from tqdm import tqdm 18 | 19 | import utils 20 | 21 | 22 | torch.backends.cudnn.benchmark = True 23 | 24 | Transition = namedtuple('Transition', ('state', 'action', 'reward', 'ministeps', 'next_state')) 25 | 26 | class ReplayBuffer: 27 | def __init__(self, capacity): 28 | self.capacity = capacity 29 | self.buffer = [] 30 | self.position = 0 31 | 32 | def push(self, *args): 33 | if len(self.buffer) < 
def train(cfg, policy_net, target_net, optimizer, batch, transform_func):
    """Run one DQN optimization step on a batch of transitions.

    Computes TD targets from ``target_net`` (optionally with double DQN
    action selection from ``policy_net``), takes one optimizer step on
    ``policy_net`` with a smooth L1 (Huber) loss, and returns a dict of
    scalars for logging.

    Args:
        cfg: run config (uses batch_size, use_double_dqn, discount_factor,
            grad_norm_clipping).
        policy_net: network being trained.
        target_net: frozen copy used to compute bootstrap targets.
        optimizer: optimizer over policy_net's parameters.
        batch: Transition of tuples (state, action, reward, ministeps,
            next_state); next_state entries may be None for terminal steps.
        transform_func: converts a raw state into a batched input tensor.

    Returns:
        dict with 'q_value_min', 'q_value_max', 'td_error', 'loss'.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    state_batch = torch.cat([transform_func(s) for s in batch.state]).to(device)  # (32, 3, 96, 96)
    action_batch = torch.tensor(batch.action, dtype=torch.long).to(device)  # (32,)
    reward_batch = torch.tensor(batch.reward, dtype=torch.float32).to(device)  # (32,)
    ministeps_batch = torch.tensor(batch.ministeps, dtype=torch.float32).to(device)  # (32,)
    # Terminal transitions (next_state is None) are excluded; their bootstrap
    # value stays 0 via next_state_values below.
    non_final_next_states = torch.cat([transform_func(s) for s in batch.next_state if s is not None]).to(device, non_blocking=True)  # (?32, 3, 96, 96)

    # Flatten the spatial action map so actions index into (batch, H*W*channels)
    output = policy_net(state_batch)  # (32, 2, 96, 96)
    state_action_values = output.view(cfg.batch_size, -1).gather(1, action_batch.unsqueeze(1)).squeeze(1)  # (32,)
    next_state_values = torch.zeros(cfg.batch_size, dtype=torch.float32, device=device)  # (32,)
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)), dtype=torch.bool, device=device)  # (32,)

    if cfg.use_double_dqn:
        # Double DQN: policy_net picks the argmax action, target_net evaluates it
        with torch.no_grad():
            best_action = policy_net(non_final_next_states).view(non_final_next_states.size(0), -1).max(1)[1].view(non_final_next_states.size(0), 1)  # (32?, 1)
            next_state_values[non_final_mask] = target_net(non_final_next_states).view(non_final_next_states.size(0), -1).gather(1, best_action).view(-1)  # (32?,)
    else:
        next_state_values[non_final_mask] = target_net(non_final_next_states).view(non_final_next_states.size(0), -1).max(1)[0].detach()  # (32,)

    # Discount is raised to the number of ministeps the action actually took
    expected_state_action_values = (reward_batch + torch.pow(cfg.discount_factor, ministeps_batch) * next_state_values)  # (32,)
    td_error = torch.abs(state_action_values - expected_state_action_values).detach()  # (32,)
    loss = smooth_l1_loss(state_action_values, expected_state_action_values)

    optimizer.zero_grad()
    loss.backward()
    if cfg.grad_norm_clipping is not None:
        torch.nn.utils.clip_grad_norm_(policy_net.parameters(), cfg.grad_norm_clipping)
    optimizer.step()

    train_info = {}
    train_info['q_value_min'] = output.min().item()
    train_info['q_value_max'] = output.max().item()
    # NOTE(review): td_error/loss are stored as tensors, not .item() scalars —
    # downstream Meters.update appears to accept this; confirm before changing.
    train_info['td_error'] = td_error.mean()
    train_info['loss'] = loss

    return train_info
def main(cfg):
    """Train a DQN policy per ``cfg``: collect experience, optimize, log, checkpoint."""
    # Set up logging and checkpointing
    log_dir = Path(cfg.log_dir)
    checkpoint_dir = Path(cfg.checkpoint_dir)
    print('log_dir: {}'.format(log_dir))
    print('checkpoint_dir: {}'.format(checkpoint_dir))

    # Create environment (GUI on macOS, presumably for local development)
    kwargs = {}
    if sys.platform == 'darwin':
        kwargs['use_gui'] = True
    env = utils.get_env_from_cfg(cfg, **kwargs)

    # Policy
    policy = utils.get_policy_from_cfg(cfg, env.get_action_space(), train=True)

    # Optimizer
    optimizer = optim.SGD(policy.policy_net.parameters(), lr=cfg.learning_rate, momentum=0.9, weight_decay=cfg.weight_decay)

    # Replay buffer
    replay_buffer = ReplayBuffer(cfg.replay_buffer_size)

    # Resume if applicable (restores step counter, episode counter, optimizer
    # state, and the full replay buffer)
    start_timestep = 0
    episode = 0
    if cfg.checkpoint_path is not None:
        checkpoint = torch.load(cfg.checkpoint_path)
        start_timestep = checkpoint['timestep']
        episode = checkpoint['episode']
        optimizer.load_state_dict(checkpoint['optimizer'])
        replay_buffer = checkpoint['replay_buffer']
        print("=> loaded checkpoint '{}' (timestep {})".format(cfg.checkpoint_path, start_timestep))

    # Target net starts as a copy of the policy net and is synced periodically
    target_net = policy.build_network()
    target_net.load_state_dict(policy.policy_net.state_dict())
    target_net.eval()

    # Logging
    train_summary_writer = SummaryWriter(log_dir=str(log_dir / 'train'))
    visualization_summary_writer = SummaryWriter(log_dir=str(log_dir / 'visualization'))
    meters = Meters()

    state = env.reset()
    # Warm-up steps (learning_starts) fill the buffer before training begins
    total_timesteps_with_warm_up = cfg.learning_starts + cfg.total_timesteps
    for timestep in tqdm(range(start_timestep, total_timesteps_with_warm_up),
                         initial=start_timestep, total=total_timesteps_with_warm_up, file=sys.stdout):

        start_time = time.time()

        # Select an action: epsilon anneals linearly from 1 to final_exploration
        # over exploration_timesteps, counted after the warm-up period
        if cfg.exploration_timesteps > 0:
            exploration_eps = 1 - min(max(timestep - cfg.learning_starts, 0) / cfg.exploration_timesteps, 1) * (1 - cfg.final_exploration)
        else:
            exploration_eps = cfg.final_exploration
        action, _ = policy.step(state, exploration_eps=exploration_eps)

        # Step the simulation
        next_state, reward, done, info = env.step(action)
        ministeps = info['ministeps']

        # Store in buffer
        replay_buffer.push(state, action, reward, ministeps, next_state)
        state = next_state

        # Reset if episode ended
        if done:
            state = env.reset()
            episode += 1

        # Train network
        if timestep >= cfg.learning_starts:
            batch = replay_buffer.sample(cfg.batch_size)
            train_info = train(cfg, policy.policy_net, target_net, optimizer, batch, policy.apply_transform)

            # Update target network
            if (timestep + 1) % cfg.target_update_freq == 0:
                target_net.load_state_dict(policy.policy_net.state_dict())

        step_time = time.time() - start_time

        ################################################################################
        # Logging

        # Meters accumulate per-step scalars between episode boundaries
        meters.update('step_time', step_time)
        if timestep >= cfg.learning_starts:
            for name, val in train_info.items():
                meters.update(name, val)

        if done:
            for name in meters.get_names():
                train_summary_writer.add_scalar(name, meters.avg(name), timestep + 1)
            eta_seconds = meters.avg('step_time') * (total_timesteps_with_warm_up - timestep)
            meters.reset()

            train_summary_writer.add_scalar('episodes', episode, timestep + 1)
            train_summary_writer.add_scalar('eta_hours', eta_seconds / 3600, timestep + 1)

            for name in ['cumulative_cubes', 'cumulative_distance', 'cumulative_reward']:
                train_summary_writer.add_scalar(name, info[name], timestep + 1)

        # Visualize Q-network outputs on a randomly sampled stored state
        if timestep >= cfg.learning_starts and not cfg.use_steering_commands:
            random_state = random.choice(replay_buffer.buffer).state
            _, info = policy.step(random_state, debug=True)
            output = info['output'].cpu().numpy()
            visualization = utils.get_state_and_output_visualization(random_state, output).transpose((2, 0, 1))
            visualization_summary_writer.add_image('output', visualization, timestep + 1)

        ################################################################################
        # Checkpointing

        if (timestep + 1) % cfg.checkpoint_freq == 0 or timestep + 1 == total_timesteps_with_warm_up:
            # Save model (weights only, kept for every checkpoint interval)
            if not checkpoint_dir.exists():
                checkpoint_dir.mkdir(parents=True, exist_ok=True)
            model_name = 'model_{:08d}.pth.tar'.format(timestep + 1)
            torch.save({
                'timestep': timestep + 1,
                'state_dict': policy.policy_net.state_dict(),
            }, str(checkpoint_dir / model_name))

            # Save checkpoint (full resume state, including the replay buffer)
            checkpoint_name = 'checkpoint_{:08d}.pth.tar'.format(timestep + 1)
            torch.save({
                'timestep': timestep + 1,
                'episode': episode,
                'optimizer': optimizer.state_dict(),
                'replay_buffer': replay_buffer,
            }, str(checkpoint_dir / checkpoint_name))

            # Save updated config file so a later run can resume from it
            cfg.model_path = str(checkpoint_dir / model_name)
            cfg.checkpoint_path = str(checkpoint_dir / checkpoint_name)
            utils.write_config(cfg, log_dir / 'config.yml')

            # Remove old checkpoint (only the newest resume state is kept)
            old_checkpoint_path = checkpoint_dir / 'checkpoint_{:08d}.pth.tar'.format((timestep + 1) - cfg.checkpoint_freq)
            if old_checkpoint_path.exists():
                old_checkpoint_path.unlink()

    env.close()

    # Create file to indicate training completed
    (log_dir / 'success').touch()
class AverageMeter:
    """Tracks the latest value and the running mean of a scalar series."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` (weighted by ``n`` samples) and refresh the mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

class Meters:
    """A named collection of AverageMeter objects, created on demand."""

    def __init__(self):
        self.meters = {}

    def get_names(self):
        """Return a view of all tracked meter names."""
        return self.meters.keys()

    def reset(self):
        """Reset every tracked meter to zero."""
        for meter in self.meters.values():
            meter.reset()

    def update(self, name, val):
        """Record ``val`` under ``name``, creating the meter if needed."""
        self.meters.setdefault(name, AverageMeter()).update(val)

    def avg(self, name):
        """Return the running mean recorded under ``name``."""
        return self.meters[name].avg
def read_config(config_path):
    """Load a YAML config file into a Munch object."""
    with open(config_path, 'r') as f:
        return Munch.fromYAML(f)

def write_config(cfg, config_path):
    """Serialize a Munch config back out as YAML."""
    with open(config_path, 'w') as f:
        f.write(cfg.toYAML())

def setup_run(config_path):
    """Initialize bookkeeping for a new run and return its config path.

    Assigns a timestamped run_name, creates the log directory, records
    log_dir/checkpoint_dir in the config, and writes the config into the
    log directory. If the config already has a log_dir, the run was set
    up previously and the original path is returned unchanged.
    """
    cfg = read_config(config_path)

    # A non-None log_dir means this run has already been set up
    if cfg.log_dir is not None:
        return config_path

    timestamp = datetime.now().strftime('%Y%m%dT%H%M%S%f')
    cfg.run_name = '{}-{}'.format(timestamp, cfg.experiment_name)
    log_dir = Path(cfg.logs_dir) / cfg.run_name
    log_dir.mkdir(parents=True, exist_ok=True)
    cfg.log_dir = str(log_dir)
    cfg.checkpoint_dir = str(Path(cfg.checkpoints_dir) / cfg.run_name)

    # Save config file for the new run
    new_config_path = log_dir / 'config.yml'
    write_config(cfg, new_config_path)
    return new_config_path
def scale_min_max(arr):
    """Linearly rescale ``arr`` so its minimum maps to 0 and maximum to 1."""
    lo, hi = arr.min(), arr.max()
    return (arr - lo) / (hi - lo)

def to_uint8_image(image):
    """Convert a float image with values in [0, 1] to uint8 in [0, 255]."""
    scaled = 255.0 * image
    return np.round(scaled).astype(np.uint8)
def get_overhead_image(state):
    """Replicate the overhead-map channel (channel 0) into a 3-channel image."""
    overhead_map = state[:, :, 0]
    return np.stack([overhead_map] * 3, axis=2)
= None 25 | 26 | def update_try_action_result(self, try_action_result): 27 | # Simulation results 28 | self.target_end_effector_position = try_action_result['target_end_effector_position'] 29 | self.last_update_time = time.time() 30 | 31 | def step(self, poses): 32 | if self.target_end_effector_position is None: 33 | return 34 | 35 | robot_poses = poses['robots'] 36 | if robot_poses is None or self.robot_index not in robot_poses: 37 | return 38 | 39 | robot_position = robot_poses[self.robot_index]['position'] 40 | robot_heading = robot_poses[self.robot_index]['heading'] 41 | info = { 42 | 'last_robot_position': self.last_robot_position, 43 | 'last_robot_heading': self.last_robot_heading, 44 | 'robot_position': robot_position, 45 | 'robot_heading': robot_heading, 46 | 'target_end_effector_position': self.target_end_effector_position, 47 | } 48 | if self.jittering: 49 | info['robot_heading'] += np.random.uniform(-np.pi, np.pi) 50 | self.last_robot_position = robot_position 51 | self.last_robot_heading = robot_heading 52 | 53 | # Update the controller 54 | self.controller.step(info) 55 | 56 | # Handle robot getting stuck 57 | if self.controller.is_stuck(): 58 | self.stuck_count += 1 59 | else: 60 | self.stuck_count = 0 61 | self.unstuck_count += 1 62 | if self.stuck_count > 30: 63 | self.jittering = True 64 | self.unstuck_count = 0 65 | if self.jittering and self.unstuck_count > 5: 66 | self.jittering = False 67 | 68 | def is_action_completed(self, timeout=10): 69 | if any([self.controller.is_idle(), self.stuck_count > 10, time.time() - self.last_update_time > timeout]): 70 | return True 71 | return False 72 | 73 | def disconnect(self): 74 | self.controller.disconnect() 75 | -------------------------------------------------------------------------------- /vector_click_agent.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from multiprocessing.connection import Client 3 | 4 | import cv2 5 | 6 | import 
class ClickAgent:
    """Interactive agent for the real robot: the user clicks a pixel in the
    state visualization window, the click is converted to a dense action,
    simulated via the environment, and dispatched to the action executor."""

    def __init__(self, conn, env, action_executor):
        # conn: connection delivering pose estimates (polled in run())
        # env: simulation environment mirroring the real world
        # action_executor: drives the physical robot toward the chosen target
        self.conn = conn
        self.env = env
        self.action_executor = action_executor
        self.window_name = 'window'
        cv2.namedWindow(self.window_name, cv2.WINDOW_GUI_NORMAL)
        #cv2.resizeWindow(self.window_name, 960, 960)
        cv2.setMouseCallback(self.window_name, self.mouse_callback)
        # (row, col) of the user's last click, or None when no click is pending
        self.selected_action = None

    def mouse_callback(self, event, x, y, *_):
        """Record a left click as a pending (row, col) action selection."""
        if event == cv2.EVENT_LBUTTONDOWN:
            # Stored as (y, x) to match image row/column indexing
            self.selected_action = (y, x)

    def run(self):
        """Main loop: sync poses, display state, and execute clicked actions.

        Runs until interrupted; always disconnects the action executor on exit.
        """
        try:
            while True:
                # Get new pose estimates
                poses = None
                while self.conn.poll():  # Discard all but the latest data
                    poses = self.conn.recv()
                if poses is None:
                    continue

                # Update poses in the simulation
                self.env.update_poses(poses)

                # Visualize state representation (reverse channels: RGB -> BGR for OpenCV)
                state = self.env.get_state()
                cv2.imshow(self.window_name, utils.get_state_visualization(state)[:, :, ::-1])
                cv2.waitKey(1)

                if self.selected_action is not None:
                    # Flatten (row, col) into a dense action index: row * width + col
                    action = self.selected_action[0] * self.env.get_state_width() + self.selected_action[1]
                    self.selected_action = None

                    # Run selected action through simulation
                    try_action_result = self.env.try_action(action)

                    # Update action executor
                    self.action_executor.update_try_action_result(try_action_result)

                # Run action executor
                self.action_executor.step(poses)
        finally:
            self.action_executor.disconnect()
import anki_vector
import numpy as np

import vector_utils
from environment import ROBOT_RADIUS

# Wheel-motor speed limits and control gains for the physical Vector robot
MIN_TURN_SPEED = 10
MAX_TURN_SPEED = 150
MIN_MOVE_SPEED = 10
MAX_MOVE_SPEED = 150
#PROPORTIONAL_GAIN = 65
PROPORTIONAL_GAIN = 55
TURN_ERROR_THRESHOLD = np.radians(5)
MOVE_ERROR_THRESHOLD = np.radians(30)
TARGET_DIST_THRESHOLD = 0.010
SLOWING_DIST_THRESHOLD = 0.100

class VectorController:
    """Turn-then-move proportional controller for an Anki Vector robot.

    step() advances a three-state machine (idle -> turning -> moving) that
    first rotates the robot toward the target end-effector position and then
    drives forward/backward with a heading correction, stopping within
    TARGET_DIST_THRESHOLD of the target.
    """

    def __init__(self, robot_index):
        """Connect to the robot at ``robot_index`` with exclusive behavior control."""
        self.robot_name = vector_utils.get_robot_names()[robot_index]
        serial = vector_utils.get_robot_serials()[self.robot_name]
        #self.robot = anki_vector.AsyncRobot(serial=serial, default_logging=False)
        self.robot = anki_vector.AsyncRobot(serial=serial, default_logging=False, behavior_control_level=anki_vector.connection.ControlPriorityLevel.OVERRIDE_BEHAVIORS_PRIORITY)
        self.robot.connect()
        print('Connected to {}'.format(self.robot_name))

        self._reset_motors()
        self.robot_state = 'idle'  # one of 'idle', 'turning', 'moving'
        self.stuck = False

    def step(self, info):
        """Advance the control state machine by one tick.

        ``info`` must contain 'robot_position', 'robot_heading',
        'last_robot_position', 'last_robot_heading' (may be None on the first
        tick), and 'target_end_effector_position'. Sets wheel motor speeds and
        updates the stuck flag as a side effect.
        """
        last_robot_position, last_robot_heading = info['last_robot_position'], info['last_robot_heading']
        robot_position, robot_heading = info['robot_position'], info['robot_heading']
        target_end_effector_position = info['target_end_effector_position']

        # Compute target heading; stop ROBOT_RADIUS short so the end effector
        # (not the robot center) reaches the target
        x_movement = target_end_effector_position[0] - robot_position[0]
        y_movement = target_end_effector_position[1] - robot_position[1]
        dist = np.sqrt(x_movement**2 + y_movement**2) - ROBOT_RADIUS
        move_sign = np.sign(dist)  # negative -> back up
        dist = abs(dist)
        target_heading = np.arctan2(y_movement, x_movement)

        # Compute heading error, wrapped into [-pi, pi)
        heading_error = target_heading - robot_heading
        heading_error = np.mod(heading_error + np.pi, 2 * np.pi) - np.pi

        # Set wheel motors
        if self.robot_state == 'idle':
            if dist > TARGET_DIST_THRESHOLD or abs(heading_error) > TURN_ERROR_THRESHOLD:
                self.robot_state = 'turning'
            else:
                self.robot.motors.set_wheel_motors(0, 0)

        elif self.robot_state == 'turning':
            if abs(heading_error) < TURN_ERROR_THRESHOLD:
                self.robot_state = 'moving'
                self.robot.motors.set_wheel_motors(0, 0)
            else:
                signed_turn_speed = PROPORTIONAL_GAIN * heading_error
                signed_turn_speed = np.clip(signed_turn_speed, -MAX_TURN_SPEED, MAX_TURN_SPEED)
                # Boost sub-minimum commands up to MIN_TURN_SPEED so the wheels actually move
                signed_turn_speed = max(1, MIN_TURN_SPEED / abs(signed_turn_speed)) * signed_turn_speed
                self.robot.motors.set_wheel_motors(-signed_turn_speed, signed_turn_speed)

        elif self.robot_state == 'moving':
            if abs(heading_error) > MOVE_ERROR_THRESHOLD:  # Veered too far off path
                self.robot.motors.set_wheel_motors(0, 0)
                self.robot_state = 'turning'
            elif dist < TARGET_DIST_THRESHOLD:
                self.robot.motors.set_wheel_motors(0, 0)
                self.robot_state = 'idle'
            else:
                signed_turn_speed = PROPORTIONAL_GAIN * heading_error
                signed_turn_speed = np.clip(signed_turn_speed, -MAX_TURN_SPEED, MAX_TURN_SPEED)
                # Slow down proportionally inside SLOWING_DIST_THRESHOLD of the target
                move_speed = min(MAX_MOVE_SPEED, max(MIN_MOVE_SPEED, 1000 * dist)) if dist < SLOWING_DIST_THRESHOLD else MAX_MOVE_SPEED
                self.robot.motors.set_wheel_motors(move_sign * move_speed - signed_turn_speed, move_sign * move_speed + signed_turn_speed)

        # Check if stuck: commanded to move/turn but pose barely changed
        self.stuck = False
        if last_robot_position is not None and self.robot_state in ('moving', 'turning'):
            position_diff = np.linalg.norm(np.asarray(robot_position) - np.asarray(last_robot_position))
            heading_diff = robot_heading - last_robot_heading
            heading_diff = np.mod(heading_diff + np.pi, 2 * np.pi) - np.pi
            # Bug fix: compare |heading_diff| — previously a large NEGATIVE
            # heading change (robot turning clockwise) satisfied
            # `heading_diff < radians(3)` and falsely flagged the robot as stuck.
            if position_diff < 0.003 and abs(heading_diff) < np.radians(3):
                self.stuck = True

    def is_idle(self):
        """Return True when the state machine has reached the target."""
        return self.robot_state == 'idle'

    def is_stuck(self):
        """Return True when the last step detected no appreciable motion."""
        return self.stuck

    def _reset_motors(self):
        """Level the head and stop both wheels."""
        self.robot.behavior.set_head_angle(anki_vector.util.degrees(0))
        self.robot.motors.set_wheel_motors(0, 0)

    def disconnect(self):
        """Stop the motors and release the robot connection."""
        self._reset_motors()
        self.robot.disconnect()
        print('Disconnected from {}'.format(self.robot_name))
utils.read_config(config_path) 29 | kwargs = {'num_cubes': args.num_cubes} 30 | if args.debug: 31 | kwargs['use_gui'] = True 32 | cube_indices = list(range(args.num_cubes)) 33 | env = utils.get_env_from_cfg(cfg, real_env=True, robot_index=action_executor.robot_index, cube_indices=cube_indices, **kwargs) 34 | env.reset() 35 | 36 | # Create policy 37 | policy = utils.get_policy_from_cfg(cfg, env.get_action_space()) 38 | 39 | # Debug visualization 40 | if args.debug: 41 | cv2.namedWindow('out', cv2.WINDOW_NORMAL) 42 | #cv2.resizeWindow('out', 960, 480) 43 | 44 | try: 45 | while True: 46 | # Get new pose estimates 47 | poses = None 48 | while conn.poll(): # ensure up-to-date data 49 | poses = conn.recv() 50 | if poses is None: 51 | continue 52 | 53 | # Update poses in the simulation 54 | env.update_poses(poses) 55 | 56 | # Get new action 57 | state = env.get_state() 58 | if action_executor.is_action_completed() and args.debug: 59 | action, info = policy.step(state, debug=True) 60 | # Visualize 61 | assert not cfg.use_steering_commands 62 | output = info['output'].cpu().numpy() 63 | cv2.imshow('out', utils.get_state_and_output_visualization(state, output)[:, :, ::-1]) 64 | cv2.waitKey(1) 65 | else: 66 | action, _ = policy.step(state) 67 | 68 | # Run selected action through simulation 69 | try_action_result = env.try_action(action) 70 | 71 | if action_executor.is_action_completed(): 72 | # Update action executor 73 | action_executor.update_try_action_result(try_action_result) 74 | 75 | # Run action executor 76 | action_executor.step(poses) 77 | 78 | finally: 79 | action_executor.disconnect() 80 | 81 | parser = argparse.ArgumentParser() 82 | parser.add_argument('--config-path') 83 | parser.add_argument('--robot-index', type=int) 84 | parser.add_argument('--num-cubes', type=int, default=10) 85 | parser.add_argument('--debug', action='store_true') 86 | parser.set_defaults(debug=False) 87 | main(parser.parse_args()) 88 | 
def reserve_control(args):
    """Reserve behavior control of one robot so it stays still, then level its head.

    ``args`` is an (index, (name, serial)) pair as produced by
    ``enumerate(sorted(robot_serials.items()))``. Connection failures are
    reported and swallowed so the pool can continue with other robots.
    """
    robot_index, (robot_name, robot_serial) = args
    try:
        # NOTE(review): reaches into the private ``_conn`` of
        # ReserveBehaviorControl to hold the reservation open without the
        # context manager — relies on SDK internals; confirm against the
        # installed anki_vector version.
        anki_vector.behavior.ReserveBehaviorControl(serial=robot_serial)._conn.connect()
        with anki_vector.Robot(serial=robot_serial, default_logging=False) as robot:
            robot.behavior.set_head_angle(anki_vector.util.degrees(0))
        print('Connected to {} ({})'.format(robot_name, robot_index))
    except anki_vector.exceptions.VectorNotFoundException:
        print('Could not find {} ({})'.format(robot_name, robot_index))
    except anki_vector.exceptions.VectorControlTimeoutException:
        print('Could not connect to {} ({})'.format(robot_name, robot_index))

# Reserve all robots in parallel, then sleep forever so the process (and the
# behavior-control reservations it holds) stays alive.
robot_serials = utils.get_robot_serials()
with Pool(len(robot_serials)) as p:
    p.map(reserve_control, enumerate(sorted(robot_serials.items())))
while True:
    time.sleep(1)
def update_drive_state(self, key_code, is_key_down, speed_changed):
    """Track the WASD driving keys.

    Returns True when the wheel command needs refreshing: either a driving
    key changed, or (for any other key) the speed modifier changed.
    """
    drive_attrs = {
        ord('W'): 'drive_forwards',
        ord('S'): 'drive_back',
        ord('A'): 'turn_left',
        ord('D'): 'turn_right',
    }
    attr = drive_attrs.get(key_code)
    if attr is None:
        # Non-driving key: only a speed-modifier change affects the wheels.
        return bool(speed_changed)
    setattr(self, attr, is_key_down)
    return True

def update_lift_state(self, key_code, is_key_down, speed_changed):
    """Track the R/F lift keys; return True when the lift command needs refreshing."""
    if key_code == ord('R'):
        self.lift_up = is_key_down
        return True
    if key_code == ord('F'):
        self.lift_down = is_key_down
        return True
    return bool(speed_changed)

def update_head_state(self, key_code, is_key_down, speed_changed):
    """Track the T/G head keys; return True when the head command needs refreshing."""
    if key_code == ord('T'):
        self.head_up = is_key_down
        return True
    if key_code == ord('G'):
        self.head_down = is_key_down
        return True
    return bool(speed_changed)

def handle_key(self, key_code, is_shift_down, is_alt_down, is_key_down):
    """Process one key event.

    Updates the fast/slow speed modifiers first, then refreshes whichever
    motor groups (driving, head, lift) the event affects.
    """
    previous_modifiers = (self.go_fast, self.go_slow)
    self.go_fast = is_shift_down
    self.go_slow = is_alt_down
    speed_changed = previous_modifiers != (self.go_fast, self.go_slow)
    if self.update_drive_state(key_code, is_key_down, speed_changed):
        self.update_driving()
    if self.update_head_state(key_code, is_key_down, speed_changed):
        self.update_head()
    if self.update_lift_state(key_code, is_key_down, speed_changed):
        self.update_lift()
def update_lift(self):
    """Send the lift motor velocity implied by the current R/F key state.

    The send is skipped when the velocity equals the last one sent, to avoid
    redundant SDK calls (`last_lift` starts as None so the first command
    always goes out). NOTE(review): `self.last_lift and` makes the guard
    fail when the cached value is 0 — a repeated zero velocity is re-sent.
    """
    lift_speed = self.pick_speed(8, 4, 2)
    lift_vel = (self.lift_up - self.lift_down) * lift_speed
    if self.last_lift and lift_vel == self.last_lift:
        return
    self.last_lift = lift_vel
    self.vector.motors.set_lift_motor(lift_vel)

def update_head(self):
    """Send the head motor velocity implied by the current T/G key state.

    Same dedup-against-last-send pattern (and the same falsy-zero caveat)
    as update_lift.
    """
    head_speed = self.pick_speed(2, 1, 0.5)
    head_vel = (self.head_up - self.head_down) * head_speed
    if self.last_head and head_vel == self.last_head:
        return
    self.last_head = head_vel
    self.vector.motors.set_head_motor(head_vel)

def update_driving(self):
    """Mix the WASD key state into left/right wheel speeds and send them.

    Turning direction is inverted while reversing so the controls behave
    like a car. The last four-tuple sent is cached to suppress duplicate
    commands (same falsy caveat does not bite here: a non-empty tuple is
    always truthy).
    """
    drive_dir = (self.drive_forwards - self.drive_back)
    turn_dir = (self.turn_right - self.turn_left)
    if drive_dir < 0:
        # Reversing: flip the turn so steering matches driver intuition.
        turn_dir = -turn_dir
    forward_speed = self.pick_speed(150, 75, 50)
    turn_speed = self.pick_speed(100, 50, 30)
    l_wheel_speed = (drive_dir * forward_speed) + (turn_speed * turn_dir)
    r_wheel_speed = (drive_dir * forward_speed) - (turn_speed * turn_dir)
    # Last two values presumably are wheel accelerations (4x the speed) —
    # confirm against the anki_vector set_wheel_motors signature.
    wheel_params = (l_wheel_speed, r_wheel_speed, l_wheel_speed * 4, r_wheel_speed * 4)
    if self.last_wheels and wheel_params == self.last_wheels:
        return
    self.last_wheels = wheel_params
    self.vector.motors.set_wheel_motors(*wheel_params)
available keyboard commands 135 | width, height = 320, 240 136 | window = pyglet.window.Window(width, height) 137 | lines = [ 138 | 'ws - drive forward/backward', 139 | 'ad - turn left/right', 140 | 'rf - lift up/down', 141 | 'tg - head up/down' 142 | ] 143 | label = pyglet.text.Label('\n'.join(lines), x=(width // 2), y=(height // 2), width=width, anchor_x='center', anchor_y='center', align='center', multiline=True) 144 | keys = key.KeyStateHandler() 145 | window.push_handlers(keys) 146 | 147 | @window.event 148 | def on_draw(): 149 | window.clear() 150 | label.draw() 151 | is_shift_down = keys[key.LSHIFT] or keys[key.RSHIFT] 152 | is_alt_down = (keys[key.LOPTION] or keys[key.ROPTION]) if sys.platform == 'darwin' else (keys[key.LALT] or keys[key.RALT]) 153 | for k in ['W', 'S', 'A', 'D', 'R', 'F', 'T', 'G']: 154 | key_code = ord(k) 155 | key_constant = ord(k.lower()) 156 | is_key_down = keys[key_constant] 157 | remote_control_vector.handle_key(key_code, is_shift_down, is_alt_down, is_key_down) 158 | 159 | pyglet.app.run() 160 | 161 | parser = argparse.ArgumentParser() 162 | parser.add_argument('--robot-index', type=int) 163 | main(parser.parse_args()) 164 | -------------------------------------------------------------------------------- /vector_run_mdns.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from multiprocessing.dummy import Pool 3 | import anki_vector 4 | import vector_utils as utils 5 | 6 | config = utils.get_config() 7 | robot_names = utils.get_robot_names() 8 | robot_serials = utils.get_robot_serials() 9 | 10 | with Pool(len(robot_names)) as p: 11 | mdns_results = p.map(anki_vector.mdns.VectorMdns.find_vector, robot_names) 12 | for name, result in zip(robot_names, mdns_results): 13 | if result is None: 14 | print('{} was not found'.format(name)) 15 | else: 16 | hostname = result['name'].lower()[:-1] 17 | ip = result['ipv4'] 18 | subprocess.run(['ping', '-c', '1', ip], 
def get_config_path():
    """Return the path of the Anki Vector SDK configuration file."""
    return Path.home() / '.anki_vector/sdk_config.ini'


def get_config():
    """Parse and return the SDK config; each section is keyed by a robot serial."""
    parser = configparser.ConfigParser()
    parser.read(get_config_path())
    return parser


def write_config(config):
    """Write `config` back to the SDK configuration file."""
    with open(get_config_path(), 'w') as f:
        config.write(f)


def get_robot_names():
    """Return all configured robot names, sorted alphabetically."""
    cfg = get_config()
    names = [cfg[serial]['name'] for serial in cfg.sections()]
    names.sort()
    return names


def get_robot_serials():
    """Return a mapping of robot name -> robot serial."""
    cfg = get_config()
    return {cfg[serial]['name']: serial for serial in cfg.sections()}


def get_robot_indices():
    """Return a mapping of robot name -> position of its section in the config file."""
    cfg = get_config()
    return {cfg[serial]['name']: index for index, serial in enumerate(cfg.sections())}
def get_first_available_robot():
    """Return the name of the first robot that responds to a ping, or None if none do."""
    names = get_available_robots(num_robots=1)
    return names[0] if names else None