├── .gitignore ├── LICENSE ├── README.md ├── experiments ├── .root ├── assets │ └── robots │ │ └── xarm7 │ │ ├── meshes │ │ ├── collision │ │ │ ├── link1.obj │ │ │ ├── link1.obj.convex.stl │ │ │ ├── link2.obj │ │ │ ├── link2.obj.convex.stl │ │ │ ├── link3.obj │ │ │ ├── link3.obj.convex.stl │ │ │ ├── link4.obj │ │ │ ├── link4.obj.convex.stl │ │ │ ├── link5.obj │ │ │ ├── link5.obj.convex.stl │ │ │ ├── link6.obj │ │ │ ├── link6.obj.convex.stl │ │ │ ├── link7.obj │ │ │ ├── link7.obj.convex.stl │ │ │ ├── link_base.obj │ │ │ └── link_base.obj.convex.stl │ │ └── visual │ │ │ ├── link1.glb │ │ │ ├── link2.glb │ │ │ ├── link3.glb │ │ │ ├── link4.glb │ │ │ ├── link5.glb │ │ │ ├── link6.glb │ │ │ ├── link7.glb │ │ │ └── link_base.glb │ │ └── xarm7.urdf ├── cfg │ ├── default.yaml │ ├── model │ │ ├── friction │ │ │ └── default.yaml │ │ ├── material │ │ │ └── pointnet_nerf_material.yaml │ │ └── pgnd.yaml │ ├── render │ │ └── default.yaml │ ├── sim │ │ └── default.yaml │ └── train │ │ └── default.yaml ├── real_world │ ├── calibrate.py │ ├── camera │ │ ├── __init__.py │ │ ├── multi_realsense.py │ │ ├── shared_memory │ │ │ ├── __init__.py │ │ │ ├── shared_memory_queue.py │ │ │ ├── shared_memory_ring_buffer.py │ │ │ ├── shared_memory_util.py │ │ │ └── shared_ndarray.py │ │ ├── single_realsense.py │ │ └── utils.py │ ├── gs │ │ ├── config │ │ │ └── default.yaml │ │ ├── convert.py │ │ ├── external.py │ │ ├── helpers.py │ │ ├── sh_utils.py │ │ ├── train_utils.py │ │ └── trainer.py │ ├── modules_planning │ │ ├── __init__.py │ │ ├── common │ │ │ ├── communication.py │ │ │ └── xarm.py │ │ ├── dynamics_module.py │ │ ├── kinematics_utils.py │ │ ├── planning_env.py │ │ ├── planning_module.py │ │ ├── segment_perception.py │ │ ├── udp_util.py │ │ └── xarm_controller.py │ ├── modules_teleop │ │ ├── __init__.py │ │ ├── perception.py │ │ ├── recorder.py │ │ ├── robot_env_teleop.py │ │ └── teleop_keyboard.py │ ├── plan.py │ ├── postprocess.py │ ├── reconstruct_gs.py │ ├── teleop.py │ └── utils │ │ ├── __init__.py │ │ ├── env_utils.py │ │ ├── gradio_utils.py │ │ ├── pcd_utils.py │ │ ├── planning_utils.py │ │ ├── render_utils.py │ │ └── track_utils.py ├── scripts │ ├── train_box.sh │ ├── train_bread.sh │ ├── train_cloth.sh │ ├── train_paperbag.sh │ ├── train_rope.sh │ └── train_sloth.sh └── train │ ├── __init__.py │ ├── eval.py │ ├── gs.py │ ├── metric.py │ ├── metric_eval.py │ ├── pv_dataset.py │ ├── pv_train.py │ ├── pv_utils.py │ └── train_eval.py ├── imgs └── teaser.png ├── pgnd ├── __init__.py ├── data │ ├── __init__.py │ ├── dataset.py │ └── dataset_gripper.py ├── ffmpeg.py ├── material │ ├── __init__.py │ ├── network │ │ ├── __init__.py │ │ ├── nerf.py │ │ ├── pointnet.py │ │ └── utils.py │ └── pgnd.py ├── sim │ ├── __init__.py │ ├── friction.py │ ├── model.py │ ├── sim.py │ └── utils.py └── utils.py ├── requirements.txt └── third-party └── diff-gaussian-rasterization-w-depth ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE.md ├── README.md ├── cuda_rasterizer ├── auxiliary.h ├── backward.cu ├── backward.h ├── config.h ├── forward.cu ├── forward.h ├── rasterizer.h ├── rasterizer_impl.cu └── rasterizer_impl.h ├── diff_gaussian_rasterization └── __init__.py ├── ext.cpp ├── rasterize_points.cu ├── rasterize_points.h └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | ## Mac OS 2 | 3 | # General 4 | .DS_Store 5 | .AppleDouble 6 | .LSOverride 7 | 8 | # Icon must end with two \r 9 | Icon 10 | 11 | # Thumbnails 12 | ._* 13 | 14 | # Files that might 
appear in the root of a volume 15 | .DocumentRevisions-V100 16 | .fseventsd 17 | .Spotlight-V100 18 | .TemporaryItems 19 | .Trashes 20 | .VolumeIcon.icns 21 | .com.apple.timemachine.donotpresent 22 | 23 | # Directories potentially created on remote AFP share 24 | .AppleDB 25 | .AppleDesktop 26 | Network Trash Folder 27 | Temporary Items 28 | .apdisk 29 | 30 | ## Vim 31 | 32 | # Swap 33 | [._]*.s[a-v][a-z] 34 | !*.svg # comment out if you don't need vector files 35 | [._]*.sw[a-p] 36 | [._]s[a-rt-v][a-z] 37 | [._]ss[a-gi-z] 38 | [._]sw[a-p] 39 | 40 | # Session 41 | Session.vim 42 | Sessionx.vim 43 | 44 | # Temporary 45 | .netrwhist 46 | *~ 47 | # Auto-generated tag files 48 | tags 49 | # Persistent undo 50 | [._]*.un~ 51 | 52 | ## Linux 53 | 54 | *~ 55 | 56 | # temporary files which can be created if a process still has a handle open of a deleted file 57 | .fuse_hidden* 58 | 59 | # KDE directory preferences 60 | .directory 61 | 62 | # Linux trash folder which might appear on any partition or disk 63 | .Trash-* 64 | 65 | # .nfs files are created when an open file is removed but is still being accessed 66 | .nfs* 67 | 68 | ## Python 69 | 70 | # Byte-compiled / optimized / DLL files 71 | __pycache__/ 72 | *.py[cod] 73 | *$py.class 74 | 75 | # C extensions 76 | *.so 77 | 78 | # Distribution / packaging 79 | .Python 80 | build/ 81 | develop-eggs/ 82 | dist/ 83 | downloads/ 84 | eggs/ 85 | .eggs/ 86 | lib/ 87 | lib64/ 88 | parts/ 89 | sdist/ 90 | var/ 91 | wheels/ 92 | pip-wheel-metadata/ 93 | share/python-wheels/ 94 | *.egg-info/ 95 | .installed.cfg 96 | *.egg 97 | MANIFEST 98 | 99 | # PyInstaller 100 | # Usually these files are written by a python script from a template 101 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 102 | *.manifest 103 | *.spec 104 | 105 | # Installer logs 106 | pip-log.txt 107 | pip-delete-this-directory.txt 108 | 109 | # Unit test / coverage reports 110 | htmlcov/ 111 | .tox/ 112 | .nox/ 113 | .coverage 114 | .coverage.* 115 | .cache 116 | nosetests.xml 117 | coverage.xml 118 | *.cover 119 | *.py,cover 120 | .hypothesis/ 121 | .pytest_cache/ 122 | 123 | # Translations 124 | *.mo 125 | *.pot 126 | 127 | # Django stuff: 128 | *.log 129 | local_settings.py 130 | db.sqlite3 131 | db.sqlite3-journal 132 | 133 | # Flask stuff: 134 | instance/ 135 | .webassets-cache 136 | 137 | # Scrapy stuff: 138 | .scrapy 139 | 140 | # Sphinx documentation 141 | docs/_build/ 142 | 143 | # PyBuilder 144 | target/ 145 | 146 | # Jupyter Notebook 147 | .ipynb_checkpoints 148 | 149 | # IPython 150 | profile_default/ 151 | ipython_config.py 152 | 153 | # pyenv 154 | .python-version 155 | 156 | # pipenv 157 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 158 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 159 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 160 | # install all needed dependencies. 161 | #Pipfile.lock 162 | 163 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 164 | __pypackages__/ 165 | 166 | # Celery stuff 167 | celerybeat-schedule 168 | celerybeat.pid 169 | 170 | # SageMath parsed files 171 | *.sage.py 172 | 173 | # Environments 174 | .env 175 | .venv 176 | # env/ 177 | venv/ 178 | ENV/ 179 | env.bak/ 180 | venv.bak/ 181 | 182 | # Spyder project settings 183 | .spyderproject 184 | .spyproject 185 | 186 | # Rope project settings 187 | .ropeproject 188 | 189 | # mkdocs documentation 190 | /site 191 | 192 | # mypy 193 | .mypy_cache/ 194 | .dmypy.json 195 | dmypy.json 196 | 197 | # Pyre type checker 198 | .pyre/ 199 | 200 | ## VS Code 201 | /.vscode 202 | 203 | ## wandb 204 | /wandb 205 | 206 | ## gradio 207 | .gradio 208 | 209 | log/ 210 | weights/ 211 | 212 | /third-party/* 213 | !/third-party/diff-gaussian-rasterization-w-depth 214 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2024 GitHub Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Particle-Grid Neural Dynamics for Learning Deformable Object Models from RGB-D Videos 2 | 3 | 4 | Kaifeng Zhang1, 5 | 6 | 7 | Baoyu Li2, 8 | 9 | 10 | Kris Hauser2, 11 | 12 | 13 | Yunzhu Li1 14 | 15 | 16 | 1Columbia University, 17 | 2University of Illinois Urbana-Champaign 18 | 19 | [Website](https://kywind.github.io/pgnd/) | 20 | [Paper](https://www.roboticsproceedings.org/rss21/p036.pdf) 21 | 22 | 23 | 24 | ## Interactive Demo 25 | We provide an [interactive demo](https://huggingface.co/spaces/kaifz/pgnd) in Huggingface Spaces. 26 | 27 | ## Installation 28 | 29 | ### Prerequisite 30 | 31 | We recommend installing the latest version of CUDA (12.x) and PyTorch. The CUDA version used to compile PyTorch should be the same as the system's CUDA version to enable installation of the ```diff_gaussian_rasterizer``` package. 32 | 33 | ### Setup an environment 34 | 35 | 1. Prepare python environment 36 | ``` 37 | conda create -n pgnd python=3.10 38 | conda activate pgnd 39 | ``` 40 | 41 | 2. Install PyTorch: [https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/locally/) 42 | ``` 43 | conda install pytorch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 pytorch-cuda=12.4 -c pytorch -c nvidia 44 | ``` 45 | 46 | 3. 
Install other packages: 47 | ``` 48 | pip install -r requirements.txt 49 | conda install -c dglteam/label/th24_cu124 dgl 50 | conda install conda-forge::ffmpeg 51 | ``` 52 | 53 | 4. Install diff_gaussian_rasterization: 54 | ``` 55 | cd third-party/diff-gaussian-rasterization-w-depth 56 | pip install -e . 57 | ``` 58 | 59 | ## Data and Checkpoints 60 | The dataset and pre-trained checkpoint files can be downloaded from [this link](https://drive.google.com/drive/folders/10E0gpETbwA8d1XntIsP0fgWGHipndAe8?usp=sharing). 61 | 62 | For the dataset, we provide the full training and evaluation datasets for all six categories. The dataset is stored as one zip file per category, e.g. for box, all the data are stored in data_box.zip. The files should be unzipped and organized as follows (taking box and sloth as examples, assuming the data for these two categories have been downloaded): 63 | ``` 64 | - experiments/log/data 65 | - 0112_box_processed 66 | - 0112_box2_processed 67 | - 0112_box3_processed 68 | - 1018_sloth_processed 69 | - box_merged 70 | - sloth_merged 71 | ... 72 | ``` 73 | For the checkpoints, the files should be unzipped and organized as follows: 74 | ``` 75 | - experiments/log 76 | - box 77 | - train 78 | - ckpts 79 | - 100000.pt 80 | - hydra.yaml 81 | - sloth 82 | - train 83 | - ckpts 84 | - 100000.pt 85 | - hydra.yaml 86 | ... 87 | ``` 88 | The paths must match exactly for the training and inference scripts to work. If you need to use data in a different format, you may need to modify the code directly to accommodate it. 89 | 90 | ## Custom Dataset 91 | 92 | Processing data from raw RGB-D recordings requires the following pre-trained detection, segmentation, and tracking models. They can be installed with: 93 | ``` 94 | pip install iopath 95 | pip install segment-anything 96 | pip install --no-deps git+https://github.com/IDEA-Research/GroundingDINO 97 | pip install --no-deps git+https://github.com/facebookresearch/sam2 98 | pip install --no-deps git+https://github.com/facebookresearch/co-tracker 99 | ``` 100 | The weights can be downloaded from [this link](https://drive.google.com/drive/folders/1Z70RgW7oiTIfvdk_qOOtqDGsMFQ0jimY?usp=sharing). Extract the files and put them in the ```weights/``` folder. 101 | 102 | The raw data should contain multi-view RGB and depth image recordings, as well as robot end-effector translation/rotation/gripper openness recordings. An example of raw data is available at 103 | [this link](https://drive.google.com/drive/folders/1QQ2ftIJZWFGzBeFg3bxqy6ktsUgmsHts?usp=drive_link) and can be extracted to ```experiments/log/data/cloth_test```. 104 | 105 | The following command runs the data processing code and generates the processed dataset with point tracks: 106 | ``` 107 | python experiments/real_world/postprocess.py 108 | ``` 109 | 110 | ## Training 111 | Once the datasets are prepared, we provide training scripts in the ```experiments/scripts``` folder, one per object category (e.g. ```train_box.sh```, ```train_sloth.sh```). 112 | ``` 113 | bash experiments/scripts/train_<category>.sh 114 | ``` 115 | Training can take several hours on a single GPU with >= 24GB of memory. Setting cfg.debug=False enables plotting the training loss and visualizing validation predictions in wandb. 116 | 117 | ## Inference 118 | 119 | ### Eval 120 | Use the following command to evaluate the trained model on the evaluation dataset and verify its performance. 
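For example, with the provided checkpoints, a state-only evaluation could look like the following (this assumes the ```--task``` value matches a checkpoint folder name under ```experiments/log```, e.g. ```sloth```):
```
python experiments/train/eval.py --task sloth --state_only
```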
121 | The file can also produce 3D Gaussian renderings and particle visualizations when the ```--state_only``` flag is removed 122 | and the source dataset contains reconstructed 3D Gaussians stored as ```.splat``` files (e.g. as in ```experiments/log/data/1018_sloth_processed/episode_0002/gs```). 123 | 124 | ``` 125 | python experiments/train/eval.py --task <task_name> --state_only 126 | ``` 127 | Note: in the current dataset, some categories still lack GS reconstructions, so their evaluation can only include state-based metrics. The code for reconstruction will be released soon. 128 | 129 | ### Planning 130 | Model-based planning for manipulation tasks with the learned dynamics model can be performed by running 131 | ``` 132 | python experiments/real_world/plan.py --config <config_path> --text_prompts <text_prompts> 133 | ``` 134 | This requires building an xArm robot setup with RealSense cameras and calibrating them with 135 | ``` 136 | python experiments/real_world/calibrate.py [--calibrate/--calibrate_bimanual] 137 | ``` 138 | The calibration board needs to be placed at a specific position relative to the robot to fix the robot-to-board transformation, and the camera-to-board transformations are computed using OpenCV detection algorithms. For questions about real robot setups, please feel free to reach out to the first author of the paper. We are working on releasing more detailed instructions for real robot experiments soon. 139 | 140 | 141 | ## Citation 142 | If you find this repo useful for your research, please consider citing our paper: 143 | ``` 144 | @inproceedings{zhang2025particle, 145 | title={Particle-Grid Neural Dynamics for Learning Deformable Object Models from RGB-D Videos}, 146 | author={Zhang, Kaifeng and Li, Baoyu and Hauser, Kris and Li, Yunzhu}, 147 | booktitle={Proceedings of Robotics: Science and Systems (RSS)}, 148 | year={2025} 149 | } 150 | ``` -------------------------------------------------------------------------------- /experiments/.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/.root -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link1.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link1.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link2.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link2.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link3.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link3.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link4.glb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link4.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link5.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link5.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link6.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link6.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link7.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link7.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link_base.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link_base.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/xarm7.urdf: -------------------------------------------------------------------------------- [220 numbered lines; the URDF's XML markup was stripped in this export, so the file content is not recoverable here] -------------------------------------------------------------------------------- /experiments/cfg/default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: pgnd 3 | - render: default 4 | - sim: default 5 | - train: default 6 | - _self_ 7 | - override hydra/job_logging: disabled 8 | - override hydra/hydra_logging: disabled 9 | 10 | hydra: 11 | output_subdir: null 12 | run: 13 | dir: . 
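# The defaults list above pulls in the sub-configs from experiments/cfg/{model,render,sim,train}/;
# the keys below are global run options (seed, CPU/GPU selection, overwrite/resume/debug flags).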
14 | 15 | seed: 0 16 | cpu: 0 17 | num_cpus: 128 18 | gpus: 19 | - 0 20 | overwrite: False 21 | resume: False 22 | debug: False 23 | -------------------------------------------------------------------------------- /experiments/cfg/model/friction/default.yaml: -------------------------------------------------------------------------------- 1 | value: 0.0 2 | requires_grad: False -------------------------------------------------------------------------------- /experiments/cfg/model/material/pointnet_nerf_material.yaml: -------------------------------------------------------------------------------- 1 | requires_grad: True 2 | output_scale: 1.0 3 | input_scale: 2.0 4 | radius: 0.2 5 | absolute_y: False 6 | pe_num_func_res: 0 -------------------------------------------------------------------------------- /experiments/cfg/model/pgnd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - material: pointnet_nerf_material 3 | - friction: default 4 | - _self_ 5 | 6 | ckpt: null 7 | clip_bound: 1.5 8 | eef_t: [0.0, 0.0, 0.01] 9 | gripper_radius: 0.04 10 | -------------------------------------------------------------------------------- /experiments/cfg/render/default.yaml: -------------------------------------------------------------------------------- 1 | width: 512 2 | height: 512 3 | skip_frame: 1 4 | bound: 1.5 5 | fps: 5 6 | radius_scale: 500 7 | center: [0.5, 0.3, 0.5] 8 | distance: 1.4 9 | azimuth: -125 10 | elevation: 30 11 | reflectance: [0.92941176, 0.32941176, 0.23137255] 12 | -------------------------------------------------------------------------------- /experiments/cfg/sim/default.yaml: -------------------------------------------------------------------------------- 1 | num_steps_train: 5 2 | num_steps: 1000 3 | interval: 1 4 | num_grids: [50, 50, 50, 0.02] 5 | dt: 0.1 6 | bound: 3 7 | eps: 1e-7 8 | skip_frame: 1 9 | num_grippers: 1 10 | preprocess_scale: 1.0 11 | preprocess_with_table: True 12 | n_particles: 1000 13 | gripper_forcing: True 14 | gripper_points: False 15 | n_history: 2 16 | uniform: False 17 | -------------------------------------------------------------------------------- /experiments/cfg/train/default.yaml: -------------------------------------------------------------------------------- 1 | name: null 2 | dataset_name: null 3 | source_dataset_name: null 4 | 5 | num_iterations: 100000 6 | resume_iteration: 0 7 | batch_size: 32 8 | num_workers: 8 9 | material_lr: 1e-4 10 | material_wd: 0.0 11 | material_grad_max_norm: 0.1 12 | training_start_episode: 0 13 | training_end_episode: 1000 14 | eval_start_episode: 0 15 | eval_end_episode: 10 16 | iteration_log_interval: 10 17 | iteration_save_interval: 1000 18 | iteration_eval_interval: 10000 19 | loss_factor: 1.0 20 | loss_factor_v: 0.0 21 | loss_factor_x: 100.0 22 | friction_lr: 1e-1 23 | friction_wd: 0.0 24 | friction_grad_max_norm: 0.1 25 | dataset_load_skip_frame: 1 26 | dataset_skip_frame: 1 27 | downsample: False 28 | use_pv: False 29 | use_gs: False 30 | dataset_non_overwrite: True 31 | -------------------------------------------------------------------------------- /experiments/real_world/calibrate.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import argparse 3 | import time 4 | import numpy as np 5 | import torch 6 | import open3d as o3d 7 | from PIL import Image 8 | 9 | from modules import RobotEnv, BimanualRobotEnv 10 | from utils.pcd_utils import get_tabletop_points, visualize_o3d 11 | 12 | def 
construct_goal_from_perception(): 13 | use_robot = True 14 | 15 | exposure_time = 10 16 | env = RobotEnv( 17 | WH=[848, 480], 18 | obs_fps=5, 19 | n_obs_steps=2, 20 | use_robot=use_robot, 21 | speed=100, 22 | ) 23 | 24 | try: 25 | env.start(exposure_time=exposure_time) 26 | if use_robot: 27 | env.reset_robot() 28 | print('env started') 29 | time.sleep(exposure_time) 30 | print('start recording') 31 | 32 | env.calibrate(re_calibrate=False) 33 | 34 | obs = env.get_obs(get_color=True, get_depth=True) 35 | intr_list = env.get_intrinsics() 36 | R_list, t_list = env.get_extrinsics() 37 | bbox = env.get_bbox() 38 | 39 | rgb_list = [] 40 | depth_list = [] 41 | for i in range(4): 42 | rgb = obs[f'color_{i}'][-1] 43 | depth = obs[f'depth_{i}'][-1] 44 | rgb_list.append(rgb) 45 | depth_list.append(depth) 46 | 47 | pcd = get_tabletop_points(rgb_list, depth_list, R_list, t_list, intr_list, bbox) 48 | 49 | visualize_o3d([pcd]) 50 | o3d.io.write_point_cloud("vis_real_world/target.pcd", pcd) 51 | 52 | finally: 53 | env.stop() 54 | print('env stopped') 55 | 56 | 57 | def calibrate(use_robot=True, reset_robot=True, wrist=None): 58 | exposure_time = 5 59 | env = RobotEnv( 60 | use_camera=True, 61 | WH=[848, 480], 62 | obs_fps=5, 63 | n_obs_steps=2, 64 | use_robot=use_robot, 65 | speed=100, 66 | wrist=wrist, 67 | ) 68 | 69 | try: 70 | env.start(exposure_time=exposure_time) 71 | if use_robot and reset_robot: 72 | env.reset_robot() 73 | print('env started') 74 | time.sleep(exposure_time) 75 | print('start recording') 76 | 77 | env.calibrate(re_calibrate=True) 78 | 79 | finally: 80 | env.stop() 81 | print('env stopped') 82 | 83 | 84 | def calibrate_bimanual(reset_robot=True): 85 | exposure_time = 5 86 | env = BimanualRobotEnv( 87 | WH=[848, 480], 88 | obs_fps=5, 89 | n_obs_steps=2, 90 | speed=100, 91 | gripper_enable=True, 92 | ) 93 | 94 | try: 95 | env.start(exposure_time=exposure_time) 96 | if reset_robot: 97 | env.reset_robot() 98 | print('env started') 99 | time.sleep(exposure_time) 100 | print('start recording') 101 | 102 | env.calibrate(re_calibrate=True) 103 | 104 | finally: 105 | env.stop() 106 | print('env stopped') 107 | 108 | 109 | if __name__ == "__main__": 110 | parser = argparse.ArgumentParser() 111 | parser.add_argument("--calibrate", action="store_true") 112 | parser.add_argument("--calibrate_bimanual", action="store_true") 113 | parser.add_argument("--calibrate_fixed", action="store_true") 114 | parser.add_argument("--construct_goal", action="store_true") 115 | parser.add_argument("--examine_points", action="store_true") 116 | args = parser.parse_args() 117 | if args.calibrate: 118 | calibrate(reset_robot=False) 119 | elif args.calibrate_bimanual: 120 | calibrate_bimanual(reset_robot=False) 121 | elif args.calibrate_fixed: # only calibrate fixed cameras 122 | calibrate(use_robot=False) 123 | elif args.construct_goal: 124 | construct_goal_from_perception() 125 | else: 126 | print("No arguments provided") 127 | -------------------------------------------------------------------------------- /experiments/real_world/camera/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/real_world/camera/__init__.py -------------------------------------------------------------------------------- /experiments/real_world/camera/multi_realsense.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Union, 
Dict, Callable 2 | import numbers 3 | import time 4 | from multiprocess.managers import SharedMemoryManager 5 | import numpy as np 6 | import pyrealsense2 as rs 7 | from .single_realsense import SingleRealsense 8 | 9 | class MultiRealsense: 10 | def __init__(self, 11 | serial_numbers: Optional[List[str]]=None, 12 | shm_manager: Optional[SharedMemoryManager]=None, 13 | resolution=(1280,720), 14 | capture_fps=30, 15 | put_fps=None, 16 | put_downsample=True, 17 | enable_color=True, 18 | enable_depth=False, 19 | process_depth=False, 20 | enable_infrared=False, 21 | get_max_k=30, 22 | advanced_mode_config: Optional[Union[dict, List[dict]]]=None, 23 | transform: Optional[Union[Callable[[Dict], Dict], List[Callable]]]=None, 24 | vis_transform: Optional[Union[Callable[[Dict], Dict], List[Callable]]]=None, 25 | verbose=False 26 | ): 27 | if shm_manager is None: 28 | shm_manager = SharedMemoryManager() 29 | shm_manager.start() 30 | if serial_numbers is None: 31 | serial_numbers = SingleRealsense.get_connected_devices_serial() 32 | n_cameras = len(serial_numbers) 33 | 34 | advanced_mode_config = repeat_to_list( 35 | advanced_mode_config, n_cameras, dict) 36 | transform = repeat_to_list( 37 | transform, n_cameras, Callable) 38 | vis_transform = repeat_to_list( 39 | vis_transform, n_cameras, Callable) 40 | 41 | cameras = dict() 42 | for i, serial in enumerate(serial_numbers): 43 | cameras[serial] = SingleRealsense( 44 | shm_manager=shm_manager, 45 | serial_number=serial, 46 | resolution=resolution, 47 | capture_fps=capture_fps, 48 | put_fps=put_fps, 49 | put_downsample=put_downsample, 50 | enable_color=enable_color, 51 | enable_depth=enable_depth, 52 | process_depth=process_depth, 53 | enable_infrared=enable_infrared, 54 | get_max_k=get_max_k, 55 | advanced_mode_config=advanced_mode_config[i], 56 | transform=transform[i], 57 | vis_transform=vis_transform[i], 58 | is_master=(i == 0), 59 | verbose=verbose 60 | ) 61 | 62 | self.cameras = cameras 63 | self.serial_numbers = serial_numbers 64 | self.shm_manager = shm_manager 65 | self.resolution = resolution 66 | self.capture_fps = capture_fps 67 | 68 | def __enter__(self): 69 | self.start() 70 | return self 71 | 72 | def __exit__(self, exc_type, exc_val, exc_tb): 73 | self.stop() 74 | 75 | @property 76 | def n_cameras(self): 77 | return len(self.cameras) 78 | 79 | @property 80 | def is_ready(self): 81 | is_ready = True 82 | for camera in self.cameras.values(): 83 | if not camera.is_ready: 84 | is_ready = False 85 | return is_ready 86 | 87 | def start(self, wait=True, put_start_time=None): 88 | if put_start_time is None: 89 | put_start_time = time.time() 90 | for camera in self.cameras.values(): 91 | camera.start(wait=False, put_start_time=put_start_time) 92 | 93 | if wait: 94 | self.start_wait() 95 | 96 | def stop(self, wait=True): 97 | for camera in self.cameras.values(): 98 | camera.stop(wait=False) 99 | 100 | if wait: 101 | self.stop_wait() 102 | 103 | def start_wait(self): 104 | for camera in self.cameras.values(): 105 | print('processing camera {}'.format(camera.serial_number)) 106 | camera.start_wait() 107 | 108 | def stop_wait(self): 109 | for camera in self.cameras.values(): 110 | camera.join() 111 | 112 | def get(self, k=None, index=None, out=None) -> Dict[int, Dict[str, np.ndarray]]: 113 | """ 114 | Return order T,H,W,C 115 | { 116 | 0: { 117 | 'rgb': (T,H,W,C), 118 | 'timestamp': (T,) 119 | }, 120 | 1: ... 
121 | } 122 | """ 123 | if index is not None: 124 | this_out = None 125 | this_out = self.cameras[self.serial_numbers[index]].get(k=k, out=this_out) 126 | return this_out 127 | if out is None: 128 | out = dict() 129 | for i, camera in enumerate(self.cameras.values()): 130 | this_out = None 131 | if i in out: 132 | this_out = out[i] 133 | this_out = camera.get(k=k, out=this_out) 134 | out[i] = this_out 135 | return out 136 | 137 | def set_color_option(self, option, value): 138 | n_camera = len(self.cameras) 139 | value = repeat_to_list(value, n_camera, numbers.Number) 140 | for i, camera in enumerate(self.cameras.values()): 141 | camera.set_color_option(option, value[i]) 142 | 143 | def set_exposure(self, exposure=None, gain=None): 144 | """150nit. (0.1 ms, 1/10000s) 145 | gain: (0, 128) 146 | """ 147 | 148 | if exposure is None and gain is None: 149 | # auto exposure 150 | self.set_color_option(rs.option.enable_auto_exposure, 1.0) 151 | else: 152 | # manual exposure 153 | self.set_color_option(rs.option.enable_auto_exposure, 0.0) 154 | if exposure is not None: 155 | self.set_color_option(rs.option.exposure, exposure) 156 | if gain is not None: 157 | self.set_color_option(rs.option.gain, gain) 158 | 159 | def set_white_balance(self, white_balance=None): 160 | if white_balance is None: 161 | self.set_color_option(rs.option.enable_auto_white_balance, 1.0) 162 | else: 163 | self.set_color_option(rs.option.enable_auto_white_balance, 0.0) 164 | self.set_color_option(rs.option.white_balance, white_balance) 165 | 166 | def get_intrinsics(self): 167 | return np.array([c.get_intrinsics() for c in self.cameras.values()]) 168 | 169 | def get_depth_scale(self): 170 | return np.array([c.get_depth_scale() for c in self.cameras.values()]) 171 | 172 | def restart_put(self, start_time): 173 | for camera in self.cameras.values(): 174 | camera.restart_put(start_time) 175 | 176 | 177 | def repeat_to_list(x, n: int, cls): 178 | if x is None: 179 | x = [None] * n 180 | if isinstance(x, cls): 181 | x = [x] * n 182 | assert len(x) == n 183 | return x 184 | -------------------------------------------------------------------------------- /experiments/real_world/camera/shared_memory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/real_world/camera/shared_memory/__init__.py -------------------------------------------------------------------------------- /experiments/real_world/camera/shared_memory/shared_memory_queue.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | import numbers 3 | from queue import Empty, Full 4 | from multiprocess.managers import SharedMemoryManager 5 | import numpy as np 6 | from .shared_memory_util import ArraySpec, SharedAtomicCounter 7 | from .shared_ndarray import SharedNDArray 8 | 9 | 10 | class SharedMemoryQueue: 11 | """ 12 | A Lock-Free FIFO Shared Memory Data Structure. 13 | Stores a sequence of dict of numpy arrays. 
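A minimal usage sketch (not part of the original file; it only uses the methods defined below and assumes a running SharedMemoryManager):
    >>> import numpy as np
    >>> from multiprocess.managers import SharedMemoryManager
    >>> shm_manager = SharedMemoryManager()
    >>> shm_manager.start()
    >>> queue = SharedMemoryQueue.create_from_examples(
    ...     shm_manager,
    ...     examples={'obs': np.zeros(3, dtype=np.float32)},  # per-key shape/dtype template
    ...     buffer_size=8)
    >>> queue.put({'obs': np.ones(3, dtype=np.float32)})      # copies the dict into shared memory
    >>> out = queue.get()                                      # returns a dict of np.ndarray copies
    >>> shm_manager.shutdown()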
14 | """ 15 | 16 | def __init__(self, 17 | shm_manager: SharedMemoryManager, 18 | array_specs: List[ArraySpec], 19 | buffer_size: int 20 | ): 21 | 22 | # create atomic counter 23 | write_counter = SharedAtomicCounter(shm_manager) 24 | read_counter = SharedAtomicCounter(shm_manager) 25 | 26 | # allocate shared memory 27 | shared_arrays = dict() 28 | for spec in array_specs: 29 | key = spec.name 30 | assert key not in shared_arrays 31 | array = SharedNDArray.create_from_shape( 32 | mem_mgr=shm_manager, 33 | shape=(buffer_size,) + tuple(spec.shape), 34 | dtype=spec.dtype) 35 | shared_arrays[key] = array 36 | 37 | self.buffer_size = buffer_size 38 | self.array_specs = array_specs 39 | self.write_counter = write_counter 40 | self.read_counter = read_counter 41 | self.shared_arrays = shared_arrays 42 | 43 | @classmethod 44 | def create_from_examples(cls, 45 | shm_manager: SharedMemoryManager, 46 | examples: Dict[str, Union[np.ndarray, numbers.Number]], 47 | buffer_size: int 48 | ): 49 | specs = list() 50 | for key, value in examples.items(): 51 | shape = None 52 | dtype = None 53 | if isinstance(value, np.ndarray): 54 | shape = value.shape 55 | dtype = value.dtype 56 | assert dtype != np.dtype('O') 57 | elif isinstance(value, numbers.Number): 58 | shape = tuple() 59 | dtype = np.dtype(type(value)) 60 | else: 61 | raise TypeError(f'Unsupported type {type(value)}') 62 | 63 | spec = ArraySpec( 64 | name=key, 65 | shape=shape, 66 | dtype=dtype 67 | ) 68 | specs.append(spec) 69 | 70 | obj = cls( 71 | shm_manager=shm_manager, 72 | array_specs=specs, 73 | buffer_size=buffer_size 74 | ) 75 | return obj 76 | 77 | def qsize(self): 78 | read_count = self.read_counter.load() 79 | write_count = self.write_counter.load() 80 | n_data = write_count - read_count 81 | return n_data 82 | 83 | def empty(self): 84 | n_data = self.qsize() 85 | return n_data <= 0 86 | 87 | def clear(self): 88 | self.read_counter.store(self.write_counter.load()) 89 | 90 | def put(self, data: Dict[str, Union[np.ndarray, numbers.Number]]): 91 | read_count = self.read_counter.load() 92 | write_count = self.write_counter.load() 93 | n_data = write_count - read_count 94 | if n_data >= self.buffer_size: 95 | raise Full() 96 | 97 | next_idx = write_count % self.buffer_size 98 | 99 | # write to shared memory 100 | for key, value in data.items(): 101 | arr: np.ndarray 102 | arr = self.shared_arrays[key].get() 103 | if isinstance(value, np.ndarray): 104 | arr[next_idx] = value 105 | else: 106 | arr[next_idx] = np.array(value, dtype=arr.dtype) 107 | 108 | # update idx 109 | self.write_counter.add(1) 110 | 111 | def get(self, out=None) -> Dict[str, np.ndarray]: 112 | write_count = self.write_counter.load() 113 | read_count = self.read_counter.load() 114 | n_data = write_count - read_count 115 | if n_data <= 0: 116 | raise Empty() 117 | 118 | if out is None: 119 | out = self._allocate_empty() 120 | 121 | next_idx = read_count % self.buffer_size 122 | for key, value in self.shared_arrays.items(): 123 | arr = value.get() 124 | np.copyto(out[key], arr[next_idx]) 125 | 126 | # update idx 127 | self.read_counter.add(1) 128 | return out 129 | 130 | def get_k(self, k, out=None) -> Dict[str, np.ndarray]: 131 | write_count = self.write_counter.load() 132 | read_count = self.read_counter.load() 133 | n_data = write_count - read_count 134 | if n_data <= 0: 135 | raise Empty() 136 | assert k <= n_data 137 | 138 | out = self._get_k_impl(k, read_count, out=out) 139 | self.read_counter.add(k) 140 | return out 141 | 142 | def get_all(self, out=None) -> Dict[str, 
np.ndarray]: 143 | write_count = self.write_counter.load() 144 | read_count = self.read_counter.load() 145 | n_data = write_count - read_count 146 | if n_data <= 0: 147 | raise Empty() 148 | 149 | out = self._get_k_impl(n_data, read_count, out=out) 150 | self.read_counter.add(n_data) 151 | return out 152 | 153 | def _get_k_impl(self, k, read_count, out=None) -> Dict[str, np.ndarray]: 154 | if out is None: 155 | out = self._allocate_empty(k) 156 | 157 | curr_idx = read_count % self.buffer_size 158 | for key, value in self.shared_arrays.items(): 159 | arr = value.get() 160 | target = out[key] 161 | 162 | start = curr_idx 163 | end = min(start + k, self.buffer_size) 164 | target_start = 0 165 | target_end = (end - start) 166 | target[target_start: target_end] = arr[start:end] 167 | 168 | remainder = k - (end - start) 169 | if remainder > 0: 170 | # wrap around 171 | start = 0 172 | end = start + remainder 173 | target_start = target_end 174 | target_end = k 175 | target[target_start: target_end] = arr[start:end] 176 | 177 | return out 178 | 179 | def _allocate_empty(self, k=None): 180 | result = dict() 181 | for spec in self.array_specs: 182 | shape = spec.shape 183 | if k is not None: 184 | shape = (k,) + shape 185 | result[spec.name] = np.empty( 186 | shape=shape, dtype=spec.dtype) 187 | return result 188 | -------------------------------------------------------------------------------- /experiments/real_world/camera/shared_memory/shared_memory_util.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from dataclasses import dataclass 3 | import numpy as np 4 | from multiprocess.managers import SharedMemoryManager 5 | from atomics import atomicview, MemoryOrder, UINT 6 | 7 | @dataclass 8 | class ArraySpec: 9 | name: str 10 | shape: Tuple[int] 11 | dtype: np.dtype 12 | 13 | 14 | class SharedAtomicCounter: 15 | def __init__(self, 16 | shm_manager: SharedMemoryManager, 17 | size :int=8 # 64bit int 18 | ): 19 | shm = shm_manager.SharedMemory(size=size) 20 | self.shm = shm 21 | self.size = size 22 | self.store(0) # initialize 23 | 24 | @property 25 | def buf(self): 26 | return self.shm.buf[:self.size] 27 | 28 | def load(self) -> int: 29 | with atomicview(buffer=self.buf, atype=UINT) as a: 30 | value = a.load(order=MemoryOrder.ACQUIRE) 31 | return value 32 | 33 | def store(self, value: int): 34 | with atomicview(buffer=self.buf, atype=UINT) as a: 35 | a.store(value, order=MemoryOrder.RELEASE) 36 | 37 | def add(self, value: int): 38 | with atomicview(buffer=self.buf, atype=UINT) as a: 39 | a.add(value, order=MemoryOrder.ACQ_REL) 40 | -------------------------------------------------------------------------------- /experiments/real_world/camera/shared_memory/shared_ndarray.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import multiprocess 4 | import multiprocess.synchronize 5 | from multiprocess.managers import SharedMemoryManager 6 | from multiprocess.shared_memory import SharedMemory 7 | from typing import Any, TYPE_CHECKING, Generic, Optional, Tuple, TypeVar, Union 8 | 9 | import numpy as np 10 | import numpy.typing as npt 11 | 12 | 13 | SharedMemoryLike = Union[str, SharedMemory] # shared memory or name of shared memory 14 | SharedT = TypeVar("SharedT", bound=np.generic) 15 | 16 | 17 | class SharedNDArray(Generic[SharedT]): 18 | """Class to keep track of and retrieve the data in a shared array 19 | Attributes 20 | ---------- 21 | shm 22 | 
SharedMemory object containing the data of the array 23 | shape 24 | Shape of the NumPy array 25 | dtype 26 | Type of the NumPy array. Anything that may be passed to the `dtype=` argument in `np.ndarray`. 27 | lock 28 | (Optional) multiprocess.Lock to manage access to the SharedNDArray. This is only created if 29 | lock=True is passed to the constructor, otherwise it is set to `None`. 30 | A SharedNDArray object may be created either directly with a preallocated shared memory object plus the 31 | dtype and shape of the numpy array it represents: 32 | >>> from multiprocess.shared_memory import SharedMemory 33 | >>> import numpy as np 34 | >>> from shared_ndarray2 import SharedNDArray 35 | >>> x = np.array([1, 2, 3]) 36 | >>> shm = SharedMemory(name="x", create=True, size=x.nbytes) 37 | >>> arr = SharedNDArray(shm, x.shape, x.dtype) 38 | >>> arr[:] = x[:] # copy x into the array 39 | >>> print(arr[:]) 40 | [1 2 3] 41 | >>> shm.close() 42 | >>> shm.unlink() 43 | Or using a SharedMemoryManager either from an existing array or from arbitrary shape and nbytes: 44 | >>> from multiprocess.managers import SharedMemoryManager 45 | >>> mem_mgr = SharedMemoryManager() 46 | >>> mem_mgr.start() # Better yet, use SharedMemoryManager context manager 47 | >>> arr = SharedNDArray.from_shape(mem_mgr, x.shape, x.dtype) 48 | >>> arr[:] = x[:] # copy x into the array 49 | >>> print(arr[:]) 50 | [1 2 3] 51 | >>> # -or in one step- 52 | >>> arr = SharedNDArray.from_array(mem_mgr, x) 53 | >>> print(arr[:]) 54 | [1 2 3] 55 | `SharedNDArray` does not subclass numpy.ndarray but rather generates an ndarray on-the-fly in get(), 56 | which is used in __getitem__ and __setitem__. Thus to access the data and/or use any ndarray methods 57 | get() or __getitem__ or __setitem__ must be used 58 | >>> arr.max() # ERROR: SharedNDArray has no `max` method. 59 | Traceback (most recent call last): 60 | .... 61 | AttributeError: SharedNDArray object has no attribute 'max'. To access NumPy ndarray object use .get() method. 62 | >>> arr.get().max() # (or arr[:].max()) OK: This gets an ndarray on which we can operate 63 | 3 64 | >>> y = np.zeros(3) 65 | >>> y[:] = arr # ERROR: Cannot broadcast-assign a SharedNDArray to ndarray `y` 66 | Traceback (most recent call last): 67 | ... 68 | ValueError: setting an array element with a sequence. 69 | >>> y[:] = arr[:] # OK: This gets an ndarray that can be copied element-wise to `y` 70 | >>> mem_mgr.shutdown() 71 | """ 72 | 73 | shm: SharedMemory 74 | # shape: Tuple[int, ...] # is a property 75 | dtype: np.dtype 76 | lock: Optional[multiprocess.synchronize.Lock] 77 | 78 | def __init__( 79 | self, shm: SharedMemoryLike, shape: Tuple[int, ...], dtype: npt.DTypeLike): 80 | """Initialize a SharedNDArray object from existing shared memory, object shape, and dtype. 81 | To initialize a SharedNDArray object from a memory manager and data or shape, use the `from_array() 82 | or `from_shape()` classmethods. 83 | Parameters 84 | ---------- 85 | shm 86 | `multiprocess.shared_memory.SharedMemory` object or name for connecting to an existing block 87 | of shared memory (using SharedMemory constructor) 88 | shape 89 | Shape of the NumPy array to be represented in the shared memory 90 | dtype 91 | Data type for the NumPy array to be represented in shared memory. Any valid argument for 92 | `np.dtype` may be used as it will be converted to an actual `dtype` object. 93 | lock : bool, optional 94 | If True, create a multiprocess.Lock object accessible with the `.lock` attribute, by default 95 | False. 
If passing the `SharedNDArray` as an argument to a `multiprocess.Pool` function this 96 | should not be used -- see this comment to a Stack Overflow question about `multiprocess.Lock`: 97 | https://stackoverflow.com/questions/25557686/python-sharing-a-lock-between-processes#comment72803059_25558333 98 | Raises 99 | ------ 100 | ValueError 101 | The SharedMemory size (number of bytes) does not match the product of the shape and dtype 102 | itemsize. 103 | """ 104 | if isinstance(shm, str): 105 | shm = SharedMemory(name=shm, create=False) 106 | dtype = np.dtype(dtype) # Try to convert to dtype 107 | assert shm.size >= (dtype.itemsize * np.prod(shape)) 108 | self.shm = shm 109 | self.dtype = dtype 110 | self._shape: Tuple[int, ...] = shape 111 | 112 | def __repr__(self): 113 | # Like numpy's ndarray repr 114 | cls_name = self.__class__.__name__ 115 | nspaces = len(cls_name) + 1 116 | array_repr = str(self.get()) 117 | array_repr = array_repr.replace("\n", "\n" + " " * nspaces) 118 | return f"{cls_name}({array_repr}, dtype={self.dtype})" 119 | 120 | @classmethod 121 | def create_from_array( 122 | cls, mem_mgr: SharedMemoryManager, arr: npt.NDArray[SharedT] 123 | ) -> SharedNDArray[SharedT]: 124 | """Create a SharedNDArray from a SharedMemoryManager and an existing numpy array. 125 | Parameters 126 | ---------- 127 | mem_mgr 128 | Running `multiprocess.managers.SharedMemoryManager` instance from which to create the 129 | SharedMemory for the SharedNDArray 130 | arr 131 | NumPy `ndarray` object to copy into the created SharedNDArray upon initialization. 132 | """ 133 | # Simply use from_shape() to create the SharedNDArray and copy the data into it. 134 | shared_arr = cls.create_from_shape(mem_mgr, arr.shape, arr.dtype) 135 | shared_arr.get()[:] = arr[:] 136 | return shared_arr 137 | 138 | @classmethod 139 | def create_from_shape( 140 | cls, mem_mgr: SharedMemoryManager, shape: Tuple, dtype: npt.DTypeLike) -> SharedNDArray: 141 | """Create a SharedNDArray directly from a SharedMemoryManager 142 | Parameters 143 | ---------- 144 | mem_mgr 145 | SharedMemoryManager instance that has been started 146 | shape 147 | Shape of the array 148 | dtype 149 | Data type for the NumPy array to be represented in shared memory. Any valid argument for 150 | `np.dtype` may be used as it will be converted to an actual `dtype` object. 
151 | """ 152 | dtype = np.dtype(dtype) # Convert to dtype if possible 153 | shm = mem_mgr.SharedMemory(np.prod(shape) * dtype.itemsize) 154 | return cls(shm=shm, shape=shape, dtype=dtype) 155 | 156 | @property 157 | def shape(self) -> Tuple[int, ...]: 158 | return self._shape 159 | 160 | 161 | def get(self) -> npt.NDArray[SharedT]: 162 | """Get a numpy array with access to the shared memory""" 163 | return np.ndarray(self.shape, dtype=self.dtype, buffer=self.shm.buf) 164 | 165 | def __del__(self): 166 | self.shm.close() 167 | -------------------------------------------------------------------------------- /experiments/real_world/gs/config/default.yaml: -------------------------------------------------------------------------------- 1 | # training 2 | weight_im: 1.0 3 | weight_seg: 3.0 4 | grad_thresh: 0.0002 5 | remove_threshold: 0.005 6 | remove_thresh_5k: 0.25 7 | scale_scene_radius: 0.05 8 | num_iters: 10000 9 | 10 | # rendering 11 | near: 0.01 12 | far: 100 -------------------------------------------------------------------------------- /experiments/real_world/gs/convert.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from io import BytesIO 3 | 4 | 5 | def quat_mult(q1, q2): 6 | w1, x1, y1, z1 = q1 7 | w2, x2, y2, z2 = q2 8 | w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 9 | x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 10 | y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 11 | z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 12 | return np.array([w, x, y, z], dtype=np.float32) 13 | 14 | 15 | def rot_mat_to_quat(rot_mat): 16 | w = np.sqrt(1 + rot_mat[0, 0] + rot_mat[1, 1] + rot_mat[2, 2]) / 2 17 | x = (rot_mat[2, 1] - rot_mat[1, 2]) / (4 * w) 18 | y = (rot_mat[0, 2] - rot_mat[2, 0]) / (4 * w) 19 | z = (rot_mat[1, 0] - rot_mat[0, 1]) / (4 * w) 20 | return np.array([w, x, y, z], dtype=np.float32) 21 | 22 | 23 | def save_to_splat(pts, colors, scales, quats, opacities, output_file, center=True, rotate=True): 24 | if center: 25 | pts_mean = np.mean(pts, axis=0) 26 | pts = pts - pts_mean 27 | buffer = BytesIO() 28 | for (v, c, s, q, o) in zip(pts, colors, scales, quats, opacities): 29 | position = np.array([v[0], v[1], v[2]], dtype=np.float32) 30 | scales = np.array([s[0], s[1], s[2]], dtype=np.float32) 31 | rot = np.array([q[0], q[1], q[2], q[3]], dtype=np.float32) 32 | # SH_C0 = 0.28209479177387814 33 | # color = np.array([0.5 + SH_C0 * c[0], 0.5 + SH_C0 * c[1], 0.5 + SH_C0 * c[2], o[0]]) 34 | color = np.array([c[0], c[1], c[2], o[0]]) 35 | 36 | # rotate around x axis 37 | if rotate: 38 | rot_x_90 = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]], dtype=np.float32) 39 | rot_x_90 = np.linalg.inv(rot_x_90) 40 | position = np.dot(rot_x_90, position) 41 | rot = quat_mult(rot_mat_to_quat(rot_x_90), rot) 42 | 43 | buffer.write(position.tobytes()) 44 | buffer.write(scales.tobytes()) 45 | buffer.write((color * 255).clip(0, 255).astype(np.uint8).tobytes()) 46 | buffer.write( 47 | ((rot / np.linalg.norm(rot)) * 128 + 128) 48 | .clip(0, 255) 49 | .astype(np.uint8) 50 | .tobytes() 51 | ) 52 | with open(output_file, "wb") as f: 53 | f.write(buffer.getvalue()) 54 | 55 | 56 | def read_splat(splat_file): 57 | with open(splat_file, "rb") as f: 58 | data = f.read() 59 | pts = [] 60 | colors = [] 61 | scales = [] 62 | quats = [] 63 | opacities = [] 64 | for i in range(0, len(data), 32): 65 | v = np.frombuffer(data[i : i + 12], dtype=np.float32) 66 | s = np.frombuffer(data[i + 12 : i + 24], dtype=np.float32) 67 | c = np.frombuffer(data[i + 24 : i + 28], dtype=np.uint8) / 
255 68 | q = np.frombuffer(data[i + 28 : i + 32], dtype=np.uint8) 69 | q = (q * 1.0 - 128) / 128 70 | pts.append(v) 71 | scales.append(s) 72 | colors.append(c[:3]) 73 | quats.append(q) 74 | opacities.append(c[3:]) 75 | return np.array(pts), np.array(colors), np.array(scales), np.array(quats), np.array(opacities) 76 | -------------------------------------------------------------------------------- /experiments/real_world/gs/helpers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import open3d as o3d 4 | import numpy as np 5 | from diff_gaussian_rasterization import GaussianRasterizationSettings as Camera 6 | 7 | 8 | def setup_camera(w, h, k, w2c, near=0.01, far=100.0, bg=[0, 0, 0], z_threshold=0.2, device='cuda'): 9 | fx, fy, cx, cy = k[0][0], k[1][1], k[0][2], k[1][2] 10 | w2c = torch.tensor(w2c).cuda().float() 11 | cam_center = torch.inverse(w2c)[:3, 3] 12 | w2c = w2c.unsqueeze(0).transpose(1, 2) 13 | opengl_proj = torch.tensor([[2 * fx / w, 0.0, -(w - 2 * cx) / w, 0.0], 14 | [0.0, 2 * fy / h, -(h - 2 * cy) / h, 0.0], 15 | [0.0, 0.0, far / (far - near), -(far * near) / (far - near)], 16 | [0.0, 0.0, 1.0, 0.0]]).cuda().float().unsqueeze(0).transpose(1, 2) 17 | full_proj = w2c.bmm(opengl_proj) 18 | cam = Camera( 19 | image_height=h, 20 | image_width=w, 21 | tanfovx=w / (2 * fx), 22 | tanfovy=h / (2 * fy), 23 | bg=torch.tensor(bg, dtype=torch.float32, device=device), 24 | scale_modifier=1.0, 25 | viewmatrix=w2c.to(device), 26 | projmatrix=full_proj.to(device), 27 | sh_degree=0, 28 | campos=cam_center.to(device), 29 | prefiltered=False, 30 | z_threshold=z_threshold, 31 | ) 32 | return cam 33 | 34 | 35 | def params2rendervar(params): 36 | rendervar = { 37 | 'means3D': params['means3D'], 38 | 'colors_precomp': params['rgb_colors'], 39 | 'rotations': torch.nn.functional.normalize(params['unnorm_rotations']), 40 | 'opacities': torch.sigmoid(params['logit_opacities']), 41 | 'scales': torch.exp(params['log_scales']), 42 | 'means2D': torch.zeros_like(params['means3D'], requires_grad=True, device="cuda") + 0 43 | } 44 | return rendervar 45 | 46 | def params2rendervar_wt(params, t): 47 | print(params['unnorm_rotations'][t]) 48 | rendervar = { 49 | 'means3D': params['means3D'][t], 50 | 'colors_precomp': params['rgb_colors'][t], 51 | 'rotations': torch.nn.functional.normalize(params['unnorm_rotations'][t]), 52 | 'opacities': torch.sigmoid(params['logit_opacities'][t]), 53 | 'scales': torch.exp(params['log_scales'][t]), 54 | 'means2D': torch.zeros_like(params['means3D'][t], requires_grad=True, device="cuda") + 0 55 | } 56 | return rendervar 57 | 58 | def params2rendervar_consistent_rgb(params, variables): 59 | rendervar = { 60 | 'means3D': params['means3D'], 61 | 'colors_precomp': variables['rgb_colors'], 62 | 'rotations': torch.nn.functional.normalize(params['unnorm_rotations']), 63 | 'opacities': torch.sigmoid(params['logit_opacities']), 64 | 'scales': torch.exp(params['log_scales']), 65 | 'means2D': torch.zeros_like(params['means3D'], requires_grad=True, device="cuda") + 0 66 | } 67 | return rendervar 68 | 69 | 70 | def l1_loss_v1(x, y): 71 | return torch.abs((x - y)).mean() 72 | 73 | 74 | def l1_loss_v2(x, y): 75 | return (torch.abs(x - y).sum(-1)).mean() 76 | 77 | 78 | def weighted_l2_loss_v1(x, y, w): 79 | return torch.sqrt(((x - y) ** 2) * w + 1e-20).mean() 80 | 81 | 82 | def weighted_l2_loss_v2(x, y, w): 83 | return torch.sqrt(((x - y) ** 2).sum(-1) * w + 1e-20).mean() 84 | 85 | 86 | def quat_mult(q1, q2): 87 | w1, x1, y1, z1 
= q1.T 88 | w2, x2, y2, z2 = q2.T 89 | w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 90 | x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 91 | y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 92 | z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 93 | return torch.stack([w, x, y, z]).T 94 | 95 | 96 | def o3d_knn(pts, num_knn): 97 | indices = [] 98 | sq_dists = [] 99 | pcd = o3d.geometry.PointCloud() 100 | # breakpoint() 101 | # print(pts.shape) 102 | pts_cont = np.ascontiguousarray(pts, np.float64) 103 | # print(pts_cont.shape) 104 | pcd.points = o3d.utility.Vector3dVector(np.ascontiguousarray(pts, np.float64)) 105 | if len(pcd.points) == 0: 106 | print("Point cloud is empty!") 107 | else: 108 | pcd_tree = o3d.geometry.KDTreeFlann(pcd) 109 | 110 | for p in pcd.points: 111 | [_, i, d] = pcd_tree.search_knn_vector_3d(p, num_knn + 1) 112 | indices.append(i[1:]) 113 | sq_dists.append(d[1:]) 114 | return np.array(sq_dists), np.array(indices) 115 | 116 | def o3d_knn_tensor(pts_tensor, num_knn): 117 | if pts_tensor.numel() == 0: 118 | print("Point cloud is empty!") 119 | return None, None 120 | 121 | pts_np = pts_tensor.detach().cpu().numpy() if pts_tensor.is_cuda else pts_tensor.numpy() 122 | pts_np_cont = np.ascontiguousarray(pts_np, dtype=np.float64) 123 | 124 | pcd = o3d.geometry.PointCloud() 125 | pcd.points = o3d.utility.Vector3dVector(pts_np_cont) 126 | 127 | pcd_tree = o3d.geometry.KDTreeFlann(pcd) 128 | 129 | indices = [] 130 | sq_dists = [] 131 | 132 | for p in pts_np_cont: 133 | [_, idx, dist] = pcd_tree.search_knn_vector_3d(p, num_knn + 1) 134 | indices.append(idx[1:]) # Skip the first index since it's the point itself 135 | sq_dists.append(dist[1:]) 136 | 137 | return torch.tensor(sq_dists, dtype=pts_tensor.dtype, device=pts_tensor.device), torch.tensor(indices, dtype=torch.long, device=pts_tensor.device) 138 | 139 | 140 | def params2cpu(params, is_initial_timestep): 141 | if is_initial_timestep: 142 | res = {k: v.detach().cpu().contiguous().numpy() for k, v in params.items()} 143 | else: 144 | res = {k: v.detach().cpu().contiguous().numpy() for k, v in params.items() if 145 | k in ['means3D', 'rgb_colors', 'unnorm_rotations']} 146 | return res 147 | 148 | 149 | def save_params(output_params, seq, exp): 150 | to_save = {} 151 | for k in output_params[0].keys(): 152 | if k in output_params[1].keys(): 153 | to_save[k] = np.stack([params[k] for params in output_params]) 154 | else: 155 | to_save[k] = output_params[0][k] 156 | os.makedirs(f"./output/{exp}/{seq}", exist_ok=True) 157 | np.savez(f"./output/{exp}/{seq}/params", **to_save) 158 | 159 | 160 | def farthest_point_sample(xyz, npoint): 161 | """ 162 | Input: 163 | xyz: pointcloud data, [B, N, C] 164 | npoint: number of samples 165 | Return: 166 | centroids: sampled pointcloud index, [B, npoint] 167 | """ 168 | device = xyz.device 169 | B, N, C = xyz.shape 170 | centroids = torch.zeros(B, npoint, dtype=torch.long).to(device) 171 | distance = torch.ones(B, N).to(device) * 1e10 172 | farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device) 173 | batch_indices = torch.arange(B, dtype=torch.long).to(device) 174 | for i in range(npoint): 175 | centroids[:, i] = farthest 176 | centroid = xyz[batch_indices, farthest, :].view(B, 1, C) 177 | dist = torch.sum((xyz - centroid) ** 2, -1) 178 | mask = dist < distance 179 | distance[mask] = dist[mask] 180 | farthest = torch.max(distance, -1)[1] 181 | return centroids 182 | 183 | def quat2mat(q): 184 | norm = torch.sqrt(q[:, 0] * q[:, 0] + q[:, 1] * q[:, 1] + q[:, 2] * q[:, 2] + q[:, 3] * q[:, 3]) 185 | q = q / 
norm[:, None] 186 | rot = torch.zeros((q.shape[0], 3, 3)).to(q.device) 187 | r = q[:, 0] 188 | x = q[:, 1] 189 | y = q[:, 2] 190 | z = q[:, 3] 191 | rot[:, 0, 0] = 1 - 2 * (y * y + z * z) 192 | rot[:, 0, 1] = 2 * (x * y - r * z) 193 | rot[:, 0, 2] = 2 * (x * z + r * y) 194 | rot[:, 1, 0] = 2 * (x * y + r * z) 195 | rot[:, 1, 1] = 1 - 2 * (x * x + z * z) 196 | rot[:, 1, 2] = 2 * (y * z - r * x) 197 | rot[:, 2, 0] = 2 * (x * z - r * y) 198 | rot[:, 2, 1] = 2 * (y * z + r * x) 199 | rot[:, 2, 2] = 1 - 2 * (x * x + y * y) 200 | return rot 201 | 202 | def rot2quat(rot): 203 | # Preallocate quaternion tensor 204 | q = torch.zeros((rot.shape[0], 4)).to(rot.device) 205 | 206 | # Compute quaternion components 207 | q[:, 0] = 0.5 * torch.sqrt(1 + rot[:, 0, 0] + rot[:, 1, 1] + rot[:, 2, 2]) 208 | q[:, 1] = (rot[:, 2, 1] - rot[:, 1, 2]) / (4 * q[:, 0]) 209 | q[:, 2] = (rot[:, 0, 2] - rot[:, 2, 0]) / (4 * q[:, 0]) 210 | q[:, 3] = (rot[:, 1, 0] - rot[:, 0, 1]) / (4 * q[:, 0]) 211 | 212 | return q -------------------------------------------------------------------------------- /experiments/real_world/gs/sh_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The PlenOctree Authors. 2 | # Redistribution and use in source and binary forms, with or without 3 | # modification, are permitted provided that the following conditions are met: 4 | # 5 | # 1. Redistributions of source code must retain the above copyright notice, 6 | # this list of conditions and the following disclaimer. 7 | # 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 12 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 13 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 16 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 18 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 20 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 21 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 22 | # POSSIBILITY OF SUCH DAMAGE. 23 | 24 | import torch 25 | 26 | C0 = 0.28209479177387814 27 | C1 = 0.4886025119029199 28 | C2 = [ 29 | 1.0925484305920792, 30 | -1.0925484305920792, 31 | 0.31539156525252005, 32 | -1.0925484305920792, 33 | 0.5462742152960396 34 | ] 35 | C3 = [ 36 | -0.5900435899266435, 37 | 2.890611442640554, 38 | -0.4570457994644658, 39 | 0.3731763325901154, 40 | -0.4570457994644658, 41 | 1.445305721320277, 42 | -0.5900435899266435 43 | ] 44 | C4 = [ 45 | 2.5033429417967046, 46 | -1.7701307697799304, 47 | 0.9461746957575601, 48 | -0.6690465435572892, 49 | 0.10578554691520431, 50 | -0.6690465435572892, 51 | 0.47308734787878004, 52 | -1.7701307697799304, 53 | 0.6258357354491761, 54 | ] 55 | 56 | 57 | def eval_sh(deg, sh, dirs): 58 | """ 59 | Evaluate spherical harmonics at unit directions 60 | using hardcoded SH polynomials. 61 | Works with torch/np/jnp. 62 | ... Can be 0 or more batch dimensions. 63 | Args: 64 | deg: int SH deg. 
Currently, 0-3 supported 65 | sh: jnp.ndarray SH coeffs [..., C, (deg + 1) ** 2] 66 | dirs: jnp.ndarray unit directions [..., 3] 67 | Returns: 68 | [..., C] 69 | """ 70 | assert deg <= 4 and deg >= 0 71 | coeff = (deg + 1) ** 2 72 | assert sh.shape[-1] >= coeff 73 | 74 | result = C0 * sh[..., 0] 75 | if deg > 0: 76 | x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3] 77 | result = (result - 78 | C1 * y * sh[..., 1] + 79 | C1 * z * sh[..., 2] - 80 | C1 * x * sh[..., 3]) 81 | 82 | if deg > 1: 83 | xx, yy, zz = x * x, y * y, z * z 84 | xy, yz, xz = x * y, y * z, x * z 85 | result = (result + 86 | C2[0] * xy * sh[..., 4] + 87 | C2[1] * yz * sh[..., 5] + 88 | C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] + 89 | C2[3] * xz * sh[..., 7] + 90 | C2[4] * (xx - yy) * sh[..., 8]) 91 | 92 | if deg > 2: 93 | result = (result + 94 | C3[0] * y * (3 * xx - yy) * sh[..., 9] + 95 | C3[1] * xy * z * sh[..., 10] + 96 | C3[2] * y * (4 * zz - xx - yy) * sh[..., 11] + 97 | C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] + 98 | C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] + 99 | C3[5] * z * (xx - yy) * sh[..., 14] + 100 | C3[6] * x * (xx - 3 * yy) * sh[..., 15]) 101 | 102 | if deg > 3: 103 | result = (result + C4[0] * xy * (xx - yy) * sh[..., 16] + 104 | C4[1] * yz * (3 * xx - yy) * sh[..., 17] + 105 | C4[2] * xy * (7 * zz - 1) * sh[..., 18] + 106 | C4[3] * yz * (7 * zz - 3) * sh[..., 19] + 107 | C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] + 108 | C4[5] * xz * (7 * zz - 3) * sh[..., 21] + 109 | C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] + 110 | C4[7] * xz * (xx - 3 * yy) * sh[..., 23] + 111 | C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) * sh[..., 24]) 112 | return result 113 | 114 | 115 | def RGB2SH(rgb): 116 | return (rgb - 0.5) / C0 117 | 118 | 119 | def SH2RGB(sh): 120 | return sh * C0 + 0.5 121 | -------------------------------------------------------------------------------- /experiments/real_world/gs/train_utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import torch 3 | import numpy as np 4 | import random 5 | from PIL import Image 6 | 7 | from pgnd.utils import get_root 8 | import sys 9 | root: Path = get_root(__file__) 10 | 11 | from diff_gaussian_rasterization import GaussianRasterizer as Renderer 12 | from gs.helpers import setup_camera, l1_loss_v1, o3d_knn, params2rendervar 13 | from gs.external import calc_ssim, calc_psnr 14 | 15 | 16 | def get_custom_dataset(img_list, seg_list, metadata): 17 | """ 18 | Generates a dataset from given metadata and sequence. 
19 | """ 20 | dataset = [] 21 | 22 | # Loop over filenames corresponding to 't' in the metadata 23 | for c in range(len(img_list)): 24 | 25 | # Extract parameters from the metadata 26 | w, h = metadata['w'], metadata['h'] 27 | k = metadata['k'][c] 28 | w2c = metadata['w2c'][c] 29 | 30 | # Set up a camera using extracted parameters and some default values 31 | cam = setup_camera(w, h, k, w2c, near=0.01, far=100) 32 | 33 | # Get the filename of the current image and open it 34 | if isinstance(img_list[c], str): 35 | im = np.array(Image.open(img_list[c])) 36 | im = torch.tensor(im).float().cuda().permute(2, 0, 1) / 255 37 | else: 38 | im = torch.tensor(img_list[c]).permute(2, 0, 1).float().cuda() 39 | if im.max() > 2.0: 40 | im = im / 255 41 | 42 | # Open the corresponding segmentation image and convert it to a tensor 43 | if isinstance(seg_list[c], str): 44 | seg = np.array(Image.open(seg_list[c])).astype(np.float32) 45 | else: 46 | seg = seg_list[c].astype(np.float32) 47 | seg = torch.tensor(seg).float().cuda() 48 | 49 | # Create a color segmentation tensor. It seems to treat the segmentation as binary (object/background) 50 | seg_col = torch.stack((seg, torch.zeros_like(seg), 1 - seg)) 51 | 52 | # Add the data to the dataset 53 | dataset.append({'cam': cam, 'im': im, 'seg': seg_col, 'id': c}) 54 | 55 | return dataset 56 | 57 | 58 | def initialize_params(init_pt_cld, metadata): 59 | """ 60 | Initializes parameters and variables required for a 3D point cloud based on provided data. 61 | """ 62 | 63 | # Extract the segmentation data. 64 | seg = init_pt_cld[:, 6] 65 | 66 | # Define a constant for the maximum number of cameras. 67 | max_cams = 4 68 | 69 | # Compute the squared distance for the K-Nearest Neighbors of each point in the 3D point cloud. 70 | sq_dist, indices = o3d_knn(init_pt_cld[:, :3], 3) 71 | 72 | # Calculate the mean squared distance for the 3 closest points and clip its minimum value. 73 | mean3_sq_dist = sq_dist.mean(-1).clip(min=0.0000001) 74 | 75 | # Initialize various parameters related to the 3D point cloud. 76 | params = { 77 | 'means3D': init_pt_cld[:, :3], # 3D coordinates of the points. 78 | 'rgb_colors': init_pt_cld[:, 3:6], # RGB color values for the points. 79 | 'seg_colors': np.stack((seg, np.zeros_like(seg), 1 - seg), -1), # Segmentation colors. 80 | 'unnorm_rotations': np.tile([1, 0, 0, 0], (seg.shape[0], 1)), # Default rotations for each point. 81 | 'logit_opacities': np.zeros((seg.shape[0], 1)), # Initial opacity values for the points. 82 | 'log_scales': np.tile(np.log(np.sqrt(mean3_sq_dist))[..., None], (1, 3)), # Scale factors for the points. 83 | 'cam_m': np.zeros((max_cams, 3)), # Placeholder for camera motion. 84 | 'cam_c': np.zeros((max_cams, 3)), # Placeholder for camera center. 85 | } 86 | 87 | # Convert the params to PyTorch tensors and move them to the GPU. 88 | params = {k: torch.nn.Parameter(torch.tensor(v).cuda().float().contiguous().requires_grad_(True)) for k, v in params.items()} 89 | # params['rgb_colors'].requires_grad = False 90 | 91 | # Calculate the scene radius based on the camera centers. 92 | cam_centers = np.linalg.inv(metadata['w2c'])[:, :3, 3] 93 | scene_radius = 1.1 * np.max(np.linalg.norm(cam_centers - np.mean(cam_centers, 0)[None], axis=-1)) 94 | 95 | # Initialize other associated variables. 96 | variables = { 97 | 'max_2D_radius': torch.zeros(params['means3D'].shape[0]).cuda().float(), # Maximum 2D radius. 98 | 'scene_radius': scene_radius, # Scene radius. 
99 | 'means2D_gradient_accum': torch.zeros(params['means3D'].shape[0]).cuda().float(), # Means2D gradient accumulator. 100 | 'denom': torch.zeros(params['means3D'].shape[0]).cuda().float() # Denominator. 101 | } 102 | 103 | return params, variables 104 | 105 | 106 | def initialize_optimizer(params, variables): 107 | lrs = { 108 | 'means3D': 0.00016 * variables['scene_radius'], 109 | 'rgb_colors': 0.0, 110 | 'seg_colors': 0.0, 111 | 'unnorm_rotations': 0.001, 112 | 'logit_opacities': 0.05, 113 | 'log_scales': 0.001, 114 | 'cam_m': 1e-4, 115 | 'cam_c': 1e-4, 116 | } 117 | param_groups = [{'params': [v], 'name': k, 'lr': lrs[k]} for k, v in params.items()] 118 | return torch.optim.Adam(param_groups, lr=0.0, eps=1e-15) 119 | 120 | 121 | def get_loss(params, curr_data, variables, loss_weights): 122 | 123 | # Initialize dictionary to store various loss components 124 | losses = {} 125 | 126 | # Convert parameters to rendering variables and retain gradient for 'means2D' 127 | rendervar = params2rendervar(params) 128 | rendervar['means2D'].retain_grad() 129 | 130 | # Perform rendering to obtain image, radius, and other outputs 131 | im, radius, _ = Renderer(raster_settings=curr_data['cam'])(**rendervar) 132 | 133 | # Apply camera parameters to modify the rendered image 134 | curr_id = curr_data['id'] 135 | im = torch.exp(params['cam_m'][curr_id])[:, None, None] * im + params['cam_c'][curr_id][:, None, None] 136 | 137 | # Calculate image loss using L1 loss and ssim 138 | losses['im'] = 0.8 * l1_loss_v1(im, curr_data['im']) + 0.2 * (1.0 - calc_ssim(im, curr_data['im'])) 139 | variables['means2D'] = rendervar['means2D'] # Gradient only accum from colour render for densification 140 | 141 | segrendervar = params2rendervar(params) 142 | segrendervar['colors_precomp'] = params['seg_colors'] 143 | seg, _, _, = Renderer(raster_settings=curr_data['cam'])(**segrendervar) 144 | 145 | # Calculate segmentation loss 146 | losses['seg'] = 0.8 * l1_loss_v1(seg, curr_data['seg']) + 0.2 * (1.0 - calc_ssim(seg, curr_data['seg'])) 147 | 148 | # Calculate total loss as weighted sum of individual losses 149 | loss = sum([loss_weights[k] * v for k, v in losses.items()]) 150 | 151 | # Update variables related to rendering radius and seen areas 152 | seen = radius > 0 153 | variables['max_2D_radius'][seen] = torch.max(radius[seen], variables['max_2D_radius'][seen]) 154 | variables['seen'] = seen 155 | return loss, variables 156 | 157 | 158 | def report_progress(params, data, i, progress_bar, num_pts, every_i=100, vis_dir=None): 159 | if i % every_i == 0: 160 | im, _, _, = Renderer(raster_settings=data['cam'])(**params2rendervar(params)) 161 | curr_id = data['id'] 162 | im = torch.exp(params['cam_m'][curr_id])[:, None, None] * im + params['cam_c'][curr_id][:, None, None] 163 | if vis_dir: 164 | Image.fromarray((im.cpu().numpy().clip(0, 1) * 255).astype(np.uint8).transpose(1, 2, 0)).save(f"{vis_dir}/{i:06d}.png") 165 | psnr = calc_psnr(im, data['im']).mean() 166 | progress_bar.set_postfix({"img 0 PSNR": f"{psnr:.{7}f}, number of points: {num_pts}"}) 167 | progress_bar.update(every_i) 168 | 169 | 170 | def get_batch(todo_dataset, dataset): 171 | if not todo_dataset: 172 | todo_dataset = dataset.copy() 173 | curr_data = todo_dataset.pop(random.randint(0, len(todo_dataset) - 1)) 174 | return curr_data 175 | -------------------------------------------------------------------------------- /experiments/real_world/modules_planning/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/real_world/modules_planning/__init__.py -------------------------------------------------------------------------------- /experiments/real_world/modules_planning/common/communication.py: -------------------------------------------------------------------------------- 1 | XARM_STATE_PORT = 10101 2 | XARM_CONTROL_PORT = 10102 3 | OBJECT_POSE_PORT = 10109 4 | 5 | XARM_CONTROL_PORT_L = 10102 6 | XARM_CONTROL_PORT_R = 10110 7 | -------------------------------------------------------------------------------- /experiments/real_world/modules_planning/common/xarm.py: -------------------------------------------------------------------------------- 1 | XY_MIN, XY_MAX = 50, 800 2 | X_MIN,X_MAX = 0, 800 3 | Y_MIN,Y_MAX = -350, 350 4 | Z_MIN, Z_MAX = 110, 650 5 | 6 | MOVE_RATE = 1000 # hz; macro velocity = move_rate * xyz_velocity 7 | MOVE_SLEEP = 1 / MOVE_RATE 8 | XYZ_VELOCITY = 1.0 # mm (in every MOVE_SLEEP interval) 9 | ANGLE_VELOCITY_MAX = 0.05 # degree 10 | 11 | GRIPPER_OPEN_MAX = 800 12 | GRIPPER_OPEN_MIN = 0 13 | 14 | POSITION_UPDATE_INTERVAL = 0.02 # 100hz the reader must read faster than this 15 | COMMAND_CHECK_INTERVAL = 0.02 # 100hz command must be sent faster than this -------------------------------------------------------------------------------- /experiments/real_world/modules_planning/kinematics_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import time 4 | import numpy as np 5 | import transforms3d 6 | from pathlib import Path 7 | import open3d as o3d 8 | 9 | import warnings 10 | warnings.filterwarnings("always", category=RuntimeWarning) 11 | 12 | import sapien.core as sapien 13 | 14 | from nclaw.utils import get_root 15 | root: Path = get_root(__file__) 16 | 17 | def np2o3d(pcd, color=None): 18 | # pcd: (n, 3) 19 | # color: (n, 3) 20 | pcd_o3d = o3d.geometry.PointCloud() 21 | pcd_o3d.points = o3d.utility.Vector3dVector(pcd) 22 | if color is not None: 23 | assert pcd.shape[0] == color.shape[0] 24 | assert color.max() <= 1 25 | assert color.min() >= 0 26 | pcd_o3d.colors = o3d.utility.Vector3dVector(color) 27 | return pcd_o3d 28 | 29 | 30 | class KinHelper(): 31 | def __init__(self, robot_name, headless=True): 32 | # load robot 33 | if "xarm7" in robot_name: 34 | urdf_path = str(root / "assets/robots/xarm7/xarm7.urdf") 35 | self.eef_name = 'link7' 36 | else: 37 | raise RuntimeError('robot name not supported') 38 | self.robot_name = robot_name 39 | 40 | # load sapien robot 41 | self.engine = sapien.Engine() 42 | self.scene = self.engine.create_scene() 43 | loader = self.scene.create_urdf_loader() 44 | self.sapien_robot = loader.load(urdf_path) 45 | self.robot_model = self.sapien_robot.create_pinocchio_model() 46 | self.sapien_eef_idx = -1 47 | for link_idx, link in enumerate(self.sapien_robot.get_links()): 48 | if link.name == self.eef_name: 49 | self.sapien_eef_idx = link_idx 50 | break 51 | 52 | # load meshes and offsets from urdf_robot 53 | self.meshes = {} 54 | self.scales = {} 55 | self.offsets = {} 56 | self.pcd_dict = {} 57 | self.tool_meshes = {} 58 | 59 | def compute_fk_sapien_links(self, qpos, link_idx): 60 | fk = self.robot_model.compute_forward_kinematics(qpos) 61 | link_pose_ls = [] 62 | for i in link_idx: 63 | link_pose_ls.append(self.robot_model.get_link_pose(i).to_transformation_matrix()) 64 | return link_pose_ls 65 | 66 | def compute_ik_sapien(self, initial_qpos, cartesian, verbose=False): 67 | """ 
68 | Compute IK using sapien 69 | initial_qpos: (N, ) numpy array 70 | cartesian: (6, ) numpy array, x,y,z in meters, r,p,y in radians 71 | """ 72 | tf_mat = np.eye(4) 73 | tf_mat[:3, :3] = transforms3d.euler.euler2mat(ai=cartesian[3], aj=cartesian[4], ak=cartesian[5], axes='sxyz') 74 | tf_mat[:3, 3] = cartesian[0:3] 75 | pose = sapien.Pose.from_transformation_matrix(tf_mat) 76 | 77 | if 'xarm7' in self.robot_name: 78 | active_qmask = np.array([True, True, True, True, True, True, True]) 79 | qpos = self.robot_model.compute_inverse_kinematics( 80 | link_index=self.sapien_eef_idx, 81 | pose=pose, 82 | initial_qpos=initial_qpos, 83 | active_qmask=active_qmask, 84 | ) 85 | if verbose: 86 | print('ik qpos:', qpos) 87 | 88 | # verify ik 89 | fk_pose = self.compute_fk_sapien_links(qpos[0], [self.sapien_eef_idx])[0] 90 | 91 | if verbose: 92 | print('target pose for IK:', tf_mat) 93 | print('fk pose for IK:', fk_pose) 94 | 95 | pose_diff = np.linalg.norm(fk_pose[:3, 3] - tf_mat[:3, 3]) 96 | rot_diff = np.linalg.norm(fk_pose[:3, :3] - tf_mat[:3, :3]) 97 | 98 | if pose_diff > 0.01 or rot_diff > 0.01: 99 | print('ik pose diff:', pose_diff) 100 | print('ik rot diff:', rot_diff) 101 | warnings.warn('ik pose diff or rot diff too large. Return initial qpos.', RuntimeWarning, stacklevel=2, ) 102 | return initial_qpos 103 | return qpos[0] 104 | 105 | 106 | def test_fk(): 107 | robot_name = 'xarm7' 108 | init_qpos = np.array([0, 0, 0, 0, 0, 0, 0]) 109 | end_qpos = np.array([0, 0, 0, 0, 0, 0, 0]) 110 | 111 | kin_helper = KinHelper(robot_name=robot_name, headless=False) 112 | START_ARM_POSE = [0, 0, 0, 0, 0, 0, 0] 113 | 114 | for i in range(100): 115 | curr_qpos = init_qpos + (end_qpos - init_qpos) * i / 100 116 | fk = kin_helper.compute_fk_sapien_links(curr_qpos, [kin_helper.sapien_eef_idx])[0] 117 | fk_euler = transforms3d.euler.mat2euler(fk[:3, :3], axes='sxyz') 118 | 119 | if i == 0: 120 | init_ik_qpos = np.array(START_ARM_POSE) 121 | ik_qpos = kin_helper.compute_ik_sapien(init_ik_qpos, np.array(list(fk[:3, 3]) + list(fk_euler)).astype(np.float32)) 122 | re_fk_pos_mat = kin_helper.compute_fk_sapien_links(ik_qpos, [kin_helper.sapien_eef_idx])[0] 123 | re_fk_euler = transforms3d.euler.mat2euler(re_fk_pos_mat[:3, :3], axes='sxyz') 124 | re_fk_pos = re_fk_pos_mat[:3, 3] 125 | print('re_fk_pos diff:', np.linalg.norm(re_fk_pos - fk[:3, 3])) 126 | print('re_fk_euler diff:', np.linalg.norm(np.array(re_fk_euler) - np.array(fk_euler))) 127 | 128 | 129 | init_ik_qpos = ik_qpos.copy() 130 | qpos_diff = np.linalg.norm(ik_qpos[:6] - curr_qpos[:6]) 131 | if qpos_diff > 0.01: 132 | warnings.warn('qpos diff too large', RuntimeWarning, stacklevel=2, ) 133 | 134 | time.sleep(0.1) 135 | 136 | if __name__ == "__main__": 137 | test_fk() 138 | -------------------------------------------------------------------------------- /experiments/real_world/modules_planning/udp_util.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import numpy as np 3 | import pickle 4 | import threading 5 | import time 6 | 7 | from modules_planning.common.communication import * 8 | 9 | np.set_printoptions(precision=3, suppress=True) 10 | 11 | class udpSender: 12 | def __init__(self, port: dict): 13 | self.port = port 14 | self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 15 | self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1) 16 | 17 | def send(self, data: dict): 18 | self.sock.sendto(pickle.dumps(data), ('127.0.0.1', self.port)) 19 | 20 | def close(self): 21 | 
self.sock.close() 22 | 23 | class udpReceiver: 24 | def __init__(self, ports: dict, re_use_address = False): 25 | self.ports = ports 26 | self.re_use_address = re_use_address 27 | self.results = {} 28 | self.lock = threading.Lock() 29 | self.thread = None 30 | 31 | def receive(self, name, port): 32 | sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 33 | if self.re_use_address: 34 | sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 35 | sock.bind(('127.0.0.1', port)) 36 | # sock.settimeout(0.1) # Set timeout to 1 second 37 | while self.alive: 38 | try: 39 | t0 = time.time() 40 | try: 41 | data, addr = sock.recvfrom(32768) 42 | except socket.timeout: 43 | continue 44 | data = pickle.loads(data) 45 | with self.lock: 46 | self.results[name] = data 47 | t1 = time.time() 48 | # print(f"Receive {name} data from {addr} in {t1-t0:.6f} seconds") 49 | except BaseException as e: 50 | print(f"Error in receiving {name} data: {e}") 51 | break 52 | sock.close() 53 | print(f"Receive {name} thread stopped") 54 | 55 | def start(self): 56 | self.alive = True 57 | self.thread = [] 58 | for name, port in self.ports.items(): 59 | t = threading.Thread(target=self.receive, args=(name, port)) 60 | t.start() 61 | self.thread.append(t) 62 | 63 | def stop(self): 64 | print("Stopping udpReceiver") 65 | self.alive = False 66 | for t in self.thread: 67 | if t is not None and t.is_alive(): 68 | t.join() 69 | print("udpReceiver stopped") 70 | 71 | def get(self, name=None, pop=False): 72 | with self.lock: 73 | if name is None: 74 | ret = self.results 75 | if pop: 76 | self.results = {} 77 | return ret 78 | else: 79 | if name in self.results: 80 | if pop: 81 | ret = self.results[name] 82 | del self.results[name] 83 | return ret 84 | else: 85 | return None 86 | 87 | if __name__ == '__main__': 88 | udpReceiver = udpReceiver(ports= 89 | { 90 | 'object_pose': OBJECT_POSE_PORT, 91 | 'xarm_state': XARM_STATE_PORT 92 | }) 93 | udpReceiver.start() 94 | try: 95 | while True: 96 | print(udpReceiver.get()) 97 | print() 98 | time.sleep(0.01) 99 | except KeyboardInterrupt: 100 | try: 101 | udpReceiver.lock.release() 102 | except: 103 | pass 104 | udpReceiver.stop() 105 | -------------------------------------------------------------------------------- /experiments/real_world/modules_teleop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/real_world/modules_teleop/__init__.py -------------------------------------------------------------------------------- /experiments/real_world/modules_teleop/recorder.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import time 4 | import numpy as np 5 | import cv2 6 | import open3d as o3d 7 | from threadpoolctl import threadpool_limits 8 | import multiprocess as mp 9 | import threading 10 | from threading import Lock 11 | 12 | from pgnd.utils import get_root 13 | root: Path = get_root(__file__) 14 | 15 | from camera.multi_realsense import MultiRealsense 16 | from camera.single_realsense import SingleRealsense 17 | 18 | 19 | class Recorder(mp.Process): 20 | 21 | def __init__( 22 | self, 23 | realsense: MultiRealsense | SingleRealsense, 24 | capture_fps, 25 | record_fps, 26 | record_time, 27 | process_func, 28 | exp_name=None, 29 | data_dir="data", 30 | verbose=False, 31 | ): 32 | super().__init__() 33 | self.verbose = verbose 34 | 35 | self.capture_fps = capture_fps 
36 | self.record_fps = record_fps 37 | self.record_time = record_time 38 | self.exp_name = exp_name 39 | self.data_dir = data_dir 40 | 41 | if self.exp_name is None: 42 | assert self.record_fps == 0 43 | 44 | self.realsense = realsense 45 | self.recorder_q = mp.Queue(maxsize=1) 46 | 47 | self.process_func = process_func 48 | self.num_cam = len(realsense.cameras.keys()) 49 | self.alive = mp.Value('b', False) 50 | self.record_restart = mp.Value('b', False) 51 | self.record_stop = mp.Value('b', False) 52 | 53 | def log(self, msg): 54 | if self.verbose: 55 | print(f"\033[92m{self.name}: {msg}\033[0m") 56 | 57 | @property 58 | def can_record(self): 59 | return self.record_fps != 0 60 | 61 | def run(self): 62 | # limit threads 63 | threadpool_limits(1) 64 | cv2.setNumThreads(1) 65 | 66 | realsense = self.realsense 67 | 68 | capture_fps = self.capture_fps 69 | record_fps = self.record_fps 70 | record_time = self.record_time 71 | 72 | cameras_output = None 73 | recording_frame = float("inf") # local record step index (since current record start), record fps 74 | record_start_frame = 0 # global step index (since process start), capture fps 75 | is_recording = False # recording state flag 76 | timestamps_f = None 77 | 78 | while self.alive.value: 79 | try: 80 | cameras_output = realsense.get(out=cameras_output) 81 | get_time = time.time() 82 | timestamps = [cameras_output[i]['timestamp'].item() for i in range(self.num_cam)] 83 | if is_recording and not all([abs(timestamps[i] - timestamps[i+1]) < 0.05 for i in range(self.num_cam - 1)]): 84 | print(f"Captured at different timestamps: {[f'{x:.2f}' for x in timestamps]}") 85 | 86 | # treat captured time and record time as the same 87 | process_start_time = get_time 88 | process_out = self.process_func(cameras_output) if self.process_func is not None else cameras_output 89 | self.log(f"process time: {time.time() - process_start_time}") 90 | 91 | if not self.recorder_q.full(): 92 | self.recorder_q.put(process_out) 93 | 94 | if self.can_record: 95 | # recording state machine: 96 | # ---restart_cmd---> 97 | # record not_record 98 | # <-- stop_cmd / timeover ---- 99 | if not is_recording and self.record_restart.value == True: 100 | self.record_restart.value = False 101 | 102 | recording_frame = 0 103 | record_start_time = get_time 104 | record_start_frame = cameras_output[0]['step_idx'].item() 105 | 106 | record_dir = root / "log" / self.data_dir / self.exp_name / f"{record_start_time:.0f}" 107 | os.makedirs(record_dir, exist_ok=True) 108 | timestamps_f = open(f'{record_dir}/timestamps.txt', 'a') 109 | 110 | for i in range(self.num_cam): 111 | os.makedirs(f'{record_dir}/camera_{i}/rgb', exist_ok=True) 112 | os.makedirs(f'{record_dir}/camera_{i}/depth', exist_ok=True) 113 | is_recording = True 114 | 115 | elif is_recording and ( 116 | self.record_stop.value == True or 117 | (recording_frame >= record_time * record_fps) 118 | ): 119 | finish_time = get_time 120 | print(f"is_recording {is_recording}, self.record_stop.value {self.record_stop.value}, recording time {recording_frame}, max recording time {record_time} * {record_fps}") 121 | print(f"total time: {finish_time - record_start_time}") 122 | print(f"fps: {recording_frame / (finish_time - record_start_time)}") 123 | is_recording = False 124 | 125 | timestamps_f.close() 126 | self.record_restart.value = False 127 | self.record_stop.value = False 128 | else: 129 | self.record_restart.value = False 130 | self.record_stop.value = False 131 | 132 | # record the frame according to the record_fps 133 | if 
is_recording and cameras_output[0]['step_idx'].item() >= (recording_frame * (capture_fps // record_fps) + record_start_frame): 134 | timestamps_f.write(' '.join( 135 | [str(cameras_output[i]['step_idx'].item()) for i in range(self.num_cam)] + 136 | [str(np.round(cameras_output[i]['timestamp'].item() - record_start_time, 3)) for i in range(self.num_cam)] + 137 | [str(cameras_output[i]['timestamp'].item()) for i in range(self.num_cam)] 138 | ) + '\n') 139 | timestamps_f.flush() 140 | for i in range(self.num_cam): 141 | cv2.imwrite(f'{record_dir}/camera_{i}/rgb/{recording_frame:06}.jpg', cameras_output[i]['color']) 142 | cv2.imwrite(f'{record_dir}/camera_{i}/depth/{recording_frame:06}.png', cameras_output[i]['depth']) 143 | recording_frame += 1 144 | 145 | except BaseException as e: 146 | print("Recorder error: ", e) 147 | break 148 | 149 | if self.can_record: 150 | if timestamps_f is not None and not timestamps_f.closed: 151 | timestamps_f.close() 152 | finish_time = time.time() 153 | 154 | self.stop() 155 | print("Recorder process stopped") 156 | 157 | 158 | def start(self): 159 | self.alive.value = True 160 | super().start() 161 | 162 | def stop(self): 163 | self.alive.value = False 164 | self.recorder_q.close() 165 | 166 | def set_record_start(self): 167 | if self.record_fps == 0: 168 | print("record disabled because record_fps is 0") 169 | assert self.record_restart.value == False 170 | else: 171 | self.record_restart.value = True 172 | print("record restart cmd received") 173 | 174 | def set_record_stop(self): 175 | if self.record_fps == 0: 176 | print("record disabled because record_fps is 0") 177 | assert self.record_stop.value == False 178 | else: 179 | self.record_stop.value = True 180 | print("record stop cmd received") 181 | -------------------------------------------------------------------------------- /experiments/real_world/plan.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import argparse 3 | import random 4 | import time 5 | import os 6 | import matplotlib.pyplot as plt 7 | from collections import defaultdict 8 | from tqdm import tqdm, trange 9 | import hydra 10 | from omegaconf import DictConfig, OmegaConf 11 | import yaml 12 | from datetime import datetime 13 | import numpy as np 14 | from PIL import Image 15 | import warp as wp 16 | import matplotlib.pyplot as plt 17 | import multiprocess as mp 18 | import torch 19 | import torch.backends.cudnn 20 | import torch.nn as nn 21 | from torch.nn.utils import clip_grad_norm_ 22 | from torch.utils.data import DataLoader 23 | import logging 24 | 25 | from pgnd.utils import get_root, mkdir 26 | from modules_planning.planning_env import RobotPlanningEnv 27 | 28 | root: Path = get_root(__file__) 29 | logging.basicConfig(level=logging.WARNING) 30 | 31 | 32 | def main(args): 33 | mp.set_start_method('spawn') 34 | 35 | with open(root / args.config, 'r') as f: 36 | config = yaml.load(f, Loader=yaml.CLoader) 37 | cfg = OmegaConf.create(config) 38 | 39 | cfg.sim.num_steps = 1000 40 | cfg.sim.gripper_forcing = False 41 | cfg.sim.uniform = True 42 | 43 | iteration = args.iteration 44 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 45 | 46 | ckpt_path = (root / args.config).parent / 'ckpt' / f'{iteration:06d}.pt' 47 | seed = cfg.seed 48 | random.seed(seed) 49 | np.random.seed(seed) 50 | torch.manual_seed(seed) 51 | 52 | # path 53 | datetime_now = datetime.now().strftime('%y%m%d-%H%M%S') 54 | exp_root: Path = root / 'log' / cfg.train.name / 'plan' 
/ datetime_now 55 | mkdir(exp_root, overwrite=cfg.overwrite, resume=cfg.resume) 56 | 57 | env = RobotPlanningEnv( 58 | cfg, 59 | exp_root=exp_root, 60 | ckpt_path=ckpt_path, 61 | resolution=(848, 480), 62 | capture_fps=30, 63 | record_fps=0, 64 | text_prompts=args.text_prompts, 65 | show_annotation=(not args.no_annotation), 66 | use_robot=True, 67 | bimanual=args.bimanual, 68 | gripper_enable=True, 69 | debug=True, 70 | construct_target=args.construct_target, 71 | ) 72 | 73 | env.start() 74 | env.join() 75 | 76 | 77 | if __name__ == '__main__': 78 | arg_parser = argparse.ArgumentParser() 79 | arg_parser.add_argument('--config', type=str, default='log/cloth/train/hydra.yaml') 80 | arg_parser.add_argument('--iteration', type=str, default=100000) 81 | arg_parser.add_argument('--text_prompts', type=str, default='green towel.') 82 | arg_parser.add_argument('--seed', type=int, default=42) 83 | arg_parser.add_argument('--no_annotation', action='store_true') 84 | arg_parser.add_argument('--bimanual', action='store_true') 85 | arg_parser.add_argument('--construct_target', action='store_true') 86 | args = arg_parser.parse_args() 87 | 88 | with torch.no_grad(): 89 | main(args) 90 | -------------------------------------------------------------------------------- /experiments/real_world/teleop.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import argparse 3 | import multiprocess as mp 4 | import sys 5 | sys.path.append(str(Path(__file__).parent.parent.parent)) 6 | 7 | import pgnd 8 | from pgnd.utils import get_root, mkdir 9 | root: Path = get_root(__file__) 10 | sys.path.append(str(root / "real_world")) 11 | 12 | from modules_teleop.robot_env_teleop import RobotTeleopEnv 13 | 14 | 15 | if __name__ == '__main__': 16 | mp.set_start_method('spawn') 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--name', type=str, default='') 20 | parser.add_argument('--bimanual', action='store_true') 21 | args = parser.parse_args() 22 | 23 | assert args.name != '', "Please provide a name for the experiment" 24 | 25 | env = RobotTeleopEnv( 26 | mode='3D', 27 | exp_name=args.name, 28 | resolution=(848, 480), 29 | capture_fps=30, 30 | record_fps=30, 31 | perception_process_func=None, 32 | use_robot=True, 33 | use_gello=True, 34 | bimanual=args.bimanual, 35 | gripper_enable=True, 36 | data_dir="data", 37 | debug=True, 38 | ) 39 | 40 | env.start() 41 | env.join() 42 | -------------------------------------------------------------------------------- /experiments/real_world/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/real_world/utils/__init__.py -------------------------------------------------------------------------------- /experiments/real_world/utils/env_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_bounding_box_bimanual(): 5 | return np.array([[0.0, 0.6], [-0.35, 0.45 + 0.75], [-0.65, 0.05]]) # the world frame robot workspace 6 | 7 | 8 | def get_bounding_box(): 9 | return np.array([[0.0, 0.6], [-0.35, 0.45], [-0.65, 0.05]]) # the world frame robot workspace 10 | -------------------------------------------------------------------------------- /experiments/real_world/utils/pcd_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 
import open3d as o3d 3 | 4 | 5 | def filter_tabletop_points(pcd): 6 | # RANSAC to find table plane 7 | plane_model, inliers = pcd.segment_plane(distance_threshold=0.01, ransac_n=3, num_iterations=1000) 8 | [a, b, c, d] = plane_model 9 | table_plane = np.array([a, b, c, d]) 10 | 11 | # remove points on the table 12 | pcd = pcd.select_by_index(inliers, invert=True) 13 | 14 | outliers = None 15 | new_outlier = None 16 | rm_iter = 0 17 | while new_outlier is None or len(new_outlier.points) > 0: 18 | _, inlier_idx = pcd.remove_statistical_outlier( 19 | nb_neighbors = 20, std_ratio = 1.5 + rm_iter * 0.5 20 | ) 21 | new_pcd = pcd.select_by_index(inlier_idx) 22 | new_outlier = pcd.select_by_index(inlier_idx, invert=True) 23 | if outliers is None: 24 | outliers = new_outlier 25 | else: 26 | outliers += new_outlier 27 | pcd = new_pcd 28 | rm_iter += 1 29 | 30 | return pcd 31 | 32 | 33 | def get_tabletop_points(rgb_list, depth_list, R_list, t_list, intr_list, bbox, depth_threshold=[0, 2]): 34 | # increase if out of memory 35 | stride = 1 36 | 37 | pcd_all = o3d.geometry.PointCloud() 38 | 39 | for i in range(len(rgb_list)): 40 | intr = intr_list[i] 41 | R_cam2board = R_list[i] 42 | t_cam2board = t_list[i] 43 | 44 | depth = depth_list[i].copy().astype(np.float32) 45 | 46 | points = depth2fgpcd(depth, intr) 47 | points = points.reshape(depth.shape[0], depth.shape[1], 3) 48 | points = points[::stride, ::stride, :].reshape(-1, 3) 49 | 50 | mask = np.logical_and( 51 | (depth > depth_threshold[0]), (depth < depth_threshold[1]) 52 | ) # (H, W) 53 | mask = mask[::stride, ::stride].reshape(-1) 54 | 55 | img = rgb_list[i].copy() 56 | 57 | points = points[mask].reshape(-1, 3) 58 | 59 | points = R_cam2board @ points.T + t_cam2board[:, None] 60 | points = points.T # (N, 3) 61 | 62 | pcd = o3d.geometry.PointCloud() 63 | pcd.points = o3d.utility.Vector3dVector(points) 64 | 65 | colors = img[::stride, ::stride, :].reshape(-1, 3).astype(np.float64) 66 | colors = colors[mask].reshape(-1, 3) 67 | colors = colors[:, ::-1].copy() 68 | pcd.colors = o3d.utility.Vector3dVector(colors / 255) 69 | pcd_all += pcd 70 | 71 | pcd = pcd_all 72 | pcd = pcd.crop(o3d.geometry.AxisAlignedBoundingBox(min_bound=bbox[:, 0], max_bound=bbox[:, 1])) 73 | pcd = pcd.voxel_down_sample(voxel_size=0.001) 74 | 75 | # NOTE: simple filtering instead of instance-specific segmentation processing 76 | pcd = filter_tabletop_points(pcd) 77 | return pcd 78 | 79 | 80 | def rpy_to_rotation_matrix(roll, pitch, yaw): 81 | # Assume the input in in degree 82 | roll = roll / 180 * np.pi 83 | pitch = pitch / 180 * np.pi 84 | yaw = yaw / 180 * np.pi 85 | # Define the rotation matrices 86 | Rx = np.array([[1, 0, 0], [0, np.cos(roll), -np.sin(roll)], [0, np.sin(roll), np.cos(roll)]]) 87 | Ry = np.array([[np.cos(pitch), 0, np.sin(pitch)], [0, 1, 0], [-np.sin(pitch), 0, np.cos(pitch)]]) 88 | Rz = np.array([[np.cos(yaw), -np.sin(yaw), 0], [np.sin(yaw), np.cos(yaw), 0], [0, 0, 1]]) 89 | # Combine the rotations 90 | R = Rz @ Ry @ Rx 91 | return R 92 | 93 | 94 | def rotation_matrix_to_rpy(rotation_matrix): 95 | # Get the roll, pitch, yaw in radian 96 | roll = np.arctan2(rotation_matrix[2, 1], rotation_matrix[2, 2]) 97 | pitch = np.arctan2(-rotation_matrix[2, 0], np.sqrt(rotation_matrix[2, 1] ** 2 + rotation_matrix[2, 2] ** 2)) 98 | yaw = np.arctan2(rotation_matrix[1, 0], rotation_matrix[0, 0]) 99 | # Get the roll, pitch, yaw in degree 100 | roll = roll / np.pi * 180 101 | pitch = pitch / np.pi * 180 102 | yaw = yaw / np.pi * 180 103 | return roll, pitch, yaw 104 | 105 | 
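# A minimal sketch (not from the original file): the two converters above both work in
# degrees, so a cheap round-trip check helps catch unit mix-ups when wiring them into
# calibration code. Only rpy_to_rotation_matrix / rotation_matrix_to_rpy are taken from
# this file; the helper name and the sample angles below are hypothetical.
def _rpy_roundtrip_ok():
    rpy = (30.0, -15.0, 60.0)  # roll, pitch, yaw in degrees
    R = rpy_to_rotation_matrix(*rpy)  # 3x3 rotation matrix, R = Rz @ Ry @ Rx
    return np.allclose(rotation_matrix_to_rpy(R), rpy)  # degrees back out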
106 | def depth2fgpcd(depth, intrinsic_matrix): 107 | H, W = depth.shape 108 | x, y = np.meshgrid(np.arange(W), np.arange(H)) 109 | x = x.reshape(-1) 110 | y = y.reshape(-1) 111 | depth = depth.reshape(-1) 112 | points = np.stack([x, y, np.ones_like(x)], axis=1) 113 | points = points * depth[:, None] 114 | points = points @ np.linalg.inv(intrinsic_matrix).T 115 | return points 116 | 117 | 118 | def similarity_transform(from_points, to_points): 119 | 120 | assert len(from_points.shape) == 2, \ 121 | "from_points must be a m x n array" 122 | assert from_points.shape == to_points.shape, \ 123 | "from_points and to_points must have the same shape" 124 | 125 | N, m = from_points.shape 126 | 127 | mean_from = from_points.mean(axis = 0) 128 | mean_to = to_points.mean(axis = 0) 129 | 130 | delta_from = from_points - mean_from # N x m 131 | delta_to = to_points - mean_to # N x m 132 | 133 | sigma_from = (delta_from * delta_from).sum(axis = 1).mean() 134 | sigma_to = (delta_to * delta_to).sum(axis = 1).mean() 135 | 136 | cov_matrix = delta_to.T.dot(delta_from) / N 137 | 138 | U, d, V_t = np.linalg.svd(cov_matrix, full_matrices = True) 139 | cov_rank = np.linalg.matrix_rank(cov_matrix) 140 | S = np.eye(m) 141 | 142 | if cov_rank >= m - 1: # and np.linalg.det(cov_matrix) < 0: # TODO touch calibration 143 | S[m-1, m-1] = -1 144 | elif cov_rank < m-1: 145 | raise ValueError("colinearility detected in covariance matrix:\n{}".format(cov_matrix)) 146 | 147 | R = U.dot(S).dot(V_t) 148 | c = (d * S.diagonal()).sum() / sigma_from 149 | t = mean_to - c*R.dot(mean_from) 150 | 151 | return c, R, t 152 | 153 | 154 | def visualize_o3d(geometries): 155 | frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.2, origin=[0, 0, 0]) 156 | geometries.append(frame) 157 | 158 | pcd = o3d.geometry.PointCloud() 159 | pcd.points = o3d.utility.Vector3dVector() 160 | viewer = o3d.visualization.Visualizer() 161 | viewer.create_window() 162 | for geometry in geometries: 163 | viewer.add_geometry(geometry) 164 | opt = viewer.get_render_option() 165 | opt.background_color = np.asarray([1., 1., 1.]) 166 | viewer.run() 167 | -------------------------------------------------------------------------------- /experiments/real_world/utils/planning_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | import math 6 | 7 | 8 | def chamfer(x, y): # x: (B, N, D), y: (B, M, D) 9 | x = x[:, None].repeat(1, y.shape[1], 1, 1) # (B, M, N, D) 10 | y = y[:, :, None].repeat(1, 1, x.shape[2], 1) # (B, M, N, D) 11 | dis = torch.norm(x - y, 2, dim=-1) # (B, M, N) 12 | dis_xy = torch.mean(dis.min(dim=2).values, dim=1) # dis_xy: mean over N 13 | dis_yx = torch.mean(dis.min(dim=1).values, dim=1) # dis_yx: mean over M 14 | return dis_xy + dis_yx 15 | 16 | 17 | def batch_chamfer_dist(xyz, xyz_gt): 18 | # xyz: (B, N, 3) 19 | # xyz_gt: (M, 3) 20 | 21 | # mean aligning 22 | # chamfer = (xyz.mean(dim=1) - xyz_gt.mean(dim=0)).norm(dim=1) # (B,) 23 | 24 | # chamfer distance 25 | xyz_gt = xyz_gt[None] # (1, M, 3) 26 | dist1 = torch.sqrt(torch.sum((xyz[:, :, None] - xyz_gt[:, None]) ** 2, dim=3)) # (B, N, M) 27 | dist2 = torch.sqrt(torch.sum((xyz_gt[:, None] - xyz[:, :, None]) ** 2, dim=3)) # (B, M, N) 28 | chamfer = torch.mean(torch.min(dist1, dim=1).values, dim=1) + torch.mean(torch.min(dist2, dim=1).values, dim=1) # (B,) 29 | return chamfer 30 | 31 | 32 | def angle_normalize(x): 33 | return (((x + math.pi) % (2 * 
math.pi)) - math.pi) 34 | 35 | 36 | def clip_actions(action, action_lower_lim, action_upper_lim): 37 | action_new = action.clone() 38 | # action_new[..., 2] = angle_normalize(action[..., 2]) 39 | action_new.data.clamp_(action_lower_lim, action_upper_lim) 40 | return action_new 41 | -------------------------------------------------------------------------------- /experiments/real_world/utils/render_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import time 4 | import kornia 5 | 6 | 7 | def interpolate_motions(bones, motions, relations, xyz, rot=None, quat=None, weights=None, device='cuda', step='n/a'): 8 | # bones: (n_bones, 3) 9 | # motions: (n_bones, 3) 10 | # relations: (n_bones, k) 11 | # indices: (n_bones,) 12 | # xyz: (n_particles, 3) 13 | # rot: (n_particles, 3, 3) 14 | # quat: (n_particles, 4) 15 | # weights: (n_particles, n_bones) 16 | 17 | t0 = time.time() 18 | n_bones, _ = bones.shape 19 | n_particles, _ = xyz.shape 20 | 21 | # Compute the bone transformations 22 | bone_transforms = torch.zeros((n_bones, 4, 4), device=device) 23 | 24 | n_adj = relations.shape[1] 25 | 26 | adj_bones = bones[relations] - bones[:, None] # (n_bones, n_adj, 3) 27 | adj_bones_new = (bones[relations] + motions[relations]) - (bones[:, None] + motions[:, None]) # (n_bones, n_adj, 3) 28 | 29 | W = torch.eye(n_adj, device=device)[None].repeat(n_bones, 1, 1) # (n_bones, n_adj, n_adj) 30 | 31 | # fit a transformation 32 | F = adj_bones_new.permute(0, 2, 1) @ W @ adj_bones # (n_bones, 3, 3) 33 | 34 | cov_rank = torch.linalg.matrix_rank(F) # (n_bones,) 35 | 36 | cov_rank_3_mask = cov_rank == 3 # (n_bones,) 37 | cov_rank_2_mask = cov_rank == 2 # (n_bones,) 38 | cov_rank_1_mask = cov_rank == 1 # (n_bones,) 39 | 40 | F_2_3 = F[cov_rank_2_mask | cov_rank_3_mask] # (n_bones, 3, 3) 41 | F_1 = F[cov_rank_1_mask] # (n_bones, 3, 3) 42 | 43 | # 2 or 3 44 | try: 45 | U, S, V = torch.svd(F_2_3) # S: (n_bones, 3) 46 | S = torch.eye(3, device=device, dtype=torch.float32)[None].repeat(F_2_3.shape[0], 1, 1) 47 | neg_det_mask = torch.linalg.det(F_2_3) < 0 48 | if neg_det_mask.sum() > 0: 49 | print(f'[step {step}] F det < 0 for {neg_det_mask.sum()} bones') 50 | S[neg_det_mask, -1, -1] = -1 51 | R = U @ S @ V.permute(0, 2, 1) 52 | except: 53 | print(f'[step {step}] SVD failed') 54 | import ipdb; ipdb.set_trace() 55 | 56 | neg_1_det_mask = torch.abs(torch.linalg.det(R) + 1) < 1e-3 57 | pos_1_det_mask = torch.abs(torch.linalg.det(R) - 1) < 1e-3 58 | bad_det_mask = ~(neg_1_det_mask | pos_1_det_mask) 59 | 60 | if neg_1_det_mask.sum() > 0: 61 | print(f'[step {step}] det -1') 62 | S[neg_1_det_mask, -1, -1] *= -1 63 | R = U @ S @ V.permute(0, 2, 1) 64 | 65 | try: 66 | assert bad_det_mask.sum() == 0 67 | except: 68 | print(f'[step {step}] Bad det') 69 | import ipdb; ipdb.set_trace() 70 | 71 | try: 72 | if cov_rank_1_mask.sum() > 0: 73 | print(f'[step {step}] F rank 1 for {cov_rank_1_mask.sum()} bones') 74 | U, S, V = torch.svd(F_1) # S: (n_bones', 3) 75 | assert torch.allclose(S[:, 1:], torch.zeros_like(S[:, 1:])) 76 | x = torch.tensor([1., 0., 0.], device=device, dtype=torch.float32)[None].repeat(F_1.shape[0], 1) # (n_bones', 3) 77 | axis = U[:, :, 0] # (n_bones', 3) 78 | perp_axis = torch.linalg.cross(axis, x) # (n_bones', 3) 79 | 80 | perp_axis_norm_mask = torch.norm(perp_axis, dim=1) < 1e-6 81 | 82 | R = torch.zeros((F_1.shape[0], 3, 3), device=device, dtype=torch.float32) 83 | if perp_axis_norm_mask.sum() > 0: 84 | print(f'[step {step}] Perp axis 
norm 0 for {perp_axis_norm_mask.sum()} bones') 85 | R[perp_axis_norm_mask] = torch.eye(3, device=device, dtype=torch.float32)[None].repeat(perp_axis_norm_mask.sum(), 1, 1) 86 | 87 | perp_axis = perp_axis[~perp_axis_norm_mask] # (n_bones', 3) 88 | x = x[~perp_axis_norm_mask] # (n_bones', 3) 89 | 90 | perp_axis = perp_axis / torch.norm(perp_axis, dim=1, keepdim=True) # (n_bones', 3) 91 | third_axis = torch.linalg.cross(x, perp_axis) # (n_bones', 3) 92 | assert ((torch.norm(third_axis, dim=1) - 1).abs() < 1e-6).all() 93 | third_axis_after = torch.linalg.cross(axis, perp_axis) # (n_bones', 3) 94 | 95 | X = torch.stack([x, perp_axis, third_axis], dim=-1) 96 | Y = torch.stack([axis, perp_axis, third_axis_after], dim=-1) 97 | R[~perp_axis_norm_mask] = Y @ X.permute(0, 2, 1) 98 | except: 99 | R = torch.zeros((F_1.shape[0], 3, 3), device=device, dtype=torch.float32) 100 | R[:, 0, 0] = 1 101 | R[:, 1, 1] = 1 102 | R[:, 2, 2] = 1 103 | 104 | try: 105 | bone_transforms[:, :3, :3] = R 106 | except: 107 | print(f'[step {step}] Bad R') 108 | bone_transforms[:, 0, 0] = 1 109 | bone_transforms[:, 1, 1] = 1 110 | bone_transforms[:, 2, 2] = 1 111 | bone_transforms[:, :3, 3] = motions 112 | 113 | # Compute the weights 114 | if weights is None: 115 | weights = torch.ones((n_particles, n_bones), device=device) 116 | dist = torch.cdist(xyz[None], bones[None])[0] # (n_particles, n_bones) 117 | dist = torch.clamp(dist, min=1e-4) 118 | weights = 1 / dist 119 | weights = weights / weights.sum(dim=1, keepdim=True) # (n_particles, n_bones) 120 | 121 | # Compute the transformed particles 122 | xyz_transformed = torch.zeros((n_particles, n_bones, 3), device=device) 123 | 124 | xyz_transformed = xyz[:, None] - bones[None] # (n_particles, n_bones, 3) 125 | xyz_transformed = torch.einsum('ijk,jkl->ijl', xyz_transformed, bone_transforms[:, :3, :3].permute(0, 2, 1)) # (n_particles, n_bones, 3) 126 | xyz_transformed = xyz_transformed + bone_transforms[:, :3, 3][None] + bones[None] # (n_particles, n_bones, 3) 127 | xyz_transformed = (xyz_transformed * weights[:, :, None]).sum(dim=1) # (n_particles, 3) 128 | 129 | def quaternion_multiply(q1, q2): 130 | # q1: bsz x 4 131 | # q2: bsz x 4 132 | q = torch.zeros_like(q1) 133 | q[:, 0] = q1[:, 0] * q2[:, 0] - q1[:, 1] * q2[:, 1] - q1[:, 2] * q2[:, 2] - q1[:, 3] * q2[:, 3] 134 | q[:, 1] = q1[:, 0] * q2[:, 1] + q1[:, 1] * q2[:, 0] + q1[:, 2] * q2[:, 3] - q1[:, 3] * q2[:, 2] 135 | q[:, 2] = q1[:, 0] * q2[:, 2] - q1[:, 1] * q2[:, 3] + q1[:, 2] * q2[:, 0] + q1[:, 3] * q2[:, 1] 136 | q[:, 3] = q1[:, 0] * q2[:, 3] + q1[:, 1] * q2[:, 2] - q1[:, 2] * q2[:, 1] + q1[:, 3] * q2[:, 0] 137 | return q 138 | 139 | if quat is not None: 140 | base_quats = kornia.geometry.conversions.rotation_matrix_to_quaternion(bone_transforms[:, :3, :3]) # (n_bones, 4) 141 | base_quats = torch.nn.functional.normalize(base_quats, dim=-1) # (n_particles, 4) 142 | quats = (base_quats[None] * weights[:, :, None]).sum(dim=1) # (n_particles, 4) 143 | quats = torch.nn.functional.normalize(quats, dim=-1) 144 | rot = quaternion_multiply(quats, quat) 145 | 146 | # xyz_transformed: (n_particles, 3) 147 | # rot: (n_particles, 3, 3) / (n_particles, 4) 148 | # weights: (n_particles, n_bones) 149 | return xyz_transformed, rot, weights -------------------------------------------------------------------------------- /experiments/real_world/utils/track_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def sample_points_from_masks(masks, num_points): 5 | """ 6 
| sample points from masks and return its absolute coordinates 7 | 8 | Args: 9 | masks: np.array with shape (n, h, w) 10 | num_points: int 11 | 12 | Returns: 13 | points: np.array with shape (n, points, 2) 14 | """ 15 | n, h, w = masks.shape 16 | points = [] 17 | 18 | for i in range(n): 19 | # find the valid mask points 20 | indices = np.argwhere(masks[i] == 1) 21 | # the output format of np.argwhere is (y, x) and the shape is (num_points, 2) 22 | # we should convert it to (x, y) 23 | indices = indices[:, ::-1] # (num_points, [y x]) to (num_points, [x y]) 24 | 25 | # import pdb; pdb.set_trace() 26 | if len(indices) == 0: 27 | # if there are no valid points, append an empty array 28 | points.append(np.array([])) 29 | continue 30 | 31 | # resampling if there's not enough points 32 | if len(indices) < num_points: 33 | sampled_indices = np.random.choice(len(indices), num_points, replace=True) 34 | else: 35 | sampled_indices = np.random.choice(len(indices), num_points, replace=False) 36 | 37 | sampled_points = indices[sampled_indices] 38 | points.append(sampled_points) 39 | 40 | # convert to np.array 41 | points = np.array(points, dtype=np.float32) 42 | return points -------------------------------------------------------------------------------- /experiments/scripts/train_box.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=box/train \ 7 | train.source_dataset_name=data/box_merged/sub_episodes_v \ 8 | train.dataset_name=box/dataset \ 9 | sim.num_grippers=2 \ 10 | sim.gripper_points=True \ 11 | sim.gripper_forcing=False \ 12 | model.material.radius=0.0 \ 13 | train.dataset_load_skip_frame=3 \ 14 | train.dataset_skip_frame=1 \ 15 | train.num_workers=8 \ 16 | train.batch_size=32 \ 17 | train.training_start_episode=0 \ 18 | train.training_end_episode=303 \ 19 | train.eval_start_episode=303 \ 20 | train.eval_end_episode=323 \ 21 | train.num_iterations=100000 \ 22 | train.iteration_eval_interval=1000 \ 23 | train.iteration_log_interval=10 \ 24 | train.iteration_save_interval=1000 25 | -------------------------------------------------------------------------------- /experiments/scripts/train_bread.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=bread/train \ 7 | train.source_dataset_name=data/bread_merged/sub_episodes_v \ 8 | train.dataset_name=bread/dataset \ 9 | sim.preprocess_scale=3.0 \ 10 | sim.num_grippers=2 \ 11 | sim.n_particles=500 \ 12 | 'sim.num_grids=[100, 100, 100, 0.01]' \ 13 | model.gripper_radius=0.12 \ 14 | model.material.radius=0.0 \ 15 | model.material.pe_num_func_res=4 \ 16 | train.dataset_load_skip_frame=3 \ 17 | train.dataset_skip_frame=1 \ 18 | train.num_workers=8 \ 19 | train.batch_size=16 \ 20 | train.training_start_episode=0 \ 21 | train.training_end_episode=143 \ 22 | train.eval_start_episode=143 \ 23 | train.eval_end_episode=163 \ 24 | train.num_iterations=100000 \ 25 | train.iteration_eval_interval=1000 \ 26 | train.iteration_log_interval=10 \ 27 | train.iteration_save_interval=1000 28 | -------------------------------------------------------------------------------- /experiments/scripts/train_cloth.sh: -------------------------------------------------------------------------------- 1 | 
CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=cloth/train \ 7 | train.source_dataset_name=data/cloth_merged/sub_episodes_v \ 8 | train.dataset_name=cloth/dataset \ 9 | sim.preprocess_scale=0.8 \ 10 | sim.num_grippers=2 \ 11 | train.dataset_load_skip_frame=3 \ 12 | train.dataset_skip_frame=1 \ 13 | train.num_workers=8 \ 14 | train.batch_size=32 \ 15 | train.training_start_episode=0 \ 16 | train.training_end_episode=610 \ 17 | train.eval_start_episode=610 \ 18 | train.eval_end_episode=650 \ 19 | train.num_iterations=100000 \ 20 | train.iteration_eval_interval=1000 \ 21 | train.iteration_log_interval=10 \ 22 | train.iteration_save_interval=1000 23 | -------------------------------------------------------------------------------- /experiments/scripts/train_paperbag.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=paperbag/train \ 7 | train.source_dataset_name=data/paperbag_merged/sub_episodes_v \ 8 | train.dataset_name=paperbag/dataset \ 9 | train.dataset_load_skip_frame=3 \ 10 | train.dataset_skip_frame=1 \ 11 | train.num_workers=8 \ 12 | train.batch_size=32 \ 13 | train.training_start_episode=0 \ 14 | train.training_end_episode=200 \ 15 | train.eval_start_episode=200 \ 16 | train.eval_end_episode=220 \ 17 | train.num_iterations=100000 \ 18 | train.iteration_eval_interval=1000 \ 19 | train.iteration_log_interval=10 \ 20 | train.iteration_save_interval=1000 21 | -------------------------------------------------------------------------------- /experiments/scripts/train_rope.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=rope/train \ 7 | train.source_dataset_name=data/rope_merged/sub_episodes_v \ 8 | train.dataset_name=rope/dataset \ 9 | train.dataset_load_skip_frame=3 \ 10 | train.dataset_skip_frame=1 \ 11 | train.num_workers=8 \ 12 | train.batch_size=32 \ 13 | train.training_start_episode=0 \ 14 | train.training_end_episode=651 \ 15 | train.eval_start_episode=651 \ 16 | train.eval_end_episode=691 \ 17 | train.num_iterations=100000 \ 18 | train.iteration_eval_interval=1000 \ 19 | train.iteration_log_interval=10 \ 20 | train.iteration_save_interval=1000 21 | -------------------------------------------------------------------------------- /experiments/scripts/train_sloth.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=sloth/train \ 7 | train.source_dataset_name=data/sloth_merged/sub_episodes_v \ 8 | train.dataset_name=sloth/dataset \ 9 | train.dataset_load_skip_frame=3 \ 10 | train.dataset_skip_frame=1 \ 11 | train.num_workers=8 \ 12 | train.batch_size=32 \ 13 | train.training_start_episode=0 \ 14 | train.training_end_episode=113 \ 15 | train.eval_start_episode=113 \ 16 | train.eval_end_episode=133 \ 17 | train.num_iterations=100000 \ 18 | train.iteration_eval_interval=1000 \ 19 | train.iteration_log_interval=10 \ 20 | train.iteration_save_interval=1000 21 | 
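The train_*.sh scripts above all follow one pattern: they invoke experiments/train/train_eval.py with dot-list overrides (train.*, sim.*, model.*) layered over the YAML defaults under cfg/. Below is a minimal, illustrative OmegaConf sketch of how such overrides compose into a cfg object; the base values are placeholders, not the project's actual defaults or entry point.

from omegaconf import OmegaConf

# Placeholder base config standing in for cfg/default.yaml (values are assumptions).
base = OmegaConf.create({
    "train": {"name": "", "batch_size": 16, "num_iterations": 10000},
    "sim": {"num_grippers": 1},
})

# Dot-list overrides in the same style as scripts/train_cloth.sh.
overrides = OmegaConf.from_dotlist([
    "train.name=cloth/train",
    "train.batch_size=32",
    "sim.num_grippers=2",
])

cfg = OmegaConf.merge(base, overrides)
print(cfg.train.name, cfg.train.batch_size, cfg.sim.num_grippers)  # cloth/train 32 2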
-------------------------------------------------------------------------------- /experiments/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/train/__init__.py -------------------------------------------------------------------------------- /experiments/train/pv_dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import random 3 | from tqdm import tqdm, trange 4 | 5 | import hydra 6 | from omegaconf import DictConfig, OmegaConf 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.backends.cudnn 11 | import math 12 | import pyvista as pv 13 | from dgl.geometry import farthest_point_sampler 14 | 15 | from pgnd.utils import get_root, mkdir 16 | from pgnd.ffmpeg import make_video 17 | 18 | from train.pv_utils import Xvfb, get_camera_custom 19 | 20 | 21 | def fps(x, n, random_start=False): 22 | start_idx = random.randint(0, x.shape[0] - 1) if random_start else 0 23 | fps_idx = farthest_point_sampler(x[None], n, start_idx=start_idx)[0] 24 | fps_idx = fps_idx.to(x.device) 25 | return fps_idx 26 | 27 | 28 | @torch.no_grad() 29 | def render( 30 | cfg, 31 | dataset_root, 32 | episode_names, 33 | iteration=None, 34 | start_step=None, 35 | end_step=None, 36 | save_dir=None, 37 | downsample_indices=None, 38 | clean_bg=False, 39 | ): 40 | render_type = 'pv' 41 | 42 | exp_root: Path = dataset_root 43 | state_root: Path = exp_root / 'state' 44 | 45 | video_path_list = [] 46 | for episode_idx, episode in enumerate(episode_names): 47 | 48 | plotter = pv.Plotter(lighting='three lights', off_screen=True, window_size=(cfg.render.width, cfg.render.height)) 49 | plotter.set_background('white') 50 | plotter.camera_position = get_camera_custom(cfg.render.center, cfg.render.distance, cfg.render.azimuth, cfg.render.elevation) 51 | plotter.enable_shadows() 52 | 53 | # add bounding box 54 | scale_x = cfg.sim.num_grids[0] / (cfg.sim.num_grids[0] - 2 * cfg.render.bound) 55 | scale_y = cfg.sim.num_grids[1] / (cfg.sim.num_grids[1] - 2 * cfg.render.bound) 56 | scale_z = cfg.sim.num_grids[2] / (cfg.sim.num_grids[2] - 2 * cfg.render.bound) 57 | scale = np.array([scale_x, scale_y, scale_z]) 58 | scale_mean = np.power(np.prod(scale), 1 / 3) 59 | bbox = pv.Box(bounds=[0, 1, 0, 1, 0, 1]) 60 | if not clean_bg: 61 | plotter.add_mesh(bbox, style='wireframe', color='black') 62 | 63 | # add axis 64 | if not clean_bg: 65 | for axis, color in enumerate(['r', 'g', 'b']): 66 | mesh = pv.Arrow(start=[0, 0, 0], direction=np.eye(3)[axis], scale=0.2) 67 | plotter.add_mesh(mesh, color=color, show_scalar_bar=False) 68 | 69 | episode_state_root = state_root / episode 70 | 71 | episode_image_root = save_dir / f'{episode}_gt' 72 | mkdir(episode_image_root, overwrite=True, resume=True) 73 | 74 | ckpt_paths = list(sorted(episode_state_root.glob('*.pt'), key=lambda x: int(x.stem))) 75 | if start_step is not None and end_step is not None: 76 | ckpt_paths = ckpt_paths[start_step:end_step] 77 | skip_frame = cfg.train.dataset_skip_frame * cfg.train.dataset_load_skip_frame 78 | ckpt_paths = ckpt_paths[skip_frame::skip_frame] 79 | for i, path in enumerate(tqdm(ckpt_paths, desc=render_type)): 80 | 81 | if i % cfg.render.skip_frame != 0: 82 | continue 83 | 84 | ckpt = torch.load(path, map_location='cpu') 85 | p_x = ckpt['x'].cpu().detach().numpy() 86 | if downsample_indices is not None: 87 | p_x = 
p_x[downsample_indices[0]] 88 | else: 89 | downsample_indices = fps(torch.from_numpy(p_x), cfg.sim.n_particles, random_start=True)[None] 90 | p_x = p_x[downsample_indices[0]] 91 | 92 | x = (p_x - 0.5) * scale + 0.5 93 | 94 | grippers = ckpt['grippers'].cpu().detach().numpy() 95 | n_eef = grippers.shape[0] 96 | 97 | radius = 0.5 * np.power((0.5 ** 3) / x.shape[0], 1 / 3) * scale_mean 98 | x = np.clip(x, radius, 1 - radius) 99 | 100 | polydata = pv.PolyData(x) 101 | plotter.add_mesh(polydata, style='points', name='object', render_points_as_spheres=True, point_size=radius * cfg.render.radius_scale, color=list(cfg.render.reflectance)) 102 | for j in range(n_eef): 103 | gripper = pv.Sphere(center=grippers[j, :3], radius=grippers[j, -2]) 104 | plotter.add_mesh(gripper, color='blue', name=f'gripper_{j}') 105 | 106 | plotter.show(auto_close=False, screenshot=str(episode_image_root / f'{i // cfg.render.skip_frame:04d}.png')) 107 | 108 | plotter.close() 109 | if save_dir is not None: 110 | make_video(episode_image_root, save_dir / f'{episode}_gt.mp4', '%04d.png', cfg.render.fps) 111 | video_path_list.append(save_dir / f'{episode}_gt.mp4') 112 | 113 | return video_path_list 114 | 115 | 116 | @torch.no_grad() 117 | def do_dataset_pv(*args, **kwargs): 118 | with Xvfb(): 119 | ret = render(*args, **kwargs) 120 | return ret -------------------------------------------------------------------------------- /experiments/train/pv_train.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import random 3 | from tqdm import tqdm, trange 4 | 5 | import hydra 6 | from omegaconf import DictConfig, OmegaConf 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.backends.cudnn 11 | import math 12 | import pyvista as pv 13 | 14 | from pgnd.utils import get_root, mkdir 15 | from pgnd.ffmpeg import make_video 16 | 17 | from train.pv_utils import Xvfb, get_camera_custom 18 | 19 | 20 | @torch.no_grad() 21 | def render( 22 | cfg, 23 | log_root, 24 | iteration, 25 | episode_names, 26 | eval_dirname='eval', 27 | eval_postfix='', 28 | dataset_name='', 29 | start_step=None, 30 | end_step=None, 31 | clean_bg=False, 32 | ): 33 | clean_bg = False 34 | 35 | if dataset_name == '': 36 | eval_name = f'{cfg.train.name}/{eval_dirname}/{iteration:06d}' 37 | else: 38 | eval_name = f'{cfg.train.name}/{eval_dirname}/{dataset_name}/{iteration:06d}' 39 | render_type = 'pv' 40 | 41 | exp_root: Path = log_root / eval_name 42 | state_root: Path = exp_root / 'state' 43 | image_root: Path = exp_root / render_type 44 | mkdir(image_root, overwrite=cfg.overwrite, resume=cfg.resume) 45 | 46 | 47 | video_path_list = [] 48 | for episode_idx, episode in enumerate(episode_names): 49 | 50 | plotter = pv.Plotter(lighting='three lights', off_screen=True, window_size=(cfg.render.width, cfg.render.height)) 51 | plotter.set_background('white') 52 | plotter.camera_position = get_camera_custom(cfg.render.center, cfg.render.distance, cfg.render.azimuth, cfg.render.elevation) 53 | plotter.enable_shadows() 54 | 55 | # add bounding box 56 | scale_x = cfg.sim.num_grids[0] / (cfg.sim.num_grids[0] - 2 * cfg.render.bound) 57 | scale_y = cfg.sim.num_grids[1] / (cfg.sim.num_grids[1] - 2 * cfg.render.bound) 58 | scale_z = cfg.sim.num_grids[2] / (cfg.sim.num_grids[2] - 2 * cfg.render.bound) 59 | scale = np.array([scale_x, scale_y, scale_z]) 60 | scale_mean = np.power(np.prod(scale), 1 / 3) 61 | bbox = pv.Box(bounds=[0, 1, 0, 1, 0, 1]) 62 | if not clean_bg: 63 | 
plotter.add_mesh(bbox, style='wireframe', color='black') 64 | 65 | # add axis 66 | if not clean_bg: 67 | for axis, color in enumerate(['r', 'g', 'b']): 68 | mesh = pv.Arrow(start=[0, 0, 0], direction=np.eye(3)[axis], scale=0.2) 69 | plotter.add_mesh(mesh, color=color, show_scalar_bar=False) 70 | 71 | episode_state_root = state_root / episode 72 | episode_image_root = image_root / episode 73 | mkdir(episode_image_root, overwrite=cfg.overwrite, resume=cfg.resume) 74 | 75 | ckpt_paths = list(sorted(episode_state_root.glob('*.pt'), key=lambda x: int(x.stem))) 76 | for i, path in enumerate(tqdm(ckpt_paths, desc=render_type)): 77 | 78 | if i % cfg.render.skip_frame != 0: 79 | continue 80 | 81 | ckpt = torch.load(path, map_location='cpu') 82 | p_x = ckpt['x'].cpu().detach().numpy() 83 | 84 | x = (p_x - 0.5) * scale + 0.5 85 | 86 | grippers = ckpt['grippers'].cpu().detach().numpy() 87 | n_eef = grippers.shape[0] 88 | 89 | radius = 0.5 * np.power((0.5 ** 3) / x.shape[0], 1 / 3) * scale_mean 90 | x = np.clip(x, radius, 1 - radius) 91 | 92 | polydata = pv.PolyData(x) 93 | plotter.add_mesh(polydata, style='points', name='object', render_points_as_spheres=True, point_size=radius * cfg.render.radius_scale, color=list(cfg.render.reflectance)) 94 | for j in range(n_eef): 95 | gripper = pv.Sphere(center=grippers[j, :3], radius=0.04) 96 | plotter.add_mesh(gripper, color='blue', name=f'gripper_{j}') 97 | if 'gripper_x' in ckpt: 98 | gripper_points = ckpt['gripper_x'].cpu().detach().numpy() 99 | gripper_points = (gripper_points - 0.5) * scale + 0.5 100 | gripper_points = np.clip(gripper_points, radius, 1 - radius) 101 | gripper_polydata = pv.PolyData(gripper_points) 102 | plotter.add_mesh(gripper_polydata, style='points', name=f'gripper_points', render_points_as_spheres=True, point_size=radius * cfg.render.radius_scale, color='blue') 103 | 104 | plotter.show(auto_close=False, screenshot=str(episode_image_root / f'{i // cfg.render.skip_frame:04d}.png')) 105 | 106 | plotter.close() 107 | make_video(episode_image_root, image_root / f'{episode}{eval_postfix}.mp4', '%04d.png', cfg.render.fps) 108 | video_path_list.append(image_root / f'{episode}{eval_postfix}.mp4') 109 | 110 | return video_path_list 111 | 112 | 113 | @torch.no_grad() 114 | def do_train_pv(*args, **kwargs): 115 | with Xvfb(): 116 | ret = render(*args, **kwargs) 117 | return ret 118 | -------------------------------------------------------------------------------- /experiments/train/pv_utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from tqdm import tqdm 3 | import time 4 | import math 5 | import psutil 6 | import random 7 | import fcntl 8 | import os 9 | import subprocess 10 | import tempfile 11 | from random import randint 12 | from errno import EACCES 13 | 14 | import hydra 15 | from omegaconf import DictConfig 16 | import numpy as np 17 | import torch 18 | import pyvista as pv 19 | 20 | from pgnd.utils import get_root, mkdir 21 | from pgnd.ffmpeg import make_video 22 | 23 | root: Path = get_root(__file__) 24 | 25 | 26 | class Xvfb(object): 27 | 28 | # Maximum value to use for a display. 
32-bit maxint is the 29 | # highest Xvfb currently supports 30 | MAX_DISPLAY = 2147483647 31 | SLEEP_TIME_BEFORE_START = 0.1 32 | 33 | def __init__( 34 | self, width=800, height=680, colordepth=24, 35 | tempdir=None, display=None, **kwargs): 36 | self.width = width 37 | self.height = height 38 | self.colordepth = colordepth 39 | self._tempdir = tempdir or tempfile.gettempdir() 40 | self.new_display = display 41 | 42 | if not self.xvfb_exists(): 43 | msg = ( 44 | 'Can not find Xvfb. Please install it with:\n' 45 | ' sudo apt install libgl1-mesa-glx xvfb') 46 | raise EnvironmentError(msg) 47 | 48 | self.extra_xvfb_args = ['-screen', '0', '{}x{}x{}'.format( 49 | self.width, self.height, self.colordepth)] 50 | 51 | for key, value in kwargs.items(): 52 | self.extra_xvfb_args += ['-{}'.format(key), value] 53 | 54 | if 'DISPLAY' in os.environ: 55 | self.orig_display = os.environ['DISPLAY'].split(':')[1] 56 | else: 57 | self.orig_display = None 58 | 59 | self.proc = None 60 | 61 | def __enter__(self): 62 | self.start() 63 | return self 64 | 65 | def __exit__(self, exc_type, exc_val, exc_tb): 66 | self.stop() 67 | 68 | def start(self): 69 | if self.new_display is not None: 70 | if not self._get_lock_for_display(self.new_display): 71 | raise ValueError(f'Could not lock display :{self.new_display}') 72 | else: 73 | self.new_display = self._get_next_unused_display() 74 | display_var = ':{}'.format(self.new_display) 75 | self.xvfb_cmd = ['Xvfb', display_var] + self.extra_xvfb_args 76 | with open(os.devnull, 'w') as fnull: 77 | self.proc = subprocess.Popen( 78 | self.xvfb_cmd, stdout=fnull, stderr=fnull, close_fds=True) 79 | # give Xvfb time to start 80 | time.sleep(self.__class__.SLEEP_TIME_BEFORE_START) 81 | ret_code = self.proc.poll() 82 | if ret_code is None: 83 | self._set_display_var(self.new_display) 84 | else: 85 | self._cleanup_lock_file() 86 | raise RuntimeError( 87 | f'Xvfb did not start ({ret_code}): {self.xvfb_cmd}') 88 | 89 | def stop(self): 90 | try: 91 | if self.orig_display is None: 92 | del os.environ['DISPLAY'] 93 | else: 94 | self._set_display_var(self.orig_display) 95 | if self.proc is not None: 96 | try: 97 | self.proc.terminate() 98 | self.proc.wait() 99 | except OSError: 100 | pass 101 | self.proc = None 102 | finally: 103 | self._cleanup_lock_file() 104 | 105 | def xvfb_exists(self): 106 | """Check that Xvfb is available on PATH and is executable.""" 107 | paths = os.environ['PATH'].split(os.pathsep) 108 | return any(os.access(os.path.join(path, 'Xvfb'), os.X_OK) 109 | for path in paths) 110 | 111 | def _cleanup_lock_file(self): 112 | ''' 113 | This should always get called if the process exits safely 114 | with Xvfb.stop() (whether called explicitly, or by __exit__). 115 | If you are ending up with /tmp/X123-lock files when Xvfb is not 116 | running, then Xvfb is not exiting cleanly. Always either call 117 | Xvfb.stop() in a finally block, or use Xvfb as a context manager 118 | to ensure lock files are purged. 119 | ''' 120 | self._lock_display_file.close() 121 | try: 122 | os.remove(self._lock_display_file.name) 123 | except OSError: 124 | pass 125 | 126 | def _get_lock_for_display(self, display): 127 | ''' 128 | In order to ensure multi-process safety, this method attempts 129 | to acquire an exclusive lock on a temporary file whose name 130 | contains the display number for Xvfb. 
131 | ''' 132 | tempfile_path = os.path.join(self._tempdir, '.X{0}-lock'.format(display)) 133 | try: 134 | self._lock_display_file = open(tempfile_path, 'w') 135 | except PermissionError as e: 136 | return False 137 | else: 138 | try: 139 | fcntl.flock(self._lock_display_file, 140 | fcntl.LOCK_EX | fcntl.LOCK_NB) 141 | except BlockingIOError: 142 | return False 143 | else: 144 | return True 145 | 146 | def _get_next_unused_display(self): 147 | ''' 148 | Randomly chooses a display number and tries to acquire a lock for this number. 149 | If the lock could be acquired, returns this number, otherwise choses a new one. 150 | :return: free display number 151 | ''' 152 | while True: 153 | rand = randint(1, self.__class__.MAX_DISPLAY) 154 | if self._get_lock_for_display(rand): 155 | return rand 156 | else: 157 | continue 158 | 159 | def _set_display_var(self, display): 160 | os.environ['DISPLAY'] = ':{}'.format(display) 161 | 162 | 163 | 164 | def get_camera_custom(center=(0.5, 0.3, 0.5), distance=3.4, azimuth=-125, elevation=30): 165 | target = np.array(center) 166 | theta = 90 + azimuth 167 | y = distance * math.sin(math.radians(elevation)) 168 | x = math.cos(math.radians(theta)) * distance * math.cos(math.radians(elevation)) 169 | z = math.sin(math.radians(theta)) * distance * math.cos(math.radians(elevation)) 170 | origin = target + np.array([x, y, z]) 171 | 172 | return [ 173 | origin.tolist(), 174 | target.tolist(), 175 | (0.0, 1.0, 0.0), 176 | ] 177 | -------------------------------------------------------------------------------- /imgs/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/imgs/teaser.png -------------------------------------------------------------------------------- /pgnd/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | numpy.bool = bool # to avoid deprecation error 3 | 4 | from . import utils 5 | from . import ffmpeg 6 | 7 | from . import data 8 | from . import material 9 | from . 
import sim 10 | -------------------------------------------------------------------------------- /pgnd/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import * 2 | from .dataset_gripper import * 3 | -------------------------------------------------------------------------------- /pgnd/data/dataset_gripper.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Union, Optional 3 | from omegaconf import DictConfig 4 | 5 | import os 6 | import torch 7 | import time 8 | import shutil 9 | import json 10 | import yaml 11 | import random 12 | import glob 13 | import kornia 14 | import numpy as np 15 | import pickle as pkl 16 | import open3d as o3d 17 | from torch.utils.data import Dataset 18 | from dgl.geometry import farthest_point_sampler 19 | from sklearn.neighbors import NearestNeighbors 20 | 21 | 22 | def fps(x, n, device, random_start=False): 23 | start_idx = random.randint(0, x.shape[0] - 1) if random_start else 0 24 | fps_idx = farthest_point_sampler(x[None], n, start_idx=start_idx)[0] 25 | fps_idx = fps_idx.to(x.device) 26 | return fps_idx 27 | 28 | 29 | def read_splat(splat_file): 30 | with open(splat_file, "rb") as f: 31 | data = f.read() 32 | pts = [] 33 | colors = [] 34 | scales = [] 35 | quats = [] 36 | opacities = [] 37 | for i in range(0, len(data), 32): 38 | v = np.frombuffer(data[i : i + 12], dtype=np.float32) 39 | s = np.frombuffer(data[i + 12 : i + 24], dtype=np.float32) 40 | c = np.frombuffer(data[i + 24 : i + 28], dtype=np.uint8) / 255 41 | q = np.frombuffer(data[i + 28 : i + 32], dtype=np.uint8) 42 | q = (q * 1.0 - 128) / 128 43 | pts.append(v) 44 | scales.append(s) 45 | colors.append(c[:3]) 46 | quats.append(q) 47 | opacities.append(c[3:]) 48 | return np.array(pts), np.array(colors), np.array(scales), np.array(quats), np.array(opacities) 49 | 50 | 51 | class RealGripperDataset(Dataset): 52 | 53 | def __init__(self, cfg: DictConfig, device: torch.device, train=False) -> None: 54 | super().__init__() 55 | 56 | pts, colors, scales, quats, opacities = read_splat('experiments/log/gs/ckpts/gripper_new.splat') 57 | self.device = device 58 | self.uniform = cfg.sim.uniform or not train 59 | self.n_particles = 500 60 | 61 | R = np.array( 62 | [[1, 0, 0], 63 | [0, 0, -1], 64 | [0, 1, 0]] 65 | ) 66 | eef_global_T = np.array([cfg.model.eef_t[0], cfg.model.eef_t[1], cfg.model.eef_t[2]]) # 1018_sloth: 0.185, 1018_rope_short: 0.013) 67 | pts = pts + eef_global_T 68 | pts = pts @ R.T 69 | scale = cfg.sim.preprocess_scale 70 | pts = pts * scale 71 | 72 | axis = np.array([0, 1, 0]) 73 | angle = -27 # degree 74 | R = o3d.geometry.get_rotation_matrix_from_axis_angle(axis * np.pi / 180 * angle) 75 | translation = np.array([-0.015, 0.06, 0]) 76 | 77 | pts = pts @ R.T 78 | pts = pts + translation 79 | 80 | R = np.array( 81 | [[0, 0, 1], 82 | [0, 1, 0], 83 | [-1, 0, 0]] 84 | ) 85 | pts = pts @ R.T 86 | 87 | self.gripper_pts = torch.from_numpy(pts).to(torch.float32) 88 | 89 | def __len__(self) -> int: 90 | return 10000000 91 | 92 | def __getitem__(self, index): 93 | x = self.gripper_pts 94 | if self.uniform: 95 | downsample_indices = fps(x, self.n_particles, self.device, random_start=True) 96 | else: 97 | downsample_indices = torch.randperm(x.shape[0])[:self.n_particles] 98 | x = x[downsample_indices] 99 | return x, downsample_indices 100 | -------------------------------------------------------------------------------- /pgnd/ffmpeg.py: 
-------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import subprocess 3 | 4 | 5 | def make_video( 6 | image_root: Path, 7 | video_path: Path, 8 | image_pattern: str = '%04d.png', 9 | frame_rate: int = 10): 10 | 11 | subprocess.run([ 12 | 'ffmpeg', 13 | '-y', 14 | '-hide_banner', 15 | '-loglevel', 'error', 16 | '-framerate', str(frame_rate), 17 | '-i', str(image_root / image_pattern), 18 | '-c:v', 'libx264', 19 | '-pix_fmt', 'yuv420p', 20 | str(video_path) 21 | ]) 22 | 23 | 24 | def cat_videos(input_videos: list[Path], output_video: Path): 25 | 26 | output_video.parent.mkdir(parents=True, exist_ok=True) 27 | 28 | num_videos = len(input_videos) 29 | 30 | if num_videos <= 1: 31 | raise ValueError('concatenating <=1 videos') 32 | 33 | video_args = [] 34 | for input_video in input_videos: 35 | video_args.extend(['-i', str(input_video)]) 36 | 37 | subprocess.run([ 38 | 'ffmpeg', 39 | '-y', 40 | '-hide_banner', 41 | '-loglevel', 'error', 42 | ] + video_args + [ 43 | '-filter_complex', 44 | '{}hstack=inputs={}[v]'.format(''.join([f'[{i}:v]' for i in range(num_videos)]), num_videos), 45 | '-map', '[v]', 46 | str(output_video) 47 | ]) 48 | -------------------------------------------------------------------------------- /pgnd/material/__init__.py: -------------------------------------------------------------------------------- 1 | from .pgnd import * 2 | -------------------------------------------------------------------------------- /pgnd/material/network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/pgnd/material/network/__init__.py -------------------------------------------------------------------------------- /pgnd/material/network/nerf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class CondNeRFModel(torch.nn.Module): 6 | def __init__( 7 | self, 8 | xyz_dim=3, 9 | condition_dim=64, 10 | out_channel=3, 11 | num_layers=8, 12 | hidden_size=256, 13 | skip_connect_every=4, 14 | ): 15 | super(CondNeRFModel, self).__init__() 16 | 17 | self.dim_xyz = xyz_dim 18 | self.dim_cond = condition_dim 19 | self.skip_connect_every = skip_connect_every 20 | 21 | self.layer1 = torch.nn.Linear(self.dim_xyz + self.dim_cond, hidden_size) 22 | 23 | self.layers = torch.nn.ModuleList() 24 | for i in range(num_layers - 1): 25 | if self.skip_connect_every is not None and i % self.skip_connect_every == 0 and i > 0 and i != num_layers - 1: 26 | self.layers.append(torch.nn.Linear(self.dim_xyz + self.dim_cond + hidden_size, hidden_size)) 27 | else: 28 | self.layers.append(torch.nn.Linear(hidden_size, hidden_size)) 29 | 30 | self.layers_out = torch.nn.ModuleList([ 31 | torch.nn.Linear(hidden_size, hidden_size // 2), 32 | torch.nn.Linear(hidden_size // 2, out_channel), 33 | ]) 34 | self.relu = nn.ReLU() 35 | 36 | def forward(self, xyz, cond): 37 | assert xyz.shape[1] == self.dim_xyz 38 | xyz = xyz[..., :self.dim_xyz] 39 | xyz = torch.cat([xyz, cond], 1) 40 | x = self.layer1(xyz) 41 | for i in range(len(self.layers)): 42 | if ( 43 | self.skip_connect_every is not None 44 | and i % self.skip_connect_every == 0 45 | and i > 0 46 | and i != len(self.layers) - 1 47 | ): 48 | x = torch.cat((x, xyz), dim=-1) 49 | x = self.relu(self.layers[i](x)) 50 | for i in range(len(self.layers_out) - 1): 51 | x = self.relu(self.layers_out[i](x)) 52 | x = 
self.layers_out[-1](x) 53 | return x 54 | 55 | -------------------------------------------------------------------------------- /pgnd/material/network/pointnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.parallel 4 | import torch.utils.data 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import torch.nn.functional as F 8 | 9 | 10 | class STN3d(nn.Module): 11 | def __init__(self, channel): 12 | super(STN3d, self).__init__() 13 | self.conv1 = torch.nn.Conv1d(channel, 64, 1) 14 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 15 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 16 | self.fc1 = nn.Linear(1024, 512) 17 | self.fc2 = nn.Linear(512, 256) 18 | self.fc3 = nn.Linear(256, 9) 19 | self.relu = nn.ReLU() 20 | 21 | self.bn1 = nn.BatchNorm1d(64) 22 | self.bn2 = nn.BatchNorm1d(128) 23 | self.bn3 = nn.BatchNorm1d(1024) 24 | self.bn4 = nn.BatchNorm1d(512) 25 | self.bn5 = nn.BatchNorm1d(256) 26 | 27 | def forward(self, x, x_mask): 28 | batchsize = x.size()[0] 29 | x_mask = x_mask.unsqueeze(1) # B, N -> B, 1, N 30 | x = F.relu(self.bn1(self.conv1(x))) 31 | x = F.relu(self.bn2(self.conv2(x))) 32 | x = F.relu(self.bn3(self.conv3(x))) 33 | x = x * x_mask - (~x_mask) * 1e10 34 | x = torch.max(x, 2, keepdim=True)[0] 35 | assert x.min() > -1e10 36 | x = x.view(-1, 1024) 37 | 38 | x = F.relu(self.bn4(self.fc1(x))) 39 | x = F.relu(self.bn5(self.fc2(x))) 40 | x = self.fc3(x) 41 | 42 | iden = Variable(torch.from_numpy(np.array([1, 0, 0, 0, 1, 0, 0, 0, 1]).astype(np.float32))).view(1, 9).repeat( 43 | batchsize, 1) 44 | if x.is_cuda: 45 | iden = iden.cuda() 46 | x = x + iden 47 | x = x.view(-1, 3, 3) 48 | return x 49 | 50 | 51 | class STNkd(nn.Module): 52 | def __init__(self, k=64): 53 | super(STNkd, self).__init__() 54 | self.conv1 = torch.nn.Conv1d(k, 64, 1) 55 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 56 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 57 | self.fc1 = nn.Linear(1024, 512) 58 | self.fc2 = nn.Linear(512, 256) 59 | self.fc3 = nn.Linear(256, k * k) 60 | self.relu = nn.ReLU() 61 | 62 | self.bn1 = nn.BatchNorm1d(64) 63 | self.bn2 = nn.BatchNorm1d(128) 64 | self.bn3 = nn.BatchNorm1d(1024) 65 | self.bn4 = nn.BatchNorm1d(512) 66 | self.bn5 = nn.BatchNorm1d(256) 67 | 68 | self.k = k 69 | 70 | def forward(self, x, x_mask): 71 | batchsize = x.size()[0] 72 | x_mask = x_mask.unsqueeze(1) 73 | x = F.relu(self.bn1(self.conv1(x))) 74 | x = F.relu(self.bn2(self.conv2(x))) 75 | x = F.relu(self.bn3(self.conv3(x))) 76 | x = x * x_mask - (~x_mask) * 1e10 77 | x = torch.max(x, 2, keepdim=True)[0] 78 | assert x.min() > -1e10 79 | x = x.view(-1, 1024) 80 | 81 | x = F.relu(self.bn4(self.fc1(x))) 82 | x = F.relu(self.bn5(self.fc2(x))) 83 | x = self.fc3(x) 84 | 85 | iden = Variable(torch.from_numpy(np.eye(self.k).flatten().astype(np.float32))).view(1, self.k * self.k).repeat( 86 | batchsize, 1) 87 | if x.is_cuda: 88 | iden = iden.cuda() 89 | x = x + iden 90 | x = x.view(-1, self.k, self.k) 91 | return x 92 | 93 | 94 | class PointNetEncoder(nn.Module): 95 | def __init__(self, global_feat=True, feature_transform=False, feature_dim=1024, channel=3): 96 | super(PointNetEncoder, self).__init__() 97 | self.stn = STN3d(channel) 98 | self.conv1 = torch.nn.Conv1d(channel, 64, 1) 99 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 100 | self.conv3 = torch.nn.Conv1d(128, feature_dim, 1) 101 | self.bn1 = nn.BatchNorm1d(64) 102 | self.bn2 = nn.BatchNorm1d(128) 103 | self.bn3 = nn.BatchNorm1d(feature_dim) 104 | 
self.feature_dim = feature_dim 105 | self.global_feat = global_feat 106 | self.feature_transform = feature_transform 107 | if self.feature_transform: 108 | self.fstn = STNkd(k=64) 109 | 110 | def forward(self, x, x_mask=None): 111 | B, D, N = x.size() 112 | if x_mask is None: 113 | x_mask = torch.ones(B, N, dtype=torch.bool, device=x.device) 114 | if x_mask.dtype != torch.bool: 115 | x_mask = x_mask > 0 116 | trans = self.stn(x, x_mask) 117 | x = x.transpose(2, 1) 118 | if D > 3: 119 | feature = x[:, :, 3:] 120 | x = x[:, :, :3] 121 | x = torch.bmm(x, trans) 122 | if D > 3: 123 | x = torch.cat([x, feature], dim=2) 124 | x = x.transpose(2, 1) 125 | x = F.relu(self.bn1(self.conv1(x))) 126 | 127 | if self.feature_transform: 128 | trans_feat = self.fstn(x, x_mask) 129 | x = x.transpose(2, 1) 130 | x = torch.bmm(x, trans_feat) 131 | x = x.transpose(2, 1) 132 | else: 133 | trans_feat = None 134 | 135 | pointfeat = x 136 | x = F.relu(self.bn2(self.conv2(x))) 137 | x = self.bn3(self.conv3(x)) 138 | 139 | if not self.global_feat: 140 | x = x.view(-1, self.feature_dim, N) 141 | return x, trans, trans_feat 142 | 143 | x = torch.max(x, 2, keepdim=True)[0] 144 | x = x.view(-1, self.feature_dim) 145 | if self.global_feat: 146 | return x, trans, trans_feat 147 | else: 148 | x = x.view(-1, self.feature_dim, 1).repeat(1, 1, N) 149 | return torch.cat([x, pointfeat], 1), trans, trans_feat 150 | 151 | -------------------------------------------------------------------------------- /pgnd/material/network/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def get_grid_locations(x, num_grids_list, dx): 5 | bsz = x.shape[0] 6 | x_grid = torch.stack(torch.meshgrid( 7 | torch.linspace(0, (num_grids_list[0] - 1) * dx, num_grids_list[0], device=x.device), 8 | torch.linspace(0, (num_grids_list[1] - 1) * dx, num_grids_list[1], device=x.device), 9 | torch.linspace(0, (num_grids_list[2] - 1) * dx, num_grids_list[2], device=x.device), 10 | ), dim=-1).reshape(-1, 3) 11 | grid_idxs = torch.stack(torch.meshgrid( 12 | torch.linspace(0, (num_grids_list[0] - 1), num_grids_list[0], device=x.device), 13 | torch.linspace(0, (num_grids_list[1] - 1), num_grids_list[1], device=x.device), 14 | torch.linspace(0, (num_grids_list[2] - 1), num_grids_list[2], device=x.device), 15 | ), dim=-1).reshape(-1, 3) 16 | 17 | grid_hits = torch.zeros(bsz, num_grids_list[0], num_grids_list[1], num_grids_list[2], device=x.device) 18 | for i in range(3): 19 | for j in range(3): 20 | for k in range(3): 21 | grid_hits[:, \ 22 | ((x[:, :, 0] / dx - 0.5).int() + i).clamp(0, num_grids_list[0] - 1), \ 23 | ((x[:, :, 1] / dx - 0.5).int() + j).clamp(0, num_grids_list[1] - 1), \ 24 | ((x[:, :, 2] / dx - 0.5).int() + k).clamp(0, num_grids_list[2] - 1)] = 1 25 | grid_hits = grid_hits.sum(0) > 0 26 | 27 | x_grid = x_grid[grid_hits.reshape(-1)].reshape(1, -1, 3).repeat(bsz, 1, 1) 28 | grid_idxs = grid_idxs[grid_hits.reshape(-1)] 29 | return x_grid, grid_idxs 30 | 31 | 32 | def fill_grid_locations(feat, grid_idxs, num_grids_list): 33 | # feat: (bsz, num_active_grids, feature_dim) 34 | # grid_idxs: (num_active_grids, 3) 35 | bsz = feat.shape[0] 36 | feat_filled = torch.zeros(bsz, num_grids_list[0], num_grids_list[1], num_grids_list[2], 3, device=feat.device) 37 | grid_idxs_1d = grid_idxs[:, 0] * num_grids_list[1] * num_grids_list[2] + grid_idxs[:, 1] * num_grids_list[2] + grid_idxs[:, 2] 38 | feat_filled = feat_filled.reshape(bsz, -1, 3) 39 | feat_filled[:, grid_idxs_1d.long()] = feat.clone() 40 | 
feat_filled = feat_filled.reshape(bsz, num_grids_list[0], num_grids_list[1], num_grids_list[2], 3) 41 | return feat_filled 42 | -------------------------------------------------------------------------------- /pgnd/material/pgnd.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from omegaconf import DictConfig 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | from torch import Tensor 8 | 9 | from .network.pointnet import PointNetEncoder 10 | from .network.nerf import CondNeRFModel 11 | from .network.utils import get_grid_locations, fill_grid_locations 12 | 13 | 14 | class PGNDModel(nn.Module): 15 | 16 | def __init__(self, cfg: DictConfig) -> None: 17 | super().__init__() 18 | 19 | self.feature_dim = 64 20 | self.radius = cfg.model.material.radius 21 | self.n_history = cfg.sim.n_history 22 | self.num_grids_list = cfg.sim.num_grids[:3] 23 | self.dx = cfg.sim.num_grids[3] 24 | self.inv_dx = 1 / self.dx 25 | self.requires_grad = cfg.model.material.requires_grad 26 | self.pe_num_func = int(np.log2(self.inv_dx)) + cfg.model.material.pe_num_func_res 27 | self.pe_dim = 3 + self.pe_num_func * 6 28 | self.output_scale = cfg.model.material.output_scale 29 | self.input_scale = cfg.model.material.input_scale 30 | self.absolute_y = cfg.model.material.absolute_y 31 | 32 | self.encoder = PointNetEncoder( 33 | global_feat=(cfg.model.material.radius <= 0), 34 | feature_transform=False, 35 | feature_dim=self.feature_dim, 36 | channel=6 * (1 + self.n_history), 37 | ) 38 | self.decoder = CondNeRFModel( 39 | xyz_dim=self.pe_dim, 40 | condition_dim=self.feature_dim, 41 | out_channel=3, 42 | num_layers=2, 43 | hidden_size=64, 44 | skip_connect_every=4, 45 | ) 46 | 47 | def positional_encoding(self, tensor): 48 | num_encoding_functions = self.pe_num_func 49 | if num_encoding_functions == 0: 50 | assert include_input 51 | return tensor 52 | 53 | encoding = [tensor] # include the input itself 54 | frequency_bands = 2.0 ** torch.linspace( 55 | 0.0, 56 | num_encoding_functions - 1, 57 | num_encoding_functions, 58 | dtype=tensor.dtype, 59 | device=tensor.device, 60 | ) 61 | 62 | for freq in frequency_bands: 63 | for func in [torch.sin, torch.cos]: 64 | encoding.append(func(tensor * freq)) 65 | 66 | # Special case, for no positional encoding 67 | if len(encoding) == 1: 68 | return encoding[0] 69 | else: 70 | return torch.cat(encoding, dim=-1) 71 | 72 | def forward(self, x: Tensor, v: Tensor, x_his: Tensor, v_his: Tensor, enabled: Tensor) -> Tensor: 73 | # x: (bsz, num_particles, 3) 74 | # v: (bsz, num_particles, 3) 75 | bsz = x.shape[0] 76 | num_particles = x.shape[1] 77 | v = v * self.input_scale 78 | v_his = v_his * self.input_scale 79 | 80 | x_his = x_his.reshape(bsz, num_particles, self.n_history, 3) 81 | v_his = v_his.reshape(bsz, num_particles, self.n_history, 3) 82 | x_his = x_his.detach() 83 | v_his = v_his.detach() 84 | 85 | x_grid, grid_idxs = get_grid_locations(x, self.num_grids_list, self.dx) 86 | x_grid = x_grid.detach() 87 | grid_idxs = grid_idxs.detach() 88 | 89 | # centering 90 | x_center = x.mean(1, keepdim=True) 91 | if self.absolute_y: 92 | x_center[:, :, 1] = 0 # only centering x and z 93 | x = x - x_center 94 | x_his = x_his - x_center[:, :, None] 95 | if self.training: 96 | # random azimuth 97 | theta = torch.rand(bsz, 1, device=x.device) * 2 * np.pi 98 | rot = torch.stack([ 99 | torch.cos(theta), torch.zeros_like(theta), torch.sin(theta), 100 | torch.zeros_like(theta), torch.ones_like(theta), 
torch.zeros_like(theta), 101 | -torch.sin(theta), torch.zeros_like(theta), torch.cos(theta), 102 | ], dim=-1).reshape(bsz, 3, 3) 103 | inv_rot = rot.transpose(1, 2) 104 | else: 105 | rot = torch.eye(3, dtype=x.dtype, device=x.device).unsqueeze(0).repeat(bsz, 1, 1) 106 | inv_rot = rot.transpose(1, 2) 107 | x = torch.einsum('bij,bjk->bik', x, rot) 108 | x_his = torch.einsum('bcij,bjk->bcik', x_his, rot) # (bsz, num_particles, n_history, 3) 109 | v = torch.einsum('bij,bjk->bik', v, rot) 110 | v_his = torch.einsum('bcij,bjk->bcik', v_his, rot) # (bsz, num_particles, n_history, 3) 111 | 112 | x_grid = x_grid - x_center 113 | x_grid = x_grid @ rot 114 | x_his = x_his.reshape(bsz, num_particles, self.n_history * 3) 115 | v_his = v_his.reshape(bsz, num_particles, self.n_history * 3) 116 | 117 | feat = torch.cat([x, v, x_his, v_his], dim=-1) # (bsz, num_particles, 6 * (1 + n_history)) 118 | feat = feat.permute(0, 2, 1) # (bsz, 6 * (1 + n_history), num_particles) 119 | feat, trans, trans_feat = self.encoder(feat, enabled) # feat: (bsz, feature_dim, num_particles) 120 | 121 | if self.radius > 0: 122 | # aggregate neighborhood 123 | # x.shape: (bsz, num_particles, 3) 124 | # x_grid.shape: (bsz, num_grids_total, 3) 125 | dist_pt_grid = torch.cdist(x_grid, x, p=2) 126 | mask = dist_pt_grid < self.radius # (bsz, num_grids_total, num_particles) 127 | mask_normed = mask / (mask.sum(dim=-1, keepdim=True) + 1e-5) # for each grid, normalize the weights 128 | mask_normed = mask_normed.detach() 129 | feat = mask_normed @ feat.permute(0, 2, 1) # (bsz, num_grids_total, feature_dim) 130 | else: 131 | # global max pooling 132 | feat = feat[:, None, :].repeat(1, x_grid.shape[1], 1) # (bsz, num_grids_total, feature_dim) 133 | 134 | # positional encoding and decoder 135 | feat = feat.reshape(-1, self.feature_dim) 136 | x_grid = x_grid.reshape(-1, 3) 137 | x_grid = self.positional_encoding(x_grid) 138 | feat = self.decoder(x_grid, feat) 139 | feat = feat * self.output_scale 140 | feat = feat.reshape(bsz, -1, feat.shape[-1]) 141 | feat = torch.bmm(feat, inv_rot) 142 | 143 | # expand to full grid 144 | feat = fill_grid_locations(feat, grid_idxs, self.num_grids_list) 145 | return feat 146 | -------------------------------------------------------------------------------- /pgnd/sim/__init__.py: -------------------------------------------------------------------------------- 1 | from .friction import * 2 | from .model import * 3 | from .sim import * 4 | from .utils import * 5 | -------------------------------------------------------------------------------- /pgnd/sim/friction.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Friction(nn.Module): 6 | def __init__(self, mu_init=0.5): 7 | super(Friction, self).__init__() 8 | self.mu = torch.nn.Parameter(torch.tensor(mu_init)) 9 | 10 | def clip(self): 11 | self.mu.data = torch.clamp(self.mu.data, 0, 1) 12 | -------------------------------------------------------------------------------- /pgnd/sim/model.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Union, Sequence, Any 2 | from omegaconf import DictConfig 3 | import numpy as np 4 | import torch 5 | from torch import Tensor 6 | import warp as wp 7 | 8 | from .utils import ConstantBatch, StaticsBatch, CollidersBatch, GridDataBatch, ParticleDataBatch 9 | 10 | 11 | class SimModelBatch: 12 | 13 | def __init__(self, constant: ConstantBatch, device: wp.context.Devicelike 
= None, batch_size: int = 1, requires_grad: bool = False) -> None: 14 | self.constant = constant 15 | self.device = wp.get_device(device) 16 | self.requires_grad = requires_grad 17 | self.batch_size = batch_size 18 | 19 | def update_friction(self, friction) -> None: 20 | self.constant.update_friction(friction, self.requires_grad) 21 | 22 | @staticmethod 23 | @wp.kernel 24 | def grid_op_batch( 25 | constant: ConstantBatch, 26 | colliders: CollidersBatch, 27 | grid_curr: GridDataBatch, 28 | grid_next: GridDataBatch, 29 | ) -> None: 30 | 31 | b, px, py, pz = wp.tid() 32 | 33 | v = grid_curr.v[b, px, py, pz] 34 | 35 | friction = constant.friction[b, 0] 36 | 37 | if px < constant.bound and v[0] < 0.0: 38 | impulse = wp.vec3(v[0], 0.0, 0.0) 39 | v = wp.vec3(0.0, v[1], v[2]) 40 | friction_impulse = wp.float32(-1.) * friction * wp.length(impulse) * wp.normalize(v) 41 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 42 | v = v + friction_impulse 43 | if py < constant.bound and v[1] < 0.0: 44 | impulse = wp.vec3(0.0, v[1], 0.0) 45 | v = wp.vec3(v[0], 0.0, v[2]) 46 | friction_impulse = wp.float32(-1.) * friction * wp.length(impulse) * wp.normalize(v) 47 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 48 | v = v + friction_impulse 49 | if pz < constant.bound and v[2] < 0.0: 50 | impulse = wp.vec3(0.0, 0.0, v[2]) 51 | v = wp.vec3(v[0], v[1], 0.0) 52 | friction_impulse = wp.float32(-1.) * friction * wp.length(impulse) * wp.normalize(v) 53 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 54 | v = v + friction_impulse 55 | if px > int(constant.num_grids_list[0]) - constant.bound and v[0] > 0.0: 56 | impulse = wp.vec3(v[0], 0.0, 0.0) 57 | v = wp.vec3(0.0, v[1], v[2]) 58 | friction_impulse = wp.float32(-1.) * friction * wp.length(impulse) * wp.normalize(v) 59 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 60 | v = v + friction_impulse 61 | if py > int(constant.num_grids_list[1]) - constant.bound and v[1] > 0.0: 62 | impulse = wp.vec3(0.0, v[1], 0.0) 63 | v = wp.vec3(v[0], 0.0, v[2]) 64 | friction_impulse = wp.float32(-1.) * friction * wp.length(impulse) * wp.normalize(v) 65 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 66 | v = v + friction_impulse 67 | if pz > int(constant.num_grids_list[2]) - constant.bound and v[2] > 0.0: 68 | impulse = wp.vec3(0.0, 0.0, v[2]) 69 | v = wp.vec3(v[0], v[1], 0.0) 70 | friction_impulse = wp.float32(-1.) 
* friction * wp.length(impulse) * wp.normalize(v) 71 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 72 | v = v + friction_impulse 73 | 74 | for gripper_id in range(colliders.gripper_centers.shape[1]): 75 | 76 | dx = wp.float32(px) * constant.dx - colliders.gripper_centers[b, gripper_id][0] 77 | dy = wp.float32(py) * constant.dx - colliders.gripper_centers[b, gripper_id][1] 78 | dz = wp.float32(pz) * constant.dx - colliders.gripper_centers[b, gripper_id][2] 79 | 80 | grid_from_gripper = wp.vec3(dx, dy, dz) 81 | grid_from_gripper_norm = wp.length(grid_from_gripper) 82 | 83 | gripper_add_radii = 0.0 84 | if grid_from_gripper_norm < colliders.gripper_radii[b, gripper_id] + gripper_add_radii and colliders.gripper_open[b, gripper_id] < 0.5: 85 | if wp.length(colliders.gripper_quat[b, gripper_id]) > 0: # filter out 86 | gripper_vel = colliders.gripper_vels[b, gripper_id] 87 | gripper_quat_vel = colliders.gripper_quat_vel[b, gripper_id] 88 | 89 | gripper_angular_vel = wp.length(gripper_quat_vel) 90 | gripper_quat_axis = gripper_quat_vel / (gripper_angular_vel + 1e-10) 91 | 92 | grid_from_gripper_axis = grid_from_gripper - \ 93 | wp.dot(gripper_quat_axis, grid_from_gripper) * gripper_quat_axis 94 | v = wp.cross(gripper_quat_vel, grid_from_gripper_axis) + gripper_vel 95 | 96 | else: 97 | v = colliders.gripper_vels[b, gripper_id] 98 | 99 | grid_next.v[b, px, py, pz] = v 100 | 101 | @staticmethod 102 | @wp.kernel 103 | def g2p_batch( 104 | constant: ConstantBatch, 105 | statics: StaticsBatch, 106 | particle_curr: ParticleDataBatch, 107 | grid_next: GridDataBatch, 108 | particle_next: ParticleDataBatch, 109 | ) -> None: 110 | 111 | b, p = wp.tid() 112 | 113 | if statics.enabled[b, p] == 0: 114 | return 115 | 116 | p_x = particle_curr.x[b, p] * constant.inv_dx 117 | base_x = int(p_x[0] - 0.5) 118 | base_y = int(p_x[1] - 0.5) 119 | base_z = int(p_x[2] - 0.5) 120 | f_x = p_x - wp.vec3( 121 | float(base_x), 122 | float(base_y), 123 | float(base_z)) 124 | 125 | # quadratic kernels [http://mpm.graphics Eqn. 
123, with x=fx,fx-1,fx-2] 126 | wa = wp.vec3(1.5) - f_x 127 | wb = f_x - wp.vec3(1.0) 128 | wc = f_x - wp.vec3(0.5) 129 | 130 | w = wp.mat33( 131 | wp.cw_mul(wa, wa) * 0.5, 132 | wp.vec3(0.75) - wp.cw_mul(wb, wb), 133 | wp.cw_mul(wc, wc) * 0.5, 134 | ) 135 | 136 | new_v = wp.vec3(0.0) 137 | 138 | for i in range(3): 139 | for j in range(3): 140 | for k in range(3): 141 | weight = w[0, i] * w[1, j] * w[2, k] 142 | 143 | v = grid_next.v[b, base_x + i, base_y + j, base_z + k] 144 | new_v = new_v + weight * v 145 | 146 | particle_next.v[b, p] = new_v 147 | 148 | bound = statics.clip_bound[b, p] * constant.dx + constant.eps 149 | new_x = particle_curr.x[b, p] + constant.dt * new_v 150 | new_x = wp.vec3( 151 | wp.clamp(new_x[0], 0.0 + bound, float(constant.num_grids_list[0]) * constant.dx - bound), 152 | wp.clamp(new_x[1], 0.0 + bound, float(constant.num_grids_list[1]) * constant.dx - bound), 153 | wp.clamp(new_x[2], 0.0 + bound, float(constant.num_grids_list[2]) * constant.dx - bound), 154 | ) 155 | particle_next.x[b, p] = new_x 156 | 157 | 158 | def build_model( 159 | cfg: DictConfig, 160 | batch_size: int, 161 | device: wp.context.Devicelike = None, 162 | requires_grad: bool = False 163 | ) -> SimModelBatch: 164 | 165 | dt: float = eval(cfg.sim.dt) if isinstance(cfg.sim.dt, str) else cfg.sim.dt 166 | bound: int = cfg.sim.bound 167 | eps: float = cfg.sim.eps 168 | assert len(cfg.sim.num_grids) == 4 169 | num_grids_list: list = cfg.sim.num_grids[:3] 170 | dx: float = cfg.sim.num_grids[3] 171 | inv_dx: float = 1 / dx 172 | 173 | constant = ConstantBatch() 174 | constant.init() 175 | constant.dt = dt 176 | constant.bound = bound 177 | constant.dx = dx 178 | constant.inv_dx = inv_dx 179 | constant.eps = eps 180 | friction = np.array([cfg.model.friction.value], dtype=np.float32)[None].repeat(batch_size, axis=0) 181 | constant.friction = wp.from_numpy(friction, dtype=float) # [bsz, 1] 182 | constant.num_grids_list = wp.vec3(*num_grids_list) 183 | 184 | model = SimModelBatch(constant, device, batch_size, requires_grad) 185 | return model 186 | -------------------------------------------------------------------------------- /pgnd/sim/sim.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from omegaconf import DictConfig 3 | import torch 4 | import torch.autograd as autograd 5 | import torch.nn as nn 6 | from torch import Tensor 7 | import warp as wp 8 | 9 | from .utils import StaticsBatch, CollidersBatch, ParticleDataBatch, GridDataBatch 10 | from .model import SimModelBatch, build_model 11 | from pgnd.utils import Tape, CondTape 12 | 13 | 14 | class SimFunctionWithFrictionBatch(autograd.Function): 15 | 16 | @staticmethod 17 | def forward( 18 | ctx: autograd.function.FunctionCtx, 19 | model: SimModelBatch, 20 | statics: StaticsBatch, 21 | colliders: CollidersBatch, 22 | particles_curr: ParticleDataBatch, 23 | particles_next: ParticleDataBatch, 24 | grid_curr: GridDataBatch, 25 | grid_next: GridDataBatch, 26 | friction: Tensor, 27 | x: Tensor, 28 | v: Tensor, 29 | pred: Tensor) -> tuple[Tensor, Tensor]: 30 | 31 | tape: Tape = Tape() 32 | model.update_friction(friction) 33 | particles_curr.from_torch(x=x, v=v) 34 | grid_curr.clear() 35 | grid_curr.zero_grad() 36 | grid_curr.from_torch(v=pred) 37 | grid_next.clear() 38 | grid_next.zero_grad() 39 | 40 | device = model.device 41 | constant = model.constant 42 | batch_size = model.batch_size 43 | 44 | num_grids_list = [int(constant.num_grids_list[0]), int(constant.num_grids_list[1]), 
int(constant.num_grids_list[2])] 45 | num_particles = particles_curr.x.shape[1] 46 | 47 | with CondTape(tape, model.requires_grad): 48 | wp.launch(model.grid_op_batch, 49 | dim=[batch_size] + [num_grids_list[0], num_grids_list[1], num_grids_list[2]], 50 | inputs=[constant, colliders, grid_curr], 51 | outputs=[grid_next], 52 | device=device 53 | ) 54 | wp.launch(model.g2p_batch, 55 | dim=[batch_size, num_particles], 56 | inputs=[constant, statics, particles_curr, grid_next], 57 | outputs=[particles_next], 58 | device=device 59 | ) 60 | 61 | x_next, v_next = particles_next.to_torch() 62 | 63 | ctx.tape = tape 64 | ctx.particles_curr = particles_curr 65 | ctx.particles_next = particles_next 66 | ctx.grid_curr = grid_curr 67 | 68 | return x_next, v_next 69 | 70 | @staticmethod 71 | def backward( 72 | ctx: autograd.function.FunctionCtx, 73 | grad_x_next: Tensor, 74 | grad_v_next: Tensor, 75 | ) -> tuple[None, None, None, None, None, None, None, None, Tensor, Tensor, Tensor]: 76 | 77 | tape: Tape = ctx.tape 78 | particles_curr: ParticleDataBatch = ctx.particles_curr 79 | particles_next: ParticleDataBatch = ctx.particles_next 80 | grid_curr: GridDataBatch = ctx.grid_curr 81 | 82 | if torch.isnan(grad_x_next).any() or torch.isnan(grad_v_next).any(): 83 | import ipdb; ipdb.set_trace() 84 | 85 | tape.backward( 86 | grads={ 87 | particles_next.x: wp.from_torch(grad_x_next.contiguous(), dtype=wp.vec3), 88 | particles_next.v: wp.from_torch(grad_v_next.contiguous(), dtype=wp.vec3), 89 | } 90 | ) 91 | 92 | grad_x, grad_v = particles_curr.to_torch_grad() 93 | grad_pred = grid_curr.to_torch_grad() 94 | 95 | if grad_x is not None: 96 | torch.nan_to_num_(grad_x, 0.0, 0.0, 0.0) 97 | if grad_v is not None: 98 | torch.nan_to_num_(grad_v, 0.0, 0.0, 0.0) 99 | if grad_pred is not None: 100 | torch.nan_to_num_(grad_pred, 0.0, 0.0, 0.0) 101 | 102 | return None, None, None, None, None, None, None, None, grad_x, grad_v, grad_pred 103 | 104 | 105 | class CacheDiffSimWithFrictionBatch(nn.Module): 106 | 107 | def __init__(self, cfg: DictConfig, num_steps: int, batch_size: int, device: wp.context.Devicelike = None, requires_grad: bool = False) -> None: 108 | super().__init__() 109 | self.model = build_model(cfg=cfg, batch_size=batch_size, device=device, requires_grad=requires_grad) 110 | self.curr_particles = [None for _ in range(num_steps)] 111 | self.next_particles = [None for _ in range(num_steps)] 112 | self.curr_grids = [None for _ in range(num_steps)] 113 | self.next_grids = [None for _ in range(num_steps)] 114 | 115 | def forward(self, statics: StaticsBatch, colliders: CollidersBatch, step: int, x: Tensor, v: Tensor, friction: Tensor, pred: Tensor) -> tuple[Tensor, Tensor]: 116 | shape = (x.size(0), x.size(1)) 117 | if self.curr_particles[step] is None: 118 | self.curr_particles[step] = ParticleDataBatch() 119 | self.curr_particles[step].init(shape, self.model.device, self.model.requires_grad) 120 | if self.next_particles[step] is None: 121 | self.next_particles[step] = ParticleDataBatch() 122 | self.next_particles[step].init(shape, self.model.device, self.model.requires_grad) 123 | particles_curr = self.curr_particles[step] 124 | particles_next = self.next_particles[step] 125 | 126 | shape = (pred.size(0), pred.size(1), pred.size(2), pred.size(3)) 127 | if self.curr_grids[step] is None: 128 | self.curr_grids[step] = GridDataBatch() 129 | self.curr_grids[step].init(shape, self.model.device, self.model.requires_grad) 130 | grid_curr = self.curr_grids[step] 131 | if self.next_grids[step] is None: 132 | 
self.next_grids[step] = GridDataBatch() 133 | self.next_grids[step].init(shape, self.model.device, self.model.requires_grad) 134 | grid_next = self.next_grids[step] 135 | 136 | return SimFunctionWithFrictionBatch.apply(self.model, statics, colliders, particles_curr, particles_next, grid_curr, grid_next, friction, x, v, pred) 137 | -------------------------------------------------------------------------------- /pgnd/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Dict, Optional 2 | from pathlib import Path 3 | from omegaconf import DictConfig 4 | import sys 5 | import shutil 6 | import numpy as np 7 | import wandb 8 | import warp as wp 9 | 10 | 11 | Tape = wp.Tape 12 | 13 | class CondTape(object): 14 | def __init__(self, tape: Optional[Tape], cond: bool = True) -> None: 15 | self.tape = tape 16 | self.cond = cond 17 | 18 | def __enter__(self): 19 | if self.tape is not None and self.cond: 20 | self.tape.__enter__() 21 | 22 | def __exit__(self, exc_type, exc_value, traceback): 23 | if self.tape is not None and self.cond: 24 | self.tape.__exit__(exc_type, exc_value, traceback) 25 | 26 | 27 | def cfg2dict(cfg: DictConfig) -> Dict: 28 | """ 29 | Recursively convert OmegaConf to vanilla dict 30 | :param cfg: 31 | :return: 32 | """ 33 | cfg_dict = {} 34 | for k, v in cfg.items(): 35 | if type(v) == DictConfig: 36 | cfg_dict[k] = cfg2dict(v) 37 | else: 38 | cfg_dict[k] = v 39 | return cfg_dict 40 | 41 | 42 | class Logger: 43 | 44 | def __init__(self, cfg, project='pgnd-train', entity='kaiz'): 45 | wandb.init(project=project, entity=entity, name=cfg.train.name) 46 | wandb.config = cfg2dict(cfg) 47 | 48 | def add_scalar(self, tag, scalar, step=None): 49 | wandb.log({tag: scalar}, step=step) 50 | 51 | def add_image(self, tag, img, step=None, scale=True): 52 | if scale: 53 | img = (img - img.min()) / (img.max() - img.min()) 54 | wandb.log({tag: wandb.Image(img)}, step=step) 55 | 56 | def add_video(self, tag, video, step=None): 57 | wandb.log({tag: wandb.Video(video)}, step=step) 58 | 59 | 60 | def mkdir(path: Path, resume=False, overwrite=False) -> None: 61 | while True: 62 | if overwrite: 63 | if path.is_dir(): 64 | print('overwriting directory ({})'.format(path)) 65 | shutil.rmtree(path, ignore_errors=True) 66 | path.mkdir(parents=True, exist_ok=True) 67 | return 68 | elif resume: 69 | print('resuming directory ({})'.format(path)) 70 | path.mkdir(parents=True, exist_ok=True) 71 | return 72 | else: 73 | if path.exists(): 74 | feedback = input('target directory ({}) already exists, overwrite? 
[Y/r/n] '.format(path)) 75 | ret = feedback.casefold() 76 | else: 77 | ret = 'y' 78 | if ret == 'n': 79 | sys.exit(0) 80 | elif ret == 'r': 81 | resume = True 82 | elif ret == 'y': 83 | overwrite = True 84 | 85 | 86 | def get_root(path: Union[str, Path], name: str = '.root') -> Path: 87 | root = Path(path).resolve() 88 | while not (root / name).is_file(): 89 | root = root.parent 90 | return root 91 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | warp-lang==1.1.0 2 | trimesh 3 | pyyaml 4 | numpy==1.26.4 5 | scipy 6 | transformers 7 | scikit-learn 8 | scikit-image 9 | pyvista 10 | opencv-contrib-python 11 | open3d 12 | moviepy 13 | matplotlib 14 | kornia 15 | ipdb 16 | h5py 17 | gradio 18 | ffmpeg 19 | einops 20 | dill 21 | av 22 | timm 23 | omegaconf 24 | hydra-core 25 | wandb -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | diff_gaussian_rasterization.egg-info/ 3 | dist/ 4 | .idea/ 5 | third_party/ -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/glm"] 2 | path = third_party/glm 3 | url = https://github.com/g-truc/glm.git 4 | -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | cmake_minimum_required(VERSION 3.20) 13 | 14 | project(DiffRast LANGUAGES CUDA CXX) 15 | 16 | set(CMAKE_CXX_STANDARD 17) 17 | set(CMAKE_CXX_EXTENSIONS OFF) 18 | set(CMAKE_CUDA_STANDARD 17) 19 | 20 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 21 | 22 | add_library(CudaRasterizer 23 | cuda_rasterizer/backward.h 24 | cuda_rasterizer/backward.cu 25 | cuda_rasterizer/forward.h 26 | cuda_rasterizer/forward.cu 27 | cuda_rasterizer/auxiliary.h 28 | cuda_rasterizer/rasterizer_impl.cu 29 | cuda_rasterizer/rasterizer_impl.h 30 | cuda_rasterizer/rasterizer.h 31 | ) 32 | 33 | set_target_properties(CudaRasterizer PROPERTIES CUDA_ARCHITECTURES "75;86") 34 | 35 | target_include_directories(CudaRasterizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/cuda_rasterizer) 36 | target_include_directories(CudaRasterizer PRIVATE third_party/glm ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) 37 | -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/LICENSE.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 
5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. 
THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/README.md: -------------------------------------------------------------------------------- 1 | 2 | This is a clone of https://github.com/graphdeco-inria/diff-gaussian-rasterization/tree/59f5f77e3ddbac3ed9db93ec2cfe99ed6c5d121d 3 | 4 | However, it has been edited by Jonathon Luiten to also render 'depth' as well as colour. 5 | 6 | This is needed for Jonathon's Dynamic 3D Gaussians work which can be found here: http://dynamic3dgaussians.github.io 7 | 8 | By default, the depth is calculated as 'median depth', where the depth is the depth of the Gaussian center which causes the accumulated rays transmittance to drop below 0.5. 9 | If a ray doesn't reach this threshold it is given a default depth of 15. This median depth avoids the depth floaters around depth boundaries that 'mean depth' would give. 10 | If 'mean depth' is preffered, there is commented out code which also calculates 'mean depth'. 11 | See lines 307-308 and 363-372 of cuda_rasterizer/forward.cu. 12 | 13 | Note that the backward pass for the depth has not been implemented, so it won't work for training with depth ground-truth. 14 | 15 | Note that the code in this repo follows the (non commercial) license of Inria as laid out in LICENSE.md 16 | 17 | If you're using this as part of the Dynamic 3D Gaussians code, just follow the installation instruction for that codebase. 18 | 19 | To install this stand-alone I have been doing the following (although I don't think this is necessarily the best way): 20 | ``` 21 | git clone git@github.com:git@github.com:JonathonLuiten/diff-gaussian-rasterization-w-depth.git 22 | cd diff-gaussian-rasterization-w-depth 23 | python setup.py install 24 | pip install . 25 | ``` 26 | 27 | Original readme below: 28 | 29 | # Differential Gaussian Rasterization 30 | 31 | Used as the rasterization engine for the paper "3D Gaussian Splatting for Real-Time Rendering of Radiance Fields". If you can make use of it in your own research, please be so kind to cite us. 32 | 33 |
34 | ## BibTeX
35 |
36 | @Article{kerbl3Dgaussians,
37 |       author       = {Kerbl, Bernhard and Kopanas, Georgios and Leimk{\"u}hler, Thomas and Drettakis, George},
38 |       title        = {3D Gaussian Splatting for Real-Time Radiance Field Rendering},
39 |       journal      = {ACM Transactions on Graphics},
40 |       number       = {4},
41 |       volume       = {42},
42 |       month        = {July},
43 |       year         = {2023},
44 |       url          = {https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/}
45 | }
46 |
47 |
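A minimal stand-alone calling sketch follows. The tensor shapes, camera matrices and the z_threshold value are illustrative assumptions; only the settings fields, the keyword arguments and the (colour, radii, depth) return signature come from diff_gaussian_rasterization/__init__.py.

```
import math
import torch
from diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianRasterizer

N = 1000                                                   # hypothetical number of Gaussians
device = "cuda"
means3D = torch.rand(N, 3, device=device)
means2D = torch.zeros_like(means3D, requires_grad=True)    # receives screen-space position gradients
opacities = torch.rand(N, 1, device=device)
scales = 0.01 * torch.rand(N, 3, device=device)
rotations = torch.nn.functional.normalize(torch.rand(N, 4, device=device))  # unit quaternions
colors_precomp = torch.rand(N, 3, device=device)           # pass either SHs or precomputed colours, not both

fov = math.radians(60.0)                                   # assumed field of view
settings = GaussianRasterizationSettings(
    image_height=480,
    image_width=640,
    tanfovx=math.tan(fov * 0.5),
    tanfovy=math.tan(fov * 0.5),
    bg=torch.zeros(3, device=device),
    scale_modifier=1.0,
    viewmatrix=torch.eye(4, device=device),                # placeholder world-to-view matrix
    projmatrix=torch.eye(4, device=device),                # placeholder full projection matrix
    sh_degree=0,
    campos=torch.zeros(3, device=device),
    prefiltered=False,
    z_threshold=0.2,  # assumed view-space depth cutoff; see in_frustum in cuda_rasterizer/auxiliary.h
)

rasterizer = GaussianRasterizer(settings)
# colour: (3, H, W); radii: (N,); depth: (1, H, W). Rays whose accumulated
# transmittance never drops below 0.5 keep the default (median) depth of 15.
colour, radii, depth = rasterizer(
    means3D=means3D, means2D=means2D, opacities=opacities,
    colors_precomp=colors_precomp, scales=scales, rotations=rotations)
```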
-------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/auxiliary.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 13 | #define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 14 | 15 | #include "config.h" 16 | #include "stdio.h" 17 | 18 | #define BLOCK_SIZE (BLOCK_X * BLOCK_Y) 19 | #define NUM_WARPS (BLOCK_SIZE/32) 20 | 21 | // Spherical harmonics coefficients 22 | __device__ const float SH_C0 = 0.28209479177387814f; 23 | __device__ const float SH_C1 = 0.4886025119029199f; 24 | __device__ const float SH_C2[] = { 25 | 1.0925484305920792f, 26 | -1.0925484305920792f, 27 | 0.31539156525252005f, 28 | -1.0925484305920792f, 29 | 0.5462742152960396f 30 | }; 31 | __device__ const float SH_C3[] = { 32 | -0.5900435899266435f, 33 | 2.890611442640554f, 34 | -0.4570457994644658f, 35 | 0.3731763325901154f, 36 | -0.4570457994644658f, 37 | 1.445305721320277f, 38 | -0.5900435899266435f 39 | }; 40 | 41 | __forceinline__ __device__ float ndc2Pix(float v, int S) 42 | { 43 | return ((v + 1.0) * S - 1.0) * 0.5; 44 | } 45 | 46 | __forceinline__ __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid) 47 | { 48 | rect_min = { 49 | min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))), 50 | min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y))) 51 | }; 52 | rect_max = { 53 | min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))), 54 | min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y))) 55 | }; 56 | } 57 | 58 | __forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix) 59 | { 60 | float3 transformed = { 61 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 62 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 63 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 64 | }; 65 | return transformed; 66 | } 67 | 68 | __forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix) 69 | { 70 | float4 transformed = { 71 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 72 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 73 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 74 | matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15] 75 | }; 76 | return transformed; 77 | } 78 | 79 | __forceinline__ __device__ float3 transformVec4x3(const float3& p, const float* matrix) 80 | { 81 | float3 transformed = { 82 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z, 83 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z, 84 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z, 85 | }; 86 | return transformed; 87 | } 88 | 89 | __forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix) 90 | { 91 | float3 transformed = { 92 | matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z, 93 | matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z, 94 | matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z, 95 | }; 96 | return transformed; 97 | } 98 
| 99 | __forceinline__ __device__ float dnormvdz(float3 v, float3 dv) 100 | { 101 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 102 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 103 | float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 104 | return dnormvdz; 105 | } 106 | 107 | __forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv) 108 | { 109 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 110 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 111 | 112 | float3 dnormvdv; 113 | dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32; 114 | dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32; 115 | dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 116 | return dnormvdv; 117 | } 118 | 119 | __forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv) 120 | { 121 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; 122 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 123 | 124 | float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w }; 125 | float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w; 126 | float4 dnormvdv; 127 | dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32; 128 | dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32; 129 | dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32; 130 | dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32; 131 | return dnormvdv; 132 | } 133 | 134 | __forceinline__ __device__ float sigmoid(float x) 135 | { 136 | return 1.0f / (1.0f + expf(-x)); 137 | } 138 | 139 | __forceinline__ __device__ bool in_frustum(int idx, 140 | const float* orig_points, 141 | const float* viewmatrix, 142 | const float* projmatrix, 143 | const float z_threshold, 144 | bool prefiltered, 145 | float3& p_view) 146 | { 147 | float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] }; 148 | 149 | // Bring points to screen space 150 | float4 p_hom = transformPoint4x4(p_orig, projmatrix); 151 | float p_w = 1.0f / (p_hom.w + 0.0000001f); 152 | float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w }; 153 | p_view = transformPoint4x3(p_orig, viewmatrix); 154 | 155 | if (p_view.z <= z_threshold) // || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3))) 156 | { 157 | if (prefiltered) 158 | { 159 | printf("Point is filtered although prefiltered is set. This shouldn't happen!"); 160 | __trap(); 161 | } 162 | return false; 163 | } 164 | return true; 165 | } 166 | 167 | #endif -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/backward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_BACKWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_BACKWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace BACKWARD 22 | { 23 | void render( 24 | const dim3 grid, dim3 block, 25 | const uint2* ranges, 26 | const uint32_t* point_list, 27 | int W, int H, 28 | const float* bg_color, 29 | const float2* means2D, 30 | const float4* conic_opacity, 31 | const float* colors, 32 | const float* final_Ts, 33 | const uint32_t* n_contrib, 34 | const float* dL_dpixels, 35 | float3* dL_dmean2D, 36 | float4* dL_dconic2D, 37 | float* dL_dopacity, 38 | float* dL_dcolors); 39 | 40 | void preprocess( 41 | int P, int D, int M, 42 | const float3* means, 43 | const int* radii, 44 | const float* shs, 45 | const bool* clamped, 46 | const glm::vec3* scales, 47 | const glm::vec4* rotations, 48 | const float scale_modifier, 49 | const float* cov3Ds, 50 | const float* view, 51 | const float* proj, 52 | const float focal_x, float focal_y, 53 | const float tan_fovx, float tan_fovy, 54 | const glm::vec3* campos, 55 | const float3* dL_dmean2D, 56 | const float* dL_dconics, 57 | glm::vec3* dL_dmeans, 58 | float* dL_dcolor, 59 | float* dL_dcov3D, 60 | float* dL_dsh, 61 | glm::vec3* dL_dscale, 62 | glm::vec4* dL_drot); 63 | } 64 | 65 | #endif -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_CONFIG_H_INCLUDED 13 | #define CUDA_RASTERIZER_CONFIG_H_INCLUDED 14 | 15 | #define NUM_CHANNELS 3 // Default 3, RGB 16 | #define BLOCK_X 16 17 | #define BLOCK_Y 16 18 | 19 | #endif -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/forward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_FORWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_FORWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace FORWARD 22 | { 23 | // Perform initial steps for each Gaussian prior to rasterization. 
24 | void preprocess(int P, int D, int M, 25 | const float* orig_points, 26 | const glm::vec3* scales, 27 | const float scale_modifier, 28 | const glm::vec4* rotations, 29 | const float* opacities, 30 | const float* shs, 31 | bool* clamped, 32 | const float* cov3D_precomp, 33 | const float* colors_precomp, 34 | const float* viewmatrix, 35 | const float* projmatrix, 36 | const glm::vec3* cam_pos, 37 | const int W, int H, 38 | const float focal_x, float focal_y, 39 | const float tan_fovx, float tan_fovy, 40 | int* radii, 41 | float2* points_xy_image, 42 | float* depths, 43 | float* cov3Ds, 44 | float* colors, 45 | float4* conic_opacity, 46 | const dim3 grid, 47 | uint32_t* tiles_touched, 48 | bool prefiltered, 49 | const float z_threshold); 50 | 51 | // Main rasterization method. 52 | void render( 53 | const dim3 grid, dim3 block, 54 | const uint2* ranges, 55 | const uint32_t* point_list, 56 | int W, int H, 57 | const float2* points_xy_image, 58 | const float* features, 59 | const float4* conic_opacity, 60 | float* final_T, 61 | uint32_t* n_contrib, 62 | const float* bg_color, 63 | float* out_color, 64 | const float* depth, 65 | float* out_depth); 66 | } 67 | 68 | 69 | #endif -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/rasterizer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_H_INCLUDED 13 | #define CUDA_RASTERIZER_H_INCLUDED 14 | 15 | #include 16 | #include 17 | 18 | namespace CudaRasterizer 19 | { 20 | class Rasterizer 21 | { 22 | public: 23 | 24 | static void markVisible( 25 | int P, 26 | float* means3D, 27 | float* viewmatrix, 28 | float* projmatrix, 29 | bool* present); 30 | 31 | static int forward( 32 | std::function geometryBuffer, 33 | std::function binningBuffer, 34 | std::function imageBuffer, 35 | const int P, int D, int M, 36 | const float* background, 37 | const int width, int height, 38 | const float* means3D, 39 | const float* shs, 40 | const float* colors_precomp, 41 | const float* opacities, 42 | const float* scales, 43 | const float scale_modifier, 44 | const float* rotations, 45 | const float* cov3D_precomp, 46 | const float* viewmatrix, 47 | const float* projmatrix, 48 | const float* cam_pos, 49 | const float tan_fovx, float tan_fovy, 50 | const bool prefiltered, 51 | const float z_threshold, 52 | float* out_color, 53 | float* out_depth, 54 | int* radii = nullptr); 55 | 56 | static void backward( 57 | const int P, int D, int M, int R, 58 | const float* background, 59 | const int width, int height, 60 | const float* means3D, 61 | const float* shs, 62 | const float* colors_precomp, 63 | const float* scales, 64 | const float scale_modifier, 65 | const float* rotations, 66 | const float* cov3D_precomp, 67 | const float* viewmatrix, 68 | const float* projmatrix, 69 | const float* campos, 70 | const float tan_fovx, float tan_fovy, 71 | const int* radii, 72 | char* geom_buffer, 73 | char* binning_buffer, 74 | char* image_buffer, 75 | const float* dL_dpix, 76 | float* dL_dmean2D, 77 | float* dL_dconic, 78 | float* dL_dopacity, 79 | float* dL_dcolor, 80 | float* dL_dmean3D, 81 | float* 
dL_dcov3D, 82 | float* dL_dsh, 83 | float* dL_dscale, 84 | float* dL_drot); 85 | }; 86 | }; 87 | 88 | #endif -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/rasterizer_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include "rasterizer.h" 17 | #include 18 | #include 19 | 20 | namespace CudaRasterizer 21 | { 22 | template 23 | static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment) 24 | { 25 | std::size_t offset = (reinterpret_cast(chunk) + alignment - 1) & ~(alignment - 1); 26 | ptr = reinterpret_cast(offset); 27 | chunk = reinterpret_cast(ptr + count); 28 | } 29 | 30 | struct GeometryState 31 | { 32 | size_t scan_size; 33 | float* depths; 34 | char* scanning_space; 35 | bool* clamped; 36 | int* internal_radii; 37 | float2* means2D; 38 | float* cov3D; 39 | float4* conic_opacity; 40 | float* rgb; 41 | uint32_t* point_offsets; 42 | uint32_t* tiles_touched; 43 | 44 | static GeometryState fromChunk(char*& chunk, size_t P); 45 | }; 46 | 47 | struct ImageState 48 | { 49 | uint2* ranges; 50 | uint32_t* n_contrib; 51 | float* accum_alpha; 52 | 53 | static ImageState fromChunk(char*& chunk, size_t N); 54 | }; 55 | 56 | struct BinningState 57 | { 58 | size_t sorting_size; 59 | uint64_t* point_list_keys_unsorted; 60 | uint64_t* point_list_keys; 61 | uint32_t* point_list_unsorted; 62 | uint32_t* point_list; 63 | char* list_sorting_space; 64 | 65 | static BinningState fromChunk(char*& chunk, size_t P); 66 | }; 67 | 68 | template 69 | size_t required(size_t P) 70 | { 71 | char* size = nullptr; 72 | T::fromChunk(size, P); 73 | return ((size_t)size) + 128; 74 | } 75 | }; -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/diff_gaussian_rasterization/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from typing import NamedTuple 13 | import torch.nn as nn 14 | import torch 15 | from . 
import _C 16 | 17 | def rasterize_gaussians( 18 | means3D, 19 | means2D, 20 | sh, 21 | colors_precomp, 22 | opacities, 23 | scales, 24 | rotations, 25 | cov3Ds_precomp, 26 | raster_settings, 27 | ): 28 | return _RasterizeGaussians.apply( 29 | means3D, 30 | means2D, 31 | sh, 32 | colors_precomp, 33 | opacities, 34 | scales, 35 | rotations, 36 | cov3Ds_precomp, 37 | raster_settings, 38 | ) 39 | 40 | class _RasterizeGaussians(torch.autograd.Function): 41 | @staticmethod 42 | def forward( 43 | ctx, 44 | means3D, 45 | means2D, 46 | sh, 47 | colors_precomp, 48 | opacities, 49 | scales, 50 | rotations, 51 | cov3Ds_precomp, 52 | raster_settings, 53 | ): 54 | 55 | # Restructure arguments the way that the C++ lib expects them 56 | args = ( 57 | raster_settings.bg, 58 | means3D, 59 | colors_precomp, 60 | opacities, 61 | scales, 62 | rotations, 63 | raster_settings.scale_modifier, 64 | cov3Ds_precomp, 65 | raster_settings.viewmatrix, 66 | raster_settings.projmatrix, 67 | raster_settings.tanfovx, 68 | raster_settings.tanfovy, 69 | raster_settings.image_height, 70 | raster_settings.image_width, 71 | sh, 72 | raster_settings.sh_degree, 73 | raster_settings.campos, 74 | raster_settings.prefiltered, 75 | raster_settings.z_threshold, 76 | ) 77 | 78 | # Invoke C++/CUDA rasterizer 79 | # num_rendered, color, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args) 80 | num_rendered, color, radii, geomBuffer, binningBuffer, imgBuffer, depth = _C.rasterize_gaussians(*args) 81 | 82 | # Keep relevant tensors for backward 83 | ctx.raster_settings = raster_settings 84 | ctx.num_rendered = num_rendered 85 | ctx.save_for_backward(colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer) 86 | return color, radii, depth 87 | 88 | @staticmethod 89 | def backward(ctx, grad_out_color, _, depth): 90 | 91 | # Restore necessary values from context 92 | num_rendered = ctx.num_rendered 93 | raster_settings = ctx.raster_settings 94 | colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer = ctx.saved_tensors 95 | 96 | # Restructure args as C++ method expects them 97 | args = (raster_settings.bg, 98 | means3D, 99 | radii, 100 | colors_precomp, 101 | scales, 102 | rotations, 103 | raster_settings.scale_modifier, 104 | cov3Ds_precomp, 105 | raster_settings.viewmatrix, 106 | raster_settings.projmatrix, 107 | raster_settings.tanfovx, 108 | raster_settings.tanfovy, 109 | grad_out_color, 110 | sh, 111 | raster_settings.sh_degree, 112 | raster_settings.campos, 113 | geomBuffer, 114 | num_rendered, 115 | binningBuffer, 116 | imgBuffer) 117 | 118 | # Compute gradients for relevant tensors by invoking backward method 119 | grad_means2D, grad_colors_precomp, grad_opacities, grad_means3D, grad_cov3Ds_precomp, grad_sh, grad_scales, grad_rotations = _C.rasterize_gaussians_backward(*args) 120 | 121 | grads = ( 122 | grad_means3D, 123 | grad_means2D, 124 | grad_sh, 125 | grad_colors_precomp, 126 | grad_opacities, 127 | grad_scales, 128 | grad_rotations, 129 | grad_cov3Ds_precomp, 130 | None, 131 | ) 132 | 133 | return grads 134 | 135 | class GaussianRasterizationSettings(NamedTuple): 136 | image_height: int 137 | image_width: int 138 | tanfovx : float 139 | tanfovy : float 140 | bg : torch.Tensor 141 | scale_modifier : float 142 | viewmatrix : torch.Tensor 143 | projmatrix : torch.Tensor 144 | sh_degree : int 145 | campos : torch.Tensor 146 | prefiltered : bool 147 | z_threshold : float 148 | 149 | class 
GaussianRasterizer(nn.Module): 150 | def __init__(self, raster_settings): 151 | super().__init__() 152 | self.raster_settings = raster_settings 153 | 154 | def markVisible(self, positions): 155 | # Mark visible points (based on frustum culling for camera) with a boolean 156 | with torch.no_grad(): 157 | raster_settings = self.raster_settings 158 | visible = _C.mark_visible( 159 | positions, 160 | raster_settings.viewmatrix, 161 | raster_settings.projmatrix) 162 | 163 | return visible 164 | 165 | def forward(self, means3D, means2D, opacities, shs = None, colors_precomp = None, scales = None, rotations = None, cov3D_precomp = None): 166 | 167 | raster_settings = self.raster_settings 168 | 169 | if (shs is None and colors_precomp is None) or (shs is not None and colors_precomp is not None): 170 | raise Exception('Please provide excatly one of either SHs or precomputed colors!') 171 | 172 | if ((scales is None or rotations is None) and cov3D_precomp is None) or ((scales is not None or rotations is not None) and cov3D_precomp is not None): 173 | raise Exception('Please provide exactly one of either scale/rotation pair or precomputed 3D covariance!') 174 | 175 | if shs is None: 176 | shs = torch.Tensor([]) 177 | if colors_precomp is None: 178 | colors_precomp = torch.Tensor([]) 179 | 180 | if scales is None: 181 | scales = torch.Tensor([]) 182 | if rotations is None: 183 | rotations = torch.Tensor([]) 184 | if cov3D_precomp is None: 185 | cov3D_precomp = torch.Tensor([]) 186 | 187 | # Invoke C++/CUDA rasterization routine 188 | return rasterize_gaussians( 189 | means3D, 190 | means2D, 191 | shs, 192 | colors_precomp, 193 | opacities, 194 | scales, 195 | rotations, 196 | cov3D_precomp, 197 | raster_settings, 198 | ) 199 | 200 | -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/ext.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include "rasterize_points.h" 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("rasterize_gaussians", &RasterizeGaussiansCUDA); 17 | m.def("rasterize_gaussians_backward", &RasterizeGaussiansBackwardCUDA); 18 | m.def("mark_visible", &markVisible); 19 | } -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/rasterize_points.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "cuda_rasterizer/config.h" 22 | #include "cuda_rasterizer/rasterizer.h" 23 | #include 24 | #include 25 | #include 26 | 27 | std::function resizeFunctional(torch::Tensor& t) { 28 | auto lambda = [&t](size_t N) { 29 | t.resize_({(long long)N}); 30 | return reinterpret_cast(t.contiguous().data_ptr()); 31 | }; 32 | return lambda; 33 | } 34 | 35 | std::tuple 36 | RasterizeGaussiansCUDA( 37 | const torch::Tensor& background, 38 | const torch::Tensor& means3D, 39 | const torch::Tensor& colors, 40 | const torch::Tensor& opacity, 41 | const torch::Tensor& scales, 42 | const torch::Tensor& rotations, 43 | const float scale_modifier, 44 | const torch::Tensor& cov3D_precomp, 45 | const torch::Tensor& viewmatrix, 46 | const torch::Tensor& projmatrix, 47 | const float tan_fovx, 48 | const float tan_fovy, 49 | const int image_height, 50 | const int image_width, 51 | const torch::Tensor& sh, 52 | const int degree, 53 | const torch::Tensor& campos, 54 | const bool prefiltered, 55 | const float z_threshold) 56 | { 57 | if (means3D.ndimension() != 2 || means3D.size(1) != 3) { 58 | AT_ERROR("means3D must have dimensions (num_points, 3)"); 59 | } 60 | 61 | const int P = means3D.size(0); 62 | const int H = image_height; 63 | const int W = image_width; 64 | 65 | auto int_opts = means3D.options().dtype(torch::kInt32); 66 | auto float_opts = means3D.options().dtype(torch::kFloat32); 67 | 68 | torch::Tensor out_color = torch::full({NUM_CHANNELS, H, W}, 0.0, float_opts); 69 | torch::Tensor radii = torch::full({P}, 0, means3D.options().dtype(torch::kInt32)); 70 | torch::Tensor out_depth = torch::full({1, H, W}, 0.0, float_opts); 71 | 72 | torch::Device device(torch::kCUDA); 73 | torch::TensorOptions options(torch::kByte); 74 | torch::Tensor geomBuffer = torch::empty({0}, options.device(device)); 75 | torch::Tensor binningBuffer = torch::empty({0}, options.device(device)); 76 | torch::Tensor imgBuffer = torch::empty({0}, options.device(device)); 77 | std::function geomFunc = resizeFunctional(geomBuffer); 78 | std::function binningFunc = resizeFunctional(binningBuffer); 79 | std::function imgFunc = resizeFunctional(imgBuffer); 80 | 81 | int rendered = 0; 82 | if(P != 0) 83 | { 84 | int M = 0; 85 | if(sh.size(0) != 0) 86 | { 87 | M = sh.size(1); 88 | } 89 | 90 | rendered = CudaRasterizer::Rasterizer::forward( 91 | geomFunc, 92 | binningFunc, 93 | imgFunc, 94 | P, degree, M, 95 | background.contiguous().data(), 96 | W, H, 97 | means3D.contiguous().data(), 98 | sh.contiguous().data_ptr(), 99 | colors.contiguous().data(), 100 | opacity.contiguous().data(), 101 | scales.contiguous().data_ptr(), 102 | scale_modifier, 103 | rotations.contiguous().data_ptr(), 104 | cov3D_precomp.contiguous().data(), 105 | viewmatrix.contiguous().data(), 106 | projmatrix.contiguous().data(), 107 | campos.contiguous().data(), 108 | tan_fovx, 109 | tan_fovy, 110 | prefiltered, 111 | z_threshold, 112 | out_color.contiguous().data(), 113 | out_depth.contiguous().data(), 114 | radii.contiguous().data()); 115 | } 116 | return std::make_tuple(rendered, out_color, radii, geomBuffer, binningBuffer, imgBuffer, out_depth); 117 | } 118 | 119 | std::tuple 120 | RasterizeGaussiansBackwardCUDA( 121 | const torch::Tensor& background, 122 | const torch::Tensor& means3D, 123 | const torch::Tensor& radii, 124 | const torch::Tensor& colors, 125 | const 
torch::Tensor& scales, 126 | const torch::Tensor& rotations, 127 | const float scale_modifier, 128 | const torch::Tensor& cov3D_precomp, 129 | const torch::Tensor& viewmatrix, 130 | const torch::Tensor& projmatrix, 131 | const float tan_fovx, 132 | const float tan_fovy, 133 | const torch::Tensor& dL_dout_color, 134 | const torch::Tensor& sh, 135 | const int degree, 136 | const torch::Tensor& campos, 137 | const torch::Tensor& geomBuffer, 138 | const int R, 139 | const torch::Tensor& binningBuffer, 140 | const torch::Tensor& imageBuffer) 141 | { 142 | const int P = means3D.size(0); 143 | const int H = dL_dout_color.size(1); 144 | const int W = dL_dout_color.size(2); 145 | 146 | int M = 0; 147 | if(sh.size(0) != 0) 148 | { 149 | M = sh.size(1); 150 | } 151 | 152 | torch::Tensor dL_dmeans3D = torch::zeros({P, 3}, means3D.options()); 153 | torch::Tensor dL_dmeans2D = torch::zeros({P, 3}, means3D.options()); 154 | torch::Tensor dL_dcolors = torch::zeros({P, NUM_CHANNELS}, means3D.options()); 155 | torch::Tensor dL_dconic = torch::zeros({P, 2, 2}, means3D.options()); 156 | torch::Tensor dL_dopacity = torch::zeros({P, 1}, means3D.options()); 157 | torch::Tensor dL_dcov3D = torch::zeros({P, 6}, means3D.options()); 158 | torch::Tensor dL_dsh = torch::zeros({P, M, 3}, means3D.options()); 159 | torch::Tensor dL_dscales = torch::zeros({P, 3}, means3D.options()); 160 | torch::Tensor dL_drotations = torch::zeros({P, 4}, means3D.options()); 161 | 162 | if(P != 0) 163 | { 164 | CudaRasterizer::Rasterizer::backward(P, degree, M, R, 165 | background.contiguous().data(), 166 | W, H, 167 | means3D.contiguous().data(), 168 | sh.contiguous().data(), 169 | colors.contiguous().data(), 170 | scales.data_ptr(), 171 | scale_modifier, 172 | rotations.data_ptr(), 173 | cov3D_precomp.contiguous().data(), 174 | viewmatrix.contiguous().data(), 175 | projmatrix.contiguous().data(), 176 | campos.contiguous().data(), 177 | tan_fovx, 178 | tan_fovy, 179 | radii.contiguous().data(), 180 | reinterpret_cast(geomBuffer.contiguous().data_ptr()), 181 | reinterpret_cast(binningBuffer.contiguous().data_ptr()), 182 | reinterpret_cast(imageBuffer.contiguous().data_ptr()), 183 | dL_dout_color.contiguous().data(), 184 | dL_dmeans2D.contiguous().data(), 185 | dL_dconic.contiguous().data(), 186 | dL_dopacity.contiguous().data(), 187 | dL_dcolors.contiguous().data(), 188 | dL_dmeans3D.contiguous().data(), 189 | dL_dcov3D.contiguous().data(), 190 | dL_dsh.contiguous().data(), 191 | dL_dscales.contiguous().data(), 192 | dL_drotations.contiguous().data()); 193 | } 194 | 195 | return std::make_tuple(dL_dmeans2D, dL_dcolors, dL_dopacity, dL_dmeans3D, dL_dcov3D, dL_dsh, dL_dscales, dL_drotations); 196 | } 197 | 198 | torch::Tensor markVisible( 199 | torch::Tensor& means3D, 200 | torch::Tensor& viewmatrix, 201 | torch::Tensor& projmatrix) 202 | { 203 | const int P = means3D.size(0); 204 | 205 | torch::Tensor present = torch::full({P}, false, means3D.options().dtype(at::kBool)); 206 | 207 | if(P != 0) 208 | { 209 | CudaRasterizer::Rasterizer::markVisible(P, 210 | means3D.contiguous().data(), 211 | viewmatrix.contiguous().data(), 212 | projmatrix.contiguous().data(), 213 | present.contiguous().data()); 214 | } 215 | 216 | return present; 217 | } -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/rasterize_points.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, 
https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | std::tuple 19 | RasterizeGaussiansCUDA( 20 | const torch::Tensor& background, 21 | const torch::Tensor& means3D, 22 | const torch::Tensor& colors, 23 | const torch::Tensor& opacity, 24 | const torch::Tensor& scales, 25 | const torch::Tensor& rotations, 26 | const float scale_modifier, 27 | const torch::Tensor& cov3D_precomp, 28 | const torch::Tensor& viewmatrix, 29 | const torch::Tensor& projmatrix, 30 | const float tan_fovx, 31 | const float tan_fovy, 32 | const int image_height, 33 | const int image_width, 34 | const torch::Tensor& sh, 35 | const int degree, 36 | const torch::Tensor& campos, 37 | const bool prefiltered, 38 | const float z_threshold); 39 | 40 | std::tuple 41 | RasterizeGaussiansBackwardCUDA( 42 | const torch::Tensor& background, 43 | const torch::Tensor& means3D, 44 | const torch::Tensor& radii, 45 | const torch::Tensor& colors, 46 | const torch::Tensor& scales, 47 | const torch::Tensor& rotations, 48 | const float scale_modifier, 49 | const torch::Tensor& cov3D_precomp, 50 | const torch::Tensor& viewmatrix, 51 | const torch::Tensor& projmatrix, 52 | const float tan_fovx, 53 | const float tan_fovy, 54 | const torch::Tensor& dL_dout_color, 55 | const torch::Tensor& sh, 56 | const int degree, 57 | const torch::Tensor& campos, 58 | const torch::Tensor& geomBuffer, 59 | const int R, 60 | const torch::Tensor& binningBuffer, 61 | const torch::Tensor& imageBuffer); 62 | 63 | torch::Tensor markVisible( 64 | torch::Tensor& means3D, 65 | torch::Tensor& viewmatrix, 66 | torch::Tensor& projmatrix); -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from setuptools import setup 13 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 14 | import os 15 | os.path.dirname(os.path.abspath(__file__)) 16 | 17 | setup( 18 | name="diff_gaussian_rasterization", 19 | packages=['diff_gaussian_rasterization'], 20 | ext_modules=[ 21 | CUDAExtension( 22 | name="diff_gaussian_rasterization._C", 23 | sources=[ 24 | "cuda_rasterizer/rasterizer_impl.cu", 25 | "cuda_rasterizer/forward.cu", 26 | "cuda_rasterizer/backward.cu", 27 | "rasterize_points.cu", 28 | "ext.cpp"], 29 | extra_compile_args={"nvcc": ["-I" + os.path.join(os.path.dirname(os.path.abspath(__file__)), "third_party/glm/")]}) 30 | ], 31 | cmdclass={ 32 | 'build_ext': BuildExtension 33 | } 34 | ) 35 | --------------------------------------------------------------------------------