├── .gitignore ├── LICENSE ├── README.md ├── experiments ├── .root ├── assets │ └── robots │ │ └── xarm7 │ │ ├── meshes │ │ ├── collision │ │ │ ├── link1.obj │ │ │ ├── link1.obj.convex.stl │ │ │ ├── link2.obj │ │ │ ├── link2.obj.convex.stl │ │ │ ├── link3.obj │ │ │ ├── link3.obj.convex.stl │ │ │ ├── link4.obj │ │ │ ├── link4.obj.convex.stl │ │ │ ├── link5.obj │ │ │ ├── link5.obj.convex.stl │ │ │ ├── link6.obj │ │ │ ├── link6.obj.convex.stl │ │ │ ├── link7.obj │ │ │ ├── link7.obj.convex.stl │ │ │ ├── link_base.obj │ │ │ └── link_base.obj.convex.stl │ │ └── visual │ │ │ ├── link1.glb │ │ │ ├── link2.glb │ │ │ ├── link3.glb │ │ │ ├── link4.glb │ │ │ ├── link5.glb │ │ │ ├── link6.glb │ │ │ ├── link7.glb │ │ │ └── link_base.glb │ │ └── xarm7.urdf ├── cfg │ ├── default.yaml │ ├── model │ │ ├── friction │ │ │ └── default.yaml │ │ ├── material │ │ │ └── pointnet_nerf_material.yaml │ │ └── pgnd.yaml │ ├── render │ │ └── default.yaml │ ├── sim │ │ └── default.yaml │ └── train │ │ └── default.yaml ├── real_world │ ├── calibrate.py │ ├── camera │ │ ├── __init__.py │ │ ├── multi_realsense.py │ │ ├── shared_memory │ │ │ ├── __init__.py │ │ │ ├── shared_memory_queue.py │ │ │ ├── shared_memory_ring_buffer.py │ │ │ ├── shared_memory_util.py │ │ │ └── shared_ndarray.py │ │ ├── single_realsense.py │ │ └── utils.py │ ├── gs │ │ ├── config │ │ │ └── default.yaml │ │ ├── convert.py │ │ ├── external.py │ │ ├── helpers.py │ │ ├── sh_utils.py │ │ ├── train_utils.py │ │ └── trainer.py │ ├── modules_planning │ │ ├── __init__.py │ │ ├── common │ │ │ ├── communication.py │ │ │ └── xarm.py │ │ ├── dynamics_module.py │ │ ├── kinematics_utils.py │ │ ├── planning_env.py │ │ ├── planning_module.py │ │ ├── segment_perception.py │ │ ├── udp_util.py │ │ └── xarm_controller.py │ ├── modules_teleop │ │ ├── __init__.py │ │ ├── perception.py │ │ ├── recorder.py │ │ ├── robot_env_teleop.py │ │ └── teleop_keyboard.py │ ├── plan.py │ ├── postprocess.py │ ├── reconstruct_gs.py │ ├── teleop.py │ └── utils │ │ ├── __init__.py │ │ ├── env_utils.py │ │ ├── gradio_utils.py │ │ ├── pcd_utils.py │ │ ├── planning_utils.py │ │ ├── render_utils.py │ │ └── track_utils.py ├── scripts │ ├── train_box.sh │ ├── train_bread.sh │ ├── train_cloth.sh │ ├── train_paperbag.sh │ ├── train_rope.sh │ └── train_sloth.sh └── train │ ├── __init__.py │ ├── eval.py │ ├── gs.py │ ├── metric.py │ ├── metric_eval.py │ ├── pv_dataset.py │ ├── pv_train.py │ ├── pv_utils.py │ └── train_eval.py ├── imgs └── teaser.png ├── pgnd ├── __init__.py ├── data │ ├── __init__.py │ ├── dataset.py │ └── dataset_gripper.py ├── ffmpeg.py ├── material │ ├── __init__.py │ ├── network │ │ ├── __init__.py │ │ ├── nerf.py │ │ ├── pointnet.py │ │ └── utils.py │ └── pgnd.py ├── sim │ ├── __init__.py │ ├── friction.py │ ├── model.py │ ├── sim.py │ └── utils.py └── utils.py ├── requirements.txt └── third-party └── diff-gaussian-rasterization-w-depth ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE.md ├── README.md ├── cuda_rasterizer ├── auxiliary.h ├── backward.cu ├── backward.h ├── config.h ├── forward.cu ├── forward.h ├── rasterizer.h ├── rasterizer_impl.cu └── rasterizer_impl.h ├── diff_gaussian_rasterization └── __init__.py ├── ext.cpp ├── rasterize_points.cu ├── rasterize_points.h └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | ## Mac OS 2 | 3 | # General 4 | .DS_Store 5 | .AppleDouble 6 | .LSOverride 7 | 8 | # Icon must end with two \r 9 | Icon 10 | 11 | # Thumbnails 12 | ._* 13 | 14 | # Files that might 
appear in the root of a volume 15 | .DocumentRevisions-V100 16 | .fseventsd 17 | .Spotlight-V100 18 | .TemporaryItems 19 | .Trashes 20 | .VolumeIcon.icns 21 | .com.apple.timemachine.donotpresent 22 | 23 | # Directories potentially created on remote AFP share 24 | .AppleDB 25 | .AppleDesktop 26 | Network Trash Folder 27 | Temporary Items 28 | .apdisk 29 | 30 | ## Vim 31 | 32 | # Swap 33 | [._]*.s[a-v][a-z] 34 | !*.svg # comment out if you don't need vector files 35 | [._]*.sw[a-p] 36 | [._]s[a-rt-v][a-z] 37 | [._]ss[a-gi-z] 38 | [._]sw[a-p] 39 | 40 | # Session 41 | Session.vim 42 | Sessionx.vim 43 | 44 | # Temporary 45 | .netrwhist 46 | *~ 47 | # Auto-generated tag files 48 | tags 49 | # Persistent undo 50 | [._]*.un~ 51 | 52 | ## Linux 53 | 54 | *~ 55 | 56 | # temporary files which can be created if a process still has a handle open of a deleted file 57 | .fuse_hidden* 58 | 59 | # KDE directory preferences 60 | .directory 61 | 62 | # Linux trash folder which might appear on any partition or disk 63 | .Trash-* 64 | 65 | # .nfs files are created when an open file is removed but is still being accessed 66 | .nfs* 67 | 68 | ## Python 69 | 70 | # Byte-compiled / optimized / DLL files 71 | __pycache__/ 72 | *.py[cod] 73 | *$py.class 74 | 75 | # C extensions 76 | *.so 77 | 78 | # Distribution / packaging 79 | .Python 80 | build/ 81 | develop-eggs/ 82 | dist/ 83 | downloads/ 84 | eggs/ 85 | .eggs/ 86 | lib/ 87 | lib64/ 88 | parts/ 89 | sdist/ 90 | var/ 91 | wheels/ 92 | pip-wheel-metadata/ 93 | share/python-wheels/ 94 | *.egg-info/ 95 | .installed.cfg 96 | *.egg 97 | MANIFEST 98 | 99 | # PyInstaller 100 | # Usually these files are written by a python script from a template 101 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 102 | *.manifest 103 | *.spec 104 | 105 | # Installer logs 106 | pip-log.txt 107 | pip-delete-this-directory.txt 108 | 109 | # Unit test / coverage reports 110 | htmlcov/ 111 | .tox/ 112 | .nox/ 113 | .coverage 114 | .coverage.* 115 | .cache 116 | nosetests.xml 117 | coverage.xml 118 | *.cover 119 | *.py,cover 120 | .hypothesis/ 121 | .pytest_cache/ 122 | 123 | # Translations 124 | *.mo 125 | *.pot 126 | 127 | # Django stuff: 128 | *.log 129 | local_settings.py 130 | db.sqlite3 131 | db.sqlite3-journal 132 | 133 | # Flask stuff: 134 | instance/ 135 | .webassets-cache 136 | 137 | # Scrapy stuff: 138 | .scrapy 139 | 140 | # Sphinx documentation 141 | docs/_build/ 142 | 143 | # PyBuilder 144 | target/ 145 | 146 | # Jupyter Notebook 147 | .ipynb_checkpoints 148 | 149 | # IPython 150 | profile_default/ 151 | ipython_config.py 152 | 153 | # pyenv 154 | .python-version 155 | 156 | # pipenv 157 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 158 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 159 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 160 | # install all needed dependencies. 161 | #Pipfile.lock 162 | 163 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 164 | __pypackages__/ 165 | 166 | # Celery stuff 167 | celerybeat-schedule 168 | celerybeat.pid 169 | 170 | # SageMath parsed files 171 | *.sage.py 172 | 173 | # Environments 174 | .env 175 | .venv 176 | # env/ 177 | venv/ 178 | ENV/ 179 | env.bak/ 180 | venv.bak/ 181 | 182 | # Spyder project settings 183 | .spyderproject 184 | .spyproject 185 | 186 | # Rope project settings 187 | .ropeproject 188 | 189 | # mkdocs documentation 190 | /site 191 | 192 | # mypy 193 | .mypy_cache/ 194 | .dmypy.json 195 | dmypy.json 196 | 197 | # Pyre type checker 198 | .pyre/ 199 | 200 | ## VS Code 201 | /.vscode 202 | 203 | ## wandb 204 | /wandb 205 | 206 | ## gradio 207 | .gradio 208 | 209 | log/ 210 | weights/ 211 | 212 | /third-party/* 213 | !/third-party/diff-gaussian-rasterization-w-depth 214 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2024 GitHub Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Particle-Grid Neural Dynamics for Learning Deformable Object Models from RGB-D Videos 2 | 3 | 4 | Kaifeng Zhang1, 5 | 6 | 7 | Baoyu Li2, 8 | 9 | 10 | Kris Hauser2, 11 | 12 | 13 | Yunzhu Li1 14 | 15 | 16 | 1Columbia University, 17 | 2University of Illinois Urbana-Champaign 18 | 19 | [Website](https://kywind.github.io/pgnd/) | 20 | [Paper](https://www.roboticsproceedings.org/rss21/p036.pdf) 21 | 22 | 23 | 24 | ## Interactive Demo 25 | We provide an [interactive demo](https://huggingface.co/spaces/kaifz/pgnd) in Huggingface Spaces. 26 | 27 | ## Installation 28 | 29 | ### Prerequisite 30 | 31 | We recommend installing the latest version of CUDA (12.x) and PyTorch. The CUDA version used to compile PyTorch should be the same as the system's CUDA version to enable installation of the ```diff_gaussian_rasterizer``` package. 32 | 33 | ### Setup an environment 34 | 35 | 1. Prepare python environment 36 | ``` 37 | conda create -n pgnd python=3.10 38 | conda activate pgnd 39 | ``` 40 | 41 | 2. Install PyTorch: [https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/locally/) 42 | ``` 43 | conda install pytorch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 pytorch-cuda=12.4 -c pytorch -c nvidia 44 | ``` 45 | 46 | 3. 
Install other packages: 47 | ``` 48 | pip install -r requirements.txt 49 | conda install -c dglteam/label/th24_cu124 dgl 50 | conda install conda-forge::ffmpeg 51 | ``` 52 | 53 | 4. Install diff_gaussian_rasterization: 54 | ``` 55 | cd third-party/diff-gaussian-rasterization-w-depth 56 | pip install -e . 57 | ``` 58 | 59 | ## Data and Checkpoints 60 | The dataset and pre-trained checkpoint files can be downloaded from [this link](https://drive.google.com/drive/folders/10E0gpETbwA8d1XntIsP0fgWGHipndAe8?usp=sharing). 61 | 62 | For the dataset, we provide the full training and evaluation datasets for all six categories. The dataset is stored as one zip file per category, e.g. for box, all the data are stored in data_box.zip. The files should be unzipped and organized as follows (taking box and sloth as examples, assuming the data for these two categories have been downloaded): 63 | ``` 64 | - experiments/log/data 65 | - 0112_box_processed 66 | - 0112_box2_processed 67 | - 0112_box3_processed 68 | - 1018_sloth_processed 69 | - box_merged 70 | - sloth_merged 71 | ... 72 | ``` 73 | For the checkpoints, the files should be unzipped and organized as follows: 74 | ``` 75 | - experiments/log 76 | - box 77 | - train 78 | - ckpts 79 | - 100000.pt 80 | - hydra.yaml 81 | - sloth 82 | - train 83 | - ckpts 84 | - 100000.pt 85 | - hydra.yaml 86 | ... 87 | ``` 88 | The paths must match exactly for the training and inference scripts to work. If you need to use data in a different format, you may need to modify the code directly to accommodate it. 89 | 90 | ## Custom Dataset 91 | 92 | Processing data from raw RGB-D recordings requires the following pre-trained detection, segmentation, and tracking models. They can be installed with: 93 | ``` 94 | pip install iopath 95 | pip install segment-anything 96 | pip install --no-deps git+https://github.com/IDEA-Research/GroundingDINO 97 | pip install --no-deps git+https://github.com/facebookresearch/sam2 98 | pip install --no-deps git+https://github.com/facebookresearch/co-tracker 99 | ``` 100 | The weights can be downloaded from [this link](https://drive.google.com/drive/folders/1Z70RgW7oiTIfvdk_qOOtqDGsMFQ0jimY?usp=sharing). Extract the files and put them in the ```weights/``` folder. 101 | 102 | The raw data should contain multi-view RGB and depth image recordings, as well as robot end-effector translation/rotation/gripper openness recordings. An example of raw data is available at 103 | [this link](https://drive.google.com/drive/folders/1QQ2ftIJZWFGzBeFg3bxqy6ktsUgmsHts?usp=drive_link) and can be extracted to ```experiments/log/data/cloth_test```. 104 | 105 | The following command runs the data processing code and generates the processed dataset with point tracks: 106 | ``` 107 | python experiments/real_world/postprocess.py 108 | ``` 109 | 110 | ## Training 111 | Once the datasets are prepared, we provide training scripts in the ```experiments/scripts``` folder, one per object category (e.g. ```train_box.sh```, ```train_sloth.sh```). 112 | ``` 113 | bash experiments/scripts/train_<category>.sh 114 | ``` 115 | Training can take several hours on a single GPU with >= 24GB of memory. Setting cfg.debug=False enables plotting the training loss and visualizing validation predictions in wandb. 116 | 117 | ## Inference 118 | 119 | ### Eval 120 | Use the following command to evaluate the trained model on the evaluation dataset and verify its performance. 
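For example, with the provided checkpoints, a state-only evaluation could look like the following (this assumes the ```--task``` value matches a checkpoint folder name under ```experiments/log```, e.g. ```sloth```):
```
python experiments/train/eval.py --task sloth --state_only
```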
121 | The file can also produce 3D Gaussian renderings and particle visualizations when the ```--state_only``` flag is removed 122 | and the source dataset contains reconstructed 3D Gaussians stored as ```.splat``` files (e.g. as in ```experiments/log/data/1018_sloth_processed/episode_0002/gs```). 123 | 124 | ``` 125 | python experiments/train/eval.py --task <task_name> --state_only 126 | ``` 127 | Note: in the current dataset, some categories still lack GS reconstructions, so their evaluation can only include state-based metrics. The code for reconstruction will be released soon. 128 | 129 | ### Planning 130 | Model-based planning for manipulation tasks with the learned dynamics model can be performed by running 131 | ``` 132 | python experiments/real_world/plan.py --config <config_path> --text_prompts <text_prompts> 133 | ``` 134 | This requires building an xArm robot setup with RealSense cameras and calibrating them with 135 | ``` 136 | python experiments/real_world/calibrate.py [--calibrate/--calibrate_bimanual] 137 | ``` 138 | The calibration board needs to be placed at a specific position relative to the robot to fix the robot-to-board transformation, and the camera-to-board transformations are computed using OpenCV detection algorithms. For questions about real robot setups, please feel free to reach out to the first author of the paper. We are working on releasing more detailed instructions for real robot experiments soon. 139 | 140 | 141 | ## Citation 142 | If you find this repo useful for your research, please consider citing our paper: 143 | ``` 144 | @inproceedings{zhang2025particle, 145 | title={Particle-Grid Neural Dynamics for Learning Deformable Object Models from RGB-D Videos}, 146 | author={Zhang, Kaifeng and Li, Baoyu and Hauser, Kris and Li, Yunzhu}, 147 | booktitle={Proceedings of Robotics: Science and Systems (RSS)}, 148 | year={2025} 149 | } 150 | ``` -------------------------------------------------------------------------------- /experiments/.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/.root -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link1.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link1.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link2.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link2.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link3.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link3.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link4.glb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link4.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link5.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link5.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link6.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link6.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link7.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link7.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/meshes/visual/link_base.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/assets/robots/xarm7/meshes/visual/link_base.glb -------------------------------------------------------------------------------- /experiments/assets/robots/xarm7/xarm7.urdf: -------------------------------------------------------------------------------- [220 numbered lines; the URDF's XML markup was stripped in this export, so the file content is not recoverable here] -------------------------------------------------------------------------------- /experiments/cfg/default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: pgnd 3 | - render: default 4 | - sim: default 5 | - train: default 6 | - _self_ 7 | - override hydra/job_logging: disabled 8 | - override hydra/hydra_logging: disabled 9 | 10 | hydra: 11 | output_subdir: null 12 | run: 13 | dir: . 
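# The defaults list above pulls in the sub-configs from experiments/cfg/{model,render,sim,train}/;
# the keys below are global run options (seed, CPU/GPU selection, overwrite/resume/debug flags).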
14 | 15 | seed: 0 16 | cpu: 0 17 | num_cpus: 128 18 | gpus: 19 | - 0 20 | overwrite: False 21 | resume: False 22 | debug: False 23 | -------------------------------------------------------------------------------- /experiments/cfg/model/friction/default.yaml: -------------------------------------------------------------------------------- 1 | value: 0.0 2 | requires_grad: False -------------------------------------------------------------------------------- /experiments/cfg/model/material/pointnet_nerf_material.yaml: -------------------------------------------------------------------------------- 1 | requires_grad: True 2 | output_scale: 1.0 3 | input_scale: 2.0 4 | radius: 0.2 5 | absolute_y: False 6 | pe_num_func_res: 0 -------------------------------------------------------------------------------- /experiments/cfg/model/pgnd.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - material: pointnet_nerf_material 3 | - friction: default 4 | - _self_ 5 | 6 | ckpt: null 7 | clip_bound: 1.5 8 | eef_t: [0.0, 0.0, 0.01] 9 | gripper_radius: 0.04 10 | -------------------------------------------------------------------------------- /experiments/cfg/render/default.yaml: -------------------------------------------------------------------------------- 1 | width: 512 2 | height: 512 3 | skip_frame: 1 4 | bound: 1.5 5 | fps: 5 6 | radius_scale: 500 7 | center: [0.5, 0.3, 0.5] 8 | distance: 1.4 9 | azimuth: -125 10 | elevation: 30 11 | reflectance: [0.92941176, 0.32941176, 0.23137255] 12 | -------------------------------------------------------------------------------- /experiments/cfg/sim/default.yaml: -------------------------------------------------------------------------------- 1 | num_steps_train: 5 2 | num_steps: 1000 3 | interval: 1 4 | num_grids: [50, 50, 50, 0.02] 5 | dt: 0.1 6 | bound: 3 7 | eps: 1e-7 8 | skip_frame: 1 9 | num_grippers: 1 10 | preprocess_scale: 1.0 11 | preprocess_with_table: True 12 | n_particles: 1000 13 | gripper_forcing: True 14 | gripper_points: False 15 | n_history: 2 16 | uniform: False 17 | -------------------------------------------------------------------------------- /experiments/cfg/train/default.yaml: -------------------------------------------------------------------------------- 1 | name: null 2 | dataset_name: null 3 | source_dataset_name: null 4 | 5 | num_iterations: 100000 6 | resume_iteration: 0 7 | batch_size: 32 8 | num_workers: 8 9 | material_lr: 1e-4 10 | material_wd: 0.0 11 | material_grad_max_norm: 0.1 12 | training_start_episode: 0 13 | training_end_episode: 1000 14 | eval_start_episode: 0 15 | eval_end_episode: 10 16 | iteration_log_interval: 10 17 | iteration_save_interval: 1000 18 | iteration_eval_interval: 10000 19 | loss_factor: 1.0 20 | loss_factor_v: 0.0 21 | loss_factor_x: 100.0 22 | friction_lr: 1e-1 23 | friction_wd: 0.0 24 | friction_grad_max_norm: 0.1 25 | dataset_load_skip_frame: 1 26 | dataset_skip_frame: 1 27 | downsample: False 28 | use_pv: False 29 | use_gs: False 30 | dataset_non_overwrite: True 31 | -------------------------------------------------------------------------------- /experiments/real_world/calibrate.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import argparse 3 | import time 4 | import numpy as np 5 | import torch 6 | import open3d as o3d 7 | from PIL import Image 8 | 9 | from modules import RobotEnv, BimanualRobotEnv 10 | from utils.pcd_utils import get_tabletop_points, visualize_o3d 11 | 12 | def 
construct_goal_from_perception(): 13 | use_robot = True 14 | 15 | exposure_time = 10 16 | env = RobotEnv( 17 | WH=[848, 480], 18 | obs_fps=5, 19 | n_obs_steps=2, 20 | use_robot=use_robot, 21 | speed=100, 22 | ) 23 | 24 | try: 25 | env.start(exposure_time=exposure_time) 26 | if use_robot: 27 | env.reset_robot() 28 | print('env started') 29 | time.sleep(exposure_time) 30 | print('start recording') 31 | 32 | env.calibrate(re_calibrate=False) 33 | 34 | obs = env.get_obs(get_color=True, get_depth=True) 35 | intr_list = env.get_intrinsics() 36 | R_list, t_list = env.get_extrinsics() 37 | bbox = env.get_bbox() 38 | 39 | rgb_list = [] 40 | depth_list = [] 41 | for i in range(4): 42 | rgb = obs[f'color_{i}'][-1] 43 | depth = obs[f'depth_{i}'][-1] 44 | rgb_list.append(rgb) 45 | depth_list.append(depth) 46 | 47 | pcd = get_tabletop_points(rgb_list, depth_list, R_list, t_list, intr_list, bbox) 48 | 49 | visualize_o3d([pcd]) 50 | o3d.io.write_point_cloud("vis_real_world/target.pcd", pcd) 51 | 52 | finally: 53 | env.stop() 54 | print('env stopped') 55 | 56 | 57 | def calibrate(use_robot=True, reset_robot=True, wrist=None): 58 | exposure_time = 5 59 | env = RobotEnv( 60 | use_camera=True, 61 | WH=[848, 480], 62 | obs_fps=5, 63 | n_obs_steps=2, 64 | use_robot=use_robot, 65 | speed=100, 66 | wrist=wrist, 67 | ) 68 | 69 | try: 70 | env.start(exposure_time=exposure_time) 71 | if use_robot and reset_robot: 72 | env.reset_robot() 73 | print('env started') 74 | time.sleep(exposure_time) 75 | print('start recording') 76 | 77 | env.calibrate(re_calibrate=True) 78 | 79 | finally: 80 | env.stop() 81 | print('env stopped') 82 | 83 | 84 | def calibrate_bimanual(reset_robot=True): 85 | exposure_time = 5 86 | env = BimanualRobotEnv( 87 | WH=[848, 480], 88 | obs_fps=5, 89 | n_obs_steps=2, 90 | speed=100, 91 | gripper_enable=True, 92 | ) 93 | 94 | try: 95 | env.start(exposure_time=exposure_time) 96 | if reset_robot: 97 | env.reset_robot() 98 | print('env started') 99 | time.sleep(exposure_time) 100 | print('start recording') 101 | 102 | env.calibrate(re_calibrate=True) 103 | 104 | finally: 105 | env.stop() 106 | print('env stopped') 107 | 108 | 109 | if __name__ == "__main__": 110 | parser = argparse.ArgumentParser() 111 | parser.add_argument("--calibrate", action="store_true") 112 | parser.add_argument("--calibrate_bimanual", action="store_true") 113 | parser.add_argument("--calibrate_fixed", action="store_true") 114 | parser.add_argument("--construct_goal", action="store_true") 115 | parser.add_argument("--examine_points", action="store_true") 116 | args = parser.parse_args() 117 | if args.calibrate: 118 | calibrate(reset_robot=False) 119 | elif args.calibrate_bimanual: 120 | calibrate_bimanual(reset_robot=False) 121 | elif args.calibrate_fixed: # only calibrate fixed cameras 122 | calibrate(use_robot=False) 123 | elif args.construct_goal: 124 | construct_goal_from_perception() 125 | else: 126 | print("No arguments provided") 127 | -------------------------------------------------------------------------------- /experiments/real_world/camera/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/real_world/camera/__init__.py -------------------------------------------------------------------------------- /experiments/real_world/camera/multi_realsense.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Union, 
Dict, Callable 2 | import numbers 3 | import time 4 | from multiprocess.managers import SharedMemoryManager 5 | import numpy as np 6 | import pyrealsense2 as rs 7 | from .single_realsense import SingleRealsense 8 | 9 | class MultiRealsense: 10 | def __init__(self, 11 | serial_numbers: Optional[List[str]]=None, 12 | shm_manager: Optional[SharedMemoryManager]=None, 13 | resolution=(1280,720), 14 | capture_fps=30, 15 | put_fps=None, 16 | put_downsample=True, 17 | enable_color=True, 18 | enable_depth=False, 19 | process_depth=False, 20 | enable_infrared=False, 21 | get_max_k=30, 22 | advanced_mode_config: Optional[Union[dict, List[dict]]]=None, 23 | transform: Optional[Union[Callable[[Dict], Dict], List[Callable]]]=None, 24 | vis_transform: Optional[Union[Callable[[Dict], Dict], List[Callable]]]=None, 25 | verbose=False 26 | ): 27 | if shm_manager is None: 28 | shm_manager = SharedMemoryManager() 29 | shm_manager.start() 30 | if serial_numbers is None: 31 | serial_numbers = SingleRealsense.get_connected_devices_serial() 32 | n_cameras = len(serial_numbers) 33 | 34 | advanced_mode_config = repeat_to_list( 35 | advanced_mode_config, n_cameras, dict) 36 | transform = repeat_to_list( 37 | transform, n_cameras, Callable) 38 | vis_transform = repeat_to_list( 39 | vis_transform, n_cameras, Callable) 40 | 41 | cameras = dict() 42 | for i, serial in enumerate(serial_numbers): 43 | cameras[serial] = SingleRealsense( 44 | shm_manager=shm_manager, 45 | serial_number=serial, 46 | resolution=resolution, 47 | capture_fps=capture_fps, 48 | put_fps=put_fps, 49 | put_downsample=put_downsample, 50 | enable_color=enable_color, 51 | enable_depth=enable_depth, 52 | process_depth=process_depth, 53 | enable_infrared=enable_infrared, 54 | get_max_k=get_max_k, 55 | advanced_mode_config=advanced_mode_config[i], 56 | transform=transform[i], 57 | vis_transform=vis_transform[i], 58 | is_master=(i == 0), 59 | verbose=verbose 60 | ) 61 | 62 | self.cameras = cameras 63 | self.serial_numbers = serial_numbers 64 | self.shm_manager = shm_manager 65 | self.resolution = resolution 66 | self.capture_fps = capture_fps 67 | 68 | def __enter__(self): 69 | self.start() 70 | return self 71 | 72 | def __exit__(self, exc_type, exc_val, exc_tb): 73 | self.stop() 74 | 75 | @property 76 | def n_cameras(self): 77 | return len(self.cameras) 78 | 79 | @property 80 | def is_ready(self): 81 | is_ready = True 82 | for camera in self.cameras.values(): 83 | if not camera.is_ready: 84 | is_ready = False 85 | return is_ready 86 | 87 | def start(self, wait=True, put_start_time=None): 88 | if put_start_time is None: 89 | put_start_time = time.time() 90 | for camera in self.cameras.values(): 91 | camera.start(wait=False, put_start_time=put_start_time) 92 | 93 | if wait: 94 | self.start_wait() 95 | 96 | def stop(self, wait=True): 97 | for camera in self.cameras.values(): 98 | camera.stop(wait=False) 99 | 100 | if wait: 101 | self.stop_wait() 102 | 103 | def start_wait(self): 104 | for camera in self.cameras.values(): 105 | print('processing camera {}'.format(camera.serial_number)) 106 | camera.start_wait() 107 | 108 | def stop_wait(self): 109 | for camera in self.cameras.values(): 110 | camera.join() 111 | 112 | def get(self, k=None, index=None, out=None) -> Dict[int, Dict[str, np.ndarray]]: 113 | """ 114 | Return order T,H,W,C 115 | { 116 | 0: { 117 | 'rgb': (T,H,W,C), 118 | 'timestamp': (T,) 119 | }, 120 | 1: ... 
121 | } 122 | """ 123 | if index is not None: 124 | this_out = None 125 | this_out = self.cameras[self.serial_numbers[index]].get(k=k, out=this_out) 126 | return this_out 127 | if out is None: 128 | out = dict() 129 | for i, camera in enumerate(self.cameras.values()): 130 | this_out = None 131 | if i in out: 132 | this_out = out[i] 133 | this_out = camera.get(k=k, out=this_out) 134 | out[i] = this_out 135 | return out 136 | 137 | def set_color_option(self, option, value): 138 | n_camera = len(self.cameras) 139 | value = repeat_to_list(value, n_camera, numbers.Number) 140 | for i, camera in enumerate(self.cameras.values()): 141 | camera.set_color_option(option, value[i]) 142 | 143 | def set_exposure(self, exposure=None, gain=None): 144 | """150nit. (0.1 ms, 1/10000s) 145 | gain: (0, 128) 146 | """ 147 | 148 | if exposure is None and gain is None: 149 | # auto exposure 150 | self.set_color_option(rs.option.enable_auto_exposure, 1.0) 151 | else: 152 | # manual exposure 153 | self.set_color_option(rs.option.enable_auto_exposure, 0.0) 154 | if exposure is not None: 155 | self.set_color_option(rs.option.exposure, exposure) 156 | if gain is not None: 157 | self.set_color_option(rs.option.gain, gain) 158 | 159 | def set_white_balance(self, white_balance=None): 160 | if white_balance is None: 161 | self.set_color_option(rs.option.enable_auto_white_balance, 1.0) 162 | else: 163 | self.set_color_option(rs.option.enable_auto_white_balance, 0.0) 164 | self.set_color_option(rs.option.white_balance, white_balance) 165 | 166 | def get_intrinsics(self): 167 | return np.array([c.get_intrinsics() for c in self.cameras.values()]) 168 | 169 | def get_depth_scale(self): 170 | return np.array([c.get_depth_scale() for c in self.cameras.values()]) 171 | 172 | def restart_put(self, start_time): 173 | for camera in self.cameras.values(): 174 | camera.restart_put(start_time) 175 | 176 | 177 | def repeat_to_list(x, n: int, cls): 178 | if x is None: 179 | x = [None] * n 180 | if isinstance(x, cls): 181 | x = [x] * n 182 | assert len(x) == n 183 | return x 184 | -------------------------------------------------------------------------------- /experiments/real_world/camera/shared_memory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/real_world/camera/shared_memory/__init__.py -------------------------------------------------------------------------------- /experiments/real_world/camera/shared_memory/shared_memory_queue.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | import numbers 3 | from queue import Empty, Full 4 | from multiprocess.managers import SharedMemoryManager 5 | import numpy as np 6 | from .shared_memory_util import ArraySpec, SharedAtomicCounter 7 | from .shared_ndarray import SharedNDArray 8 | 9 | 10 | class SharedMemoryQueue: 11 | """ 12 | A Lock-Free FIFO Shared Memory Data Structure. 13 | Stores a sequence of dict of numpy arrays. 
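A minimal usage sketch (not part of the original file; it only uses the methods defined below and assumes a running SharedMemoryManager):
    >>> import numpy as np
    >>> from multiprocess.managers import SharedMemoryManager
    >>> shm_manager = SharedMemoryManager()
    >>> shm_manager.start()
    >>> queue = SharedMemoryQueue.create_from_examples(
    ...     shm_manager,
    ...     examples={'obs': np.zeros(3, dtype=np.float32)},  # per-key shape/dtype template
    ...     buffer_size=8)
    >>> queue.put({'obs': np.ones(3, dtype=np.float32)})      # copies the dict into shared memory
    >>> out = queue.get()                                      # returns a dict of np.ndarray copies
    >>> shm_manager.shutdown()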
14 | """ 15 | 16 | def __init__(self, 17 | shm_manager: SharedMemoryManager, 18 | array_specs: List[ArraySpec], 19 | buffer_size: int 20 | ): 21 | 22 | # create atomic counter 23 | write_counter = SharedAtomicCounter(shm_manager) 24 | read_counter = SharedAtomicCounter(shm_manager) 25 | 26 | # allocate shared memory 27 | shared_arrays = dict() 28 | for spec in array_specs: 29 | key = spec.name 30 | assert key not in shared_arrays 31 | array = SharedNDArray.create_from_shape( 32 | mem_mgr=shm_manager, 33 | shape=(buffer_size,) + tuple(spec.shape), 34 | dtype=spec.dtype) 35 | shared_arrays[key] = array 36 | 37 | self.buffer_size = buffer_size 38 | self.array_specs = array_specs 39 | self.write_counter = write_counter 40 | self.read_counter = read_counter 41 | self.shared_arrays = shared_arrays 42 | 43 | @classmethod 44 | def create_from_examples(cls, 45 | shm_manager: SharedMemoryManager, 46 | examples: Dict[str, Union[np.ndarray, numbers.Number]], 47 | buffer_size: int 48 | ): 49 | specs = list() 50 | for key, value in examples.items(): 51 | shape = None 52 | dtype = None 53 | if isinstance(value, np.ndarray): 54 | shape = value.shape 55 | dtype = value.dtype 56 | assert dtype != np.dtype('O') 57 | elif isinstance(value, numbers.Number): 58 | shape = tuple() 59 | dtype = np.dtype(type(value)) 60 | else: 61 | raise TypeError(f'Unsupported type {type(value)}') 62 | 63 | spec = ArraySpec( 64 | name=key, 65 | shape=shape, 66 | dtype=dtype 67 | ) 68 | specs.append(spec) 69 | 70 | obj = cls( 71 | shm_manager=shm_manager, 72 | array_specs=specs, 73 | buffer_size=buffer_size 74 | ) 75 | return obj 76 | 77 | def qsize(self): 78 | read_count = self.read_counter.load() 79 | write_count = self.write_counter.load() 80 | n_data = write_count - read_count 81 | return n_data 82 | 83 | def empty(self): 84 | n_data = self.qsize() 85 | return n_data <= 0 86 | 87 | def clear(self): 88 | self.read_counter.store(self.write_counter.load()) 89 | 90 | def put(self, data: Dict[str, Union[np.ndarray, numbers.Number]]): 91 | read_count = self.read_counter.load() 92 | write_count = self.write_counter.load() 93 | n_data = write_count - read_count 94 | if n_data >= self.buffer_size: 95 | raise Full() 96 | 97 | next_idx = write_count % self.buffer_size 98 | 99 | # write to shared memory 100 | for key, value in data.items(): 101 | arr: np.ndarray 102 | arr = self.shared_arrays[key].get() 103 | if isinstance(value, np.ndarray): 104 | arr[next_idx] = value 105 | else: 106 | arr[next_idx] = np.array(value, dtype=arr.dtype) 107 | 108 | # update idx 109 | self.write_counter.add(1) 110 | 111 | def get(self, out=None) -> Dict[str, np.ndarray]: 112 | write_count = self.write_counter.load() 113 | read_count = self.read_counter.load() 114 | n_data = write_count - read_count 115 | if n_data <= 0: 116 | raise Empty() 117 | 118 | if out is None: 119 | out = self._allocate_empty() 120 | 121 | next_idx = read_count % self.buffer_size 122 | for key, value in self.shared_arrays.items(): 123 | arr = value.get() 124 | np.copyto(out[key], arr[next_idx]) 125 | 126 | # update idx 127 | self.read_counter.add(1) 128 | return out 129 | 130 | def get_k(self, k, out=None) -> Dict[str, np.ndarray]: 131 | write_count = self.write_counter.load() 132 | read_count = self.read_counter.load() 133 | n_data = write_count - read_count 134 | if n_data <= 0: 135 | raise Empty() 136 | assert k <= n_data 137 | 138 | out = self._get_k_impl(k, read_count, out=out) 139 | self.read_counter.add(k) 140 | return out 141 | 142 | def get_all(self, out=None) -> Dict[str, 
np.ndarray]: 143 | write_count = self.write_counter.load() 144 | read_count = self.read_counter.load() 145 | n_data = write_count - read_count 146 | if n_data <= 0: 147 | raise Empty() 148 | 149 | out = self._get_k_impl(n_data, read_count, out=out) 150 | self.read_counter.add(n_data) 151 | return out 152 | 153 | def _get_k_impl(self, k, read_count, out=None) -> Dict[str, np.ndarray]: 154 | if out is None: 155 | out = self._allocate_empty(k) 156 | 157 | curr_idx = read_count % self.buffer_size 158 | for key, value in self.shared_arrays.items(): 159 | arr = value.get() 160 | target = out[key] 161 | 162 | start = curr_idx 163 | end = min(start + k, self.buffer_size) 164 | target_start = 0 165 | target_end = (end - start) 166 | target[target_start: target_end] = arr[start:end] 167 | 168 | remainder = k - (end - start) 169 | if remainder > 0: 170 | # wrap around 171 | start = 0 172 | end = start + remainder 173 | target_start = target_end 174 | target_end = k 175 | target[target_start: target_end] = arr[start:end] 176 | 177 | return out 178 | 179 | def _allocate_empty(self, k=None): 180 | result = dict() 181 | for spec in self.array_specs: 182 | shape = spec.shape 183 | if k is not None: 184 | shape = (k,) + shape 185 | result[spec.name] = np.empty( 186 | shape=shape, dtype=spec.dtype) 187 | return result 188 | -------------------------------------------------------------------------------- /experiments/real_world/camera/shared_memory/shared_memory_util.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from dataclasses import dataclass 3 | import numpy as np 4 | from multiprocess.managers import SharedMemoryManager 5 | from atomics import atomicview, MemoryOrder, UINT 6 | 7 | @dataclass 8 | class ArraySpec: 9 | name: str 10 | shape: Tuple[int] 11 | dtype: np.dtype 12 | 13 | 14 | class SharedAtomicCounter: 15 | def __init__(self, 16 | shm_manager: SharedMemoryManager, 17 | size :int=8 # 64bit int 18 | ): 19 | shm = shm_manager.SharedMemory(size=size) 20 | self.shm = shm 21 | self.size = size 22 | self.store(0) # initialize 23 | 24 | @property 25 | def buf(self): 26 | return self.shm.buf[:self.size] 27 | 28 | def load(self) -> int: 29 | with atomicview(buffer=self.buf, atype=UINT) as a: 30 | value = a.load(order=MemoryOrder.ACQUIRE) 31 | return value 32 | 33 | def store(self, value: int): 34 | with atomicview(buffer=self.buf, atype=UINT) as a: 35 | a.store(value, order=MemoryOrder.RELEASE) 36 | 37 | def add(self, value: int): 38 | with atomicview(buffer=self.buf, atype=UINT) as a: 39 | a.add(value, order=MemoryOrder.ACQ_REL) 40 | -------------------------------------------------------------------------------- /experiments/real_world/camera/shared_memory/shared_ndarray.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import multiprocess 4 | import multiprocess.synchronize 5 | from multiprocess.managers import SharedMemoryManager 6 | from multiprocess.shared_memory import SharedMemory 7 | from typing import Any, TYPE_CHECKING, Generic, Optional, Tuple, TypeVar, Union 8 | 9 | import numpy as np 10 | import numpy.typing as npt 11 | 12 | 13 | SharedMemoryLike = Union[str, SharedMemory] # shared memory or name of shared memory 14 | SharedT = TypeVar("SharedT", bound=np.generic) 15 | 16 | 17 | class SharedNDArray(Generic[SharedT]): 18 | """Class to keep track of and retrieve the data in a shared array 19 | Attributes 20 | ---------- 21 | shm 22 | 
SharedMemory object containing the data of the array 23 | shape 24 | Shape of the NumPy array 25 | dtype 26 | Type of the NumPy array. Anything that may be passed to the `dtype=` argument in `np.ndarray`. 27 | lock 28 | (Optional) multiprocess.Lock to manage access to the SharedNDArray. This is only created if 29 | lock=True is passed to the constructor, otherwise it is set to `None`. 30 | A SharedNDArray object may be created either directly with a preallocated shared memory object plus the 31 | dtype and shape of the numpy array it represents: 32 | >>> from multiprocess.shared_memory import SharedMemory 33 | >>> import numpy as np 34 | >>> from shared_ndarray2 import SharedNDArray 35 | >>> x = np.array([1, 2, 3]) 36 | >>> shm = SharedMemory(name="x", create=True, size=x.nbytes) 37 | >>> arr = SharedNDArray(shm, x.shape, x.dtype) 38 | >>> arr[:] = x[:] # copy x into the array 39 | >>> print(arr[:]) 40 | [1 2 3] 41 | >>> shm.close() 42 | >>> shm.unlink() 43 | Or using a SharedMemoryManager either from an existing array or from arbitrary shape and nbytes: 44 | >>> from multiprocess.managers import SharedMemoryManager 45 | >>> mem_mgr = SharedMemoryManager() 46 | >>> mem_mgr.start() # Better yet, use SharedMemoryManager context manager 47 | >>> arr = SharedNDArray.from_shape(mem_mgr, x.shape, x.dtype) 48 | >>> arr[:] = x[:] # copy x into the array 49 | >>> print(arr[:]) 50 | [1 2 3] 51 | >>> # -or in one step- 52 | >>> arr = SharedNDArray.from_array(mem_mgr, x) 53 | >>> print(arr[:]) 54 | [1 2 3] 55 | `SharedNDArray` does not subclass numpy.ndarray but rather generates an ndarray on-the-fly in get(), 56 | which is used in __getitem__ and __setitem__. Thus to access the data and/or use any ndarray methods 57 | get() or __getitem__ or __setitem__ must be used 58 | >>> arr.max() # ERROR: SharedNDArray has no `max` method. 59 | Traceback (most recent call last): 60 | .... 61 | AttributeError: SharedNDArray object has no attribute 'max'. To access NumPy ndarray object use .get() method. 62 | >>> arr.get().max() # (or arr[:].max()) OK: This gets an ndarray on which we can operate 63 | 3 64 | >>> y = np.zeros(3) 65 | >>> y[:] = arr # ERROR: Cannot broadcast-assign a SharedNDArray to ndarray `y` 66 | Traceback (most recent call last): 67 | ... 68 | ValueError: setting an array element with a sequence. 69 | >>> y[:] = arr[:] # OK: This gets an ndarray that can be copied element-wise to `y` 70 | >>> mem_mgr.shutdown() 71 | """ 72 | 73 | shm: SharedMemory 74 | # shape: Tuple[int, ...] # is a property 75 | dtype: np.dtype 76 | lock: Optional[multiprocess.synchronize.Lock] 77 | 78 | def __init__( 79 | self, shm: SharedMemoryLike, shape: Tuple[int, ...], dtype: npt.DTypeLike): 80 | """Initialize a SharedNDArray object from existing shared memory, object shape, and dtype. 81 | To initialize a SharedNDArray object from a memory manager and data or shape, use the `from_array() 82 | or `from_shape()` classmethods. 83 | Parameters 84 | ---------- 85 | shm 86 | `multiprocess.shared_memory.SharedMemory` object or name for connecting to an existing block 87 | of shared memory (using SharedMemory constructor) 88 | shape 89 | Shape of the NumPy array to be represented in the shared memory 90 | dtype 91 | Data type for the NumPy array to be represented in shared memory. Any valid argument for 92 | `np.dtype` may be used as it will be converted to an actual `dtype` object. 93 | lock : bool, optional 94 | If True, create a multiprocess.Lock object accessible with the `.lock` attribute, by default 95 | False. 
If passing the `SharedNDArray` as an argument to a `multiprocess.Pool` function this 96 | should not be used -- see this comment to a Stack Overflow question about `multiprocess.Lock`: 97 | https://stackoverflow.com/questions/25557686/python-sharing-a-lock-between-processes#comment72803059_25558333 98 | Raises 99 | ------ 100 | ValueError 101 | The SharedMemory size (number of bytes) does not match the product of the shape and dtype 102 | itemsize. 103 | """ 104 | if isinstance(shm, str): 105 | shm = SharedMemory(name=shm, create=False) 106 | dtype = np.dtype(dtype) # Try to convert to dtype 107 | assert shm.size >= (dtype.itemsize * np.prod(shape)) 108 | self.shm = shm 109 | self.dtype = dtype 110 | self._shape: Tuple[int, ...] = shape 111 | 112 | def __repr__(self): 113 | # Like numpy's ndarray repr 114 | cls_name = self.__class__.__name__ 115 | nspaces = len(cls_name) + 1 116 | array_repr = str(self.get()) 117 | array_repr = array_repr.replace("\n", "\n" + " " * nspaces) 118 | return f"{cls_name}({array_repr}, dtype={self.dtype})" 119 | 120 | @classmethod 121 | def create_from_array( 122 | cls, mem_mgr: SharedMemoryManager, arr: npt.NDArray[SharedT] 123 | ) -> SharedNDArray[SharedT]: 124 | """Create a SharedNDArray from a SharedMemoryManager and an existing numpy array. 125 | Parameters 126 | ---------- 127 | mem_mgr 128 | Running `multiprocess.managers.SharedMemoryManager` instance from which to create the 129 | SharedMemory for the SharedNDArray 130 | arr 131 | NumPy `ndarray` object to copy into the created SharedNDArray upon initialization. 132 | """ 133 | # Simply use from_shape() to create the SharedNDArray and copy the data into it. 134 | shared_arr = cls.create_from_shape(mem_mgr, arr.shape, arr.dtype) 135 | shared_arr.get()[:] = arr[:] 136 | return shared_arr 137 | 138 | @classmethod 139 | def create_from_shape( 140 | cls, mem_mgr: SharedMemoryManager, shape: Tuple, dtype: npt.DTypeLike) -> SharedNDArray: 141 | """Create a SharedNDArray directly from a SharedMemoryManager 142 | Parameters 143 | ---------- 144 | mem_mgr 145 | SharedMemoryManager instance that has been started 146 | shape 147 | Shape of the array 148 | dtype 149 | Data type for the NumPy array to be represented in shared memory. Any valid argument for 150 | `np.dtype` may be used as it will be converted to an actual `dtype` object. 
151 | """ 152 | dtype = np.dtype(dtype) # Convert to dtype if possible 153 | shm = mem_mgr.SharedMemory(np.prod(shape) * dtype.itemsize) 154 | return cls(shm=shm, shape=shape, dtype=dtype) 155 | 156 | @property 157 | def shape(self) -> Tuple[int, ...]: 158 | return self._shape 159 | 160 | 161 | def get(self) -> npt.NDArray[SharedT]: 162 | """Get a numpy array with access to the shared memory""" 163 | return np.ndarray(self.shape, dtype=self.dtype, buffer=self.shm.buf) 164 | 165 | def __del__(self): 166 | self.shm.close() 167 | -------------------------------------------------------------------------------- /experiments/real_world/gs/config/default.yaml: -------------------------------------------------------------------------------- 1 | # training 2 | weight_im: 1.0 3 | weight_seg: 3.0 4 | grad_thresh: 0.0002 5 | remove_threshold: 0.005 6 | remove_thresh_5k: 0.25 7 | scale_scene_radius: 0.05 8 | num_iters: 10000 9 | 10 | # rendering 11 | near: 0.01 12 | far: 100 -------------------------------------------------------------------------------- /experiments/real_world/gs/convert.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from io import BytesIO 3 | 4 | 5 | def quat_mult(q1, q2): 6 | w1, x1, y1, z1 = q1 7 | w2, x2, y2, z2 = q2 8 | w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 9 | x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 10 | y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 11 | z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 12 | return np.array([w, x, y, z], dtype=np.float32) 13 | 14 | 15 | def rot_mat_to_quat(rot_mat): 16 | w = np.sqrt(1 + rot_mat[0, 0] + rot_mat[1, 1] + rot_mat[2, 2]) / 2 17 | x = (rot_mat[2, 1] - rot_mat[1, 2]) / (4 * w) 18 | y = (rot_mat[0, 2] - rot_mat[2, 0]) / (4 * w) 19 | z = (rot_mat[1, 0] - rot_mat[0, 1]) / (4 * w) 20 | return np.array([w, x, y, z], dtype=np.float32) 21 | 22 | 23 | def save_to_splat(pts, colors, scales, quats, opacities, output_file, center=True, rotate=True): 24 | if center: 25 | pts_mean = np.mean(pts, axis=0) 26 | pts = pts - pts_mean 27 | buffer = BytesIO() 28 | for (v, c, s, q, o) in zip(pts, colors, scales, quats, opacities): 29 | position = np.array([v[0], v[1], v[2]], dtype=np.float32) 30 | scales = np.array([s[0], s[1], s[2]], dtype=np.float32) 31 | rot = np.array([q[0], q[1], q[2], q[3]], dtype=np.float32) 32 | # SH_C0 = 0.28209479177387814 33 | # color = np.array([0.5 + SH_C0 * c[0], 0.5 + SH_C0 * c[1], 0.5 + SH_C0 * c[2], o[0]]) 34 | color = np.array([c[0], c[1], c[2], o[0]]) 35 | 36 | # rotate around x axis 37 | if rotate: 38 | rot_x_90 = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]], dtype=np.float32) 39 | rot_x_90 = np.linalg.inv(rot_x_90) 40 | position = np.dot(rot_x_90, position) 41 | rot = quat_mult(rot_mat_to_quat(rot_x_90), rot) 42 | 43 | buffer.write(position.tobytes()) 44 | buffer.write(scales.tobytes()) 45 | buffer.write((color * 255).clip(0, 255).astype(np.uint8).tobytes()) 46 | buffer.write( 47 | ((rot / np.linalg.norm(rot)) * 128 + 128) 48 | .clip(0, 255) 49 | .astype(np.uint8) 50 | .tobytes() 51 | ) 52 | with open(output_file, "wb") as f: 53 | f.write(buffer.getvalue()) 54 | 55 | 56 | def read_splat(splat_file): 57 | with open(splat_file, "rb") as f: 58 | data = f.read() 59 | pts = [] 60 | colors = [] 61 | scales = [] 62 | quats = [] 63 | opacities = [] 64 | for i in range(0, len(data), 32): 65 | v = np.frombuffer(data[i : i + 12], dtype=np.float32) 66 | s = np.frombuffer(data[i + 12 : i + 24], dtype=np.float32) 67 | c = np.frombuffer(data[i + 24 : i + 28], dtype=np.uint8) / 
255 68 | q = np.frombuffer(data[i + 28 : i + 32], dtype=np.uint8) 69 | q = (q * 1.0 - 128) / 128 70 | pts.append(v) 71 | scales.append(s) 72 | colors.append(c[:3]) 73 | quats.append(q) 74 | opacities.append(c[3:]) 75 | return np.array(pts), np.array(colors), np.array(scales), np.array(quats), np.array(opacities) 76 | -------------------------------------------------------------------------------- /experiments/real_world/gs/helpers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import open3d as o3d 4 | import numpy as np 5 | from diff_gaussian_rasterization import GaussianRasterizationSettings as Camera 6 | 7 | 8 | def setup_camera(w, h, k, w2c, near=0.01, far=100.0, bg=[0, 0, 0], z_threshold=0.2, device='cuda'): 9 | fx, fy, cx, cy = k[0][0], k[1][1], k[0][2], k[1][2] 10 | w2c = torch.tensor(w2c).cuda().float() 11 | cam_center = torch.inverse(w2c)[:3, 3] 12 | w2c = w2c.unsqueeze(0).transpose(1, 2) 13 | opengl_proj = torch.tensor([[2 * fx / w, 0.0, -(w - 2 * cx) / w, 0.0], 14 | [0.0, 2 * fy / h, -(h - 2 * cy) / h, 0.0], 15 | [0.0, 0.0, far / (far - near), -(far * near) / (far - near)], 16 | [0.0, 0.0, 1.0, 0.0]]).cuda().float().unsqueeze(0).transpose(1, 2) 17 | full_proj = w2c.bmm(opengl_proj) 18 | cam = Camera( 19 | image_height=h, 20 | image_width=w, 21 | tanfovx=w / (2 * fx), 22 | tanfovy=h / (2 * fy), 23 | bg=torch.tensor(bg, dtype=torch.float32, device=device), 24 | scale_modifier=1.0, 25 | viewmatrix=w2c.to(device), 26 | projmatrix=full_proj.to(device), 27 | sh_degree=0, 28 | campos=cam_center.to(device), 29 | prefiltered=False, 30 | z_threshold=z_threshold, 31 | ) 32 | return cam 33 | 34 | 35 | def params2rendervar(params): 36 | rendervar = { 37 | 'means3D': params['means3D'], 38 | 'colors_precomp': params['rgb_colors'], 39 | 'rotations': torch.nn.functional.normalize(params['unnorm_rotations']), 40 | 'opacities': torch.sigmoid(params['logit_opacities']), 41 | 'scales': torch.exp(params['log_scales']), 42 | 'means2D': torch.zeros_like(params['means3D'], requires_grad=True, device="cuda") + 0 43 | } 44 | return rendervar 45 | 46 | def params2rendervar_wt(params, t): 47 | print(params['unnorm_rotations'][t]) 48 | rendervar = { 49 | 'means3D': params['means3D'][t], 50 | 'colors_precomp': params['rgb_colors'][t], 51 | 'rotations': torch.nn.functional.normalize(params['unnorm_rotations'][t]), 52 | 'opacities': torch.sigmoid(params['logit_opacities'][t]), 53 | 'scales': torch.exp(params['log_scales'][t]), 54 | 'means2D': torch.zeros_like(params['means3D'][t], requires_grad=True, device="cuda") + 0 55 | } 56 | return rendervar 57 | 58 | def params2rendervar_consistent_rgb(params, variables): 59 | rendervar = { 60 | 'means3D': params['means3D'], 61 | 'colors_precomp': variables['rgb_colors'], 62 | 'rotations': torch.nn.functional.normalize(params['unnorm_rotations']), 63 | 'opacities': torch.sigmoid(params['logit_opacities']), 64 | 'scales': torch.exp(params['log_scales']), 65 | 'means2D': torch.zeros_like(params['means3D'], requires_grad=True, device="cuda") + 0 66 | } 67 | return rendervar 68 | 69 | 70 | def l1_loss_v1(x, y): 71 | return torch.abs((x - y)).mean() 72 | 73 | 74 | def l1_loss_v2(x, y): 75 | return (torch.abs(x - y).sum(-1)).mean() 76 | 77 | 78 | def weighted_l2_loss_v1(x, y, w): 79 | return torch.sqrt(((x - y) ** 2) * w + 1e-20).mean() 80 | 81 | 82 | def weighted_l2_loss_v2(x, y, w): 83 | return torch.sqrt(((x - y) ** 2).sum(-1) * w + 1e-20).mean() 84 | 85 | 86 | def quat_mult(q1, q2): 87 | w1, x1, y1, z1 
= q1.T 88 | w2, x2, y2, z2 = q2.T 89 | w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 90 | x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 91 | y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 92 | z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 93 | return torch.stack([w, x, y, z]).T 94 | 95 | 96 | def o3d_knn(pts, num_knn): 97 | indices = [] 98 | sq_dists = [] 99 | pcd = o3d.geometry.PointCloud() 100 | # breakpoint() 101 | # print(pts.shape) 102 | pts_cont = np.ascontiguousarray(pts, np.float64) 103 | # print(pts_cont.shape) 104 | pcd.points = o3d.utility.Vector3dVector(np.ascontiguousarray(pts, np.float64)) 105 | if len(pcd.points) == 0: 106 | print("Point cloud is empty!") 107 | else: 108 | pcd_tree = o3d.geometry.KDTreeFlann(pcd) 109 | 110 | for p in pcd.points: 111 | [_, i, d] = pcd_tree.search_knn_vector_3d(p, num_knn + 1) 112 | indices.append(i[1:]) 113 | sq_dists.append(d[1:]) 114 | return np.array(sq_dists), np.array(indices) 115 | 116 | def o3d_knn_tensor(pts_tensor, num_knn): 117 | if pts_tensor.numel() == 0: 118 | print("Point cloud is empty!") 119 | return None, None 120 | 121 | pts_np = pts_tensor.detach().cpu().numpy() if pts_tensor.is_cuda else pts_tensor.numpy() 122 | pts_np_cont = np.ascontiguousarray(pts_np, dtype=np.float64) 123 | 124 | pcd = o3d.geometry.PointCloud() 125 | pcd.points = o3d.utility.Vector3dVector(pts_np_cont) 126 | 127 | pcd_tree = o3d.geometry.KDTreeFlann(pcd) 128 | 129 | indices = [] 130 | sq_dists = [] 131 | 132 | for p in pts_np_cont: 133 | [_, idx, dist] = pcd_tree.search_knn_vector_3d(p, num_knn + 1) 134 | indices.append(idx[1:]) # Skip the first index since it's the point itself 135 | sq_dists.append(dist[1:]) 136 | 137 | return torch.tensor(sq_dists, dtype=pts_tensor.dtype, device=pts_tensor.device), torch.tensor(indices, dtype=torch.long, device=pts_tensor.device) 138 | 139 | 140 | def params2cpu(params, is_initial_timestep): 141 | if is_initial_timestep: 142 | res = {k: v.detach().cpu().contiguous().numpy() for k, v in params.items()} 143 | else: 144 | res = {k: v.detach().cpu().contiguous().numpy() for k, v in params.items() if 145 | k in ['means3D', 'rgb_colors', 'unnorm_rotations']} 146 | return res 147 | 148 | 149 | def save_params(output_params, seq, exp): 150 | to_save = {} 151 | for k in output_params[0].keys(): 152 | if k in output_params[1].keys(): 153 | to_save[k] = np.stack([params[k] for params in output_params]) 154 | else: 155 | to_save[k] = output_params[0][k] 156 | os.makedirs(f"./output/{exp}/{seq}", exist_ok=True) 157 | np.savez(f"./output/{exp}/{seq}/params", **to_save) 158 | 159 | 160 | def farthest_point_sample(xyz, npoint): 161 | """ 162 | Input: 163 | xyz: pointcloud data, [B, N, C] 164 | npoint: number of samples 165 | Return: 166 | centroids: sampled pointcloud index, [B, npoint] 167 | """ 168 | device = xyz.device 169 | B, N, C = xyz.shape 170 | centroids = torch.zeros(B, npoint, dtype=torch.long).to(device) 171 | distance = torch.ones(B, N).to(device) * 1e10 172 | farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device) 173 | batch_indices = torch.arange(B, dtype=torch.long).to(device) 174 | for i in range(npoint): 175 | centroids[:, i] = farthest 176 | centroid = xyz[batch_indices, farthest, :].view(B, 1, C) 177 | dist = torch.sum((xyz - centroid) ** 2, -1) 178 | mask = dist < distance 179 | distance[mask] = dist[mask] 180 | farthest = torch.max(distance, -1)[1] 181 | return centroids 182 | 183 | def quat2mat(q): 184 | norm = torch.sqrt(q[:, 0] * q[:, 0] + q[:, 1] * q[:, 1] + q[:, 2] * q[:, 2] + q[:, 3] * q[:, 3]) 185 | q = q / 
norm[:, None] 186 | rot = torch.zeros((q.shape[0], 3, 3)).to(q.device) 187 | r = q[:, 0] 188 | x = q[:, 1] 189 | y = q[:, 2] 190 | z = q[:, 3] 191 | rot[:, 0, 0] = 1 - 2 * (y * y + z * z) 192 | rot[:, 0, 1] = 2 * (x * y - r * z) 193 | rot[:, 0, 2] = 2 * (x * z + r * y) 194 | rot[:, 1, 0] = 2 * (x * y + r * z) 195 | rot[:, 1, 1] = 1 - 2 * (x * x + z * z) 196 | rot[:, 1, 2] = 2 * (y * z - r * x) 197 | rot[:, 2, 0] = 2 * (x * z - r * y) 198 | rot[:, 2, 1] = 2 * (y * z + r * x) 199 | rot[:, 2, 2] = 1 - 2 * (x * x + y * y) 200 | return rot 201 | 202 | def rot2quat(rot): 203 | # Preallocate quaternion tensor 204 | q = torch.zeros((rot.shape[0], 4)).to(rot.device) 205 | 206 | # Compute quaternion components 207 | q[:, 0] = 0.5 * torch.sqrt(1 + rot[:, 0, 0] + rot[:, 1, 1] + rot[:, 2, 2]) 208 | q[:, 1] = (rot[:, 2, 1] - rot[:, 1, 2]) / (4 * q[:, 0]) 209 | q[:, 2] = (rot[:, 0, 2] - rot[:, 2, 0]) / (4 * q[:, 0]) 210 | q[:, 3] = (rot[:, 1, 0] - rot[:, 0, 1]) / (4 * q[:, 0]) 211 | 212 | return q -------------------------------------------------------------------------------- /experiments/real_world/gs/sh_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The PlenOctree Authors. 2 | # Redistribution and use in source and binary forms, with or without 3 | # modification, are permitted provided that the following conditions are met: 4 | # 5 | # 1. Redistributions of source code must retain the above copyright notice, 6 | # this list of conditions and the following disclaimer. 7 | # 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 12 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 13 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 16 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 18 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 20 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 21 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 22 | # POSSIBILITY OF SUCH DAMAGE. 23 | 24 | import torch 25 | 26 | C0 = 0.28209479177387814 27 | C1 = 0.4886025119029199 28 | C2 = [ 29 | 1.0925484305920792, 30 | -1.0925484305920792, 31 | 0.31539156525252005, 32 | -1.0925484305920792, 33 | 0.5462742152960396 34 | ] 35 | C3 = [ 36 | -0.5900435899266435, 37 | 2.890611442640554, 38 | -0.4570457994644658, 39 | 0.3731763325901154, 40 | -0.4570457994644658, 41 | 1.445305721320277, 42 | -0.5900435899266435 43 | ] 44 | C4 = [ 45 | 2.5033429417967046, 46 | -1.7701307697799304, 47 | 0.9461746957575601, 48 | -0.6690465435572892, 49 | 0.10578554691520431, 50 | -0.6690465435572892, 51 | 0.47308734787878004, 52 | -1.7701307697799304, 53 | 0.6258357354491761, 54 | ] 55 | 56 | 57 | def eval_sh(deg, sh, dirs): 58 | """ 59 | Evaluate spherical harmonics at unit directions 60 | using hardcoded SH polynomials. 61 | Works with torch/np/jnp. 62 | ... Can be 0 or more batch dimensions. 63 | Args: 64 | deg: int SH deg. 
Currently, 0-3 supported 65 | sh: jnp.ndarray SH coeffs [..., C, (deg + 1) ** 2] 66 | dirs: jnp.ndarray unit directions [..., 3] 67 | Returns: 68 | [..., C] 69 | """ 70 | assert deg <= 4 and deg >= 0 71 | coeff = (deg + 1) ** 2 72 | assert sh.shape[-1] >= coeff 73 | 74 | result = C0 * sh[..., 0] 75 | if deg > 0: 76 | x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3] 77 | result = (result - 78 | C1 * y * sh[..., 1] + 79 | C1 * z * sh[..., 2] - 80 | C1 * x * sh[..., 3]) 81 | 82 | if deg > 1: 83 | xx, yy, zz = x * x, y * y, z * z 84 | xy, yz, xz = x * y, y * z, x * z 85 | result = (result + 86 | C2[0] * xy * sh[..., 4] + 87 | C2[1] * yz * sh[..., 5] + 88 | C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] + 89 | C2[3] * xz * sh[..., 7] + 90 | C2[4] * (xx - yy) * sh[..., 8]) 91 | 92 | if deg > 2: 93 | result = (result + 94 | C3[0] * y * (3 * xx - yy) * sh[..., 9] + 95 | C3[1] * xy * z * sh[..., 10] + 96 | C3[2] * y * (4 * zz - xx - yy) * sh[..., 11] + 97 | C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] + 98 | C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] + 99 | C3[5] * z * (xx - yy) * sh[..., 14] + 100 | C3[6] * x * (xx - 3 * yy) * sh[..., 15]) 101 | 102 | if deg > 3: 103 | result = (result + C4[0] * xy * (xx - yy) * sh[..., 16] + 104 | C4[1] * yz * (3 * xx - yy) * sh[..., 17] + 105 | C4[2] * xy * (7 * zz - 1) * sh[..., 18] + 106 | C4[3] * yz * (7 * zz - 3) * sh[..., 19] + 107 | C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] + 108 | C4[5] * xz * (7 * zz - 3) * sh[..., 21] + 109 | C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] + 110 | C4[7] * xz * (xx - 3 * yy) * sh[..., 23] + 111 | C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) * sh[..., 24]) 112 | return result 113 | 114 | 115 | def RGB2SH(rgb): 116 | return (rgb - 0.5) / C0 117 | 118 | 119 | def SH2RGB(sh): 120 | return sh * C0 + 0.5 121 | -------------------------------------------------------------------------------- /experiments/real_world/gs/train_utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import torch 3 | import numpy as np 4 | import random 5 | from PIL import Image 6 | 7 | from pgnd.utils import get_root 8 | import sys 9 | root: Path = get_root(__file__) 10 | 11 | from diff_gaussian_rasterization import GaussianRasterizer as Renderer 12 | from gs.helpers import setup_camera, l1_loss_v1, o3d_knn, params2rendervar 13 | from gs.external import calc_ssim, calc_psnr 14 | 15 | 16 | def get_custom_dataset(img_list, seg_list, metadata): 17 | """ 18 | Generates a dataset from given metadata and sequence. 
19 | """ 20 | dataset = [] 21 | 22 | # Loop over filenames corresponding to 't' in the metadata 23 | for c in range(len(img_list)): 24 | 25 | # Extract parameters from the metadata 26 | w, h = metadata['w'], metadata['h'] 27 | k = metadata['k'][c] 28 | w2c = metadata['w2c'][c] 29 | 30 | # Set up a camera using extracted parameters and some default values 31 | cam = setup_camera(w, h, k, w2c, near=0.01, far=100) 32 | 33 | # Get the filename of the current image and open it 34 | if isinstance(img_list[c], str): 35 | im = np.array(Image.open(img_list[c])) 36 | im = torch.tensor(im).float().cuda().permute(2, 0, 1) / 255 37 | else: 38 | im = torch.tensor(img_list[c]).permute(2, 0, 1).float().cuda() 39 | if im.max() > 2.0: 40 | im = im / 255 41 | 42 | # Open the corresponding segmentation image and convert it to a tensor 43 | if isinstance(seg_list[c], str): 44 | seg = np.array(Image.open(seg_list[c])).astype(np.float32) 45 | else: 46 | seg = seg_list[c].astype(np.float32) 47 | seg = torch.tensor(seg).float().cuda() 48 | 49 | # Create a color segmentation tensor. It seems to treat the segmentation as binary (object/background) 50 | seg_col = torch.stack((seg, torch.zeros_like(seg), 1 - seg)) 51 | 52 | # Add the data to the dataset 53 | dataset.append({'cam': cam, 'im': im, 'seg': seg_col, 'id': c}) 54 | 55 | return dataset 56 | 57 | 58 | def initialize_params(init_pt_cld, metadata): 59 | """ 60 | Initializes parameters and variables required for a 3D point cloud based on provided data. 61 | """ 62 | 63 | # Extract the segmentation data. 64 | seg = init_pt_cld[:, 6] 65 | 66 | # Define a constant for the maximum number of cameras. 67 | max_cams = 4 68 | 69 | # Compute the squared distance for the K-Nearest Neighbors of each point in the 3D point cloud. 70 | sq_dist, indices = o3d_knn(init_pt_cld[:, :3], 3) 71 | 72 | # Calculate the mean squared distance for the 3 closest points and clip its minimum value. 73 | mean3_sq_dist = sq_dist.mean(-1).clip(min=0.0000001) 74 | 75 | # Initialize various parameters related to the 3D point cloud. 76 | params = { 77 | 'means3D': init_pt_cld[:, :3], # 3D coordinates of the points. 78 | 'rgb_colors': init_pt_cld[:, 3:6], # RGB color values for the points. 79 | 'seg_colors': np.stack((seg, np.zeros_like(seg), 1 - seg), -1), # Segmentation colors. 80 | 'unnorm_rotations': np.tile([1, 0, 0, 0], (seg.shape[0], 1)), # Default rotations for each point. 81 | 'logit_opacities': np.zeros((seg.shape[0], 1)), # Initial opacity values for the points. 82 | 'log_scales': np.tile(np.log(np.sqrt(mean3_sq_dist))[..., None], (1, 3)), # Scale factors for the points. 83 | 'cam_m': np.zeros((max_cams, 3)), # Placeholder for camera motion. 84 | 'cam_c': np.zeros((max_cams, 3)), # Placeholder for camera center. 85 | } 86 | 87 | # Convert the params to PyTorch tensors and move them to the GPU. 88 | params = {k: torch.nn.Parameter(torch.tensor(v).cuda().float().contiguous().requires_grad_(True)) for k, v in params.items()} 89 | # params['rgb_colors'].requires_grad = False 90 | 91 | # Calculate the scene radius based on the camera centers. 92 | cam_centers = np.linalg.inv(metadata['w2c'])[:, :3, 3] 93 | scene_radius = 1.1 * np.max(np.linalg.norm(cam_centers - np.mean(cam_centers, 0)[None], axis=-1)) 94 | 95 | # Initialize other associated variables. 96 | variables = { 97 | 'max_2D_radius': torch.zeros(params['means3D'].shape[0]).cuda().float(), # Maximum 2D radius. 98 | 'scene_radius': scene_radius, # Scene radius. 
99 | 'means2D_gradient_accum': torch.zeros(params['means3D'].shape[0]).cuda().float(), # Means2D gradient accumulator. 100 | 'denom': torch.zeros(params['means3D'].shape[0]).cuda().float() # Denominator. 101 | } 102 | 103 | return params, variables 104 | 105 | 106 | def initialize_optimizer(params, variables): 107 | lrs = { 108 | 'means3D': 0.00016 * variables['scene_radius'], 109 | 'rgb_colors': 0.0, 110 | 'seg_colors': 0.0, 111 | 'unnorm_rotations': 0.001, 112 | 'logit_opacities': 0.05, 113 | 'log_scales': 0.001, 114 | 'cam_m': 1e-4, 115 | 'cam_c': 1e-4, 116 | } 117 | param_groups = [{'params': [v], 'name': k, 'lr': lrs[k]} for k, v in params.items()] 118 | return torch.optim.Adam(param_groups, lr=0.0, eps=1e-15) 119 | 120 | 121 | def get_loss(params, curr_data, variables, loss_weights): 122 | 123 | # Initialize dictionary to store various loss components 124 | losses = {} 125 | 126 | # Convert parameters to rendering variables and retain gradient for 'means2D' 127 | rendervar = params2rendervar(params) 128 | rendervar['means2D'].retain_grad() 129 | 130 | # Perform rendering to obtain image, radius, and other outputs 131 | im, radius, _ = Renderer(raster_settings=curr_data['cam'])(**rendervar) 132 | 133 | # Apply camera parameters to modify the rendered image 134 | curr_id = curr_data['id'] 135 | im = torch.exp(params['cam_m'][curr_id])[:, None, None] * im + params['cam_c'][curr_id][:, None, None] 136 | 137 | # Calculate image loss using L1 loss and ssim 138 | losses['im'] = 0.8 * l1_loss_v1(im, curr_data['im']) + 0.2 * (1.0 - calc_ssim(im, curr_data['im'])) 139 | variables['means2D'] = rendervar['means2D'] # Gradient only accum from colour render for densification 140 | 141 | segrendervar = params2rendervar(params) 142 | segrendervar['colors_precomp'] = params['seg_colors'] 143 | seg, _, _, = Renderer(raster_settings=curr_data['cam'])(**segrendervar) 144 | 145 | # Calculate segmentation loss 146 | losses['seg'] = 0.8 * l1_loss_v1(seg, curr_data['seg']) + 0.2 * (1.0 - calc_ssim(seg, curr_data['seg'])) 147 | 148 | # Calculate total loss as weighted sum of individual losses 149 | loss = sum([loss_weights[k] * v for k, v in losses.items()]) 150 | 151 | # Update variables related to rendering radius and seen areas 152 | seen = radius > 0 153 | variables['max_2D_radius'][seen] = torch.max(radius[seen], variables['max_2D_radius'][seen]) 154 | variables['seen'] = seen 155 | return loss, variables 156 | 157 | 158 | def report_progress(params, data, i, progress_bar, num_pts, every_i=100, vis_dir=None): 159 | if i % every_i == 0: 160 | im, _, _, = Renderer(raster_settings=data['cam'])(**params2rendervar(params)) 161 | curr_id = data['id'] 162 | im = torch.exp(params['cam_m'][curr_id])[:, None, None] * im + params['cam_c'][curr_id][:, None, None] 163 | if vis_dir: 164 | Image.fromarray((im.cpu().numpy().clip(0, 1) * 255).astype(np.uint8).transpose(1, 2, 0)).save(f"{vis_dir}/{i:06d}.png") 165 | psnr = calc_psnr(im, data['im']).mean() 166 | progress_bar.set_postfix({"img 0 PSNR": f"{psnr:.{7}f}, number of points: {num_pts}"}) 167 | progress_bar.update(every_i) 168 | 169 | 170 | def get_batch(todo_dataset, dataset): 171 | if not todo_dataset: 172 | todo_dataset = dataset.copy() 173 | curr_data = todo_dataset.pop(random.randint(0, len(todo_dataset) - 1)) 174 | return curr_data 175 | -------------------------------------------------------------------------------- /experiments/real_world/modules_planning/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/real_world/modules_planning/__init__.py -------------------------------------------------------------------------------- /experiments/real_world/modules_planning/common/communication.py: -------------------------------------------------------------------------------- 1 | XARM_STATE_PORT = 10101 2 | XARM_CONTROL_PORT = 10102 3 | OBJECT_POSE_PORT = 10109 4 | 5 | XARM_CONTROL_PORT_L = 10102 6 | XARM_CONTROL_PORT_R = 10110 7 | -------------------------------------------------------------------------------- /experiments/real_world/modules_planning/common/xarm.py: -------------------------------------------------------------------------------- 1 | XY_MIN, XY_MAX = 50, 800 2 | X_MIN,X_MAX = 0, 800 3 | Y_MIN,Y_MAX = -350, 350 4 | Z_MIN, Z_MAX = 110, 650 5 | 6 | MOVE_RATE = 1000 # hz; macro velocity = move_rate * xyz_velocity 7 | MOVE_SLEEP = 1 / MOVE_RATE 8 | XYZ_VELOCITY = 1.0 # mm (in every MOVE_SLEEP interval) 9 | ANGLE_VELOCITY_MAX = 0.05 # degree 10 | 11 | GRIPPER_OPEN_MAX = 800 12 | GRIPPER_OPEN_MIN = 0 13 | 14 | POSITION_UPDATE_INTERVAL = 0.02 # 100hz the reader must read faster than this 15 | COMMAND_CHECK_INTERVAL = 0.02 # 100hz command must be sent faster than this -------------------------------------------------------------------------------- /experiments/real_world/modules_planning/kinematics_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import time 4 | import numpy as np 5 | import transforms3d 6 | from pathlib import Path 7 | import open3d as o3d 8 | 9 | import warnings 10 | warnings.filterwarnings("always", category=RuntimeWarning) 11 | 12 | import sapien.core as sapien 13 | 14 | from nclaw.utils import get_root 15 | root: Path = get_root(__file__) 16 | 17 | def np2o3d(pcd, color=None): 18 | # pcd: (n, 3) 19 | # color: (n, 3) 20 | pcd_o3d = o3d.geometry.PointCloud() 21 | pcd_o3d.points = o3d.utility.Vector3dVector(pcd) 22 | if color is not None: 23 | assert pcd.shape[0] == color.shape[0] 24 | assert color.max() <= 1 25 | assert color.min() >= 0 26 | pcd_o3d.colors = o3d.utility.Vector3dVector(color) 27 | return pcd_o3d 28 | 29 | 30 | class KinHelper(): 31 | def __init__(self, robot_name, headless=True): 32 | # load robot 33 | if "xarm7" in robot_name: 34 | urdf_path = str(root / "assets/robots/xarm7/xarm7.urdf") 35 | self.eef_name = 'link7' 36 | else: 37 | raise RuntimeError('robot name not supported') 38 | self.robot_name = robot_name 39 | 40 | # load sapien robot 41 | self.engine = sapien.Engine() 42 | self.scene = self.engine.create_scene() 43 | loader = self.scene.create_urdf_loader() 44 | self.sapien_robot = loader.load(urdf_path) 45 | self.robot_model = self.sapien_robot.create_pinocchio_model() 46 | self.sapien_eef_idx = -1 47 | for link_idx, link in enumerate(self.sapien_robot.get_links()): 48 | if link.name == self.eef_name: 49 | self.sapien_eef_idx = link_idx 50 | break 51 | 52 | # load meshes and offsets from urdf_robot 53 | self.meshes = {} 54 | self.scales = {} 55 | self.offsets = {} 56 | self.pcd_dict = {} 57 | self.tool_meshes = {} 58 | 59 | def compute_fk_sapien_links(self, qpos, link_idx): 60 | fk = self.robot_model.compute_forward_kinematics(qpos) 61 | link_pose_ls = [] 62 | for i in link_idx: 63 | link_pose_ls.append(self.robot_model.get_link_pose(i).to_transformation_matrix()) 64 | return link_pose_ls 65 | 66 | def compute_ik_sapien(self, initial_qpos, cartesian, verbose=False): 67 | """ 
68 | Compute IK using sapien 69 | initial_qpos: (N, ) numpy array 70 | cartesian: (6, ) numpy array, x,y,z in meters, r,p,y in radians 71 | """ 72 | tf_mat = np.eye(4) 73 | tf_mat[:3, :3] = transforms3d.euler.euler2mat(ai=cartesian[3], aj=cartesian[4], ak=cartesian[5], axes='sxyz') 74 | tf_mat[:3, 3] = cartesian[0:3] 75 | pose = sapien.Pose.from_transformation_matrix(tf_mat) 76 | 77 | if 'xarm7' in self.robot_name: 78 | active_qmask = np.array([True, True, True, True, True, True, True]) 79 | qpos = self.robot_model.compute_inverse_kinematics( 80 | link_index=self.sapien_eef_idx, 81 | pose=pose, 82 | initial_qpos=initial_qpos, 83 | active_qmask=active_qmask, 84 | ) 85 | if verbose: 86 | print('ik qpos:', qpos) 87 | 88 | # verify ik 89 | fk_pose = self.compute_fk_sapien_links(qpos[0], [self.sapien_eef_idx])[0] 90 | 91 | if verbose: 92 | print('target pose for IK:', tf_mat) 93 | print('fk pose for IK:', fk_pose) 94 | 95 | pose_diff = np.linalg.norm(fk_pose[:3, 3] - tf_mat[:3, 3]) 96 | rot_diff = np.linalg.norm(fk_pose[:3, :3] - tf_mat[:3, :3]) 97 | 98 | if pose_diff > 0.01 or rot_diff > 0.01: 99 | print('ik pose diff:', pose_diff) 100 | print('ik rot diff:', rot_diff) 101 | warnings.warn('ik pose diff or rot diff too large. Return initial qpos.', RuntimeWarning, stacklevel=2, ) 102 | return initial_qpos 103 | return qpos[0] 104 | 105 | 106 | def test_fk(): 107 | robot_name = 'xarm7' 108 | init_qpos = np.array([0, 0, 0, 0, 0, 0, 0]) 109 | end_qpos = np.array([0, 0, 0, 0, 0, 0, 0]) 110 | 111 | kin_helper = KinHelper(robot_name=robot_name, headless=False) 112 | START_ARM_POSE = [0, 0, 0, 0, 0, 0, 0] 113 | 114 | for i in range(100): 115 | curr_qpos = init_qpos + (end_qpos - init_qpos) * i / 100 116 | fk = kin_helper.compute_fk_sapien_links(curr_qpos, [kin_helper.sapien_eef_idx])[0] 117 | fk_euler = transforms3d.euler.mat2euler(fk[:3, :3], axes='sxyz') 118 | 119 | if i == 0: 120 | init_ik_qpos = np.array(START_ARM_POSE) 121 | ik_qpos = kin_helper.compute_ik_sapien(init_ik_qpos, np.array(list(fk[:3, 3]) + list(fk_euler)).astype(np.float32)) 122 | re_fk_pos_mat = kin_helper.compute_fk_sapien_links(ik_qpos, [kin_helper.sapien_eef_idx])[0] 123 | re_fk_euler = transforms3d.euler.mat2euler(re_fk_pos_mat[:3, :3], axes='sxyz') 124 | re_fk_pos = re_fk_pos_mat[:3, 3] 125 | print('re_fk_pos diff:', np.linalg.norm(re_fk_pos - fk[:3, 3])) 126 | print('re_fk_euler diff:', np.linalg.norm(np.array(re_fk_euler) - np.array(fk_euler))) 127 | 128 | 129 | init_ik_qpos = ik_qpos.copy() 130 | qpos_diff = np.linalg.norm(ik_qpos[:6] - curr_qpos[:6]) 131 | if qpos_diff > 0.01: 132 | warnings.warn('qpos diff too large', RuntimeWarning, stacklevel=2, ) 133 | 134 | time.sleep(0.1) 135 | 136 | if __name__ == "__main__": 137 | test_fk() 138 | -------------------------------------------------------------------------------- /experiments/real_world/modules_planning/udp_util.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import numpy as np 3 | import pickle 4 | import threading 5 | import time 6 | 7 | from modules_planning.common.communication import * 8 | 9 | np.set_printoptions(precision=3, suppress=True) 10 | 11 | class udpSender: 12 | def __init__(self, port: dict): 13 | self.port = port 14 | self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 15 | self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1) 16 | 17 | def send(self, data: dict): 18 | self.sock.sendto(pickle.dumps(data), ('127.0.0.1', self.port)) 19 | 20 | def close(self): 21 | 
self.sock.close() 22 | 23 | class udpReceiver: 24 | def __init__(self, ports: dict, re_use_address = False): 25 | self.ports = ports 26 | self.re_use_address = re_use_address 27 | self.results = {} 28 | self.lock = threading.Lock() 29 | self.thread = None 30 | 31 | def receive(self, name, port): 32 | sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 33 | if self.re_use_address: 34 | sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 35 | sock.bind(('127.0.0.1', port)) 36 | # sock.settimeout(0.1) # Set timeout to 1 second 37 | while self.alive: 38 | try: 39 | t0 = time.time() 40 | try: 41 | data, addr = sock.recvfrom(32768) 42 | except socket.timeout: 43 | continue 44 | data = pickle.loads(data) 45 | with self.lock: 46 | self.results[name] = data 47 | t1 = time.time() 48 | # print(f"Receive {name} data from {addr} in {t1-t0:.6f} seconds") 49 | except BaseException as e: 50 | print(f"Error in receiving {name} data: {e}") 51 | break 52 | sock.close() 53 | print(f"Receive {name} thread stopped") 54 | 55 | def start(self): 56 | self.alive = True 57 | self.thread = [] 58 | for name, port in self.ports.items(): 59 | t = threading.Thread(target=self.receive, args=(name, port)) 60 | t.start() 61 | self.thread.append(t) 62 | 63 | def stop(self): 64 | print("Stopping udpReceiver") 65 | self.alive = False 66 | for t in self.thread: 67 | if t is not None and t.is_alive(): 68 | t.join() 69 | print("udpReceiver stopped") 70 | 71 | def get(self, name=None, pop=False): 72 | with self.lock: 73 | if name is None: 74 | ret = self.results 75 | if pop: 76 | self.results = {} 77 | return ret 78 | else: 79 | if name in self.results: 80 | if pop: 81 | ret = self.results[name] 82 | del self.results[name] 83 | return ret 84 | else: 85 | return None 86 | 87 | if __name__ == '__main__': 88 | udpReceiver = udpReceiver(ports= 89 | { 90 | 'object_pose': OBJECT_POSE_PORT, 91 | 'xarm_state': XARM_STATE_PORT 92 | }) 93 | udpReceiver.start() 94 | try: 95 | while True: 96 | print(udpReceiver.get()) 97 | print() 98 | time.sleep(0.01) 99 | except KeyboardInterrupt: 100 | try: 101 | udpReceiver.lock.release() 102 | except: 103 | pass 104 | udpReceiver.stop() 105 | -------------------------------------------------------------------------------- /experiments/real_world/modules_teleop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/real_world/modules_teleop/__init__.py -------------------------------------------------------------------------------- /experiments/real_world/modules_teleop/recorder.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import time 4 | import numpy as np 5 | import cv2 6 | import open3d as o3d 7 | from threadpoolctl import threadpool_limits 8 | import multiprocess as mp 9 | import threading 10 | from threading import Lock 11 | 12 | from pgnd.utils import get_root 13 | root: Path = get_root(__file__) 14 | 15 | from camera.multi_realsense import MultiRealsense 16 | from camera.single_realsense import SingleRealsense 17 | 18 | 19 | class Recorder(mp.Process): 20 | 21 | def __init__( 22 | self, 23 | realsense: MultiRealsense | SingleRealsense, 24 | capture_fps, 25 | record_fps, 26 | record_time, 27 | process_func, 28 | exp_name=None, 29 | data_dir="data", 30 | verbose=False, 31 | ): 32 | super().__init__() 33 | self.verbose = verbose 34 | 35 | self.capture_fps = capture_fps 
36 | self.record_fps = record_fps 37 | self.record_time = record_time 38 | self.exp_name = exp_name 39 | self.data_dir = data_dir 40 | 41 | if self.exp_name is None: 42 | assert self.record_fps == 0 43 | 44 | self.realsense = realsense 45 | self.recorder_q = mp.Queue(maxsize=1) 46 | 47 | self.process_func = process_func 48 | self.num_cam = len(realsense.cameras.keys()) 49 | self.alive = mp.Value('b', False) 50 | self.record_restart = mp.Value('b', False) 51 | self.record_stop = mp.Value('b', False) 52 | 53 | def log(self, msg): 54 | if self.verbose: 55 | print(f"\033[92m{self.name}: {msg}\033[0m") 56 | 57 | @property 58 | def can_record(self): 59 | return self.record_fps != 0 60 | 61 | def run(self): 62 | # limit threads 63 | threadpool_limits(1) 64 | cv2.setNumThreads(1) 65 | 66 | realsense = self.realsense 67 | 68 | capture_fps = self.capture_fps 69 | record_fps = self.record_fps 70 | record_time = self.record_time 71 | 72 | cameras_output = None 73 | recording_frame = float("inf") # local record step index (since current record start), record fps 74 | record_start_frame = 0 # global step index (since process start), capture fps 75 | is_recording = False # recording state flag 76 | timestamps_f = None 77 | 78 | while self.alive.value: 79 | try: 80 | cameras_output = realsense.get(out=cameras_output) 81 | get_time = time.time() 82 | timestamps = [cameras_output[i]['timestamp'].item() for i in range(self.num_cam)] 83 | if is_recording and not all([abs(timestamps[i] - timestamps[i+1]) < 0.05 for i in range(self.num_cam - 1)]): 84 | print(f"Captured at different timestamps: {[f'{x:.2f}' for x in timestamps]}") 85 | 86 | # treat captured time and record time as the same 87 | process_start_time = get_time 88 | process_out = self.process_func(cameras_output) if self.process_func is not None else cameras_output 89 | self.log(f"process time: {time.time() - process_start_time}") 90 | 91 | if not self.recorder_q.full(): 92 | self.recorder_q.put(process_out) 93 | 94 | if self.can_record: 95 | # recording state machine: 96 | # ---restart_cmd---> 97 | # record not_record 98 | # <-- stop_cmd / timeover ---- 99 | if not is_recording and self.record_restart.value == True: 100 | self.record_restart.value = False 101 | 102 | recording_frame = 0 103 | record_start_time = get_time 104 | record_start_frame = cameras_output[0]['step_idx'].item() 105 | 106 | record_dir = root / "log" / self.data_dir / self.exp_name / f"{record_start_time:.0f}" 107 | os.makedirs(record_dir, exist_ok=True) 108 | timestamps_f = open(f'{record_dir}/timestamps.txt', 'a') 109 | 110 | for i in range(self.num_cam): 111 | os.makedirs(f'{record_dir}/camera_{i}/rgb', exist_ok=True) 112 | os.makedirs(f'{record_dir}/camera_{i}/depth', exist_ok=True) 113 | is_recording = True 114 | 115 | elif is_recording and ( 116 | self.record_stop.value == True or 117 | (recording_frame >= record_time * record_fps) 118 | ): 119 | finish_time = get_time 120 | print(f"is_recording {is_recording}, self.record_stop.value {self.record_stop.value}, recording time {recording_frame}, max recording time {record_time} * {record_fps}") 121 | print(f"total time: {finish_time - record_start_time}") 122 | print(f"fps: {recording_frame / (finish_time - record_start_time)}") 123 | is_recording = False 124 | 125 | timestamps_f.close() 126 | self.record_restart.value = False 127 | self.record_stop.value = False 128 | else: 129 | self.record_restart.value = False 130 | self.record_stop.value = False 131 | 132 | # record the frame according to the record_fps 133 | if 
is_recording and cameras_output[0]['step_idx'].item() >= (recording_frame * (capture_fps // record_fps) + record_start_frame): 134 | timestamps_f.write(' '.join( 135 | [str(cameras_output[i]['step_idx'].item()) for i in range(self.num_cam)] + 136 | [str(np.round(cameras_output[i]['timestamp'].item() - record_start_time, 3)) for i in range(self.num_cam)] + 137 | [str(cameras_output[i]['timestamp'].item()) for i in range(self.num_cam)] 138 | ) + '\n') 139 | timestamps_f.flush() 140 | for i in range(self.num_cam): 141 | cv2.imwrite(f'{record_dir}/camera_{i}/rgb/{recording_frame:06}.jpg', cameras_output[i]['color']) 142 | cv2.imwrite(f'{record_dir}/camera_{i}/depth/{recording_frame:06}.png', cameras_output[i]['depth']) 143 | recording_frame += 1 144 | 145 | except BaseException as e: 146 | print("Recorder error: ", e) 147 | break 148 | 149 | if self.can_record: 150 | if timestamps_f is not None and not timestamps_f.closed: 151 | timestamps_f.close() 152 | finish_time = time.time() 153 | 154 | self.stop() 155 | print("Recorder process stopped") 156 | 157 | 158 | def start(self): 159 | self.alive.value = True 160 | super().start() 161 | 162 | def stop(self): 163 | self.alive.value = False 164 | self.recorder_q.close() 165 | 166 | def set_record_start(self): 167 | if self.record_fps == 0: 168 | print("record disabled because record_fps is 0") 169 | assert self.record_restart.value == False 170 | else: 171 | self.record_restart.value = True 172 | print("record restart cmd received") 173 | 174 | def set_record_stop(self): 175 | if self.record_fps == 0: 176 | print("record disabled because record_fps is 0") 177 | assert self.record_stop.value == False 178 | else: 179 | self.record_stop.value = True 180 | print("record stop cmd received") 181 | -------------------------------------------------------------------------------- /experiments/real_world/plan.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import argparse 3 | import random 4 | import time 5 | import os 6 | import matplotlib.pyplot as plt 7 | from collections import defaultdict 8 | from tqdm import tqdm, trange 9 | import hydra 10 | from omegaconf import DictConfig, OmegaConf 11 | import yaml 12 | from datetime import datetime 13 | import numpy as np 14 | from PIL import Image 15 | import warp as wp 16 | import matplotlib.pyplot as plt 17 | import multiprocess as mp 18 | import torch 19 | import torch.backends.cudnn 20 | import torch.nn as nn 21 | from torch.nn.utils import clip_grad_norm_ 22 | from torch.utils.data import DataLoader 23 | import logging 24 | 25 | from pgnd.utils import get_root, mkdir 26 | from modules_planning.planning_env import RobotPlanningEnv 27 | 28 | root: Path = get_root(__file__) 29 | logging.basicConfig(level=logging.WARNING) 30 | 31 | 32 | def main(args): 33 | mp.set_start_method('spawn') 34 | 35 | with open(root / args.config, 'r') as f: 36 | config = yaml.load(f, Loader=yaml.CLoader) 37 | cfg = OmegaConf.create(config) 38 | 39 | cfg.sim.num_steps = 1000 40 | cfg.sim.gripper_forcing = False 41 | cfg.sim.uniform = True 42 | 43 | iteration = args.iteration 44 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 45 | 46 | ckpt_path = (root / args.config).parent / 'ckpt' / f'{iteration:06d}.pt' 47 | seed = cfg.seed 48 | random.seed(seed) 49 | np.random.seed(seed) 50 | torch.manual_seed(seed) 51 | 52 | # path 53 | datetime_now = datetime.now().strftime('%y%m%d-%H%M%S') 54 | exp_root: Path = root / 'log' / cfg.train.name / 'plan' 
/ datetime_now 55 | mkdir(exp_root, overwrite=cfg.overwrite, resume=cfg.resume) 56 | 57 | env = RobotPlanningEnv( 58 | cfg, 59 | exp_root=exp_root, 60 | ckpt_path=ckpt_path, 61 | resolution=(848, 480), 62 | capture_fps=30, 63 | record_fps=0, 64 | text_prompts=args.text_prompts, 65 | show_annotation=(not args.no_annotation), 66 | use_robot=True, 67 | bimanual=args.bimanual, 68 | gripper_enable=True, 69 | debug=True, 70 | construct_target=args.construct_target, 71 | ) 72 | 73 | env.start() 74 | env.join() 75 | 76 | 77 | if __name__ == '__main__': 78 | arg_parser = argparse.ArgumentParser() 79 | arg_parser.add_argument('--config', type=str, default='log/cloth/train/hydra.yaml') 80 | arg_parser.add_argument('--iteration', type=str, default=100000) 81 | arg_parser.add_argument('--text_prompts', type=str, default='green towel.') 82 | arg_parser.add_argument('--seed', type=int, default=42) 83 | arg_parser.add_argument('--no_annotation', action='store_true') 84 | arg_parser.add_argument('--bimanual', action='store_true') 85 | arg_parser.add_argument('--construct_target', action='store_true') 86 | args = arg_parser.parse_args() 87 | 88 | with torch.no_grad(): 89 | main(args) 90 | -------------------------------------------------------------------------------- /experiments/real_world/teleop.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import argparse 3 | import multiprocess as mp 4 | import sys 5 | sys.path.append(str(Path(__file__).parent.parent.parent)) 6 | 7 | import pgnd 8 | from pgnd.utils import get_root, mkdir 9 | root: Path = get_root(__file__) 10 | sys.path.append(str(root / "real_world")) 11 | 12 | from modules_teleop.robot_env_teleop import RobotTeleopEnv 13 | 14 | 15 | if __name__ == '__main__': 16 | mp.set_start_method('spawn') 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--name', type=str, default='') 20 | parser.add_argument('--bimanual', action='store_true') 21 | args = parser.parse_args() 22 | 23 | assert args.name != '', "Please provide a name for the experiment" 24 | 25 | env = RobotTeleopEnv( 26 | mode='3D', 27 | exp_name=args.name, 28 | resolution=(848, 480), 29 | capture_fps=30, 30 | record_fps=30, 31 | perception_process_func=None, 32 | use_robot=True, 33 | use_gello=True, 34 | bimanual=args.bimanual, 35 | gripper_enable=True, 36 | data_dir="data", 37 | debug=True, 38 | ) 39 | 40 | env.start() 41 | env.join() 42 | -------------------------------------------------------------------------------- /experiments/real_world/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/real_world/utils/__init__.py -------------------------------------------------------------------------------- /experiments/real_world/utils/env_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_bounding_box_bimanual(): 5 | return np.array([[0.0, 0.6], [-0.35, 0.45 + 0.75], [-0.65, 0.05]]) # the world frame robot workspace 6 | 7 | 8 | def get_bounding_box(): 9 | return np.array([[0.0, 0.6], [-0.35, 0.45], [-0.65, 0.05]]) # the world frame robot workspace 10 | -------------------------------------------------------------------------------- /experiments/real_world/utils/pcd_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 
import open3d as o3d 3 | 4 | 5 | def filter_tabletop_points(pcd): 6 | # RANSAC to find table plane 7 | plane_model, inliers = pcd.segment_plane(distance_threshold=0.01, ransac_n=3, num_iterations=1000) 8 | [a, b, c, d] = plane_model 9 | table_plane = np.array([a, b, c, d]) 10 | 11 | # remove points on the table 12 | pcd = pcd.select_by_index(inliers, invert=True) 13 | 14 | outliers = None 15 | new_outlier = None 16 | rm_iter = 0 17 | while new_outlier is None or len(new_outlier.points) > 0: 18 | _, inlier_idx = pcd.remove_statistical_outlier( 19 | nb_neighbors = 20, std_ratio = 1.5 + rm_iter * 0.5 20 | ) 21 | new_pcd = pcd.select_by_index(inlier_idx) 22 | new_outlier = pcd.select_by_index(inlier_idx, invert=True) 23 | if outliers is None: 24 | outliers = new_outlier 25 | else: 26 | outliers += new_outlier 27 | pcd = new_pcd 28 | rm_iter += 1 29 | 30 | return pcd 31 | 32 | 33 | def get_tabletop_points(rgb_list, depth_list, R_list, t_list, intr_list, bbox, depth_threshold=[0, 2]): 34 | # increase if out of memory 35 | stride = 1 36 | 37 | pcd_all = o3d.geometry.PointCloud() 38 | 39 | for i in range(len(rgb_list)): 40 | intr = intr_list[i] 41 | R_cam2board = R_list[i] 42 | t_cam2board = t_list[i] 43 | 44 | depth = depth_list[i].copy().astype(np.float32) 45 | 46 | points = depth2fgpcd(depth, intr) 47 | points = points.reshape(depth.shape[0], depth.shape[1], 3) 48 | points = points[::stride, ::stride, :].reshape(-1, 3) 49 | 50 | mask = np.logical_and( 51 | (depth > depth_threshold[0]), (depth < depth_threshold[1]) 52 | ) # (H, W) 53 | mask = mask[::stride, ::stride].reshape(-1) 54 | 55 | img = rgb_list[i].copy() 56 | 57 | points = points[mask].reshape(-1, 3) 58 | 59 | points = R_cam2board @ points.T + t_cam2board[:, None] 60 | points = points.T # (N, 3) 61 | 62 | pcd = o3d.geometry.PointCloud() 63 | pcd.points = o3d.utility.Vector3dVector(points) 64 | 65 | colors = img[::stride, ::stride, :].reshape(-1, 3).astype(np.float64) 66 | colors = colors[mask].reshape(-1, 3) 67 | colors = colors[:, ::-1].copy() 68 | pcd.colors = o3d.utility.Vector3dVector(colors / 255) 69 | pcd_all += pcd 70 | 71 | pcd = pcd_all 72 | pcd = pcd.crop(o3d.geometry.AxisAlignedBoundingBox(min_bound=bbox[:, 0], max_bound=bbox[:, 1])) 73 | pcd = pcd.voxel_down_sample(voxel_size=0.001) 74 | 75 | # NOTE: simple filtering instead of instance-specific segmentation processing 76 | pcd = filter_tabletop_points(pcd) 77 | return pcd 78 | 79 | 80 | def rpy_to_rotation_matrix(roll, pitch, yaw): 81 | # Assume the input in in degree 82 | roll = roll / 180 * np.pi 83 | pitch = pitch / 180 * np.pi 84 | yaw = yaw / 180 * np.pi 85 | # Define the rotation matrices 86 | Rx = np.array([[1, 0, 0], [0, np.cos(roll), -np.sin(roll)], [0, np.sin(roll), np.cos(roll)]]) 87 | Ry = np.array([[np.cos(pitch), 0, np.sin(pitch)], [0, 1, 0], [-np.sin(pitch), 0, np.cos(pitch)]]) 88 | Rz = np.array([[np.cos(yaw), -np.sin(yaw), 0], [np.sin(yaw), np.cos(yaw), 0], [0, 0, 1]]) 89 | # Combine the rotations 90 | R = Rz @ Ry @ Rx 91 | return R 92 | 93 | 94 | def rotation_matrix_to_rpy(rotation_matrix): 95 | # Get the roll, pitch, yaw in radian 96 | roll = np.arctan2(rotation_matrix[2, 1], rotation_matrix[2, 2]) 97 | pitch = np.arctan2(-rotation_matrix[2, 0], np.sqrt(rotation_matrix[2, 1] ** 2 + rotation_matrix[2, 2] ** 2)) 98 | yaw = np.arctan2(rotation_matrix[1, 0], rotation_matrix[0, 0]) 99 | # Get the roll, pitch, yaw in degree 100 | roll = roll / np.pi * 180 101 | pitch = pitch / np.pi * 180 102 | yaw = yaw / np.pi * 180 103 | return roll, pitch, yaw 104 | 105 | 
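# A minimal sketch (not from the original file): the two converters above both work in
# degrees, so a cheap round-trip check helps catch unit mix-ups when wiring them into
# calibration code. Only rpy_to_rotation_matrix / rotation_matrix_to_rpy are taken from
# this file; the helper name and the sample angles below are hypothetical.
def _rpy_roundtrip_ok():
    rpy = (30.0, -15.0, 60.0)  # roll, pitch, yaw in degrees
    R = rpy_to_rotation_matrix(*rpy)  # 3x3 rotation matrix, R = Rz @ Ry @ Rx
    return np.allclose(rotation_matrix_to_rpy(R), rpy)  # degrees back out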
106 | def depth2fgpcd(depth, intrinsic_matrix): 107 | H, W = depth.shape 108 | x, y = np.meshgrid(np.arange(W), np.arange(H)) 109 | x = x.reshape(-1) 110 | y = y.reshape(-1) 111 | depth = depth.reshape(-1) 112 | points = np.stack([x, y, np.ones_like(x)], axis=1) 113 | points = points * depth[:, None] 114 | points = points @ np.linalg.inv(intrinsic_matrix).T 115 | return points 116 | 117 | 118 | def similarity_transform(from_points, to_points): 119 | 120 | assert len(from_points.shape) == 2, \ 121 | "from_points must be a m x n array" 122 | assert from_points.shape == to_points.shape, \ 123 | "from_points and to_points must have the same shape" 124 | 125 | N, m = from_points.shape 126 | 127 | mean_from = from_points.mean(axis = 0) 128 | mean_to = to_points.mean(axis = 0) 129 | 130 | delta_from = from_points - mean_from # N x m 131 | delta_to = to_points - mean_to # N x m 132 | 133 | sigma_from = (delta_from * delta_from).sum(axis = 1).mean() 134 | sigma_to = (delta_to * delta_to).sum(axis = 1).mean() 135 | 136 | cov_matrix = delta_to.T.dot(delta_from) / N 137 | 138 | U, d, V_t = np.linalg.svd(cov_matrix, full_matrices = True) 139 | cov_rank = np.linalg.matrix_rank(cov_matrix) 140 | S = np.eye(m) 141 | 142 | if cov_rank >= m - 1: # and np.linalg.det(cov_matrix) < 0: # TODO touch calibration 143 | S[m-1, m-1] = -1 144 | elif cov_rank < m-1: 145 | raise ValueError("colinearility detected in covariance matrix:\n{}".format(cov_matrix)) 146 | 147 | R = U.dot(S).dot(V_t) 148 | c = (d * S.diagonal()).sum() / sigma_from 149 | t = mean_to - c*R.dot(mean_from) 150 | 151 | return c, R, t 152 | 153 | 154 | def visualize_o3d(geometries): 155 | frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.2, origin=[0, 0, 0]) 156 | geometries.append(frame) 157 | 158 | pcd = o3d.geometry.PointCloud() 159 | pcd.points = o3d.utility.Vector3dVector() 160 | viewer = o3d.visualization.Visualizer() 161 | viewer.create_window() 162 | for geometry in geometries: 163 | viewer.add_geometry(geometry) 164 | opt = viewer.get_render_option() 165 | opt.background_color = np.asarray([1., 1., 1.]) 166 | viewer.run() 167 | -------------------------------------------------------------------------------- /experiments/real_world/utils/planning_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | import math 6 | 7 | 8 | def chamfer(x, y): # x: (B, N, D), y: (B, M, D) 9 | x = x[:, None].repeat(1, y.shape[1], 1, 1) # (B, M, N, D) 10 | y = y[:, :, None].repeat(1, 1, x.shape[2], 1) # (B, M, N, D) 11 | dis = torch.norm(x - y, 2, dim=-1) # (B, M, N) 12 | dis_xy = torch.mean(dis.min(dim=2).values, dim=1) # dis_xy: mean over N 13 | dis_yx = torch.mean(dis.min(dim=1).values, dim=1) # dis_yx: mean over M 14 | return dis_xy + dis_yx 15 | 16 | 17 | def batch_chamfer_dist(xyz, xyz_gt): 18 | # xyz: (B, N, 3) 19 | # xyz_gt: (M, 3) 20 | 21 | # mean aligning 22 | # chamfer = (xyz.mean(dim=1) - xyz_gt.mean(dim=0)).norm(dim=1) # (B,) 23 | 24 | # chamfer distance 25 | xyz_gt = xyz_gt[None] # (1, M, 3) 26 | dist1 = torch.sqrt(torch.sum((xyz[:, :, None] - xyz_gt[:, None]) ** 2, dim=3)) # (B, N, M) 27 | dist2 = torch.sqrt(torch.sum((xyz_gt[:, None] - xyz[:, :, None]) ** 2, dim=3)) # (B, M, N) 28 | chamfer = torch.mean(torch.min(dist1, dim=1).values, dim=1) + torch.mean(torch.min(dist2, dim=1).values, dim=1) # (B,) 29 | return chamfer 30 | 31 | 32 | def angle_normalize(x): 33 | return (((x + math.pi) % (2 * 
math.pi)) - math.pi) 34 | 35 | 36 | def clip_actions(action, action_lower_lim, action_upper_lim): 37 | action_new = action.clone() 38 | # action_new[..., 2] = angle_normalize(action[..., 2]) 39 | action_new.data.clamp_(action_lower_lim, action_upper_lim) 40 | return action_new 41 | -------------------------------------------------------------------------------- /experiments/real_world/utils/render_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import time 4 | import kornia 5 | 6 | 7 | def interpolate_motions(bones, motions, relations, xyz, rot=None, quat=None, weights=None, device='cuda', step='n/a'): 8 | # bones: (n_bones, 3) 9 | # motions: (n_bones, 3) 10 | # relations: (n_bones, k) 11 | # indices: (n_bones,) 12 | # xyz: (n_particles, 3) 13 | # rot: (n_particles, 3, 3) 14 | # quat: (n_particles, 4) 15 | # weights: (n_particles, n_bones) 16 | 17 | t0 = time.time() 18 | n_bones, _ = bones.shape 19 | n_particles, _ = xyz.shape 20 | 21 | # Compute the bone transformations 22 | bone_transforms = torch.zeros((n_bones, 4, 4), device=device) 23 | 24 | n_adj = relations.shape[1] 25 | 26 | adj_bones = bones[relations] - bones[:, None] # (n_bones, n_adj, 3) 27 | adj_bones_new = (bones[relations] + motions[relations]) - (bones[:, None] + motions[:, None]) # (n_bones, n_adj, 3) 28 | 29 | W = torch.eye(n_adj, device=device)[None].repeat(n_bones, 1, 1) # (n_bones, n_adj, n_adj) 30 | 31 | # fit a transformation 32 | F = adj_bones_new.permute(0, 2, 1) @ W @ adj_bones # (n_bones, 3, 3) 33 | 34 | cov_rank = torch.linalg.matrix_rank(F) # (n_bones,) 35 | 36 | cov_rank_3_mask = cov_rank == 3 # (n_bones,) 37 | cov_rank_2_mask = cov_rank == 2 # (n_bones,) 38 | cov_rank_1_mask = cov_rank == 1 # (n_bones,) 39 | 40 | F_2_3 = F[cov_rank_2_mask | cov_rank_3_mask] # (n_bones, 3, 3) 41 | F_1 = F[cov_rank_1_mask] # (n_bones, 3, 3) 42 | 43 | # 2 or 3 44 | try: 45 | U, S, V = torch.svd(F_2_3) # S: (n_bones, 3) 46 | S = torch.eye(3, device=device, dtype=torch.float32)[None].repeat(F_2_3.shape[0], 1, 1) 47 | neg_det_mask = torch.linalg.det(F_2_3) < 0 48 | if neg_det_mask.sum() > 0: 49 | print(f'[step {step}] F det < 0 for {neg_det_mask.sum()} bones') 50 | S[neg_det_mask, -1, -1] = -1 51 | R = U @ S @ V.permute(0, 2, 1) 52 | except: 53 | print(f'[step {step}] SVD failed') 54 | import ipdb; ipdb.set_trace() 55 | 56 | neg_1_det_mask = torch.abs(torch.linalg.det(R) + 1) < 1e-3 57 | pos_1_det_mask = torch.abs(torch.linalg.det(R) - 1) < 1e-3 58 | bad_det_mask = ~(neg_1_det_mask | pos_1_det_mask) 59 | 60 | if neg_1_det_mask.sum() > 0: 61 | print(f'[step {step}] det -1') 62 | S[neg_1_det_mask, -1, -1] *= -1 63 | R = U @ S @ V.permute(0, 2, 1) 64 | 65 | try: 66 | assert bad_det_mask.sum() == 0 67 | except: 68 | print(f'[step {step}] Bad det') 69 | import ipdb; ipdb.set_trace() 70 | 71 | try: 72 | if cov_rank_1_mask.sum() > 0: 73 | print(f'[step {step}] F rank 1 for {cov_rank_1_mask.sum()} bones') 74 | U, S, V = torch.svd(F_1) # S: (n_bones', 3) 75 | assert torch.allclose(S[:, 1:], torch.zeros_like(S[:, 1:])) 76 | x = torch.tensor([1., 0., 0.], device=device, dtype=torch.float32)[None].repeat(F_1.shape[0], 1) # (n_bones', 3) 77 | axis = U[:, :, 0] # (n_bones', 3) 78 | perp_axis = torch.linalg.cross(axis, x) # (n_bones', 3) 79 | 80 | perp_axis_norm_mask = torch.norm(perp_axis, dim=1) < 1e-6 81 | 82 | R = torch.zeros((F_1.shape[0], 3, 3), device=device, dtype=torch.float32) 83 | if perp_axis_norm_mask.sum() > 0: 84 | print(f'[step {step}] Perp axis 
norm 0 for {perp_axis_norm_mask.sum()} bones') 85 | R[perp_axis_norm_mask] = torch.eye(3, device=device, dtype=torch.float32)[None].repeat(perp_axis_norm_mask.sum(), 1, 1) 86 | 87 | perp_axis = perp_axis[~perp_axis_norm_mask] # (n_bones', 3) 88 | x = x[~perp_axis_norm_mask] # (n_bones', 3) 89 | 90 | perp_axis = perp_axis / torch.norm(perp_axis, dim=1, keepdim=True) # (n_bones', 3) 91 | third_axis = torch.linalg.cross(x, perp_axis) # (n_bones', 3) 92 | assert ((torch.norm(third_axis, dim=1) - 1).abs() < 1e-6).all() 93 | third_axis_after = torch.linalg.cross(axis, perp_axis) # (n_bones', 3) 94 | 95 | X = torch.stack([x, perp_axis, third_axis], dim=-1) 96 | Y = torch.stack([axis, perp_axis, third_axis_after], dim=-1) 97 | R[~perp_axis_norm_mask] = Y @ X.permute(0, 2, 1) 98 | except: 99 | R = torch.zeros((F_1.shape[0], 3, 3), device=device, dtype=torch.float32) 100 | R[:, 0, 0] = 1 101 | R[:, 1, 1] = 1 102 | R[:, 2, 2] = 1 103 | 104 | try: 105 | bone_transforms[:, :3, :3] = R 106 | except: 107 | print(f'[step {step}] Bad R') 108 | bone_transforms[:, 0, 0] = 1 109 | bone_transforms[:, 1, 1] = 1 110 | bone_transforms[:, 2, 2] = 1 111 | bone_transforms[:, :3, 3] = motions 112 | 113 | # Compute the weights 114 | if weights is None: 115 | weights = torch.ones((n_particles, n_bones), device=device) 116 | dist = torch.cdist(xyz[None], bones[None])[0] # (n_particles, n_bones) 117 | dist = torch.clamp(dist, min=1e-4) 118 | weights = 1 / dist 119 | weights = weights / weights.sum(dim=1, keepdim=True) # (n_particles, n_bones) 120 | 121 | # Compute the transformed particles 122 | xyz_transformed = torch.zeros((n_particles, n_bones, 3), device=device) 123 | 124 | xyz_transformed = xyz[:, None] - bones[None] # (n_particles, n_bones, 3) 125 | xyz_transformed = torch.einsum('ijk,jkl->ijl', xyz_transformed, bone_transforms[:, :3, :3].permute(0, 2, 1)) # (n_particles, n_bones, 3) 126 | xyz_transformed = xyz_transformed + bone_transforms[:, :3, 3][None] + bones[None] # (n_particles, n_bones, 3) 127 | xyz_transformed = (xyz_transformed * weights[:, :, None]).sum(dim=1) # (n_particles, 3) 128 | 129 | def quaternion_multiply(q1, q2): 130 | # q1: bsz x 4 131 | # q2: bsz x 4 132 | q = torch.zeros_like(q1) 133 | q[:, 0] = q1[:, 0] * q2[:, 0] - q1[:, 1] * q2[:, 1] - q1[:, 2] * q2[:, 2] - q1[:, 3] * q2[:, 3] 134 | q[:, 1] = q1[:, 0] * q2[:, 1] + q1[:, 1] * q2[:, 0] + q1[:, 2] * q2[:, 3] - q1[:, 3] * q2[:, 2] 135 | q[:, 2] = q1[:, 0] * q2[:, 2] - q1[:, 1] * q2[:, 3] + q1[:, 2] * q2[:, 0] + q1[:, 3] * q2[:, 1] 136 | q[:, 3] = q1[:, 0] * q2[:, 3] + q1[:, 1] * q2[:, 2] - q1[:, 2] * q2[:, 1] + q1[:, 3] * q2[:, 0] 137 | return q 138 | 139 | if quat is not None: 140 | base_quats = kornia.geometry.conversions.rotation_matrix_to_quaternion(bone_transforms[:, :3, :3]) # (n_bones, 4) 141 | base_quats = torch.nn.functional.normalize(base_quats, dim=-1) # (n_particles, 4) 142 | quats = (base_quats[None] * weights[:, :, None]).sum(dim=1) # (n_particles, 4) 143 | quats = torch.nn.functional.normalize(quats, dim=-1) 144 | rot = quaternion_multiply(quats, quat) 145 | 146 | # xyz_transformed: (n_particles, 3) 147 | # rot: (n_particles, 3, 3) / (n_particles, 4) 148 | # weights: (n_particles, n_bones) 149 | return xyz_transformed, rot, weights -------------------------------------------------------------------------------- /experiments/real_world/utils/track_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def sample_points_from_masks(masks, num_points): 5 | """ 6 
| sample points from masks and return its absolute coordinates 7 | 8 | Args: 9 | masks: np.array with shape (n, h, w) 10 | num_points: int 11 | 12 | Returns: 13 | points: np.array with shape (n, points, 2) 14 | """ 15 | n, h, w = masks.shape 16 | points = [] 17 | 18 | for i in range(n): 19 | # find the valid mask points 20 | indices = np.argwhere(masks[i] == 1) 21 | # the output format of np.argwhere is (y, x) and the shape is (num_points, 2) 22 | # we should convert it to (x, y) 23 | indices = indices[:, ::-1] # (num_points, [y x]) to (num_points, [x y]) 24 | 25 | # import pdb; pdb.set_trace() 26 | if len(indices) == 0: 27 | # if there are no valid points, append an empty array 28 | points.append(np.array([])) 29 | continue 30 | 31 | # resampling if there's not enough points 32 | if len(indices) < num_points: 33 | sampled_indices = np.random.choice(len(indices), num_points, replace=True) 34 | else: 35 | sampled_indices = np.random.choice(len(indices), num_points, replace=False) 36 | 37 | sampled_points = indices[sampled_indices] 38 | points.append(sampled_points) 39 | 40 | # convert to np.array 41 | points = np.array(points, dtype=np.float32) 42 | return points -------------------------------------------------------------------------------- /experiments/scripts/train_box.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=box/train \ 7 | train.source_dataset_name=data/box_merged/sub_episodes_v \ 8 | train.dataset_name=box/dataset \ 9 | sim.num_grippers=2 \ 10 | sim.gripper_points=True \ 11 | sim.gripper_forcing=False \ 12 | model.material.radius=0.0 \ 13 | train.dataset_load_skip_frame=3 \ 14 | train.dataset_skip_frame=1 \ 15 | train.num_workers=8 \ 16 | train.batch_size=32 \ 17 | train.training_start_episode=0 \ 18 | train.training_end_episode=303 \ 19 | train.eval_start_episode=303 \ 20 | train.eval_end_episode=323 \ 21 | train.num_iterations=100000 \ 22 | train.iteration_eval_interval=1000 \ 23 | train.iteration_log_interval=10 \ 24 | train.iteration_save_interval=1000 25 | -------------------------------------------------------------------------------- /experiments/scripts/train_bread.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=bread/train \ 7 | train.source_dataset_name=data/bread_merged/sub_episodes_v \ 8 | train.dataset_name=bread/dataset \ 9 | sim.preprocess_scale=3.0 \ 10 | sim.num_grippers=2 \ 11 | sim.n_particles=500 \ 12 | 'sim.num_grids=[100, 100, 100, 0.01]' \ 13 | model.gripper_radius=0.12 \ 14 | model.material.radius=0.0 \ 15 | model.material.pe_num_func_res=4 \ 16 | train.dataset_load_skip_frame=3 \ 17 | train.dataset_skip_frame=1 \ 18 | train.num_workers=8 \ 19 | train.batch_size=16 \ 20 | train.training_start_episode=0 \ 21 | train.training_end_episode=143 \ 22 | train.eval_start_episode=143 \ 23 | train.eval_end_episode=163 \ 24 | train.num_iterations=100000 \ 25 | train.iteration_eval_interval=1000 \ 26 | train.iteration_log_interval=10 \ 27 | train.iteration_save_interval=1000 28 | -------------------------------------------------------------------------------- /experiments/scripts/train_cloth.sh: -------------------------------------------------------------------------------- 1 | 
CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=cloth/train \ 7 | train.source_dataset_name=data/cloth_merged/sub_episodes_v \ 8 | train.dataset_name=cloth/dataset \ 9 | sim.preprocess_scale=0.8 \ 10 | sim.num_grippers=2 \ 11 | train.dataset_load_skip_frame=3 \ 12 | train.dataset_skip_frame=1 \ 13 | train.num_workers=8 \ 14 | train.batch_size=32 \ 15 | train.training_start_episode=0 \ 16 | train.training_end_episode=610 \ 17 | train.eval_start_episode=610 \ 18 | train.eval_end_episode=650 \ 19 | train.num_iterations=100000 \ 20 | train.iteration_eval_interval=1000 \ 21 | train.iteration_log_interval=10 \ 22 | train.iteration_save_interval=1000 23 | -------------------------------------------------------------------------------- /experiments/scripts/train_paperbag.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=paperbag/train \ 7 | train.source_dataset_name=data/paperbag_merged/sub_episodes_v \ 8 | train.dataset_name=paperbag/dataset \ 9 | train.dataset_load_skip_frame=3 \ 10 | train.dataset_skip_frame=1 \ 11 | train.num_workers=8 \ 12 | train.batch_size=32 \ 13 | train.training_start_episode=0 \ 14 | train.training_end_episode=200 \ 15 | train.eval_start_episode=200 \ 16 | train.eval_end_episode=220 \ 17 | train.num_iterations=100000 \ 18 | train.iteration_eval_interval=1000 \ 19 | train.iteration_log_interval=10 \ 20 | train.iteration_save_interval=1000 21 | -------------------------------------------------------------------------------- /experiments/scripts/train_rope.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=rope/train \ 7 | train.source_dataset_name=data/rope_merged/sub_episodes_v \ 8 | train.dataset_name=rope/dataset \ 9 | train.dataset_load_skip_frame=3 \ 10 | train.dataset_skip_frame=1 \ 11 | train.num_workers=8 \ 12 | train.batch_size=32 \ 13 | train.training_start_episode=0 \ 14 | train.training_end_episode=651 \ 15 | train.eval_start_episode=651 \ 16 | train.eval_end_episode=691 \ 17 | train.num_iterations=100000 \ 18 | train.iteration_eval_interval=1000 \ 19 | train.iteration_log_interval=10 \ 20 | train.iteration_save_interval=1000 21 | -------------------------------------------------------------------------------- /experiments/scripts/train_sloth.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python experiments/train/train_eval.py \ 2 | 'gpus=[0]' \ 3 | overwrite=False \ 4 | resume=True \ 5 | debug=True \ 6 | train.name=sloth/train \ 7 | train.source_dataset_name=data/sloth_merged/sub_episodes_v \ 8 | train.dataset_name=sloth/dataset \ 9 | train.dataset_load_skip_frame=3 \ 10 | train.dataset_skip_frame=1 \ 11 | train.num_workers=8 \ 12 | train.batch_size=32 \ 13 | train.training_start_episode=0 \ 14 | train.training_end_episode=113 \ 15 | train.eval_start_episode=113 \ 16 | train.eval_end_episode=133 \ 17 | train.num_iterations=100000 \ 18 | train.iteration_eval_interval=1000 \ 19 | train.iteration_log_interval=10 \ 20 | train.iteration_save_interval=1000 21 | 
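The train_*.sh scripts above all follow one pattern: they invoke experiments/train/train_eval.py with dot-list overrides (train.*, sim.*, model.*) layered over the YAML defaults under cfg/. Below is a minimal, illustrative OmegaConf sketch of how such overrides compose into a cfg object; the base values are placeholders, not the project's actual defaults or entry point.

from omegaconf import OmegaConf

# Placeholder base config standing in for cfg/default.yaml (values are assumptions).
base = OmegaConf.create({
    "train": {"name": "", "batch_size": 16, "num_iterations": 10000},
    "sim": {"num_grippers": 1},
})

# Dot-list overrides in the same style as scripts/train_cloth.sh.
overrides = OmegaConf.from_dotlist([
    "train.name=cloth/train",
    "train.batch_size=32",
    "sim.num_grippers=2",
])

cfg = OmegaConf.merge(base, overrides)
print(cfg.train.name, cfg.train.batch_size, cfg.sim.num_grippers)  # cloth/train 32 2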
-------------------------------------------------------------------------------- /experiments/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/experiments/train/__init__.py -------------------------------------------------------------------------------- /experiments/train/pv_dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import random 3 | from tqdm import tqdm, trange 4 | 5 | import hydra 6 | from omegaconf import DictConfig, OmegaConf 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.backends.cudnn 11 | import math 12 | import pyvista as pv 13 | from dgl.geometry import farthest_point_sampler 14 | 15 | from pgnd.utils import get_root, mkdir 16 | from pgnd.ffmpeg import make_video 17 | 18 | from train.pv_utils import Xvfb, get_camera_custom 19 | 20 | 21 | def fps(x, n, random_start=False): 22 | start_idx = random.randint(0, x.shape[0] - 1) if random_start else 0 23 | fps_idx = farthest_point_sampler(x[None], n, start_idx=start_idx)[0] 24 | fps_idx = fps_idx.to(x.device) 25 | return fps_idx 26 | 27 | 28 | @torch.no_grad() 29 | def render( 30 | cfg, 31 | dataset_root, 32 | episode_names, 33 | iteration=None, 34 | start_step=None, 35 | end_step=None, 36 | save_dir=None, 37 | downsample_indices=None, 38 | clean_bg=False, 39 | ): 40 | render_type = 'pv' 41 | 42 | exp_root: Path = dataset_root 43 | state_root: Path = exp_root / 'state' 44 | 45 | video_path_list = [] 46 | for episode_idx, episode in enumerate(episode_names): 47 | 48 | plotter = pv.Plotter(lighting='three lights', off_screen=True, window_size=(cfg.render.width, cfg.render.height)) 49 | plotter.set_background('white') 50 | plotter.camera_position = get_camera_custom(cfg.render.center, cfg.render.distance, cfg.render.azimuth, cfg.render.elevation) 51 | plotter.enable_shadows() 52 | 53 | # add bounding box 54 | scale_x = cfg.sim.num_grids[0] / (cfg.sim.num_grids[0] - 2 * cfg.render.bound) 55 | scale_y = cfg.sim.num_grids[1] / (cfg.sim.num_grids[1] - 2 * cfg.render.bound) 56 | scale_z = cfg.sim.num_grids[2] / (cfg.sim.num_grids[2] - 2 * cfg.render.bound) 57 | scale = np.array([scale_x, scale_y, scale_z]) 58 | scale_mean = np.power(np.prod(scale), 1 / 3) 59 | bbox = pv.Box(bounds=[0, 1, 0, 1, 0, 1]) 60 | if not clean_bg: 61 | plotter.add_mesh(bbox, style='wireframe', color='black') 62 | 63 | # add axis 64 | if not clean_bg: 65 | for axis, color in enumerate(['r', 'g', 'b']): 66 | mesh = pv.Arrow(start=[0, 0, 0], direction=np.eye(3)[axis], scale=0.2) 67 | plotter.add_mesh(mesh, color=color, show_scalar_bar=False) 68 | 69 | episode_state_root = state_root / episode 70 | 71 | episode_image_root = save_dir / f'{episode}_gt' 72 | mkdir(episode_image_root, overwrite=True, resume=True) 73 | 74 | ckpt_paths = list(sorted(episode_state_root.glob('*.pt'), key=lambda x: int(x.stem))) 75 | if start_step is not None and end_step is not None: 76 | ckpt_paths = ckpt_paths[start_step:end_step] 77 | skip_frame = cfg.train.dataset_skip_frame * cfg.train.dataset_load_skip_frame 78 | ckpt_paths = ckpt_paths[skip_frame::skip_frame] 79 | for i, path in enumerate(tqdm(ckpt_paths, desc=render_type)): 80 | 81 | if i % cfg.render.skip_frame != 0: 82 | continue 83 | 84 | ckpt = torch.load(path, map_location='cpu') 85 | p_x = ckpt['x'].cpu().detach().numpy() 86 | if downsample_indices is not None: 87 | p_x = 
p_x[downsample_indices[0]] 88 | else: 89 | downsample_indices = fps(torch.from_numpy(p_x), cfg.sim.n_particles, random_start=True)[None] 90 | p_x = p_x[downsample_indices[0]] 91 | 92 | x = (p_x - 0.5) * scale + 0.5 93 | 94 | grippers = ckpt['grippers'].cpu().detach().numpy() 95 | n_eef = grippers.shape[0] 96 | 97 | radius = 0.5 * np.power((0.5 ** 3) / x.shape[0], 1 / 3) * scale_mean 98 | x = np.clip(x, radius, 1 - radius) 99 | 100 | polydata = pv.PolyData(x) 101 | plotter.add_mesh(polydata, style='points', name='object', render_points_as_spheres=True, point_size=radius * cfg.render.radius_scale, color=list(cfg.render.reflectance)) 102 | for j in range(n_eef): 103 | gripper = pv.Sphere(center=grippers[j, :3], radius=grippers[j, -2]) 104 | plotter.add_mesh(gripper, color='blue', name=f'gripper_{j}') 105 | 106 | plotter.show(auto_close=False, screenshot=str(episode_image_root / f'{i // cfg.render.skip_frame:04d}.png')) 107 | 108 | plotter.close() 109 | if save_dir is not None: 110 | make_video(episode_image_root, save_dir / f'{episode}_gt.mp4', '%04d.png', cfg.render.fps) 111 | video_path_list.append(save_dir / f'{episode}_gt.mp4') 112 | 113 | return video_path_list 114 | 115 | 116 | @torch.no_grad() 117 | def do_dataset_pv(*args, **kwargs): 118 | with Xvfb(): 119 | ret = render(*args, **kwargs) 120 | return ret -------------------------------------------------------------------------------- /experiments/train/pv_train.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import random 3 | from tqdm import tqdm, trange 4 | 5 | import hydra 6 | from omegaconf import DictConfig, OmegaConf 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.backends.cudnn 11 | import math 12 | import pyvista as pv 13 | 14 | from pgnd.utils import get_root, mkdir 15 | from pgnd.ffmpeg import make_video 16 | 17 | from train.pv_utils import Xvfb, get_camera_custom 18 | 19 | 20 | @torch.no_grad() 21 | def render( 22 | cfg, 23 | log_root, 24 | iteration, 25 | episode_names, 26 | eval_dirname='eval', 27 | eval_postfix='', 28 | dataset_name='', 29 | start_step=None, 30 | end_step=None, 31 | clean_bg=False, 32 | ): 33 | clean_bg = False 34 | 35 | if dataset_name == '': 36 | eval_name = f'{cfg.train.name}/{eval_dirname}/{iteration:06d}' 37 | else: 38 | eval_name = f'{cfg.train.name}/{eval_dirname}/{dataset_name}/{iteration:06d}' 39 | render_type = 'pv' 40 | 41 | exp_root: Path = log_root / eval_name 42 | state_root: Path = exp_root / 'state' 43 | image_root: Path = exp_root / render_type 44 | mkdir(image_root, overwrite=cfg.overwrite, resume=cfg.resume) 45 | 46 | 47 | video_path_list = [] 48 | for episode_idx, episode in enumerate(episode_names): 49 | 50 | plotter = pv.Plotter(lighting='three lights', off_screen=True, window_size=(cfg.render.width, cfg.render.height)) 51 | plotter.set_background('white') 52 | plotter.camera_position = get_camera_custom(cfg.render.center, cfg.render.distance, cfg.render.azimuth, cfg.render.elevation) 53 | plotter.enable_shadows() 54 | 55 | # add bounding box 56 | scale_x = cfg.sim.num_grids[0] / (cfg.sim.num_grids[0] - 2 * cfg.render.bound) 57 | scale_y = cfg.sim.num_grids[1] / (cfg.sim.num_grids[1] - 2 * cfg.render.bound) 58 | scale_z = cfg.sim.num_grids[2] / (cfg.sim.num_grids[2] - 2 * cfg.render.bound) 59 | scale = np.array([scale_x, scale_y, scale_z]) 60 | scale_mean = np.power(np.prod(scale), 1 / 3) 61 | bbox = pv.Box(bounds=[0, 1, 0, 1, 0, 1]) 62 | if not clean_bg: 63 | 
plotter.add_mesh(bbox, style='wireframe', color='black') 64 | 65 | # add axis 66 | if not clean_bg: 67 | for axis, color in enumerate(['r', 'g', 'b']): 68 | mesh = pv.Arrow(start=[0, 0, 0], direction=np.eye(3)[axis], scale=0.2) 69 | plotter.add_mesh(mesh, color=color, show_scalar_bar=False) 70 | 71 | episode_state_root = state_root / episode 72 | episode_image_root = image_root / episode 73 | mkdir(episode_image_root, overwrite=cfg.overwrite, resume=cfg.resume) 74 | 75 | ckpt_paths = list(sorted(episode_state_root.glob('*.pt'), key=lambda x: int(x.stem))) 76 | for i, path in enumerate(tqdm(ckpt_paths, desc=render_type)): 77 | 78 | if i % cfg.render.skip_frame != 0: 79 | continue 80 | 81 | ckpt = torch.load(path, map_location='cpu') 82 | p_x = ckpt['x'].cpu().detach().numpy() 83 | 84 | x = (p_x - 0.5) * scale + 0.5 85 | 86 | grippers = ckpt['grippers'].cpu().detach().numpy() 87 | n_eef = grippers.shape[0] 88 | 89 | radius = 0.5 * np.power((0.5 ** 3) / x.shape[0], 1 / 3) * scale_mean 90 | x = np.clip(x, radius, 1 - radius) 91 | 92 | polydata = pv.PolyData(x) 93 | plotter.add_mesh(polydata, style='points', name='object', render_points_as_spheres=True, point_size=radius * cfg.render.radius_scale, color=list(cfg.render.reflectance)) 94 | for j in range(n_eef): 95 | gripper = pv.Sphere(center=grippers[j, :3], radius=0.04) 96 | plotter.add_mesh(gripper, color='blue', name=f'gripper_{j}') 97 | if 'gripper_x' in ckpt: 98 | gripper_points = ckpt['gripper_x'].cpu().detach().numpy() 99 | gripper_points = (gripper_points - 0.5) * scale + 0.5 100 | gripper_points = np.clip(gripper_points, radius, 1 - radius) 101 | gripper_polydata = pv.PolyData(gripper_points) 102 | plotter.add_mesh(gripper_polydata, style='points', name=f'gripper_points', render_points_as_spheres=True, point_size=radius * cfg.render.radius_scale, color='blue') 103 | 104 | plotter.show(auto_close=False, screenshot=str(episode_image_root / f'{i // cfg.render.skip_frame:04d}.png')) 105 | 106 | plotter.close() 107 | make_video(episode_image_root, image_root / f'{episode}{eval_postfix}.mp4', '%04d.png', cfg.render.fps) 108 | video_path_list.append(image_root / f'{episode}{eval_postfix}.mp4') 109 | 110 | return video_path_list 111 | 112 | 113 | @torch.no_grad() 114 | def do_train_pv(*args, **kwargs): 115 | with Xvfb(): 116 | ret = render(*args, **kwargs) 117 | return ret 118 | -------------------------------------------------------------------------------- /experiments/train/pv_utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from tqdm import tqdm 3 | import time 4 | import math 5 | import psutil 6 | import random 7 | import fcntl 8 | import os 9 | import subprocess 10 | import tempfile 11 | from random import randint 12 | from errno import EACCES 13 | 14 | import hydra 15 | from omegaconf import DictConfig 16 | import numpy as np 17 | import torch 18 | import pyvista as pv 19 | 20 | from pgnd.utils import get_root, mkdir 21 | from pgnd.ffmpeg import make_video 22 | 23 | root: Path = get_root(__file__) 24 | 25 | 26 | class Xvfb(object): 27 | 28 | # Maximum value to use for a display. 
32-bit maxint is the 29 | # highest Xvfb currently supports 30 | MAX_DISPLAY = 2147483647 31 | SLEEP_TIME_BEFORE_START = 0.1 32 | 33 | def __init__( 34 | self, width=800, height=680, colordepth=24, 35 | tempdir=None, display=None, **kwargs): 36 | self.width = width 37 | self.height = height 38 | self.colordepth = colordepth 39 | self._tempdir = tempdir or tempfile.gettempdir() 40 | self.new_display = display 41 | 42 | if not self.xvfb_exists(): 43 | msg = ( 44 | 'Can not find Xvfb. Please install it with:\n' 45 | ' sudo apt install libgl1-mesa-glx xvfb') 46 | raise EnvironmentError(msg) 47 | 48 | self.extra_xvfb_args = ['-screen', '0', '{}x{}x{}'.format( 49 | self.width, self.height, self.colordepth)] 50 | 51 | for key, value in kwargs.items(): 52 | self.extra_xvfb_args += ['-{}'.format(key), value] 53 | 54 | if 'DISPLAY' in os.environ: 55 | self.orig_display = os.environ['DISPLAY'].split(':')[1] 56 | else: 57 | self.orig_display = None 58 | 59 | self.proc = None 60 | 61 | def __enter__(self): 62 | self.start() 63 | return self 64 | 65 | def __exit__(self, exc_type, exc_val, exc_tb): 66 | self.stop() 67 | 68 | def start(self): 69 | if self.new_display is not None: 70 | if not self._get_lock_for_display(self.new_display): 71 | raise ValueError(f'Could not lock display :{self.new_display}') 72 | else: 73 | self.new_display = self._get_next_unused_display() 74 | display_var = ':{}'.format(self.new_display) 75 | self.xvfb_cmd = ['Xvfb', display_var] + self.extra_xvfb_args 76 | with open(os.devnull, 'w') as fnull: 77 | self.proc = subprocess.Popen( 78 | self.xvfb_cmd, stdout=fnull, stderr=fnull, close_fds=True) 79 | # give Xvfb time to start 80 | time.sleep(self.__class__.SLEEP_TIME_BEFORE_START) 81 | ret_code = self.proc.poll() 82 | if ret_code is None: 83 | self._set_display_var(self.new_display) 84 | else: 85 | self._cleanup_lock_file() 86 | raise RuntimeError( 87 | f'Xvfb did not start ({ret_code}): {self.xvfb_cmd}') 88 | 89 | def stop(self): 90 | try: 91 | if self.orig_display is None: 92 | del os.environ['DISPLAY'] 93 | else: 94 | self._set_display_var(self.orig_display) 95 | if self.proc is not None: 96 | try: 97 | self.proc.terminate() 98 | self.proc.wait() 99 | except OSError: 100 | pass 101 | self.proc = None 102 | finally: 103 | self._cleanup_lock_file() 104 | 105 | def xvfb_exists(self): 106 | """Check that Xvfb is available on PATH and is executable.""" 107 | paths = os.environ['PATH'].split(os.pathsep) 108 | return any(os.access(os.path.join(path, 'Xvfb'), os.X_OK) 109 | for path in paths) 110 | 111 | def _cleanup_lock_file(self): 112 | ''' 113 | This should always get called if the process exits safely 114 | with Xvfb.stop() (whether called explicitly, or by __exit__). 115 | If you are ending up with /tmp/X123-lock files when Xvfb is not 116 | running, then Xvfb is not exiting cleanly. Always either call 117 | Xvfb.stop() in a finally block, or use Xvfb as a context manager 118 | to ensure lock files are purged. 119 | ''' 120 | self._lock_display_file.close() 121 | try: 122 | os.remove(self._lock_display_file.name) 123 | except OSError: 124 | pass 125 | 126 | def _get_lock_for_display(self, display): 127 | ''' 128 | In order to ensure multi-process safety, this method attempts 129 | to acquire an exclusive lock on a temporary file whose name 130 | contains the display number for Xvfb. 
131 | ''' 132 | tempfile_path = os.path.join(self._tempdir, '.X{0}-lock'.format(display)) 133 | try: 134 | self._lock_display_file = open(tempfile_path, 'w') 135 | except PermissionError as e: 136 | return False 137 | else: 138 | try: 139 | fcntl.flock(self._lock_display_file, 140 | fcntl.LOCK_EX | fcntl.LOCK_NB) 141 | except BlockingIOError: 142 | return False 143 | else: 144 | return True 145 | 146 | def _get_next_unused_display(self): 147 | ''' 148 | Randomly chooses a display number and tries to acquire a lock for this number. 149 | If the lock could be acquired, returns this number, otherwise choses a new one. 150 | :return: free display number 151 | ''' 152 | while True: 153 | rand = randint(1, self.__class__.MAX_DISPLAY) 154 | if self._get_lock_for_display(rand): 155 | return rand 156 | else: 157 | continue 158 | 159 | def _set_display_var(self, display): 160 | os.environ['DISPLAY'] = ':{}'.format(display) 161 | 162 | 163 | 164 | def get_camera_custom(center=(0.5, 0.3, 0.5), distance=3.4, azimuth=-125, elevation=30): 165 | target = np.array(center) 166 | theta = 90 + azimuth 167 | y = distance * math.sin(math.radians(elevation)) 168 | x = math.cos(math.radians(theta)) * distance * math.cos(math.radians(elevation)) 169 | z = math.sin(math.radians(theta)) * distance * math.cos(math.radians(elevation)) 170 | origin = target + np.array([x, y, z]) 171 | 172 | return [ 173 | origin.tolist(), 174 | target.tolist(), 175 | (0.0, 1.0, 0.0), 176 | ] 177 | -------------------------------------------------------------------------------- /imgs/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/imgs/teaser.png -------------------------------------------------------------------------------- /pgnd/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | numpy.bool = bool # to avoid deprecation error 3 | 4 | from . import utils 5 | from . import ffmpeg 6 | 7 | from . import data 8 | from . import material 9 | from . 
import sim 10 | -------------------------------------------------------------------------------- /pgnd/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import * 2 | from .dataset_gripper import * 3 | -------------------------------------------------------------------------------- /pgnd/data/dataset_gripper.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Union, Optional 3 | from omegaconf import DictConfig 4 | 5 | import os 6 | import torch 7 | import time 8 | import shutil 9 | import json 10 | import yaml 11 | import random 12 | import glob 13 | import kornia 14 | import numpy as np 15 | import pickle as pkl 16 | import open3d as o3d 17 | from torch.utils.data import Dataset 18 | from dgl.geometry import farthest_point_sampler 19 | from sklearn.neighbors import NearestNeighbors 20 | 21 | 22 | def fps(x, n, device, random_start=False): 23 | start_idx = random.randint(0, x.shape[0] - 1) if random_start else 0 24 | fps_idx = farthest_point_sampler(x[None], n, start_idx=start_idx)[0] 25 | fps_idx = fps_idx.to(x.device) 26 | return fps_idx 27 | 28 | 29 | def read_splat(splat_file): 30 | with open(splat_file, "rb") as f: 31 | data = f.read() 32 | pts = [] 33 | colors = [] 34 | scales = [] 35 | quats = [] 36 | opacities = [] 37 | for i in range(0, len(data), 32): 38 | v = np.frombuffer(data[i : i + 12], dtype=np.float32) 39 | s = np.frombuffer(data[i + 12 : i + 24], dtype=np.float32) 40 | c = np.frombuffer(data[i + 24 : i + 28], dtype=np.uint8) / 255 41 | q = np.frombuffer(data[i + 28 : i + 32], dtype=np.uint8) 42 | q = (q * 1.0 - 128) / 128 43 | pts.append(v) 44 | scales.append(s) 45 | colors.append(c[:3]) 46 | quats.append(q) 47 | opacities.append(c[3:]) 48 | return np.array(pts), np.array(colors), np.array(scales), np.array(quats), np.array(opacities) 49 | 50 | 51 | class RealGripperDataset(Dataset): 52 | 53 | def __init__(self, cfg: DictConfig, device: torch.device, train=False) -> None: 54 | super().__init__() 55 | 56 | pts, colors, scales, quats, opacities = read_splat('experiments/log/gs/ckpts/gripper_new.splat') 57 | self.device = device 58 | self.uniform = cfg.sim.uniform or not train 59 | self.n_particles = 500 60 | 61 | R = np.array( 62 | [[1, 0, 0], 63 | [0, 0, -1], 64 | [0, 1, 0]] 65 | ) 66 | eef_global_T = np.array([cfg.model.eef_t[0], cfg.model.eef_t[1], cfg.model.eef_t[2]]) # 1018_sloth: 0.185, 1018_rope_short: 0.013) 67 | pts = pts + eef_global_T 68 | pts = pts @ R.T 69 | scale = cfg.sim.preprocess_scale 70 | pts = pts * scale 71 | 72 | axis = np.array([0, 1, 0]) 73 | angle = -27 # degree 74 | R = o3d.geometry.get_rotation_matrix_from_axis_angle(axis * np.pi / 180 * angle) 75 | translation = np.array([-0.015, 0.06, 0]) 76 | 77 | pts = pts @ R.T 78 | pts = pts + translation 79 | 80 | R = np.array( 81 | [[0, 0, 1], 82 | [0, 1, 0], 83 | [-1, 0, 0]] 84 | ) 85 | pts = pts @ R.T 86 | 87 | self.gripper_pts = torch.from_numpy(pts).to(torch.float32) 88 | 89 | def __len__(self) -> int: 90 | return 10000000 91 | 92 | def __getitem__(self, index): 93 | x = self.gripper_pts 94 | if self.uniform: 95 | downsample_indices = fps(x, self.n_particles, self.device, random_start=True) 96 | else: 97 | downsample_indices = torch.randperm(x.shape[0])[:self.n_particles] 98 | x = x[downsample_indices] 99 | return x, downsample_indices 100 | -------------------------------------------------------------------------------- /pgnd/ffmpeg.py: 
-------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import subprocess 3 | 4 | 5 | def make_video( 6 | image_root: Path, 7 | video_path: Path, 8 | image_pattern: str = '%04d.png', 9 | frame_rate: int = 10): 10 | 11 | subprocess.run([ 12 | 'ffmpeg', 13 | '-y', 14 | '-hide_banner', 15 | '-loglevel', 'error', 16 | '-framerate', str(frame_rate), 17 | '-i', str(image_root / image_pattern), 18 | '-c:v', 'libx264', 19 | '-pix_fmt', 'yuv420p', 20 | str(video_path) 21 | ]) 22 | 23 | 24 | def cat_videos(input_videos: list[Path], output_video: Path): 25 | 26 | output_video.parent.mkdir(parents=True, exist_ok=True) 27 | 28 | num_videos = len(input_videos) 29 | 30 | if num_videos <= 1: 31 | raise ValueError('concatenating <=1 videos') 32 | 33 | video_args = [] 34 | for input_video in input_videos: 35 | video_args.extend(['-i', str(input_video)]) 36 | 37 | subprocess.run([ 38 | 'ffmpeg', 39 | '-y', 40 | '-hide_banner', 41 | '-loglevel', 'error', 42 | ] + video_args + [ 43 | '-filter_complex', 44 | '{}hstack=inputs={}[v]'.format(''.join([f'[{i}:v]' for i in range(num_videos)]), num_videos), 45 | '-map', '[v]', 46 | str(output_video) 47 | ]) 48 | -------------------------------------------------------------------------------- /pgnd/material/__init__.py: -------------------------------------------------------------------------------- 1 | from .pgnd import * 2 | -------------------------------------------------------------------------------- /pgnd/material/network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kywind/pgnd/2c4fdf9399eefc41745da8f2c0abf8adb60cdda0/pgnd/material/network/__init__.py -------------------------------------------------------------------------------- /pgnd/material/network/nerf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class CondNeRFModel(torch.nn.Module): 6 | def __init__( 7 | self, 8 | xyz_dim=3, 9 | condition_dim=64, 10 | out_channel=3, 11 | num_layers=8, 12 | hidden_size=256, 13 | skip_connect_every=4, 14 | ): 15 | super(CondNeRFModel, self).__init__() 16 | 17 | self.dim_xyz = xyz_dim 18 | self.dim_cond = condition_dim 19 | self.skip_connect_every = skip_connect_every 20 | 21 | self.layer1 = torch.nn.Linear(self.dim_xyz + self.dim_cond, hidden_size) 22 | 23 | self.layers = torch.nn.ModuleList() 24 | for i in range(num_layers - 1): 25 | if self.skip_connect_every is not None and i % self.skip_connect_every == 0 and i > 0 and i != num_layers - 1: 26 | self.layers.append(torch.nn.Linear(self.dim_xyz + self.dim_cond + hidden_size, hidden_size)) 27 | else: 28 | self.layers.append(torch.nn.Linear(hidden_size, hidden_size)) 29 | 30 | self.layers_out = torch.nn.ModuleList([ 31 | torch.nn.Linear(hidden_size, hidden_size // 2), 32 | torch.nn.Linear(hidden_size // 2, out_channel), 33 | ]) 34 | self.relu = nn.ReLU() 35 | 36 | def forward(self, xyz, cond): 37 | assert xyz.shape[1] == self.dim_xyz 38 | xyz = xyz[..., :self.dim_xyz] 39 | xyz = torch.cat([xyz, cond], 1) 40 | x = self.layer1(xyz) 41 | for i in range(len(self.layers)): 42 | if ( 43 | self.skip_connect_every is not None 44 | and i % self.skip_connect_every == 0 45 | and i > 0 46 | and i != len(self.layers) - 1 47 | ): 48 | x = torch.cat((x, xyz), dim=-1) 49 | x = self.relu(self.layers[i](x)) 50 | for i in range(len(self.layers_out) - 1): 51 | x = self.relu(self.layers_out[i](x)) 52 | x = 
self.layers_out[-1](x) 53 | return x 54 | 55 | -------------------------------------------------------------------------------- /pgnd/material/network/pointnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.parallel 4 | import torch.utils.data 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import torch.nn.functional as F 8 | 9 | 10 | class STN3d(nn.Module): 11 | def __init__(self, channel): 12 | super(STN3d, self).__init__() 13 | self.conv1 = torch.nn.Conv1d(channel, 64, 1) 14 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 15 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 16 | self.fc1 = nn.Linear(1024, 512) 17 | self.fc2 = nn.Linear(512, 256) 18 | self.fc3 = nn.Linear(256, 9) 19 | self.relu = nn.ReLU() 20 | 21 | self.bn1 = nn.BatchNorm1d(64) 22 | self.bn2 = nn.BatchNorm1d(128) 23 | self.bn3 = nn.BatchNorm1d(1024) 24 | self.bn4 = nn.BatchNorm1d(512) 25 | self.bn5 = nn.BatchNorm1d(256) 26 | 27 | def forward(self, x, x_mask): 28 | batchsize = x.size()[0] 29 | x_mask = x_mask.unsqueeze(1) # B, N -> B, 1, N 30 | x = F.relu(self.bn1(self.conv1(x))) 31 | x = F.relu(self.bn2(self.conv2(x))) 32 | x = F.relu(self.bn3(self.conv3(x))) 33 | x = x * x_mask - (~x_mask) * 1e10 34 | x = torch.max(x, 2, keepdim=True)[0] 35 | assert x.min() > -1e10 36 | x = x.view(-1, 1024) 37 | 38 | x = F.relu(self.bn4(self.fc1(x))) 39 | x = F.relu(self.bn5(self.fc2(x))) 40 | x = self.fc3(x) 41 | 42 | iden = Variable(torch.from_numpy(np.array([1, 0, 0, 0, 1, 0, 0, 0, 1]).astype(np.float32))).view(1, 9).repeat( 43 | batchsize, 1) 44 | if x.is_cuda: 45 | iden = iden.cuda() 46 | x = x + iden 47 | x = x.view(-1, 3, 3) 48 | return x 49 | 50 | 51 | class STNkd(nn.Module): 52 | def __init__(self, k=64): 53 | super(STNkd, self).__init__() 54 | self.conv1 = torch.nn.Conv1d(k, 64, 1) 55 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 56 | self.conv3 = torch.nn.Conv1d(128, 1024, 1) 57 | self.fc1 = nn.Linear(1024, 512) 58 | self.fc2 = nn.Linear(512, 256) 59 | self.fc3 = nn.Linear(256, k * k) 60 | self.relu = nn.ReLU() 61 | 62 | self.bn1 = nn.BatchNorm1d(64) 63 | self.bn2 = nn.BatchNorm1d(128) 64 | self.bn3 = nn.BatchNorm1d(1024) 65 | self.bn4 = nn.BatchNorm1d(512) 66 | self.bn5 = nn.BatchNorm1d(256) 67 | 68 | self.k = k 69 | 70 | def forward(self, x, x_mask): 71 | batchsize = x.size()[0] 72 | x_mask = x_mask.unsqueeze(1) 73 | x = F.relu(self.bn1(self.conv1(x))) 74 | x = F.relu(self.bn2(self.conv2(x))) 75 | x = F.relu(self.bn3(self.conv3(x))) 76 | x = x * x_mask - (~x_mask) * 1e10 77 | x = torch.max(x, 2, keepdim=True)[0] 78 | assert x.min() > -1e10 79 | x = x.view(-1, 1024) 80 | 81 | x = F.relu(self.bn4(self.fc1(x))) 82 | x = F.relu(self.bn5(self.fc2(x))) 83 | x = self.fc3(x) 84 | 85 | iden = Variable(torch.from_numpy(np.eye(self.k).flatten().astype(np.float32))).view(1, self.k * self.k).repeat( 86 | batchsize, 1) 87 | if x.is_cuda: 88 | iden = iden.cuda() 89 | x = x + iden 90 | x = x.view(-1, self.k, self.k) 91 | return x 92 | 93 | 94 | class PointNetEncoder(nn.Module): 95 | def __init__(self, global_feat=True, feature_transform=False, feature_dim=1024, channel=3): 96 | super(PointNetEncoder, self).__init__() 97 | self.stn = STN3d(channel) 98 | self.conv1 = torch.nn.Conv1d(channel, 64, 1) 99 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 100 | self.conv3 = torch.nn.Conv1d(128, feature_dim, 1) 101 | self.bn1 = nn.BatchNorm1d(64) 102 | self.bn2 = nn.BatchNorm1d(128) 103 | self.bn3 = nn.BatchNorm1d(feature_dim) 104 | 
self.feature_dim = feature_dim 105 | self.global_feat = global_feat 106 | self.feature_transform = feature_transform 107 | if self.feature_transform: 108 | self.fstn = STNkd(k=64) 109 | 110 | def forward(self, x, x_mask=None): 111 | B, D, N = x.size() 112 | if x_mask is None: 113 | x_mask = torch.ones(B, N, dtype=torch.bool, device=x.device) 114 | if x_mask.dtype != torch.bool: 115 | x_mask = x_mask > 0 116 | trans = self.stn(x, x_mask) 117 | x = x.transpose(2, 1) 118 | if D > 3: 119 | feature = x[:, :, 3:] 120 | x = x[:, :, :3] 121 | x = torch.bmm(x, trans) 122 | if D > 3: 123 | x = torch.cat([x, feature], dim=2) 124 | x = x.transpose(2, 1) 125 | x = F.relu(self.bn1(self.conv1(x))) 126 | 127 | if self.feature_transform: 128 | trans_feat = self.fstn(x, x_mask) 129 | x = x.transpose(2, 1) 130 | x = torch.bmm(x, trans_feat) 131 | x = x.transpose(2, 1) 132 | else: 133 | trans_feat = None 134 | 135 | pointfeat = x 136 | x = F.relu(self.bn2(self.conv2(x))) 137 | x = self.bn3(self.conv3(x)) 138 | 139 | if not self.global_feat: 140 | x = x.view(-1, self.feature_dim, N) 141 | return x, trans, trans_feat 142 | 143 | x = torch.max(x, 2, keepdim=True)[0] 144 | x = x.view(-1, self.feature_dim) 145 | if self.global_feat: 146 | return x, trans, trans_feat 147 | else: 148 | x = x.view(-1, self.feature_dim, 1).repeat(1, 1, N) 149 | return torch.cat([x, pointfeat], 1), trans, trans_feat 150 | 151 | -------------------------------------------------------------------------------- /pgnd/material/network/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def get_grid_locations(x, num_grids_list, dx): 5 | bsz = x.shape[0] 6 | x_grid = torch.stack(torch.meshgrid( 7 | torch.linspace(0, (num_grids_list[0] - 1) * dx, num_grids_list[0], device=x.device), 8 | torch.linspace(0, (num_grids_list[1] - 1) * dx, num_grids_list[1], device=x.device), 9 | torch.linspace(0, (num_grids_list[2] - 1) * dx, num_grids_list[2], device=x.device), 10 | ), dim=-1).reshape(-1, 3) 11 | grid_idxs = torch.stack(torch.meshgrid( 12 | torch.linspace(0, (num_grids_list[0] - 1), num_grids_list[0], device=x.device), 13 | torch.linspace(0, (num_grids_list[1] - 1), num_grids_list[1], device=x.device), 14 | torch.linspace(0, (num_grids_list[2] - 1), num_grids_list[2], device=x.device), 15 | ), dim=-1).reshape(-1, 3) 16 | 17 | grid_hits = torch.zeros(bsz, num_grids_list[0], num_grids_list[1], num_grids_list[2], device=x.device) 18 | for i in range(3): 19 | for j in range(3): 20 | for k in range(3): 21 | grid_hits[:, \ 22 | ((x[:, :, 0] / dx - 0.5).int() + i).clamp(0, num_grids_list[0] - 1), \ 23 | ((x[:, :, 1] / dx - 0.5).int() + j).clamp(0, num_grids_list[1] - 1), \ 24 | ((x[:, :, 2] / dx - 0.5).int() + k).clamp(0, num_grids_list[2] - 1)] = 1 25 | grid_hits = grid_hits.sum(0) > 0 26 | 27 | x_grid = x_grid[grid_hits.reshape(-1)].reshape(1, -1, 3).repeat(bsz, 1, 1) 28 | grid_idxs = grid_idxs[grid_hits.reshape(-1)] 29 | return x_grid, grid_idxs 30 | 31 | 32 | def fill_grid_locations(feat, grid_idxs, num_grids_list): 33 | # feat: (bsz, num_active_grids, feature_dim) 34 | # grid_idxs: (num_active_grids, 3) 35 | bsz = feat.shape[0] 36 | feat_filled = torch.zeros(bsz, num_grids_list[0], num_grids_list[1], num_grids_list[2], 3, device=feat.device) 37 | grid_idxs_1d = grid_idxs[:, 0] * num_grids_list[1] * num_grids_list[2] + grid_idxs[:, 1] * num_grids_list[2] + grid_idxs[:, 2] 38 | feat_filled = feat_filled.reshape(bsz, -1, 3) 39 | feat_filled[:, grid_idxs_1d.long()] = feat.clone() 40 | 
feat_filled = feat_filled.reshape(bsz, num_grids_list[0], num_grids_list[1], num_grids_list[2], 3) 41 | return feat_filled 42 | -------------------------------------------------------------------------------- /pgnd/material/pgnd.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from omegaconf import DictConfig 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | from torch import Tensor 8 | 9 | from .network.pointnet import PointNetEncoder 10 | from .network.nerf import CondNeRFModel 11 | from .network.utils import get_grid_locations, fill_grid_locations 12 | 13 | 14 | class PGNDModel(nn.Module): 15 | 16 | def __init__(self, cfg: DictConfig) -> None: 17 | super().__init__() 18 | 19 | self.feature_dim = 64 20 | self.radius = cfg.model.material.radius 21 | self.n_history = cfg.sim.n_history 22 | self.num_grids_list = cfg.sim.num_grids[:3] 23 | self.dx = cfg.sim.num_grids[3] 24 | self.inv_dx = 1 / self.dx 25 | self.requires_grad = cfg.model.material.requires_grad 26 | self.pe_num_func = int(np.log2(self.inv_dx)) + cfg.model.material.pe_num_func_res 27 | self.pe_dim = 3 + self.pe_num_func * 6 28 | self.output_scale = cfg.model.material.output_scale 29 | self.input_scale = cfg.model.material.input_scale 30 | self.absolute_y = cfg.model.material.absolute_y 31 | 32 | self.encoder = PointNetEncoder( 33 | global_feat=(cfg.model.material.radius <= 0), 34 | feature_transform=False, 35 | feature_dim=self.feature_dim, 36 | channel=6 * (1 + self.n_history), 37 | ) 38 | self.decoder = CondNeRFModel( 39 | xyz_dim=self.pe_dim, 40 | condition_dim=self.feature_dim, 41 | out_channel=3, 42 | num_layers=2, 43 | hidden_size=64, 44 | skip_connect_every=4, 45 | ) 46 | 47 | def positional_encoding(self, tensor): 48 | num_encoding_functions = self.pe_num_func 49 | if num_encoding_functions == 0: 50 | assert include_input 51 | return tensor 52 | 53 | encoding = [tensor] # include the input itself 54 | frequency_bands = 2.0 ** torch.linspace( 55 | 0.0, 56 | num_encoding_functions - 1, 57 | num_encoding_functions, 58 | dtype=tensor.dtype, 59 | device=tensor.device, 60 | ) 61 | 62 | for freq in frequency_bands: 63 | for func in [torch.sin, torch.cos]: 64 | encoding.append(func(tensor * freq)) 65 | 66 | # Special case, for no positional encoding 67 | if len(encoding) == 1: 68 | return encoding[0] 69 | else: 70 | return torch.cat(encoding, dim=-1) 71 | 72 | def forward(self, x: Tensor, v: Tensor, x_his: Tensor, v_his: Tensor, enabled: Tensor) -> Tensor: 73 | # x: (bsz, num_particles, 3) 74 | # v: (bsz, num_particles, 3) 75 | bsz = x.shape[0] 76 | num_particles = x.shape[1] 77 | v = v * self.input_scale 78 | v_his = v_his * self.input_scale 79 | 80 | x_his = x_his.reshape(bsz, num_particles, self.n_history, 3) 81 | v_his = v_his.reshape(bsz, num_particles, self.n_history, 3) 82 | x_his = x_his.detach() 83 | v_his = v_his.detach() 84 | 85 | x_grid, grid_idxs = get_grid_locations(x, self.num_grids_list, self.dx) 86 | x_grid = x_grid.detach() 87 | grid_idxs = grid_idxs.detach() 88 | 89 | # centering 90 | x_center = x.mean(1, keepdim=True) 91 | if self.absolute_y: 92 | x_center[:, :, 1] = 0 # only centering x and z 93 | x = x - x_center 94 | x_his = x_his - x_center[:, :, None] 95 | if self.training: 96 | # random azimuth 97 | theta = torch.rand(bsz, 1, device=x.device) * 2 * np.pi 98 | rot = torch.stack([ 99 | torch.cos(theta), torch.zeros_like(theta), torch.sin(theta), 100 | torch.zeros_like(theta), torch.ones_like(theta), 
torch.zeros_like(theta), 101 | -torch.sin(theta), torch.zeros_like(theta), torch.cos(theta), 102 | ], dim=-1).reshape(bsz, 3, 3) 103 | inv_rot = rot.transpose(1, 2) 104 | else: 105 | rot = torch.eye(3, dtype=x.dtype, device=x.device).unsqueeze(0).repeat(bsz, 1, 1) 106 | inv_rot = rot.transpose(1, 2) 107 | x = torch.einsum('bij,bjk->bik', x, rot) 108 | x_his = torch.einsum('bcij,bjk->bcik', x_his, rot) # (bsz, num_particles, n_history, 3) 109 | v = torch.einsum('bij,bjk->bik', v, rot) 110 | v_his = torch.einsum('bcij,bjk->bcik', v_his, rot) # (bsz, num_particles, n_history, 3) 111 | 112 | x_grid = x_grid - x_center 113 | x_grid = x_grid @ rot 114 | x_his = x_his.reshape(bsz, num_particles, self.n_history * 3) 115 | v_his = v_his.reshape(bsz, num_particles, self.n_history * 3) 116 | 117 | feat = torch.cat([x, v, x_his, v_his], dim=-1) # (bsz, num_particles, 6 * (1 + n_history)) 118 | feat = feat.permute(0, 2, 1) # (bsz, 6 * (1 + n_history), num_particles) 119 | feat, trans, trans_feat = self.encoder(feat, enabled) # feat: (bsz, feature_dim, num_particles) 120 | 121 | if self.radius > 0: 122 | # aggregate neighborhood 123 | # x.shape: (bsz, num_particles, 3) 124 | # x_grid.shape: (bsz, num_grids_total, 3) 125 | dist_pt_grid = torch.cdist(x_grid, x, p=2) 126 | mask = dist_pt_grid < self.radius # (bsz, num_grids_total, num_particles) 127 | mask_normed = mask / (mask.sum(dim=-1, keepdim=True) + 1e-5) # for each grid, normalize the weights 128 | mask_normed = mask_normed.detach() 129 | feat = mask_normed @ feat.permute(0, 2, 1) # (bsz, num_grids_total, feature_dim) 130 | else: 131 | # global max pooling 132 | feat = feat[:, None, :].repeat(1, x_grid.shape[1], 1) # (bsz, num_grids_total, feature_dim) 133 | 134 | # positional encoding and decoder 135 | feat = feat.reshape(-1, self.feature_dim) 136 | x_grid = x_grid.reshape(-1, 3) 137 | x_grid = self.positional_encoding(x_grid) 138 | feat = self.decoder(x_grid, feat) 139 | feat = feat * self.output_scale 140 | feat = feat.reshape(bsz, -1, feat.shape[-1]) 141 | feat = torch.bmm(feat, inv_rot) 142 | 143 | # expand to full grid 144 | feat = fill_grid_locations(feat, grid_idxs, self.num_grids_list) 145 | return feat 146 | -------------------------------------------------------------------------------- /pgnd/sim/__init__.py: -------------------------------------------------------------------------------- 1 | from .friction import * 2 | from .model import * 3 | from .sim import * 4 | from .utils import * 5 | -------------------------------------------------------------------------------- /pgnd/sim/friction.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Friction(nn.Module): 6 | def __init__(self, mu_init=0.5): 7 | super(Friction, self).__init__() 8 | self.mu = torch.nn.Parameter(torch.tensor(mu_init)) 9 | 10 | def clip(self): 11 | self.mu.data = torch.clamp(self.mu.data, 0, 1) 12 | -------------------------------------------------------------------------------- /pgnd/sim/model.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Union, Sequence, Any 2 | from omegaconf import DictConfig 3 | import numpy as np 4 | import torch 5 | from torch import Tensor 6 | import warp as wp 7 | 8 | from .utils import ConstantBatch, StaticsBatch, CollidersBatch, GridDataBatch, ParticleDataBatch 9 | 10 | 11 | class SimModelBatch: 12 | 13 | def __init__(self, constant: ConstantBatch, device: wp.context.Devicelike 
= None, batch_size: int = 1, requires_grad: bool = False) -> None: 14 | self.constant = constant 15 | self.device = wp.get_device(device) 16 | self.requires_grad = requires_grad 17 | self.batch_size = batch_size 18 | 19 | def update_friction(self, friction) -> None: 20 | self.constant.update_friction(friction, self.requires_grad) 21 | 22 | @staticmethod 23 | @wp.kernel 24 | def grid_op_batch( 25 | constant: ConstantBatch, 26 | colliders: CollidersBatch, 27 | grid_curr: GridDataBatch, 28 | grid_next: GridDataBatch, 29 | ) -> None: 30 | 31 | b, px, py, pz = wp.tid() 32 | 33 | v = grid_curr.v[b, px, py, pz] 34 | 35 | friction = constant.friction[b, 0] 36 | 37 | if px < constant.bound and v[0] < 0.0: 38 | impulse = wp.vec3(v[0], 0.0, 0.0) 39 | v = wp.vec3(0.0, v[1], v[2]) 40 | friction_impulse = wp.float32(-1.) * friction * wp.length(impulse) * wp.normalize(v) 41 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 42 | v = v + friction_impulse 43 | if py < constant.bound and v[1] < 0.0: 44 | impulse = wp.vec3(0.0, v[1], 0.0) 45 | v = wp.vec3(v[0], 0.0, v[2]) 46 | friction_impulse = wp.float32(-1.) * friction * wp.length(impulse) * wp.normalize(v) 47 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 48 | v = v + friction_impulse 49 | if pz < constant.bound and v[2] < 0.0: 50 | impulse = wp.vec3(0.0, 0.0, v[2]) 51 | v = wp.vec3(v[0], v[1], 0.0) 52 | friction_impulse = wp.float32(-1.) * friction * wp.length(impulse) * wp.normalize(v) 53 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 54 | v = v + friction_impulse 55 | if px > int(constant.num_grids_list[0]) - constant.bound and v[0] > 0.0: 56 | impulse = wp.vec3(v[0], 0.0, 0.0) 57 | v = wp.vec3(0.0, v[1], v[2]) 58 | friction_impulse = wp.float32(-1.) * friction * wp.length(impulse) * wp.normalize(v) 59 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 60 | v = v + friction_impulse 61 | if py > int(constant.num_grids_list[1]) - constant.bound and v[1] > 0.0: 62 | impulse = wp.vec3(0.0, v[1], 0.0) 63 | v = wp.vec3(v[0], 0.0, v[2]) 64 | friction_impulse = wp.float32(-1.) * friction * wp.length(impulse) * wp.normalize(v) 65 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 66 | v = v + friction_impulse 67 | if pz > int(constant.num_grids_list[2]) - constant.bound and v[2] > 0.0: 68 | impulse = wp.vec3(0.0, 0.0, v[2]) 69 | v = wp.vec3(v[0], v[1], 0.0) 70 | friction_impulse = wp.float32(-1.) 
* friction * wp.length(impulse) * wp.normalize(v) 71 | friction_impulse *= wp.min(1.0, wp.length(v) / (wp.length(friction_impulse) + 1e-10)) 72 | v = v + friction_impulse 73 | 74 | for gripper_id in range(colliders.gripper_centers.shape[1]): 75 | 76 | dx = wp.float32(px) * constant.dx - colliders.gripper_centers[b, gripper_id][0] 77 | dy = wp.float32(py) * constant.dx - colliders.gripper_centers[b, gripper_id][1] 78 | dz = wp.float32(pz) * constant.dx - colliders.gripper_centers[b, gripper_id][2] 79 | 80 | grid_from_gripper = wp.vec3(dx, dy, dz) 81 | grid_from_gripper_norm = wp.length(grid_from_gripper) 82 | 83 | gripper_add_radii = 0.0 84 | if grid_from_gripper_norm < colliders.gripper_radii[b, gripper_id] + gripper_add_radii and colliders.gripper_open[b, gripper_id] < 0.5: 85 | if wp.length(colliders.gripper_quat[b, gripper_id]) > 0: # filter out 86 | gripper_vel = colliders.gripper_vels[b, gripper_id] 87 | gripper_quat_vel = colliders.gripper_quat_vel[b, gripper_id] 88 | 89 | gripper_angular_vel = wp.length(gripper_quat_vel) 90 | gripper_quat_axis = gripper_quat_vel / (gripper_angular_vel + 1e-10) 91 | 92 | grid_from_gripper_axis = grid_from_gripper - \ 93 | wp.dot(gripper_quat_axis, grid_from_gripper) * gripper_quat_axis 94 | v = wp.cross(gripper_quat_vel, grid_from_gripper_axis) + gripper_vel 95 | 96 | else: 97 | v = colliders.gripper_vels[b, gripper_id] 98 | 99 | grid_next.v[b, px, py, pz] = v 100 | 101 | @staticmethod 102 | @wp.kernel 103 | def g2p_batch( 104 | constant: ConstantBatch, 105 | statics: StaticsBatch, 106 | particle_curr: ParticleDataBatch, 107 | grid_next: GridDataBatch, 108 | particle_next: ParticleDataBatch, 109 | ) -> None: 110 | 111 | b, p = wp.tid() 112 | 113 | if statics.enabled[b, p] == 0: 114 | return 115 | 116 | p_x = particle_curr.x[b, p] * constant.inv_dx 117 | base_x = int(p_x[0] - 0.5) 118 | base_y = int(p_x[1] - 0.5) 119 | base_z = int(p_x[2] - 0.5) 120 | f_x = p_x - wp.vec3( 121 | float(base_x), 122 | float(base_y), 123 | float(base_z)) 124 | 125 | # quadratic kernels [http://mpm.graphics Eqn. 
123, with x=fx,fx-1,fx-2] 126 | wa = wp.vec3(1.5) - f_x 127 | wb = f_x - wp.vec3(1.0) 128 | wc = f_x - wp.vec3(0.5) 129 | 130 | w = wp.mat33( 131 | wp.cw_mul(wa, wa) * 0.5, 132 | wp.vec3(0.75) - wp.cw_mul(wb, wb), 133 | wp.cw_mul(wc, wc) * 0.5, 134 | ) 135 | 136 | new_v = wp.vec3(0.0) 137 | 138 | for i in range(3): 139 | for j in range(3): 140 | for k in range(3): 141 | weight = w[0, i] * w[1, j] * w[2, k] 142 | 143 | v = grid_next.v[b, base_x + i, base_y + j, base_z + k] 144 | new_v = new_v + weight * v 145 | 146 | particle_next.v[b, p] = new_v 147 | 148 | bound = statics.clip_bound[b, p] * constant.dx + constant.eps 149 | new_x = particle_curr.x[b, p] + constant.dt * new_v 150 | new_x = wp.vec3( 151 | wp.clamp(new_x[0], 0.0 + bound, float(constant.num_grids_list[0]) * constant.dx - bound), 152 | wp.clamp(new_x[1], 0.0 + bound, float(constant.num_grids_list[1]) * constant.dx - bound), 153 | wp.clamp(new_x[2], 0.0 + bound, float(constant.num_grids_list[2]) * constant.dx - bound), 154 | ) 155 | particle_next.x[b, p] = new_x 156 | 157 | 158 | def build_model( 159 | cfg: DictConfig, 160 | batch_size: int, 161 | device: wp.context.Devicelike = None, 162 | requires_grad: bool = False 163 | ) -> SimModelBatch: 164 | 165 | dt: float = eval(cfg.sim.dt) if isinstance(cfg.sim.dt, str) else cfg.sim.dt 166 | bound: int = cfg.sim.bound 167 | eps: float = cfg.sim.eps 168 | assert len(cfg.sim.num_grids) == 4 169 | num_grids_list: list = cfg.sim.num_grids[:3] 170 | dx: float = cfg.sim.num_grids[3] 171 | inv_dx: float = 1 / dx 172 | 173 | constant = ConstantBatch() 174 | constant.init() 175 | constant.dt = dt 176 | constant.bound = bound 177 | constant.dx = dx 178 | constant.inv_dx = inv_dx 179 | constant.eps = eps 180 | friction = np.array([cfg.model.friction.value], dtype=np.float32)[None].repeat(batch_size, axis=0) 181 | constant.friction = wp.from_numpy(friction, dtype=float) # [bsz, 1] 182 | constant.num_grids_list = wp.vec3(*num_grids_list) 183 | 184 | model = SimModelBatch(constant, device, batch_size, requires_grad) 185 | return model 186 | -------------------------------------------------------------------------------- /pgnd/sim/sim.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from omegaconf import DictConfig 3 | import torch 4 | import torch.autograd as autograd 5 | import torch.nn as nn 6 | from torch import Tensor 7 | import warp as wp 8 | 9 | from .utils import StaticsBatch, CollidersBatch, ParticleDataBatch, GridDataBatch 10 | from .model import SimModelBatch, build_model 11 | from pgnd.utils import Tape, CondTape 12 | 13 | 14 | class SimFunctionWithFrictionBatch(autograd.Function): 15 | 16 | @staticmethod 17 | def forward( 18 | ctx: autograd.function.FunctionCtx, 19 | model: SimModelBatch, 20 | statics: StaticsBatch, 21 | colliders: CollidersBatch, 22 | particles_curr: ParticleDataBatch, 23 | particles_next: ParticleDataBatch, 24 | grid_curr: GridDataBatch, 25 | grid_next: GridDataBatch, 26 | friction: Tensor, 27 | x: Tensor, 28 | v: Tensor, 29 | pred: Tensor) -> tuple[Tensor, Tensor]: 30 | 31 | tape: Tape = Tape() 32 | model.update_friction(friction) 33 | particles_curr.from_torch(x=x, v=v) 34 | grid_curr.clear() 35 | grid_curr.zero_grad() 36 | grid_curr.from_torch(v=pred) 37 | grid_next.clear() 38 | grid_next.zero_grad() 39 | 40 | device = model.device 41 | constant = model.constant 42 | batch_size = model.batch_size 43 | 44 | num_grids_list = [int(constant.num_grids_list[0]), int(constant.num_grids_list[1]), 
int(constant.num_grids_list[2])] 45 | num_particles = particles_curr.x.shape[1] 46 | 47 | with CondTape(tape, model.requires_grad): 48 | wp.launch(model.grid_op_batch, 49 | dim=[batch_size] + [num_grids_list[0], num_grids_list[1], num_grids_list[2]], 50 | inputs=[constant, colliders, grid_curr], 51 | outputs=[grid_next], 52 | device=device 53 | ) 54 | wp.launch(model.g2p_batch, 55 | dim=[batch_size, num_particles], 56 | inputs=[constant, statics, particles_curr, grid_next], 57 | outputs=[particles_next], 58 | device=device 59 | ) 60 | 61 | x_next, v_next = particles_next.to_torch() 62 | 63 | ctx.tape = tape 64 | ctx.particles_curr = particles_curr 65 | ctx.particles_next = particles_next 66 | ctx.grid_curr = grid_curr 67 | 68 | return x_next, v_next 69 | 70 | @staticmethod 71 | def backward( 72 | ctx: autograd.function.FunctionCtx, 73 | grad_x_next: Tensor, 74 | grad_v_next: Tensor, 75 | ) -> tuple[None, None, None, None, None, None, None, None, Tensor, Tensor, Tensor]: 76 | 77 | tape: Tape = ctx.tape 78 | particles_curr: ParticleDataBatch = ctx.particles_curr 79 | particles_next: ParticleDataBatch = ctx.particles_next 80 | grid_curr: GridDataBatch = ctx.grid_curr 81 | 82 | if torch.isnan(grad_x_next).any() or torch.isnan(grad_v_next).any(): 83 | import ipdb; ipdb.set_trace() 84 | 85 | tape.backward( 86 | grads={ 87 | particles_next.x: wp.from_torch(grad_x_next.contiguous(), dtype=wp.vec3), 88 | particles_next.v: wp.from_torch(grad_v_next.contiguous(), dtype=wp.vec3), 89 | } 90 | ) 91 | 92 | grad_x, grad_v = particles_curr.to_torch_grad() 93 | grad_pred = grid_curr.to_torch_grad() 94 | 95 | if grad_x is not None: 96 | torch.nan_to_num_(grad_x, 0.0, 0.0, 0.0) 97 | if grad_v is not None: 98 | torch.nan_to_num_(grad_v, 0.0, 0.0, 0.0) 99 | if grad_pred is not None: 100 | torch.nan_to_num_(grad_pred, 0.0, 0.0, 0.0) 101 | 102 | return None, None, None, None, None, None, None, None, grad_x, grad_v, grad_pred 103 | 104 | 105 | class CacheDiffSimWithFrictionBatch(nn.Module): 106 | 107 | def __init__(self, cfg: DictConfig, num_steps: int, batch_size: int, device: wp.context.Devicelike = None, requires_grad: bool = False) -> None: 108 | super().__init__() 109 | self.model = build_model(cfg=cfg, batch_size=batch_size, device=device, requires_grad=requires_grad) 110 | self.curr_particles = [None for _ in range(num_steps)] 111 | self.next_particles = [None for _ in range(num_steps)] 112 | self.curr_grids = [None for _ in range(num_steps)] 113 | self.next_grids = [None for _ in range(num_steps)] 114 | 115 | def forward(self, statics: StaticsBatch, colliders: CollidersBatch, step: int, x: Tensor, v: Tensor, friction: Tensor, pred: Tensor) -> tuple[Tensor, Tensor]: 116 | shape = (x.size(0), x.size(1)) 117 | if self.curr_particles[step] is None: 118 | self.curr_particles[step] = ParticleDataBatch() 119 | self.curr_particles[step].init(shape, self.model.device, self.model.requires_grad) 120 | if self.next_particles[step] is None: 121 | self.next_particles[step] = ParticleDataBatch() 122 | self.next_particles[step].init(shape, self.model.device, self.model.requires_grad) 123 | particles_curr = self.curr_particles[step] 124 | particles_next = self.next_particles[step] 125 | 126 | shape = (pred.size(0), pred.size(1), pred.size(2), pred.size(3)) 127 | if self.curr_grids[step] is None: 128 | self.curr_grids[step] = GridDataBatch() 129 | self.curr_grids[step].init(shape, self.model.device, self.model.requires_grad) 130 | grid_curr = self.curr_grids[step] 131 | if self.next_grids[step] is None: 132 | 
self.next_grids[step] = GridDataBatch() 133 | self.next_grids[step].init(shape, self.model.device, self.model.requires_grad) 134 | grid_next = self.next_grids[step] 135 | 136 | return SimFunctionWithFrictionBatch.apply(self.model, statics, colliders, particles_curr, particles_next, grid_curr, grid_next, friction, x, v, pred) 137 | -------------------------------------------------------------------------------- /pgnd/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Dict, Optional 2 | from pathlib import Path 3 | from omegaconf import DictConfig 4 | import sys 5 | import shutil 6 | import numpy as np 7 | import wandb 8 | import warp as wp 9 | 10 | 11 | Tape = wp.Tape 12 | 13 | class CondTape(object): 14 | def __init__(self, tape: Optional[Tape], cond: bool = True) -> None: 15 | self.tape = tape 16 | self.cond = cond 17 | 18 | def __enter__(self): 19 | if self.tape is not None and self.cond: 20 | self.tape.__enter__() 21 | 22 | def __exit__(self, exc_type, exc_value, traceback): 23 | if self.tape is not None and self.cond: 24 | self.tape.__exit__(exc_type, exc_value, traceback) 25 | 26 | 27 | def cfg2dict(cfg: DictConfig) -> Dict: 28 | """ 29 | Recursively convert OmegaConf to vanilla dict 30 | :param cfg: 31 | :return: 32 | """ 33 | cfg_dict = {} 34 | for k, v in cfg.items(): 35 | if type(v) == DictConfig: 36 | cfg_dict[k] = cfg2dict(v) 37 | else: 38 | cfg_dict[k] = v 39 | return cfg_dict 40 | 41 | 42 | class Logger: 43 | 44 | def __init__(self, cfg, project='pgnd-train', entity='kaiz'): 45 | wandb.init(project=project, entity=entity, name=cfg.train.name) 46 | wandb.config = cfg2dict(cfg) 47 | 48 | def add_scalar(self, tag, scalar, step=None): 49 | wandb.log({tag: scalar}, step=step) 50 | 51 | def add_image(self, tag, img, step=None, scale=True): 52 | if scale: 53 | img = (img - img.min()) / (img.max() - img.min()) 54 | wandb.log({tag: wandb.Image(img)}, step=step) 55 | 56 | def add_video(self, tag, video, step=None): 57 | wandb.log({tag: wandb.Video(video)}, step=step) 58 | 59 | 60 | def mkdir(path: Path, resume=False, overwrite=False) -> None: 61 | while True: 62 | if overwrite: 63 | if path.is_dir(): 64 | print('overwriting directory ({})'.format(path)) 65 | shutil.rmtree(path, ignore_errors=True) 66 | path.mkdir(parents=True, exist_ok=True) 67 | return 68 | elif resume: 69 | print('resuming directory ({})'.format(path)) 70 | path.mkdir(parents=True, exist_ok=True) 71 | return 72 | else: 73 | if path.exists(): 74 | feedback = input('target directory ({}) already exists, overwrite? 
[Y/r/n] '.format(path)) 75 | ret = feedback.casefold() 76 | else: 77 | ret = 'y' 78 | if ret == 'n': 79 | sys.exit(0) 80 | elif ret == 'r': 81 | resume = True 82 | elif ret == 'y': 83 | overwrite = True 84 | 85 | 86 | def get_root(path: Union[str, Path], name: str = '.root') -> Path: 87 | root = Path(path).resolve() 88 | while not (root / name).is_file(): 89 | root = root.parent 90 | return root 91 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | warp-lang==1.1.0 2 | trimesh 3 | pyyaml 4 | numpy==1.26.4 5 | scipy 6 | transformers 7 | scikit-learn 8 | scikit-image 9 | pyvista 10 | opencv-contrib-python 11 | open3d 12 | moviepy 13 | matplotlib 14 | kornia 15 | ipdb 16 | h5py 17 | gradio 18 | ffmpeg 19 | einops 20 | dill 21 | av 22 | timm 23 | omegaconf 24 | hydra-core 25 | wandb -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | diff_gaussian_rasterization.egg-info/ 3 | dist/ 4 | .idea/ 5 | third_party/ -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/glm"] 2 | path = third_party/glm 3 | url = https://github.com/g-truc/glm.git 4 | -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | cmake_minimum_required(VERSION 3.20) 13 | 14 | project(DiffRast LANGUAGES CUDA CXX) 15 | 16 | set(CMAKE_CXX_STANDARD 17) 17 | set(CMAKE_CXX_EXTENSIONS OFF) 18 | set(CMAKE_CUDA_STANDARD 17) 19 | 20 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 21 | 22 | add_library(CudaRasterizer 23 | cuda_rasterizer/backward.h 24 | cuda_rasterizer/backward.cu 25 | cuda_rasterizer/forward.h 26 | cuda_rasterizer/forward.cu 27 | cuda_rasterizer/auxiliary.h 28 | cuda_rasterizer/rasterizer_impl.cu 29 | cuda_rasterizer/rasterizer_impl.h 30 | cuda_rasterizer/rasterizer.h 31 | ) 32 | 33 | set_target_properties(CudaRasterizer PROPERTIES CUDA_ARCHITECTURES "75;86") 34 | 35 | target_include_directories(CudaRasterizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/cuda_rasterizer) 36 | target_include_directories(CudaRasterizer PRIVATE third_party/glm ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) 37 | -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/LICENSE.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 
5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. 
THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/README.md: -------------------------------------------------------------------------------- 1 | 2 | This is a clone of https://github.com/graphdeco-inria/diff-gaussian-rasterization/tree/59f5f77e3ddbac3ed9db93ec2cfe99ed6c5d121d 3 | 4 | However, it has been edited by Jonathon Luiten to also render 'depth' as well as colour. 5 | 6 | This is needed for Jonathon's Dynamic 3D Gaussians work which can be found here: http://dynamic3dgaussians.github.io 7 | 8 | By default, the depth is calculated as 'median depth', where the depth is the depth of the Gaussian center which causes the accumulated rays transmittance to drop below 0.5. 9 | If a ray doesn't reach this threshold it is given a default depth of 15. This median depth avoids the depth floaters around depth boundaries that 'mean depth' would give. 10 | If 'mean depth' is preffered, there is commented out code which also calculates 'mean depth'. 11 | See lines 307-308 and 363-372 of cuda_rasterizer/forward.cu. 12 | 13 | Note that the backward pass for the depth has not been implemented, so it won't work for training with depth ground-truth. 14 | 15 | Note that the code in this repo follows the (non commercial) license of Inria as laid out in LICENSE.md 16 | 17 | If you're using this as part of the Dynamic 3D Gaussians code, just follow the installation instruction for that codebase. 18 | 19 | To install this stand-alone I have been doing the following (although I don't think this is necessarily the best way): 20 | ``` 21 | git clone git@github.com:git@github.com:JonathonLuiten/diff-gaussian-rasterization-w-depth.git 22 | cd diff-gaussian-rasterization-w-depth 23 | python setup.py install 24 | pip install . 25 | ``` 26 | 27 | Original readme below: 28 | 29 | # Differential Gaussian Rasterization 30 | 31 | Used as the rasterization engine for the paper "3D Gaussian Splatting for Real-Time Rendering of Radiance Fields". If you can make use of it in your own research, please be so kind to cite us. 32 | 33 |
34 | ## BibTeX
35 |
36 | @Article{kerbl3Dgaussians,
37 |       author       = {Kerbl, Bernhard and Kopanas, Georgios and Leimk{\"u}hler, Thomas and Drettakis, George},
38 |       title        = {3D Gaussian Splatting for Real-Time Radiance Field Rendering},
39 |       journal      = {ACM Transactions on Graphics},
40 |       number       = {4},
41 |       volume       = {42},
42 |       month        = {July},
43 |       year         = {2023},
44 |       url          = {https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/}
45 | }
46 |
47 |
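A minimal stand-alone calling sketch follows. The tensor shapes, camera matrices and the z_threshold value are illustrative assumptions; only the settings fields, the keyword arguments and the (colour, radii, depth) return signature come from diff_gaussian_rasterization/__init__.py.

```
import math
import torch
from diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianRasterizer

N = 1000                                                   # hypothetical number of Gaussians
device = "cuda"
means3D = torch.rand(N, 3, device=device)
means2D = torch.zeros_like(means3D, requires_grad=True)    # receives screen-space position gradients
opacities = torch.rand(N, 1, device=device)
scales = 0.01 * torch.rand(N, 3, device=device)
rotations = torch.nn.functional.normalize(torch.rand(N, 4, device=device))  # unit quaternions
colors_precomp = torch.rand(N, 3, device=device)           # pass either SHs or precomputed colours, not both

fov = math.radians(60.0)                                   # assumed field of view
settings = GaussianRasterizationSettings(
    image_height=480,
    image_width=640,
    tanfovx=math.tan(fov * 0.5),
    tanfovy=math.tan(fov * 0.5),
    bg=torch.zeros(3, device=device),
    scale_modifier=1.0,
    viewmatrix=torch.eye(4, device=device),                # placeholder world-to-view matrix
    projmatrix=torch.eye(4, device=device),                # placeholder full projection matrix
    sh_degree=0,
    campos=torch.zeros(3, device=device),
    prefiltered=False,
    z_threshold=0.2,  # assumed view-space depth cutoff; see in_frustum in cuda_rasterizer/auxiliary.h
)

rasterizer = GaussianRasterizer(settings)
# colour: (3, H, W); radii: (N,); depth: (1, H, W). Rays whose accumulated
# transmittance never drops below 0.5 keep the default (median) depth of 15.
colour, radii, depth = rasterizer(
    means3D=means3D, means2D=means2D, opacities=opacities,
    colors_precomp=colors_precomp, scales=scales, rotations=rotations)
```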
-------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/auxiliary.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 13 | #define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 14 | 15 | #include "config.h" 16 | #include "stdio.h" 17 | 18 | #define BLOCK_SIZE (BLOCK_X * BLOCK_Y) 19 | #define NUM_WARPS (BLOCK_SIZE/32) 20 | 21 | // Spherical harmonics coefficients 22 | __device__ const float SH_C0 = 0.28209479177387814f; 23 | __device__ const float SH_C1 = 0.4886025119029199f; 24 | __device__ const float SH_C2[] = { 25 | 1.0925484305920792f, 26 | -1.0925484305920792f, 27 | 0.31539156525252005f, 28 | -1.0925484305920792f, 29 | 0.5462742152960396f 30 | }; 31 | __device__ const float SH_C3[] = { 32 | -0.5900435899266435f, 33 | 2.890611442640554f, 34 | -0.4570457994644658f, 35 | 0.3731763325901154f, 36 | -0.4570457994644658f, 37 | 1.445305721320277f, 38 | -0.5900435899266435f 39 | }; 40 | 41 | __forceinline__ __device__ float ndc2Pix(float v, int S) 42 | { 43 | return ((v + 1.0) * S - 1.0) * 0.5; 44 | } 45 | 46 | __forceinline__ __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid) 47 | { 48 | rect_min = { 49 | min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))), 50 | min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y))) 51 | }; 52 | rect_max = { 53 | min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))), 54 | min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y))) 55 | }; 56 | } 57 | 58 | __forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix) 59 | { 60 | float3 transformed = { 61 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 62 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 63 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 64 | }; 65 | return transformed; 66 | } 67 | 68 | __forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix) 69 | { 70 | float4 transformed = { 71 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 72 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 73 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 74 | matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15] 75 | }; 76 | return transformed; 77 | } 78 | 79 | __forceinline__ __device__ float3 transformVec4x3(const float3& p, const float* matrix) 80 | { 81 | float3 transformed = { 82 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z, 83 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z, 84 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z, 85 | }; 86 | return transformed; 87 | } 88 | 89 | __forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix) 90 | { 91 | float3 transformed = { 92 | matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z, 93 | matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z, 94 | matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z, 95 | }; 96 | return transformed; 97 | } 98 
| 99 | __forceinline__ __device__ float dnormvdz(float3 v, float3 dv) 100 | { 101 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 102 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 103 | float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 104 | return dnormvdz; 105 | } 106 | 107 | __forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv) 108 | { 109 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 110 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 111 | 112 | float3 dnormvdv; 113 | dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32; 114 | dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32; 115 | dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 116 | return dnormvdv; 117 | } 118 | 119 | __forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv) 120 | { 121 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; 122 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 123 | 124 | float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w }; 125 | float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w; 126 | float4 dnormvdv; 127 | dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32; 128 | dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32; 129 | dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32; 130 | dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32; 131 | return dnormvdv; 132 | } 133 | 134 | __forceinline__ __device__ float sigmoid(float x) 135 | { 136 | return 1.0f / (1.0f + expf(-x)); 137 | } 138 | 139 | __forceinline__ __device__ bool in_frustum(int idx, 140 | const float* orig_points, 141 | const float* viewmatrix, 142 | const float* projmatrix, 143 | const float z_threshold, 144 | bool prefiltered, 145 | float3& p_view) 146 | { 147 | float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] }; 148 | 149 | // Bring points to screen space 150 | float4 p_hom = transformPoint4x4(p_orig, projmatrix); 151 | float p_w = 1.0f / (p_hom.w + 0.0000001f); 152 | float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w }; 153 | p_view = transformPoint4x3(p_orig, viewmatrix); 154 | 155 | if (p_view.z <= z_threshold) // || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3))) 156 | { 157 | if (prefiltered) 158 | { 159 | printf("Point is filtered although prefiltered is set. This shouldn't happen!"); 160 | __trap(); 161 | } 162 | return false; 163 | } 164 | return true; 165 | } 166 | 167 | #endif -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/backward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_BACKWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_BACKWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace BACKWARD 22 | { 23 | void render( 24 | const dim3 grid, dim3 block, 25 | const uint2* ranges, 26 | const uint32_t* point_list, 27 | int W, int H, 28 | const float* bg_color, 29 | const float2* means2D, 30 | const float4* conic_opacity, 31 | const float* colors, 32 | const float* final_Ts, 33 | const uint32_t* n_contrib, 34 | const float* dL_dpixels, 35 | float3* dL_dmean2D, 36 | float4* dL_dconic2D, 37 | float* dL_dopacity, 38 | float* dL_dcolors); 39 | 40 | void preprocess( 41 | int P, int D, int M, 42 | const float3* means, 43 | const int* radii, 44 | const float* shs, 45 | const bool* clamped, 46 | const glm::vec3* scales, 47 | const glm::vec4* rotations, 48 | const float scale_modifier, 49 | const float* cov3Ds, 50 | const float* view, 51 | const float* proj, 52 | const float focal_x, float focal_y, 53 | const float tan_fovx, float tan_fovy, 54 | const glm::vec3* campos, 55 | const float3* dL_dmean2D, 56 | const float* dL_dconics, 57 | glm::vec3* dL_dmeans, 58 | float* dL_dcolor, 59 | float* dL_dcov3D, 60 | float* dL_dsh, 61 | glm::vec3* dL_dscale, 62 | glm::vec4* dL_drot); 63 | } 64 | 65 | #endif -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_CONFIG_H_INCLUDED 13 | #define CUDA_RASTERIZER_CONFIG_H_INCLUDED 14 | 15 | #define NUM_CHANNELS 3 // Default 3, RGB 16 | #define BLOCK_X 16 17 | #define BLOCK_Y 16 18 | 19 | #endif -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/forward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_FORWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_FORWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace FORWARD 22 | { 23 | // Perform initial steps for each Gaussian prior to rasterization. 
24 | void preprocess(int P, int D, int M, 25 | const float* orig_points, 26 | const glm::vec3* scales, 27 | const float scale_modifier, 28 | const glm::vec4* rotations, 29 | const float* opacities, 30 | const float* shs, 31 | bool* clamped, 32 | const float* cov3D_precomp, 33 | const float* colors_precomp, 34 | const float* viewmatrix, 35 | const float* projmatrix, 36 | const glm::vec3* cam_pos, 37 | const int W, int H, 38 | const float focal_x, float focal_y, 39 | const float tan_fovx, float tan_fovy, 40 | int* radii, 41 | float2* points_xy_image, 42 | float* depths, 43 | float* cov3Ds, 44 | float* colors, 45 | float4* conic_opacity, 46 | const dim3 grid, 47 | uint32_t* tiles_touched, 48 | bool prefiltered, 49 | const float z_threshold); 50 | 51 | // Main rasterization method. 52 | void render( 53 | const dim3 grid, dim3 block, 54 | const uint2* ranges, 55 | const uint32_t* point_list, 56 | int W, int H, 57 | const float2* points_xy_image, 58 | const float* features, 59 | const float4* conic_opacity, 60 | float* final_T, 61 | uint32_t* n_contrib, 62 | const float* bg_color, 63 | float* out_color, 64 | const float* depth, 65 | float* out_depth); 66 | } 67 | 68 | 69 | #endif -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/rasterizer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_H_INCLUDED 13 | #define CUDA_RASTERIZER_H_INCLUDED 14 | 15 | #include 16 | #include 17 | 18 | namespace CudaRasterizer 19 | { 20 | class Rasterizer 21 | { 22 | public: 23 | 24 | static void markVisible( 25 | int P, 26 | float* means3D, 27 | float* viewmatrix, 28 | float* projmatrix, 29 | bool* present); 30 | 31 | static int forward( 32 | std::function geometryBuffer, 33 | std::function binningBuffer, 34 | std::function imageBuffer, 35 | const int P, int D, int M, 36 | const float* background, 37 | const int width, int height, 38 | const float* means3D, 39 | const float* shs, 40 | const float* colors_precomp, 41 | const float* opacities, 42 | const float* scales, 43 | const float scale_modifier, 44 | const float* rotations, 45 | const float* cov3D_precomp, 46 | const float* viewmatrix, 47 | const float* projmatrix, 48 | const float* cam_pos, 49 | const float tan_fovx, float tan_fovy, 50 | const bool prefiltered, 51 | const float z_threshold, 52 | float* out_color, 53 | float* out_depth, 54 | int* radii = nullptr); 55 | 56 | static void backward( 57 | const int P, int D, int M, int R, 58 | const float* background, 59 | const int width, int height, 60 | const float* means3D, 61 | const float* shs, 62 | const float* colors_precomp, 63 | const float* scales, 64 | const float scale_modifier, 65 | const float* rotations, 66 | const float* cov3D_precomp, 67 | const float* viewmatrix, 68 | const float* projmatrix, 69 | const float* campos, 70 | const float tan_fovx, float tan_fovy, 71 | const int* radii, 72 | char* geom_buffer, 73 | char* binning_buffer, 74 | char* image_buffer, 75 | const float* dL_dpix, 76 | float* dL_dmean2D, 77 | float* dL_dconic, 78 | float* dL_dopacity, 79 | float* dL_dcolor, 80 | float* dL_dmean3D, 81 | float* 
dL_dcov3D, 82 | float* dL_dsh, 83 | float* dL_dscale, 84 | float* dL_drot); 85 | }; 86 | }; 87 | 88 | #endif -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/cuda_rasterizer/rasterizer_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include "rasterizer.h" 17 | #include 18 | #include 19 | 20 | namespace CudaRasterizer 21 | { 22 | template 23 | static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment) 24 | { 25 | std::size_t offset = (reinterpret_cast(chunk) + alignment - 1) & ~(alignment - 1); 26 | ptr = reinterpret_cast(offset); 27 | chunk = reinterpret_cast(ptr + count); 28 | } 29 | 30 | struct GeometryState 31 | { 32 | size_t scan_size; 33 | float* depths; 34 | char* scanning_space; 35 | bool* clamped; 36 | int* internal_radii; 37 | float2* means2D; 38 | float* cov3D; 39 | float4* conic_opacity; 40 | float* rgb; 41 | uint32_t* point_offsets; 42 | uint32_t* tiles_touched; 43 | 44 | static GeometryState fromChunk(char*& chunk, size_t P); 45 | }; 46 | 47 | struct ImageState 48 | { 49 | uint2* ranges; 50 | uint32_t* n_contrib; 51 | float* accum_alpha; 52 | 53 | static ImageState fromChunk(char*& chunk, size_t N); 54 | }; 55 | 56 | struct BinningState 57 | { 58 | size_t sorting_size; 59 | uint64_t* point_list_keys_unsorted; 60 | uint64_t* point_list_keys; 61 | uint32_t* point_list_unsorted; 62 | uint32_t* point_list; 63 | char* list_sorting_space; 64 | 65 | static BinningState fromChunk(char*& chunk, size_t P); 66 | }; 67 | 68 | template 69 | size_t required(size_t P) 70 | { 71 | char* size = nullptr; 72 | T::fromChunk(size, P); 73 | return ((size_t)size) + 128; 74 | } 75 | }; -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/diff_gaussian_rasterization/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from typing import NamedTuple 13 | import torch.nn as nn 14 | import torch 15 | from . 
import _C 16 | 17 | def rasterize_gaussians( 18 | means3D, 19 | means2D, 20 | sh, 21 | colors_precomp, 22 | opacities, 23 | scales, 24 | rotations, 25 | cov3Ds_precomp, 26 | raster_settings, 27 | ): 28 | return _RasterizeGaussians.apply( 29 | means3D, 30 | means2D, 31 | sh, 32 | colors_precomp, 33 | opacities, 34 | scales, 35 | rotations, 36 | cov3Ds_precomp, 37 | raster_settings, 38 | ) 39 | 40 | class _RasterizeGaussians(torch.autograd.Function): 41 | @staticmethod 42 | def forward( 43 | ctx, 44 | means3D, 45 | means2D, 46 | sh, 47 | colors_precomp, 48 | opacities, 49 | scales, 50 | rotations, 51 | cov3Ds_precomp, 52 | raster_settings, 53 | ): 54 | 55 | # Restructure arguments the way that the C++ lib expects them 56 | args = ( 57 | raster_settings.bg, 58 | means3D, 59 | colors_precomp, 60 | opacities, 61 | scales, 62 | rotations, 63 | raster_settings.scale_modifier, 64 | cov3Ds_precomp, 65 | raster_settings.viewmatrix, 66 | raster_settings.projmatrix, 67 | raster_settings.tanfovx, 68 | raster_settings.tanfovy, 69 | raster_settings.image_height, 70 | raster_settings.image_width, 71 | sh, 72 | raster_settings.sh_degree, 73 | raster_settings.campos, 74 | raster_settings.prefiltered, 75 | raster_settings.z_threshold, 76 | ) 77 | 78 | # Invoke C++/CUDA rasterizer 79 | # num_rendered, color, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args) 80 | num_rendered, color, radii, geomBuffer, binningBuffer, imgBuffer, depth = _C.rasterize_gaussians(*args) 81 | 82 | # Keep relevant tensors for backward 83 | ctx.raster_settings = raster_settings 84 | ctx.num_rendered = num_rendered 85 | ctx.save_for_backward(colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer) 86 | return color, radii, depth 87 | 88 | @staticmethod 89 | def backward(ctx, grad_out_color, _, depth): 90 | 91 | # Restore necessary values from context 92 | num_rendered = ctx.num_rendered 93 | raster_settings = ctx.raster_settings 94 | colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer = ctx.saved_tensors 95 | 96 | # Restructure args as C++ method expects them 97 | args = (raster_settings.bg, 98 | means3D, 99 | radii, 100 | colors_precomp, 101 | scales, 102 | rotations, 103 | raster_settings.scale_modifier, 104 | cov3Ds_precomp, 105 | raster_settings.viewmatrix, 106 | raster_settings.projmatrix, 107 | raster_settings.tanfovx, 108 | raster_settings.tanfovy, 109 | grad_out_color, 110 | sh, 111 | raster_settings.sh_degree, 112 | raster_settings.campos, 113 | geomBuffer, 114 | num_rendered, 115 | binningBuffer, 116 | imgBuffer) 117 | 118 | # Compute gradients for relevant tensors by invoking backward method 119 | grad_means2D, grad_colors_precomp, grad_opacities, grad_means3D, grad_cov3Ds_precomp, grad_sh, grad_scales, grad_rotations = _C.rasterize_gaussians_backward(*args) 120 | 121 | grads = ( 122 | grad_means3D, 123 | grad_means2D, 124 | grad_sh, 125 | grad_colors_precomp, 126 | grad_opacities, 127 | grad_scales, 128 | grad_rotations, 129 | grad_cov3Ds_precomp, 130 | None, 131 | ) 132 | 133 | return grads 134 | 135 | class GaussianRasterizationSettings(NamedTuple): 136 | image_height: int 137 | image_width: int 138 | tanfovx : float 139 | tanfovy : float 140 | bg : torch.Tensor 141 | scale_modifier : float 142 | viewmatrix : torch.Tensor 143 | projmatrix : torch.Tensor 144 | sh_degree : int 145 | campos : torch.Tensor 146 | prefiltered : bool 147 | z_threshold : float 148 | 149 | class 
GaussianRasterizer(nn.Module): 150 | def __init__(self, raster_settings): 151 | super().__init__() 152 | self.raster_settings = raster_settings 153 | 154 | def markVisible(self, positions): 155 | # Mark visible points (based on frustum culling for camera) with a boolean 156 | with torch.no_grad(): 157 | raster_settings = self.raster_settings 158 | visible = _C.mark_visible( 159 | positions, 160 | raster_settings.viewmatrix, 161 | raster_settings.projmatrix) 162 | 163 | return visible 164 | 165 | def forward(self, means3D, means2D, opacities, shs = None, colors_precomp = None, scales = None, rotations = None, cov3D_precomp = None): 166 | 167 | raster_settings = self.raster_settings 168 | 169 | if (shs is None and colors_precomp is None) or (shs is not None and colors_precomp is not None): 170 | raise Exception('Please provide excatly one of either SHs or precomputed colors!') 171 | 172 | if ((scales is None or rotations is None) and cov3D_precomp is None) or ((scales is not None or rotations is not None) and cov3D_precomp is not None): 173 | raise Exception('Please provide exactly one of either scale/rotation pair or precomputed 3D covariance!') 174 | 175 | if shs is None: 176 | shs = torch.Tensor([]) 177 | if colors_precomp is None: 178 | colors_precomp = torch.Tensor([]) 179 | 180 | if scales is None: 181 | scales = torch.Tensor([]) 182 | if rotations is None: 183 | rotations = torch.Tensor([]) 184 | if cov3D_precomp is None: 185 | cov3D_precomp = torch.Tensor([]) 186 | 187 | # Invoke C++/CUDA rasterization routine 188 | return rasterize_gaussians( 189 | means3D, 190 | means2D, 191 | shs, 192 | colors_precomp, 193 | opacities, 194 | scales, 195 | rotations, 196 | cov3D_precomp, 197 | raster_settings, 198 | ) 199 | 200 | -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/ext.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include "rasterize_points.h" 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("rasterize_gaussians", &RasterizeGaussiansCUDA); 17 | m.def("rasterize_gaussians_backward", &RasterizeGaussiansBackwardCUDA); 18 | m.def("mark_visible", &markVisible); 19 | } -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/rasterize_points.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "cuda_rasterizer/config.h" 22 | #include "cuda_rasterizer/rasterizer.h" 23 | #include 24 | #include 25 | #include 26 | 27 | std::function resizeFunctional(torch::Tensor& t) { 28 | auto lambda = [&t](size_t N) { 29 | t.resize_({(long long)N}); 30 | return reinterpret_cast(t.contiguous().data_ptr()); 31 | }; 32 | return lambda; 33 | } 34 | 35 | std::tuple 36 | RasterizeGaussiansCUDA( 37 | const torch::Tensor& background, 38 | const torch::Tensor& means3D, 39 | const torch::Tensor& colors, 40 | const torch::Tensor& opacity, 41 | const torch::Tensor& scales, 42 | const torch::Tensor& rotations, 43 | const float scale_modifier, 44 | const torch::Tensor& cov3D_precomp, 45 | const torch::Tensor& viewmatrix, 46 | const torch::Tensor& projmatrix, 47 | const float tan_fovx, 48 | const float tan_fovy, 49 | const int image_height, 50 | const int image_width, 51 | const torch::Tensor& sh, 52 | const int degree, 53 | const torch::Tensor& campos, 54 | const bool prefiltered, 55 | const float z_threshold) 56 | { 57 | if (means3D.ndimension() != 2 || means3D.size(1) != 3) { 58 | AT_ERROR("means3D must have dimensions (num_points, 3)"); 59 | } 60 | 61 | const int P = means3D.size(0); 62 | const int H = image_height; 63 | const int W = image_width; 64 | 65 | auto int_opts = means3D.options().dtype(torch::kInt32); 66 | auto float_opts = means3D.options().dtype(torch::kFloat32); 67 | 68 | torch::Tensor out_color = torch::full({NUM_CHANNELS, H, W}, 0.0, float_opts); 69 | torch::Tensor radii = torch::full({P}, 0, means3D.options().dtype(torch::kInt32)); 70 | torch::Tensor out_depth = torch::full({1, H, W}, 0.0, float_opts); 71 | 72 | torch::Device device(torch::kCUDA); 73 | torch::TensorOptions options(torch::kByte); 74 | torch::Tensor geomBuffer = torch::empty({0}, options.device(device)); 75 | torch::Tensor binningBuffer = torch::empty({0}, options.device(device)); 76 | torch::Tensor imgBuffer = torch::empty({0}, options.device(device)); 77 | std::function geomFunc = resizeFunctional(geomBuffer); 78 | std::function binningFunc = resizeFunctional(binningBuffer); 79 | std::function imgFunc = resizeFunctional(imgBuffer); 80 | 81 | int rendered = 0; 82 | if(P != 0) 83 | { 84 | int M = 0; 85 | if(sh.size(0) != 0) 86 | { 87 | M = sh.size(1); 88 | } 89 | 90 | rendered = CudaRasterizer::Rasterizer::forward( 91 | geomFunc, 92 | binningFunc, 93 | imgFunc, 94 | P, degree, M, 95 | background.contiguous().data(), 96 | W, H, 97 | means3D.contiguous().data(), 98 | sh.contiguous().data_ptr(), 99 | colors.contiguous().data(), 100 | opacity.contiguous().data(), 101 | scales.contiguous().data_ptr(), 102 | scale_modifier, 103 | rotations.contiguous().data_ptr(), 104 | cov3D_precomp.contiguous().data(), 105 | viewmatrix.contiguous().data(), 106 | projmatrix.contiguous().data(), 107 | campos.contiguous().data(), 108 | tan_fovx, 109 | tan_fovy, 110 | prefiltered, 111 | z_threshold, 112 | out_color.contiguous().data(), 113 | out_depth.contiguous().data(), 114 | radii.contiguous().data()); 115 | } 116 | return std::make_tuple(rendered, out_color, radii, geomBuffer, binningBuffer, imgBuffer, out_depth); 117 | } 118 | 119 | std::tuple 120 | RasterizeGaussiansBackwardCUDA( 121 | const torch::Tensor& background, 122 | const torch::Tensor& means3D, 123 | const torch::Tensor& radii, 124 | const torch::Tensor& colors, 125 | const 
torch::Tensor& scales, 126 | const torch::Tensor& rotations, 127 | const float scale_modifier, 128 | const torch::Tensor& cov3D_precomp, 129 | const torch::Tensor& viewmatrix, 130 | const torch::Tensor& projmatrix, 131 | const float tan_fovx, 132 | const float tan_fovy, 133 | const torch::Tensor& dL_dout_color, 134 | const torch::Tensor& sh, 135 | const int degree, 136 | const torch::Tensor& campos, 137 | const torch::Tensor& geomBuffer, 138 | const int R, 139 | const torch::Tensor& binningBuffer, 140 | const torch::Tensor& imageBuffer) 141 | { 142 | const int P = means3D.size(0); 143 | const int H = dL_dout_color.size(1); 144 | const int W = dL_dout_color.size(2); 145 | 146 | int M = 0; 147 | if(sh.size(0) != 0) 148 | { 149 | M = sh.size(1); 150 | } 151 | 152 | torch::Tensor dL_dmeans3D = torch::zeros({P, 3}, means3D.options()); 153 | torch::Tensor dL_dmeans2D = torch::zeros({P, 3}, means3D.options()); 154 | torch::Tensor dL_dcolors = torch::zeros({P, NUM_CHANNELS}, means3D.options()); 155 | torch::Tensor dL_dconic = torch::zeros({P, 2, 2}, means3D.options()); 156 | torch::Tensor dL_dopacity = torch::zeros({P, 1}, means3D.options()); 157 | torch::Tensor dL_dcov3D = torch::zeros({P, 6}, means3D.options()); 158 | torch::Tensor dL_dsh = torch::zeros({P, M, 3}, means3D.options()); 159 | torch::Tensor dL_dscales = torch::zeros({P, 3}, means3D.options()); 160 | torch::Tensor dL_drotations = torch::zeros({P, 4}, means3D.options()); 161 | 162 | if(P != 0) 163 | { 164 | CudaRasterizer::Rasterizer::backward(P, degree, M, R, 165 | background.contiguous().data(), 166 | W, H, 167 | means3D.contiguous().data(), 168 | sh.contiguous().data(), 169 | colors.contiguous().data(), 170 | scales.data_ptr(), 171 | scale_modifier, 172 | rotations.data_ptr(), 173 | cov3D_precomp.contiguous().data(), 174 | viewmatrix.contiguous().data(), 175 | projmatrix.contiguous().data(), 176 | campos.contiguous().data(), 177 | tan_fovx, 178 | tan_fovy, 179 | radii.contiguous().data(), 180 | reinterpret_cast(geomBuffer.contiguous().data_ptr()), 181 | reinterpret_cast(binningBuffer.contiguous().data_ptr()), 182 | reinterpret_cast(imageBuffer.contiguous().data_ptr()), 183 | dL_dout_color.contiguous().data(), 184 | dL_dmeans2D.contiguous().data(), 185 | dL_dconic.contiguous().data(), 186 | dL_dopacity.contiguous().data(), 187 | dL_dcolors.contiguous().data(), 188 | dL_dmeans3D.contiguous().data(), 189 | dL_dcov3D.contiguous().data(), 190 | dL_dsh.contiguous().data(), 191 | dL_dscales.contiguous().data(), 192 | dL_drotations.contiguous().data()); 193 | } 194 | 195 | return std::make_tuple(dL_dmeans2D, dL_dcolors, dL_dopacity, dL_dmeans3D, dL_dcov3D, dL_dsh, dL_dscales, dL_drotations); 196 | } 197 | 198 | torch::Tensor markVisible( 199 | torch::Tensor& means3D, 200 | torch::Tensor& viewmatrix, 201 | torch::Tensor& projmatrix) 202 | { 203 | const int P = means3D.size(0); 204 | 205 | torch::Tensor present = torch::full({P}, false, means3D.options().dtype(at::kBool)); 206 | 207 | if(P != 0) 208 | { 209 | CudaRasterizer::Rasterizer::markVisible(P, 210 | means3D.contiguous().data(), 211 | viewmatrix.contiguous().data(), 212 | projmatrix.contiguous().data(), 213 | present.contiguous().data()); 214 | } 215 | 216 | return present; 217 | } -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/rasterize_points.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, 
https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | std::tuple 19 | RasterizeGaussiansCUDA( 20 | const torch::Tensor& background, 21 | const torch::Tensor& means3D, 22 | const torch::Tensor& colors, 23 | const torch::Tensor& opacity, 24 | const torch::Tensor& scales, 25 | const torch::Tensor& rotations, 26 | const float scale_modifier, 27 | const torch::Tensor& cov3D_precomp, 28 | const torch::Tensor& viewmatrix, 29 | const torch::Tensor& projmatrix, 30 | const float tan_fovx, 31 | const float tan_fovy, 32 | const int image_height, 33 | const int image_width, 34 | const torch::Tensor& sh, 35 | const int degree, 36 | const torch::Tensor& campos, 37 | const bool prefiltered, 38 | const float z_threshold); 39 | 40 | std::tuple 41 | RasterizeGaussiansBackwardCUDA( 42 | const torch::Tensor& background, 43 | const torch::Tensor& means3D, 44 | const torch::Tensor& radii, 45 | const torch::Tensor& colors, 46 | const torch::Tensor& scales, 47 | const torch::Tensor& rotations, 48 | const float scale_modifier, 49 | const torch::Tensor& cov3D_precomp, 50 | const torch::Tensor& viewmatrix, 51 | const torch::Tensor& projmatrix, 52 | const float tan_fovx, 53 | const float tan_fovy, 54 | const torch::Tensor& dL_dout_color, 55 | const torch::Tensor& sh, 56 | const int degree, 57 | const torch::Tensor& campos, 58 | const torch::Tensor& geomBuffer, 59 | const int R, 60 | const torch::Tensor& binningBuffer, 61 | const torch::Tensor& imageBuffer); 62 | 63 | torch::Tensor markVisible( 64 | torch::Tensor& means3D, 65 | torch::Tensor& viewmatrix, 66 | torch::Tensor& projmatrix); -------------------------------------------------------------------------------- /third-party/diff-gaussian-rasterization-w-depth/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from setuptools import setup 13 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 14 | import os 15 | os.path.dirname(os.path.abspath(__file__)) 16 | 17 | setup( 18 | name="diff_gaussian_rasterization", 19 | packages=['diff_gaussian_rasterization'], 20 | ext_modules=[ 21 | CUDAExtension( 22 | name="diff_gaussian_rasterization._C", 23 | sources=[ 24 | "cuda_rasterizer/rasterizer_impl.cu", 25 | "cuda_rasterizer/forward.cu", 26 | "cuda_rasterizer/backward.cu", 27 | "rasterize_points.cu", 28 | "ext.cpp"], 29 | extra_compile_args={"nvcc": ["-I" + os.path.join(os.path.dirname(os.path.abspath(__file__)), "third_party/glm/")]}) 30 | ], 31 | cmdclass={ 32 | 'build_ext': BuildExtension 33 | } 34 | ) 35 | --------------------------------------------------------------------------------