├── .github └── workflows │ ├── ci.yml │ ├── install-base.sh │ └── publish.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── doc ├── base-dockerfile ├── input.jpeg └── systemnew-sm.JPG ├── examples ├── grasp_depth_image.py ├── grasp_realsense.py ├── loader.py ├── render_heatmap.py └── render_pointcloud.py ├── griffig ├── __init__.py ├── action │ ├── __init__.py │ ├── checker.py │ └── converter.py ├── griffig.py ├── infer │ ├── __init__.py │ ├── inference.py │ ├── inference_actor_critic.py │ ├── inference_base.py │ ├── inference_planar.py │ ├── inference_semantic.py │ └── selection.py └── utility │ ├── __init__.py │ ├── box_finder.py │ ├── heatmap.py │ ├── image.py │ ├── model_data.py │ └── model_library.py ├── include └── griffig │ ├── box_data.hpp │ ├── grasp.hpp │ ├── griffig.hpp │ ├── gripper.hpp │ ├── ndarray_converter.hpp │ ├── orthographic_image.hpp │ ├── pointcloud.hpp │ ├── renderer.hpp │ └── robot_pose.hpp ├── pyproject.toml ├── requirements.txt ├── setup.py ├── src ├── griffig.cpp └── python.cpp └── test ├── data ├── 1-rc.jpeg └── 1-rd.jpeg ├── loader.py ├── test_box.py └── test_draw.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | container: 10 | image: griffig.xyz/base 11 | credentials: 12 | username: ${{ secrets.DOCKER_REGISTRY_USER }} 13 | password: ${{ secrets.DOCKER_REGISTRY_PASSWORD }} 14 | 15 | steps: 16 | - name: Checkout repository and submodules 17 | uses: actions/checkout@v2 18 | with: 19 | submodules: recursive 20 | 21 | - name: Build and test 22 | run: | 23 | pip install . 24 | python3 test/test_box.py 25 | -------------------------------------------------------------------------------- /.github/workflows/install-base.sh: -------------------------------------------------------------------------------- 1 | # Fix Cmake find *.so.0 2 | ln -s /usr/lib64/libOpenGL.so.0 /usr/lib64/libOpenGL.so 3 | 4 | # Fix CMake find Numpy 5 | python3.9 -m pip install --no-cache-dir numpy 6 | sed -i '3092s/.*/"import sys; import numpy; sys.stdout.write(numpy.get_include())"/' /opt/_internal/tools/lib/python3.9/site-packages/cmake/data/share/cmake-3.21/Modules/FindPython/Support.cmake 7 | 8 | # Install Eigen 9 | git clone https://gitlab.com/libeigen/eigen.git 10 | cd eigen 11 | git checkout 3.3.9 12 | mkdir build && cd build 13 | cmake .. 14 | make -j2 15 | make install 16 | cd ../../ 17 | 18 | # Checkout PyBind11 19 | git clone https://github.com/pybind/pybind11.git 20 | cd pybind11 21 | git checkout v2.6.2 22 | cd ../ 23 | 24 | # Install OpenCV 25 | git clone https://github.com/opencv/opencv.git 26 | cd opencv 27 | git checkout 4.5.2 28 | mkdir build && cd build 29 | cmake -DCMAKE_INSTALL_PREFIX=/usr -DWITH_VTK=OFF -DWITH_GTK=OFF -DWITH_PROTOBUF=OFF -DBUILD_PERF_TESTS=OFF -DBUILD_TESTS=OFF -DWITH_WEBP=OFF -DBUILD_opencv_ml=OFF -DBUILD_opencv_calib3d=OFF -DBUILD_opencv_videoio=OFF -DBUILD_opencv_gapi=OFF -DBUILD_opencv_stitching=OFF -DBUILD_opencv_objdetect=OFF -DBUILD_opencv_flann=OFF -DBUILD_opencv_video=OFF -DBUILD_opencv_features2d=OFF .. 
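# Note (assumption based on this repository's CMakeLists.txt, which links only opencv_core and
# opencv_imgproc): most other OpenCV modules, tests, and optional integrations are disabled in the
# configure step above simply to keep the container build small and fast.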
30 | make -j2 31 | make install 32 | cd ../../ 33 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | # on: [push, pull_request] 4 | on: 5 | release: 6 | types: [created] 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout repository and submodules 14 | uses: actions/checkout@v2 15 | with: 16 | submodules: recursive 17 | 18 | - name: Set up Python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: 3.9 22 | 23 | - name: Install dependencies 24 | run: | 25 | python3 -m pip install --upgrade pip 26 | python3 -m pip install setuptools wheel twine --upgrade 27 | 28 | - name: Build source wheels 29 | run: | 30 | python3 setup.py sdist 31 | 32 | # - name: Build manylinux wheels 33 | # uses: RalfG/python-wheels-manylinux-build@v0.3.4 34 | # with: 35 | # python-versions: 'cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39' 36 | # build-requirements: 'numpy' 37 | # system-packages: 'cmake libglvnd-opengl mesa-libGL mesa-libGL-devel mesa-libGLU mesa-libGLU-devel mesa-libEGL mesa-libEGL-devel glew glew-devel' 38 | # pre-build-command: 'sh .github/workflows/install-base.sh' 39 | 40 | - name: Publish 41 | env: 42 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 43 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 44 | run: | 45 | twine upload dist/*.tar.gz 46 | # twine upload dist/*-manylinux*.whl 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .vscode 3 | .mypy_cache 4 | __pycache__ 5 | *.pyc 6 | 7 | test/data/* 8 | !test/data/1-rc.jpeg 9 | !test/data/1-rd.jpeg 10 | 11 | dist 12 | *.egg-info 13 | wheels 14 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "affx"] 2 | path = affx 3 | url = https://github.com/pantor/affx.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | 4 | project(griffig VERSION 0.0.11 LANGUAGES CXX) 5 | include(FetchContent) 6 | 7 | 8 | option(USE_INTERNAL_PYBIND11 "Build python module" OFF) 9 | 10 | 11 | find_package(Eigen3 REQUIRED) 12 | find_package(GLEW REQUIRED) 13 | find_package(OpenCV REQUIRED COMPONENTS core imgproc) 14 | find_package(OpenGL REQUIRED COMPONENTS EGL) 15 | find_package(Python3 REQUIRED COMPONENTS NumPy) 16 | 17 | 18 | if(USE_INTERNAL_PYBIND11) 19 | FetchContent_Declare(pybind11_fetch GIT_REPOSITORY https://github.com/pybind/pybind11.git GIT_TAG v2.6.2) 20 | FetchContent_MakeAvailable(pybind11_fetch) 21 | else() 22 | find_package(pybind11 REQUIRED) 23 | endif() 24 | 25 | 26 | add_subdirectory(affx) 27 | 28 | 29 | add_library(griffig src/griffig.cpp) 30 | target_compile_features(griffig PUBLIC cxx_std_17) 31 | target_include_directories(griffig PUBLIC include) 32 | target_link_libraries(griffig PUBLIC affx opencv_core opencv_imgproc OpenGL::EGL OpenGL::GLU GLEW::GLEW pybind11::embed Python3::NumPy) 33 | 34 | 35 | pybind11_add_module(_griffig src/python.cpp) 36 | target_link_libraries(_griffig PUBLIC griffig) 37 | 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM griffig.xyz/base 2 | 3 | WORKDIR griffig 4 | 5 | RUN python3 -m pip install loguru scipy==1.5 Pillow cmake 6 | 7 | COPY affx/ affx 8 | COPY griffig/ griffig 9 | COPY include/ include 10 | COPY src/ src 11 | COPY CMakeLists.txt . 12 | COPY README.md . 13 | COPY setup.py . 14 | 15 | RUN python3 -m pip install . 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Lars Berscheid 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include affx * 2 | recursive-include griffig * 3 | recursive-include include * 4 | recursive-include src * 5 | 6 | include CMakeLists.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
# Griffig

Robotic Manipulation Learned from Imitation and Self-Supervision.

*(badges: CI · Issues · Releases · MIT License)*

Griffig is a robotic manipulation library, in particular for 6D grasping, learned from large-scale imitation and self-supervised real-world training. Using an action-centric approach, Griffig does not need object models and requires only a simple depth camera as input. In our [model library](https://griffig.xyz), we publish models pre-trained in densely cluttered bin-picking scenarios. Griffig calculates grasp points fast (< 70 ms) yet robustly, achieving grasp rates as high as 95% for typical use cases. This repository contains the source code and corresponding library of our upcoming paper *Learning 6D Robotic Grasping using a Fully Convolutional Actor-Critic Architecture*.

[griffig.xyz](https://griffig.xyz)

You can find many videos on [griffig.xyz](https://griffig.xyz)!

## Installation

Griffig is a library for Python 3.7+. You can install Griffig via [PyPI](https://pypi.org/project/griffig/) by
```bash
# Install Eigen >3.3, OpenCV >3, and OpenGL (e.g. via apt)
(sudo) apt install libeigen3-dev libopencv-dev libgl1-mesa-dev libglu1-mesa-dev libegl1-mesa-dev libglew-dev

pip install griffig
```
Of course, an NVIDIA GPU with a corresponding CUDA version is highly recommended. When building from source, you can either call `pip install .` or use CMake directly to build the C++ core of Griffig. We also provide a Docker container to get started more easily.


## Tutorial

We focused on making *Griffig* easy to use! In this tutorial, we use an RGBD pointcloud of the scene to detect a 6D grasp point with an additional pre-shaped gripper stroke. We use a common parallel two-finger gripper and a RealSense D435 camera for recording. Griffig includes a small library of pre-trained models. As with all data-driven methods, make sure your robotic setup matches ours as closely as possible. The main output of Griffig is a *grasp point*. The robot should then move its gripper to the pre-shaped position and approach the point along a trajectory parallel to its gripper fingers. Be aware of possible collisions, which can always happen in bin picking.

[Dataset viewer](https://griffig.xyz/dataset/viewer)

A typical scene looks like the color (left) and depth (right) images above. The (orthographic) images are rendered from pointclouds and show a bin randomly filled with objects of multiple types. Now we want to find the *best* grasp within the bin. You can find working examples in the corresponding [directory](). At first, we need to import `griffig`, generate a pointcloud, and create the main `Griffig` instance.

```python
from griffig import Affine, Griffig, Gripper, Pointcloud, BoxData

# Griffig requires an RGB pointcloud of the scene
pointcloud = Pointcloud.fromRealsense(camera.record_pointcloud())

# Specify some options
griffig = Griffig(
    model='two-finger',  # Use the default model for a two-finger gripper
    gripper=Gripper(
        min_stroke=0.01,  # [m]
        max_stroke=0.10,  # [m], to limit pre-shaped stroke
    ),
)
```

Next to the model name (or a path for your own models), we pass in some details about the robot's gripper. In particular, we limit the pre-shaped gripper stroke (also called width). We can now calculate the best possible grasp within the scene. To get the grasp in the global frame, we pass Griffig the camera pose of the pointcloud.

```python
grasp = griffig.calculate_grasp(pointcloud, camera_pose=camera_pose)

# Make use of the grasp here!
print(f'Grasp at {grasp.pose} with probability {grasp.estimated_reward}')
```

The grasp pose is given as an [Affx](https://github.com/pantor/affx) affine, which is a very light wrapper around [Eigen's](https://eigen.tuxfamily.org) `Affine3d` class. On top, we can easily generate a heatmap of grasp probabilities as a PIL image to visualize the model.

```python
heatmap = griffig.calculate_heatmap()
heatmap.show()
```

Below, we show the usage of the *Griffig* library in some more detail.


### BoxData Class

We can define a box to avoid grasps outside of the bin (and even worse: grasps of the bin itself). A box can be constructed from its contour, given as a polygon in the image frame. To construct a cuboid box, we simplify this to

```python
box_data = BoxData(
    size=(0.2, 0.3, 0.1),  # (x, y, z) [m]
    center=(0.0, 0.0, 0.0),  # At the center [m]
)
```
with the size and center position of the box.


### Gripper Class

We use the gripper class for collision checks and to specify the minimum and maximum gripper stroke:

```python
gripper = Gripper(  # Everything in [m]
    # Pre-shaped stroke interval
    min_stroke=0.01,
    max_stroke=0.10,
    # Size of a bounding box for optional collision check
    finger_width=0.02,  # Finger width
    finger_extent=0.008,  # Finger extent (in direction of finger opening/closing)
    finger_height=0.1,  # Finger height from tip to gripper base
    # An optional offset for the local grasp pose
    offset=Affine(z=0.02),
)
```

### Griffig Class

The `Griffig` class is the main interface for grasp calculations. You can create a `Griffig` instance with the following arguments:

```python
griffig = Griffig(
    model='two-finger-planar',
    gripper=gripper,
    box_data=box_data,  # Might be None
    avoid_collisions=True,  # If true, check collisions using the given pointcloud and gripper data
)
```

Griffig includes a [model library](https://griffig.xyz/model-library) for different tasks and downloads the models automatically.


### Pointcloud Class

Griffig uses its own pointcloud class as input to its rendering pipeline. Currently, three possible inputs are supported:

```python
# (1) Input from a realsense frame
pointcloud = Pointcloud(realsense_frame=<...>)

# (2) Input from a ROS Pointcloud2 message
pointcloud = Pointcloud(ros_message=<...>)

# (3) The raw pointer variant...
pointcloud = Pointcloud(type=PointType.XYZRGB, data=cloud_data.ptr())

# Then, we can render the pointcloud as a PIL image
image = griffig.render(pointcloud)
image.show()
```

Note that the pointcloud only stores a pointer to the data and does not hold the data itself.


### Grasp Class

The calculated grasp contains - of course - information about its grasp pose, but also some more details. At first, we calculate the next grasp based on the current pointcloud input.

```python
grasp = griffig.calculate_grasp(pointcloud, camera_pose=camera_pose)  # Get grasp in the global frame using the camera pose

print(f'Calculated grasp {grasp} in {grasp.calculation_duration} [ms].')  # Calculation duration in [ms]
```

When using a GPU, the grasp calculation should not take longer than a few hundred milliseconds, and most probably stays below 70 ms. A typical grasp pipeline would then look like this:

```python
# (1) Check if the grasp reward is below a user-defined threshold
if grasp.estimated_reward < 0.2:  # Grasp probability in [0, 1]
    print('The bin is probably empty!')

approach_start = grasp.pose * Affine(z=-0.12)  # Approx. finger length [m]

# (2) Move robot to start of approach trajectory
robot.move_linear(approach_start)

# (3) Move gripper to pre-shaped grasp stroke
robot.move_gripper(grasp.pose.d)

# (4) Move robot to actual grasp pose
robot.move_linear(grasp.pose)

# (5) And finally close the gripper
robot.close_gripper()
```

The robot should have grasped something! If something went wrong, make sure to call `griffig.report_grasp_failure()`, so that Griffig will output a different grasp next time.


## Development

Griffig is written in C++17 and Python 3.7 (or higher). It is tested on Ubuntu 20.04 against the following dependency versions:

- OpenCV 4.5
- Eigen 3.3.9
- PyBind11 2.6
- TensorFlow 2.4

To build the Docker image, call `docker build .`.


## License

Griffig is published under the permissive MIT license. Feel free to contact us in case of commercial interest.
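## Full Example (Sketch)

To tie the tutorial together, the snippets above can be combined into a simple pick loop. This is only a sketch under a few assumptions: `camera`, `camera_pose`, `robot`, and `robot.did_grasp_object()` are hypothetical interfaces of your own setup (they are not part of Griffig), and the reward threshold as well as the approach offset are example values rather than recommendations.

```python
from griffig import Affine, BoxData, Griffig, Gripper, Pointcloud

griffig = Griffig(
    model='two-finger-planar',
    gripper=Gripper(min_stroke=0.01, max_stroke=0.10),  # [m]
    box_data=BoxData(size=(0.2, 0.3, 0.1), center=(0.0, 0.0, 0.0)),  # [m]
)

while True:
    # Record a new pointcloud (hypothetical camera interface, similar to examples/grasp_realsense.py)
    pointcloud = Pointcloud(realsense_frames=camera.wait_for_frames())

    grasp = griffig.calculate_grasp(pointcloud, camera_pose=camera_pose)
    if grasp is None or grasp.estimated_reward < 0.2:
        break  # No feasible grasp found, the bin is probably empty

    # Approach parallel to the gripper fingers, then grasp (hypothetical robot interface)
    robot.move_linear(grasp.pose * Affine(z=-0.12))
    robot.move_gripper(grasp.pose.d)
    robot.move_linear(grasp.pose)
    robot.close_gripper()

    if not robot.did_grasp_object():
        griffig.report_grasp_failure()  # Griffig will then suggest a different grasp next time
```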
205 | -------------------------------------------------------------------------------- /doc/base-dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.4.1 2 | 3 | ENV TZ=Europe/Berlin 4 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 5 | 6 | # Install Git (latest version) 7 | RUN apt-get update && apt-get install -y software-properties-common 8 | RUN add-apt-repository ppa:git-core/ppa 9 | RUN apt-get update && apt-get install -y git 10 | 11 | # Install Cmake 12 | RUN python3 -m pip install cmake 13 | 14 | # Install OpenCV 15 | RUN git clone https://github.com/opencv/opencv.git &&\ 16 | cd opencv &&\ 17 | git checkout 4.5.2 &&\ 18 | mkdir build && cd build &&\ 19 | cmake -DWITH_VTK=OFF -DWITH_GTK=OFF -DWITH_PROTOBUF=OFF -DBUILD_PERF_TESTS=OFF -DBUILD_TESTS=OFF -DWITH_WEBP=OFF .. &&\ 20 | make &&\ 21 | make install &&\ 22 | cd ../../ 23 | 24 | # Install PyBind11 25 | RUN git clone https://github.com/pybind/pybind11.git &&\ 26 | cd pybind11 &&\ 27 | git checkout v2.6.2 &&\ 28 | mkdir build && cd build &&\ 29 | cmake -DPYBIND11_TEST=OFF .. &&\ 30 | make &&\ 31 | make install &&\ 32 | cd ../../ 33 | 34 | # Install EGL 35 | RUN apt-get install -y libgl1-mesa-dev libglu1-mesa-dev libgles2-mesa-dev libglew-dev libeigen3-dev 36 | 37 | # Install Python3 Numpy C files 38 | RUN apt-get install -y python3-numpy 39 | -------------------------------------------------------------------------------- /doc/input.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/doc/input.jpeg -------------------------------------------------------------------------------- /doc/systemnew-sm.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/doc/systemnew-sm.JPG -------------------------------------------------------------------------------- /examples/grasp_depth_image.py: -------------------------------------------------------------------------------- 1 | from griffig import BoxData, Griffig 2 | 3 | from loader import Loader 4 | 5 | 6 | if __name__ == '__main__': 7 | box_data = BoxData([-0.002, -0.0065, 0.0], [0.174, 0.282, 0.0]) 8 | griffig = Griffig('two-finger-planar', gpu=0) 9 | 10 | image = Loader.get_image('1') 11 | 12 | # Calculate grasp multiple times to fully load NN the first time 13 | for _ in range(3): 14 | grasp = griffig.calculate_grasp_from_image(image, box_data) 15 | 16 | print(grasp) 17 | print(f'Calculation duration: {grasp.calculation_duration*1000:0.2f} [ms], details: {grasp.detail_durations}') 18 | 19 | img = griffig.draw_grasp_on_image(image, grasp, channels='RGB') 20 | img.show() 21 | -------------------------------------------------------------------------------- /examples/grasp_realsense.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import pyrealsense2 as rs 4 | 5 | from griffig import Griffig, Gripper, BoxData, Pointcloud 6 | from loader import Loader 7 | 8 | 9 | class RealsenseReader: 10 | def __init__(self, bag_file): 11 | self.bag_file = bag_file 12 | self.colorizer = rs.colorizer() 13 | self.pipeline = rs.pipeline() 14 | self.config = rs.config() 15 | rs.config.enable_device_from_file(self.config, self.bag_file) 16 | self.config.enable_stream(rs.stream.depth, 
1280, 720, rs.format.any, 30) 17 | self.config.enable_stream(rs.stream.color, 1280, 720, rs.format.any, 30) 18 | 19 | def __del__(self): 20 | self.config.disable_all_streams() 21 | 22 | def wait_for_frames(self): 23 | self.pipeline.start(self.config) 24 | return self.pipeline.wait_for_frames() 25 | 26 | 27 | if __name__ == '__main__': 28 | parser = ArgumentParser() 29 | parser.add_argument('-i', '--input', type=str, default=str(Loader.data_path / '20210318_110437.bag')) 30 | args = parser.parse_args() 31 | 32 | box_data = BoxData( 33 | center=(0.0, 0.017, 0.0), # At the center [m] 34 | size=(0.18, 0.285, 0.1), # (x, y, z) [m] 35 | ) 36 | 37 | gripper = Gripper( # Some information about the gripper 38 | min_stroke=0.01, # Min. pre-shaped width in [m] 39 | max_stroke=0.10, # Max. pre-shaped width in [m] 40 | ) 41 | 42 | griffig = Griffig( 43 | model='two-finger-planar', # Use the default model for a two-finger gripper 44 | gripper=gripper, 45 | box_data=box_data, 46 | typical_camera_distance=0.41, # Typical distance between camera and bin (used for depth image rendering) [m] 47 | ) 48 | 49 | realsense = RealsenseReader(args.input) 50 | frames = realsense.wait_for_frames() 51 | 52 | pointcloud = Pointcloud(realsense_frames=frames) 53 | 54 | # Return image (depth channel) with grasp for visualization 55 | grasp, image = griffig.calculate_grasp(pointcloud, return_image=True, channels='D') 56 | print(grasp) 57 | image.show() 58 | -------------------------------------------------------------------------------- /examples/loader.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | from griffig import OrthographicImage 7 | 8 | 9 | class Loader: 10 | data_path = Path(__file__).parent.absolute().parent / 'test' / 'data' 11 | 12 | @classmethod 13 | def get_image(cls, episode_id: str): 14 | mat_color = cv2.imread(str(cls.data_path / f'{episode_id}-rc.jpeg'), cv2.IMREAD_UNCHANGED).astype(np.uint16) * 255 15 | mat_depth = cv2.imread(str(cls.data_path / f'{episode_id}-rd.jpeg'), cv2.IMREAD_UNCHANGED).astype(np.uint16) * 255 16 | mat_depth = cv2.cvtColor(mat_depth, cv2.COLOR_BGR2GRAY) 17 | 18 | mat = np.concatenate([mat_color, np.expand_dims(mat_depth, axis=2)], axis=2) 19 | return OrthographicImage(mat, 2000.0, 0.22, 0.41) 20 | -------------------------------------------------------------------------------- /examples/render_heatmap.py: -------------------------------------------------------------------------------- 1 | from griffig import BoxData, Griffig 2 | 3 | from loader import Loader 4 | 5 | 6 | if __name__ == '__main__': 7 | box_data = BoxData([-0.002, -0.0065, 0.0], [0.174, 0.282, 0.0]) 8 | griffig = Griffig('two-finger-planar', gpu=0) 9 | 10 | image = Loader.get_image('1') 11 | heatmap = griffig.calculate_heatmap_from_image(image, box_data) 12 | heatmap.show() 13 | -------------------------------------------------------------------------------- /examples/render_pointcloud.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import rospy 4 | from sensor_msgs.msg import PointCloud2 5 | 6 | from griffig import Griffig, Pointcloud, Renderer 7 | 8 | 9 | if __name__ == '__main__': 10 | base = Path(__file__).parent 11 | rospy.init_node('pointcloud_renderer', anonymous=True) 12 | 13 | # Orthographic image renderer: (width, height), typical distance [m], pixel density [px/m], depth difference [m] 14 | renderer = Renderer((752, 
480), 0.41, 2000.0, 0.19) 15 | 16 | pointcloud_message = rospy.wait_for_message('/camera/depth/color/points', PointCloud2) 17 | 18 | pointcloud = Pointcloud(ros_message=pointcloud_message) 19 | print(f'Pointcloud has {pointcloud.size} points') 20 | 21 | image = renderer.render_pointcloud(pointcloud) 22 | pillow_image = Griffig.convert_to_pillow_image(image, channels='RGBD') 23 | pillow_image.save(base / 'data' / 'image-rendered.png') 24 | -------------------------------------------------------------------------------- /griffig/__init__.py: -------------------------------------------------------------------------------- 1 | from pyaffx import Affine 2 | 3 | from _griffig import ( 4 | BoxData, 5 | Grasp, 6 | Gripper, 7 | OrthographicImage, 8 | PointType, 9 | Pointcloud, 10 | Renderer, 11 | RobotPose, 12 | ) 13 | 14 | from .griffig import Griffig 15 | from .infer.inference import Inference 16 | from .utility.heatmap import Heatmap 17 | from .utility.model_data import ModelData, ModelArchitecture 18 | from .utility.model_library import ModelLibrary 19 | -------------------------------------------------------------------------------- /griffig/action/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/griffig/action/__init__.py -------------------------------------------------------------------------------- /griffig/action/checker.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from pyaffx import Affine 5 | from _griffig import BoxData, Grasp, Gripper, OrthographicImage, RobotPose 6 | from ..utility.image import get_area_of_interest 7 | 8 | 9 | class Checker: 10 | def __init__(self, converter, lateral_grasps=False, avoid_collisions=True): 11 | self.converter = converter 12 | self.lateral_grasps = lateral_grasps 13 | self.avoid_collisions = avoid_collisions 14 | 15 | def find_grasp(self, grasp_gen, image: OrthographicImage, box_data: BoxData = None, gripper: Gripper = None): 16 | grasp = next(grasp_gen) 17 | 18 | while not self.check(grasp, image, box_data, gripper): 19 | try: 20 | grasp = next(grasp_gen) 21 | except StopIteration: 22 | return None 23 | 24 | grasp_gen.close() 25 | 26 | if gripper: 27 | grasp.pose = grasp.pose * gripper.offset 28 | 29 | return grasp 30 | 31 | def check(self, grasp: Grasp, image: OrthographicImage, box_data: BoxData = None, gripper: Gripper = None): 32 | self.converter.index_to_action(grasp) 33 | 34 | area_size = (0.1, 0.1) # [m] 35 | image_area = get_area_of_interest(image, grasp.pose, (int(image.pixel_size * area_size[0]), int(image.pixel_size * area_size[1]))) 36 | 37 | self.converter.calculate_z(image_area, grasp) 38 | 39 | if self.lateral_grasps: 40 | self.converter.calculate_b(image_area, grasp) 41 | self.converter.calculate_c(image_area, grasp) 42 | 43 | is_safe = np.isfinite([grasp.pose.x, grasp.pose.y, grasp.pose.z]).all() 44 | 45 | if box_data: 46 | is_safe &= box_data.is_pose_inside(RobotPose(grasp.pose, d=grasp.stroke)) 47 | 48 | if self.avoid_collisions: 49 | pass # Do the gripper rendering here 50 | 51 | return is_safe 52 | -------------------------------------------------------------------------------- /griffig/action/converter.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from ..utility.image import crop 5 | 6 | 7 | class Converter: 8 | def __init__(self, 
gripper_strokes, z_offset=0.0): 9 | self.gripper_strokes = gripper_strokes 10 | self.z_offset = z_offset 11 | 12 | self.gripper_width = 0.016 # [m] 13 | self.gripper_height = 0.004 # [m] 14 | 15 | self.max_lateral_angle = 0.5 # [rad] 16 | self.image_area_size = 0.12 # [m] 17 | 18 | def __len__(self): 19 | return len(self.gripper_strokes) 20 | 21 | def index_to_action(self, grasp): 22 | grasp.stroke = self.gripper_strokes[grasp.index] 23 | 24 | def consider_indices(self, gripper): 25 | arr = np.array(self.gripper_strokes) 26 | return (gripper.min_stroke <= arr) & (arr <= gripper.max_stroke) 27 | 28 | def calculate_z(self, image_area, grasp): 29 | """Model-based calculation of the grasp distance z""" 30 | 31 | mat_area_image = image_area.mat[:, :, 3] 32 | mat_area_image = mat_area_image.astype(np.float32) / np.iinfo(image_area.mat.dtype).max 33 | mat_area_image[mat_area_image < 0.02] = np.NaN # Make every not found pixel NaN 34 | 35 | # Get distance at gripper for possible collisions 36 | area_center_default_size = 0.012 # [m] 37 | area_center_default_size_px = image_area.pixel_size * area_center_default_size 38 | 39 | gripper_one_side_size = 0.5 * image_area.pixel_size * (grasp.stroke + 0.002) # [px] 40 | area_center = crop(mat_area_image, (area_center_default_size_px, area_center_default_size_px)) 41 | side_gripper_image_size = (image_area.pixel_size * 0.025, image_area.pixel_size * 0.025) 42 | area_left = crop(mat_area_image, side_gripper_image_size, (-gripper_one_side_size, 0)) 43 | area_right = crop(mat_area_image, side_gripper_image_size, (gripper_one_side_size, 0)) 44 | 45 | z_raw = image_area.depth_from_value(np.nanmedian(area_center) * 255 * 255) 46 | if z_raw is np.NaN: 47 | area_center = crop(mat_area_image, (image_area.pixel_size * 0.03, image_area.pixel_size * 0.03)) 48 | z_raw = image_area.depth_from_value(np.nanmedian(area_center) * 255 * 255) 49 | 50 | if z_raw is np.NaN: 51 | print('[converter] z is NaN!') 52 | 53 | analyze_collision = True and area_left.size > 0 and area_right.size > 0 54 | 55 | if analyze_collision: 56 | z_raw_left = image_area.depth_from_value(np.nanmin(area_left) * 255 * 255) 57 | z_raw_right = image_area.depth_from_value(np.nanmin(area_right) * 255 * 255) 58 | z_raw_collision = min(z_raw_left, z_raw_right) - 0.03 # [m] 59 | else: 60 | z_raw_collision = np.Inf 61 | 62 | grasp.pose.z = min(z_raw, z_raw_collision) + self.z_offset # Get the maximum [m] for impedance mode 63 | 64 | def calculate_b(self, image_area, grasp): 65 | b_lateral_default_size_px_height = int(image_area.pixel_size * self.image_area_size) 66 | 67 | b_gripper_height = int(image_area.pixel_size * self.gripper_width) 68 | b_gripper_width = int(image_area.pixel_size * self.gripper_height) 69 | 70 | b_lateral_default_size_px_width = image_area.pixel_size * (action.pose.d + self.gripper_height) # [px] 71 | b_lateral_area = crop(image_area, (b_lateral_default_size_px_height, b_lateral_default_size_px_width)) / 255 72 | 73 | b_lateral_area_left = b_lateral_area[:, :2*b_gripper_width] / 255 74 | b_lateral_area_right = b_lateral_area[:, -2*b_gripper_width:] / 255 75 | 76 | values_left = np.gradient(b_lateral_area_left, axis=0) 77 | values_right = np.gradient(b_lateral_area_right, axis=0) 78 | 79 | weights_left = np.ones_like(values_left) 80 | weights_left[np.isnan(values_left)] = np.nan 81 | 82 | weights_right = np.ones_like(values_right) 83 | weights_right[np.isnan(values_right)] = np.nan 84 | 85 | a = np.linspace(-self.image_area_size / 2, self.image_area_size / 2, b_lateral_area.shape[0]) 86 
| sigma = 1.3 * self.gripper_width 87 | s = 0.042 * np.exp(-0.5 * np.power(a / sigma, 2.)) / (np.sqrt(2 * np.pi) * sigma) 88 | s = np.expand_dims(s, -1) 89 | weights_left *= s 90 | weights_right *= s 91 | 92 | weighted_gradient = np.nansum(weights_left * values_left) / weights_left.size + np.nansum(weights_right * values_right) / weights_right.size 93 | 94 | new_b = np.arctan2((image_area.max_depth - image_area.min_depth) * 0.5 * weighted_gradient, 1. / image_area.pixel_size) 95 | grasp.pose.b = np.clip(new_b, -self.max_lateral_angle, self.max_lateral_angle) 96 | 97 | def calculate_c(self, image_area, grasp): 98 | c_lateral_default_size_px_height = int(image_area.pixel_size * 0.016 / 2) 99 | c_lateral_default_size_px_width = image_area.pixel_size * (action.pose.d + 0.004) # [px] 100 | c_lateral_area = crop(image_area, (c_lateral_default_size_px_height, c_lateral_default_size_px_width)) / 255 101 | 102 | mask = np.isnan(c_lateral_area).astype(np.uint8) 103 | 104 | # Make distance constant 2cm below the grasp point 105 | min_value_for_gradient = image_area.value_from_depth(-action.pose.z + 0.04) / (255 * 255) 106 | lateral_area_pre_inpaint = c_lateral_area.astype(np.uint8) 107 | lateral_area_pre_inpaint[lateral_area_pre_inpaint < 255 * min_value_for_gradient] = 255 * min_value_for_gradient 108 | c_lateral_area_inpaint = cv2.inpaint(lateral_area_pre_inpaint, mask, 3, cv2.INPAINT_TELEA) / 255 109 | 110 | gradient = np.mean(np.diff(c_lateral_area_inpaint, axis=1), axis=0) 111 | 112 | left_gradient_max = np.max(gradient) 113 | right_gradient_max = np.min(gradient) 114 | 115 | gradient_norm = (image_area.max_depth - image_area.min_depth) 116 | left_gamma_1 = np.pi / 2 - np.arctan2(gradient_norm * left_gradient_max, 1. / image_area.pixel_size) 117 | right_gamma_1 = -np.pi / 2 - np.arctan2(gradient_norm * right_gradient_max, 1. / image_area.pixel_size) 118 | 119 | gamma_mean = np.arctan2(gradient_norm * np.mean(gradient), 1. 
/ image_area.pixel_size) 120 | 121 | certainty_parameter = 0.0 122 | eps = 0.03 123 | if left_gamma_1 < eps and right_gamma_1 > -eps: 124 | new_c = (left_gamma_1 + right_gamma_1 + 2 * gamma_mean) / 4 125 | elif left_gamma_1 >= eps and right_gamma_1 <= -eps: 126 | new_c = gamma_mean 127 | elif left_gamma_1 <= eps: 128 | new_c = ((1 - certainty_parameter) * right_gamma_1 + gamma_mean) / 2 129 | else: 130 | new_c = ((1 - certainty_parameter) * -left_gamma_1 + gamma_mean) / 2 131 | 132 | grasp.pose.c = np.clip(new_c, -self.max_lateral_angle, self.max_lateral_angle) 133 | -------------------------------------------------------------------------------- /griffig/griffig.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from pathlib import Path 3 | 4 | import cv2 5 | import numpy as np 6 | from PIL import Image 7 | 8 | from _griffig import BoxData, Gripper, Pointcloud, Renderer, RobotPose 9 | from .action.checker import Checker 10 | from .action.converter import Converter 11 | from .infer.inference import Inference 12 | from .infer.selection import Method, Max, Top 13 | from .utility.heatmap import Heatmap 14 | from .utility.image import draw_around_box, draw_pose 15 | from .utility.model_library import ModelData, ModelLibrary, ModelArchitecture 16 | 17 | 18 | class Griffig: 19 | def __init__( 20 | self, 21 | model: Union[str, ModelData, Path] ='two-finger-planar', 22 | gripper: Gripper = None, 23 | box_data: BoxData = None, 24 | typical_camera_distance: int = None, 25 | avoid_collisions=False, 26 | gpu: int = None, 27 | verbose = 0, 28 | ): 29 | self.gripper = gripper 30 | self.box_data = box_data 31 | 32 | self.model_data = model if isinstance(model, ModelData) else ModelLibrary.load_model_data(model) 33 | self.inference = Inference.create(self.model_data, gpu=gpu, verbose=verbose) 34 | 35 | self.converter = Converter(self.model_data.gripper_widths) 36 | self.checker = Checker(self.converter, avoid_collisions=avoid_collisions) 37 | 38 | self.typical_camera_distance = typical_camera_distance if typical_camera_distance is not None else 0.5 39 | 40 | if box_data: 41 | self.renderer = Renderer(box_data, self.typical_camera_distance, self.model_data.pixel_size, self.model_data.depth_diff) 42 | else: 43 | self.renderer = Renderer((752, 480), self.typical_camera_distance, self.model_data.pixel_size, self.model_data.depth_diff) 44 | 45 | self.last_grasp_successful = True 46 | 47 | def render(self, pointcloud: Pointcloud, pixel_size=None, min_depth=None, max_depth=None, position=[0.0, 0.0, 0.0]): 48 | pixel_size = pixel_size if pixel_size is not None else self.model_data.pixel_size 49 | min_depth = min_depth if min_depth is not None else self.typical_camera_distance - self.model_data.depth_diff 50 | max_depth = max_depth if max_depth is not None else self.typical_camera_distance 51 | 52 | img = self.renderer.render_pointcloud_mat(pointcloud, pixel_size, min_depth, max_depth, position) 53 | return self.convert_to_pillow_image(img) 54 | 55 | def calculate_grasp(self, pointcloud: Pointcloud, camera_pose=None, box_data=None, gripper=None, method=None, return_image=False, channels='RGBD'): 56 | image = self.renderer.render_pointcloud(pointcloud) 57 | grasp = self.calculate_grasp_from_image(image, box_data=box_data, gripper=gripper, method=method) 58 | 59 | if return_image: 60 | image = image.clone() 61 | return grasp, self.draw_grasp_on_image(image, grasp, channels=channels) 62 | return grasp 63 | 64 | def calculate_grasp_from_image(self, 
image, box_data=None, gripper=None, method=None): 65 | box_data = box_data if box_data else self.box_data 66 | gripper = gripper if gripper else self.gripper 67 | selection_method = method if method else (Max() if self.last_grasp_successful else Top(5)) 68 | 69 | action_generator = self.inference.infer(image, selection_method, box_data=box_data) 70 | grasp = self.checker.find_grasp(action_generator, image, box_data=box_data, gripper=self.gripper) 71 | self.last_grasp_successful = True 72 | return grasp 73 | 74 | def calculate_heatmap(self, pointcloud: Pointcloud, box_data: BoxData = None, a_space=None): 75 | pixel_size = self.model_data.pixel_size 76 | min_depth = self.typical_camera_distance - self.model_data.depth_diff 77 | max_depth = self.typical_camera_distance 78 | position = [0.0, 0.0, 0.0] 79 | 80 | img = self.renderer.render_pointcloud_mat(pointcloud, pixel_size, min_depth, max_depth, position) 81 | return self.calculate_heatmap_from_image(img, box_data, a_space) 82 | 83 | def calculate_heatmap_from_image(self, image, box_data: BoxData = None, a_space=None): 84 | a_space = a_space if a_space is not None else [0.0] 85 | heatmapper = Heatmap(self.inference, a_space=a_space) 86 | heatmap = heatmapper.render(image, box_data=box_data) 87 | return Image.fromarray((heatmap[:, :, 2::-1]).astype(np.uint8)) 88 | 89 | def report_grasp_failure(self): 90 | self.last_grasp_successful = False 91 | 92 | @classmethod 93 | def convert_to_pillow_image(cls, image, channels='RGBD'): 94 | mat = cv2.convertScaleAbs(cv2.cvtColor(image.mat, cv2.COLOR_BGRA2RGBA), alpha=(255.0/65535.0)).astype(np.uint8) 95 | pillow_image = Image.fromarray(mat, 'RGBA') 96 | if channels == 'RGB': 97 | return pillow_image.convert('RGB') 98 | elif channels == 'D': 99 | return pillow_image.getchannel('A') 100 | return pillow_image 101 | 102 | @classmethod 103 | def draw_box_on_image(cls, image, box_data, draw_lines=False, channels='RGBD'): 104 | draw_around_box(image, box_data, draw_lines) 105 | return cls.convert_to_pillow_image(image, channels) 106 | 107 | @classmethod 108 | def draw_grasp_on_image(cls, image, grasp, channels='RGBD', convert_to_rgb=True): 109 | if channels == 'D' and convert_to_rgb: 110 | image.mat = cv2.cvtColor(image.mat[:, :, 3], cv2.COLOR_GRAY2RGB) 111 | channels = 'RGB' 112 | 113 | draw_pose(image, RobotPose(grasp.pose, d=grasp.stroke)) 114 | return cls.convert_to_pillow_image(image, channels) 115 | -------------------------------------------------------------------------------- /griffig/infer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/griffig/infer/__init__.py -------------------------------------------------------------------------------- /griffig/infer/inference.py: -------------------------------------------------------------------------------- 1 | from ..utility.model_data import ModelArchitecture 2 | from ..infer.inference_actor_critic import InferenceActorCritic 3 | from ..infer.inference_planar import InferencePlanar 4 | from ..infer.inference_semantic import InferencePlanarSemantic 5 | 6 | 7 | class Inference: 8 | """A factory class for inference""" 9 | 10 | @classmethod 11 | def create(cls, model_data, *params, **kwargs): 12 | if ModelArchitecture(model_data.architecture) == ModelArchitecture.ModelBasedConvolution: 13 | raise Exception('Architecture model based convolution is not implemented!') 14 | 15 | if ModelArchitecture(model_data.architecture) == 
ModelArchitecture.PlanarSemantic: 16 | return InferencePlanarSemantic(model_data, *params, **kwargs) 17 | 18 | if ModelArchitecture(model_data.architecture) == ModelArchitecture.ActorCritic: 19 | return InferenceActorCritic(model_data, *params, **kwargs) 20 | 21 | return InferencePlanar(model_data, *params, **kwargs) 22 | -------------------------------------------------------------------------------- /griffig/infer/inference_actor_critic.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | 3 | import numpy as np 4 | from scipy.ndimage import gaussian_filter 5 | 6 | from _griffig import BoxData, Grasp, Gripper, OrthographicImage 7 | from ..infer.inference_base import InferenceBase 8 | from ..infer.selection import Method, Max 9 | 10 | 11 | class InferenceActorCritic(InferenceBase): 12 | def infer(self, image: OrthographicImage, method: Method, box_data: BoxData = None, gripper: Gripper = None): 13 | start = time() 14 | 15 | input_images = self.get_input_images(image, box_data) 16 | 17 | pre_duration = time() - start 18 | start = time() 19 | 20 | rewards_and_actions = self.model(input_images) 21 | 22 | nn_duration = time() - start 23 | start = time() 24 | 25 | estimated_rewards = np.array(rewards_and_actions[::2]) 26 | action_from_actor = np.array(rewards_and_actions[1::2]) 27 | 28 | if self.model_data.output[0] == 'reward+human': 29 | estimated_rewards = estimated_rewards[:, :, :, :, :4] 30 | 31 | if gripper: 32 | possible_indices = gripper.consider_indices(self.model_data.gripper_widths) 33 | for i in range(estimated_rewards.shape[0]): 34 | self.set_last_dim_to_zero(estimated_rewards[i], np.invert(possible_indices)) 35 | 36 | if self.gaussian_sigma: 37 | for i in range(estimated_rewards.shape[0]): 38 | for j in range(estimated_rewards.shape[1]): 39 | estimated_rewards[i][j] = gaussian_filter(estimated_rewards[i][j], self.gaussian_sigma) 40 | 41 | for _ in range(estimated_rewards.size): 42 | index_raveled = method(estimated_rewards) 43 | index = np.unravel_index(index_raveled, estimated_rewards.shape) 44 | 45 | action = Grasp() 46 | action.index = index[4] 47 | action.pose = self.pose_from_index(index[1:], estimated_rewards.shape[1:], image) 48 | action.pose.z, action.pose.b, action.pose.c = action_from_actor[index[0], index[1], index[2], index[3]] 49 | action.estimated_reward = estimated_rewards[index] 50 | action.detail_durations = { 51 | 'pre': pre_duration, 52 | 'nn': nn_duration, 53 | 'post': time() - start, 54 | } 55 | action.calculation_duration = sum(action.detail_durations.values()) 56 | 57 | yield action 58 | 59 | method.disable(index, estimated_rewards) 60 | return 61 | -------------------------------------------------------------------------------- /griffig/infer/inference_base.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | 4 | import cv2 5 | from loguru import logger 6 | import numpy as np 7 | import tensorflow.keras as tk 8 | 9 | from pyaffx import Affine 10 | from _griffig import BoxData, RobotPose, OrthographicImage 11 | from ..utility.image import draw_around_box, draw_around_box2, get_inference_image, get_box_projection 12 | 13 | 14 | class InferenceBase: 15 | def __init__(self, model_data, gaussian_sigma=None, gpu: int = None, seed: int = None, verbose=0): 16 | self.model_data = model_data 17 | self.model = self._load_model(model_data.path, 'grasp', gpu=gpu) 18 | self.gaussian_sigma = gaussian_sigma 19 | self.rs = 
np.random.default_rng(seed=seed) 20 | self.verbose = verbose 21 | 22 | self.size_area_cropped = model_data.size_area_cropped 23 | self.size_result = model_data.size_result 24 | self.scale_factors = (self.size_area_cropped[0] / self.size_result[0], self.size_area_cropped[1] / self.size_result[1]) 25 | self.a_space = np.linspace(-np.pi/2 + 0.1, np.pi/2 - 0.1, 20) # [rad] # Don't use a=0.0 -> even number 26 | self.keep_indixes = None 27 | 28 | def _load_model(self, path: Path, submodel=None, gpu=None): 29 | if os.getenv('GRIFFIG_HARDWARE') == 'jetson-nano': 30 | import tensorflow as tf 31 | 32 | logger.info('Detected NVIDIA Jetson Nano Platform') 33 | device = tf.config.list_physical_devices('GPU') 34 | tf.config.experimental.set_memory_growth(device[0], True) 35 | tf.config.experimental.set_virtual_device_configuration(device[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=512)]) 36 | 37 | elif gpu is not None: 38 | import tensorflow as tf 39 | 40 | devices = tf.config.list_physical_devices('GPU') 41 | tf.config.experimental.set_visible_devices(devices[gpu], 'GPU') 42 | for device in devices: 43 | tf.config.experimental.set_memory_growth(device, True) 44 | 45 | model = tk.models.load_model(path, compile=False) 46 | 47 | if submodel: 48 | model = model.get_layer(submodel) 49 | 50 | # TensorRT 51 | use_tensorrt = os.getenv('GRIFFIG_HARDWARE') == 'jetson-nano' 52 | if use_tensorrt: 53 | import tensorflow as tf 54 | from tensorflow.python.compiler.tensorrt import trt_convert as trt 55 | 56 | converted_path = path / 'converted' 57 | 58 | if not converted_path.exists(): 59 | submodel_path = path / 'submodel' 60 | model.save(str(submodel_path)) 61 | 62 | logger.info('Convert to TensorRT') 63 | conversion_params = trt.TrtConversionParams( 64 | precision_mode=trt.TrtPrecisionMode.FP16, 65 | # max_batch_size=32, 66 | ) 67 | 68 | def my_input_fn(): 69 | # Let's assume a network with 2 input tensors. 
We generate 3 sets 70 | # of dummy input data: 71 | input_shapes = [[(20, 110, 110, 4)], # min and max range for 1st input list 72 | [(20, 110, 110, 4)], # min and max range for 2nd list of two tensors 73 | [(20, 110, 110, 4)]] # 3rd input list 74 | for shapes in input_shapes: 75 | # return a list of input tensors 76 | yield [np.zeros(x).astype(np.float32) for x in shapes] 77 | 78 | converter = trt.TrtGraphConverterV2( 79 | input_saved_model_dir=str(submodel_path), 80 | conversion_params=conversion_params 81 | ) 82 | converter.convert() 83 | converter.build(input_fn=my_input_fn) 84 | 85 | converter.save(str(converted_path)) 86 | 87 | root = tf.saved_model.load(str(converted_path)) 88 | func = root.signatures['serving_default'] 89 | 90 | def predict(x): 91 | x = tf.convert_to_tensor(x) 92 | output = [y.numpy() for y in func(x).values()] 93 | 94 | if len(output) == 1: 95 | return output[0] 96 | return output 97 | return predict 98 | 99 | def predict(x): 100 | return model.predict_on_batch(x) 101 | return predict 102 | 103 | def _get_size_cropped(self, image, box_data: BoxData): 104 | box_projection = get_box_projection(image, box_data) 105 | center = np.array([image.mat.shape[1], image.mat.shape[0]]) / 2 106 | farthest_corner = np.max(np.linalg.norm(box_projection - center, axis=1)) 107 | side_length = int(np.ceil(2 * farthest_corner * self.size_result[0] / self.size_area_cropped[0])) 108 | return (side_length, side_length) 109 | 110 | def pose_from_index(self, index, index_shape, image: OrthographicImage, resolution_factor=2.0): 111 | return Affine( 112 | x=resolution_factor * self.scale_factors[0] * image.position_from_index(index[1], index_shape[1]), 113 | y=resolution_factor * self.scale_factors[1] * image.position_from_index(index[2], index_shape[2]), 114 | a=self.a_space[index[0]], 115 | ).inverse() 116 | 117 | def get_input_images(self, orig_image, box_data: BoxData): 118 | image = orig_image.clone() 119 | size_cropped = self._get_size_cropped(orig_image, box_data) 120 | 121 | if box_data: 122 | draw_around_box2(image, box_data) 123 | 124 | result_ = [get_inference_image(image, Affine(a=a), size_cropped, self.size_area_cropped, self.size_result, return_mat=True) for a in self.a_space] 125 | 126 | if self.verbose: 127 | cv2.imwrite('/tmp/test-input-c.png', result_[10][:, :, 3]) 128 | cv2.imwrite('/tmp/test-input-d.png', result_[10][:, :, 3]) 129 | 130 | result = np.array(result_, dtype=np.float32) / np.iinfo(image.mat.dtype).max 131 | if len(result.shape) == 3: 132 | result = np.expand_dims(result, axis=-1) 133 | 134 | return result 135 | 136 | @classmethod 137 | def keep_array_at_last_indixes(cls, array, indixes) -> None: 138 | mask = np.zeros(array.shape) 139 | mask[:, :, :, indixes] = 1 140 | array *= mask 141 | 142 | @classmethod 143 | def set_last_dim_to_zero(cls, array, indixes): 144 | array[:, :, :, indixes] = 0 145 | -------------------------------------------------------------------------------- /griffig/infer/inference_planar.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | 3 | import cv2 4 | import numpy as np 5 | from scipy.ndimage import gaussian_filter 6 | 7 | from _griffig import BoxData, Grasp, Gripper, OrthographicImage 8 | from ..infer.inference_base import InferenceBase 9 | from ..infer.selection import Method, Max 10 | from ..utility.image import draw_around_box2, get_box_projection, get_inference_image 11 | 12 | 13 | class InferencePlanar(InferenceBase): 14 | def infer(self, image: OrthographicImage, 
method: Method, box_data: BoxData = None, gripper: Gripper = None): 15 | start = time() 16 | input_images = self.get_input_images(image, box_data) 17 | 18 | pre_duration = time() - start 19 | start = time() 20 | 21 | estimated_reward = self.model(input_images) 22 | 23 | nn_duration = time() - start 24 | start = time() 25 | 26 | if self.gaussian_sigma: 27 | for i in range(estimated_reward.shape[0]): 28 | estimated_reward[i] = gaussian_filter(estimated_reward[i], self.gaussian_sigma) 29 | 30 | if gripper: 31 | possible_indices = gripper.consider_indices(self.model_data.gripper_widths) 32 | self.set_last_dim_to_zero(estimated_reward, np.invert(possible_indices)) 33 | 34 | for _ in range(estimated_reward.size): 35 | index_raveled = method(estimated_reward) 36 | index = np.unravel_index(index_raveled, estimated_reward.shape) 37 | 38 | action = Grasp() 39 | action.index = index[3] 40 | action.pose = self.pose_from_index(index, estimated_reward.shape, image) 41 | action.pose.z = np.nan 42 | action.estimated_reward = estimated_reward[index] 43 | action.detail_durations = { 44 | 'pre': pre_duration, 45 | 'nn': nn_duration, 46 | 'post': time() - start, 47 | } 48 | action.calculation_duration = sum(action.detail_durations.values()) 49 | 50 | yield action 51 | 52 | method.disable(index, estimated_reward) 53 | return 54 | -------------------------------------------------------------------------------- /griffig/infer/inference_semantic.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | 3 | import cv2 4 | import numpy as np 5 | from scipy.ndimage import gaussian_filter 6 | 7 | from pyaffx import Affine 8 | from _griffig import BoxData, Grasp, Gripper, OrthographicImage 9 | from ..infer.inference_base import InferenceBase 10 | from ..infer.selection import Method, Max 11 | from ..utility.image import draw_around_box2, get_box_projection, get_inference_image 12 | 13 | 14 | class InferencePlanarSemantic(InferenceBase): 15 | def infer(self, image: OrthographicImage, object_image: OrthographicImage, method: Method, box_data: BoxData = None, gripper: Gripper = None): 16 | assert object_image is not None 17 | 18 | start = time() 19 | 20 | input_images = self.get_input_images(image, box_data) 21 | input_object_images = [get_inference_image(object_image, Affine(a=a), (224, 224), (224, 224), (224, 224), return_mat=True) for a in self.a_space] 22 | 23 | input_object_images = np.array(input_object_images) / np.iinfo(object_image.mat.dtype).max 24 | 25 | pre_duration = time() - start 26 | start = time() 27 | 28 | estimated_grasp_reward, estimated_object_reward = self.model([input_images, [input_object_images]]) 29 | 30 | nn_duration = time() - start 31 | start = time() 32 | 33 | estimated_reward = estimated_object_reward * estimated_grasp_reward # np.cbrt(estimated_object_reward * np.power(estimated_grasp_reward, 2)) 34 | 35 | if gripper: 36 | possible_indices = gripper.consider_indices(self.model_data.gripper_widths) 37 | self.set_last_dim_to_zero(estimated_reward, np.invert(possible_indices)) 38 | 39 | for _ in range(estimated_reward.size): 40 | index_raveled = method(estimated_reward) 41 | index = np.unravel_index(index_raveled, estimated_reward.shape) 42 | 43 | action = Grasp() 44 | action.index = index[3] 45 | action.pose = self.pose_from_index(index, estimated_reward.shape, image) 46 | action.pose.z = np.nan 47 | action.estimated_reward = estimated_reward[index] 48 | action.detail_durations = { 49 | 'pre': pre_duration, 50 | 'nn': nn_duration, 51 | 
'post': time() - start, 52 | } 53 | action.calculation_duration = sum(action.detail_durations.values()) 54 | 55 | yield action 56 | 57 | method.disable(index, estimated_reward) 58 | return 59 | -------------------------------------------------------------------------------- /griffig/infer/selection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Method: 5 | def __call__(self, x): 6 | raise NotImplementedError() 7 | 8 | def disable(self, index, x): 9 | raise NotImplementedError 10 | 11 | def __repr__(self): 12 | raise NotImplementedError 13 | 14 | 15 | class Max(Method): 16 | def __call__(self, x): 17 | return x.argmax() 18 | 19 | def disable(self, index, x): 20 | x[index] = -np.inf 21 | 22 | def __repr__(self): 23 | return 'Max' 24 | 25 | 26 | class Top(Method): 27 | def __init__(self, n=5): 28 | self.n = n 29 | 30 | def __call__(self, x): 31 | return np.random.choice(np.argpartition(x, -self.n, axis=None)[-self.n:]) 32 | 33 | def disable(self, index, x): 34 | x[index] = -np.inf 35 | 36 | def __repr__(self): 37 | return 'Top' + str(self.n) 38 | 39 | 40 | class PowerProb(Method): 41 | def __init__(self, power=4): 42 | self.power = power 43 | 44 | def __call__(self, x): 45 | raveled = np.power(np.ravel(x), self.power) 46 | return np.random.choice(np.arange(x.size), p=(raveled / np.sum(raveled))) 47 | 48 | def disable(self, index, x): 49 | x[index] = 0.0 50 | 51 | def __repr__(self): 52 | return 'PowerProb' + str(self.power) 53 | 54 | 55 | class RandomInference(Method): 56 | def __call__(self, x): 57 | return np.random.choice(np.arange(x.size)) 58 | 59 | def disable(self, index, x): 60 | pass 61 | 62 | def __repr__(self): 63 | return 'RandomInference' 64 | -------------------------------------------------------------------------------- /griffig/utility/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/griffig/utility/__init__.py -------------------------------------------------------------------------------- /griffig/utility/box_finder.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from _griffig import BoxData 5 | 6 | 7 | class BoxFinder: 8 | def __init__(self, method='highest-contour', height_buffer=20): 9 | self.method = method 10 | self.height_buffer = height_buffer 11 | 12 | @staticmethod 13 | def contour_rect_area(x): 14 | _, _, w, h = cv2.boundingRect(x) 15 | return w * h 16 | 17 | def find(self, image): 18 | assert self.method == 'highest-contour' 19 | 20 | depth = image.mat[:, :, 3] 21 | 22 | max_value = np.max(depth) - self.height_buffer 23 | _, depth = cv2.threshold(depth, max_value, 255, cv2.THRESH_BINARY) 24 | canny_output = cv2.Canny(depth, 255 / 2, 255) 25 | contours, _ = cv2.findContours(canny_output, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 26 | 27 | if len(contours) < 2: 28 | return None 29 | 30 | # Find second largest contour (inside of the box) 31 | sorted_contours = sorted(contours, key=self.contour_rect_area, reverse=True) 32 | x, y, w, h = cv2.boundingRect(sorted_contours[1]) 33 | 34 | box_center = np.array([y + (h - image.mat.shape[0]) / 2, -x + (-w + image.mat.shape[1]) / 2, 0.0]) / image.pixel_size 35 | box_size = np.array([h, -w, 0.1]) / image.pixel_size 36 | return BoxData(box_center, box_size) 37 | 
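For reference, a minimal sketch of how `BoxFinder` above can be combined with the image loader from the examples directory. It assumes the script is run from `examples/` (so that `loader.py` is importable) and that the test images in `test/data` are available; adjust the import paths to your own setup.

```python
from griffig import Griffig
from griffig.utility.box_finder import BoxFinder

from loader import Loader  # examples/loader.py

# Load a rendered RGBD image from test/data and estimate the bin from its highest contour
image = Loader.get_image('1')
box_data = BoxFinder(method='highest-contour', height_buffer=20).find(image)

if box_data is not None:
    # Draw the detected box onto the image and show it as a PIL image
    Griffig.draw_box_on_image(image, box_data, draw_lines=True, channels='RGB').show()
```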
-------------------------------------------------------------------------------- /griffig/utility/heatmap.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from pyaffx import Affine 5 | from _griffig import BoxData, OrthographicImage 6 | from ..utility.image import draw_line, get_inference_image 7 | from ..utility.model_data import ModelArchitecture 8 | 9 | 10 | class Heatmap: 11 | def __init__(self, inference, a_space=None): 12 | self.inference = inference 13 | 14 | if a_space is not None: 15 | self.inference.a_space = a_space 16 | 17 | def calculate_heat(self, reward, size_cropped, size_result): 18 | size_reward_center = (reward.shape[1] / 2, reward.shape[2] / 2) 19 | scale = self.inference.size_area_cropped[0] / self.inference.size_result[0] * ((size_cropped[0] - 30) / reward.shape[1]) 20 | 21 | a_space_idx = range(len(self.inference.a_space)) 22 | 23 | heat_values = np.zeros(size_result[::-1], dtype=np.float) 24 | for i in a_space_idx: 25 | a = self.inference.a_space[i] 26 | rot_mat = cv2.getRotationMatrix2D(size_reward_center, -a * 180.0 / np.pi, scale) 27 | rot_mat[0][2] += size_result[0] / 2 - size_reward_center[0] 28 | rot_mat[1][2] += size_result[1] / 2 - size_reward_center[1] 29 | heat_values += cv2.warpAffine(reward[i], rot_mat, size_result, borderValue=0) 30 | 31 | norm = (5 * heat_values.max() + len(a_space_idx)) / 6 32 | return (heat_values / norm * 255.0).astype(np.uint8) 33 | 34 | @staticmethod 35 | def get_background(image, use_rgb): 36 | if len(image.mat.shape) >= 3 and image.mat.shape[-1] >= 3: 37 | if use_rgb: 38 | back = cv2.cvtColor(image.mat[:, :, :3], cv2.COLOR_RGB2GRAY) 39 | return cv2.cvtColor(back, cv2.COLOR_GRAY2RGB) 40 | 41 | return cv2.cvtColor(image.mat[:, :, 3:], cv2.COLOR_GRAY2RGB) 42 | 43 | return cv2.cvtColor(image.mat, cv2.COLOR_GRAY2RGB) 44 | 45 | def render( 46 | self, 47 | image: OrthographicImage, 48 | object_image: OrthographicImage = None, 49 | goal_image: OrthographicImage = None, 50 | box_data: BoxData = None, 51 | alpha=0.5, 52 | use_rgb=True, # Otherwise depth 53 | save_path=None, 54 | reward_index=None, 55 | draw_lateral=False, 56 | draw_shifts=False, 57 | draw_indices=None, 58 | alpha_human=0.0, 59 | ): 60 | input_images = self.inference.get_input_images(image, box_data) 61 | 62 | # if goal_image: 63 | # input_images += self.inference.get_input_images(goal_image, box_data) 64 | 65 | if self.inference.model_data.architecture == ModelArchitecture.ActorCritic: 66 | estimated_reward, actor_result = self.inference.model(input_images) 67 | 68 | elif self.inference.model_data.architecture == ModelArchitecture.PlanarSemantic: 69 | input_object_images = [get_inference_image(object_image, Affine(a=a), (224, 224), (224, 224), (224, 224), return_mat=True) for a in self.inference.a_space] 70 | input_object_images = np.array(input_object_images) / np.iinfo(object_image.mat.dtype).max 71 | 72 | estimated_grasp_reward, estimated_object_reward = self.inference.model([input_images, [input_object_images]]) 73 | estimated_reward = estimated_object_reward * estimated_grasp_reward 74 | 75 | else: 76 | estimated_reward = self.inference.model(input_images) 77 | actor_result = None 78 | 79 | if reward_index is not None: 80 | estimated_reward = estimated_reward[reward_index] 81 | 82 | if estimated_reward.shape[-1] > 4: # self.inference.model_data.output[0] == 'reward+human': 83 | estimated_reward = (1 - alpha_human) * estimated_reward[:, :, :, :4] + alpha_human * estimated_reward[:, :, :, 
4:] 84 | 85 | # reward_reduced = np.maximum(estimated_reward, 0) 86 | reward_reduced = np.mean(estimated_reward, axis=3) 87 | # reward_reduced = estimated_reward[:, :, :, 0] 88 | 89 | # For heatmapping the actor 90 | # reward_reduced = actor_result[:, :, :, 2] 91 | # reward_reduced = (reward_reduced - np.min(reward_reduced)) / np.ptp(reward_reduced) 92 | # reward_reduced += 0.5 93 | 94 | size_cropped = input_images[0].shape[1::-1] 95 | size_result = image.mat.shape[1::-1] 96 | 97 | heat = self.calculate_heat(reward_reduced, size_cropped, size_result) 98 | heat = cv2.applyColorMap(heat.astype(np.uint8), cv2.COLORMAP_JET) 99 | 100 | background = self.get_background(image, use_rgb) 101 | if background.dtype == np.uint16: 102 | background = background.astype(np.float32) / 255 103 | else: 104 | background = background.astype(np.float32) 105 | result = (1 - alpha) * background + alpha * heat 106 | result = OrthographicImage(result.astype(np.float32), image.pixel_size, image.min_depth, image.max_depth) 107 | 108 | if draw_indices is not None: 109 | self.draw_indices(result, reward_reduced, draw_indices) 110 | 111 | if draw_lateral: 112 | for _ in range(10): 113 | index = np.unravel_index(estimated_reward.argmax(), estimated_reward.shape) 114 | action = actor_result[index[0], index[1], index[2]] 115 | self.draw_lateral(result, estimated_reward.shape, index, action) 116 | estimated_reward[np.unravel_index(estimated_reward.argmax(), estimated_reward.shape)] = 0 117 | 118 | if draw_shifts: 119 | for _ in range(10): 120 | self.draw_arrow(result, reward_reduced, np.unravel_index(reward_reduced.argmax(), reward_reduced.shape)) 121 | reward_reduced[np.unravel_index(reward_reduced.argmax(), reward_reduced.shape)] = 0 122 | 123 | if save_path: 124 | cv2.imwrite(str(save_path), result.mat) 125 | return result.mat 126 | 127 | def draw_lateral(self, image: OrthographicImage, reward_shape, index, action): 128 | pose = self.inference.pose_from_index(index, reward_shape, image) 129 | arrow_color = (255*255, 255*255, 255*255) 130 | draw_line(image, pose, Affine(0.0, 0.0), Affine(a=pose.a, b=action[1], c=action[2]) * Affine(0.0, 0.0, -0.14), color=arrow_color, thickness=1) 131 | 132 | def draw_indices(self, image: OrthographicImage, reward_shape, indices): 133 | point_color = (255, 255, 255) 134 | 135 | for index in indices: 136 | pose = self.inference.pose_from_index(index, reward_shape, image) 137 | pose.x /= reward_shape[1] / 40 138 | pose.y /= reward_shape[2] / 40 139 | 140 | draw_line(image, pose, Affine(-0.001, 0), Affine(0.001, 0), color=point_color, thickness=1) 141 | draw_line(image, pose, Affine(0, -0.001), Affine(0, 0.001), color=point_color, thickness=1) 142 | 143 | def draw_arrow(self, image: OrthographicImage, reward_shape, index): 144 | pose = self.inference.pose_from_index(index, reward_shape, image) 145 | 146 | arrow_color = (255, 255, 255) 147 | draw_line(image, pose, Affine(0, 0), Affine(0.036, 0), color=arrow_color, thickness=2) 148 | draw_line(image, pose, Affine(0.036, 0.0), Affine(0.026, -0.008), color=arrow_color, thickness=2) 149 | draw_line(image, pose, Affine(0.036, 0.0), Affine(0.026, 0.008), color=arrow_color, thickness=2) 150 | -------------------------------------------------------------------------------- /griffig/utility/image.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Sequence, Tuple, Optional, Union 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | from _griffig import BoxData, Gripper, RobotPose, 
OrthographicImage 7 | from pyaffx import Affine 8 | 9 | 10 | def get_color_scale(dtype): 11 | if dtype == np.float32: 12 | return 1. / 255 13 | if dtype == np.uint16: 14 | return 255 15 | return 1 16 | 17 | 18 | def crop(mat_image: Any, size_output: Sequence[float], vec=(0, 0)) -> Any: 19 | margin_x_lower = int(round((mat_image.shape[0] - size_output[0]) / 2 + vec[1])) 20 | margin_y_lower = int(round((mat_image.shape[1] - size_output[1]) / 2 + vec[0])) 21 | margin_x_upper = margin_x_lower + int(round(size_output[0])) 22 | margin_y_upper = margin_y_lower + int(round(size_output[1])) 23 | return mat_image[margin_x_lower:margin_x_upper, margin_y_lower:margin_y_upper] 24 | 25 | 26 | def get_transformation(x: float, y: float, a: float, center: Sequence[float], scale=1.0, cropped: Sequence[float] = None): # [rad] 27 | trans = cv2.getRotationMatrix2D((round(center[0] - x), round(center[1] - y)), a * 180.0 / np.pi, scale) # [deg] 28 | trans[0][2] += x + scale * cropped[0] / 2 - center[0] if cropped else x 29 | trans[1][2] += y + scale * cropped[1] / 2 - center[1] if cropped else y 30 | return trans 31 | 32 | 33 | def get_area_of_interest( 34 | image: OrthographicImage, 35 | pose: Affine, 36 | size_cropped: Tuple[float, float] = None, 37 | size_result: Tuple[float, float] = None, 38 | flags=cv2.INTER_LINEAR, 39 | return_mat=False, 40 | planar=True, 41 | ) -> Union[OrthographicImage, Any]: 42 | size_input = (image.mat.shape[1], image.mat.shape[0]) 43 | center_image = (image.mat.shape[1] / 2, image.mat.shape[0] / 2) 44 | 45 | if size_result and size_cropped: 46 | scale = size_result[0] / size_cropped[0] 47 | assert scale == (size_result[1] / size_cropped[1]) 48 | elif size_result: 49 | scale = size_result[0] / size_input[0] 50 | assert scale == (size_result[1] / size_input[1]) 51 | else: 52 | scale = 1.0 53 | 54 | size_final = size_result or size_cropped or size_input 55 | 56 | if not planar: 57 | pose = image.pose.inverse() * pose 58 | 59 | trans = get_transformation( 60 | image.pixel_size * pose.y, 61 | image.pixel_size * pose.x, 62 | -pose.a, 63 | center_image, 64 | scale=scale, 65 | cropped=size_cropped, 66 | ) 67 | mat_result = cv2.warpAffine(image.mat, trans, size_final, flags=flags, borderMode=cv2.BORDER_REPLICATE) # INTERPOLATION_METHOD 68 | if return_mat: 69 | return mat_result 70 | 71 | image_pose = Affine(x=pose.x, y=pose.y, a=-pose.a) * image.pose 72 | 73 | return OrthographicImage( 74 | mat_result, 75 | scale * image.pixel_size, 76 | image.min_depth, 77 | image.max_depth, 78 | image.camera, 79 | image_pose, 80 | ) 81 | 82 | 83 | def get_inference_image( 84 | image: OrthographicImage, 85 | pose: Affine, 86 | size_cropped: Tuple[float, float], 87 | size_area_cropped: Tuple[float, float], 88 | size_area_result: Tuple[float, float], 89 | return_mat=False, 90 | ) -> OrthographicImage: 91 | size_input = (image.mat.shape[1], image.mat.shape[0]) 92 | center_image = (size_input[0] / 2, size_input[1] / 2) 93 | 94 | scale = size_area_result[0] / size_area_cropped[0] 95 | 96 | trans = get_transformation( 97 | image.pixel_size * pose.y, 98 | image.pixel_size * pose.x, 99 | pose.a, 100 | center_image, 101 | scale=scale, 102 | cropped=(size_cropped[0] / scale, size_cropped[1] / scale), 103 | ) 104 | mat_result = cv2.warpAffine(image.mat, trans, size_cropped, borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_AREA) 105 | if return_mat: 106 | return mat_result 107 | 108 | image_pose = Affine(x=pose.x, y=pose.y, a=pose.a) * image.pose 109 | 110 | return OrthographicImage( 111 | mat_result, 112 | scale * 
image.pixel_size, 113 | image.min_depth, 114 | image.max_depth, 115 | image.camera, 116 | image_pose, 117 | ) 118 | 119 | 120 | def _get_rect_contour(center: Sequence[float], size: Sequence[float]) -> List[Sequence[float]]: 121 | return [ 122 | [center[0] + size[0] / 2, center[1] + size[1] / 2, size[2]], 123 | [center[0] + size[0] / 2, center[1] - size[1] / 2, size[2]], 124 | [center[0] - size[0] / 2, center[1] - size[1] / 2, size[2]], 125 | [center[0] - size[0] / 2, center[1] + size[1] / 2, size[2]], 126 | ] 127 | 128 | 129 | def get_box_projection(image: OrthographicImage, box_data: BoxData): 130 | if not box_data: 131 | return 132 | 133 | box_border = [Affine(*p) for p in box_data.contour] 134 | return [image.project(p) for p in box_border] 135 | 136 | 137 | def draw_line( 138 | image: OrthographicImage, 139 | action_pose: Affine, 140 | pt1: Affine, 141 | pt2: Affine, 142 | color, 143 | thickness=1, 144 | ) -> None: 145 | cm = get_color_scale(image.mat.dtype) 146 | pose = image.pose.inverse() * action_pose 147 | pt1_projection = image.project(pose * pt1) 148 | pt2_projection = image.project(pose * pt2) 149 | cv2.line(image.mat, tuple(pt1_projection), tuple(pt2_projection), (color[0] * cm, color[1] * cm, color[2] * cm), thickness, lineType=cv2.LINE_AA) 150 | 151 | 152 | def draw_polygon( 153 | image: OrthographicImage, 154 | action_pose: Affine, 155 | polygon, 156 | color, 157 | thickness=1, 158 | ) -> None: 159 | cm = get_color_scale(image.mat.dtype) 160 | pose = image.pose.inverse() * action_pose 161 | polygon_projection = np.asarray([tuple(image.project(pose * p)) for p in polygon]) 162 | cv2.polylines(image.mat, [polygon_projection], True, (color[0] * cm, color[1] * cm, color[2] * cm), thickness, lineType=cv2.LINE_AA) 163 | 164 | 165 | def draw_around_box(image: OrthographicImage, box_data: Optional[BoxData], draw_lines=False) -> None: 166 | if not box_data: 167 | return 168 | 169 | assert box_data, 'Box contour should be drawn, but is false.' 
170 | 171 | box_border = [Affine(*p) for p in box_data.contour] 172 | image_border = [Affine(*p) for p in _get_rect_contour([0.0, 0.0, 0.0], [10.0, 10.0, box_data.contour[0][2]])] 173 | box_projection = [image.project(image.pose.inverse() * p) for p in box_border] 174 | 175 | cm = get_color_scale(image.mat.dtype) 176 | 177 | if draw_lines: 178 | number_channels = image.mat.shape[-1] if len(image.mat.shape) > 2 else 1 179 | 180 | color = np.array([255 * cm] * number_channels) # White 181 | cv2.polylines(image.mat, [np.asarray(box_projection)], True, color, 2, lineType=cv2.LINE_AA) 182 | 183 | else: 184 | color_array = np.array([image.mat[np.clip(p[1], 0, image.mat.shape[0] - 1), np.clip(p[0], 0, image.mat.shape[1] - 1)] for p in box_projection], dtype=np.float32) 185 | if len(color_array.shape) > 1: 186 | color_array[np.mean(color_array, axis=1) < cm] = np.nan 187 | else: 188 | color_array[color_array < cm] = np.nan 189 | color = np.nanmean(color_array, axis=0) 190 | np.nan_to_num(color, copy=False) 191 | image_border_projection = [image.project(image.pose.inverse() * p) for p in image_border] 192 | cv2.fillPoly(image.mat, np.array([image_border_projection, box_projection]), color.tolist()) 193 | 194 | 195 | def draw_object(image: OrthographicImage, object_data) -> None: 196 | if 'polygon' not in object_data: 197 | return 198 | 199 | cm = get_color_scale(image.mat.dtype) 200 | polygon = np.asarray([(int(p['x'] * image.mat.shape[1]), int(p['y'] * image.mat.shape[0])) for p in object_data['polygon']]) 201 | cv2.polylines(image.mat, [polygon], True, (0 * cm, 255 * cm, 255 * cm), 1, lineType=cv2.LINE_AA) 202 | 203 | 204 | def draw_pose(image: OrthographicImage, action_pose: RobotPose, convert_to_rgb=False, calibration_pattern=False, border_thickness=2, line_thickness=1) -> None: 205 | if convert_to_rgb and image.mat.ndim == 2: 206 | image.mat = cv2.cvtColor(image.mat, cv2.COLOR_GRAY2RGB) 207 | 208 | color_rect = (255, 0, 0) # Blue 209 | color_lines = (0, 0, 255) # Red 210 | color_direction = (0, 255, 0) # Green 211 | 212 | rect = [Affine(*p) for p in _get_rect_contour([0.0, 0.0, 0.0], [200.0 / image.pixel_size, 200.0 / image.pixel_size, 0.0])] 213 | 214 | draw_polygon(image, action_pose, rect, color_rect, 2) 215 | 216 | draw_line(image, action_pose, Affine(90 / image.pixel_size, 0), Affine(100 / image.pixel_size, 0), color_rect, border_thickness) 217 | draw_line(image, action_pose, Affine(0.012, action_pose.d / 2), Affine(-0.012, action_pose.d / 2), color_lines, line_thickness) 218 | draw_line(image, action_pose, Affine(0.012, -action_pose.d / 2), Affine(-0.012, -action_pose.d / 2), color_lines, line_thickness) 219 | draw_line(image, action_pose, Affine(0, action_pose.d / 2), Affine(0, -action_pose.d / 2), color_lines, line_thickness) 220 | draw_line(image, action_pose, Affine(0.006, 0), Affine(-0.006, 0), color_lines, line_thickness) 221 | draw_line(image, action_pose, Affine(z=0.14), Affine(), color_direction, line_thickness) 222 | 223 | if calibration_pattern: 224 | color_calibration = (255, 255, 255) 225 | draw_line(image, action_pose, Affine(0, -0.1), Affine(0, 0.1), color_calibration, line_thickness) 226 | draw_line(image, action_pose, Affine(-0.1, 0), Affine(0.1, 0), color_calibration, line_thickness) 227 | 228 | 229 | def draw_around_box2(image, box_data: BoxData, draw_lines=False): 230 | if not box_data: 231 | return 232 | 233 | assert box_data, 'Box contour should be drawn, but is false.' 
234 | 235 | box_border = [Affine(*p) for p in box_data.contour] 236 | image_border = [Affine(*p) for p in _get_rect_contour([0.0, 0.0, 0.0], [10.0, 10.0, box_data.contour[0][2]])] 237 | box_projection = [image.project(p) for p in box_border] 238 | 239 | cm = get_color_scale(image.mat.dtype) 240 | 241 | if draw_lines: 242 | number_channels = image.mat.shape[-1] if len(image.mat.shape) > 2 else 1 243 | color = np.array([255 * cm] * number_channels) # White 244 | cv2.polylines(image.mat, [np.asarray(box_projection)], True, color, 2, lineType=cv2.LINE_AA) 245 | 246 | else: 247 | color_array = np.array([image.mat[np.clip(p[1], 0, image.mat.shape[0] - 1), np.clip(p[0], 0, image.mat.shape[1] - 1)] for p in box_projection], dtype=np.float32) 248 | if len(color_array.shape) > 1: 249 | color_array[np.mean(color_array, axis=1) < cm] = np.nan 250 | else: 251 | color_array[color_array < cm] = np.nan 252 | 253 | color = np.nanmean(color_array, axis=0) 254 | np.nan_to_num(color, copy=False) 255 | 256 | image_border_projection = [image.project(p) for p in image_border] 257 | cv2.fillPoly(image.mat, np.array([image_border_projection, box_projection]), color.tolist()) 258 | 259 | 260 | def draw_pose2(image, grasp, gripper: Gripper, convert_to_rgb=False): 261 | if convert_to_rgb and image.mat.ndim == 2: 262 | image.mat = cv2.cvtColor(image.mat, cv2.COLOR_GRAY2RGB) 263 | 264 | color_rect = (255, 0, 0) # Blue 265 | color_lines = (0, 0, 255) # Red 266 | color_direction = (0, 255, 0) # Green 267 | 268 | rect = [Affine(*p) for p in _get_rect_contour([0.0, 0.0, 0.0], [200.0 / image.pixel_size, 200.0 / image.pixel_size, 0.0])] 269 | 270 | action_pose = RobotPose(grasp.pose, d=grasp.stroke) 271 | 272 | draw_polygon(image, action_pose, rect, color_rect, 2) 273 | draw_line(image, action_pose, Affine(90 / image.pixel_size, 0), Affine(100 / image.pixel_size, 0), color_rect, 2) 274 | 275 | half_width = gripper.finger_width / 2 276 | 277 | draw_line(image, action_pose, Affine(half_width, action_pose.d / 2), Affine(-half_width, action_pose.d / 2), color_lines, 1) 278 | draw_line(image, action_pose, Affine(half_width, -action_pose.d / 2), Affine(-half_width, -action_pose.d / 2), color_lines, 1) 279 | 280 | half_width_height = half_width + gripper.finger_height 281 | draw_line(image, action_pose, Affine(half_width_height, action_pose.d / 2), Affine(-half_width_height, action_pose.d / 2), color_lines, 1) 282 | draw_line(image, action_pose, Affine(half_width_height, -action_pose.d / 2), Affine(-half_width_height, -action_pose.d / 2), color_lines, 1) 283 | 284 | draw_line(image, action_pose, Affine(0, action_pose.d / 2), Affine(0, -action_pose.d / 2), color_lines, 1) 285 | draw_line(image, action_pose, Affine(0.006, 0), Affine(-0.006, 0), color_lines, 1) 286 | 287 | if np.isfinite(action_pose.z) and (action_pose.b != 0 or action_pose.c != 0): 288 | draw_line(image, action_pose, Affine(z=0.14), Affine(), color_direction, 1) 289 | -------------------------------------------------------------------------------- /griffig/utility/model_data.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List 3 | from pathlib import Path 4 | 5 | 6 | class ModelArchitecture(str, Enum): 7 | Planar = 'planar' # Model-based z, and bc=0 8 | PlanarTypes = 'planar-types' # Model-based z, and bc=0, for multiple learned types 9 | PlanarZ = 'planar-z' # Regression-based z, and bc=0 10 | PlanarSemantic = 'planar-semantic' # Semantic Grasping 11 | Lateral = 'lateral' # 
Model-based z, b, and c 12 | ActorCritic = 'actor-critic' # Actor-critic 13 | ModelBasedConvolution = 'model-based-convolution' # Model-based convolution 14 | NonFCNActorCritic = 'non-fcn-actor-critic' # Non-fully-convolutional actor-critic 15 | NonFCNPlanar = 'non-fcn-planar' # Non-fully-convolutional planar 16 | 17 | 18 | class ModelData: 19 | def __init__( 20 | self, 21 | name: str = None, 22 | path: Path = None, 23 | architecture: ModelArchitecture = None, 24 | pixel_size: float = None, 25 | depth_diff: float = None, 26 | gripper_widths: List[float] = None, 27 | description: str = None, 28 | size_area_cropped = None, 29 | size_result = None, 30 | task = None, 31 | input = None, 32 | input_type = None, 33 | output = None, 34 | version = None, 35 | ): 36 | self.name = name 37 | self.path = path 38 | self.architecture = architecture 39 | self.pixel_size = pixel_size 40 | self.depth_diff = depth_diff 41 | self.gripper_widths = gripper_widths 42 | self.description = description 43 | self.size_area_cropped = size_area_cropped 44 | self.size_result = size_result 45 | self.task = task 46 | self.input = input 47 | self.input_type = input_type 48 | self.output = output 49 | self.version = version 50 | 51 | def to_dict(self): 52 | return self.__dict__ 53 | 54 | -------------------------------------------------------------------------------- /griffig/utility/model_library.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | from pathlib import Path 4 | from shutil import unpack_archive 5 | from tempfile import NamedTemporaryFile 6 | from typing import Union 7 | from urllib.request import urlopen 8 | 9 | import requests 10 | 11 | from .model_data import ModelArchitecture, ModelData 12 | 13 | 14 | class ModelLibrary: 15 | remote_url = 'https://griffig.xyz/api/v1/models/' 16 | tmp_path = Path('/tmp') / 'griffig' / 'models' 17 | 18 | @classmethod 19 | def get_model_version_from_name(cls, name: str) -> int: 20 | return int(name.split('-')[-1][1:]) 21 | 22 | @classmethod 23 | def load_model_data(cls, name: Union[str, Path]) -> ModelData: 24 | model_path = name 25 | 26 | if isinstance(model_path, str): 27 | matched_model_globs = glob.glob(str(cls.tmp_path / f'{name}-v*')) 28 | if not matched_model_globs: 29 | try: 30 | r = requests.get(url=cls.remote_url + name, timeout=1.0) # [s] 31 | except requests.exceptions.Timeout as e: 32 | raise Exception('Could not search for model, please use a local model via a Path input.') from e 33 | 34 | if r.status_code == 404: 35 | raise Exception(f'Model {name} not found!') 36 | 37 | model_info = r.json() 38 | print(f"Download model {model_info['name']} version {model_info['version']} to {model_path}...") 39 | 40 | model_path = cls.tmp_path / f"{model_info['name']}-v{model_info['version']}" 41 | model_path.mkdir(parents=True, exist_ok=True) 42 | 43 | model_remote_url = model_info['download_path'] 44 | with urlopen(model_remote_url) as zipresp, NamedTemporaryFile() as tfile: 45 | tfile.write(zipresp.read()) 46 | tfile.seek(0) 47 | unpack_archive(tfile.name, model_path, format='zip') 48 | 49 | else: 50 | matched_model_globs.sort(key=cls.get_model_version_from_name, reverse=True) 51 | version = cls.get_model_version_from_name(matched_model_globs[0]) 52 | model_path = Path(matched_model_globs[0]) 53 | print(f'Found model {name} version {version} file at {model_path}') 54 | 55 | if isinstance(name, Path): 56 | name = name.name 57 | 58 | with open(model_path / 'model_data.json', 'r') as read_file: 59 | 
model_data = ModelData(**json.load(read_file)) 60 | model_data.path = model_path / name / 'model' / 'data' / 'model' # model_data.path 61 | 62 | return model_data 63 | -------------------------------------------------------------------------------- /include/griffig/box_data.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | 12 | struct BoxData { 13 | using Affine = affx::Affine; 14 | 15 | //! The contour of the box 16 | std::vector> contour; 17 | 18 | //! An optional pose of the center 19 | std::optional pose; 20 | 21 | explicit BoxData() { } 22 | explicit BoxData(const std::vector>& contour, const std::optional& pose = std::nullopt): contour(contour), pose(pose) { } 23 | explicit BoxData(const std::array& center, const std::array& size, const std::optional& pose = std::nullopt): pose(pose) { 24 | contour = { 25 | {center[0] + size[0] / 2, center[1] + size[1] / 2, size[2]}, 26 | {center[0] + size[0] / 2, center[1] - size[1] / 2, size[2]}, 27 | {center[0] - size[0] / 2, center[1] - size[1] / 2, size[2]}, 28 | {center[0] - size[0] / 2, center[1] + size[1] / 2, size[2]}, 29 | }; 30 | } 31 | 32 | std::array get_rect(float pixel_size, int offset) const { 33 | std::vector cont; 34 | for (auto e: contour) { 35 | cont.push_back({(float)e[0] * pixel_size, (float)e[1] * pixel_size}); 36 | } 37 | auto rect = cv::boundingRect(cont); 38 | return { 39 | 2*std::max(std::abs(rect.y), std::abs(rect.y + rect.height)) + offset, 40 | 2*std::max(std::abs(rect.x), std::abs(rect.x + rect.width)) + offset 41 | }; 42 | } 43 | 44 | bool is_pose_inside(const RobotPose& pose) const { 45 | double half_stroke = 0.5 * (pose.d + 0.002); // [m] 46 | auto gripper_b1_trans = (pose * Affine(0.0, half_stroke, 0.0)).translation(); 47 | auto gripper_b2_trans = (pose * Affine(0.0, -half_stroke, 0.0)).translation(); 48 | cv::Point2f gripper_b1 {(float)gripper_b1_trans[0], (float)gripper_b1_trans[1]}; 49 | cv::Point2f gripper_b2 {(float)gripper_b2_trans[0], (float)gripper_b2_trans[1]}; 50 | 51 | std::vector check_contour; 52 | for (auto c: contour) { 53 | check_contour.emplace_back(cv::Point2f(c[0], c[1])); 54 | } 55 | 56 | bool jaw1_inside_box = cv::pointPolygonTest(check_contour, gripper_b1, false) >= 0; 57 | bool jaw2_inside_box = cv::pointPolygonTest(check_contour, gripper_b2, false) >= 0; 58 | 59 | bool start_point_inside_box = true; 60 | if (!std::isnan(pose.z())) { 61 | auto helper_pose_trans = (pose * Affine(0.0, 0.0, 0.16)).translation(); 62 | cv::Point2f helper_pose {(float)helper_pose_trans[0], (float)helper_pose_trans[1]}; 63 | start_point_inside_box = cv::pointPolygonTest(check_contour, helper_pose, false) >= 0; 64 | } 65 | 66 | return jaw1_inside_box && jaw2_inside_box && start_point_inside_box; 67 | } 68 | 69 | std::string toString() const { 70 | std::string result = "Box contour ("; 71 | for (auto e: contour) { 72 | result += "(" + std::to_string(e[0]) + ", " + std::to_string(e[1]) + ", " + std::to_string(e[2]) + ")"; 73 | } 74 | return result + ")"; 75 | } 76 | }; 77 | -------------------------------------------------------------------------------- /include/griffig/grasp.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | 8 | enum class ActionType { 9 | Grasp, 10 | }; 11 | 12 | 13 | struct Grasp { 14 | using Affine = affx::Affine; 15 | 16 | ActionType action_type {ActionType::Grasp}; 17 
| 18 | Affine pose; 19 | double stroke; 20 | 21 | size_t index; // Index of inference model 22 | double estimated_reward; 23 | 24 | // Calculation durations 25 | std::map detail_durations; 26 | double calculation_duration {0.0}; 27 | 28 | // explicit Grasp() { } 29 | explicit Grasp(const Affine& pose = Affine(), double stroke = 0.0, size_t index = 0, double estimated_reward = 0.0): pose(pose), stroke(stroke), index(index), estimated_reward(estimated_reward) { } 30 | 31 | std::string toString() const { 32 | return pose.toString() + " d: " + std::to_string(stroke) + " estimated reward: " + std::to_string(estimated_reward); 33 | } 34 | }; 35 | -------------------------------------------------------------------------------- /include/griffig/griffig.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | -------------------------------------------------------------------------------- /include/griffig/gripper.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | 10 | struct Gripper { 11 | //! The min. and max. gripper width (distance between fingers) 12 | double min_stroke {0.0}, max_stroke {std::numeric_limits::infinity()}; 13 | 14 | //! A box around each finger 15 | double finger_width, finger_extent, finger_height; 16 | 17 | //! Offset transformation (local in the grippers reference frame) 18 | affx::Affine offset {}; 19 | 20 | explicit Gripper(double min_stroke = 0.0, double max_stroke = std::numeric_limits::infinity(), double finger_width = 0.0, double finger_extent = 0.0, double finger_height = 0.0, affx::Affine offset = affx::Affine()): 21 | min_stroke(min_stroke), max_stroke(max_stroke), finger_width(finger_width), finger_extent(finger_extent), finger_height(finger_height), offset(offset) { } 22 | 23 | std::vector consider_indices(const std::vector& gripper_widths) const { 24 | std::vector result (gripper_widths.size()); 25 | for (size_t i = 0; i < gripper_widths.size(); ++i) { 26 | result[i] = (min_stroke <= gripper_widths[i] && gripper_widths[i] <= max_stroke); 27 | } 28 | return result; 29 | } 30 | }; 31 | -------------------------------------------------------------------------------- /include/griffig/ndarray_converter.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // borrowed in spirit from https://github.com/yati-sagade/opencv-ndarray-conversion 3 | // MIT License 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 13 | #include 14 | 15 | 16 | #if PY_VERSION_HEX >= 0x03000000 17 | #define PyInt_Check PyLong_Check 18 | #define PyInt_AsLong PyLong_AsLong 19 | #endif 20 | 21 | 22 | static PyObject* opencv_error = 0; 23 | 24 | static int failmsg(const char *fmt, ...) 
{ 25 | char str[1000]; 26 | 27 | va_list ap; 28 | va_start(ap, fmt); 29 | vsnprintf(str, sizeof(str), fmt, ap); 30 | va_end(ap); 31 | 32 | PyErr_SetString(PyExc_TypeError, str); 33 | return 0; 34 | } 35 | 36 | 37 | class PyAllowThreads { 38 | PyThreadState* _state; 39 | 40 | public: 41 | PyAllowThreads() : _state(PyEval_SaveThread()) {} 42 | ~PyAllowThreads() { 43 | PyEval_RestoreThread(_state); 44 | } 45 | }; 46 | 47 | 48 | class PyEnsureGIL { 49 | PyGILState_STATE _state; 50 | 51 | public: 52 | PyEnsureGIL() : _state(PyGILState_Ensure()) {} 53 | ~PyEnsureGIL() { 54 | PyGILState_Release(_state); 55 | } 56 | }; 57 | 58 | 59 | class NumpyAllocator: public cv::MatAllocator { 60 | public: 61 | NumpyAllocator() { 62 | stdAllocator = cv::Mat::getStdAllocator(); 63 | } 64 | 65 | ~NumpyAllocator() {} 66 | 67 | cv::UMatData* allocate(PyObject* o, int dims, const int* sizes, int type, size_t* step) const { 68 | cv::UMatData* u = new cv::UMatData(this); 69 | u->data = u->origdata = (uchar*)PyArray_DATA((PyArrayObject*) o); 70 | npy_intp* _strides = PyArray_STRIDES((PyArrayObject*) o); 71 | for (int i = 0; i < dims - 1; i++) { 72 | step[i] = (size_t)_strides[i]; 73 | } 74 | step[dims-1] = CV_ELEM_SIZE(type); 75 | u->size = sizes[0]*step[0]; 76 | u->userdata = o; 77 | return u; 78 | } 79 | 80 | #if CV_VERSION_MAJOR >= 4 81 | cv::UMatData* allocate(int dims0, const int* sizes, int type, void* data, size_t* step, cv::AccessFlag flags, cv::UMatUsageFlags usageFlags) const 82 | #else 83 | cv::UMatData* allocate(int dims0, const int* sizes, int type, void* data, size_t* step, int flags, cv::UMatUsageFlags usageFlags) const 84 | #endif 85 | { 86 | if (data != 0) { 87 | CV_Error(cv::Error::StsAssert, "The data should normally be NULL!"); 88 | // probably this is safe to do in such extreme case 89 | return stdAllocator->allocate(dims0, sizes, type, data, step, flags, usageFlags); 90 | } 91 | PyEnsureGIL gil; 92 | 93 | int depth = CV_MAT_DEPTH(type); 94 | int cn = CV_MAT_CN(type); 95 | const int f = (int)(sizeof(size_t)/8); 96 | int typenum = depth == CV_8U ? NPY_UBYTE : depth == CV_8S ? NPY_BYTE : 97 | depth == CV_16U ? NPY_USHORT : depth == CV_16S ? NPY_SHORT : 98 | depth == CV_32S ? NPY_INT : depth == CV_32F ? NPY_FLOAT : 99 | depth == CV_64F ? 
NPY_DOUBLE : f*NPY_ULONGLONG + (f^1)*NPY_UINT; 100 | int i, dims = dims0; 101 | cv::AutoBuffer _sizes(dims + 1); 102 | for ( i = 0; i < dims; i++) { 103 | _sizes[i] = sizes[i]; 104 | } 105 | if (cn > 1) { 106 | _sizes[dims++] = cn; 107 | } 108 | PyObject* o = PyArray_SimpleNew(dims, _sizes, typenum); 109 | if (!o) { 110 | CV_Error_(cv::Error::StsError, ("The numpy array of typenum=%d, ndims=%d can not be created", typenum, dims)); 111 | } 112 | return allocate(o, dims0, sizes, type, step); 113 | } 114 | 115 | #if CV_VERSION_MAJOR >= 4 116 | bool allocate(cv::UMatData* u, cv::AccessFlag accessFlags, cv::UMatUsageFlags usageFlags) const 117 | #else 118 | bool allocate(cv::UMatData* u, int accessFlags, cv::UMatUsageFlags usageFlags) const 119 | #endif 120 | { 121 | return stdAllocator->allocate(u, accessFlags, usageFlags); 122 | } 123 | 124 | void deallocate(cv::UMatData* u) const { 125 | if (!u) { 126 | return; 127 | } 128 | 129 | PyEnsureGIL gil; 130 | CV_Assert(u->urefcount >= 0); 131 | CV_Assert(u->refcount >= 0); 132 | if (u->refcount == 0) { 133 | PyObject* o = (PyObject*)u->userdata; 134 | Py_XDECREF(o); 135 | delete u; 136 | } 137 | } 138 | 139 | const cv::MatAllocator* stdAllocator; 140 | }; 141 | 142 | 143 | struct NDArrayConverter { 144 | // must call this first, or the other routines don't work! 145 | static bool init_numpy() { 146 | import_array1(false); 147 | return true; 148 | } 149 | 150 | static bool toMat(PyObject* o, cv::Mat &m); 151 | static PyObject* toNDArray(const cv::Mat& mat); 152 | }; 153 | 154 | 155 | NumpyAllocator g_numpyAllocator; 156 | 157 | 158 | bool NDArrayConverter::toMat(PyObject *o, cv::Mat &m) { 159 | bool allowND = true; 160 | if (!o || o == Py_None) { 161 | if (!m.data) { 162 | m.allocator = &g_numpyAllocator; 163 | } 164 | return true; 165 | } 166 | 167 | if (PyInt_Check(o)) { 168 | double v[] = {static_cast(PyInt_AsLong((PyObject*)o)), 0., 0., 0.}; 169 | m = cv::Mat(4, 1, CV_64F, v).clone(); 170 | return true; 171 | } 172 | if (PyFloat_Check(o)) { 173 | double v[] = {PyFloat_AsDouble((PyObject*)o), 0., 0., 0.}; 174 | m = cv::Mat(4, 1, CV_64F, v).clone(); 175 | return true; 176 | } 177 | if (PyTuple_Check(o)) { 178 | int i, sz = (int)PyTuple_Size((PyObject*)o); 179 | m = cv::Mat(sz, 1, CV_64F); 180 | for (i = 0; i < sz; i++) { 181 | PyObject* oi = PyTuple_GET_ITEM(o, i); 182 | if (PyInt_Check(oi)) { 183 | m.at(i) = (double)PyInt_AsLong(oi); 184 | } else if (PyFloat_Check(oi)) { 185 | m.at(i) = (double)PyFloat_AsDouble(oi); 186 | } else { 187 | failmsg("return value is not a numerical tuple"); 188 | m.release(); 189 | return false; 190 | } 191 | } 192 | return true; 193 | } 194 | 195 | if (!PyArray_Check(o)) { 196 | failmsg("return value is not a numpy array, neither a scalar"); 197 | return false; 198 | } 199 | 200 | PyArrayObject* oarr = (PyArrayObject*) o; 201 | 202 | bool needcopy = false, needcast = false; 203 | int typenum = PyArray_TYPE(oarr), new_typenum = typenum; 204 | int type = typenum == NPY_UBYTE ? CV_8U : 205 | typenum == NPY_BYTE ? CV_8S : 206 | typenum == NPY_USHORT ? CV_16U : 207 | typenum == NPY_SHORT ? CV_16S : 208 | typenum == NPY_INT ? CV_32S : 209 | typenum == NPY_INT32 ? CV_32S : 210 | typenum == NPY_FLOAT ? CV_32F : 211 | typenum == NPY_DOUBLE ? 
CV_64F : -1; 212 | 213 | if (type < 0) { 214 | if (typenum == NPY_INT64 || typenum == NPY_UINT64 || typenum == NPY_LONG) { 215 | needcopy = needcast = true; 216 | new_typenum = NPY_INT; 217 | type = CV_32S; 218 | } else { 219 | failmsg("return value data type = %d is not supported", typenum); 220 | return false; 221 | } 222 | } 223 | 224 | #ifndef CV_MAX_DIM 225 | const int CV_MAX_DIM = 32; 226 | #endif 227 | 228 | int ndims = PyArray_NDIM(oarr); 229 | if(ndims >= CV_MAX_DIM) { 230 | failmsg("return value dimensionality (=%d) is too high", ndims); 231 | return false; 232 | } 233 | 234 | int size[CV_MAX_DIM+1]; 235 | size_t step[CV_MAX_DIM+1]; 236 | size_t elemsize = CV_ELEM_SIZE1(type); 237 | const npy_intp* _sizes = PyArray_DIMS(oarr); 238 | const npy_intp* _strides = PyArray_STRIDES(oarr); 239 | bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX; 240 | 241 | for (int i = ndims-1; i >= 0 && !needcopy; i--) { 242 | // these checks handle cases of 243 | // a) multi-dimensional (ndims > 2) arrays, as well as simpler 1- and 2-dimensional cases 244 | // b) transposed arrays, where _strides[] elements go in non-descending order 245 | // c) flipped arrays, where some of _strides[] elements are negative 246 | // the _sizes[i] > 1 is needed to avoid spurious copies when NPY_RELAXED_STRIDES is set 247 | if ( (i == ndims-1 && _sizes[i] > 1 && (size_t)_strides[i] != elemsize) || 248 | (i < ndims-1 && _sizes[i] > 1 && _strides[i] < _strides[i+1]) ) 249 | needcopy = true; 250 | } 251 | 252 | if (ismultichannel && _strides[1] != (npy_intp)elemsize*_sizes[2]) { 253 | needcopy = true; 254 | } 255 | 256 | if (needcopy) { 257 | if (needcast) { 258 | o = PyArray_Cast(oarr, new_typenum); 259 | oarr = (PyArrayObject*) o; 260 | } 261 | else { 262 | oarr = PyArray_GETCONTIGUOUS(oarr); 263 | o = (PyObject*) oarr; 264 | } 265 | 266 | _strides = PyArray_STRIDES(oarr); 267 | } 268 | 269 | // Normalize strides in case NPY_RELAXED_STRIDES is set 270 | size_t default_step = elemsize; 271 | for (int i = ndims - 1; i >= 0; --i) { 272 | size[i] = (int)_sizes[i]; 273 | if (size[i] > 1) { 274 | step[i] = (size_t)_strides[i]; 275 | default_step = step[i] * size[i]; 276 | } else { 277 | step[i] = default_step; 278 | default_step *= size[i]; 279 | } 280 | } 281 | 282 | // handle degenerate case 283 | if (ndims == 0) { 284 | size[ndims] = 1; 285 | step[ndims] = elemsize; 286 | ndims++; 287 | } 288 | 289 | if (ismultichannel) { 290 | ndims--; 291 | type |= CV_MAKETYPE(0, size[2]); 292 | } 293 | 294 | if (ndims > 2 && !allowND) { 295 | failmsg("return value has more than 2 dimensions"); 296 | return false; 297 | } 298 | 299 | m = cv::Mat(ndims, size, type, PyArray_DATA(oarr), step); 300 | m.u = g_numpyAllocator.allocate(o, ndims, size, type, step); 301 | m.addref(); 302 | 303 | if (!needcopy) { 304 | Py_INCREF(o); 305 | } 306 | m.allocator = &g_numpyAllocator; 307 | return true; 308 | } 309 | 310 | PyObject* NDArrayConverter::toNDArray(const cv::Mat& m) { 311 | if (!m.data) { 312 | Py_RETURN_NONE; 313 | } 314 | 315 | cv::Mat temp, *p = (cv::Mat*)&m; 316 | if (!p->u || p->allocator != &g_numpyAllocator) { 317 | temp.allocator = &g_numpyAllocator; 318 | try { 319 | PyAllowThreads allowThreads; 320 | m.copyTo(temp); 321 | } catch (const cv::Exception &e) { 322 | PyErr_SetString(opencv_error, e.what()); 323 | return 0; 324 | } 325 | p = &temp; 326 | } 327 | 328 | PyObject* o = (PyObject*)p->u->userdata; 329 | Py_INCREF(o); 330 | return o; 331 | } 332 | 333 | 334 | namespace pybind11 { namespace detail { 335 | template<> 336 | 
struct type_caster { 337 | PYBIND11_TYPE_CASTER(cv::Mat, _("numpy.ndarray")); 338 | 339 | bool load(handle src, bool) { 340 | return NDArrayConverter::toMat(src.ptr(), value); 341 | } 342 | 343 | static handle cast(const cv::Mat& m, return_value_policy, handle defval) { 344 | return handle(NDArrayConverter::toNDArray(m)); 345 | } 346 | }; 347 | 348 | }} // namespace pybind11::detail 349 | -------------------------------------------------------------------------------- /include/griffig/orthographic_image.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | 12 | class OrthographicImage { 13 | using Affine = affx::Affine; 14 | 15 | int min_value {std::numeric_limits::min()}; 16 | int max_value {std::numeric_limits::max()}; 17 | 18 | template 19 | T crop(double value, double min, double max) const { 20 | return std::min(std::max(value, min), max); 21 | } 22 | 23 | public: 24 | double pixel_size; // [px/m] 25 | double min_depth, max_depth; // [m] 26 | 27 | cv::Mat mat; 28 | 29 | std::optional camera; 30 | Affine pose {}; 31 | 32 | explicit OrthographicImage(const cv::Mat& mat, double pixel_size, double min_depth, double max_depth, const std::optional& camera = std::nullopt, const Affine& pose = Affine()): mat(mat), pixel_size(pixel_size), min_depth(min_depth), max_depth(max_depth), camera(camera), pose(pose) { } 33 | OrthographicImage(const OrthographicImage& image) { 34 | pixel_size = image.pixel_size; 35 | min_depth = image.min_depth; 36 | max_depth = image.max_depth; 37 | 38 | min_value = image.min_value; 39 | max_value = image.max_value; 40 | 41 | mat = image.mat.clone(); 42 | camera = image.camera; 43 | pose = image.pose; 44 | } 45 | 46 | double depthFromValue(double value) const { 47 | return max_depth + (value / max_value) * (min_depth - max_depth); 48 | } 49 | 50 | double valueFromDepth(double depth) const { 51 | double value = std::round((depth - max_depth) / (min_depth - max_depth) * max_value); 52 | return crop(value, min_value, max_value); 53 | } 54 | 55 | std::array project(const Affine& point) { 56 | return { 57 | int(round(mat.size().width / 2 - pixel_size * point.y())), 58 | int(round(mat.size().height / 2 - pixel_size * point.x())), 59 | }; 60 | } 61 | 62 | Affine inverse_project(const std::array& point) { 63 | // double d = depthFromValue(mat.at(point[1], point[0])); 64 | 65 | double x = (mat.size().height / 2 - point[1]) / pixel_size; 66 | double y = (mat.size().width / 2 - point[0]) / pixel_size; 67 | double z = 0.0; // TODO Test 68 | return Affine(x, y, z, 1.0, 0.0, 0.0, 0.0); 69 | } 70 | 71 | double positionFromIndex(int idx, int length) const { 72 | return ((idx + 0.5) - double(length) / 2) / pixel_size; 73 | } 74 | 75 | int indexFromPosition(double position, int length) const { 76 | return ((position * pixel_size) + (length / 2) - 0.5); 77 | } 78 | }; 79 | -------------------------------------------------------------------------------- /include/griffig/pointcloud.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | 7 | enum class PointType { 8 | XYZ, 9 | XYZRGB, 10 | XYZWRGBA, 11 | UV, 12 | }; 13 | 14 | 15 | namespace PointTypes { 16 | struct XYZ { 17 | float x, y, z; 18 | }; 19 | 20 | struct XYZRGB { 21 | float x, y, z; 22 | float r, g, b; 23 | }; 24 | 25 | struct XYZWRGBA { 26 | float x, y, z, w; 27 | unsigned char r, g, b, a; 28 | }; 29 | 30 | 
struct UV { 31 | float u, v; 32 | }; 33 | } 34 | 35 | 36 | class Texture { 37 | GLuint gl_handle {0}; 38 | 39 | public: 40 | explicit Texture() { 41 | glGenTextures(1, &gl_handle); 42 | } 43 | 44 | ~Texture() { 45 | glFinish(); 46 | glDeleteTextures(1, &gl_handle); 47 | } 48 | 49 | void upload(size_t width, size_t height, const void* data) { 50 | glBindTexture(GL_TEXTURE_2D, gl_handle); 51 | glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, data); 52 | 53 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 54 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 55 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP); 56 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP); 57 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 58 | glBindTexture(GL_TEXTURE_2D, 0); 59 | } 60 | 61 | GLuint get_gl_handle() const { 62 | return gl_handle; 63 | } 64 | }; 65 | 66 | 67 | struct Pointcloud { 68 | pybind11::object pc; 69 | 70 | size_t size {0}; 71 | int width {0}, height {0}; 72 | 73 | PointType point_type; 74 | const void* vertices; 75 | 76 | Texture tex; 77 | const void* tex_coords; 78 | 79 | explicit Pointcloud() { } 80 | explicit Pointcloud(size_t size, PointType point_type, const void* vertices): size(size), point_type(point_type), vertices(vertices) { } 81 | explicit Pointcloud(size_t size, int width, int height, const void* vertices, const void* texture, const void* tex_coords): size(size), point_type(PointType::XYZ), width(width), height(height), vertices(vertices), tex_coords(tex_coords) { 82 | tex.upload(width, height, texture); 83 | } 84 | }; 85 | -------------------------------------------------------------------------------- /include/griffig/renderer.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | // For Eigen3 compability 12 | #ifdef Success 13 | #undef Success 14 | #endif 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | 24 | class Renderer { 25 | using Affine = affx::Affine; 26 | 27 | EGLDisplay egl_display; 28 | EGLContext egl_context; 29 | GLuint egl_framebuffer, egl_color, egl_depth, egl_stencil; 30 | 31 | void init_egl(int width, int height) { 32 | egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); 33 | if (egl_display == EGL_NO_DISPLAY) { 34 | std::cout << "display: " << eglGetError() << std::endl; 35 | exit(EXIT_FAILURE); 36 | } 37 | 38 | EGLint major, minor; 39 | if (!eglInitialize(egl_display, &major, &minor)) { 40 | std::cout << "init: " << eglGetError() << std::endl; 41 | exit(EXIT_FAILURE); 42 | } 43 | 44 | EGLint const configAttribs[] = { 45 | // EGL_RED_SIZE, 1, 46 | // EGL_GREEN_SIZE, 1, 47 | // EGL_BLUE_SIZE, 1, 48 | EGL_NONE 49 | }; 50 | 51 | EGLint numConfigs; 52 | EGLConfig eglCfg; 53 | if (!eglChooseConfig(egl_display, configAttribs, NULL, 0, &numConfigs)) { 54 | std::cout << "choose config: " << eglGetError() << std::endl; 55 | exit(EXIT_FAILURE); 56 | } 57 | 58 | // std::cout << "numConfigs: " << numConfigs << std::endl; 59 | eglBindAPI(EGL_OPENGL_API); 60 | 61 | egl_context = eglCreateContext(egl_display, eglCfg, EGL_NO_CONTEXT, NULL); 62 | if (egl_context == NULL) { 63 | std::cout << "create context: " << eglGetError() << std::endl; 64 | exit(EXIT_FAILURE); 65 | } 66 | 67 | if (!eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, egl_context)) { 68 | std::cout << "make current: " << 
eglGetError() << std::endl; 69 | exit(EXIT_FAILURE); 70 | } 71 | 72 | GLenum glewinit = glewInit(); 73 | if (glewinit != GLEW_OK) { 74 | std::cout << "glew init: " << glewinit << std::endl; 75 | exit(EXIT_FAILURE); 76 | } 77 | 78 | glGenFramebuffers(1, &egl_framebuffer); 79 | 80 | glGenTextures(1, &egl_color); 81 | glGenRenderbuffers(1, &egl_depth); 82 | glGenRenderbuffers(1, &egl_stencil); 83 | 84 | glBindFramebuffer(GL_FRAMEBUFFER, egl_framebuffer); 85 | 86 | glBindTexture(GL_TEXTURE_2D, egl_color); 87 | glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16, width, height, 0, GL_RGBA, GL_UNSIGNED_SHORT, NULL); 88 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 89 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 90 | glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, egl_color, 0); 91 | 92 | glBindRenderbuffer(GL_RENDERBUFFER, egl_depth); 93 | glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH32F_STENCIL8, width, height); 94 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, egl_depth); 95 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, egl_depth); 96 | 97 | glViewport(0, 0, width, height); 98 | glClearColor(0, 0, 0, 0); 99 | 100 | const cv::Size size {width, height}; 101 | color = cv::Mat::zeros(size, CV_16UC4); 102 | depth_32f = cv::Mat::zeros(size, CV_32FC1); 103 | depth_16u = cv::Mat::zeros(size, CV_16UC1); 104 | mask = cv::Mat::zeros(size, CV_8UC1); 105 | } 106 | 107 | void close_egl() { 108 | eglTerminate(egl_display); 109 | } 110 | 111 | void draw_affines(const std::array& affines) { 112 | for (auto affine: affines) { 113 | glVertex3d(affine.y(), affine.x(), -affine.z()); 114 | } 115 | } 116 | 117 | void draw_cube(const Affine& pose, std::array size) { 118 | auto tl = pose * Affine(-size[0] / 2, size[1] / 2, 0.0); 119 | auto tr = pose * Affine(size[0] / 2, size[1] / 2, 0.0); 120 | auto bl = pose * Affine(-size[0] / 2, -size[1] / 2, 0.0); 121 | auto br = pose * Affine(size[0] / 2, -size[1] / 2, 0.0); 122 | 123 | auto tlu = pose * Affine(-size[0] / 2, size[1] / 2, size[2]); 124 | auto tru = pose * Affine(size[0] / 2, size[1] / 2, size[2]); 125 | auto blu = pose * Affine(-size[0] / 2, -size[1] / 2, size[2]); 126 | auto bru = pose * Affine(size[0] / 2, -size[1] / 2, size[2]); 127 | 128 | draw_affines({tl, tr, br, bl}); 129 | draw_affines({tl, tr, tru, tlu}); 130 | draw_affines({bl, br, bru, blu}); 131 | draw_affines({tl, bl, blu, tlu}); 132 | draw_affines({tr, br, bru, tru}); 133 | } 134 | 135 | void draw_gripper(const RobotPose& pose, const Gripper& gripper) const { 136 | Affine finger_left (0.0, pose.d / 2, 0.0); 137 | Affine finger_right (0.0, -pose.d / 2, 0.0); 138 | std::vector finger_size_left = { 139 | Affine(gripper.finger_width / 2, 0.0, 0.0), 140 | Affine(gripper.finger_width / 2, gripper.finger_extent, 0.0), 141 | Affine(-gripper.finger_width / 2, gripper.finger_extent, 0.0), 142 | Affine(-gripper.finger_width / 2, 0.0, 0.0), 143 | }; 144 | std::vector finger_size_right = { 145 | Affine(gripper.finger_width / 2, 0.0, 0.0), 146 | Affine(gripper.finger_width / 2, -gripper.finger_extent, 0.0), 147 | Affine(-gripper.finger_width / 2, -gripper.finger_extent, 0.0), 148 | Affine(-gripper.finger_width / 2, 0.0, 0.0), 149 | }; 150 | 151 | glColor3f(0.0, 0.0, 1.0); 152 | glBegin(GL_QUADS); 153 | for (auto pt: finger_size_left) { 154 | auto pt2 = pose * finger_left * pt; 155 | glVertex3d(pt2.y(), pt2.x(), -pt2.z()); 156 | } 157 | 158 | for (auto pt: finger_size_right) { 159 
| auto pt2 = pose * finger_right * pt; 160 | glVertex3d(pt2.y(), pt2.x(), -pt2.z()); 161 | } 162 | glEnd(); 163 | } 164 | 165 | void draw_box(const BoxData& box_contour, const cv::Mat& image) const { 166 | if (box_contour.contour.size() != 4) { 167 | throw std::runtime_error("Box must have 4 corners currently."); 168 | } 169 | 170 | std::array projection, modelview; 171 | std::array viewport; 172 | glGetDoublev(GL_PROJECTION_MATRIX, projection.data()); 173 | glGetDoublev(GL_MODELVIEW_MATRIX, modelview.data()); 174 | glGetIntegerv(GL_VIEWPORT, viewport.data()); 175 | 176 | std::array coord; 177 | std::vector colors; 178 | colors.resize(4); 179 | 180 | for (size_t i = 0; i < 4; ++i) { 181 | auto& c = box_contour.contour.at(i); 182 | gluProject(c[1], c[0], c[2], modelview.data(), projection.data(), viewport.data(), coord.data(), coord.data() + 1, coord.data() + 2); 183 | colors[i] = ((cv::Vec4f)image.at(coord[1], coord[0])) / (255 * 255); 184 | } 185 | 186 | float r_mean {0}, g_mean {0}, b_mean {0}; 187 | for (auto c: colors) { 188 | r_mean += c[0] / colors.size(); 189 | g_mean += c[1] / colors.size(); 190 | b_mean += c[2] / colors.size(); 191 | } 192 | 193 | auto& c0 = box_contour.contour.at(0); 194 | auto& c1 = box_contour.contour.at(1); 195 | auto& c2 = box_contour.contour.at(2); 196 | auto& c3 = box_contour.contour.at(3); 197 | 198 | glColor3f(r_mean, g_mean, b_mean); 199 | glBegin(GL_QUADS); 200 | glVertex3d(c0[1], 1.0, c0[2]); 201 | glVertex3d(c0[1], c0[0], c0[2]); 202 | glVertex3d(c1[1], c1[0], c1[2]); 203 | glVertex3d(c1[1], 1.0, c1[2]); 204 | 205 | glVertex3d(-1.0, c1[0], c1[2]); 206 | glVertex3d(c1[1], c1[0], c1[2]); 207 | glVertex3d(c2[1], c2[0], c2[2]); 208 | glVertex3d(-1.0, c2[0], c2[2]); 209 | 210 | glVertex3d(c2[1], -1.0, c2[2]); 211 | glVertex3d(c2[1], c2[0], c2[2]); 212 | glVertex3d(c3[1], c3[0], c3[2]); 213 | glVertex3d(c3[1], -1.0, c3[2]); 214 | 215 | glVertex3d(1.0, c3[0], c3[2]); 216 | glVertex3d(c3[1], c3[0], c3[2]); 217 | glVertex3d(c0[1], c0[0], c0[2]); 218 | glVertex3d(1.0, c0[0], c0[2]); 219 | glEnd(); 220 | 221 | glBegin(GL_TRIANGLES); 222 | glVertex3d(c0[1], 1.0, c0[2]); 223 | glVertex3d(c0[1], c1[0], c1[2]); 224 | glVertex3d(-1.0, c1[0], c1[2]); 225 | 226 | glVertex3d(c1[1], -1.0, c1[2]); 227 | glVertex3d(c1[1], c2[0], c2[2]); 228 | glVertex3d(-1.0, c2[0], c2[2]); 229 | 230 | glVertex3d(c2[1], -1.0, c2[2]); 231 | glVertex3d(c2[1], c3[0], c3[2]); 232 | glVertex3d(1.0, c3[0], c3[2]); 233 | 234 | glVertex3d(c3[1], 1.0, c3[2]); 235 | glVertex3d(c3[1], c0[0], c0[2]); 236 | glVertex3d(1.0, c0[0], c0[2]); 237 | glEnd(); 238 | } 239 | 240 | public: 241 | int width, height; 242 | double pixel_size; 243 | double typical_camera_distance; 244 | double depth_diff; 245 | 246 | std::optional box_contour; 247 | std::array camera_position {0.0, 0.0, 0.0}; 248 | cv::Mat color, depth_32f, depth_16u, mask; 249 | 250 | // When no box_data is given: Griffig Main 251 | explicit Renderer(const std::array& size, double typical_camera_distance, double pixel_size, double depth_diff): width(size[0]), height(size[1]), typical_camera_distance(typical_camera_distance), pixel_size(pixel_size), depth_diff(depth_diff) { 252 | init_egl(width, height); 253 | } 254 | 255 | // For C++ camera server, here we don't have model data (No Python wrapper) 256 | explicit Renderer(const std::array& size, const std::array& position): width(size[0]), height(size[1]), camera_position(position) { 257 | init_egl(width, height); 258 | } 259 | 260 | // Recommended constructor: Learned Grasping, Griffig Main 261 | 
explicit Renderer(const BoxData& box_data, double typical_camera_distance, double pixel_size, double depth_diff): box_contour(box_data), typical_camera_distance(typical_camera_distance), pixel_size(pixel_size), depth_diff(depth_diff) { 262 | const auto size = box_data.get_rect(pixel_size, 5); 263 | width = size[0]; 264 | height = size[1]; 265 | init_egl(width, height); 266 | } 267 | 268 | ~Renderer() { 269 | close_egl(); 270 | } 271 | 272 | void draw_box_on_image(OrthographicImage& image) { 273 | if (width != image.mat.cols || height != image.mat.rows) { 274 | throw std::runtime_error("Renderer size mismatch."); 275 | } 276 | 277 | const cv::Size size {(int)width, (int)height}; 278 | color = cv::Mat::zeros(size, CV_16UC4); 279 | depth_32f = cv::Mat::zeros(size, CV_32FC1); 280 | depth_16u = cv::Mat::zeros(size, CV_16UC4); 281 | mask = cv::Mat::zeros(size, CV_8UC1); 282 | 283 | glEnable(GL_TEXTURE_2D); 284 | glEnable(GL_DEPTH_TEST); 285 | glEnable(GL_STENCIL_TEST); 286 | glBindTexture(GL_TEXTURE_2D, 0); 287 | 288 | glStencilMask(0xFF); 289 | glStencilFunc(GL_ALWAYS, 0xFF, 0xFF); 290 | glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); 291 | 292 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 293 | 294 | const double alpha = 1.0 / (2 * image.pixel_size); 295 | glMatrixMode(GL_PROJECTION); 296 | glLoadIdentity(); 297 | glOrtho(alpha * width, -alpha * width, -alpha * height, alpha * height, image.min_depth, image.max_depth); 298 | 299 | glMatrixMode(GL_MODELVIEW); 300 | glLoadIdentity(); 301 | gluLookAt(camera_position[0], camera_position[1], camera_position[2], 0.0, 0.0, 1.0, 0.0, -1.0, 0.0); 302 | 303 | if (box_contour) { 304 | draw_box(*box_contour, image.mat); 305 | } 306 | 307 | glReadPixels(0, 0, mask.cols, mask.rows, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, mask.data); 308 | glReadPixels(0, 0, depth_32f.cols, depth_32f.rows, GL_DEPTH_COMPONENT, GL_FLOAT, depth_32f.data); 309 | glReadPixels(0, 0, color.cols, color.rows, GL_RGBA, GL_UNSIGNED_SHORT, color.data); 310 | 311 | depth_32f = (1 - depth_32f) * 255 * 255; 312 | depth_32f.convertTo(depth_16u, CV_16U); 313 | 314 | const int from_to[] = {0, 3}; 315 | mixChannels(&depth_16u, 1, &color, 1, from_to, 1); 316 | color.copyTo(image.mat, mask); 317 | } 318 | 319 | void draw_gripper_on_image(OrthographicImage& image, const Gripper& gripper, const RobotPose& pose) { 320 | if (width != image.mat.cols || height != image.mat.rows) { 321 | throw std::runtime_error("Renderer size mismatch."); 322 | } 323 | 324 | const cv::Size size {(int)width, (int)height}; 325 | color = cv::Mat::zeros(size, CV_16UC4); 326 | depth_32f = cv::Mat::zeros(size, CV_32FC1); 327 | depth_16u = cv::Mat::zeros(size, CV_16UC4); 328 | mask = cv::Mat::zeros(size, CV_8UC1); 329 | 330 | glEnable(GL_TEXTURE_2D); 331 | glEnable(GL_DEPTH_TEST); 332 | glEnable(GL_STENCIL_TEST); 333 | glBindTexture(GL_TEXTURE_2D, 0); 334 | glBindFramebuffer(GL_FRAMEBUFFER, egl_framebuffer); 335 | 336 | glStencilMask(0xFF); 337 | glStencilFunc(GL_ALWAYS, 0xFF, 0xFF); 338 | glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); 339 | 340 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 341 | 342 | const double alpha = 1.0 / (2 * image.pixel_size); 343 | glMatrixMode(GL_PROJECTION); 344 | glLoadIdentity(); 345 | glOrtho(alpha * width, -alpha * width, -alpha * height, alpha * height, image.min_depth, image.max_depth); 346 | 347 | glMatrixMode(GL_MODELVIEW); 348 | glLoadIdentity(); 349 | gluLookAt(camera_position[0], camera_position[1], camera_position[2], 0.0, 0.0, 1.0, 
0.0, -1.0, 0.0); 350 | 351 | draw_gripper(pose, gripper); 352 | 353 | glReadPixels(0, 0, mask.cols, mask.rows, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, mask.data); 354 | glReadPixels(0, 0, depth_32f.cols, depth_32f.rows, GL_DEPTH_COMPONENT, GL_FLOAT, depth_32f.data); 355 | glReadPixels(0, 0, color.cols, color.rows, GL_RGBA, GL_UNSIGNED_SHORT, color.data); 356 | 357 | depth_32f = (1 - depth_32f) * 255 * 255; 358 | depth_32f.convertTo(depth_16u, CV_16U); 359 | 360 | const int from_to[] = {0, 3}; 361 | mixChannels(&depth_16u, 1, &color, 1, from_to, 1); 362 | color.copyTo(image.mat, mask); 363 | } 364 | 365 | bool check_gripper_collision(OrthographicImage& image, const Gripper& gripper, const RobotPose& pose) { 366 | int width = image.mat.cols; 367 | int height = image.mat.rows; 368 | cv::Mat depth_32f = cv::Mat::zeros(cv::Size(width, height), CV_32FC1); 369 | 370 | glClearDepth(0.0); 371 | 372 | glClear(GL_DEPTH_BUFFER_BIT); 373 | glEnable(GL_DEPTH_TEST); 374 | glDepthFunc(GL_GREATER); 375 | 376 | const double alpha = 1.0 / (2 * image.pixel_size); 377 | glMatrixMode(GL_PROJECTION); 378 | glLoadIdentity(); 379 | glOrtho(alpha * width, -alpha * width, -alpha * height, alpha * height, image.min_depth, image.max_depth); 380 | 381 | glMatrixMode(GL_MODELVIEW); 382 | glLoadIdentity(); 383 | gluLookAt(camera_position[0], camera_position[1], camera_position[2], 0, 0, 1, 0, -1, 0); 384 | 385 | glBegin(GL_QUADS); 386 | 387 | // Render fingers 388 | std::array finger_box = {gripper.finger_width, gripper.finger_extent, gripper.finger_height}; 389 | draw_cube(image.pose.inverse() * pose * Affine(0.0, pose.d / 2, 0.0), finger_box); 390 | draw_cube(image.pose.inverse() * pose * Affine(0.0, -pose.d / 2, 0.0), finger_box); 391 | 392 | // Render gripper 393 | // if (gripper_size[0] > 0.0) { 394 | // draw_cube(image.pose.inverse() * pose * Affine(0.0, 0.0, finger_height), gripper_size); 395 | // } 396 | 397 | glEnd(); 398 | 399 | glPixelStorei(GL_PACK_ALIGNMENT, (depth_32f.step & 3) ? 
1 : 4); 400 | glPixelStorei(GL_PACK_ROW_LENGTH, depth_32f.step / depth_32f.elemSize()); 401 | glReadPixels(0, 0, depth_32f.cols, depth_32f.rows, GL_DEPTH_COMPONENT, GL_FLOAT, depth_32f.data); 402 | 403 | depth_32f = (1 - depth_32f); 404 | 405 | cv::Mat image_16u = cv::Mat::zeros(cv::Size(width, height), CV_16UC1); 406 | cv::Mat image_32f = cv::Mat::zeros(cv::Size(width, height), CV_32FC1); 407 | cv::extractChannel(image.mat, image_16u, 3); 408 | image_16u.convertTo(image_32f, CV_32F); 409 | image_32f /= 255 * 255; 410 | 411 | bool no_collision = cv::checkRange(depth_32f - image_32f, true, 0, 0.0, 1.01); 412 | return !no_collision; 413 | } 414 | 415 | template 416 | OrthographicImage render_pointcloud(const Pointcloud& cloud) { 417 | const double min_depth = typical_camera_distance - depth_diff; 418 | const double max_depth = typical_camera_distance; 419 | 420 | return render_pointcloud(cloud, pixel_size, min_depth, max_depth); 421 | } 422 | 423 | template 424 | OrthographicImage render_pointcloud(const Pointcloud& cloud, double pixel_density, double min_depth, double max_depth) { 425 | cv::Mat mat = render_pointcloud_mat(cloud, pixel_density, min_depth, max_depth, camera_position); 426 | return OrthographicImage(mat, pixel_density, min_depth, max_depth); 427 | } 428 | 429 | template 430 | cv::Mat render_pointcloud_mat(const Pointcloud& cloud, double pixel_density, double min_depth, double max_depth, const std::array& camera_position) { 431 | cv::Size size {(int)width, (int)height}; 432 | color = cv::Mat::zeros(size, CV_16UC4); 433 | depth_32f = cv::Mat::zeros(size, CV_32FC1); 434 | depth_16u = cv::Mat::zeros(size, CV_16UC1); 435 | 436 | if (!cloud.size) { 437 | if constexpr (draw_texture) { 438 | return color; 439 | } else { 440 | return depth_16u; 441 | } 442 | } 443 | 444 | glEnable(GL_TEXTURE_2D); 445 | glEnable(GL_DEPTH_TEST); 446 | glBindTexture(GL_TEXTURE_2D, 0); 447 | 448 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); 449 | 450 | const double alpha = 1.0 / (2 * pixel_density); 451 | glMatrixMode(GL_PROJECTION); 452 | glLoadIdentity(); 453 | glOrtho(alpha * size.width, -alpha * size.width, -alpha * size.height, alpha * size.height, min_depth, max_depth); 454 | 455 | glMatrixMode(GL_MODELVIEW); 456 | glLoadIdentity(); 457 | gluLookAt(camera_position[0], camera_position[1], camera_position[2], 0.0, 0.0, 1.0, 0.0, -1.0, 0.0); 458 | 459 | if constexpr (draw_texture) { 460 | const float tex_border_color[] = { 0.8f, 0.8f, 0.8f, 0.8f }; 461 | 462 | glEnable(GL_TEXTURE_2D); 463 | glBindTexture(GL_TEXTURE_2D, cloud.tex.get_gl_handle()); 464 | 465 | glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, tex_border_color); 466 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, 0x812F); 467 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, 0x812F); 468 | } 469 | 470 | glEnable(GL_POINT_SMOOTH); 471 | 472 | // glPointSize((float)size.width / 640); 473 | auto color_format = GL_BGRA; 474 | 475 | // GLuint vertBufID {0}; 476 | // glGenBuffers(1, &vertBufID); 477 | // glBindBuffer(GL_ARRAY_BUFFER, vertBufID); 478 | // glBufferData(GL_ARRAY_BUFFER, cloud.size*sizeof(PointTypes::XYZ), cloud.vertices, GL_STATIC_DRAW); 479 | 480 | // glBindBuffer(GL_ARRAY_BUFFER, vertBufID); 481 | // int stride = sizeof(PointTypes::XYZ); 482 | // char *offset = (char*)NULL; 483 | 484 | // glVertexPointer(3, GL_FLOAT, stride, offset); 485 | // glEnableClientState(GL_VERTEX_ARRAY); 486 | 487 | // glDrawArrays(GL_POINTS, 0, cloud.size); 488 | // glDisableClientState(GL_VERTEX_ARRAY); 489 | 490 | 
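// Note: the buffer-object path above is kept only as a commented-out reference.
// The active code below streams the cloud through the fixed-function pipeline instead:
// one glVertex3fv call per point, plus texture coordinates (XYZ clouds) or per-point
// colors (XYZWRGBA clouds) when draw_texture is enabled, with color_format switched
// to match the glReadPixels readback further down.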
glBegin(GL_POINTS); 491 | { 492 | if (cloud.point_type == PointType::XYZ) { 493 | for (size_t i = 0; i < cloud.size; ++i) { 494 | glVertex3fv(&((PointTypes::XYZ *)cloud.vertices + i)->x); 495 | if constexpr (draw_texture) { 496 | glTexCoord2fv(&((PointTypes::UV *)cloud.tex_coords + i)->u); 497 | } 498 | } 499 | 500 | } else if (cloud.point_type == PointType::XYZWRGBA) { 501 | color_format = GL_RGBA; 502 | 503 | for (size_t i = 0; i < cloud.size; ++i) { 504 | glVertex3fv(&((PointTypes::XYZWRGBA *)cloud.vertices + i)->x); 505 | if constexpr (draw_texture) { 506 | glColor3ubv(&((PointTypes::XYZWRGBA *)cloud.vertices + i)->r); 507 | } 508 | } 509 | } 510 | } 511 | glEnd(); 512 | 513 | glPixelStorei(GL_PACK_ALIGNMENT, (color.step & 3) ? 1 : 4); 514 | glReadPixels(0, 0, depth_32f.cols, depth_32f.rows, GL_DEPTH_COMPONENT, GL_FLOAT, depth_32f.data); 515 | 516 | depth_32f = (1 - depth_32f) * 255 * 255; 517 | depth_32f.convertTo(depth_16u, CV_16U); 518 | 519 | if constexpr (!draw_texture) { 520 | cv::cvtColor(depth_16u, depth_16u, cv::COLOR_RGB2GRAY); 521 | return depth_16u; 522 | } 523 | 524 | glReadPixels(0, 0, color.cols, color.rows, color_format, GL_UNSIGNED_SHORT, color.data); 525 | 526 | const int from_to[] = {0, 3}; 527 | mixChannels(&depth_16u, 1, &color, 1, from_to, 1); 528 | return color; 529 | } 530 | }; -------------------------------------------------------------------------------- /include/griffig/robot_pose.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | 6 | struct RobotPose: affx::Affine { 7 | double d; 8 | 9 | explicit RobotPose(double x, double y, double z, double a, double b, double c, double d): affx::Affine(x, y, z, a, b, c), d(d) { } 10 | explicit RobotPose(double x, double y, double z, double q_w, double q_x, double q_y, double q_z, double d): affx::Affine(x, y, z, q_w, q_x, q_y, q_z), d(d) { } 11 | explicit RobotPose(const affx::Affine& affine, double d): affx::Affine(affine), d(d) { } 12 | 13 | RobotPose operator *(const affx::Affine& a) const { 14 | affx::Affine result = Affine::operator *(a); 15 | return RobotPose(result, d); 16 | } 17 | 18 | friend RobotPose operator*(const affx::Affine& a, const RobotPose &p) { 19 | affx::Affine result = a.operator *(p); 20 | return RobotPose(result, p.d); 21 | } 22 | 23 | std::string toString() const { 24 | return affx::Affine::toString() + ", d: " + std::to_string(d); 25 | } 26 | }; 27 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | requires-python = ">=3.6" 3 | 4 | 5 | [build-system] 6 | requires = ["setuptools>=42", "wheel", "cmake>=3.14", "oldest-supported-numpy"] 7 | build-backend = "setuptools.build_meta" 8 | 9 | 10 | [tool.cibuildwheel] 11 | before-build = "sh .github/workflows/install-base.sh" 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | . 
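The renderer and pose types above are exposed to Python through the bindings in src/python.cpp further below. The following is a minimal sketch of how they fit together; the bin dimensions and pose values are illustrative assumptions rather than values taken from the repository, and constructing a Renderer needs an EGL-capable environment because its constructor sets up an offscreen context.

from griffig import BoxData, Renderer, RobotPose

# Illustrative bin geometry (assumed values): center and size in metres.
box_data = BoxData([0.0, 0.0, 0.0], [0.18, 0.28, 0.1])

# Renderer sized from the box contour; the remaining arguments are
# typical_camera_distance, pixel_size and depth_diff as in the C++ constructor.
renderer = Renderer(box_data, 0.41, 2000.0, 0.19)
print(renderer.width, renderer.height)

# A RobotPose is an Affine frame plus the gripper stroke d (in metres);
# d is carried along unchanged when the pose is composed with other transforms.
pose = RobotPose(x=0.04, y=-0.01, z=-0.34, b=-0.3, d=0.05)
print(pose.as_dict()['d'])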
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.version import LooseVersion 2 | import os 3 | import re 4 | import subprocess 5 | import sys 6 | 7 | from setuptools import setup, Extension, find_packages 8 | from setuptools.command.build_ext import build_ext 9 | 10 | 11 | with open('README.md', 'r') as readme_file: 12 | long_description = readme_file.read() 13 | 14 | 15 | class CMakeExtension(Extension): 16 | def __init__(self, name, sourcedir=''): 17 | Extension.__init__(self, name, sources=[]) 18 | self.sourcedir = os.path.abspath(sourcedir) 19 | 20 | 21 | class CMakeBuild(build_ext): 22 | def run(self): 23 | try: 24 | out = subprocess.check_output(['cmake', '--version']) 25 | except OSError as err: 26 | raise RuntimeError( 27 | 'CMake must be installed to build the following extensions: ' + 28 | ', '.join(e.name for e in self.extensions) 29 | ) from err 30 | 31 | cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1)) 32 | if cmake_version < LooseVersion('3.14.0'): 33 | raise RuntimeError('CMake >= 3.14.0 is required') 34 | 35 | for ext in self.extensions: 36 | self.build_extension(ext) 37 | 38 | def build_extension(self, ext): 39 | extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) 40 | 41 | # required for auto-detection of auxiliary "native" libs 42 | if not extdir.endswith(os.path.sep): 43 | extdir += os.path.sep 44 | 45 | build_type = os.environ.get('BUILD_TYPE', 'Release') 46 | build_args = [ 47 | '--config', build_type, 48 | '--', '-j2', 49 | ] 50 | 51 | # Pile all .so in one place and use $ORIGIN as RPATH 52 | cmake_args = [ 53 | '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, 54 | '-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=' + extdir, 55 | '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE=' + extdir, 56 | '-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE=' + extdir, 57 | f'-DPYBIND11_PYTHON_VERSION={sys.version_info.major}.{sys.version_info.minor}', 58 | '-DCMAKE_BUILD_WITH_INSTALL_RPATH=TRUE', 59 | '-DCMAKE_INSTALL_RPATH=$ORIGIN', 60 | '-DCMAKE_BUILD_TYPE=' + build_type, 61 | '-DUSE_INTERNAL_PYBIND11=ON', 62 | ] 63 | 64 | if not os.path.exists(self.build_temp): 65 | os.makedirs(self.build_temp) 66 | 67 | subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp) 68 | subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) 69 | 70 | setup( 71 | name='griffig', 72 | version='0.0.11', 73 | description='Robotic Manipulation Learned from Imitation and Self-Supervision', 74 | long_description=long_description, 75 | long_description_content_type='text/markdown', 76 | author='Lars Berscheid', 77 | author_email='lars.berscheid@kit.edu', 78 | url='https://github.com/pantor/griffig', 79 | packages=find_packages(), 80 | license='MIT', 81 | ext_modules=[CMakeExtension('_griffig'), CMakeExtension('pyaffx')], 82 | cmdclass=dict(build_ext=CMakeBuild), 83 | keywords=['robot', 'robotics', 'grasping', 'robot-learning'], 84 | classifiers=[ 85 | 'Development Status :: 3 - Alpha', 86 | 'Intended Audience :: Science/Research', 87 | 'Topic :: Scientific/Engineering', 88 | 'License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)', 89 | 'Programming Language :: C++', 90 | ], 91 | setup_requires=[ 92 | 'setuptools>=18.0', 93 | 'cmake>=3.14', 94 | 'numpy', 95 | ], 96 | install_requires=[ 97 | 'loguru', 98 | 'tensorflow>=2.4', 99 | 'opencv-python', 100 | 'numpy', 101 | 
'scipy>=1.5', 102 | 'Pillow', 103 | ], 104 | python_requires='>=3.6', 105 | ) 106 | -------------------------------------------------------------------------------- /src/griffig.cpp: -------------------------------------------------------------------------------- 1 | #include -------------------------------------------------------------------------------- /src/python.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | 10 | namespace py = pybind11; 11 | using namespace pybind11::literals; // to bring in the `_a` literal 12 | using Affine = affx::Affine; 13 | 14 | 15 | PYBIND11_MODULE(_griffig, m) { 16 | NDArrayConverter::init_numpy(); 17 | 18 | py::class_(m, "BoxData") 19 | .def(py::init>&, const std::optional&>(), "contour"_a, "pose"_a = std::nullopt) 20 | .def(py::init&, const std::array&, const std::optional&>(), "center"_a, "size"_a, "pose"_a = std::nullopt) 21 | .def_readwrite("contour", &BoxData::contour) 22 | .def_readwrite("pose", &BoxData::pose) 23 | .def("get_rect", &BoxData::get_rect, "pixel_size"_a, "offset"_a=0) 24 | .def("is_pose_inside", &BoxData::is_pose_inside, "pose"_a) 25 | .def("as_dict", [](BoxData self) { 26 | py::dict d; 27 | d["contour"] = self.contour; 28 | return d; 29 | }) 30 | .def("__repr__", &BoxData::toString); 31 | 32 | py::class_(m, "Grasp", py::dynamic_attr()) 33 | .def(py::init(), "pose"_a=Affine(), "stroke"_a=0.0, "index"_a=0, "estimated_reward"_a=0.0) 34 | .def_readwrite("pose", &Grasp::pose) 35 | .def_readwrite("stroke", &Grasp::stroke) 36 | .def_readwrite("index", &Grasp::index) 37 | .def_readwrite("estimated_reward", &Grasp::estimated_reward) 38 | .def_readwrite("detail_durations", &Grasp::detail_durations) 39 | .def_readwrite("calculation_duration", &Grasp::calculation_duration) 40 | .def("__repr__", &Grasp::toString); 41 | 42 | py::class_(m, "Gripper") 43 | .def(py::init(), "min_stroke"_a = 0.0, "max_stroke"_a = std::numeric_limits::infinity(), "finger_width"_a = 0.0, "finger_extent"_a = 0.0, "finger_height"_a = 0.0, "offset"_a = Affine()) 44 | .def_readwrite("min_stroke", &Gripper::min_stroke) 45 | .def_readwrite("max_stroke", &Gripper::max_stroke) 46 | .def_readwrite("finger_width", &Gripper::finger_width) 47 | .def_readwrite("finger_extent", &Gripper::finger_extent) 48 | .def_readwrite("finger_height", &Gripper::finger_height) 49 | .def_readwrite("offset", &Gripper::offset) 50 | .def("consider_indices", &Gripper::consider_indices); 51 | 52 | py::class_(m, "OrthographicImage") 53 | .def(py::init&, const Affine&>(), "mat"_a, "pixel_size"_a, "min_depth"_a, "max_depth"_a, "camera"_a=std::nullopt, "pose"_a=Affine()) 54 | .def_readwrite("mat", &OrthographicImage::mat) 55 | .def_readwrite("pixel_size", &OrthographicImage::pixel_size) 56 | .def_readwrite("min_depth", &OrthographicImage::min_depth) 57 | .def_readwrite("max_depth", &OrthographicImage::max_depth) 58 | .def_readwrite("camera", &OrthographicImage::camera) 59 | .def_readwrite("pose", &OrthographicImage::pose) 60 | .def("clone", [](py::object self) { 61 | py::object b = py::cast(OrthographicImage(self.cast())); 62 | b.attr("mat") = self.attr("mat").attr("copy")(); 63 | return b; 64 | }) 65 | .def("depth_from_value", &OrthographicImage::depthFromValue) 66 | .def("value_from_depth", &OrthographicImage::valueFromDepth) 67 | .def("project", &OrthographicImage::project) 68 | .def("inverse_project", &OrthographicImage::inverse_project) 69 | .def("position_from_index", 
&OrthographicImage::positionFromIndex) 70 | .def("index_from_position", &OrthographicImage::indexFromPosition); 71 | 72 | py::enum_(m, "PointType") 73 | .value("XYZ", PointType::XYZ) 74 | .value("XYZRGB", PointType::XYZRGB) 75 | .value("XYZWRGBA", PointType::XYZWRGBA) 76 | .value("UV", PointType::UV) 77 | .export_values(); 78 | 79 | py::class_(m, "Pointcloud") 80 | .def(py::init([](py::object frames) { 81 | py::object pc = py::module_::import("pyrealsense2").attr("pointcloud")(); 82 | py::object depth = frames.attr("get_depth_frame")(); 83 | py::object color = frames.attr("get_color_frame")(); 84 | 85 | py::object points = pc.attr("calculate")(depth); 86 | pc.attr("map_to")(color); 87 | 88 | size_t size = points.attr("size")().cast(); 89 | int width = color.attr("get_width")().cast(); 90 | int height = color.attr("get_height")().cast(); 91 | 92 | py::buffer vertices_buf = points.attr("get_vertices")(); 93 | py::buffer tex_buf = color.attr("get_data")(); 94 | py::buffer tex_coords_buf = points.attr("get_texture_coordinates")(); 95 | 96 | const void* vertices = vertices_buf.request().ptr; 97 | const void* texture = tex_buf.request().ptr; 98 | const void* tex_coords = tex_coords_buf.request().ptr; 99 | 100 | auto result = std::make_unique(size, width, height, vertices, texture, tex_coords); 101 | result->pc = std::move(pc); 102 | return result; 103 | }), py::kw_only(), "realsense_frames"_a=py::none()) 104 | .def(py::init([](py::object ros_message) { 105 | py::object data = ros_message.attr("data"); 106 | const size_t point_size = sizeof(PointTypes::XYZWRGBA); 107 | py::buffer_info info(py::buffer((py::bytes)data).request()); 108 | return std::make_unique(static_cast(info.size) / point_size, PointType::XYZWRGBA, info.ptr); 109 | }), py::kw_only(), "ros_message"_a=py::none()) 110 | .def(py::init([](PointType type, py::object data) { 111 | size_t point_size; 112 | switch (type) { 113 | default: 114 | case PointType::XYZWRGBA: { point_size = sizeof(PointTypes::XYZWRGBA); } break; 115 | case PointType::XYZ: { point_size = sizeof(PointTypes::XYZ); } break; 116 | case PointType::XYZRGB: { point_size = sizeof(PointTypes::XYZRGB); } break; 117 | } 118 | 119 | py::buffer_info info(py::buffer((py::bytes)data).request()); 120 | return std::make_unique(static_cast(info.size) / point_size, type, info.ptr); 121 | }), py::kw_only(), "type"_a=PointType::XYZWRGBA, "data"_a=py::none()) 122 | .def_readonly("size", &Pointcloud::size) 123 | .def_readonly("point_type", &Pointcloud::point_type); 124 | 125 | py::class_(m, "RobotPose") 126 | .def(py::init(), "affine"_a, "d"_a) 127 | .def(py::init(), "x"_a=0.0, "y"_a=0.0, "z"_a=0.0, "a"_a=0.0, "b"_a=0.0, "c"_a=0.0, "d"_a=0.0) 128 | .def(py::init(), "x"_a, "y"_a, "z"_a, "q_w"_a, "q_x"_a, "q_y"_a, "q_z"_a, "d"_a) 129 | .def(py::init([](py::dict d) { 130 | if (d.contains("q_x")) { // Prefer quaternion construction 131 | return RobotPose(d["x"].cast(), d["y"].cast(), d["z"].cast(), d["q_w"].cast(), d["q_x"].cast(), d["q_y"].cast(), d["q_z"].cast(), d["d"].cast()); 132 | } 133 | return RobotPose(d["x"].cast(), d["y"].cast(), d["z"].cast(), d["a"].cast(), d["b"].cast(), d["c"].cast(), d["d"].cast()); 134 | })) 135 | .def_readwrite("d", &RobotPose::d) 136 | .def(py::self * Affine()) 137 | .def(Affine() * py::self) 138 | .def("__repr__", &RobotPose::toString) 139 | .def("as_dict", [](RobotPose self) { 140 | auto translation = self.translation(); 141 | auto quaternion = self.quaternion(); 142 | 143 | py::dict d; 144 | d["x"] = translation.x(); 145 | d["y"] = translation.y(); 
146 | d["z"] = translation.z(); 147 | d["a"] = self.a(); 148 | d["b"] = self.b(); 149 | d["c"] = self.c(); 150 | d["q_w"] = quaternion.w(); 151 | d["q_x"] = quaternion.x(); 152 | d["q_y"] = quaternion.y(); 153 | d["q_z"] = quaternion.z(); 154 | d["d"] = self.d; 155 | return d; 156 | }); 157 | 158 | py::class_(m, "Renderer") 159 | .def(py::init&, double, double, double>(), "size"_a, "typical_camera_distance"_a, "pixel_size"_a, "depth_diff"_a) 160 | .def(py::init(), "box_data"_a, "typical_camera_distance"_a, "pixel_size"_a, "depth_diff"_a) 161 | .def("draw_gripper_on_image", &Renderer::draw_gripper_on_image, "image"_a, "gripper"_a, "pose"_a) 162 | .def("draw_box_on_image", &Renderer::draw_box_on_image, "image"_a) 163 | .def("check_gripper_collision", &Renderer::check_gripper_collision, "image"_a, "gripper"_a, "pose"_a) 164 | .def("render_pointcloud", static_cast(&Renderer::render_pointcloud)) 165 | .def("render_pointcloud", static_cast(&Renderer::render_pointcloud)) 166 | .def("render_pointcloud_mat", &Renderer::render_pointcloud_mat, "pointcloud"_a, "pixel_size"_a, "min_depth"_a, "max_depth"_a, "camera_position"_a = (std::array){0.0, 0.0, 0.0}) 167 | .def("render_depth_pointcloud_mat", &Renderer::render_pointcloud_mat) 168 | .def_readwrite("camera_position", &Renderer::camera_position) 169 | .def_readwrite("box_data", &Renderer::box_contour) 170 | .def_readwrite("pixel_size", &Renderer::pixel_size) 171 | .def_readwrite("typical_camera_distance", &Renderer::typical_camera_distance) 172 | .def_readwrite("depth_diff", &Renderer::depth_diff) 173 | .def_readwrite("width", &Renderer::width) 174 | .def_readwrite("height", &Renderer::height); 175 | } 176 | -------------------------------------------------------------------------------- /test/data/1-rc.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/test/data/1-rc.jpeg -------------------------------------------------------------------------------- /test/data/1-rd.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/test/data/1-rd.jpeg -------------------------------------------------------------------------------- /test/loader.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | from griffig import OrthographicImage 7 | 8 | 9 | class Loader: 10 | data_path = Path(__file__).parent.absolute() / 'data' 11 | 12 | @classmethod 13 | def get_image(cls, episode_id: str): 14 | mat_color = cv2.imread(str(cls.data_path / f'{episode_id}-rc.jpeg'), cv2.IMREAD_UNCHANGED).astype(np.uint16) * 255 15 | mat_depth = cv2.imread(str(cls.data_path / f'{episode_id}-rd.jpeg'), cv2.IMREAD_UNCHANGED).astype(np.uint16) * 255 16 | mat_depth = cv2.cvtColor(mat_depth, cv2.COLOR_BGR2GRAY) 17 | 18 | mat = np.concatenate([mat_color, np.expand_dims(mat_depth, axis=2)], axis=2) 19 | return OrthographicImage(mat, 2000.0, 0.22, 0.41) 20 | -------------------------------------------------------------------------------- /test/test_box.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import unittest 3 | 4 | import cv2 5 | 6 | from griffig import BoxData, Grasp, Griffig, RobotPose 7 | 8 | from loader import Loader 9 | 10 | 11 | class 
BoxTestCase(unittest.TestCase): 12 | output_path = Path(__file__).parent / 'data' 13 | 14 | def setUp(self): 15 | self.box_data = BoxData([-0.002, -0.0065, 0.0], [0.174, 0.282, 0.0]) 16 | 17 | def test_draw_box_on_image(self): 18 | self.assertTrue(self.box_data.is_pose_inside(RobotPose(x=0.04, y=-0.01, z=-0.34, d=0.05))) 19 | self.assertFalse(self.box_data.is_pose_inside(RobotPose(x=0.08, y=-0.01, z=-0.34, a=0.4, d=0.05))) 20 | self.assertTrue(self.box_data.is_pose_inside(RobotPose(x=0.02, y=-0.12, z=-0.34, a=-1.0, b=-0.3, d=0.07))) 21 | self.assertFalse(self.box_data.is_pose_inside(RobotPose(x=0.02, y=-0.18, z=-0.34, a=-1.0, b=-0.3, d=0.01))) 22 | self.assertTrue(self.box_data.is_pose_inside(RobotPose(x=0.02, y=-0.10, z=-0.34, a=-1.4, b=-0.2, d=0.03))) 23 | self.assertFalse(self.box_data.is_pose_inside(RobotPose(x=0.02, y=-0.10, z=-0.34, a=-1.4, b=0.4, d=0.03))) 24 | 25 | # image = Loader.get_image('1') 26 | # grasp = Grasp(pose=RobotPose(x=0.02, y=-0.10, z=-0.34, a=-1.4, b=0.4), stroke=0.03) 27 | # Griffig.draw_grasp_on_image(image, grasp) 28 | # Griffig.draw_box_on_image(image, self.box_data, draw_lines=True) 29 | # print(self.box_data.is_pose_inside(RobotPose(grasp.pose, d=grasp.stroke))) 30 | # cv2.imwrite(str(self.output_path / 'image-box-check-d.jpg'), image.mat[:, :, 3] / 255) 31 | 32 | 33 | if __name__ == '__main__': 34 | unittest.main() 35 | -------------------------------------------------------------------------------- /test/test_draw.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import unittest 3 | 4 | import cv2 5 | 6 | from griffig import BoxData, Grasp, Griffig, Gripper, Renderer, RobotPose 7 | 8 | from loader import Loader 9 | 10 | 11 | class DrawTestCase(unittest.TestCase): 12 | output_path = Path(__file__).parent.absolute() / 'data' 13 | 14 | def setUp(self): 15 | self.box_data = BoxData([-0.002, -0.0065, 0.0], [0.174, 0.282, 0.0]) 16 | self.renderer = Renderer((752, 480), 0.41, 2000.0, 0.19) 17 | self.gripper = Gripper(min_stroke=0.0, max_stroke=0.086, finger_width=0.024, finger_extent=0.008) 18 | 19 | def test_draw_box_on_image(self): 20 | image = Loader.get_image('1') 21 | Griffig.draw_box_on_image(image, self.box_data) 22 | img_c = Griffig.convert_to_pillow_image(image, channels='RGB') 23 | img_c.save(self.output_path / 'image-box-c.png') 24 | img_d = Griffig.convert_to_pillow_image(image, channels='D') 25 | img_d.save(self.output_path / 'image-box-d.png') 26 | 27 | def test_draw_gripper_on_image(self): 28 | image = Loader.get_image('1') 29 | pose = RobotPose(x=0.04, y=-0.01, z=-0.34, a=0.0, b=-0.3, d=0.05) 30 | 31 | self.renderer.draw_gripper_on_image(image, self.gripper, pose) 32 | img_c = Griffig.convert_to_pillow_image(image, channels='RGB') 33 | img_c.save(self.output_path / 'image-gripper-c.png') 34 | img_d = Griffig.convert_to_pillow_image(image, channels='D') 35 | img_d.save(self.output_path / 'image-gripper-d.png') 36 | 37 | def test_draw_pose_on_image(self): 38 | image = Loader.get_image('1') 39 | grasp = Grasp(pose=RobotPose(x=0.04, y=-0.01, z=-0.34, a=0.0, b=-0.3), stroke=0.05) 40 | 41 | img = Griffig.draw_grasp_on_image(image, grasp) 42 | img.save(self.output_path / 'image-pose-cd.png') 43 | img_c = Griffig.draw_grasp_on_image(image, grasp, channels='RGB') 44 | img_c.save(self.output_path / 'image-pose-c.png') 45 | img_d = Griffig.draw_grasp_on_image(image, grasp, channels='D') 46 | img_d.save(self.output_path / 'image-pose-d.png') 47 | 48 | def test_check_collision(self): 49 | image
= Loader.get_image('1') 50 | pose1 = RobotPose(x=0.04, y=-0.01, z=-0.34, a=0.0, b=-0.3, d=0.05) 51 | self.assertFalse(self.renderer.check_gripper_collision(image, self.gripper, pose1)) 52 | 53 | pose2 = RobotPose(x=0.04, y=0.0, z=-0.34, a=0.0, b=-0.2, d=0.05) 54 | self.assertTrue(self.renderer.check_gripper_collision(image, self.gripper, pose2)) 55 | 56 | 57 | if __name__ == '__main__': 58 | unittest.main() 59 | --------------------------------------------------------------------------------
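The collision test above also suggests a simple pre-filtering pattern for grasp candidates: reject any pose whose fingers would intersect the rendered depth image. Below is a short sketch along those lines, reusing the fixtures from test_draw.py; the candidate poses are made-up values for illustration, and the script is assumed to run from the test directory so that loader.py is importable.

from griffig import Gripper, Renderer, RobotPose
from loader import Loader  # test/loader.py

image = Loader.get_image('1')
renderer = Renderer((752, 480), 0.41, 2000.0, 0.19)
gripper = Gripper(min_stroke=0.0, max_stroke=0.086, finger_width=0.024, finger_extent=0.008)

# Made-up candidate poses around the object; keep only the collision-free ones.
candidates = [RobotPose(x=0.04, y=y, z=-0.34, b=-0.3, d=0.05) for y in (-0.02, -0.01, 0.0)]
feasible = [p for p in candidates if not renderer.check_gripper_collision(image, gripper, p)]
print(f'{len(feasible)} of {len(candidates)} candidate poses are collision-free')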