├── .github
│   └── workflows
│       ├── ci.yml
│       ├── install-base.sh
│       └── publish.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── doc
│   ├── base-dockerfile
│   ├── input.jpeg
│   └── systemnew-sm.JPG
├── examples
│   ├── grasp_depth_image.py
│   ├── grasp_realsense.py
│   ├── loader.py
│   ├── render_heatmap.py
│   └── render_pointcloud.py
├── griffig
│   ├── __init__.py
│   ├── action
│   │   ├── __init__.py
│   │   ├── checker.py
│   │   └── converter.py
│   ├── griffig.py
│   ├── infer
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   ├── inference_actor_critic.py
│   │   ├── inference_base.py
│   │   ├── inference_planar.py
│   │   ├── inference_semantic.py
│   │   └── selection.py
│   └── utility
│       ├── __init__.py
│       ├── box_finder.py
│       ├── heatmap.py
│       ├── image.py
│       ├── model_data.py
│       └── model_library.py
├── include
│   └── griffig
│       ├── box_data.hpp
│       ├── grasp.hpp
│       ├── griffig.hpp
│       ├── gripper.hpp
│       ├── ndarray_converter.hpp
│       ├── orthographic_image.hpp
│       ├── pointcloud.hpp
│       ├── renderer.hpp
│       └── robot_pose.hpp
├── pyproject.toml
├── requirements.txt
├── setup.py
├── src
│   ├── griffig.cpp
│   └── python.cpp
└── test
    ├── data
    │   ├── 1-rc.jpeg
    │   └── 1-rd.jpeg
    ├── loader.py
    ├── test_box.py
    └── test_draw.py
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 |
9 | container:
10 | image: griffig.xyz/base
11 | credentials:
12 | username: ${{ secrets.DOCKER_REGISTRY_USER }}
13 | password: ${{ secrets.DOCKER_REGISTRY_PASSWORD }}
14 |
15 | steps:
16 | - name: Checkout repository and submodules
17 | uses: actions/checkout@v2
18 | with:
19 | submodules: recursive
20 |
21 | - name: Build and test
22 | run: |
23 | pip install .
24 | python3 test/test_box.py
25 |
--------------------------------------------------------------------------------
/.github/workflows/install-base.sh:
--------------------------------------------------------------------------------
1 | # Fix CMake finding of *.so.0
2 | ln -s /usr/lib64/libOpenGL.so.0 /usr/lib64/libOpenGL.so
3 |
4 | # Fix CMake find Numpy
5 | python3.9 -m pip install --no-cache-dir numpy
6 | sed -i '3092s/.*/"import sys; import numpy; sys.stdout.write(numpy.get_include())"/' /opt/_internal/tools/lib/python3.9/site-packages/cmake/data/share/cmake-3.21/Modules/FindPython/Support.cmake
7 |
8 | # Install Eigen
9 | git clone https://gitlab.com/libeigen/eigen.git
10 | cd eigen
11 | git checkout 3.3.9
12 | mkdir build && cd build
13 | cmake ..
14 | make -j2
15 | make install
16 | cd ../../
17 |
18 | # Checkout PyBind11
19 | git clone https://github.com/pybind/pybind11.git
20 | cd pybind11
21 | git checkout v2.6.2
22 | cd ../
23 |
24 | # Install OpenCV
25 | git clone https://github.com/opencv/opencv.git
26 | cd opencv
27 | git checkout 4.5.2
28 | mkdir build && cd build
29 | cmake -DCMAKE_INSTALL_PREFIX=/usr -DWITH_VTK=OFF -DWITH_GTK=OFF -DWITH_PROTOBUF=OFF -DBUILD_PERF_TESTS=OFF -DBUILD_TESTS=OFF -DWITH_WEBP=OFF -DBUILD_opencv_ml=OFF -DBUILD_opencv_calib3d=OFF -DBUILD_opencv_videoio=OFF -DBUILD_opencv_gapi=OFF -DBUILD_opencv_stitching=OFF -DBUILD_opencv_objdetect=OFF -DBUILD_opencv_flann=OFF -DBUILD_opencv_video=OFF -DBUILD_opencv_features2d=OFF ..
30 | make -j2
31 | make install
32 | cd ../../
33 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Upload Python Package
2 |
3 | # on: [push, pull_request]
4 | on:
5 | release:
6 | types: [created]
7 |
8 | jobs:
9 | deploy:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - name: Checkout repository and submodules
14 | uses: actions/checkout@v2
15 | with:
16 | submodules: recursive
17 |
18 | - name: Set up Python
19 | uses: actions/setup-python@v2
20 | with:
21 | python-version: 3.9
22 |
23 | - name: Install dependencies
24 | run: |
25 | python3 -m pip install --upgrade pip
26 | python3 -m pip install setuptools wheel twine --upgrade
27 |
28 | - name: Build source wheels
29 | run: |
30 | python3 setup.py sdist
31 |
32 | # - name: Build manylinux wheels
33 | # uses: RalfG/python-wheels-manylinux-build@v0.3.4
34 | # with:
35 | # python-versions: 'cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39'
36 | # build-requirements: 'numpy'
37 | # system-packages: 'cmake libglvnd-opengl mesa-libGL mesa-libGL-devel mesa-libGLU mesa-libGLU-devel mesa-libEGL mesa-libEGL-devel glew glew-devel'
38 | # pre-build-command: 'sh .github/workflows/install-base.sh'
39 |
40 | - name: Publish
41 | env:
42 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
43 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
44 | run: |
45 | twine upload dist/*.tar.gz
46 | # twine upload dist/*-manylinux*.whl
47 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | .vscode
3 | .mypy_cache
4 | __pycache__
5 | *.pyc
6 |
7 | test/data/*
8 | !test/data/1-rc.jpeg
9 | !test/data/1-rd.jpeg
10 |
11 | dist
12 | *.egg-info
13 | wheels
14 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "affx"]
2 | path = affx
3 | url = https://github.com/pantor/affx.git
4 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.14)
2 |
3 |
4 | project(griffig VERSION 0.0.11 LANGUAGES CXX)
5 | include(FetchContent)
6 |
7 |
8 | option(USE_INTERNAL_PYBIND11 "Build python module" OFF)
9 |
10 |
11 | find_package(Eigen3 REQUIRED)
12 | find_package(GLEW REQUIRED)
13 | find_package(OpenCV REQUIRED COMPONENTS core imgproc)
14 | find_package(OpenGL REQUIRED COMPONENTS EGL)
15 | find_package(Python3 REQUIRED COMPONENTS NumPy)
16 |
17 |
18 | if(USE_INTERNAL_PYBIND11)
19 | FetchContent_Declare(pybind11_fetch GIT_REPOSITORY https://github.com/pybind/pybind11.git GIT_TAG v2.6.2)
20 | FetchContent_MakeAvailable(pybind11_fetch)
21 | else()
22 | find_package(pybind11 REQUIRED)
23 | endif()
24 |
25 |
26 | add_subdirectory(affx)
27 |
28 |
29 | add_library(griffig src/griffig.cpp)
30 | target_compile_features(griffig PUBLIC cxx_std_17)
31 | target_include_directories(griffig PUBLIC include)
32 | target_link_libraries(griffig PUBLIC affx opencv_core opencv_imgproc OpenGL::EGL OpenGL::GLU GLEW::GLEW pybind11::embed Python3::NumPy)
33 |
34 |
35 | pybind11_add_module(_griffig src/python.cpp)
36 | target_link_libraries(_griffig PUBLIC griffig)
37 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM griffig.xyz/base
2 |
3 | WORKDIR griffig
4 |
5 | RUN python3 -m pip install loguru scipy==1.5 Pillow cmake
6 |
7 | COPY affx/ affx
8 | COPY griffig/ griffig
9 | COPY include/ include
10 | COPY src/ src
11 | COPY CMakeLists.txt .
12 | COPY README.md .
13 | COPY setup.py .
14 |
15 | RUN python3 -m pip install .
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Lars Berscheid
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include affx *
2 | recursive-include griffig *
3 | recursive-include include *
4 | recursive-include src *
5 |
6 | include CMakeLists.txt
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Griffig
2 |
3 | Robotic Manipulation Learned from Imitation and Self-Supervision.
4 |
25 | Griffig is a library for robotic grasping, in particular 6D grasping, learned from large-scale imitation and self-supervised real-world training. Using an action-centric approach, Griffig does not need object models and requires only a simple depth camera as input. In our [model library](https://griffig.xyz), we publish models pre-trained in densely cluttered bin picking scenarios. Griffig calculates grasp points with high performance (< 70 ms) and is robust, achieving grasp rates as high as 95% for typical use cases. This repository contains the source code and corresponding library of our upcoming paper *Learning 6D Robotic Grasping using a Fully Convolutional Actor-Critic Architecture*.
26 |
27 | [You can find many videos on griffig.xyz!](https://griffig.xyz)
31 |
32 |
33 | ## Installation
34 |
35 | Griffig is a library for Python 3.7+. You can install Griffig via [PyPI](https://pypi.org/project/griffig/) by
36 | ```bash
37 | # Install Eigen >3.3, OpenCV >3, and OpenGL (e.g. via apt)
38 | (sudo) apt install libeigen3-dev libopencv-dev libgl1-mesa-dev libglu1-mesa-dev libegl1-mesa-dev libglew-dev
39 |
40 | pip install griffig
41 | ```
42 | Of course, an NVIDIA GPU with a matching CUDA version is highly recommended. When building from source, you can either call `pip install .` or use CMake directly to build the C++ core of Griffig. We also provide a Docker container to get started more easily.
43 |
44 |
45 | ## Tutorial
46 |
47 | We focused on making *Griffig* easy to use! In this tutorial, we use an RGBD pointcloud of the scene to detect a 6D grasp point with an additional pre-shaped gripper stroke. We use a common parallel two-finger gripper and a RealSense D435 camera for recording. Griffig includes a small library of pre-trained models. As with all data-driven methods, make sure that your setup matches our robotic system as closely as possible. The main output of Griffig is a *grasp point*. The robot should then pre-shape its gripper and approach the point along a trajectory parallel to its gripper fingers. Be aware of possible collisions, which can always happen in bin picking.
48 |
49 | [Color and depth images of a typical scene in the dataset viewer](https://griffig.xyz/dataset/viewer)
50 |
51 | A typical scene looks like the color (left) and depth (right) images above. The (orthographic) images are rendered from pointclouds and show a bin randomly filled with objects of multiple types. Now we want to find the *best* grasp within the bin. You can find working examples in the corresponding [directory](). First, we import `griffig`, generate a pointcloud, and create the main `Griffig` instance.
52 |
53 | ```python
54 | from griffig import Affine, Griffig, Gripper, Pointcloud, BoxData
55 |
56 | # Griffig requires a RGB pointcloud of the scene
57 | pointcloud = Pointcloud.fromRealsense(camera.record_pointcloud())
58 |
59 | # Specify some options
60 | griffig = Griffig(
61 | model='two-finger', # Use the default model for a two-finger gripper
62 | gripper=Gripper(
63 | min_stroke=0.01, # [m]
64 | max_stroke=0.10, # [m], to limit pre-shaped stroke
65 | ),
66 | )
67 | ```
68 |
69 | Next to the model name (or a path for your own models), we pass some details about the robot's gripper. In particular, we limit the pre-shaped gripper stroke (also called width). We can now calculate the best possible grasp within the scene. To get the grasp in the global frame, we also pass Griffig the camera pose of the pointcloud.
70 |
71 | ```python
72 | grasp = griffig.calculate_grasp(pointcloud, camera_pose=camera_pose)
73 |
74 | # Make use of the grasp here!
75 | print(f'Grasp at {grasp.pose} with probability {grasp.estimated_reward}')
76 | ```
77 | The grasp pose is given as an [Affx](https://github.com/pantor/affx) affine transformation, a lightweight wrapper around [Eigen's](https://eigen.tuxfamily.org) `Affine3d` class. On top of that, we can easily generate a heatmap of grasp probabilities as a PIL image to visualize the model.
78 |
79 | ```python
80 | heatmap = griffig.calculate_heatmap()
81 | heatmap.show()
82 | ```
83 |
84 | In the following, we show the usage of the *Griffig* library in more detail.
85 |
86 |
87 | ### BoxData Class
88 |
89 | We can define a box to avoid grasps outside of the bin (and even worse: grasps of the bin itself). In general, a box is constructed from its contour, given as a polygon in the image frame. For a cuboid box, we simplify this to
90 |
91 | ```python
92 | box_data = BoxData(
93 | size=(0.2, 0.3, 0.1), # (x, y, z) [m]
94 | center=(0.0, 0.0, 0.0), # At the center [m]
95 | )
96 | ```
97 | with the size and center position of the box.
98 |
99 |
100 | ### Gripper Class
101 |
102 | We use the `Gripper` class for collision checks and to specify the minimum and maximum gripper stroke:
103 |
104 | ```python
105 | gripper = Gripper( # Everything in [m]
106 | # Pre-shaped stroke interval
107 | min_stroke=0.01,
108 | max_stroke=0.10,
109 | # Size of a bounding box for optional collision check
110 | finger_width=0.02, # Finger width
111 | finger_extent=0.008, # Finger extent (in direction of finger opening/closing)
112 | finger_height=0.1, # Finger height from tip to gripper base
113 | # An optional offset for the local grasp pose
114 | offset=Affine(z=0.02),
115 | )
116 | ```
117 |
118 | ### Griffig Class
119 |
120 | The `Griffig` class is the main interface for grasp calculations. You can create a `Griffig` instance with the following arguments:
121 |
122 | ```python
123 | griffig = Griffig(
124 | model='two-finger-planar',
125 | gripper=gripper,
126 | box_data=box_data, # Might be None
127 | avoid_collisions=True, # If true, check collisions using the given pointcloud and gripper data
128 | )
129 | ```
130 |
131 | Griffig includes a [model library](https://griffig.xyz/model-library) for different tasks and downloads the models automatically. For your own models, you can also pass a local path instead of a name, as sketched below.
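
Both variants in a short sketch (the local path is a placeholder for your own model directory):

```python
from pathlib import Path
from griffig import Griffig

# Load a pre-trained model by name from the model library (downloaded automatically)
griffig = Griffig(model='two-finger-planar')

# Or pass a path to a locally stored model
griffig_local = Griffig(model=Path('/path/to/own-model'))
```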
132 |
133 |
134 | ### Pointcloud Class
135 |
136 | Griffig uses its own pointcloud class as input to its rendering pipeline. Currently, three possible inputs are supported:
137 |
138 | ```python
139 | # (1) Input from a realsense frame
140 | pointcloud = Pointcloud(realsense_frame=<...>)
141 |
142 | # (2) Input from a ROS Pointcloud2 message
143 | pointcloud = Pointcloud(ros_message=<...>)
144 |
145 | # (3) The raw pointer variant...
146 | pointcloud = Pointcloud(type=PointType.XYZRGB, data=cloud_data.ptr())
147 |
148 | # Then, we can render the pointcloud as a PIL image
149 | image = griffig.render(pointcloud)
150 | image.show()
151 | ```
152 | Note that the pointcloud only stores a pointer to the data and does not copy or own it, so keep the underlying data alive while the pointcloud is in use (as sketched below).
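
A minimal sketch of this lifetime caveat, using the `RealsenseReader` helper from the examples (the names here are placeholders for your own camera interface):

```python
# The Pointcloud only references the frame data, so keep 'frames' alive
frames = realsense.wait_for_frames()
pointcloud = Pointcloud(realsense_frames=frames)

image = griffig.render(pointcloud)  # 'frames' must still be alive at this point
image.show()

del frames  # Release the source data only after the pointcloud is no longer needed
```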
153 |
154 |
155 | ### Grasp Class
156 |
157 | The calculated grasp contains information about its grasp pose, but also some further details. First, we calculate the next grasp based on the current pointcloud input.
158 |
159 | ```python
160 | grasp = griffig.calculate_grasp(pointcloud, camera_pose=camera_pose) # Get grasp in the global frame using the camera pose
161 |
162 | print(f'Calculated grasp {grasp} in {grasp.calculation_duration * 1000:0.1f} [ms].')  # calculation_duration is given in [s]
163 | ```
164 |
165 | On a GPU, the grasp calculation should not take longer than a few hundred milliseconds, and usually stays below 70 ms. A typical grasp pipeline then looks like this:
166 |
167 | ```python
168 | # (1) Check if the grasp reward is below a user-defined threshold
169 | if grasp.estimated_reward < 0.2: # Grasp probability in [0, 1]
170 | print('The bin is probably empty!')
171 |
172 | approach_start = grasp.pose * Affine(z=-0.12) # Approx. finger length [m]
173 |
174 | # (2) Move robot to start of approach trajectory
175 | robot.move_linear(approach_start)
176 |
177 | # (3) Move gripper to pre-shaped grasp stroke
178 | robot.move_gripper(grasp.pose.d)
179 |
180 | # (4) Move robot to actual grasp pose
181 | robot.move_linear(grasp.pose)
182 |
183 | # (5) And finally close the gripper
184 | robot.close_gripper()
185 | ```
186 |
187 | The robot should have grasped something! If something went wrong, make sure to call `griffig.report_grasp_failure()`, so that Griffig will output a different grasp next time (see the sketch below).
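
A minimal retry loop could look as follows; `camera`, `robot`, and `execute_grasp` are placeholders for your own hardware interface:

```python
while True:
    # Record a fresh pointcloud of the current scene
    pointcloud = Pointcloud(realsense_frames=camera.wait_for_frames())
    grasp = griffig.calculate_grasp(pointcloud, camera_pose=camera_pose)

    if grasp is None or grasp.estimated_reward < 0.2:
        print('The bin is probably empty!')
        break

    if not execute_grasp(robot, grasp):  # e.g. steps (2) to (5) from the pipeline above
        # Report the failure so that Griffig outputs a different grasp next time
        griffig.report_grasp_failure()
```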
188 |
189 |
190 | ## Development
191 |
192 | Griffig is written in C++17 and Python 3.7 (or higher). It is tested on Ubuntu 20.04 against the following dependency versions:
193 |
194 | - OpenCV 4.5
195 | - Eigen 3.3.9
196 | - PyBind11 2.6
197 | - TensorFlow 2.4
198 |
199 | To build the Docker image, run `docker build .`.
200 |
201 |
202 | ## License
203 |
204 | Griffig is published under the permissive MIT license. Feel free to contact us in case of commercial interest.
205 |
--------------------------------------------------------------------------------
/doc/base-dockerfile:
--------------------------------------------------------------------------------
1 | FROM tensorflow/tensorflow:2.4.1
2 |
3 | ENV TZ=Europe/Berlin
4 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
5 |
6 | # Install Git (latest version)
7 | RUN apt-get update && apt-get install -y software-properties-common
8 | RUN add-apt-repository ppa:git-core/ppa
9 | RUN apt-get update && apt-get install -y git
10 |
11 | # Install CMake
12 | RUN python3 -m pip install cmake
13 |
14 | # Install OpenCV
15 | RUN git clone https://github.com/opencv/opencv.git &&\
16 | cd opencv &&\
17 | git checkout 4.5.2 &&\
18 | mkdir build && cd build &&\
19 | cmake -DWITH_VTK=OFF -DWITH_GTK=OFF -DWITH_PROTOBUF=OFF -DBUILD_PERF_TESTS=OFF -DBUILD_TESTS=OFF -DWITH_WEBP=OFF .. &&\
20 | make &&\
21 | make install &&\
22 | cd ../../
23 |
24 | # Install PyBind11
25 | RUN git clone https://github.com/pybind/pybind11.git &&\
26 | cd pybind11 &&\
27 | git checkout v2.6.2 &&\
28 | mkdir build && cd build &&\
29 | cmake -DPYBIND11_TEST=OFF .. &&\
30 | make &&\
31 | make install &&\
32 | cd ../../
33 |
34 | # Install EGL
35 | RUN apt-get install -y libgl1-mesa-dev libglu1-mesa-dev libgles2-mesa-dev libglew-dev libeigen3-dev
36 |
37 | # Install Python3 NumPy headers
38 | RUN apt-get install -y python3-numpy
39 |
--------------------------------------------------------------------------------
/doc/input.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/doc/input.jpeg
--------------------------------------------------------------------------------
/doc/systemnew-sm.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/doc/systemnew-sm.JPG
--------------------------------------------------------------------------------
/examples/grasp_depth_image.py:
--------------------------------------------------------------------------------
1 | from griffig import BoxData, Griffig
2 |
3 | from loader import Loader
4 |
5 |
6 | if __name__ == '__main__':
7 | box_data = BoxData([-0.002, -0.0065, 0.0], [0.174, 0.282, 0.0])
8 | griffig = Griffig('two-finger-planar', gpu=0)
9 |
10 | image = Loader.get_image('1')
11 |
12 | # Calculate the grasp multiple times to fully load the NN the first time (warm-up)
13 | for _ in range(3):
14 | grasp = griffig.calculate_grasp_from_image(image, box_data)
15 |
16 | print(grasp)
17 | print(f'Calculation duration: {grasp.calculation_duration*1000:0.2f} [ms], details: {grasp.detail_durations}')
18 |
19 | img = griffig.draw_grasp_on_image(image, grasp, channels='RGB')
20 | img.show()
21 |
--------------------------------------------------------------------------------
/examples/grasp_realsense.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 |
3 | import pyrealsense2 as rs
4 |
5 | from griffig import Griffig, Gripper, BoxData, Pointcloud
6 | from loader import Loader
7 |
8 |
9 | class RealsenseReader:
10 | def __init__(self, bag_file):
11 | self.bag_file = bag_file
12 | self.colorizer = rs.colorizer()
13 | self.pipeline = rs.pipeline()
14 | self.config = rs.config()
15 | rs.config.enable_device_from_file(self.config, self.bag_file)
16 | self.config.enable_stream(rs.stream.depth, 1280, 720, rs.format.any, 30)
17 | self.config.enable_stream(rs.stream.color, 1280, 720, rs.format.any, 30)
18 |
19 | def __del__(self):
20 | self.config.disable_all_streams()
21 |
22 | def wait_for_frames(self):
23 | self.pipeline.start(self.config)
24 | return self.pipeline.wait_for_frames()
25 |
26 |
27 | if __name__ == '__main__':
28 | parser = ArgumentParser()
29 | parser.add_argument('-i', '--input', type=str, default=str(Loader.data_path / '20210318_110437.bag'))
30 | args = parser.parse_args()
31 |
32 | box_data = BoxData(
33 | center=(0.0, 0.017, 0.0), # At the center [m]
34 | size=(0.18, 0.285, 0.1), # (x, y, z) [m]
35 | )
36 |
37 | gripper = Gripper( # Some information about the gripper
38 | min_stroke=0.01, # Min. pre-shaped width in [m]
39 | max_stroke=0.10, # Max. pre-shaped width in [m]
40 | )
41 |
42 | griffig = Griffig(
43 | model='two-finger-planar', # Use the default model for a two-finger gripper
44 | gripper=gripper,
45 | box_data=box_data,
46 | typical_camera_distance=0.41, # Typical distance between camera and bin (used for depth image rendering) [m]
47 | )
48 |
49 | realsense = RealsenseReader(args.input)
50 | frames = realsense.wait_for_frames()
51 |
52 | pointcloud = Pointcloud(realsense_frames=frames)
53 |
54 | # Return image (depth channel) with grasp for visualization
55 | grasp, image = griffig.calculate_grasp(pointcloud, return_image=True, channels='D')
56 | print(grasp)
57 | image.show()
58 |
--------------------------------------------------------------------------------
/examples/loader.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import cv2
4 | import numpy as np
5 |
6 | from griffig import OrthographicImage
7 |
8 |
9 | class Loader:
10 | data_path = Path(__file__).parent.absolute().parent / 'test' / 'data'
11 |
12 | @classmethod
13 | def get_image(cls, episode_id: str):
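# Scale the 8-bit JPEG values into the 16-bit range expected by OrthographicImage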
14 | mat_color = cv2.imread(str(cls.data_path / f'{episode_id}-rc.jpeg'), cv2.IMREAD_UNCHANGED).astype(np.uint16) * 255
15 | mat_depth = cv2.imread(str(cls.data_path / f'{episode_id}-rd.jpeg'), cv2.IMREAD_UNCHANGED).astype(np.uint16) * 255
16 | mat_depth = cv2.cvtColor(mat_depth, cv2.COLOR_BGR2GRAY)
17 |
18 | mat = np.concatenate([mat_color, np.expand_dims(mat_depth, axis=2)], axis=2)
19 | return OrthographicImage(mat, 2000.0, 0.22, 0.41)
20 |
--------------------------------------------------------------------------------
/examples/render_heatmap.py:
--------------------------------------------------------------------------------
1 | from griffig import BoxData, Griffig
2 |
3 | from loader import Loader
4 |
5 |
6 | if __name__ == '__main__':
7 | box_data = BoxData([-0.002, -0.0065, 0.0], [0.174, 0.282, 0.0])
8 | griffig = Griffig('two-finger-planar', gpu=0)
9 |
10 | image = Loader.get_image('1')
11 | heatmap = griffig.calculate_heatmap_from_image(image, box_data)
12 | heatmap.show()
13 |
--------------------------------------------------------------------------------
/examples/render_pointcloud.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import rospy
4 | from sensor_msgs.msg import PointCloud2
5 |
6 | from griffig import Griffig, Pointcloud, Renderer
7 |
8 |
9 | if __name__ == '__main__':
10 | base = Path(__file__).parent
11 | rospy.init_node('pointcloud_renderer', anonymous=True)
12 |
13 | # Orthographic image renderer: (width, height), typical distance [m], pixel density [px/m], depth difference [m]
14 | renderer = Renderer((752, 480), 0.41, 2000.0, 0.19)
15 |
16 | pointcloud_message = rospy.wait_for_message('/camera/depth/color/points', PointCloud2)
17 |
18 | pointcloud = Pointcloud(ros_message=pointcloud_message)
19 | print(f'Pointcloud has {pointcloud.size} points')
20 |
21 | image = renderer.render_pointcloud(pointcloud)
22 | pillow_image = Griffig.convert_to_pillow_image(image, channels='RGBD')
23 | pillow_image.save(base / 'data' / 'image-rendered.png')
24 |
--------------------------------------------------------------------------------
/griffig/__init__.py:
--------------------------------------------------------------------------------
1 | from pyaffx import Affine
2 |
3 | from _griffig import (
4 | BoxData,
5 | Grasp,
6 | Gripper,
7 | OrthographicImage,
8 | PointType,
9 | Pointcloud,
10 | Renderer,
11 | RobotPose,
12 | )
13 |
14 | from .griffig import Griffig
15 | from .infer.inference import Inference
16 | from .utility.heatmap import Heatmap
17 | from .utility.model_data import ModelData, ModelArchitecture
18 | from .utility.model_library import ModelLibrary
19 |
--------------------------------------------------------------------------------
/griffig/action/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/griffig/action/__init__.py
--------------------------------------------------------------------------------
/griffig/action/checker.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | from pyaffx import Affine
5 | from _griffig import BoxData, Grasp, Gripper, OrthographicImage, RobotPose
6 | from ..utility.image import get_area_of_interest
7 |
8 |
9 | class Checker:
10 | def __init__(self, converter, lateral_grasps=False, avoid_collisions=True):
11 | self.converter = converter
12 | self.lateral_grasps = lateral_grasps
13 | self.avoid_collisions = avoid_collisions
14 |
15 | def find_grasp(self, grasp_gen, image: OrthographicImage, box_data: BoxData = None, gripper: Gripper = None):
16 | grasp = next(grasp_gen)
17 |
18 | while not self.check(grasp, image, box_data, gripper):
19 | try:
20 | grasp = next(grasp_gen)
21 | except StopIteration:
22 | return None
23 |
24 | grasp_gen.close()
25 |
26 | if gripper:
27 | grasp.pose = grasp.pose * gripper.offset
28 |
29 | return grasp
30 |
31 | def check(self, grasp: Grasp, image: OrthographicImage, box_data: BoxData = None, gripper: Gripper = None):
32 | self.converter.index_to_action(grasp)
33 |
34 | area_size = (0.1, 0.1) # [m]
35 | image_area = get_area_of_interest(image, grasp.pose, (int(image.pixel_size * area_size[0]), int(image.pixel_size * area_size[1])))
36 |
37 | self.converter.calculate_z(image_area, grasp)
38 |
39 | if self.lateral_grasps:
40 | self.converter.calculate_b(image_area, grasp)
41 | self.converter.calculate_c(image_area, grasp)
42 |
43 | is_safe = np.isfinite([grasp.pose.x, grasp.pose.y, grasp.pose.z]).all()
44 |
45 | if box_data:
46 | is_safe &= box_data.is_pose_inside(RobotPose(grasp.pose, d=grasp.stroke))
47 |
48 | if self.avoid_collisions:
49 | pass # Do the gripper rendering here
50 |
51 | return is_safe
52 |
--------------------------------------------------------------------------------
/griffig/action/converter.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | from ..utility.image import crop
5 |
6 |
7 | class Converter:
8 | def __init__(self, gripper_strokes, z_offset=0.0):
9 | self.gripper_strokes = gripper_strokes
10 | self.z_offset = z_offset
11 |
12 | self.gripper_width = 0.016 # [m]
13 | self.gripper_height = 0.004 # [m]
14 |
15 | self.max_lateral_angle = 0.5 # [rad]
16 | self.image_area_size = 0.12 # [m]
17 |
18 | def __len__(self):
19 | return len(self.gripper_strokes)
20 |
21 | def index_to_action(self, grasp):
22 | grasp.stroke = self.gripper_strokes[grasp.index]
23 |
24 | def consider_indices(self, gripper):
25 | arr = np.array(self.gripper_strokes)
26 | return (gripper.min_stroke <= arr) & (arr <= gripper.max_stroke)
27 |
28 | def calculate_z(self, image_area, grasp):
29 | """Model-based calculation of the grasp distance z"""
30 |
31 | mat_area_image = image_area.mat[:, :, 3]
32 | mat_area_image = mat_area_image.astype(np.float32) / np.iinfo(image_area.mat.dtype).max
33 | mat_area_image[mat_area_image < 0.02] = np.NaN # Make every not found pixel NaN
34 |
35 | # Get distance at gripper for possible collisions
36 | area_center_default_size = 0.012 # [m]
37 | area_center_default_size_px = image_area.pixel_size * area_center_default_size
38 |
39 | gripper_one_side_size = 0.5 * image_area.pixel_size * (grasp.stroke + 0.002) # [px]
40 | area_center = crop(mat_area_image, (area_center_default_size_px, area_center_default_size_px))
41 | side_gripper_image_size = (image_area.pixel_size * 0.025, image_area.pixel_size * 0.025)
42 | area_left = crop(mat_area_image, side_gripper_image_size, (-gripper_one_side_size, 0))
43 | area_right = crop(mat_area_image, side_gripper_image_size, (gripper_one_side_size, 0))
44 |
45 | z_raw = image_area.depth_from_value(np.nanmedian(area_center) * 255 * 255)
46 | if np.isnan(z_raw):
47 | area_center = crop(mat_area_image, (image_area.pixel_size * 0.03, image_area.pixel_size * 0.03))
48 | z_raw = image_area.depth_from_value(np.nanmedian(area_center) * 255 * 255)
49 |
50 | if np.isnan(z_raw):
51 | print('[converter] z is NaN!')
52 |
53 | analyze_collision = True and area_left.size > 0 and area_right.size > 0
54 |
55 | if analyze_collision:
56 | z_raw_left = image_area.depth_from_value(np.nanmin(area_left) * 255 * 255)
57 | z_raw_right = image_area.depth_from_value(np.nanmin(area_right) * 255 * 255)
58 | z_raw_collision = min(z_raw_left, z_raw_right) - 0.03 # [m]
59 | else:
60 | z_raw_collision = np.Inf
61 |
62 | grasp.pose.z = min(z_raw, z_raw_collision) + self.z_offset # Get the maximum [m] for impedance mode
63 |
64 | def calculate_b(self, image_area, grasp):
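"""Model-based calculation of the lateral grasp angle b from depth gradients near the gripper fingers"""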
65 | b_lateral_default_size_px_height = int(image_area.pixel_size * self.image_area_size)
66 |
67 | b_gripper_height = int(image_area.pixel_size * self.gripper_width)
68 | b_gripper_width = int(image_area.pixel_size * self.gripper_height)
69 |
70 | b_lateral_default_size_px_width = image_area.pixel_size * (grasp.stroke + self.gripper_height) # [px]
71 | b_lateral_area = crop(image_area, (b_lateral_default_size_px_height, b_lateral_default_size_px_width)) / 255
72 |
73 | b_lateral_area_left = b_lateral_area[:, :2*b_gripper_width] / 255
74 | b_lateral_area_right = b_lateral_area[:, -2*b_gripper_width:] / 255
75 |
76 | values_left = np.gradient(b_lateral_area_left, axis=0)
77 | values_right = np.gradient(b_lateral_area_right, axis=0)
78 |
79 | weights_left = np.ones_like(values_left)
80 | weights_left[np.isnan(values_left)] = np.nan
81 |
82 | weights_right = np.ones_like(values_right)
83 | weights_right[np.isnan(values_right)] = np.nan
84 |
85 | a = np.linspace(-self.image_area_size / 2, self.image_area_size / 2, b_lateral_area.shape[0])
86 | sigma = 1.3 * self.gripper_width
87 | s = 0.042 * np.exp(-0.5 * np.power(a / sigma, 2.)) / (np.sqrt(2 * np.pi) * sigma)
88 | s = np.expand_dims(s, -1)
89 | weights_left *= s
90 | weights_right *= s
91 |
92 | weighted_gradient = np.nansum(weights_left * values_left) / weights_left.size + np.nansum(weights_right * values_right) / weights_right.size
93 |
94 | new_b = np.arctan2((image_area.max_depth - image_area.min_depth) * 0.5 * weighted_gradient, 1. / image_area.pixel_size)
95 | grasp.pose.b = np.clip(new_b, -self.max_lateral_angle, self.max_lateral_angle)
96 |
97 | def calculate_c(self, image_area, grasp):
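"""Model-based calculation of the lateral grasp angle c from the depth gradient across the grasp area"""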
98 | c_lateral_default_size_px_height = int(image_area.pixel_size * 0.016 / 2)
99 | c_lateral_default_size_px_width = image_area.pixel_size * (grasp.stroke + 0.004) # [px]
100 | c_lateral_area = crop(image_area, (c_lateral_default_size_px_height, c_lateral_default_size_px_width)) / 255
101 |
102 | mask = np.isnan(c_lateral_area).astype(np.uint8)
103 |
104 | # Make distance constant 2cm below the grasp point
105 | min_value_for_gradient = image_area.value_from_depth(-grasp.pose.z + 0.04) / (255 * 255)
106 | lateral_area_pre_inpaint = c_lateral_area.astype(np.uint8)
107 | lateral_area_pre_inpaint[lateral_area_pre_inpaint < 255 * min_value_for_gradient] = 255 * min_value_for_gradient
108 | c_lateral_area_inpaint = cv2.inpaint(lateral_area_pre_inpaint, mask, 3, cv2.INPAINT_TELEA) / 255
109 |
110 | gradient = np.mean(np.diff(c_lateral_area_inpaint, axis=1), axis=0)
111 |
112 | left_gradient_max = np.max(gradient)
113 | right_gradient_max = np.min(gradient)
114 |
115 | gradient_norm = (image_area.max_depth - image_area.min_depth)
116 | left_gamma_1 = np.pi / 2 - np.arctan2(gradient_norm * left_gradient_max, 1. / image_area.pixel_size)
117 | right_gamma_1 = -np.pi / 2 - np.arctan2(gradient_norm * right_gradient_max, 1. / image_area.pixel_size)
118 |
119 | gamma_mean = np.arctan2(gradient_norm * np.mean(gradient), 1. / image_area.pixel_size)
120 |
121 | certainty_parameter = 0.0
122 | eps = 0.03
123 | if left_gamma_1 < eps and right_gamma_1 > -eps:
124 | new_c = (left_gamma_1 + right_gamma_1 + 2 * gamma_mean) / 4
125 | elif left_gamma_1 >= eps and right_gamma_1 <= -eps:
126 | new_c = gamma_mean
127 | elif left_gamma_1 <= eps:
128 | new_c = ((1 - certainty_parameter) * right_gamma_1 + gamma_mean) / 2
129 | else:
130 | new_c = ((1 - certainty_parameter) * -left_gamma_1 + gamma_mean) / 2
131 |
132 | grasp.pose.c = np.clip(new_c, -self.max_lateral_angle, self.max_lateral_angle)
133 |
--------------------------------------------------------------------------------
/griffig/griffig.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 | from pathlib import Path
3 |
4 | import cv2
5 | import numpy as np
6 | from PIL import Image
7 |
8 | from _griffig import BoxData, Gripper, Pointcloud, Renderer, RobotPose
9 | from .action.checker import Checker
10 | from .action.converter import Converter
11 | from .infer.inference import Inference
12 | from .infer.selection import Method, Max, Top
13 | from .utility.heatmap import Heatmap
14 | from .utility.image import draw_around_box, draw_pose
15 | from .utility.model_library import ModelData, ModelLibrary, ModelArchitecture
16 |
17 |
18 | class Griffig:
19 | def __init__(
20 | self,
21 | model: Union[str, ModelData, Path] ='two-finger-planar',
22 | gripper: Gripper = None,
23 | box_data: BoxData = None,
24 | typical_camera_distance: float = None,
25 | avoid_collisions=False,
26 | gpu: int = None,
27 | verbose = 0,
28 | ):
29 | self.gripper = gripper
30 | self.box_data = box_data
31 |
32 | self.model_data = model if isinstance(model, ModelData) else ModelLibrary.load_model_data(model)
33 | self.inference = Inference.create(self.model_data, gpu=gpu, verbose=verbose)
34 |
35 | self.converter = Converter(self.model_data.gripper_widths)
36 | self.checker = Checker(self.converter, avoid_collisions=avoid_collisions)
37 |
38 | self.typical_camera_distance = typical_camera_distance if typical_camera_distance is not None else 0.5
39 |
40 | if box_data:
41 | self.renderer = Renderer(box_data, self.typical_camera_distance, self.model_data.pixel_size, self.model_data.depth_diff)
42 | else:
43 | self.renderer = Renderer((752, 480), self.typical_camera_distance, self.model_data.pixel_size, self.model_data.depth_diff)
44 |
45 | self.last_grasp_successful = True
46 |
47 | def render(self, pointcloud: Pointcloud, pixel_size=None, min_depth=None, max_depth=None, position=[0.0, 0.0, 0.0]):
48 | pixel_size = pixel_size if pixel_size is not None else self.model_data.pixel_size
49 | min_depth = min_depth if min_depth is not None else self.typical_camera_distance - self.model_data.depth_diff
50 | max_depth = max_depth if max_depth is not None else self.typical_camera_distance
51 |
52 | img = self.renderer.render_pointcloud_mat(pointcloud, pixel_size, min_depth, max_depth, position)
53 | return self.convert_to_pillow_image(img)
54 |
55 | def calculate_grasp(self, pointcloud: Pointcloud, camera_pose=None, box_data=None, gripper=None, method=None, return_image=False, channels='RGBD'):
56 | image = self.renderer.render_pointcloud(pointcloud)
57 | grasp = self.calculate_grasp_from_image(image, box_data=box_data, gripper=gripper, method=method)
58 |
59 | if return_image:
60 | image = image.clone()
61 | return grasp, self.draw_grasp_on_image(image, grasp, channels=channels)
62 | return grasp
63 |
64 | def calculate_grasp_from_image(self, image, box_data=None, gripper=None, method=None):
65 | box_data = box_data if box_data else self.box_data
66 | gripper = gripper if gripper else self.gripper
67 | selection_method = method if method else (Max() if self.last_grasp_successful else Top(5))
68 |
69 | action_generator = self.inference.infer(image, selection_method, box_data=box_data)
70 | grasp = self.checker.find_grasp(action_generator, image, box_data=box_data, gripper=gripper)
71 | self.last_grasp_successful = True
72 | return grasp
73 |
74 | def calculate_heatmap(self, pointcloud: Pointcloud, box_data: BoxData = None, a_space=None):
75 | pixel_size = self.model_data.pixel_size
76 | min_depth = self.typical_camera_distance - self.model_data.depth_diff
77 | max_depth = self.typical_camera_distance
78 | position = [0.0, 0.0, 0.0]
79 |
80 | img = self.renderer.render_pointcloud_mat(pointcloud, pixel_size, min_depth, max_depth, position)
81 | return self.calculate_heatmap_from_image(img, box_data, a_space)
82 |
83 | def calculate_heatmap_from_image(self, image, box_data: BoxData = None, a_space=None):
84 | a_space = a_space if a_space is not None else [0.0]
85 | heatmapper = Heatmap(self.inference, a_space=a_space)
86 | heatmap = heatmapper.render(image, box_data=box_data)
87 | return Image.fromarray((heatmap[:, :, 2::-1]).astype(np.uint8))
88 |
89 | def report_grasp_failure(self):
90 | self.last_grasp_successful = False
91 |
92 | @classmethod
93 | def convert_to_pillow_image(cls, image, channels='RGBD'):
94 | mat = cv2.convertScaleAbs(cv2.cvtColor(image.mat, cv2.COLOR_BGRA2RGBA), alpha=(255.0/65535.0)).astype(np.uint8)
95 | pillow_image = Image.fromarray(mat, 'RGBA')
96 | if channels == 'RGB':
97 | return pillow_image.convert('RGB')
98 | elif channels == 'D':
99 | return pillow_image.getchannel('A')
100 | return pillow_image
101 |
102 | @classmethod
103 | def draw_box_on_image(cls, image, box_data, draw_lines=False, channels='RGBD'):
104 | draw_around_box(image, box_data, draw_lines)
105 | return cls.convert_to_pillow_image(image, channels)
106 |
107 | @classmethod
108 | def draw_grasp_on_image(cls, image, grasp, channels='RGBD', convert_to_rgb=True):
109 | if channels == 'D' and convert_to_rgb:
110 | image.mat = cv2.cvtColor(image.mat[:, :, 3], cv2.COLOR_GRAY2RGB)
111 | channels = 'RGB'
112 |
113 | draw_pose(image, RobotPose(grasp.pose, d=grasp.stroke))
114 | return cls.convert_to_pillow_image(image, channels)
115 |
--------------------------------------------------------------------------------
/griffig/infer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/griffig/infer/__init__.py
--------------------------------------------------------------------------------
/griffig/infer/inference.py:
--------------------------------------------------------------------------------
1 | from ..utility.model_data import ModelArchitecture
2 | from ..infer.inference_actor_critic import InferenceActorCritic
3 | from ..infer.inference_planar import InferencePlanar
4 | from ..infer.inference_semantic import InferencePlanarSemantic
5 |
6 |
7 | class Inference:
8 | """A factory class for inference"""
9 |
10 | @classmethod
11 | def create(cls, model_data, *params, **kwargs):
12 | if ModelArchitecture(model_data.architecture) == ModelArchitecture.ModelBasedConvolution:
13 | raise Exception('Architecture model based convolution is not implemented!')
14 |
15 | if ModelArchitecture(model_data.architecture) == ModelArchitecture.PlanarSemantic:
16 | return InferencePlanarSemantic(model_data, *params, **kwargs)
17 |
18 | if ModelArchitecture(model_data.architecture) == ModelArchitecture.ActorCritic:
19 | return InferenceActorCritic(model_data, *params, **kwargs)
20 |
21 | return InferencePlanar(model_data, *params, **kwargs)
22 |
--------------------------------------------------------------------------------
/griffig/infer/inference_actor_critic.py:
--------------------------------------------------------------------------------
1 | from time import time
2 |
3 | import numpy as np
4 | from scipy.ndimage import gaussian_filter
5 |
6 | from _griffig import BoxData, Grasp, Gripper, OrthographicImage
7 | from ..infer.inference_base import InferenceBase
8 | from ..infer.selection import Method, Max
9 |
10 |
11 | class InferenceActorCritic(InferenceBase):
12 | def infer(self, image: OrthographicImage, method: Method, box_data: BoxData = None, gripper: Gripper = None):
13 | start = time()
14 |
15 | input_images = self.get_input_images(image, box_data)
16 |
17 | pre_duration = time() - start
18 | start = time()
19 |
20 | rewards_and_actions = self.model(input_images)
21 |
22 | nn_duration = time() - start
23 | start = time()
24 |
25 | estimated_rewards = np.array(rewards_and_actions[::2])
26 | action_from_actor = np.array(rewards_and_actions[1::2])
27 |
28 | if self.model_data.output[0] == 'reward+human':
29 | estimated_rewards = estimated_rewards[:, :, :, :, :4]
30 |
31 | if gripper:
32 | possible_indices = gripper.consider_indices(self.model_data.gripper_widths)
33 | for i in range(estimated_rewards.shape[0]):
34 | self.set_last_dim_to_zero(estimated_rewards[i], np.invert(possible_indices))
35 |
36 | if self.gaussian_sigma:
37 | for i in range(estimated_rewards.shape[0]):
38 | for j in range(estimated_rewards.shape[1]):
39 | estimated_rewards[i][j] = gaussian_filter(estimated_rewards[i][j], self.gaussian_sigma)
40 |
41 | for _ in range(estimated_rewards.size):
42 | index_raveled = method(estimated_rewards)
43 | index = np.unravel_index(index_raveled, estimated_rewards.shape)
44 |
45 | action = Grasp()
46 | action.index = index[4]
47 | action.pose = self.pose_from_index(index[1:], estimated_rewards.shape[1:], image)
48 | action.pose.z, action.pose.b, action.pose.c = action_from_actor[index[0], index[1], index[2], index[3]]
49 | action.estimated_reward = estimated_rewards[index]
50 | action.detail_durations = {
51 | 'pre': pre_duration,
52 | 'nn': nn_duration,
53 | 'post': time() - start,
54 | }
55 | action.calculation_duration = sum(action.detail_durations.values())
56 |
57 | yield action
58 |
59 | method.disable(index, estimated_rewards)
60 | return
61 |
--------------------------------------------------------------------------------
/griffig/infer/inference_base.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import os
3 |
4 | import cv2
5 | from loguru import logger
6 | import numpy as np
7 | import tensorflow.keras as tk
8 |
9 | from pyaffx import Affine
10 | from _griffig import BoxData, RobotPose, OrthographicImage
11 | from ..utility.image import draw_around_box, draw_around_box2, get_inference_image, get_box_projection
12 |
13 |
14 | class InferenceBase:
15 | def __init__(self, model_data, gaussian_sigma=None, gpu: int = None, seed: int = None, verbose=0):
16 | self.model_data = model_data
17 | self.model = self._load_model(model_data.path, 'grasp', gpu=gpu)
18 | self.gaussian_sigma = gaussian_sigma
19 | self.rs = np.random.default_rng(seed=seed)
20 | self.verbose = verbose
21 |
22 | self.size_area_cropped = model_data.size_area_cropped
23 | self.size_result = model_data.size_result
24 | self.scale_factors = (self.size_area_cropped[0] / self.size_result[0], self.size_area_cropped[1] / self.size_result[1])
25 | self.a_space = np.linspace(-np.pi/2 + 0.1, np.pi/2 - 0.1, 20) # [rad] # Don't use a=0.0 -> even number
26 | self.keep_indixes = None
27 |
28 | def _load_model(self, path: Path, submodel=None, gpu=None):
29 | if os.getenv('GRIFFIG_HARDWARE') == 'jetson-nano':
30 | import tensorflow as tf
31 |
32 | logger.info('Detected NVIDIA Jetson Nano Platform')
33 | device = tf.config.list_physical_devices('GPU')
34 | tf.config.experimental.set_memory_growth(device[0], True)
35 | tf.config.experimental.set_virtual_device_configuration(device[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=512)])
36 |
37 | elif gpu is not None:
38 | import tensorflow as tf
39 |
40 | devices = tf.config.list_physical_devices('GPU')
41 | tf.config.experimental.set_visible_devices(devices[gpu], 'GPU')
42 | for device in devices:
43 | tf.config.experimental.set_memory_growth(device, True)
44 |
45 | model = tk.models.load_model(path, compile=False)
46 |
47 | if submodel:
48 | model = model.get_layer(submodel)
49 |
50 | # TensorRT
51 | use_tensorrt = os.getenv('GRIFFIG_HARDWARE') == 'jetson-nano'
52 | if use_tensorrt:
53 | import tensorflow as tf
54 | from tensorflow.python.compiler.tensorrt import trt_convert as trt
55 |
56 | converted_path = path / 'converted'
57 |
58 | if not converted_path.exists():
59 | submodel_path = path / 'submodel'
60 | model.save(str(submodel_path))
61 |
62 | logger.info('Convert to TensorRT')
63 | conversion_params = trt.TrtConversionParams(
64 | precision_mode=trt.TrtPrecisionMode.FP16,
65 | # max_batch_size=32,
66 | )
67 |
68 | def my_input_fn():
69 | # Generate a few sets of dummy input data with the expected input
70 | # shape, used by TensorRT to build the engine:
71 | input_shapes = [[(20, 110, 110, 4)],
72 | [(20, 110, 110, 4)],
73 | [(20, 110, 110, 4)]]
74 | for shapes in input_shapes:
75 | # return a list of input tensors
76 | yield [np.zeros(x).astype(np.float32) for x in shapes]
77 |
78 | converter = trt.TrtGraphConverterV2(
79 | input_saved_model_dir=str(submodel_path),
80 | conversion_params=conversion_params
81 | )
82 | converter.convert()
83 | converter.build(input_fn=my_input_fn)
84 |
85 | converter.save(str(converted_path))
86 |
87 | root = tf.saved_model.load(str(converted_path))
88 | func = root.signatures['serving_default']
89 |
90 | def predict(x):
91 | x = tf.convert_to_tensor(x)
92 | output = [y.numpy() for y in func(x).values()]
93 |
94 | if len(output) == 1:
95 | return output[0]
96 | return output
97 | return predict
98 |
99 | def predict(x):
100 | return model.predict_on_batch(x)
101 | return predict
102 |
103 | def _get_size_cropped(self, image, box_data: BoxData):
104 | box_projection = get_box_projection(image, box_data)
105 | center = np.array([image.mat.shape[1], image.mat.shape[0]]) / 2
106 | farthest_corner = np.max(np.linalg.norm(box_projection - center, axis=1))
107 | side_length = int(np.ceil(2 * farthest_corner * self.size_result[0] / self.size_area_cropped[0]))
108 | return (side_length, side_length)
109 |
110 | def pose_from_index(self, index, index_shape, image: OrthographicImage, resolution_factor=2.0):
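# Map an index of the reward tensor back to a grasp pose: rotation from the corresponding a_space angle, translation from the pixel position rescaled by the crop/result ratio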
111 | return Affine(
112 | x=resolution_factor * self.scale_factors[0] * image.position_from_index(index[1], index_shape[1]),
113 | y=resolution_factor * self.scale_factors[1] * image.position_from_index(index[2], index_shape[2]),
114 | a=self.a_space[index[0]],
115 | ).inverse()
116 |
117 | def get_input_images(self, orig_image, box_data: BoxData):
118 | image = orig_image.clone()
119 | size_cropped = self._get_size_cropped(orig_image, box_data)
120 |
121 | if box_data:
122 | draw_around_box2(image, box_data)
123 |
124 | result_ = [get_inference_image(image, Affine(a=a), size_cropped, self.size_area_cropped, self.size_result, return_mat=True) for a in self.a_space]
125 |
126 | if self.verbose:
127 | cv2.imwrite('/tmp/test-input-c.png', result_[10][:, :, :3])
128 | cv2.imwrite('/tmp/test-input-d.png', result_[10][:, :, 3])
129 |
130 | result = np.array(result_, dtype=np.float32) / np.iinfo(image.mat.dtype).max
131 | if len(result.shape) == 3:
132 | result = np.expand_dims(result, axis=-1)
133 |
134 | return result
135 |
136 | @classmethod
137 | def keep_array_at_last_indixes(cls, array, indixes) -> None:
138 | mask = np.zeros(array.shape)
139 | mask[:, :, :, indixes] = 1
140 | array *= mask
141 |
142 | @classmethod
143 | def set_last_dim_to_zero(cls, array, indixes):
144 | array[:, :, :, indixes] = 0
145 |
--------------------------------------------------------------------------------
/griffig/infer/inference_planar.py:
--------------------------------------------------------------------------------
1 | from time import time
2 |
3 | import cv2
4 | import numpy as np
5 | from scipy.ndimage import gaussian_filter
6 |
7 | from _griffig import BoxData, Grasp, Gripper, OrthographicImage
8 | from ..infer.inference_base import InferenceBase
9 | from ..infer.selection import Method, Max
10 | from ..utility.image import draw_around_box2, get_box_projection, get_inference_image
11 |
12 |
13 | class InferencePlanar(InferenceBase):
14 | def infer(self, image: OrthographicImage, method: Method, box_data: BoxData = None, gripper: Gripper = None):
15 | start = time()
16 | input_images = self.get_input_images(image, box_data)
17 |
18 | pre_duration = time() - start
19 | start = time()
20 |
21 | estimated_reward = self.model(input_images)
22 |
23 | nn_duration = time() - start
24 | start = time()
25 |
26 | if self.gaussian_sigma:
27 | for i in range(estimated_reward.shape[0]):
28 | estimated_reward[i] = gaussian_filter(estimated_reward[i], self.gaussian_sigma)
29 |
30 | if gripper:
31 | possible_indices = gripper.consider_indices(self.model_data.gripper_widths)
32 | self.set_last_dim_to_zero(estimated_reward, np.invert(possible_indices))
33 |
34 | for _ in range(estimated_reward.size):
35 | index_raveled = method(estimated_reward)
36 | index = np.unravel_index(index_raveled, estimated_reward.shape)
37 |
38 | action = Grasp()
39 | action.index = index[3]
40 | action.pose = self.pose_from_index(index, estimated_reward.shape, image)
41 | action.pose.z = np.nan
42 | action.estimated_reward = estimated_reward[index]
43 | action.detail_durations = {
44 | 'pre': pre_duration,
45 | 'nn': nn_duration,
46 | 'post': time() - start,
47 | }
48 | action.calculation_duration = sum(action.detail_durations.values())
49 |
50 | yield action
51 |
52 | method.disable(index, estimated_reward)
53 | return
54 |
--------------------------------------------------------------------------------
/griffig/infer/inference_semantic.py:
--------------------------------------------------------------------------------
1 | from time import time
2 |
3 | import cv2
4 | import numpy as np
5 | from scipy.ndimage import gaussian_filter
6 |
7 | from pyaffx import Affine
8 | from _griffig import BoxData, Grasp, Gripper, OrthographicImage
9 | from ..infer.inference_base import InferenceBase
10 | from ..infer.selection import Method, Max
11 | from ..utility.image import draw_around_box2, get_box_projection, get_inference_image
12 |
13 |
14 | class InferencePlanarSemantic(InferenceBase):
15 | def infer(self, image: OrthographicImage, object_image: OrthographicImage, method: Method, box_data: BoxData = None, gripper: Gripper = None):
16 | assert object_image is not None
17 |
18 | start = time()
19 |
20 | input_images = self.get_input_images(image, box_data)
21 | input_object_images = [get_inference_image(object_image, Affine(a=a), (224, 224), (224, 224), (224, 224), return_mat=True) for a in self.a_space]
22 |
23 | input_object_images = np.array(input_object_images) / np.iinfo(object_image.mat.dtype).max
24 |
25 | pre_duration = time() - start
26 | start = time()
27 |
28 | estimated_grasp_reward, estimated_object_reward = self.model([input_images, [input_object_images]])
29 |
30 | nn_duration = time() - start
31 | start = time()
32 |
33 | estimated_reward = estimated_object_reward * estimated_grasp_reward # np.cbrt(estimated_object_reward * np.power(estimated_grasp_reward, 2))
34 |
35 | if gripper:
36 | possible_indices = gripper.consider_indices(self.model_data.gripper_widths)
37 | self.set_last_dim_to_zero(estimated_reward, np.invert(possible_indices))
38 |
39 | for _ in range(estimated_reward.size):
40 | index_raveled = method(estimated_reward)
41 | index = np.unravel_index(index_raveled, estimated_reward.shape)
42 |
43 | action = Grasp()
44 | action.index = index[3]
45 | action.pose = self.pose_from_index(index, estimated_reward.shape, image)
46 | action.pose.z = np.nan
47 | action.estimated_reward = estimated_reward[index]
48 | action.detail_durations = {
49 | 'pre': pre_duration,
50 | 'nn': nn_duration,
51 | 'post': time() - start,
52 | }
53 | action.calculation_duration = sum(action.detail_durations.values())
54 |
55 | yield action
56 |
57 | method.disable(index, estimated_reward)
58 | return
59 |
--------------------------------------------------------------------------------
/griffig/infer/selection.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Method:
5 | def __call__(self, x):
6 | raise NotImplementedError()
7 |
8 | def disable(self, index, x):
9 | raise NotImplementedError
10 |
11 | def __repr__(self):
12 | raise NotImplementedError
13 |
14 |
15 | class Max(Method):
16 | def __call__(self, x):
17 | return x.argmax()
18 |
19 | def disable(self, index, x):
20 | x[index] = -np.inf
21 |
22 | def __repr__(self):
23 | return 'Max'
24 |
25 |
26 | class Top(Method):
27 | def __init__(self, n=5):
28 | self.n = n
29 |
30 | def __call__(self, x):
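# Choose uniformly at random among the flat indices of the n largest values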
31 | return np.random.choice(np.argpartition(x, -self.n, axis=None)[-self.n:])
32 |
33 | def disable(self, index, x):
34 | x[index] = -np.inf
35 |
36 | def __repr__(self):
37 | return 'Top' + str(self.n)
38 |
39 |
40 | class PowerProb(Method):
41 | def __init__(self, power=4):
42 | self.power = power
43 |
44 | def __call__(self, x):
45 | raveled = np.power(np.ravel(x), self.power)
46 | return np.random.choice(np.arange(x.size), p=(raveled / np.sum(raveled)))
47 |
48 | def disable(self, index, x):
49 | x[index] = 0.0
50 |
51 | def __repr__(self):
52 | return 'PowerProb' + str(self.power)
53 |
54 |
55 | class RandomInference(Method):
56 | def __call__(self, x):
57 | return np.random.choice(np.arange(x.size))
58 |
59 | def disable(self, index, x):
60 | pass
61 |
62 | def __repr__(self):
63 | return 'RandomInference'
64 |
--------------------------------------------------------------------------------
/griffig/utility/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pantor/griffig/0b10ef5d69902b14a4d648a809a51933a8f5fe8a/griffig/utility/__init__.py
--------------------------------------------------------------------------------
/griffig/utility/box_finder.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | from _griffig import BoxData
5 |
6 |
7 | class BoxFinder:
8 | def __init__(self, method='highest-contour', height_buffer=20):
9 | self.method = method
10 | self.height_buffer = height_buffer
11 |
12 | @staticmethod
13 | def contour_rect_area(x):
14 | _, _, w, h = cv2.boundingRect(x)
15 | return w * h
16 |
17 | def find(self, image):
18 | assert self.method == 'highest-contour'
19 |
20 | depth = image.mat[:, :, 3]
21 |
22 | max_value = np.max(depth) - self.height_buffer
23 | _, depth = cv2.threshold(depth, max_value, 255, cv2.THRESH_BINARY)
24 | canny_output = cv2.Canny(depth, 255 / 2, 255)
25 | contours, _ = cv2.findContours(canny_output, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
26 |
27 | if len(contours) < 2:
28 | return None
29 |
30 | # Find second largest contour (inside of the box)
31 | sorted_contours = sorted(contours, key=self.contour_rect_area, reverse=True)
32 | x, y, w, h = cv2.boundingRect(sorted_contours[1])
33 |
34 | box_center = np.array([y + (h - image.mat.shape[0]) / 2, -x + (-w + image.mat.shape[1]) / 2, 0.0]) / image.pixel_size
35 | box_size = np.array([h, -w, 0.1]) / image.pixel_size
36 | return BoxData(box_center, box_size)
37 |
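A minimal usage sketch, assuming `image` is an OrthographicImage whose `mat` carries the depth values in its fourth channel (as the indexing above expects):

finder = BoxFinder(method='highest-contour', height_buffer=20)
box_data = finder.find(image)  # BoxData of the detected bin, or None if no inner contour is found
if box_data is not None:
    print(box_data.contour)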
--------------------------------------------------------------------------------
/griffig/utility/heatmap.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | from pyaffx import Affine
5 | from _griffig import BoxData, OrthographicImage
6 | from ..utility.image import draw_line, get_inference_image
7 | from ..utility.model_data import ModelArchitecture
8 |
9 |
10 | class Heatmap:
11 | def __init__(self, inference, a_space=None):
12 | self.inference = inference
13 |
14 | if a_space is not None:
15 | self.inference.a_space = a_space
16 |
17 | def calculate_heat(self, reward, size_cropped, size_result):
18 | size_reward_center = (reward.shape[1] / 2, reward.shape[2] / 2)
19 | scale = self.inference.size_area_cropped[0] / self.inference.size_result[0] * ((size_cropped[0] - 30) / reward.shape[1])
20 |
21 | a_space_idx = range(len(self.inference.a_space))
22 |
23 | heat_values = np.zeros(size_result[::-1], dtype=np.float32)
24 | for i in a_space_idx:
25 | a = self.inference.a_space[i]
26 | rot_mat = cv2.getRotationMatrix2D(size_reward_center, -a * 180.0 / np.pi, scale)
27 | rot_mat[0][2] += size_result[0] / 2 - size_reward_center[0]
28 | rot_mat[1][2] += size_result[1] / 2 - size_reward_center[1]
29 | heat_values += cv2.warpAffine(reward[i], rot_mat, size_result, borderValue=0)
30 |
31 | norm = (5 * heat_values.max() + len(a_space_idx)) / 6
32 | return (heat_values / norm * 255.0).astype(np.uint8)
33 |
34 | @staticmethod
35 | def get_background(image, use_rgb):
36 | if len(image.mat.shape) >= 3 and image.mat.shape[-1] >= 3:
37 | if use_rgb:
38 | back = cv2.cvtColor(image.mat[:, :, :3], cv2.COLOR_RGB2GRAY)
39 | return cv2.cvtColor(back, cv2.COLOR_GRAY2RGB)
40 |
41 | return cv2.cvtColor(image.mat[:, :, 3:], cv2.COLOR_GRAY2RGB)
42 |
43 | return cv2.cvtColor(image.mat, cv2.COLOR_GRAY2RGB)
44 |
45 | def render(
46 | self,
47 | image: OrthographicImage,
48 | object_image: OrthographicImage = None,
49 | goal_image: OrthographicImage = None,
50 | box_data: BoxData = None,
51 | alpha=0.5,
52 | use_rgb=True, # Otherwise depth
53 | save_path=None,
54 | reward_index=None,
55 | draw_lateral=False,
56 | draw_shifts=False,
57 | draw_indices=None,
58 | alpha_human=0.0,
59 | ):
60 | input_images = self.inference.get_input_images(image, box_data)
61 |
62 | # if goal_image:
63 | # input_images += self.inference.get_input_images(goal_image, box_data)
64 |
65 | if self.inference.model_data.architecture == ModelArchitecture.ActorCritic:
66 | estimated_reward, actor_result = self.inference.model(input_images)
67 |
68 | elif self.inference.model_data.architecture == ModelArchitecture.PlanarSemantic:
69 | input_object_images = [get_inference_image(object_image, Affine(a=a), (224, 224), (224, 224), (224, 224), return_mat=True) for a in self.inference.a_space]
70 | input_object_images = np.array(input_object_images) / np.iinfo(object_image.mat.dtype).max
71 |
72 | estimated_grasp_reward, estimated_object_reward = self.inference.model([input_images, [input_object_images]])
73 | estimated_reward = estimated_object_reward * estimated_grasp_reward
74 |
75 | else:
76 | estimated_reward = self.inference.model(input_images)
77 | actor_result = None
78 |
79 | if reward_index is not None:
80 | estimated_reward = estimated_reward[reward_index]
81 |
82 | if estimated_reward.shape[-1] > 4: # self.inference.model_data.output[0] == 'reward+human':
83 | estimated_reward = (1 - alpha_human) * estimated_reward[:, :, :, :4] + alpha_human * estimated_reward[:, :, :, 4:]
84 |
85 | # reward_reduced = np.maximum(estimated_reward, 0)
86 | reward_reduced = np.mean(estimated_reward, axis=3)
87 | # reward_reduced = estimated_reward[:, :, :, 0]
88 |
89 | # For heatmapping the actor
90 | # reward_reduced = actor_result[:, :, :, 2]
91 | # reward_reduced = (reward_reduced - np.min(reward_reduced)) / np.ptp(reward_reduced)
92 | # reward_reduced += 0.5
93 |
94 | size_cropped = input_images[0].shape[1::-1]
95 | size_result = image.mat.shape[1::-1]
96 |
97 | heat = self.calculate_heat(reward_reduced, size_cropped, size_result)
98 | heat = cv2.applyColorMap(heat.astype(np.uint8), cv2.COLORMAP_JET)
99 |
100 | background = self.get_background(image, use_rgb)
101 | if background.dtype == np.uint16:
102 | background = background.astype(np.float32) / 255
103 | else:
104 | background = background.astype(np.float32)
105 | result = (1 - alpha) * background + alpha * heat
106 | result = OrthographicImage(result.astype(np.float32), image.pixel_size, image.min_depth, image.max_depth)
107 |
108 | if draw_indices is not None:
109 | self.draw_indices(result, reward_reduced.shape, draw_indices)
110 |
111 | if draw_lateral:
112 | for _ in range(10):
113 | index = np.unravel_index(estimated_reward.argmax(), estimated_reward.shape)
114 | action = actor_result[index[0], index[1], index[2]]
115 | self.draw_lateral(result, estimated_reward.shape, index, action)
116 | estimated_reward[np.unravel_index(estimated_reward.argmax(), estimated_reward.shape)] = 0
117 |
118 | if draw_shifts:
119 | for _ in range(10):
120 | self.draw_arrow(result, reward_reduced.shape, np.unravel_index(reward_reduced.argmax(), reward_reduced.shape))
121 | reward_reduced[np.unravel_index(reward_reduced.argmax(), reward_reduced.shape)] = 0
122 |
123 | if save_path:
124 | cv2.imwrite(str(save_path), result.mat)
125 | return result.mat
126 |
127 | def draw_lateral(self, image: OrthographicImage, reward_shape, index, action):
128 | pose = self.inference.pose_from_index(index, reward_shape, image)
129 | arrow_color = (255*255, 255*255, 255*255)
130 | draw_line(image, pose, Affine(0.0, 0.0), Affine(a=pose.a, b=action[1], c=action[2]) * Affine(0.0, 0.0, -0.14), color=arrow_color, thickness=1)
131 |
132 | def draw_indices(self, image: OrthographicImage, reward_shape, indices):
133 | point_color = (255, 255, 255)
134 |
135 | for index in indices:
136 | pose = self.inference.pose_from_index(index, reward_shape, image)
137 | pose.x /= reward_shape[1] / 40
138 | pose.y /= reward_shape[2] / 40
139 |
140 | draw_line(image, pose, Affine(-0.001, 0), Affine(0.001, 0), color=point_color, thickness=1)
141 | draw_line(image, pose, Affine(0, -0.001), Affine(0, 0.001), color=point_color, thickness=1)
142 |
143 | def draw_arrow(self, image: OrthographicImage, reward_shape, index):
144 | pose = self.inference.pose_from_index(index, reward_shape, image)
145 |
146 | arrow_color = (255, 255, 255)
147 | draw_line(image, pose, Affine(0, 0), Affine(0.036, 0), color=arrow_color, thickness=2)
148 | draw_line(image, pose, Affine(0.036, 0.0), Affine(0.026, -0.008), color=arrow_color, thickness=2)
149 | draw_line(image, pose, Affine(0.036, 0.0), Affine(0.026, 0.008), color=arrow_color, thickness=2)
150 |
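A minimal rendering sketch, assuming `inference` is one of the inference objects from griffig.infer and `image` is an OrthographicImage of the scene; the output path is illustrative:

heatmap = Heatmap(inference)
heat_mat = heatmap.render(image, alpha=0.5, use_rgb=True, save_path='heatmap.png')  # blended heatmap as a matrix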
--------------------------------------------------------------------------------
/griffig/utility/image.py:
--------------------------------------------------------------------------------
1 | from typing import Any, List, Sequence, Tuple, Optional, Union
2 |
3 | import cv2
4 | import numpy as np
5 |
6 | from _griffig import BoxData, Gripper, RobotPose, OrthographicImage
7 | from pyaffx import Affine
8 |
9 |
10 | def get_color_scale(dtype):
11 | if dtype == np.float32:
12 | return 1. / 255
13 | if dtype == np.uint16:
14 | return 255
15 | return 1
16 |
17 |
18 | def crop(mat_image: Any, size_output: Sequence[float], vec=(0, 0)) -> Any:
19 | margin_x_lower = int(round((mat_image.shape[0] - size_output[0]) / 2 + vec[1]))
20 | margin_y_lower = int(round((mat_image.shape[1] - size_output[1]) / 2 + vec[0]))
21 | margin_x_upper = margin_x_lower + int(round(size_output[0]))
22 | margin_y_upper = margin_y_lower + int(round(size_output[1]))
23 | return mat_image[margin_x_lower:margin_x_upper, margin_y_lower:margin_y_upper]
24 |
25 |
26 | def get_transformation(x: float, y: float, a: float, center: Sequence[float], scale=1.0, cropped: Sequence[float] = None): # [rad]
27 | trans = cv2.getRotationMatrix2D((round(center[0] - x), round(center[1] - y)), a * 180.0 / np.pi, scale) # [deg]
28 | trans[0][2] += x + scale * cropped[0] / 2 - center[0] if cropped else x
29 | trans[1][2] += y + scale * cropped[1] / 2 - center[1] if cropped else y
30 | return trans
31 |
32 |
33 | def get_area_of_interest(
34 | image: OrthographicImage,
35 | pose: Affine,
36 | size_cropped: Tuple[float, float] = None,
37 | size_result: Tuple[float, float] = None,
38 | flags=cv2.INTER_LINEAR,
39 | return_mat=False,
40 | planar=True,
41 | ) -> Union[OrthographicImage, Any]:
42 | size_input = (image.mat.shape[1], image.mat.shape[0])
43 | center_image = (image.mat.shape[1] / 2, image.mat.shape[0] / 2)
44 |
45 | if size_result and size_cropped:
46 | scale = size_result[0] / size_cropped[0]
47 | assert scale == (size_result[1] / size_cropped[1])
48 | elif size_result:
49 | scale = size_result[0] / size_input[0]
50 | assert scale == (size_result[1] / size_input[1])
51 | else:
52 | scale = 1.0
53 |
54 | size_final = size_result or size_cropped or size_input
55 |
56 | if not planar:
57 | pose = image.pose.inverse() * pose
58 |
59 | trans = get_transformation(
60 | image.pixel_size * pose.y,
61 | image.pixel_size * pose.x,
62 | -pose.a,
63 | center_image,
64 | scale=scale,
65 | cropped=size_cropped,
66 | )
67 | mat_result = cv2.warpAffine(image.mat, trans, size_final, flags=flags, borderMode=cv2.BORDER_REPLICATE) # INTERPOLATION_METHOD
68 | if return_mat:
69 | return mat_result
70 |
71 | image_pose = Affine(x=pose.x, y=pose.y, a=-pose.a) * image.pose
72 |
73 | return OrthographicImage(
74 | mat_result,
75 | scale * image.pixel_size,
76 | image.min_depth,
77 | image.max_depth,
78 | image.camera,
79 | image_pose,
80 | )
81 |
82 |
83 | def get_inference_image(
84 | image: OrthographicImage,
85 | pose: Affine,
86 | size_cropped: Tuple[float, float],
87 | size_area_cropped: Tuple[float, float],
88 | size_area_result: Tuple[float, float],
89 | return_mat=False,
90 | ) -> OrthographicImage:
91 | size_input = (image.mat.shape[1], image.mat.shape[0])
92 | center_image = (size_input[0] / 2, size_input[1] / 2)
93 |
94 | scale = size_area_result[0] / size_area_cropped[0]
95 |
96 | trans = get_transformation(
97 | image.pixel_size * pose.y,
98 | image.pixel_size * pose.x,
99 | pose.a,
100 | center_image,
101 | scale=scale,
102 | cropped=(size_cropped[0] / scale, size_cropped[1] / scale),
103 | )
104 | mat_result = cv2.warpAffine(image.mat, trans, size_cropped, borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_AREA)
105 | if return_mat:
106 | return mat_result
107 |
108 | image_pose = Affine(x=pose.x, y=pose.y, a=pose.a) * image.pose
109 |
110 | return OrthographicImage(
111 | mat_result,
112 | scale * image.pixel_size,
113 | image.min_depth,
114 | image.max_depth,
115 | image.camera,
116 | image_pose,
117 | )
118 |
119 |
120 | def _get_rect_contour(center: Sequence[float], size: Sequence[float]) -> List[Sequence[float]]:
121 | return [
122 | [center[0] + size[0] / 2, center[1] + size[1] / 2, size[2]],
123 | [center[0] + size[0] / 2, center[1] - size[1] / 2, size[2]],
124 | [center[0] - size[0] / 2, center[1] - size[1] / 2, size[2]],
125 | [center[0] - size[0] / 2, center[1] + size[1] / 2, size[2]],
126 | ]
127 |
128 |
129 | def get_box_projection(image: OrthographicImage, box_data: BoxData):
130 | if not box_data:
131 | return
132 |
133 | box_border = [Affine(*p) for p in box_data.contour]
134 | return [image.project(p) for p in box_border]
135 |
136 |
137 | def draw_line(
138 | image: OrthographicImage,
139 | action_pose: Affine,
140 | pt1: Affine,
141 | pt2: Affine,
142 | color,
143 | thickness=1,
144 | ) -> None:
145 | cm = get_color_scale(image.mat.dtype)
146 | pose = image.pose.inverse() * action_pose
147 | pt1_projection = image.project(pose * pt1)
148 | pt2_projection = image.project(pose * pt2)
149 | cv2.line(image.mat, tuple(pt1_projection), tuple(pt2_projection), (color[0] * cm, color[1] * cm, color[2] * cm), thickness, lineType=cv2.LINE_AA)
150 |
151 |
152 | def draw_polygon(
153 | image: OrthographicImage,
154 | action_pose: Affine,
155 | polygon,
156 | color,
157 | thickness=1,
158 | ) -> None:
159 | cm = get_color_scale(image.mat.dtype)
160 | pose = image.pose.inverse() * action_pose
161 | polygon_projection = np.asarray([tuple(image.project(pose * p)) for p in polygon])
162 | cv2.polylines(image.mat, [polygon_projection], True, (color[0] * cm, color[1] * cm, color[2] * cm), thickness, lineType=cv2.LINE_AA)
163 |
164 |
165 | def draw_around_box(image: OrthographicImage, box_data: Optional[BoxData], draw_lines=False) -> None:
166 | if not box_data:
167 | return
168 |
169 | assert box_data, 'Box data is required to draw around the box.'
170 |
171 | box_border = [Affine(*p) for p in box_data.contour]
172 | image_border = [Affine(*p) for p in _get_rect_contour([0.0, 0.0, 0.0], [10.0, 10.0, box_data.contour[0][2]])]
173 | box_projection = [image.project(image.pose.inverse() * p) for p in box_border]
174 |
175 | cm = get_color_scale(image.mat.dtype)
176 |
177 | if draw_lines:
178 | number_channels = image.mat.shape[-1] if len(image.mat.shape) > 2 else 1
179 |
180 | color = np.array([255 * cm] * number_channels) # White
181 | cv2.polylines(image.mat, [np.asarray(box_projection)], True, color, 2, lineType=cv2.LINE_AA)
182 |
183 | else:
184 | color_array = np.array([image.mat[np.clip(p[1], 0, image.mat.shape[0] - 1), np.clip(p[0], 0, image.mat.shape[1] - 1)] for p in box_projection], dtype=np.float32)
185 | if len(color_array.shape) > 1:
186 | color_array[np.mean(color_array, axis=1) < cm] = np.nan
187 | else:
188 | color_array[color_array < cm] = np.nan
189 | color = np.nanmean(color_array, axis=0)
190 | np.nan_to_num(color, copy=False)
191 | image_border_projection = [image.project(image.pose.inverse() * p) for p in image_border]
192 | cv2.fillPoly(image.mat, np.array([image_border_projection, box_projection]), color.tolist())
193 |
194 |
195 | def draw_object(image: OrthographicImage, object_data) -> None:
196 | if 'polygon' not in object_data:
197 | return
198 |
199 | cm = get_color_scale(image.mat.dtype)
200 | polygon = np.asarray([(int(p['x'] * image.mat.shape[1]), int(p['y'] * image.mat.shape[0])) for p in object_data['polygon']])
201 | cv2.polylines(image.mat, [polygon], True, (0 * cm, 255 * cm, 255 * cm), 1, lineType=cv2.LINE_AA)
202 |
203 |
204 | def draw_pose(image: OrthographicImage, action_pose: RobotPose, convert_to_rgb=False, calibration_pattern=False, border_thickness=2, line_thickness=1) -> None:
205 | if convert_to_rgb and image.mat.ndim == 2:
206 | image.mat = cv2.cvtColor(image.mat, cv2.COLOR_GRAY2RGB)
207 |
208 | color_rect = (255, 0, 0) # Blue
209 | color_lines = (0, 0, 255) # Red
210 | color_direction = (0, 255, 0) # Green
211 |
212 | rect = [Affine(*p) for p in _get_rect_contour([0.0, 0.0, 0.0], [200.0 / image.pixel_size, 200.0 / image.pixel_size, 0.0])]
213 |
214 | draw_polygon(image, action_pose, rect, color_rect, 2)
215 |
216 | draw_line(image, action_pose, Affine(90 / image.pixel_size, 0), Affine(100 / image.pixel_size, 0), color_rect, border_thickness)
217 | draw_line(image, action_pose, Affine(0.012, action_pose.d / 2), Affine(-0.012, action_pose.d / 2), color_lines, line_thickness)
218 | draw_line(image, action_pose, Affine(0.012, -action_pose.d / 2), Affine(-0.012, -action_pose.d / 2), color_lines, line_thickness)
219 | draw_line(image, action_pose, Affine(0, action_pose.d / 2), Affine(0, -action_pose.d / 2), color_lines, line_thickness)
220 | draw_line(image, action_pose, Affine(0.006, 0), Affine(-0.006, 0), color_lines, line_thickness)
221 | draw_line(image, action_pose, Affine(z=0.14), Affine(), color_direction, line_thickness)
222 |
223 | if calibration_pattern:
224 | color_calibration = (255, 255, 255)
225 | draw_line(image, action_pose, Affine(0, -0.1), Affine(0, 0.1), color_calibration, line_thickness)
226 | draw_line(image, action_pose, Affine(-0.1, 0), Affine(0.1, 0), color_calibration, line_thickness)
227 |
228 |
229 | def draw_around_box2(image, box_data: BoxData, draw_lines=False):
230 | if not box_data:
231 | return
232 |
233 | assert box_data, 'Box data is required to draw around the box.'
234 |
235 | box_border = [Affine(*p) for p in box_data.contour]
236 | image_border = [Affine(*p) for p in _get_rect_contour([0.0, 0.0, 0.0], [10.0, 10.0, box_data.contour[0][2]])]
237 | box_projection = [image.project(p) for p in box_border]
238 |
239 | cm = get_color_scale(image.mat.dtype)
240 |
241 | if draw_lines:
242 | number_channels = image.mat.shape[-1] if len(image.mat.shape) > 2 else 1
243 | color = np.array([255 * cm] * number_channels) # White
244 | cv2.polylines(image.mat, [np.asarray(box_projection)], True, color, 2, lineType=cv2.LINE_AA)
245 |
246 | else:
247 | color_array = np.array([image.mat[np.clip(p[1], 0, image.mat.shape[0] - 1), np.clip(p[0], 0, image.mat.shape[1] - 1)] for p in box_projection], dtype=np.float32)
248 | if len(color_array.shape) > 1:
249 | color_array[np.mean(color_array, axis=1) < cm] = np.nan
250 | else:
251 | color_array[color_array < cm] = np.nan
252 |
253 | color = np.nanmean(color_array, axis=0)
254 | np.nan_to_num(color, copy=False)
255 |
256 | image_border_projection = [image.project(p) for p in image_border]
257 | cv2.fillPoly(image.mat, np.array([image_border_projection, box_projection]), color.tolist())
258 |
259 |
260 | def draw_pose2(image, grasp, gripper: Gripper, convert_to_rgb=False):
261 | if convert_to_rgb and image.mat.ndim == 2:
262 | image.mat = cv2.cvtColor(image.mat, cv2.COLOR_GRAY2RGB)
263 |
264 | color_rect = (255, 0, 0) # Blue
265 | color_lines = (0, 0, 255) # Red
266 | color_direction = (0, 255, 0) # Green
267 |
268 | rect = [Affine(*p) for p in _get_rect_contour([0.0, 0.0, 0.0], [200.0 / image.pixel_size, 200.0 / image.pixel_size, 0.0])]
269 |
270 | action_pose = RobotPose(grasp.pose, d=grasp.stroke)
271 |
272 | draw_polygon(image, action_pose, rect, color_rect, 2)
273 | draw_line(image, action_pose, Affine(90 / image.pixel_size, 0), Affine(100 / image.pixel_size, 0), color_rect, 2)
274 |
275 | half_width = gripper.finger_width / 2
276 |
277 | draw_line(image, action_pose, Affine(half_width, action_pose.d / 2), Affine(-half_width, action_pose.d / 2), color_lines, 1)
278 | draw_line(image, action_pose, Affine(half_width, -action_pose.d / 2), Affine(-half_width, -action_pose.d / 2), color_lines, 1)
279 |
280 | half_width_height = half_width + gripper.finger_height
281 | draw_line(image, action_pose, Affine(half_width_height, action_pose.d / 2), Affine(-half_width_height, action_pose.d / 2), color_lines, 1)
282 | draw_line(image, action_pose, Affine(half_width_height, -action_pose.d / 2), Affine(-half_width_height, -action_pose.d / 2), color_lines, 1)
283 |
284 | draw_line(image, action_pose, Affine(0, action_pose.d / 2), Affine(0, -action_pose.d / 2), color_lines, 1)
285 | draw_line(image, action_pose, Affine(0.006, 0), Affine(-0.006, 0), color_lines, 1)
286 |
287 | if np.isfinite(action_pose.z) and (action_pose.b != 0 or action_pose.c != 0):
288 | draw_line(image, action_pose, Affine(z=0.14), Affine(), color_direction, 1)
289 |
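A minimal drawing sketch, assuming `image` is an OrthographicImage; the pose values and crop sizes below are illustrative only:

from pyaffx import Affine
from _griffig import RobotPose

pose = Affine(x=0.02, y=-0.01, a=0.3)

# Crop a 200 px window around the pose and scale it to 32 px (the two scale factors must match)
area = get_area_of_interest(image, pose, size_cropped=(200, 200), size_result=(32, 32))

# Draw the gripper rectangle, jaw lines, and approach direction into the image
draw_pose(image, RobotPose(pose, d=0.05), convert_to_rgb=True)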
--------------------------------------------------------------------------------
/griffig/utility/model_data.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import List
3 | from pathlib import Path
4 |
5 |
6 | class ModelArchitecture(str, Enum):
7 | Planar = 'planar' # Model-based z, with b = c = 0
8 | PlanarTypes = 'planar-types' # Model-based z, with b = c = 0, for multiple learned object types
9 | PlanarZ = 'planar-z' # Regression-based z, with b = c = 0
10 | PlanarSemantic = 'planar-semantic' # Semantic Grasping
11 | Lateral = 'lateral' # Model-based z, b, and c
12 | ActorCritic = 'actor-critic' # Actor-critic
13 | ModelBasedConvolution = 'model-based-convolution' # Model-based convolution
14 | NonFCNActorCritic = 'non-fcn-actor-critic' # Non-fully-convolutional actor-critic
15 | NonFCNPlanar = 'non-fcn-planar' # Non-fully-convolutional planar
16 |
17 |
18 | class ModelData:
19 | def __init__(
20 | self,
21 | name: str = None,
22 | path: Path = None,
23 | architecture: ModelArchitecture = None,
24 | pixel_size: float = None,
25 | depth_diff: float = None,
26 | gripper_widths: List[float] = None,
27 | description: str = None,
28 | size_area_cropped = None,
29 | size_result = None,
30 | task = None,
31 | input = None,
32 | input_type = None,
33 | output = None,
34 | version = None,
35 | ):
36 | self.name = name
37 | self.path = path
38 | self.architecture = architecture
39 | self.pixel_size = pixel_size
40 | self.depth_diff = depth_diff
41 | self.gripper_widths = gripper_widths
42 | self.description = description
43 | self.size_area_cropped = size_area_cropped
44 | self.size_result = size_result
45 | self.task = task
46 | self.input = input
47 | self.input_type = input_type
48 | self.output = output
49 | self.version = version
50 |
51 | def to_dict(self):
52 | return self.__dict__
53 |
54 |
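A minimal loading sketch, assuming the metadata is stored as a model_data.json file next to the model weights (as model_library.py below expects); the directory is hypothetical:

import json
from pathlib import Path

model_dir = Path('/tmp/griffig/models/example-model-v1')  # hypothetical cache location
with open(model_dir / 'model_data.json') as f:
    model_data = ModelData(**json.load(f))

print(model_data.architecture, model_data.gripper_widths)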
--------------------------------------------------------------------------------
/griffig/utility/model_library.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import json
3 | from pathlib import Path
4 | from shutil import unpack_archive
5 | from tempfile import NamedTemporaryFile
6 | from typing import Union
7 | from urllib.request import urlopen
8 |
9 | import requests
10 |
11 | from .model_data import ModelArchitecture, ModelData
12 |
13 |
14 | class ModelLibrary:
15 | remote_url = 'https://griffig.xyz/api/v1/models/'
16 | tmp_path = Path('/tmp') / 'griffig' / 'models'
17 |
18 | @classmethod
19 | def get_model_version_from_name(cls, name: str) -> int:
20 | return int(name.split('-')[-1][1:])
21 |
22 | @classmethod
23 | def load_model_data(cls, name: Union[str, Path]) -> ModelData:
24 | model_path = name
25 |
26 | if isinstance(model_path, str):
27 | matched_model_globs = glob.glob(str(cls.tmp_path / f'{name}-v*'))
28 | if not matched_model_globs:
29 | try:
30 | r = requests.get(url=cls.remote_url + name, timeout=1.0) # [s]
31 | except requests.exceptions.Timeout as e:
32 | raise Exception('Could not reach the model server; please load a local model via a Path instead.') from e
33 |
34 | if r.status_code == 404:
35 | raise Exception(f'Model {name} not found!')
36 |
37 | model_info = r.json()
38 | print(f"Download model {model_info['name']} version {model_info['version']} to {model_path}...")
39 |
40 | model_path = cls.tmp_path / f"{model_info['name']}-v{model_info['version']}"
41 | model_path.mkdir(parents=True, exist_ok=True)
42 |
43 | model_remote_url = model_info['download_path']
44 | with urlopen(model_remote_url) as zipresp, NamedTemporaryFile() as tfile:
45 | tfile.write(zipresp.read())
46 | tfile.seek(0)
47 | unpack_archive(tfile.name, model_path, format='zip')
48 |
49 | else:
50 | matched_model_globs.sort(key=cls.get_model_version_from_name, reverse=True)
51 | version = cls.get_model_version_from_name(matched_model_globs[0])
52 | model_path = Path(matched_model_globs[0])
53 | print(f'Found model {name} version {version} at {model_path}')
54 |
55 | if isinstance(name, Path):
56 | name = name.name
57 |
58 | with open(model_path / 'model_data.json', 'r') as read_file:
59 | model_data = ModelData(**json.load(read_file))
60 | model_data.path = model_path / name / 'model' / 'data' / 'model' # model_data.path
61 |
62 | return model_data
63 |
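A minimal usage sketch: a model is resolved either by name (downloaded from griffig.xyz and cached under /tmp/griffig/models) or from a local directory; the model name and path below are illustrative:

from pathlib import Path

model_data = ModelLibrary.load_model_data('example-model')                      # by name: cache hit or download
# model_data = ModelLibrary.load_model_data(Path('/path/to/example-model-v1'))  # or a local model directory
print(model_data.path)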
--------------------------------------------------------------------------------
/include/griffig/box_data.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <array>
4 | #include <optional>
5 | #include <vector>
6 |
7 | #include <opencv2/opencv.hpp>
8 |
9 | #include <griffig/robot_pose.hpp>
10 |
11 |
12 | struct BoxData {
13 | using Affine = affx::Affine;
14 |
15 | //! The contour of the box
16 | std::vector<std::array<double, 3>> contour;
17 |
18 | //! An optional pose of the center
19 | std::optional<Affine> pose;
20 |
21 | explicit BoxData() { }
22 | explicit BoxData(const std::vector<std::array<double, 3>>& contour, const std::optional<Affine>& pose = std::nullopt): contour(contour), pose(pose) { }
23 | explicit BoxData(const std::array<double, 3>& center, const std::array<double, 3>& size, const std::optional<Affine>& pose = std::nullopt): pose(pose) {
24 | contour = {
25 | {center[0] + size[0] / 2, center[1] + size[1] / 2, size[2]},
26 | {center[0] + size[0] / 2, center[1] - size[1] / 2, size[2]},
27 | {center[0] - size[0] / 2, center[1] - size[1] / 2, size[2]},
28 | {center[0] - size[0] / 2, center[1] + size[1] / 2, size[2]},
29 | };
30 | }
31 |
32 | std::array<int, 2> get_rect(float pixel_size, int offset) const {
33 | std::vector<cv::Point2f> cont;
34 | for (auto e: contour) {
35 | cont.push_back({(float)e[0] * pixel_size, (float)e[1] * pixel_size});
36 | }
37 | auto rect = cv::boundingRect(cont);
38 | return {
39 | 2*std::max(std::abs(rect.y), std::abs(rect.y + rect.height)) + offset,
40 | 2*std::max(std::abs(rect.x), std::abs(rect.x + rect.width)) + offset
41 | };
42 | }
43 |
44 | bool is_pose_inside(const RobotPose& pose) const {
45 | double half_stroke = 0.5 * (pose.d + 0.002); // [m]
46 | auto gripper_b1_trans = (pose * Affine(0.0, half_stroke, 0.0)).translation();
47 | auto gripper_b2_trans = (pose * Affine(0.0, -half_stroke, 0.0)).translation();
48 | cv::Point2f gripper_b1 {(float)gripper_b1_trans[0], (float)gripper_b1_trans[1]};
49 | cv::Point2f gripper_b2 {(float)gripper_b2_trans[0], (float)gripper_b2_trans[1]};
50 |
51 | std::vector<cv::Point2f> check_contour;
52 | for (auto c: contour) {
53 | check_contour.emplace_back(cv::Point2f(c[0], c[1]));
54 | }
55 |
56 | bool jaw1_inside_box = cv::pointPolygonTest(check_contour, gripper_b1, false) >= 0;
57 | bool jaw2_inside_box = cv::pointPolygonTest(check_contour, gripper_b2, false) >= 0;
58 |
59 | bool start_point_inside_box = true;
60 | if (!std::isnan(pose.z())) {
61 | auto helper_pose_trans = (pose * Affine(0.0, 0.0, 0.16)).translation();
62 | cv::Point2f helper_pose {(float)helper_pose_trans[0], (float)helper_pose_trans[1]};
63 | start_point_inside_box = cv::pointPolygonTest(check_contour, helper_pose, false) >= 0;
64 | }
65 |
66 | return jaw1_inside_box && jaw2_inside_box && start_point_inside_box;
67 | }
68 |
69 | std::string toString() const {
70 | std::string result = "Box contour (";
71 | for (auto e: contour) {
72 | result += "(" + std::to_string(e[0]) + ", " + std::to_string(e[1]) + ", " + std::to_string(e[2]) + ")";
73 | }
74 | return result + ")";
75 | }
76 | };
77 |
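The struct is exposed to Python through the _griffig bindings; a minimal construction sketch using the center/size constructor above, with illustrative dimensions in meters:

from _griffig import BoxData

# Bin centered at the origin: 18 cm x 28 cm footprint, 10 cm high (values are illustrative)
box = BoxData([0.0, 0.0, 0.0], [0.18, 0.28, 0.1])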
--------------------------------------------------------------------------------
/include/griffig/grasp.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include