├── .gitignore ├── .gitmodules ├── dev.md ├── include ├── kinematics.h ├── parser.h ├── fast_kinematics.h └── cuda_quat.h ├── tests ├── test_client.cu └── test.ipynb ├── docker └── Dockerfile ├── .vscode └── settings.json ├── CMakeLists.txt ├── readme.md ├── setup.py ├── src ├── parser.cpp ├── fast_kinematics.cu └── kinematics.cu ├── pybind └── pybind_fast_kinematics.cpp ├── idea.md ├── kuka_iiwa.urdf ├── panda.urdf └── fetch.urdf /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | *.egg-info/ 3 | build/ 4 | wheelhouse/ 5 | dist/ 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "extern/pybind11"] 2 | path = extern/pybind11 3 | url = ../../pybind/pybind11 4 | branch = stable 5 | -------------------------------------------------------------------------------- /dev.md: -------------------------------------------------------------------------------- 1 | ## building 2 | 3 | Currently the project is setup to build with cmake and setup.py. Because fast_kinematics requires torch, which has a lot of dynamic links to shared libraries, please see `build.sh` for a list of shared libraries we need to exclude to make the build usable on other machines. 4 | 5 | The the user will need to import torch before using this library as torch will load the shared libraries. -------------------------------------------------------------------------------- /include/kinematics.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // data structure 4 | // [translation (3 floats), rotation (4 floats), type (1 float), axis (3 floats)] 5 | 6 | __global__ void _forward_kinematics(float *data, float *angs, size_t *cum_data_idx, 7 | size_t *cum_active_joint_idx, float *result, size_t num_of_robots); 8 | 9 | __global__ void _jacobian(float *data, float *angs, size_t *cum_data_idx, 10 | size_t *cum_active_joint_idx, float *result, size_t num_of_robots); 11 | 12 | __global__ void _jacobian_mixed_frame(float *data, float *angs, size_t *cum_data_idx, 13 | size_t *cum_active_joint_idx, float *result, size_t num_of_robots); -------------------------------------------------------------------------------- /include/parser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | struct JointTree { 10 | std::string name; 11 | int type; 12 | Eigen::Quaternionf rotation; 13 | Eigen::Quaternionf position; 14 | Eigen::Vector3f axis; 15 | std::shared_ptr parent; 16 | std::vector> children; 17 | std::vector children_links; 18 | }; 19 | using JointTreePtr = std::shared_ptr; 20 | 21 | // representation 22 | // [translation (3 floats), rotation (4 floats), type (1 float), axis (3 floats), ...] 23 | constexpr size_t float_per_joint = 11; 24 | 25 | class Parser { 26 | public: 27 | static JointTreePtr parse(std::string urdf_path, bool verbose = false); 28 | 29 | static size_t find_num_of_active_joints(JointTreePtr tip, bool verbose = false); 30 | 31 | static size_t find_num_of_joints(JointTreePtr tip); 32 | 33 | static JointTreePtr find_link_parent(JointTreePtr root, std::string eef_name, bool verbose = false); 34 | 35 | static void prepare_repr(size_t num_of_robots, JointTreePtr tip, float **data, size_t **cum_idx); 36 | }; 37 | -------------------------------------------------------------------------------- /include/fast_kinematics.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | class FastKinematics { 8 | public: 9 | FastKinematics(std::string urdf_path, size_t num_of_robots, std::string eef_name, bool verbose=false); 10 | ~FastKinematics(); 11 | 12 | Eigen::Ref forward_kinematics(Eigen::Ref h_angs, size_t block_size=256); 13 | Eigen::Ref jacobian_mixed_frame(Eigen::Ref h_angs, size_t block_size=256); 14 | Eigen::Ref jacobian_world_frame(Eigen::Ref h_angs, size_t block_size=256); 15 | float *forward_kinematics_raw_ptr(float *t_angs, size_t block_size=256); 16 | float *jacobian_mixed_frame_raw_ptr(float *t_angs, size_t block_size=256); 17 | float *jacobian_world_frame_raw_ptr(float *t_angs, size_t block_size=256); 18 | size_t get_num_of_active_joints(); 19 | size_t get_num_of_joints(); 20 | size_t get_num_of_robots(); 21 | 22 | private: 23 | JointTreePtr root, tip; 24 | size_t num_of_active_joints, num_of_joints; 25 | float *h_data; 26 | Eigen::VectorXf h_fk_result, h_jac_result; 27 | float *d_data, *d_angs, *d_result; 28 | size_t *h_cum_data_idx, *d_num_of_joints_cum; 29 | size_t *h_cum_active_joint_idx, *d_num_of_active_joints_cum; 30 | size_t num_of_robots; 31 | std::string eef_name; 32 | std::string urdf_path; 33 | 34 | void allocate_memory(); 35 | void copy_memory(); 36 | }; 37 | -------------------------------------------------------------------------------- /tests/test_client.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int main() { 10 | size_t num_of_robots = 1000; 11 | FastKinematics fk("../kuka_iiwa.urdf", num_of_robots, "lbr_iiwa_link_7"); 12 | Eigen::VectorXf h_angs(num_of_robots * fk.get_num_of_active_joints()); 13 | h_angs[0] = 0.0; 14 | h_angs[1] = -M_PI / 4.0; 15 | h_angs[2] = 0.0; 16 | h_angs[3] = M_PI / 2.0; 17 | h_angs[4] = 0.0; 18 | h_angs[5] = M_PI / 4.0; 19 | h_angs[6] = 0.0; 20 | for (size_t i = 1; i < num_of_robots; ++i) { 21 | std::memcpy(h_angs.data() + i * fk.get_num_of_active_joints(), h_angs.data(), 22 | fk.get_num_of_active_joints() * sizeof(float)); 23 | } 24 | 25 | float *d_angs; 26 | cudaMalloc(&d_angs, h_angs.size() * sizeof(float)); 27 | cudaMemcpy(d_angs, h_angs.data(), h_angs.size() * sizeof(float), cudaMemcpyHostToDevice); 28 | torch::Tensor t_angs = torch::from_blob(d_angs, {h_angs.size()}); 29 | float *result_ptr = fk.forward_kinematics_raw_ptr(t_angs.data_ptr(), num_of_robots); 30 | auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, 0).pinned_memory(true); 31 | long sz = fk.get_num_of_active_joints()*6; 32 | torch::Tensor result = torch::from_blob(result_ptr, {sz}, options); 33 | std::cout << result << std::endl; 34 | 35 | // auto start = std::chrono::high_resolution_clock::now(); 36 | // for (size_t i = 0; i < 1000; ++i) { 37 | // // auto h_result = fk.jacobian_mixed_frame(h_angs); 38 | // } 39 | // auto end = std::chrono::high_resolution_clock::now(); 40 | // std::cout << "Elapsed time: " << std::chrono::duration_cast(end - start).count() << " ms" << std::endl; 41 | 42 | // for (size_t i = 0; i < 6; ++i) { 43 | // for (size_t j = 0; j < fk.get_num_of_active_joints(); ++j) { 44 | // std::cout << std::fixed << std::setprecision(3) << h_result[j*6+i] << " "; 45 | // } 46 | // std::cout << std::endl; 47 | // } 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM quay.io/pypa/manylinux2014_x86_64 2 | 3 | # basic development tools 4 | RUN yum update -y && yum install -y wget git && yum upgrade -y && yum clean all 5 | WORKDIR /workspace 6 | 7 | # get oh my zsh 8 | RUN yum install -y zsh && \ 9 | sh -c -y "$(wget -O- https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" 10 | 11 | # get cuda 12 | RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \ 13 | yum clean all && \ 14 | yum -y install cuda-toolkit-12-4 && \ 15 | echo 'export PATH=/usr/local/cuda-12.4/bin${PATH:+:${PATH}}' >> ~/.zshrc && \ 16 | echo 'export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}' >> ~/.zshrc 17 | 18 | # Eigen 19 | RUN git clone --single-branch -b 3.4.0 --depth 1 https://gitlab.com/libeigen/eigen.git && \ 20 | cd eigen && mkdir build && cd build && \ 21 | cmake -DCMAKE_BUILD_TYPE=Release .. && \ 22 | make -j$(nproc) && make install && \ 23 | rm -rf /workspace/eigen 24 | 25 | # tinyxml2 26 | RUN git clone --single-branch -b 10.0.0 --depth 1 https://github.com/leethomason/tinyxml2.git && \ 27 | cd tinyxml2 && mkdir build && cd build && \ 28 | cmake -DCMAKE_CXX_FLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. && \ 29 | make -j$(nproc) && make install && \ 30 | rm -rf /workspace/tinyxml2 31 | 32 | # console_bridge 33 | RUN git clone --single-branch -b 1.0.2 --depth 1 https://github.com/ros/console_bridge.git && \ 34 | cd console_bridge && mkdir build && cd build && \ 35 | cmake -DCMAKE_CXX_FLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. && \ 36 | make -j$(nproc) && make install && \ 37 | rm -rf /workspace/console_bridge 38 | 39 | # urdfdom_headers 40 | RUN git clone --single-branch -b 1.1.1 --depth 1 https://github.com/ros/urdfdom_headers.git && \ 41 | cd urdfdom_headers && mkdir build && cd build && \ 42 | cmake .. -DCMAKE_BUILD_TYPE=Release && \ 43 | make -j$(nproc) && make install && \ 44 | rm -rf /workspace/urdfdom_headers 45 | 46 | # urdfdom 47 | RUN git clone --single-branch -b 4.0.0 --depth 1 https://github.com/ros/urdfdom.git && \ 48 | # modify the CMakeLists.txt to build static library 49 | cd urdfdom/urdf_parser && sed -i 's/SHARED/STATIC/' CMakeLists.txt && \ 50 | cd .. && mkdir build && cd build && \ 51 | cmake -DCMAKE_CXX_FLAGS='-D_GLIBCXX_USE_CXX11_ABI=0' -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. && \ 52 | make -j$(nproc) && make install && \ 53 | rm -rf /workspace/urdfdom 54 | 55 | # get fast_kinematics 56 | RUN git clone https://github.com/Lexseal/fast_kinematics.git 57 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.exclude": { 3 | "**/.git": true, 4 | "**/.svn": true, 5 | "**/.hg": true, 6 | "**/CVS": true, 7 | "**/.DS_Store": true, 8 | "**/Thumbs.db": true, 9 | "**/.classpath": true, 10 | "**/.project": true, 11 | "**/.settings": true, 12 | "**/.factorypath": true 13 | }, 14 | "hide-files.files": [], 15 | "files.associations": { 16 | "dense": "cpp", 17 | "algorithm": "cpp", 18 | "__bit_reference": "cpp", 19 | "__config": "cpp", 20 | "__debug": "cpp", 21 | "__errc": "cpp", 22 | "__hash_table": "cpp", 23 | "__locale": "cpp", 24 | "__mutex_base": "cpp", 25 | "__node_handle": "cpp", 26 | "__split_buffer": "cpp", 27 | "__threading_support": "cpp", 28 | "__tree": "cpp", 29 | "__verbose_abort": "cpp", 30 | "any": "cpp", 31 | "array": "cpp", 32 | "atomic": "cpp", 33 | "bitset": "cpp", 34 | "cctype": "cpp", 35 | "charconv": "cpp", 36 | "clocale": "cpp", 37 | "cmath": "cpp", 38 | "complex": "cpp", 39 | "cstdarg": "cpp", 40 | "cstddef": "cpp", 41 | "cstdint": "cpp", 42 | "cstdio": "cpp", 43 | "cstdlib": "cpp", 44 | "cstring": "cpp", 45 | "ctime": "cpp", 46 | "cwchar": "cpp", 47 | "cwctype": "cpp", 48 | "deque": "cpp", 49 | "exception": "cpp", 50 | "fstream": "cpp", 51 | "initializer_list": "cpp", 52 | "iomanip": "cpp", 53 | "ios": "cpp", 54 | "iosfwd": "cpp", 55 | "iostream": "cpp", 56 | "istream": "cpp", 57 | "limits": "cpp", 58 | "list": "cpp", 59 | "locale": "cpp", 60 | "map": "cpp", 61 | "mutex": "cpp", 62 | "new": "cpp", 63 | "optional": "cpp", 64 | "ostream": "cpp", 65 | "queue": "cpp", 66 | "ratio": "cpp", 67 | "set": "cpp", 68 | "sstream": "cpp", 69 | "stdexcept": "cpp", 70 | "streambuf": "cpp", 71 | "string": "cpp", 72 | "string_view": "cpp", 73 | "system_error": "cpp", 74 | "thread": "cpp", 75 | "tuple": "cpp", 76 | "typeinfo": "cpp", 77 | "unordered_map": "cpp", 78 | "variant": "cpp", 79 | "vector": "cpp", 80 | "geometry": "cpp", 81 | "core": "cpp", 82 | "functional": "cpp", 83 | "chrono": "cpp", 84 | "bit": "cpp", 85 | "*.tcc": "cpp", 86 | "iterator": "cpp", 87 | "memory": "cpp", 88 | "memory_resource": "cpp", 89 | "numeric": "cpp", 90 | "random": "cpp", 91 | "type_traits": "cpp", 92 | "utility": "cpp", 93 | "cinttypes": "cpp", 94 | "codecvt": "cpp", 95 | "condition_variable": "cpp", 96 | "forward_list": "cpp", 97 | "unordered_set": "cpp", 98 | "typeindex": "cpp", 99 | "valarray": "cpp", 100 | "specialfunctions": "cpp", 101 | "shared_mutex": "cpp", 102 | "ranges": "cpp", 103 | "regex": "cpp" 104 | } 105 | } -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.12) 2 | project(fast_kinematics LANGUAGES CXX CUDA) 3 | set(CMAKE_CXX_STANDARD 17) 4 | set(CMAKE_CUDA_STANDARD 17) 5 | 6 | set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g3 -Wall -Werror -fsized-deallocation -Wno-deprecated-declarations") 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -fPIC") 8 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O2 -fPIC") 9 | set(CMAKE_CUDA_RUNTIME_LIBRARY Static) 10 | # important or else we get "undefined symbol: fatbinData" 11 | # https://github.com/pybind/pybind11/issues/4825 12 | set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF) 13 | 14 | set(TORCH_CUDA_ARCH_LIST "5.0 6.1 8.0 8.6 8.9 9.0") 15 | 16 | # find torch if no explicit path is given 17 | if(NOT DEFINED CMAKE_PREFIX_PATH) 18 | execute_process( 19 | COMMAND python3.10 -c "import torch;print(torch.utils.cmake_prefix_path)" 20 | OUTPUT_VARIABLE CMAKE_PREFIX_PATH 21 | OUTPUT_STRIP_TRAILING_WHITESPACE 22 | ) 23 | endif() 24 | 25 | # torch requires pre-C++11 ABI 26 | # may or may not be necessary here but doesn't hurt 27 | add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0) 28 | 29 | find_package(Eigen3 REQUIRED) 30 | find_package(urdfdom REQUIRED) 31 | find_package(CUDAToolkit REQUIRED) 32 | find_package(Torch REQUIRED) 33 | find_library(TORCH_PYTHON_LIBRARY torch_python PATH "${TORCH_INSTALL_PREFIX}/lib") 34 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") 35 | 36 | # urdfdom links statically to these two libraries, but for some reason, 37 | # on my system I still need to link these two separately 38 | find_package(TinyXML2 REQUIRED) 39 | find_package(console_bridge REQUIRED) 40 | 41 | # eigen include directory 42 | get_target_property(inc_dir Eigen3::Eigen INTERFACE_INCLUDE_DIRECTORIES) 43 | include_directories(${inc_dir}) 44 | # urdfdom include directory 45 | include_directories(${urdfdom_INCLUDE_DIRS}) 46 | # torch include directory 47 | include_directories(${TORCH_INCLUDE_DIRS}) 48 | include_directories(include) 49 | 50 | # first make the parsing library 51 | add_library(parser src/parser.cpp) 52 | target_link_libraries(parser PRIVATE urdfdom_model tinyxml2 console_bridge) 53 | 54 | # then make core kinematics library 55 | add_library(kinematics src/kinematics.cu src/fast_kinematics.cu) 56 | set_target_properties(kinematics PROPERTIES CUDA_ARCHITECTURES "52;61") 57 | set_target_properties(kinematics PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 58 | target_compile_options(kinematics PRIVATE --expt-relaxed-constexpr -diag-suppress 20012,20236) 59 | target_link_libraries(kinematics PRIVATE parser CUDA::cudart_static) 60 | 61 | add_executable(test_client tests/test_client.cu) 62 | set_target_properties(test_client PROPERTIES CUDA_ARCHITECTURES "52;61") 63 | target_compile_options(test_client PRIVATE --expt-relaxed-constexpr -diag-suppress 20012,20236) 64 | target_link_libraries(test_client PRIVATE kinematics "${TORCH_LIBRARIES}") 65 | 66 | # include pybind11 67 | add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/extern/pybind11") 68 | pybind11_add_module(fast_kinematics pybind/pybind_fast_kinematics.cpp) 69 | # link torch and torch_python without any cuda stuff 70 | target_link_libraries(fast_kinematics PRIVATE kinematics torch torch_library "${TORCH_PYTHON_LIBRARY}") 71 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Fast Kinematics 2 | 3 | This is a cuda enabled library for calculating forward kinematics and Jacobian of a kinematics chain. It can be used with numpy arrays or pytorch tensors. Compared to `pytorch_kinematics`, this is about 1000 times faster at Jacobian calculation with 4096 parallel 7dof robot arms. 4 | 5 | ## Installation 6 | 7 | ### From PyPi 8 | 9 | ```bash 10 | pip install fast_kinematics 11 | ``` 12 | 13 | ### From Source 14 | 15 | 1. Get the docker image 16 | ```bash 17 | docker pull lexseal/fast_kinematics:latest 18 | ``` 19 | 2. Run the docker image 20 | ```bash 21 | docker run -it lexseal/fast_kinematics:latest 22 | ``` 23 | 3. Update the repo 24 | ```bash 25 | cd /workspace/fast_kinematics && git pull 26 | ``` 27 | 4. Run `build.sh` to build all wheels. The built wheels will showup in the `wheelhouse` dir 28 | ```bash 29 | ./build.sh 30 | ``` 31 | 5. Take the wheel out of the docker container. From your host machine, run 32 | ```bash 33 | docker cp :/workspace/fast_kinematics/wheelhouse . 34 | ``` 35 | 36 | ## Usage 37 | 38 | > [!WARNING] 39 | > Please use dtype `np.float32` or `torch.float32` for the joint configuration. The library is optimized for `float32` and will not work with `float64`. 40 | 41 | > [!NOTE] 42 | > You might need to import torch before using this library as torch will load the shared libraries. Your mileage may vary. 43 | 44 | Minimal numpy example: 45 | 46 | ```python 47 | import numpy as np 48 | from fast_kinematics import FastKinematics 49 | 50 | N = 1024 # number of parallel calculations 51 | 52 | # need the urdf file, number of parallel calculations and end effector link 53 | model = FastKinematics("kuka_iiwa.urdf", N, "lbr_iiwa_link_7") 54 | 55 | # this robot has 7 joints, so the joint configuration has size (N*7,) (1d array) 56 | # the first 7 values are for the first robot... Note we need to use float32! 57 | joint_config = np.random.rand(N*7).astype(np.float32) 58 | 59 | # get the forward kinematics size (N*7,) (1d array) 60 | # the first 7 values are the first robot's pose [x,y,z,qw,qx,qy,qz] 61 | ee_pose = model.forward_kinematics(joint_config) 62 | 63 | # get the jacobian size (N*6*7,) (1d array) The 7 here is the number of joints 64 | jac = model.jacobian_mixed_frame(joint_config) 65 | # we can reshape the jacobian to (N,7,6) and then transpose to (N,6,7) 66 | jac = jac.reshape(-1, 6, 7).transpose(0, 2, 1) 67 | ``` 68 | 69 | Minimal pytorch example: 70 | 71 | ```python 72 | import torch 73 | from fast_kinematics import FastKinematics 74 | 75 | N = 1024 76 | model = FastKinematics("kuka_iiwa.urdf", N, "lbr_iiwa_link_7") 77 | 78 | joint_config = torch.rand(N*7, dtype=torch.float32, device="cuda") 79 | 80 | ee_pose = model.forward_kinematics_pytorch(joint_config) 81 | ee_pose = ee_pose.view(-1,7,6).permute(0,2,1) 82 | ``` 83 | 84 | There is also a jupyter notebook in the `tests` folder that contains usage as well as some benchmarks against `pytorch_kinematics`. 85 | 86 | ## APIs 87 | 88 | The docs are embedded in the library itself, you can either print out the doc string or use the code suggestions in your IDE to see the API documentation. 89 | Alternatively, please see `pybind11/pybind_fast_kinematics.cpp` for the API documentation. 90 | -------------------------------------------------------------------------------- /include/cuda_quat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class Quaternion { 6 | public: 7 | __host__ __device__ Quaternion(float w = 1.0f, float x = 0.0f, float y = 0.0f, float z = 0.0f) 8 | : w(w), x(x), y(y), z(z) {} 9 | 10 | __host__ __device__ float norm() const { 11 | return std::sqrt(w * w + x * x + y * y + z * z); 12 | } 13 | 14 | __host__ __device__ Quaternion conjugate() const { 15 | return Quaternion(w, -x, -y, -z); 16 | } 17 | 18 | __host__ __device__ Quaternion operator+(const Quaternion& other) const { 19 | return Quaternion(w + other.w, x + other.x, y + other.y, z + other.z); 20 | } 21 | 22 | __host__ __device__ Quaternion operator*(const Quaternion& other) const { 23 | float nw = w * other.w - x * other.x - y * other.y - z * other.z; 24 | float nx = w * other.x + x * other.w + y * other.z - z * other.y; 25 | float ny = w * other.y - x * other.z + y * other.w + z * other.x; 26 | float nz = w * other.z + x * other.y - y * other.x + z * other.w; 27 | return Quaternion(nw, nx, ny, nz); 28 | } 29 | 30 | __host__ __device__ Quaternion operator*(float scalar) const { 31 | return Quaternion(w * scalar, x * scalar, y * scalar, z * scalar); 32 | } 33 | 34 | __host__ __device__ Quaternion operator/(float scalar) const { 35 | return Quaternion(w / scalar, x / scalar, y / scalar, z / scalar); 36 | } 37 | 38 | __host__ __device__ Quaternion& operator+=(const Quaternion& other) { 39 | w += other.w; 40 | x += other.x; 41 | y += other.y; 42 | z += other.z; 43 | return *this; 44 | } 45 | 46 | __host__ __device__ Quaternion& operator*=(const Quaternion& other) { 47 | float nw = w * other.w - x * other.x - y * other.y - z * other.z; 48 | float nx = w * other.x + x * other.w + y * other.z - z * other.y; 49 | float ny = w * other.y - x * other.z + y * other.w + z * other.x; 50 | float nz = w * other.z + x * other.y - y * other.x + z * other.w; 51 | w = nw; 52 | x = nx; 53 | y = ny; 54 | z = nz; 55 | return *this; 56 | } 57 | 58 | __host__ __device__ Quaternion& operator*=(float scalar) { 59 | w *= scalar; 60 | x *= scalar; 61 | y *= scalar; 62 | z *= scalar; 63 | return *this; 64 | } 65 | 66 | __host__ __device__ Quaternion& operator/=(float scalar) { 67 | w /= scalar; 68 | x /= scalar; 69 | y /= scalar; 70 | z /= scalar; 71 | return *this; 72 | } 73 | 74 | __host__ __device__ void normalize() { 75 | float n = norm(); 76 | w /= n; 77 | x /= n; 78 | y /= n; 79 | z /= n; 80 | } 81 | 82 | __host__ __device__ Quaternion normalized() const { 83 | float n = norm(); 84 | return Quaternion(w / n, x / n, y / n, z / n); 85 | } 86 | 87 | __host__ __device__ Quaternion inverse() const { 88 | return conjugate() / (w * w + x * x + y * y + z * z); 89 | } 90 | 91 | __host__ __device__ static Quaternion Identity() { 92 | return Quaternion(1.0f, 0.0f, 0.0f, 0.0f); 93 | } 94 | 95 | __host__ __device__ static Quaternion FromAngleAxis(float angle, float x, float y, float z) { 96 | float halfAngle = angle / 2; 97 | float s = std::sin(halfAngle); 98 | return Quaternion(std::cos(halfAngle), x * s, y * s, z * s); 99 | } 100 | 101 | public: 102 | float w, x, y, z; 103 | }; 104 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | from pathlib import Path 5 | 6 | from setuptools import Extension, setup 7 | from setuptools.command.build_ext import build_ext 8 | 9 | 10 | # A CMakeExtension needs a sourcedir instead of a file list. 11 | # The name must be the _single_ output extension from the CMake build. 12 | # If you need multiple extensions, see scikit-build. 13 | class CMakeExtension(Extension): 14 | def __init__(self, name: str, sourcedir: str = "") -> None: 15 | super().__init__(name, sources=[]) 16 | self.sourcedir = os.fspath(Path(sourcedir).resolve()) 17 | 18 | 19 | class CMakeBuild(build_ext): 20 | def build_extension(self, ext: CMakeExtension) -> None: 21 | # install numpy and pytorch 22 | subprocess.run([sys.executable, "-m", "pip", "install", "numpy"]) 23 | subprocess.run([sys.executable, "-m", "pip", "install", "torch"]) 24 | 25 | # Must be in this form due to bug in .resolve() only fixed in Python 3.10+ 26 | ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name) 27 | extdir = ext_fullpath.parent.resolve() 28 | 29 | # Using this requires trailing slash for auto-detection & inclusion of 30 | # auxiliary "native" libs 31 | 32 | debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug 33 | cfg = "Debug" if debug else "Release" 34 | 35 | # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON 36 | # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code 37 | # from Python. 38 | cmake_args = [ 39 | f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}", 40 | f"-DPYTHON_EXECUTABLE={sys.executable}", 41 | f"-DCMAKE_BUILD_TYPE={cfg}", # not used on MSVC, but no harm 42 | ] 43 | build_args = [] 44 | # Adding CMake arguments set as environment variable 45 | # (needed e.g. to build for ARM OSx on conda-forge) 46 | if "CMAKE_ARGS" in os.environ: 47 | cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item] 48 | 49 | # In this example, we pass in the version to C++. You might not need to. 50 | cmake_args += [f"-DEXAMPLE_VERSION_INFO={self.distribution.get_version()}"] 51 | 52 | build_args += ["-j"] # parallel build 53 | 54 | build_temp = Path(self.build_temp) / ext.name 55 | if not build_temp.exists(): 56 | build_temp.mkdir(parents=True) 57 | 58 | subprocess.run( 59 | ["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True 60 | ) 61 | subprocess.run( 62 | ["cmake", "--build", ".", *build_args], cwd=build_temp, check=True 63 | ) 64 | 65 | this_directory = Path(__file__).parent 66 | long_description = (this_directory / "readme.md").read_text() 67 | 68 | # The information here can also be placed in setup.cfg - better separation of 69 | # logic and declaration, and simpler if you include description/version in a file. 70 | setup( 71 | name="fast_kinematics", 72 | version="0.2.2", 73 | author="Xinsong Lin", 74 | author_email="x8lin@ucsd.edu", 75 | description="A fast kinematics library for robotics", 76 | long_description=long_description, 77 | long_description_content_type="text/markdown", 78 | ext_modules=[CMakeExtension("fast_kinematics")], 79 | cmdclass={"build_ext": CMakeBuild}, 80 | zip_safe=False, 81 | requires=["pytorch", "numpy"], 82 | python_requires=">=3.8", 83 | url="https://github.com/Lexseal/fast_kinematics", 84 | license="MIT", 85 | ) 86 | -------------------------------------------------------------------------------- /src/parser.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | JointTreePtr Parser::parse(std::string urdf_path, bool verbose) { 8 | urdf::ModelInterfaceSharedPtr model = urdf::parseURDFFile(urdf_path); 9 | if (verbose) { 10 | std::cout << "Model name: " << model->getName() << std::endl; 11 | std::cout << "Root link name: " << model->getRoot()->name << std::endl; 12 | std::cout << "Number of links: " << model->links_.size() << std::endl; 13 | std::cout << "Number of joints: " << model->joints_.size() << std::endl; 14 | std::cout << "Number of materials: " << model->materials_.size() << std::endl; 15 | } 16 | 17 | std::unordered_map link_to_parent; 18 | std::unordered_map> link_to_children; 19 | for (auto joint : model->joints_) { 20 | // create a joint tree of the link 21 | auto joint_tree = std::make_shared(); 22 | joint_tree->name = joint.second->name; 23 | joint_tree->type = joint.second->type; 24 | auto &rotation = joint.second->parent_to_joint_origin_transform.rotation; 25 | auto &position = joint.second->parent_to_joint_origin_transform.position; 26 | auto &axis = joint.second->axis; 27 | joint_tree->rotation = Eigen::Quaternionf(rotation.w, rotation.x, rotation.y, rotation.z); 28 | joint_tree->position = Eigen::Quaternionf(0, position.x, position.y, position.z); 29 | joint_tree->axis = Eigen::Vector3f(axis.x, axis.y, axis.z); 30 | std::string parent = joint.second->parent_link_name, child = joint.second->child_link_name; 31 | link_to_children[parent].push_back(joint_tree); 32 | assert(link_to_parent.count(child) == 0); // do not support a link having multiple parents 33 | link_to_parent[child] = joint_tree; 34 | joint_tree->children_links.push_back(child); 35 | } 36 | 37 | // connect the joint trees 38 | for (auto &itm : link_to_parent) { 39 | const std::string &link_name = itm.first; 40 | JointTreePtr &parent = itm.second; 41 | for (auto &child : link_to_children[link_name]) { 42 | parent->children.push_back(child); 43 | child->parent = parent; 44 | } 45 | } 46 | 47 | // find the tree root 48 | std::string root_link_name = model->getRoot()->name; 49 | std::vector root_joints = link_to_children[root_link_name]; 50 | assert(root_joints.size() == 1); // do not support a link having multiple root links yet 51 | JointTreePtr root = root_joints[0]; 52 | 53 | return root; 54 | } 55 | 56 | size_t Parser::find_num_of_active_joints(JointTreePtr tip, bool verbose) { 57 | size_t num_of_active_joints = 0; 58 | JointTreePtr walker = tip; 59 | while (walker) { 60 | if (verbose && walker->type != urdf::Joint::FIXED) { 61 | std::cout << "Joint name: " << walker->name << std::endl; 62 | std::cout << "Joint type: " << walker->type << std::endl; 63 | } 64 | num_of_active_joints += (walker->type != urdf::Joint::FIXED); 65 | walker = walker->parent; 66 | } 67 | return num_of_active_joints; 68 | } 69 | 70 | size_t Parser::find_num_of_joints(JointTreePtr tip) { 71 | size_t num_of_joints = 0; 72 | JointTreePtr walker = tip; 73 | while (walker) { 74 | ++num_of_joints; 75 | walker = walker->parent; 76 | } 77 | return num_of_joints; 78 | } 79 | 80 | JointTreePtr Parser::find_link_parent(JointTreePtr root, std::string eef_name, bool verbose) { 81 | JointTreePtr tip; 82 | std::queue q; 83 | q.push(root); 84 | while (!q.empty()) { 85 | JointTreePtr node = q.front(); 86 | q.pop(); 87 | if (verbose) { 88 | std::cout << "Joint name: " << node->name << std::endl; 89 | std::cout << "Joint type: " << node->type << std::endl; 90 | std::cout << "Joint rotation: " << node->rotation << std::endl; 91 | std::cout << "Joint position: " << node->position << std::endl; 92 | std::cout << "Joint axis: " << node->axis.transpose() << std::endl; 93 | if (node->parent) { 94 | std::cout << "Parent joint name: " << node->parent->name << std::endl; 95 | } 96 | } 97 | for (auto &child : node->children) 98 | q.push(child); 99 | if (std::find(node->children_links.begin(), 100 | node->children_links.end(), 101 | eef_name) != node->children_links.end()) { 102 | tip = node; 103 | break; // found 104 | } 105 | } 106 | return tip; 107 | } 108 | 109 | void Parser::prepare_repr(size_t num_of_robots, JointTreePtr tip, float **data, size_t **cum_idx) { 110 | size_t num_of_joints = find_num_of_joints(tip); 111 | size_t idx = num_of_robots * float_per_joint * num_of_joints; 112 | *data = new float[idx]; 113 | *cum_idx = new size_t[num_of_robots]; 114 | (*cum_idx)[0] = num_of_joints * float_per_joint; 115 | JointTreePtr walker = tip; 116 | while (walker) { 117 | (*data)[--idx] = walker->axis.z(); 118 | (*data)[--idx] = walker->axis.y(); 119 | (*data)[--idx] = walker->axis.x(); 120 | (*data)[--idx] = walker->type; 121 | (*data)[--idx] = walker->rotation.coeffs().z(); 122 | (*data)[--idx] = walker->rotation.coeffs().y(); 123 | (*data)[--idx] = walker->rotation.coeffs().x(); 124 | (*data)[--idx] = walker->rotation.coeffs().w(); 125 | (*data)[--idx] = walker->position.z(); 126 | (*data)[--idx] = walker->position.y(); 127 | (*data)[--idx] = walker->position.x(); 128 | walker = walker->parent; 129 | } 130 | for (size_t i = 0; i < num_of_robots - 1; ++i) { 131 | memcpy((*data) + i * float_per_joint * num_of_joints, 132 | (*data) + (num_of_robots - 1) * float_per_joint * num_of_joints, float_per_joint * num_of_joints * sizeof(float)); 133 | (*cum_idx)[i + 1] = (*cum_idx)[i] + num_of_joints * float_per_joint; 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /pybind/pybind_fast_kinematics.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace py = pybind11; 8 | 9 | std::string constructor_doc = R"( 10 | :param urdf_path: path to the URDF file 11 | :param num_of_robots: number of robots in the URDF file 12 | :param eef_name: name of the end-effector 13 | :param verbose: whether to print debug information 14 | )"; 15 | 16 | std::string forward_kinematics_doc = R"( 17 | forward kinematics in numpy 18 | 19 | :param h_angs: joint angles in 1d numpy array [q1, q2, q3, ...] 20 | :param block_size: block size for parallel computation 21 | :return: end-effector pose in 1d numpy array [x, y, z, qw, qx, qy, qz, ...] 22 | )"; 23 | 24 | std::string jacobian_mixed_frame_doc = R"( 25 | mixed frame means the velocity is expressed in the end-effector frame 26 | but the angular velocity is expressed in the base frame 27 | 28 | :param h_angs: joint angles in 1d numpy array [q1, q2, q3, ...] 29 | :param block_size: block size for parallel computation 30 | :return: Jacobian matrix in 2d numpy array 31 | )"; 32 | 33 | std::string jacobian_world_frame_doc = R"( 34 | both the velocity and angular velocity are expressed in the base frame 35 | 36 | :param h_angs: joint angles in 1d numpy array [q1, q2, q3, ...] 37 | :param block_size: block size for parallel computation 38 | :return: Jacobian matrix in 2d numpy array 39 | )"; 40 | 41 | std::string forward_kinematics_pytorch_doc = R"( 42 | forward kinematics in PyTorch 43 | 44 | :param t_angs: joint angles in 1d torch tensor [q1, q2, q3, ...] 45 | :param block_size: block size for parallel computation 46 | :return: end-effector pose in 1d torch tensor [x, y, z, qw, qx, qy, qz, ...] 47 | )"; 48 | 49 | std::string jacobian_mixed_frame_pytorch_doc = R"( 50 | mixed frame means the velocity is expressed in the end-effector frame 51 | but the angular velocity is expressed in the base frame 52 | 53 | :param t_angs: joint angles in 1d torch tensor [q1, q2, q3, ...] 54 | :param block_size: block size for parallel computation 55 | :return: Jacobian matrix in 2d torch tensor 56 | )"; 57 | 58 | std::string jacobian_world_frame_pytorch_doc = R"( 59 | both the velocity and angular velocity are expressed in the base frame 60 | 61 | :param t_angs: joint angles in 1d torch tensor [q1, q2, q3, ...] 62 | :param block_size: block size for parallel computation 63 | :return: Jacobian matrix in 2d torch tensor 64 | )"; 65 | 66 | std::string get_num_of_active_joints_doc = R"( 67 | get the number of joints that are not fixed in a single kinematic chain, 68 | )"; 69 | 70 | std::string get_num_of_joints_doc = R"( 71 | get the total number of joints in a single kinematic chain, 72 | )"; 73 | 74 | std::string get_num_of_robots_doc = R"( 75 | get the number of parallel robots 76 | )"; 77 | 78 | 79 | PYBIND11_MODULE(fast_kinematics, m) { 80 | m.doc() = "fast kinematics python bindings"; 81 | 82 | auto PyFastKinematics = py::class_(m, "FastKinematics"); 83 | PyFastKinematics 84 | .def(py::init(), py::arg("urdf_path"), 85 | py::arg("num_of_robots"), 86 | py::arg("eef_name"), 87 | py::arg("verbose")=false, 88 | constructor_doc.c_str()) 89 | .def("forward_kinematics", &FastKinematics::forward_kinematics, 90 | py::arg("h_angs"), py::arg("block_size")=256, forward_kinematics_doc.c_str()) 91 | .def("jacobian_mixed_frame", &FastKinematics::jacobian_mixed_frame, 92 | py::arg("h_angs"), py::arg("block_size")=256, jacobian_mixed_frame_doc.c_str()) 93 | .def("jacobian_world_frame", &FastKinematics::jacobian_world_frame, 94 | py::arg("h_angs"), py::arg("block_size")=256, jacobian_world_frame_doc.c_str()) 95 | .def("forward_kinematics_pytorch", [](FastKinematics &fk, torch::Tensor t_angs, size_t block_size=256) { 96 | auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, 0).pinned_memory(true); 97 | float *d_result = fk.forward_kinematics_raw_ptr(t_angs.data_ptr(), block_size); 98 | long int sz = fk.get_num_of_robots() * 7; 99 | return torch::from_blob(d_result, {sz}, options); 100 | }, py::arg("t_angs"), py::arg("block_size")=256, forward_kinematics_pytorch_doc.c_str()) 101 | .def("jacobian_mixed_frame_pytorch", [](FastKinematics &fk, torch::Tensor t_angs, size_t block_size=256) { 102 | auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, 0).pinned_memory(true); 103 | float *d_result = fk.jacobian_mixed_frame_raw_ptr(t_angs.data_ptr(), block_size); 104 | long int sz = fk.get_num_of_robots() * fk.get_num_of_active_joints() * 6; 105 | return torch::from_blob(d_result, {sz}, options); 106 | }, py::arg("t_angs"), py::arg("block_size")=256, jacobian_mixed_frame_pytorch_doc.c_str()) 107 | .def("jacobian_world_frame_pytorch", [](FastKinematics &fk, torch::Tensor t_angs, size_t block_size=256) { 108 | auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, 0).pinned_memory(true); 109 | float *d_result = fk.jacobian_world_frame_raw_ptr(t_angs.data_ptr(), block_size); 110 | long int sz = fk.get_num_of_robots() * fk.get_num_of_active_joints() * 6; 111 | return torch::from_blob(d_result, {sz}, options); 112 | }, py::arg("t_angs"), py::arg("block_size")=256, jacobian_world_frame_pytorch_doc.c_str()) 113 | .def("get_num_of_active_joints", &FastKinematics::get_num_of_active_joints, get_num_of_active_joints_doc.c_str()) 114 | .def("get_num_of_joints", &FastKinematics::get_num_of_joints, get_num_of_joints_doc.c_str()) 115 | .def("get_num_of_robots", &FastKinematics::get_num_of_robots, get_num_of_robots_doc.c_str()); 116 | } 117 | -------------------------------------------------------------------------------- /src/fast_kinematics.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | FastKinematics::FastKinematics(std::string urdf_path, size_t num_of_robots, std::string eef_name, bool verbose) 5 | : urdf_path(urdf_path) 6 | , num_of_robots(num_of_robots) 7 | , eef_name(eef_name) { 8 | root = Parser::parse(urdf_path); 9 | tip = Parser::find_link_parent(root, eef_name, verbose); 10 | num_of_active_joints = Parser::find_num_of_active_joints(tip, verbose); 11 | num_of_joints = Parser::find_num_of_joints(tip); 12 | Parser::prepare_repr(num_of_robots, tip, &h_data, &h_cum_data_idx); 13 | allocate_memory(); 14 | copy_memory(); 15 | } 16 | 17 | FastKinematics::~FastKinematics() { 18 | delete[] h_data; 19 | delete[] h_cum_active_joint_idx; 20 | delete[] h_cum_data_idx; 21 | cudaFree(d_data); 22 | cudaFree(d_angs); 23 | cudaFree(d_result); 24 | cudaFree(d_num_of_joints_cum); 25 | cudaFree(d_num_of_active_joints_cum); 26 | } 27 | 28 | Eigen::Ref FastKinematics::forward_kinematics(Eigen::Ref h_angs, size_t block_size) { 29 | assert(h_angs.size() == h_cum_active_joint_idx[num_of_robots-1]); // check if the number of joints is correct 30 | cudaMemcpy(d_angs, h_angs.data(), h_cum_active_joint_idx[num_of_robots-1] * sizeof(float), cudaMemcpyHostToDevice); 31 | dim3 grid((num_of_robots+(block_size-1))/block_size); 32 | dim3 block(block_size); 33 | _forward_kinematics<<>>(d_data, d_angs, d_num_of_joints_cum, d_num_of_active_joints_cum, d_result, num_of_robots); 34 | cudaMemcpy(h_fk_result.data(), d_result, 7*num_of_robots * sizeof(float), cudaMemcpyDeviceToHost); 35 | return h_fk_result; 36 | } 37 | 38 | Eigen::Ref FastKinematics::jacobian_mixed_frame(Eigen::Ref h_angs, size_t block_size) { 39 | assert(h_angs.size() == h_cum_active_joint_idx[num_of_robots-1]); // check if the number of joints is correct 40 | cudaMemcpy(d_angs, h_angs.data(), h_cum_active_joint_idx[num_of_robots-1] * sizeof(float), cudaMemcpyHostToDevice); 41 | dim3 grid((num_of_robots+(block_size-1))/block_size); 42 | dim3 block(block_size); 43 | _jacobian_mixed_frame<<>>(d_data, d_angs, d_num_of_joints_cum, d_num_of_active_joints_cum, d_result, num_of_robots); 44 | cudaMemcpy(h_jac_result.data(), d_result, 45 | h_cum_active_joint_idx[num_of_robots-1] * 6 * sizeof(float), cudaMemcpyDeviceToHost); 46 | return h_jac_result; 47 | } 48 | 49 | Eigen::Ref FastKinematics::jacobian_world_frame(Eigen::Ref h_angs, size_t block_size) { 50 | assert(h_angs.size() == h_cum_active_joint_idx[num_of_robots-1]); // check if the number of joints is correct 51 | cudaMemcpy(d_angs, h_angs.data(), h_cum_active_joint_idx[num_of_robots-1] * sizeof(float), cudaMemcpyHostToDevice); 52 | dim3 grid((num_of_robots+(block_size-1))/block_size); 53 | dim3 block(block_size); 54 | _jacobian<<>>(d_data, d_angs, d_num_of_joints_cum, d_num_of_active_joints_cum, d_result, num_of_robots); 55 | cudaMemcpy(h_jac_result.data(), d_result, 56 | h_cum_active_joint_idx[num_of_robots-1] * 6 * sizeof(float), cudaMemcpyDeviceToHost); 57 | return h_jac_result; 58 | } 59 | 60 | float *FastKinematics::forward_kinematics_raw_ptr(float *t_angs, size_t block_size) { 61 | // first check the shape of the input tensor 62 | dim3 grid((num_of_robots+(block_size-1))/block_size); 63 | dim3 block(block_size); 64 | _forward_kinematics<<>>(d_data, t_angs, d_num_of_joints_cum, d_num_of_active_joints_cum, d_result, num_of_robots); 65 | // // now convert d_result to a tensor 66 | // auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, 0).pinned_memory(true); 67 | // torch::Tensor t_result = torch::from_blob(d_result, {h_fk_result.size()}, options); 68 | // return t_result; 69 | return d_result; 70 | } 71 | 72 | float *FastKinematics::jacobian_mixed_frame_raw_ptr(float *t_angs, size_t block_size) { 73 | // first check the shape of the input tensor 74 | dim3 grid((num_of_robots+(block_size-1))/block_size); 75 | dim3 block(block_size); 76 | _jacobian_mixed_frame<<>>(d_data, t_angs, d_num_of_joints_cum, d_num_of_active_joints_cum, d_result, num_of_robots); 77 | // // now convert d_result to a tensor 78 | // auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, 0).pinned_memory(true); 79 | // torch::Tensor t_result = torch::from_blob(d_result, {h_jac_result.size()}, options); 80 | // return t_result; 81 | return d_result; 82 | } 83 | 84 | float *FastKinematics::jacobian_world_frame_raw_ptr(float *t_angs, size_t block_size) { 85 | // first check the shape of the input tensor 86 | dim3 grid((num_of_robots+(block_size-1))/block_size); 87 | dim3 block(block_size); 88 | _jacobian<<>>(d_data, t_angs, d_num_of_joints_cum, d_num_of_active_joints_cum, d_result, num_of_robots); 89 | // // now convert d_result to a tensor 90 | // auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, 0).pinned_memory(true); 91 | // torch::Tensor t_result = torch::from_blob(d_result, {h_jac_result.size()}, options); 92 | // return t_result; 93 | return d_result; 94 | } 95 | 96 | size_t FastKinematics::get_num_of_active_joints() { 97 | return num_of_active_joints; 98 | } 99 | 100 | size_t FastKinematics::get_num_of_joints() { 101 | return num_of_joints; 102 | } 103 | 104 | size_t FastKinematics::get_num_of_robots() { 105 | return num_of_robots; 106 | } 107 | 108 | void FastKinematics::allocate_memory() { 109 | h_cum_active_joint_idx = new size_t[num_of_robots]; 110 | h_cum_active_joint_idx[0] = num_of_active_joints; 111 | for (size_t i = 1; i < num_of_robots; ++i) { 112 | h_cum_active_joint_idx[i] = h_cum_active_joint_idx[i - 1] + num_of_active_joints; 113 | } 114 | size_t result_max_size = std::max(7*num_of_robots, 6*h_cum_active_joint_idx[num_of_robots-1]); 115 | // resize the eigen xd vector 116 | h_fk_result.resize(7*num_of_robots); 117 | h_jac_result.resize(6*h_cum_active_joint_idx[num_of_robots-1]); 118 | cudaMalloc(&d_data, h_cum_data_idx[num_of_robots-1] * sizeof(float)); 119 | cudaMalloc(&d_angs, h_cum_active_joint_idx[num_of_robots-1] * sizeof(float)); 120 | cudaMalloc(&d_result, result_max_size * sizeof(float)); 121 | cudaMalloc(&d_num_of_joints_cum, num_of_robots * sizeof(size_t)); 122 | cudaMalloc(&d_num_of_active_joints_cum, num_of_robots * sizeof(size_t)); 123 | } 124 | 125 | void FastKinematics::copy_memory() { 126 | cudaMemcpy(d_data, h_data, h_cum_data_idx[num_of_robots-1] * sizeof(float), cudaMemcpyHostToDevice); 127 | cudaMemcpy(d_num_of_joints_cum, h_cum_data_idx, num_of_robots * sizeof(size_t), cudaMemcpyHostToDevice); 128 | cudaMemcpy(d_num_of_active_joints_cum, h_cum_active_joint_idx, num_of_robots * sizeof(size_t), cudaMemcpyHostToDevice); 129 | } 130 | -------------------------------------------------------------------------------- /idea.md: -------------------------------------------------------------------------------- 1 | Forward kinematics can be calculated with $q_1 v_1 q_1^{-1} + q_1 q_2 v_2 q_2^{-1} q_1^{-1} + \ldots$. Now it's just a matter of finding the best implementation. Whether it's just like above or using dual quaternion. 2 | 3 | But for inverse kinematics, we don't 4 | 5 | ## Geometric Jacobian vs Analytical Jacobian 6 | 7 | They are the same for the translational part, but different for the rotational part. The geometric Jacobian represents the rotation as the rotational axis. i.e. rotation around the x, y, z axis. The analytical Jacobian represents the rotation as the derivative of whatever representation of rotation you are using. 8 | 9 | ## Parallel Inverse Kinematics 10 | 11 | 1. https://dl.acm.org/doi/abs/10.1145/2887740 12 | 2. https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9126798 13 | 3. https://github.com/CarletonABL/QuIK/blob/main/SLloydEtAl2022_QuIK_preprint.pdf 14 | 4. https://arxiv.org/pdf/2311.05938.pdf 15 | 5. https://cseweb.ucsd.edu/classes/sp16/cse169-a/slides/CSE169_08.pdf 16 | 6. https://sim2real.github.io/assets/papers/kaspar.pdf 17 | 7. https://github.com/UM-ARM-Lab/pytorch_kinematics 18 | 19 | ## First, do everything in CPU and compare the results to existing libraries. 20 | 1. ~~Set up the build system and include urdfdom and eigen properly.~~ 21 | 2. ~~Parese the panda urdf file and print out all the joint names, limits, origins, and axes.~~ 22 | 3. ~~We will store all the information inside a kinematics tree. The tree will contain the joint names, limits, origins, types, and axis of roation or translation. It will also contain the parent and children.~~ 23 | 4. ~~Then it is time to set up the forward kinematics using homogeneous transformation.~~ 24 | 5. ~~After that we will set up the forward kinematics using quaternion. We will need to treat the rotation and translation separately.~~ 25 | 6. ~~Jacobian calculation~~ 26 | 7. ~~Make implementation parallel~~ 27 | 8. Transfer to GPU 28 | 29 | Memory layout: 30 | data consists of [translation (3 floats), rotation (4 floats), type (1 float), axis (3 floats), ...] 31 | control consists of [angle for first joint, angle for second joint, ...] 32 | two other arrays to specify cumulative number of joints for each robot and cumulative number of active joints for each robot 33 | 34 | ## Benchmarks 35 | 36 | The time it takes to calculate 10000 Jacobians in parallel (including the memory copy time but not including the data prepreation time) is ~2ms on m4000. The same calculation takes ~3.2ms on an M1 and ~9ms on E5-2623 single core. 37 | 38 | 10000 J 39 | ==18797== NVPROF is profiling process 18797, command: ./test_client 40 | ==18797== Profiling application: ./test_client 41 | ==18797== Profiling result: 42 | Start Duration Grid Size Block Size Regs* SSMem* DSMem* Size Throughput SrcMemType DstMemType Device Context Stream Name 43 | 352.26ms 619.91us - - - - - 4.1962MB 6.6103GB/s Pageable Device Quadro M4000 (0 1 7 [CUDA memcpy HtoD] 44 | 352.93ms 33.407us - - - - - 312.50KB 8.9210GB/s Pageable Device Quadro M4000 (0 1 7 [CUDA memcpy HtoD] 45 | 352.98ms 9.6000us - - - - - 78.125KB 7.7610GB/s Pageable Device Quadro M4000 (0 1 7 [CUDA memcpy HtoD] 46 | 353.01ms 9.4080us - - - - - 78.125KB 7.9194GB/s Pageable Device Quadro M4000 (0 1 7 [CUDA memcpy HtoD] 47 | 353.15ms 202.78us - - - - - 1.8311MB 8.8183GB/s Pageable Device Quadro M4000 (0 1 7 [CUDA memcpy HtoD] 48 | 353.36ms 296.95us (40 1 1) (256 1 1) 48 0B 0B - - - - Quadro M4000 (0 1 7 jacobian(float*, float*, unsigned long*, unsigned long*, float*, unsigned long) [122] 49 | 353.66ms 194.49us - - - - - 1.8311MB 9.1940GB/s Device Pageable Quadro M4000 (0 1 7 [CUDA memcpy DtoH] 50 | 51 | Regs: Number of registers used per CUDA thread. This number includes registers used internally by the CUDA driver and/or tools and can be more than what the compiler shows. 52 | SSMem: Static shared memory allocated per CUDA block. 53 | DSMem: Dynamic shared memory allocated per CUDA block. 54 | SrcMemType: The type of source memory accessed by memory operation/copy 55 | DstMemType: The type of destination memory accessed by memory operation/copy 56 | ==18797== Generated result file: /home/paperspace/fast_kinematics/build/results.nvprof 57 | 58 | 100000 J 59 | ==19104== NVPROF is profiling process 19104, command: ./test_client 60 | ==19104== Profiling application: ./test_client 61 | ==19104== Profiling result: 62 | Start Duration Grid Size Block Size Regs* SSMem* DSMem* Size Throughput SrcMemType DstMemType Device Context Stream Name 63 | 311.65ms 5.8044ms - - - - - 41.962MB 7.0598GB/s Pageable Device Quadro M4000 (0 1 7 [CUDA memcpy HtoD] 64 | 317.59ms 363.26us - - - - - 3.0518MB 8.2042GB/s Pageable Device Quadro M4000 (0 1 7 [CUDA memcpy HtoD] 65 | 318.04ms 82.046us - - - - - 781.25KB 9.0810GB/s Pageable Device Quadro M4000 (0 1 7 [CUDA memcpy HtoD] 66 | 318.22ms 81.438us - - - - - 781.25KB 9.1488GB/s Pageable Device Quadro M4000 (0 1 7 [CUDA memcpy HtoD] 67 | 318.42ms 2.5035ms - - - - - 18.311MB 7.1424GB/s Pageable Device Quadro M4000 (0 1 7 [CUDA memcpy HtoD] 68 | 320.93ms 3.9205ms (391 1 1) (256 1 1) 48 0B 0B - - - - Quadro M4000 (0 1 7 jacobian(float*, float*, unsigned long*, unsigned long*, float*, unsigned long) [122] 69 | 324.86ms 2.0000ms - - - - - 18.311MB 8.9406GB/s Device Pageable Quadro M4000 (0 1 7 [CUDA memcpy DtoH] 70 | 71 | Regs: Number of registers used per CUDA thread. This number includes registers used internally by the CUDA driver and/or tools and can be more than what the compiler shows. 72 | SSMem: Static shared memory allocated per CUDA block. 73 | DSMem: Dynamic shared memory allocated per CUDA block. 74 | SrcMemType: The type of source memory accessed by memory operation/copy 75 | DstMemType: The type of destination memory accessed by memory operation/copy 76 | ==19104== Generated result file: /home/paperspace/fast_kinematics/build/results.nvprof 77 | 78 | ## More Benchmarks 79 | 80 | After wrapping the code in pybind11, I drag raced the code against pytorch_kinematics. The results are as follows: 81 | 82 | | unit: ms | 16 | 64 | 256 | 1024 | 4096 | 83 | |---------------|--------|--------|--------|--------|--------| 84 | | Pytorch | 14.164 | 14.743 | 14.490 | 16.667 | 20.776 | 85 | | Vector/List | 0.195 | 0.686 | 2.461 | 8.669 | 36.887 | 86 | | Eigen/numpy | 0.126 | 0.232 | 0.707 | 2.488 | 9.628 | 87 | 88 | Additionally, for size ~1000, the C++ version takes 0.165ms, which is 50 times faster than python. It has to be pybind11 overhead and it needs to be optimized. 89 | 90 | Turns out the entire overhead is due to pybind11. I made a do nothing function and timed it. It took the same time as the jacobian function. 91 | -------------------------------------------------------------------------------- /tests/test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import time\n", 11 | "import torch\n", 12 | "import fast_kinematics\n", 13 | "import pytorch_kinematics as pk" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "2.2.2+cu118\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "N = 4096\n", 31 | "print(torch.__version__)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 4, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "name": "stdout", 41 | "output_type": "stream", 42 | "text": [ 43 | "Joint name: lbr_iiwa_joint_1\n", 44 | "Joint type: 1\n", 45 | "Joint rotation: 0i + 0j + 0k + 1\n", 46 | "Joint position: 0i + 0j + 0.1575k + 0\n", 47 | "Joint axis: 0 0 1\n", 48 | "Joint name: lbr_iiwa_joint_2\n", 49 | "Joint type: 1\n", 50 | "Joint rotation: -7.3123e-14i + 0.707107j + 0.707107k + -7.3123e-14\n", 51 | "Joint position: 0i + 0j + 0.2025k + 0\n", 52 | "Joint axis: 0 0 1\n", 53 | "Parent joint name: lbr_iiwa_joint_1\n", 54 | "Joint name: lbr_iiwa_joint_3\n", 55 | "Joint type: 1\n", 56 | "Joint rotation: -7.3123e-14i + 0.707107j + 0.707107k + -7.3123e-14\n", 57 | "Joint position: 0i + 0.2045j + 0k + 0\n", 58 | "Joint axis: 0 0 1\n", 59 | "Parent joint name: lbr_iiwa_joint_2\n", 60 | "Joint name: lbr_iiwa_joint_4\n", 61 | "Joint type: 1\n", 62 | "Joint rotation: 0.707107i + 0j + 0k + 0.707107\n", 63 | "Joint position: 0i + 0j + 0.2155k + 0\n", 64 | "Joint axis: 0 0 1\n", 65 | "Parent joint name: lbr_iiwa_joint_3\n", 66 | "Joint name: lbr_iiwa_joint_5\n", 67 | "Joint type: 1\n", 68 | "Joint rotation: 7.3123e-14i + 0.707107j + 0.707107k + -7.3123e-14\n", 69 | "Joint position: 0i + 0.1845j + 0k + 0\n", 70 | "Joint axis: 0 0 1\n", 71 | "Parent joint name: lbr_iiwa_joint_4\n", 72 | "Joint name: lbr_iiwa_joint_6\n", 73 | "Joint type: 1\n", 74 | "Joint rotation: 0.707107i + 0j + 0k + 0.707107\n", 75 | "Joint position: 0i + 0j + 0.2155k + 0\n", 76 | "Joint axis: 0 0 1\n", 77 | "Parent joint name: lbr_iiwa_joint_5\n", 78 | "Joint name: lbr_iiwa_joint_7\n", 79 | "Joint type: 1\n", 80 | "Joint rotation: 7.3123e-14i + 0.707107j + 0.707107k + -7.3123e-14\n", 81 | "Joint position: 0i + 0.081j + 0k + 0\n", 82 | "Joint axis: 0 0 1\n", 83 | "Parent joint name: lbr_iiwa_joint_6\n", 84 | "Joint name: lbr_iiwa_joint_7\n", 85 | "Joint type: 1\n", 86 | "Joint name: lbr_iiwa_joint_6\n", 87 | "Joint type: 1\n", 88 | "Joint name: lbr_iiwa_joint_5\n", 89 | "Joint type: 1\n", 90 | "Joint name: lbr_iiwa_joint_4\n", 91 | "Joint type: 1\n", 92 | "Joint name: lbr_iiwa_joint_3\n", 93 | "Joint type: 1\n", 94 | "Joint name: lbr_iiwa_joint_2\n", 95 | "Joint type: 1\n", 96 | "Joint name: lbr_iiwa_joint_1\n", 97 | "Joint type: 1\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "# fask kinematics model\n", 103 | "model = fast_kinematics.FastKinematics(\"../kuka_iiwa.urdf\", N, \"lbr_iiwa_link_7\", True)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 9, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# pk chain\n", 113 | "chain = pk.build_serial_chain_from_urdf(open(\"../kuka_iiwa.urdf\").read(), \"lbr_iiwa_link_7\")\n", 114 | "d = \"cuda\"\n", 115 | "dtype = torch.float32\n", 116 | "\n", 117 | "chain = chain.to(dtype=dtype, device=d)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 10, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "Time taken for jacobian_mixed_frame: 0.00854259729385376 with std: 0.0011613306822710263\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "th = torch.rand(N, 7, dtype=dtype, device=d, requires_grad=True)\n", 135 | "all_times = []\n", 136 | "J = None\n", 137 | "for _ in range(100):\n", 138 | " start_time = time.time()\n", 139 | " J = chain.jacobian(th)\n", 140 | " end_time = time.time()\n", 141 | " all_times.append(end_time - start_time)\n", 142 | "print(f\"Time taken for jacobian_mixed_frame: {np.mean(all_times)} with std: {np.std(all_times)}\")" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stdout", 152 | "output_type": "stream", 153 | "text": [ 154 | "tensor([[[ 1, 7, 13, 19, 25, 31, 37],\n", 155 | " [ 2, 8, 14, 20, 26, 32, 38],\n", 156 | " [ 3, 9, 15, 21, 27, 33, 39],\n", 157 | " [ 4, 10, 16, 22, 28, 34, 40],\n", 158 | " [ 5, 11, 17, 23, 29, 35, 41],\n", 159 | " [ 6, 12, 18, 24, 30, 36, 42]]])\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "# the output of fast_kinematics is column major 1d array, so we need to reshape it\n", 165 | "# experimenting to make sure we got it right\n", 166 | "flat_array = torch.arange(1,43)\n", 167 | "print(flat_array.view(-1,7,6).permute(0,2,1))" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 11, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Time taken for fk: 4.538774490356445e-06 with std: 1.5257827219971919e-05\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "# flatten th\n", 185 | "th = th.view(N*7)\n", 186 | "all_times = []\n", 187 | "J2 = None\n", 188 | "for _ in range(1000):\n", 189 | " start_time = time.time()\n", 190 | " J2 = model.jacobian_mixed_frame_pytorch(th).view(-1,7,6).permute(0,2,1)\n", 191 | " end_time = time.time()\n", 192 | " all_times.append(end_time - start_time)\n", 193 | "print(f\"Time taken for fk: {np.mean(all_times)} with std: {np.std(all_times)}\")" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 12, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "assert torch.allclose(J, J2, atol=1e-5)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 13, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "name": "stdout", 212 | "output_type": "stream", 213 | "text": [ 214 | "Time taken for pinv: 0.03223166942596436 with std: 0.0038884625993035726\n" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "# test out pinv vs lstsq\n", 220 | "\n", 221 | "b = torch.rand(6, 1, dtype=dtype, device=d, requires_grad=False)\n", 222 | "x_pinv = torch.zeros(N, 7, dtype=dtype, device=d)\n", 223 | "all_times = []\n", 224 | "for i in range(100):\n", 225 | " start_time = time.time()\n", 226 | " # J2 is a collection of matrices but pinv seems to work on the entire batch\n", 227 | " J2_pinv = torch.pinverse(J2)\n", 228 | " if i == 0: x_pinv = torch.matmul(J2_pinv, b)\n", 229 | " end_time = time.time()\n", 230 | " all_times.append(end_time - start_time)\n", 231 | "print(f\"Time taken for pinv: {np.mean(all_times)} with std: {np.std(all_times)}\")\n", 232 | "\n", 233 | "# unit test correctness of pinv for an entire batch\n", 234 | "first_pinv = J2[0].pinverse()\n", 235 | "first_x = first_pinv @ b\n", 236 | "assert torch.allclose(first_x, x_pinv[0], atol=1e-5)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 14, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stdout", 246 | "output_type": "stream", 247 | "text": [ 248 | "Time taken for lstsq: 0.024238510131835936 with std: 0.0020009472923546044\n" 249 | ] 250 | } 251 | ], 252 | "source": [ 253 | "all_times = []\n", 254 | "x_lstsq = None\n", 255 | "new_b = b.view(1,6,1).repeat(N,1,1) # repeat the same b for all N\n", 256 | "for i in range(100):\n", 257 | " start_time = time.time()\n", 258 | " x_lstsq = torch.linalg.lstsq(J2, new_b).solution\n", 259 | " end_time = time.time()\n", 260 | " all_times.append(end_time - start_time)\n", 261 | "print(f\"Time taken for lstsq: {np.mean(all_times)} with std: {np.std(all_times)}\")" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 10, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "torch.Size([4096, 6, 7])\n", 274 | "torch.Size([4096, 6, 7])\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "# checking to make sure the results are the same\n", 280 | "print(J.shape)\n", 281 | "print(J2.shape)\n", 282 | "assert torch.allclose(J, J2, atol=1e-5)" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 15, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "Time taken for jacobian_mixed_frame: 0.00032803034782409666 with std: 0.0004883862096649905\n" 295 | ] 296 | } 297 | ], 298 | "source": [ 299 | "# just for fun numpy version still faster\n", 300 | "qpos = np.random.rand(7*N)\n", 301 | "qpos = qpos.astype(np.float32)\n", 302 | "\n", 303 | "all_times = []\n", 304 | "for _ in range(1000):\n", 305 | " start_time = time.time()\n", 306 | " poses = model.jacobian_mixed_frame(qpos)\n", 307 | " poses = poses.reshape(N,-1,6).transpose(0,2,1)\n", 308 | " end_time = time.time()\n", 309 | " all_times.append(end_time - start_time)\n", 310 | "print(f\"Time taken for jacobian_mixed_frame: {np.mean(all_times)} with std: {np.std(all_times)}\")" 311 | ] 312 | } 313 | ], 314 | "metadata": { 315 | "kernelspec": { 316 | "display_name": "Python 3", 317 | "language": "python", 318 | "name": "python3" 319 | }, 320 | "language_info": { 321 | "codemirror_mode": { 322 | "name": "ipython", 323 | "version": 3 324 | }, 325 | "file_extension": ".py", 326 | "mimetype": "text/x-python", 327 | "name": "python", 328 | "nbconvert_exporter": "python", 329 | "pygments_lexer": "ipython3", 330 | "version": "3.10.13" 331 | } 332 | }, 333 | "nbformat": 4, 334 | "nbformat_minor": 2 335 | } 336 | -------------------------------------------------------------------------------- /src/kinematics.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | __global__ void _forward_kinematics(float *data, float *angs, size_t *cum_data_idx, 6 | size_t *cum_active_joint_idx, float *result, size_t num_of_robots) { 7 | size_t idx = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (idx >= num_of_robots) return; 9 | size_t end_idx = cum_data_idx[idx]; 10 | size_t ang_idx = idx == 0 ? 0 : cum_active_joint_idx[idx - 1]; 11 | size_t data_idx = 0; 12 | if (idx > 0) data_idx = cum_data_idx[idx - 1]; 13 | Quaternion r = Quaternion::Identity(); 14 | Quaternion t(0, data[data_idx], data[data_idx+1], data[data_idx+2]); 15 | data_idx += 3; 16 | while (data_idx < end_idx-11) { 17 | Quaternion rotation(data[data_idx], data[data_idx + 1], data[data_idx + 2], data[data_idx + 3]); 18 | data_idx += 4; 19 | float type = data[data_idx++]; 20 | float axis[3] = {data[data_idx], data[data_idx + 1], data[data_idx + 2]}; 21 | data_idx += 3; 22 | Quaternion nxt_translation(0, data[data_idx], data[data_idx + 1], data[data_idx + 2]); 23 | data_idx += 3; 24 | 25 | r = r * rotation; 26 | if (type == urdf::Joint::REVOLUTE) { 27 | Quaternion change = Quaternion::FromAngleAxis(angs[ang_idx++], axis[0], axis[1], axis[2]); 28 | r = r * change; 29 | } else if (type == urdf::Joint::PRISMATIC) { 30 | float displacement = angs[ang_idx++]; 31 | Quaternion change(0, displacement*axis[0], displacement*axis[1], displacement*axis[2]); 32 | nxt_translation += change; 33 | } 34 | nxt_translation = r * nxt_translation * r.inverse(); 35 | t += nxt_translation; 36 | } 37 | 38 | Quaternion rotation(data[data_idx], data[data_idx + 1], data[data_idx + 2], data[data_idx + 3]); 39 | data_idx += 4; 40 | float type = data[data_idx++]; 41 | float axis[3] = {data[data_idx], data[data_idx + 1], data[data_idx + 2]}; 42 | data_idx += 3; 43 | Quaternion nxt_translation(0,0,0,0); 44 | 45 | r = r * rotation; 46 | if (type == urdf::Joint::REVOLUTE) { 47 | Quaternion change = Quaternion::FromAngleAxis(angs[ang_idx++], axis[0], axis[1], axis[2]); 48 | r = r * change; 49 | } else if (type == urdf::Joint::PRISMATIC) { 50 | float displacement = angs[ang_idx++]; 51 | Quaternion change(0, displacement*axis[0], displacement*axis[1], displacement*axis[2]); 52 | nxt_translation += change; 53 | } 54 | nxt_translation = r * nxt_translation * r.inverse(); 55 | t += nxt_translation; 56 | 57 | result[idx*7+0] = t.x; 58 | result[idx*7+1] = t.y; 59 | result[idx*7+2] = t.z; 60 | result[idx*7+3] = r.w; 61 | result[idx*7+4] = r.x; 62 | result[idx*7+5] = r.y; 63 | result[idx*7+6] = r.z; 64 | } 65 | 66 | __global__ void _jacobian(float *data, float *angs, size_t *cum_data_idx, 67 | size_t *cum_active_joint_idx, float *result, size_t num_of_robots) { 68 | size_t idx = blockIdx.x * blockDim.x + threadIdx.x; 69 | if (idx >= num_of_robots) return; 70 | size_t end_idx = cum_data_idx[idx]; 71 | size_t ang_idx = idx == 0 ? 0 : cum_active_joint_idx[idx - 1]; 72 | size_t data_idx = 0; 73 | if (idx > 0) data_idx = cum_data_idx[idx - 1]; 74 | Quaternion r = Quaternion::Identity(); 75 | Quaternion t(0, data[data_idx], data[data_idx+1], data[data_idx+2]); 76 | data_idx += 3; 77 | while (data_idx < end_idx-11) { 78 | Quaternion rotation(data[data_idx], data[data_idx + 1], data[data_idx + 2], data[data_idx + 3]); 79 | data_idx += 4; 80 | float type = data[data_idx++]; 81 | float axis[3] = {data[data_idx], data[data_idx + 1], data[data_idx + 2]}; 82 | data_idx += 3; 83 | Quaternion nxt_translation(0, data[data_idx], data[data_idx + 1], data[data_idx + 2]); 84 | data_idx += 3; 85 | 86 | r = r * rotation; 87 | if (type == urdf::Joint::REVOLUTE) { 88 | Quaternion axis_quat(0, axis[0], axis[1], axis[2]); 89 | axis_quat = r * axis_quat * r.inverse(); 90 | result[6*ang_idx+0] = -axis_quat.y*t.z + axis_quat.z*t.y; 91 | result[6*ang_idx+1] = axis_quat.x*t.z - axis_quat.z*t.x; 92 | result[6*ang_idx+2] = -axis_quat.x*t.y + axis_quat.y*t.x; 93 | result[6*ang_idx+3] = axis_quat.x; 94 | result[6*ang_idx+4] = axis_quat.y; 95 | result[6*ang_idx+5] = axis_quat.z; 96 | Quaternion change = Quaternion::FromAngleAxis(angs[ang_idx++], axis[0], axis[1], axis[2]); 97 | r = r * change; 98 | } else if (type == urdf::Joint::PRISMATIC) { 99 | Quaternion axis_quat(0, axis[0], axis[1], axis[2]); 100 | Quaternion rotated_axis = r * axis_quat * r.inverse(); 101 | result[6*ang_idx+0] = rotated_axis.x; 102 | result[6*ang_idx+1] = rotated_axis.y; 103 | result[6*ang_idx+2] = rotated_axis.z; 104 | result[6*ang_idx+3] = 0; 105 | result[6*ang_idx+4] = 0; 106 | result[6*ang_idx+5] = 0; 107 | float displacement = angs[ang_idx++]; 108 | nxt_translation += axis_quat*displacement; 109 | } 110 | nxt_translation = r * nxt_translation * r.inverse(); 111 | t += nxt_translation; 112 | } 113 | 114 | Quaternion rotation(data[data_idx], data[data_idx + 1], data[data_idx + 2], data[data_idx + 3]); 115 | data_idx += 4; 116 | float type = data[data_idx++]; 117 | float axis[3] = {data[data_idx], data[data_idx + 1], data[data_idx + 2]}; 118 | data_idx += 3; 119 | Quaternion nxt_translation(0,0,0,0); 120 | 121 | r = r * rotation; 122 | if (type == urdf::Joint::REVOLUTE) { 123 | Quaternion axis_quat(0, axis[0], axis[1], axis[2]); 124 | axis_quat = r * axis_quat * r.inverse(); 125 | result[6*ang_idx+0] = -axis_quat.y*t.z + axis_quat.z*t.y; 126 | result[6*ang_idx+1] = axis_quat.x*t.z - axis_quat.z*t.x; 127 | result[6*ang_idx+2] = -axis_quat.x*t.y + axis_quat.y*t.x; 128 | result[6*ang_idx+3] = axis_quat.x; 129 | result[6*ang_idx+4] = axis_quat.y; 130 | result[6*ang_idx+5] = axis_quat.z; 131 | Quaternion change = Quaternion::FromAngleAxis(angs[ang_idx++], axis[0], axis[1], axis[2]); 132 | r = r * change; 133 | } else if (type == urdf::Joint::PRISMATIC) { 134 | Quaternion axis_quat(0, axis[0], axis[1], axis[2]); 135 | Quaternion rotated_axis = r * axis_quat * r.inverse(); 136 | result[6*ang_idx+0] = rotated_axis.x; 137 | result[6*ang_idx+1] = rotated_axis.y; 138 | result[6*ang_idx+2] = rotated_axis.z; 139 | result[6*ang_idx+3] = 0; 140 | result[6*ang_idx+4] = 0; 141 | result[6*ang_idx+5] = 0; 142 | float displacement = angs[ang_idx++]; 143 | nxt_translation += axis_quat*displacement; 144 | } 145 | nxt_translation = r * nxt_translation * r.inverse(); 146 | t += nxt_translation; 147 | } 148 | 149 | __global__ void _jacobian_mixed_frame(float *data, float *angs, size_t *cum_data_idx, 150 | size_t *cum_active_joint_idx, float *result, size_t num_of_robots) { 151 | size_t idx = blockIdx.x * blockDim.x + threadIdx.x; 152 | if (idx >= num_of_robots) return; 153 | size_t end_idx = cum_data_idx[idx]; 154 | size_t start_ang_idx = idx == 0 ? 0 : cum_active_joint_idx[idx - 1]; 155 | size_t ang_idx = start_ang_idx; 156 | size_t data_idx = 0; 157 | if (idx > 0) data_idx = cum_data_idx[idx - 1]; 158 | Quaternion r = Quaternion::Identity(); 159 | Quaternion t(0, data[data_idx], data[data_idx+1], data[data_idx+2]); 160 | data_idx += 3; 161 | while (data_idx < end_idx-11) { 162 | Quaternion rotation(data[data_idx], data[data_idx + 1], data[data_idx + 2], data[data_idx + 3]); 163 | data_idx += 4; 164 | float type = data[data_idx++]; 165 | float axis[3] = {data[data_idx], data[data_idx + 1], data[data_idx + 2]}; 166 | data_idx += 3; 167 | Quaternion nxt_translation(0, data[data_idx], data[data_idx + 1], data[data_idx + 2]); 168 | data_idx += 3; 169 | 170 | r = r * rotation; 171 | if (type == urdf::Joint::REVOLUTE) { 172 | Quaternion axis_quat(0, axis[0], axis[1], axis[2]); 173 | axis_quat = r * axis_quat * r.inverse(); 174 | // temporarily store the translation in the jacobian matrix 175 | result[6*ang_idx+0] = t.x; 176 | result[6*ang_idx+1] = t.y; 177 | result[6*ang_idx+2] = t.z; 178 | result[6*ang_idx+3] = axis_quat.x; 179 | result[6*ang_idx+4] = axis_quat.y; 180 | result[6*ang_idx+5] = axis_quat.z; 181 | Quaternion change = Quaternion::FromAngleAxis(angs[ang_idx++], axis[0], axis[1], axis[2]); 182 | r = r * change; 183 | } else if (type == urdf::Joint::PRISMATIC) { 184 | Quaternion axis_quat(0, axis[0], axis[1], axis[2]); 185 | Quaternion rotated_axis = r * axis_quat * r.inverse(); 186 | result[6*ang_idx+0] = rotated_axis.x; 187 | result[6*ang_idx+1] = rotated_axis.y; 188 | result[6*ang_idx+2] = rotated_axis.z; 189 | result[6*ang_idx+3] = 0; 190 | result[6*ang_idx+4] = 0; 191 | result[6*ang_idx+5] = 0; 192 | float displacement = angs[ang_idx++]; 193 | nxt_translation += axis_quat*displacement; 194 | } 195 | nxt_translation = r * nxt_translation * r.inverse(); 196 | t += nxt_translation; 197 | } 198 | 199 | Quaternion rotation(data[data_idx], data[data_idx + 1], data[data_idx + 2], data[data_idx + 3]); 200 | data_idx += 4; 201 | float type = data[data_idx++]; 202 | float axis[3] = {data[data_idx], data[data_idx + 1], data[data_idx + 2]}; 203 | data_idx += 3; 204 | Quaternion nxt_translation(0,0,0,0); 205 | 206 | r = r * rotation; 207 | if (type == urdf::Joint::REVOLUTE) { 208 | Quaternion axis_quat(0, axis[0], axis[1], axis[2]); 209 | axis_quat = r * axis_quat * r.inverse(); 210 | result[6*ang_idx+0] = t.x; 211 | result[6*ang_idx+1] = t.y; 212 | result[6*ang_idx+2] = t.z; 213 | result[6*ang_idx+3] = axis_quat.x; 214 | result[6*ang_idx+4] = axis_quat.y; 215 | result[6*ang_idx+5] = axis_quat.z; 216 | Quaternion change = Quaternion::FromAngleAxis(angs[ang_idx++], axis[0], axis[1], axis[2]); 217 | r = r * change; 218 | } else if (type == urdf::Joint::PRISMATIC) { 219 | Quaternion axis_quat(0, axis[0], axis[1], axis[2]); 220 | Quaternion rotated_axis = r * axis_quat * r.inverse(); 221 | result[6*ang_idx+0] = rotated_axis.x; 222 | result[6*ang_idx+1] = rotated_axis.y; 223 | result[6*ang_idx+2] = rotated_axis.z; 224 | result[6*ang_idx+3] = 0; 225 | result[6*ang_idx+4] = 0; 226 | result[6*ang_idx+5] = 0; 227 | float displacement = angs[ang_idx++]; 228 | nxt_translation += axis_quat*displacement; 229 | } 230 | nxt_translation = r * nxt_translation * r.inverse(); 231 | t += nxt_translation; 232 | 233 | for (size_t i = start_ang_idx; i < ang_idx; ++i) { 234 | if (result[6*i+3] == 0 && result[6*i+4] == 0 && result[6*i+5] == 0) continue; 235 | float dx = result[6*i+4]*(t.z-result[6*i+2]) - result[6*i+5]*(t.y-result[6*i+1]); 236 | float dy = -result[6*i+3]*(t.z-result[6*i+2]) + result[6*i+5]*(t.x-result[6*i+0]); 237 | float dz = result[6*i+3]*(t.y-result[6*i+1]) - result[6*i+4]*(t.x-result[6*i+0]); 238 | result[6*i+0] = dx; 239 | result[6*i+1] = dy; 240 | result[6*i+2] = dz; 241 | } 242 | } -------------------------------------------------------------------------------- /kuka_iiwa.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | -------------------------------------------------------------------------------- /panda.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | -------------------------------------------------------------------------------- /fetch.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | --------------------------------------------------------------------------------