├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── doc ├── TIAGo_05.jpg └── logo.jpg ├── examples ├── control │ ├── classical_control │ │ └── test_pid.py │ ├── optimal_control │ │ ├── test_lqr_double_integrator.py │ │ ├── test_lqr_inverted_pendulum.py │ │ ├── test_lqr_planar_quadrotor.py │ │ ├── test_mpc_inverted_pendulum.py │ │ ├── test_mpc_planar_quadrotor.py │ │ └── test_mppi_cartpole.py │ └── path_follow_control │ │ ├── example_path.json │ │ ├── test_cem_path_follow.py │ │ ├── test_dwa_path_follow.py │ │ ├── test_lqr_path_follow.py │ │ ├── test_mpc_path_follow.py │ │ └── test_mppi_path_follow.py ├── learning │ ├── test_mc_control_on_policy.py │ ├── test_q_learning.py │ └── test_sarsa.py ├── planning │ ├── mdp │ │ ├── test_mcts.py │ │ ├── test_policy_iteration.py │ │ ├── test_policy_tree_search.py │ │ └── test_value_iteration.py │ ├── motion_planning │ │ ├── test_prm.py │ │ ├── test_rrt.py │ │ ├── test_rrt_connect.py │ │ └── test_rrt_star.py │ └── path_planning │ │ ├── test_a_star.py │ │ ├── test_dijkstra.py │ │ └── test_hybrid_a_star.py ├── realisitic_examples │ └── cost_aware_diff_drive_path_planning │ │ ├── cost_aware_diff_drive_path_planning.py │ │ ├── path_with_cost_penalty.png │ │ └── path_without_cost_penalty.png └── state_estimation │ ├── test_amcl.py │ ├── test_discrete_bayes_filter.py │ ├── test_extended_kalman_filter.py │ ├── test_kalman_filter.py │ └── test_particle_filter.py ├── pyproject.toml └── robotics_algorithm ├── __init__.py ├── control ├── __init__.py ├── classical_control │ ├── __init__.py │ └── pid.py ├── optimal_control │ ├── __init__.py │ ├── cem_mpc.py │ ├── convex_mpc.py │ ├── lqr.py │ └── mppi.py └── path_follow_control │ ├── __init__.py │ └── dwa.py ├── env ├── __init__.py ├── base_env.py ├── cartpole_balance.py ├── classic_mdp │ ├── cliff_walking.py │ ├── frozen_lake.py │ └── windy_grid_world.py ├── continuous_1d │ └── double_integrator_env.py ├── continuous_2d │ ├── __init__.py │ ├── diff_drive_2d_control.py │ ├── diff_drive_2d_env_base.py │ ├── diff_drive_2d_localization.py │ ├── diff_drive_2d_planning.py │ └── omni_2d_planning.py ├── discrete_world_1d.py ├── discrete_world_2d.py ├── inverted_pendulum.py └── planar_quadrotor_hover.py ├── learning ├── __init__.py └── reinforcement_learning │ ├── __init__.py │ ├── mc_control_on_policy.py │ ├── q_learning.py │ └── sarsa.py ├── planning ├── __init__.py ├── mdp │ ├── __init__.py │ ├── mcts.py │ ├── policy_iteration.py │ ├── policy_tree_search.py │ └── value_iteration.py ├── motion_planning │ ├── __init__.py │ ├── prm.py │ ├── rrt.py │ └── rrt_star.py └── path_planning │ ├── __init__.py │ ├── a_star.py │ ├── dijkstra.py │ └── hybrid_a_star.py ├── robot ├── __init__.py ├── cartpole.py ├── differential_drive.py ├── double_integrator.py ├── pendulum.py ├── planar_quadrotor.py └── robot.py ├── state_estimation ├── __init__.py ├── amcl.py ├── discrete_bayes_filter.py ├── extended_kalman_filter.py ├── kalman_filter.py └── particle_filter.py └── utils ├── __init__.py ├── math_utils.py ├── mdp_utils.py ├── models └── attention.py └── tree.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | 4 | .vscode/ -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [v0.11.2] 4 | 5 | ### Added 6 | 7 | - Added DWA 8 | 9 | ## [v0.11.1] 10 | 11 | ### Added 12 | 13 | - Added planar quadrotor 14 | - 
Added LQR and convex MPC example for planar quadrotor 15 | 16 | ### Changed 17 | 18 | - Changed all state, action, observation to be numpy array by default (cont). 19 | - Separated is_state_terminal() from get_state_info(). 20 | 21 | ## [v0.11.0] 22 | 23 | ### Added 24 | 25 | - Added convex MPC. 26 | - Added inverted pendulum environment. 27 | - Added a robot parent class for different robots. 28 | - Added LQR and convex MPC for path follow and inverted pendulum. 29 | 30 | ### Changed 31 | 32 | - Refactored folder structure to better group different components. 33 | - Changed all state, action, observation to be numpy array by default. 34 | 35 | ### Fixed 36 | 37 | - Fixed diff_drive_2d_planning traversal cost calculation so euclidean distance is properly underestimating true cost. 38 | 39 | ## [v0.10.0] 40 | 41 | ### Added 42 | 43 | - Added cost-aware differential drive path planning 44 | 45 | ### Fixed 46 | 47 | - Fixed a bug in Hybrid A-star 48 | 49 | ### Changed 50 | 51 | - Split continuous 2d env into separate classes for planning, control and localization, each with its own file. 52 | 53 | ## [v0.9.1] 54 | 55 | ### Added 56 | 57 | - Added changelog 58 | 59 | ### Changed 60 | 61 | - Minor comment and code change 62 | 63 | ### Removed 64 | 65 | - Trademark sign previously shown after the project description in version 66 | 0.3.0 67 | 68 | ## [v0.9.0] 69 | 70 | ### Added 71 | 72 | - Added AMCL 73 | 74 | ## [0.8.3] 75 | 76 | ### Added 77 | 78 | - Added RL tabular methods 79 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 lyfkyle 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # robotics_algorithms 2 | 3 | ![logo](/doc/TIAGo_05.jpg "logo") 4 | 5 | This repository contains my pure-python implementation of essential algorithms that make a mobile manipulator (and other robots) move. 6 | 7 | While it is true that a lot of algorithms have been implemented by other projects, this repo serves these main benefits: 8 | 9 | 1. It achieves a good balance of width and depth. It covers a wide range of topics, while focusing on only key algorithms in each field. 10 | 2. 
It is implemented with modular structure that cleanly separates algorithms from problems (like OMPL), at the same time emphasizing connection between different algorithms. For example, the design reflects that planing under uncertainties, optimal control and RL share the same underlying MDP formulation. 11 | 3. Serves as a single source of truth of various algorithms so that I no longer need to search all over the Internet. 12 | 13 | ## Scope 14 | 15 | It should include popular and representative algorithms from robot dynamics, state estimation, planning, control and 16 | learning. 17 | 18 | ## Requirement 19 | 20 | python 3.10 21 | 22 | ## How to use 23 | 24 | - Run `pip install -e .` 25 | - Run various scripts inside examples folder. 26 | 27 | For example, to run a\* to find the shortest path between start and goal in a grid world 28 | 29 | ```python 30 | python examples/planning/path_planning/test_a_star.py 31 | ``` 32 | 33 | ## News 34 | 35 | - 26/03/2025: Added DWA (v0.11.2) 36 | - 20/02/2025: Added LQR and convex MPC for planar quadrotor (v0.11.1). 37 | - 17/02/2025: Added convex MPC, inverted pendulum and more path following examples (v0.11.0). 38 | - 09/02/2025: Added cost-aware differential drive path planning (v0.10.0). 39 | 40 | ## Status 41 | 42 | This repository is undergoing significant development. Here is the status checklist. 43 | 44 | Algorithms 45 | 46 | - [ ] Robot Kinematic and Dynamics 47 | - [x] Differential drive 48 | - [x] Cartpole 49 | - [x] Double Integrator 50 | - [x] Inverted Pendulum 51 | - [ ] Arm 52 | - [ ] FK and IK 53 | - [ ] Car 54 | - [x] Planar Quadrotor 55 | - [ ] Quadrotor 56 | - [ ] Quadruped 57 | - [x] State Estimation 58 | - [x] Localizaion 59 | - [x] Discrete bayes filter 60 | - [x] Kalman filter 61 | - [x] Extended Kalman filter 62 | - [x] Particle filter (MCL) 63 | - [x] AMCL 64 | - [ ] SLAM 65 | - [ ] EKF SLAM 66 | - [ ] Fast SLAM 67 | - [ ] Graph-based SLAM 68 | - [ ] Planning 69 | - [x] Path Planning 70 | - [x] Dijkstra 71 | - [x] A-star 72 | - [x] Hybrid A-star 73 | - [ ] Motion Planning 74 | - [x] PRM 75 | - [x] RRT / RRT-Connect 76 | - [x] RRT\* 77 | - [ ] RRT\*-Connect 78 | - [ ] Informed RRT\* 79 | - [ ] BIT\* 80 | - [x] MDP 81 | - [x] Value iteration 82 | - [x] policy iteration 83 | - [x] Policy tree search 84 | - [x] MCTS 85 | - [ ] POMDP 86 | - [ ] Belief tree search 87 | - [ ] SARSOP 88 | - [ ] DESPOT 89 | - [ ] Control 90 | - [x] Classical control 91 | - [x] PID 92 | - [ ] Optimal Control 93 | - [x] LQR 94 | - [x] MPPI 95 | - [x] CEM-MPC 96 | - [x] Convex-MPC 97 | - [ ] Trajectory optimization 98 | - [ ] iLQR 99 | - [ ] Domain-specific Path Follow Control 100 | - [ ] Regulated Pure Pursuit 101 | - [x] Dynamic Window Approach 102 | - [ ] Time-elastic Band 103 | - [ ] Imitation learning 104 | - [ ] ACT 105 | - [ ] Diffusion-policy 106 | - [ ] Reinforcement learning 107 | - [ ] Tabular 108 | - [x] On-policy MC 109 | - [ ] Off-policy MC 110 | - [x] On-policy TD (SARSA) 111 | - [x] Off-policy TD (Q-learning) 112 | - [ ] Function approximation 113 | - [ ] Environments 114 | - [x] Frozen lake (MDP) 115 | - [x] Cliff walking (MDP) 116 | - [x] Windy gridworld (MDP) 117 | - [x] 1D navigation with double integrator 118 | - [x] Deterministic and fully-observable 119 | - [x] Stochastic and partially-observable 120 | - [x] 2D navigation with omni-directional robot 121 | - [x] Deterministic and fully-observable 122 | - [x] 2D navigation with differential drive 123 | - [x] Deterministic and fully-observable 124 | - [x] Stochastic and 
partially-observable 125 | - [x] With obstacle-distance cost 126 | - [x] 2D localization 127 | - [ ] 2D SLAM 128 | - [ ] Multi-arm bandits (POMDP) 129 | - [ ] Tiger (POMDP) 130 | 131 | In addition to the algorithm itself, also implement several realistic robotics problems that often require additional 132 | domain-specific components and strategies. 133 | 134 | - [x] Path planning for differential drive robot using Hybrid A Star with original heuristics and cost weighted distance measure. 135 | 136 | ## Known issues 137 | 138 | - [ ] EKF gives high localisation error at some instances. 139 | - [ ] MCTS is not stable. 140 | - [ ] Recursive feasibility in convex MPC 141 | -------------------------------------------------------------------------------- /doc/TIAGo_05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/doc/TIAGo_05.jpg -------------------------------------------------------------------------------- /doc/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/doc/logo.jpg -------------------------------------------------------------------------------- /examples/control/classical_control/test_pid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.cartpole_balance import CartPoleEnv 4 | from robotics_algorithm.control.classical_control.pid import PID 5 | 6 | # Initialize environment 7 | # NOTE: we can use PID for cartpole because when it is near the upright position, the system behaves like LTI system. 8 | # refer to https://ctms.engin.umich.edu/CTMS/index.php?example=InvertedPendulum§ion=SystemModeling for 9 | # the transfer function 10 | # Here we only control theta 11 | env = CartPoleEnv() 12 | env.reset() 13 | 14 | # These gains are tuned using Ziegler–Nichols method 15 | controller = PID(env, goal_state=env.goal_state[2], Kp=16.0, Kd=1.8) # we only control theta 16 | 17 | # run controller 18 | state = env.cur_state 19 | while True: 20 | action = controller.run(state[2]) # we only control theta 21 | action *= -1 # post-process action, needed because action and state are not in the same space. 
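    # For reference, the controller above is assumed to apply the standard discrete PID law on
    # the pendulum angle (a sketch only -- see pid.py for the actual implementation):
    #   error = goal_theta - theta
    #   u = Kp * error + Ki * sum_of_errors * dt + Kd * (error - prev_error) / dt
    # Only Kp and Kd are set above, so this run presumably relies on the P and D terms alone.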
22 | 23 | next_state, reward, term, trunc, info = env.step(action) 24 | print(state, action, next_state, reward) 25 | 26 | env.render() 27 | state = next_state 28 | 29 | if term or trunc: 30 | break 31 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_lqr_double_integrator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.continuous_1d.double_integrator_env import DoubleIntegratorEnv 4 | from robotics_algorithm.control.optimal_control.lqr import LQR 5 | 6 | # Test 1 7 | # discrete time model, solve dare using scipy 8 | env = DoubleIntegratorEnv(observation_noise_std=0, state_transition_noise_std=0) 9 | env.reset() 10 | print('cur_state: ', env.start_state) 11 | env.render() 12 | 13 | # initialize controller 14 | controller = LQR(env, discrete_time=True) 15 | 16 | # run controller 17 | state = env.start_state 18 | path = [state] 19 | while True: 20 | action = controller.run(state) 21 | next_state, reward, term, trunc, info = env.step(action) 22 | 23 | print(state, action, next_state) 24 | 25 | path.append(next_state) 26 | state = next_state 27 | 28 | if term or trunc: 29 | break 30 | 31 | env.add_path(path) 32 | env.render() 33 | 34 | # Test 2 35 | # Solve dare by iteration for finite-horizon case 36 | env = DoubleIntegratorEnv(observation_noise_std=0, state_transition_noise_std=0) 37 | 38 | env.reset() 39 | print('cur_state: ', env.start_state) 40 | env.render() 41 | 42 | # initialize controller 43 | controller = LQR(env, discrete_time=True, horizon=2000, solve_by_iteration=True) 44 | 45 | # run controller 46 | state = env.start_state 47 | path = [state] 48 | while True: 49 | action = controller.run(state) 50 | next_state, reward, term, trunc, info = env.step(action) 51 | 52 | print(state, action, next_state) 53 | 54 | path.append(next_state) 55 | state = next_state 56 | 57 | if term or trunc: 58 | break 59 | 60 | env.add_path(path) 61 | env.render() 62 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_lqr_inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.inverted_pendulum import InvertedPendulumEnv 4 | from robotics_algorithm.control.optimal_control.lqr import LQR 5 | 6 | # discrete time model 7 | env = InvertedPendulumEnv() 8 | env.reset() 9 | print('cur_state: ', env.cur_state) 10 | env.render() 11 | 12 | # initialize controller 13 | controller = LQR(env, discrete_time=True) 14 | 15 | # run controller 16 | state = env.cur_state 17 | path = [state] 18 | while True: 19 | action = controller.run(state) 20 | next_state, reward, term, trunc, info = env.step(action) 21 | 22 | print(state, action, next_state, reward, term, trunc) 23 | env.render() 24 | 25 | state = next_state 26 | 27 | if term or trunc: 28 | break 29 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_lqr_planar_quadrotor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.planar_quadrotor_hover import PlanarQuadrotorHoverEnv 4 | from robotics_algorithm.control.optimal_control.lqr import LQR 5 | 6 | # discrete time model 7 | env = PlanarQuadrotorHoverEnv( 8 | hover_pos=0.5, hover_height=1.0, quadratic_reward=True, 
term_if_constraints_violated=False 9 | ) # ! LQR can't deal with constraints 10 | env.reset() 11 | print('cur_state: ', env.cur_state) 12 | env.render() 13 | 14 | # ! As LQR can't deal with constraints, it is best to set theta cost to be extremely large so that the quadcopter 15 | # ! stays horizontal to minimize linearization error. 16 | env.Q = np.diag([10, 10, 1000, 1, 1, 10]) # Set theta cost to be extremely large 17 | 18 | # initialize controller 19 | controller = LQR(env, discrete_time=True) 20 | 21 | # run controller 22 | state = env.cur_state 23 | path = [state] 24 | while True: 25 | state_error = state - env.goal_state 26 | action = controller.run(state_error) 27 | action = env.goal_action + action 28 | 29 | next_state, reward, term, trunc, info = env.step(action) 30 | 31 | print(state, action, next_state, reward, term, trunc) 32 | 33 | # ! It is expected for LQR to print constraint violations warnings here. 34 | if info['constraint_violated']: 35 | print('WARNING: constraints are violated!!!!!!!!!!!!!!!!') 36 | 37 | env.render() 38 | 39 | state = next_state 40 | 41 | if term or trunc: 42 | break 43 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_mpc_inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.inverted_pendulum import InvertedPendulumEnv 4 | from robotics_algorithm.control.optimal_control.convex_mpc import ConvexMPC 5 | 6 | # discrete time model 7 | env = InvertedPendulumEnv() 8 | env.reset() 9 | print('cur_state: ', env.cur_state) 10 | env.render() 11 | 12 | # initialize controller 13 | controller = ConvexMPC(env, horizon=50) 14 | 15 | # run controller 16 | state = env.cur_state 17 | path = [state] 18 | while True: 19 | action = controller.run(state) 20 | next_state, reward, term, trunc, info = env.step(action) 21 | 22 | print(state, action, next_state, reward, term, trunc) 23 | env.render() 24 | 25 | state = next_state 26 | 27 | if term or trunc: 28 | break 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_mpc_planar_quadrotor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.planar_quadrotor_hover import PlanarQuadrotorHoverEnv 4 | from robotics_algorithm.control.optimal_control.convex_mpc import ConvexMPC 5 | 6 | # discrete time model 7 | env = PlanarQuadrotorHoverEnv( 8 | hover_pos=0.25, hover_height=1.0, quadratic_reward=True, term_if_constraints_violated=True 9 | ) 10 | env.reset() 11 | print('cur_state: ', env.cur_state) 12 | env.render() 13 | 14 | # initialize controller 15 | controller = ConvexMPC(env, horizon=20) 16 | 17 | # ! Add additional theta constraint so that linearization is always valid and thrust constraints are satisfied. 18 | # ! Compared to LQR, we don't have to manually increase theta cost in Q any more. 
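# Why a ~10 degree bound is reasonable: the hover dynamics are linearized about theta = 0, where
# sin(theta) ~= theta and cos(theta) ~= 1. A quick standalone check of the small-angle error at
# the bound used below (illustrative only, not used by the controller):
theta_bound = 0.174533  # ~10 degrees in radians
print('small-angle error at theta_bound:', abs(np.sin(theta_bound) - theta_bound))  # ~8.8e-4 rad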
19 | x, u = controller.get_decision_variables() 20 | state_low = env.state_space.low - env.goal_state + 1e-2 21 | state_high = env.state_space.high - env.goal_state - 1e-2 22 | action_low = env.action_space.low - env.goal_action + 1e-2 23 | action_high = env.action_space.high - env.goal_action - 1e-2 24 | constr = [] 25 | for t in range(controller.horizon + 1): 26 | constr += [x[t] <= state_high, x[t] >= state_low] 27 | constr += [u[t] <= action_high, u[t] >= action_low] 28 | 29 | constr += [ 30 | x[t, 2] <= 0.174533, # ~10 degree 31 | x[t, 2] >= -0.174533, # ~10 degree 32 | ] # We make theta tighter so that linearized dynamics is more valid 33 | 34 | controller.add_constraints(constr) 35 | 36 | # run controller 37 | state = env.cur_state 38 | path = [state] 39 | while True: 40 | state_error = state - env.goal_state 41 | action_error = controller.run(state_error) 42 | action = env.goal_action + action_error 43 | 44 | next_state, reward, term, trunc, info = env.step(action) 45 | 46 | print(state, action, next_state, reward, term, trunc) 47 | 48 | env.render() 49 | 50 | state = next_state 51 | 52 | if term or trunc: 53 | break 54 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_mppi_cartpole.py: -------------------------------------------------------------------------------- 1 | from robotics_algorithm.env.cartpole_balance import CartPoleEnv 2 | from robotics_algorithm.control.optimal_control.mppi import MPPI 3 | 4 | # Initialize environment 5 | env = CartPoleEnv() 6 | env.reset() 7 | 8 | controller = MPPI(env, action_mean=[0], action_std=[5.0]) 9 | 10 | # run controller 11 | state = env.cur_state 12 | while True: 13 | action = controller.run(state) 14 | 15 | next_state, reward, term, trunc, info = env.step(action) 16 | print(state, action, next_state, reward) 17 | 18 | env.render() 19 | state = next_state 20 | 21 | if term or trunc: 22 | break 23 | -------------------------------------------------------------------------------- /examples/control/path_follow_control/test_cem_path_follow.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import os.path as osp 4 | 5 | from robotics_algorithm.control.optimal_control.cem_mpc import CEMMPC 6 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 7 | 8 | CUR_DIR = osp.join(osp.dirname(osp.abspath(__file__))) 9 | 10 | # This path is computed using Hybrid A* 11 | PATH_DT = 0.1 12 | with open(osp.join(CUR_DIR, 'example_path.json'), 'r') as f: 13 | shortest_path = json.load(f) 14 | 15 | # Initialize environment 16 | env = DiffDrive2DControl(use_lookahead=False) 17 | env.reset(shortest_path, empty=False) 18 | 19 | controller = CEMMPC(env, action_mean=[0.25, 0], action_std=[0.25, math.radians(30)]) 20 | 21 | # debug 22 | env.interactive_viz = True 23 | 24 | # run controller 25 | state = env.start_state 26 | path = [state] 27 | while True: 28 | action = controller.run(state) 29 | 30 | # visualize local plan 31 | local_plan = [state] 32 | best_actions = controller.best_traj # debug 33 | new_state = env.sample_state_transition(state, action)[0] 34 | for future_action in best_actions: 35 | new_state = env.sample_state_transition(new_state, future_action)[0] 36 | local_plan.append(new_state) 37 | env.set_local_plan(local_plan) 38 | 39 | next_state, reward, term, trunc, info = env.step(action) 40 | print(state, action, next_state, reward) 41 | 42 | # nearest_idx = 
env._get_nearest_waypoint_to_state(next_state) 43 | # print(shortest_path[nearest_idx]) 44 | 45 | env.render() 46 | 47 | path.append(next_state) 48 | state = next_state 49 | 50 | if term or trunc: 51 | break 52 | -------------------------------------------------------------------------------- /examples/control/path_follow_control/test_dwa_path_follow.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import os.path as osp 4 | 5 | from robotics_algorithm.control.path_follow_control.dwa import DWA 6 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 7 | 8 | CUR_DIR = osp.join(osp.dirname(osp.abspath(__file__))) 9 | 10 | # This path is computed using Hybrid A* 11 | PATH_DT = 0.1 12 | with open(osp.join(CUR_DIR, 'example_path.json'), 'r') as f: 13 | shortest_path = json.load(f) 14 | 15 | # Initialize environment 16 | env = DiffDrive2DControl() 17 | env.reset(shortest_path, empty=False) 18 | 19 | controller = DWA(env) 20 | 21 | # debug 22 | env.interactive_viz = True 23 | 24 | # run controller 25 | state = env.start_state 26 | path = [state] 27 | while True: 28 | action = controller.run(state) 29 | 30 | # visualize local plan 31 | local_plan = controller.best_traj # debug 32 | env.set_local_plan(local_plan) 33 | 34 | next_state, reward, term, trunc, info = env.step(action) 35 | print(state, action, next_state, reward) 36 | 37 | # nearest_idx = env._get_nearest_waypoint_to_state(next_state) 38 | # print(shortest_path[nearest_idx]) 39 | 40 | env.render() 41 | 42 | path.append(next_state) 43 | state = next_state 44 | 45 | if term or trunc: 46 | break 47 | -------------------------------------------------------------------------------- /examples/control/path_follow_control/test_lqr_path_follow.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os.path as osp 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 6 | from robotics_algorithm.control.optimal_control.lqr import LQR 7 | 8 | CUR_DIR = osp.join(osp.dirname(osp.abspath(__file__))) 9 | 10 | # This path is computed using Hybrid A* 11 | PATH_DT = 0.1 12 | with open(osp.join(CUR_DIR, 'example_path.json'), 'r') as f: 13 | shortest_path = json.load(f) 14 | env = DiffDrive2DControl(lookahead_index=5, has_kinematics_constraint=False) # ! LQR can't deal with constraints 15 | env.reset(shortest_path, empty=False) 16 | env.interactive_viz = True 17 | 18 | # initialize controller 19 | controller = LQR(env, horizon=100, discrete_time=True, solve_by_iteration=True) 20 | 21 | state = env.cur_state 22 | # ! Here we set reference action to be zero velocity, implying that each time LQR is trying to bring robot to stop 23 | # ! at the lookahead state. Optionally, if planned path contains velocity information, reference action can be read 24 | # ! from the path. 25 | ref_action = np.zeros(env.action_space.state_size) 26 | env.render() 27 | 28 | # Following reference path falls into category of iterative time-varying local trajectory stabilization. 29 | # In this case, we iteratively use LQR to stabilize around a reference state that is changing as the robot moves. 30 | # In each iteration, LQR is solved with linearized dynamics around current reference state and reference action. 
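# With solve_by_iteration=True, the finite-horizon gain is presumably obtained via a backward
# Riccati recursion. A minimal standalone sketch of that recursion is given below for reference
# (hypothetical helper; the actual implementation lives in lqr.py and may differ in details):
def riccati_gain_sketch(A, B, Q, R, horizon):
    """Backward Riccati recursion for discrete-time finite-horizon LQR; returns K with u = K @ x_error."""
    P = Q
    K = np.zeros((B.shape[1], A.shape[0]))
    for _ in range(horizon):
        K = -np.linalg.solve(R + B.T @ P @ B, B.T @ P @ A)  # feedback gain at this step
        P = Q + A.T @ P @ (A + B @ K)  # cost-to-go update
    return K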
31 | while True: 32 | # Set current reference state and action to obtain new linearized dynamics 33 | cur_ref_pose = env.get_cur_lookahead_state() 34 | controller.set_ref_state_action(cur_ref_pose, ref_action) 35 | 36 | state_error = env.cur_state - cur_ref_pose 37 | action_error = controller.run(state_error) 38 | action = ref_action + action_error 39 | 40 | next_state, reward, term, trunc, info = env.step(action) 41 | 42 | print(state, action, next_state, reward, term, trunc) 43 | env.render() 44 | 45 | state = next_state 46 | 47 | if term or trunc: 48 | break 49 | -------------------------------------------------------------------------------- /examples/control/path_follow_control/test_mpc_path_follow.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os.path as osp 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 6 | from robotics_algorithm.control.optimal_control.convex_mpc import ConvexMPC 7 | 8 | CUR_DIR = osp.join(osp.dirname(osp.abspath(__file__))) 9 | 10 | PATH_DT = 0.1 11 | with open(osp.join(CUR_DIR, 'example_path.json'), 'r') as f: 12 | shortest_path = json.load(f) 13 | env = DiffDrive2DControl() 14 | env.reset(shortest_path, empty=False) 15 | env.interactive_viz = True 16 | env.render() 17 | 18 | # initialize controller 19 | controller = ConvexMPC(env, horizon=20) 20 | # TODO scipy.solve_dare fails in path follow case. We solve via DP with finite horizon 21 | controller._lqr.horizon=500 22 | controller._lqr.solve_by_iteration=True 23 | 24 | state = env.cur_state 25 | ref_action = np.zeros(env.action_space.state_size) 26 | 27 | 28 | # run controller 29 | while True: 30 | cur_ref_pose = env.get_cur_lookahead_state() 31 | controller.set_ref_state_action(cur_ref_pose, ref_action) 32 | 33 | state_error = env.cur_state - cur_ref_pose 34 | action_error = controller.run(state_error) 35 | action = ref_action + action_error 36 | 37 | next_state, reward, term, trunc, info = env.step(action) 38 | 39 | print(state, action, next_state, reward, term, trunc) 40 | env.render() 41 | 42 | state = next_state 43 | 44 | if term or trunc: 45 | break -------------------------------------------------------------------------------- /examples/control/path_follow_control/test_mppi_path_follow.py: -------------------------------------------------------------------------------- 1 | import math 2 | import json 3 | import os.path as osp 4 | 5 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 6 | from robotics_algorithm.control.optimal_control.mppi import MPPI 7 | 8 | CUR_DIR = osp.join(osp.dirname(osp.abspath(__file__))) 9 | 10 | # This path is computed using Hybrid A* 11 | PATH_DT = 0.1 12 | with open(osp.join(CUR_DIR, 'example_path.json'), 'r') as f: 13 | shortest_path = json.load(f) 14 | 15 | # Initialize environment 16 | env = DiffDrive2DControl(use_lookahead=False) 17 | env.reset(shortest_path, empty=False) 18 | env.interactive_viz = True 19 | 20 | # Init controller 21 | controller = MPPI(env, action_mean=[0.25, 0], action_std=[0.25, math.radians(30)]) 22 | 23 | # run controller 24 | state = env.start_state 25 | path = [state] 26 | while True: 27 | action = controller.run(state) 28 | 29 | # visualize local plan 30 | local_plan = [state] 31 | best_actions = controller.prev_actions # debug 32 | new_state = env.sample_state_transition(state, action)[0] 33 | for future_action in best_actions: 34 | new_state = 
env.sample_state_transition(new_state, future_action)[0] 35 | local_plan.append(new_state) 36 | env.set_local_plan(local_plan) 37 | 38 | next_state, reward, term, trunc, info = env.step(action) 39 | print(state, action, next_state, reward) 40 | 41 | # nearest_idx = env._get_nearest_waypoint_to_state(next_state) 42 | # print(shortest_path[nearest_idx]) 43 | 44 | env.render() 45 | 46 | path.append(next_state) 47 | state = next_state 48 | 49 | if term or trunc: 50 | break 51 | -------------------------------------------------------------------------------- /examples/learning/test_mc_control_on_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 5 | from robotics_algorithm.learning.reinforcement_learning.mc_control_on_policy import MCControlOnPolicy 6 | from robotics_algorithm.utils.math_utils import smooth 7 | from robotics_algorithm.utils.mdp_utils import make_greedy_policy 8 | 9 | env = FrozenLake(dense_reward=True) 10 | state, _ = env.reset() 11 | env.render() 12 | 13 | # Plan 14 | learner = MCControlOnPolicy(env) 15 | Q, _ = learner.run(num_episodes=10000) 16 | # Plot cumulative rewards over episodes 17 | episodes, learning_curve = learner.get_learning_curve() 18 | plt.plot(episodes, smooth(learning_curve, weight=0.95), label="mc") 19 | plt.ylabel("episode_reward") 20 | plt.xlabel("episodes") 21 | plt.show() 22 | print(Q) 23 | 24 | # Execute 25 | policy = make_greedy_policy(Q, env.action_space.size) # after learning, switch to use greedy policy 26 | state, _ = env.reset() 27 | path = [] 28 | all_actions = env.action_space.get_all() 29 | while True: 30 | # choose action according to epsilon-greedy policy 31 | action_probs = policy(state) 32 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 33 | action = all_actions[action_idx] 34 | next_state, reward, term, trunc, info = env.step(action) 35 | 36 | print(state, action, next_state, reward) 37 | env.render() 38 | 39 | path.append(state) 40 | state = next_state 41 | 42 | if term or trunc: 43 | break 44 | 45 | env.render() -------------------------------------------------------------------------------- /examples/learning/test_q_learning.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 5 | from robotics_algorithm.learning.reinforcement_learning.q_learning import QLearning 6 | from robotics_algorithm.utils.math_utils import smooth 7 | from robotics_algorithm.utils.mdp_utils import make_greedy_policy 8 | 9 | env = FrozenLake(dense_reward=True) 10 | state, _ = env.reset() 11 | env.render() 12 | 13 | # Plan 14 | learner = QLearning(env) 15 | Q, _ = learner.run(num_episodes=10000) 16 | # Plot cumulative rewards over episodes 17 | episodes, learning_curve = learner.get_learning_curve() 18 | plt.plot(episodes, smooth(learning_curve, weight=0.95), label='mc') 19 | plt.ylabel('episode_reward') 20 | plt.xlabel('episodes') 21 | plt.show() 22 | print(Q) 23 | 24 | # Execute 25 | policy = make_greedy_policy(Q, env.action_space.size) # after learning, switch to use greedy policy 26 | state, _ = env.reset() 27 | path = [] 28 | all_actions = env.action_space.get_all() 29 | while True: 30 | # choose action according to epsilon-greedy policy 31 | action_probs = policy(state) 32 
| action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 33 | action = all_actions[action_idx] 34 | next_state, reward, term, trunc, info = env.step(action) 35 | 36 | print(state, action, next_state, reward) 37 | env.render() 38 | 39 | path.append(state) 40 | state = next_state 41 | 42 | if term or trunc: 43 | break 44 | 45 | env.render() 46 | -------------------------------------------------------------------------------- /examples/learning/test_sarsa.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 5 | from robotics_algorithm.learning.reinforcement_learning.sarsa import SARSA 6 | from robotics_algorithm.utils.math_utils import smooth 7 | from robotics_algorithm.utils.mdp_utils import make_greedy_policy 8 | 9 | env = FrozenLake(dense_reward=True) 10 | state, _ = env.reset() 11 | env.render() 12 | 13 | # Plan 14 | learner = SARSA(env) 15 | Q, _ = learner.run(num_episodes=10000) 16 | # Plot cumulative rewards over episodes 17 | episodes, learning_curve = learner.get_learning_curve() 18 | plt.plot(episodes, smooth(learning_curve, weight=0.95), label='mc') 19 | plt.ylabel('episode_reward') 20 | plt.xlabel('episodes') 21 | plt.show() 22 | print(Q) 23 | 24 | # Execute 25 | policy = make_greedy_policy(Q, env.action_space.size) # after learning, switch to use greedy policy 26 | state, _ = env.reset() 27 | path = [] 28 | all_actions = env.action_space.get_all() 29 | while True: 30 | # choose action according to epsilon-greedy policy 31 | action_probs = policy(state) 32 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 33 | action = all_actions[action_idx] 34 | next_state, reward, term, trunc, info = env.step(action) 35 | 36 | print(state, action, next_state, reward) 37 | env.render() 38 | 39 | path.append(state) 40 | state = next_state 41 | 42 | if term or trunc: 43 | break 44 | 45 | env.render() 46 | -------------------------------------------------------------------------------- /examples/planning/mdp/test_mcts.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 4 | from robotics_algorithm.env.classic_mdp.cliff_walking import CliffWalking 5 | from robotics_algorithm.planning.mdp.mcts import MCTS 6 | 7 | env = FrozenLake(dense_reward=True) # For online tree search, dense reward needs to be enabled. 8 | # env = CliffWalking(dense_reward=True) # For online tree search, dense reward needs to be enabled. 
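# During the selection phase, MCTS typically scores children with a UCB1-style rule that trades
# off exploitation and exploration. A minimal sketch for reference (the exact rule inside mcts.py
# is not shown here and may differ):
def ucb1_score(mean_value, parent_visits, child_visits, c=1.414):
    """UCB1: average return plus an exploration bonus that shrinks as the child gets visited more."""
    return mean_value + c * np.sqrt(np.log(parent_visits) / child_visits)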
9 | state, _ = env.reset() 10 | env.render() 11 | 12 | planner = MCTS(env) 13 | 14 | # Execute 15 | path = [] 16 | while True: 17 | # Call tree search online 18 | action = planner.run(state) 19 | new_state, reward, term, trunc, info = env.step(action) 20 | 21 | print(state, action, new_state, reward) 22 | 23 | env.render() 24 | 25 | path.append(state) 26 | state = new_state 27 | 28 | if term or trunc: 29 | break 30 | 31 | # env.add_path(path) 32 | env.render() 33 | -------------------------------------------------------------------------------- /examples/planning/mdp/test_policy_iteration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.classic_mdp.windy_grid_world import WindyGridWorld 4 | from robotics_algorithm.env.classic_mdp.cliff_walking import CliffWalking 5 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 6 | from robotics_algorithm.planning.mdp.policy_iteration import PolicyIteration 7 | 8 | envs = [ 9 | FrozenLake(dense_reward=True), 10 | CliffWalking(), 11 | WindyGridWorld() 12 | ] 13 | 14 | for env in envs: 15 | state, _ = env.reset() 16 | env.render() 17 | 18 | # Plan 19 | planner = PolicyIteration(env) 20 | Q, policy = planner.run() 21 | 22 | print(Q) 23 | 24 | # Execute 25 | path = [] 26 | all_actions = env.action_space.get_all() 27 | while True: 28 | # choose action according to epsilon-greedy policy 29 | action_probs = policy(state) 30 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 31 | action = all_actions[action_idx] 32 | 33 | next_state, reward, term, trunc, info = env.step(action) 34 | env.render() 35 | 36 | print(state, action, reward) 37 | 38 | path.append(state) 39 | state = next_state 40 | 41 | if term or trunc: 42 | break 43 | 44 | # env.add_path(path) 45 | env.render() -------------------------------------------------------------------------------- /examples/planning/mdp/test_policy_tree_search.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 4 | from robotics_algorithm.planning.mdp.policy_tree_search import PolicyTreeSearch 5 | 6 | 7 | env = FrozenLake(dense_reward=True) 8 | state, _ = env.reset() 9 | env.render() 10 | 11 | # Plan 12 | planner = PolicyTreeSearch(env, max_depth=5) 13 | 14 | # Execute 15 | path = [] 16 | while True: 17 | # choose action according to the computed policy 18 | action = planner.run(state) 19 | next_state, reward, term, trunc, info = env.step(action) 20 | env.render() 21 | 22 | print(state, action, next_state, reward) 23 | 24 | path.append(state) 25 | state = next_state 26 | 27 | if term or trunc: 28 | break 29 | 30 | # env.add_path(path) 31 | env.render() 32 | -------------------------------------------------------------------------------- /examples/planning/mdp/test_value_iteration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.classic_mdp.windy_grid_world import WindyGridWorld 4 | from robotics_algorithm.env.classic_mdp.cliff_walking import CliffWalking 5 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 6 | from robotics_algorithm.planning.mdp.value_iteration import ValueIteration 7 | 8 | envs = [FrozenLake(dense_reward=True), CliffWalking(), WindyGridWorld()] 9 | 10 | for env in envs: 11 | state, _ = env.reset() 12 | 
env.render() 13 | 14 | # Plan 15 | planner = ValueIteration(env) 16 | Q, policy = planner.run() 17 | 18 | print(Q) 19 | 20 | # Execute 21 | path = [] 22 | all_actions = env.action_space.get_all() 23 | while True: 24 | # choose action according to epsilon-greedy policy 25 | action_probs = policy(state) 26 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 27 | action = all_actions[action_idx] 28 | 29 | next_state, reward, term, trunc, info = env.step(action) 30 | env.render() 31 | 32 | print(state, action, reward) 33 | 34 | path.append(state) 35 | state = next_state 36 | 37 | if term or trunc: 38 | break 39 | 40 | # env.add_path(path) 41 | env.render() 42 | -------------------------------------------------------------------------------- /examples/planning/motion_planning/test_prm.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.base_env import BaseEnv 6 | from robotics_algorithm.env.continuous_2d.omni_2d_planning import OmniDrive2DPlanning 7 | from robotics_algorithm.planning.motion_planning.prm import ProbabilisticRoadmap 8 | 9 | # Initialize environment 10 | env = OmniDrive2DPlanning() 11 | 12 | # -------- Settings ------------ 13 | FIX_MAZE = True 14 | 15 | 16 | # -------- Helper Functions ------------- 17 | def sample_func(env: BaseEnv): 18 | random_state = env.random_state() 19 | return random_state 20 | 21 | 22 | def is_state_valid(env: BaseEnv, state: np.ndarray): 23 | return env.is_state_valid(state) 24 | 25 | 26 | def is_edge_valid(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 27 | return env.is_state_transition_valid(state1, None, state2) 28 | 29 | 30 | # -------- Main Code ---------- 31 | # add default obstacles 32 | env.reset(random_env=not FIX_MAZE) 33 | env.render() 34 | 35 | # initialize planner 36 | planner = ProbabilisticRoadmap( 37 | env, sample_func, is_state_valid, is_edge_valid, num_of_samples=100, num_neighbors=10 38 | ) # 1000 samples out of total 2500 vertex. 
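# compute_roadmap() below is the offline phase of PRM: sample valid states, then try to connect
# each sample to its nearest neighbours with collision-free edges. A minimal standalone sketch
# with hypothetical names (the real logic lives in prm.py and may differ):
def build_roadmap_sketch(env, num_samples=100, k=10):
    samples = []
    while len(samples) < num_samples:
        s = sample_func(env)
        if is_state_valid(env, s):
            samples.append(np.array(s))
    edges = []
    for i, s in enumerate(samples):
        dists = [np.linalg.norm(s - other) for other in samples]
        for j in np.argsort(dists)[1 : k + 1]:  # index 0 is the sample itself
            if is_edge_valid(env, s, samples[int(j)]):
                edges.append((i, int(j)))
    return samples, edges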
39 | 40 | # offline portion of PRM 41 | start_time = time.time() 42 | planner.compute_roadmap() 43 | end_time = time.time() 44 | print('TestPRM, offline takes {} seconds'.format(end_time - start_time)) 45 | 46 | # run path planner 47 | start_time = time.time() 48 | start = env.start_state 49 | goal = env.goal_state 50 | res, shortest_path, shortest_path_len = planner.get_path(start, goal) 51 | end_time = time.time() 52 | print('TestPRM, online takes {} seconds'.format(end_time - start_time)) 53 | 54 | # visualize roadmap 55 | roadmap = planner.get_roadmap() 56 | for state in roadmap.nodes: 57 | if state != tuple(goal.tolist()) and state != tuple(start.tolist()): 58 | env.add_state_samples(state) 59 | 60 | if not res: 61 | print('TestPRM, no path is available!') 62 | else: 63 | # visualize path 64 | env.add_state_path(shortest_path, interpolate=True) 65 | print('TestPRM, found path of len {}'.format(shortest_path_len)) 66 | 67 | env.render() 68 | -------------------------------------------------------------------------------- /examples/planning/motion_planning/test_rrt.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | from robotics_algorithm.env.base_env import BaseEnv 5 | from robotics_algorithm.env.continuous_2d.omni_2d_planning import OmniDrive2DPlanning 6 | from robotics_algorithm.planning.motion_planning.rrt import RRT 7 | 8 | # Initialize environment 9 | env = OmniDrive2DPlanning() 10 | 11 | # -------- Settings ------------ 12 | FIX_MAZE = True 13 | 14 | 15 | # -------- Helper Functions ------------- 16 | def sample_func(env: BaseEnv): 17 | random_state = env.random_state() 18 | return random_state 19 | 20 | 21 | def is_state_valid(env: BaseEnv, state: np.ndarray): 22 | return env.is_state_valid(state) 23 | 24 | 25 | def is_edge_valid(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 26 | return env.is_state_transition_valid(state1, None, state2) 27 | 28 | 29 | def vertex_expand_func(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 30 | path = env.extend(np.array(state1), np.array(state2)) # TODO refactor. 
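    # extend() is assumed to steer from state1 towards state2 (possibly stopping early at a step
    # limit or an obstacle) and to return the intermediate states; the planner only needs the last
    # reached state and the distance travelled from state1, computed below.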
31 | path_len = np.linalg.norm(np.array(path[-1]) - np.array(state1)) 32 | return path[-1], path_len 33 | 34 | 35 | # -------- Main Code ---------- 36 | # add default obstacles 37 | env.reset(random_env=not FIX_MAZE) 38 | env.render() 39 | 40 | # initialize planner 41 | planner = RRT(env, sample_func, vertex_expand_func, num_of_samples=500) 42 | 43 | # run path planner 44 | start = env.start_state 45 | goal = env.goal_state 46 | start_time = time.time() 47 | res, shortest_path, shortest_path_len = planner.run(start, goal) 48 | end_time = time.time() 49 | print('TestRRT, online takes {} seconds'.format(end_time - start_time)) 50 | 51 | # visualize tree 52 | tree = planner.get_tree() 53 | for state in tree.nodes: 54 | if state != tuple(goal.tolist()) and state != tuple(start.tolist()): 55 | env.add_state_samples(state) 56 | 57 | if not res: 58 | print('TestRRT, no path is available!') 59 | else: 60 | # visualize path 61 | env.add_state_path(shortest_path, interpolate=True) 62 | print('TestRRT, found path of len {}'.format(shortest_path_len)) 63 | 64 | env.render() 65 | -------------------------------------------------------------------------------- /examples/planning/motion_planning/test_rrt_connect.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | from robotics_algorithm.env.base_env import BaseEnv 5 | from robotics_algorithm.env.continuous_2d.omni_2d_planning import OmniDrive2DPlanning 6 | from robotics_algorithm.planning.motion_planning.rrt import RRTConnect 7 | 8 | # Initialize environment 9 | env = OmniDrive2DPlanning() 10 | 11 | # -------- Settings ------------ 12 | FIX_MAZE = True 13 | 14 | 15 | # -------- Helper Functions ------------- 16 | def sample_func(env: BaseEnv): 17 | random_state = env.random_state() 18 | return random_state 19 | 20 | 21 | def is_state_valid(env: BaseEnv, state: np.ndarray): 22 | return env.is_state_valid(state) 23 | 24 | 25 | def is_edge_valid(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 26 | return env.is_state_transition_valid(state1, None, state2) 27 | 28 | 29 | def vertex_expand_func(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 30 | path = env.extend(np.array(state1), np.array(state2)) # TODO refactor. 
31 | path_len = np.linalg.norm(np.array(path[-1]) - np.array(state1)) 32 | return path[-1], path_len 33 | 34 | 35 | # -------- Main Code ---------- 36 | # add default obstacles 37 | env.reset(random_env=not FIX_MAZE) 38 | env.render() 39 | 40 | # initialize planner 41 | planner = RRTConnect(env, sample_func, vertex_expand_func, num_of_samples=500) 42 | 43 | # run path planner 44 | start = env.start_state 45 | goal = env.goal_state 46 | start_time = time.time() 47 | res, shortest_path, shortest_path_len = planner.run(start, goal) 48 | end_time = time.time() 49 | print('TestRRT, online takes {} seconds'.format(end_time - start_time)) 50 | 51 | # visualize tree 52 | tree = planner.get_tree() 53 | for state in tree.nodes: 54 | if state != tuple(goal.tolist()) and state != tuple(start.tolist()): 55 | env.add_state_samples(state) 56 | 57 | if not res: 58 | print('TestRRT, no path is available!') 59 | else: 60 | # visualize path 61 | env.add_state_path(shortest_path, interpolate=True) 62 | print('TestRRT, found path of len {}'.format(shortest_path_len)) 63 | 64 | env.render() 65 | -------------------------------------------------------------------------------- /examples/planning/motion_planning/test_rrt_star.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | from robotics_algorithm.env.base_env import BaseEnv 5 | from robotics_algorithm.env.continuous_2d.omni_2d_planning import OmniDrive2DPlanning 6 | from robotics_algorithm.planning.motion_planning.rrt_star import RRTStar 7 | 8 | # Initialize environment 9 | env = OmniDrive2DPlanning() 10 | 11 | # -------- Settings ------------ 12 | FIX_MAZE = True 13 | 14 | 15 | # -------- Helper Functions ------------- 16 | def sample_func(env: BaseEnv): 17 | random_state = env.random_state() 18 | return random_state 19 | 20 | 21 | def is_state_valid(env: BaseEnv, state: np.ndarray): 22 | return env.is_state_valid(state) 23 | 24 | 25 | def is_edge_valid(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 26 | return env.is_state_transition_valid(state1, None, state2) 27 | 28 | 29 | def vertex_expand_func(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 30 | path = env.extend(np.array(state1), np.array(state2)) # TODO refactor. 
31 | path_len = np.linalg.norm(np.array(path[-1]) - np.array(state1)) 32 | return path[-1], path_len 33 | 34 | 35 | def distance_func(env, state1, state2): 36 | return np.linalg.norm(np.array(state1) - np.array(state2)) 37 | 38 | 39 | # -------- Main Code ---------- 40 | # add default obstacles 41 | env.reset(random_env=not FIX_MAZE) 42 | env.render() 43 | 44 | # initialize planner 45 | planner = RRTStar(env, sample_func, vertex_expand_func, is_edge_valid, distance_func, num_of_samples=1000) 46 | 47 | # run path planner 48 | start = env.start_state 49 | goal = env.goal_state 50 | start_time = time.time() 51 | res, shortest_path, shortest_path_len = planner.run(start, goal) 52 | end_time = time.time() 53 | print('TestRRTStar, online takes {} seconds'.format(end_time - start_time)) 54 | 55 | # visualize tree 56 | tree = planner.get_tree() 57 | for state in tree.nodes: 58 | if state != tuple(goal.tolist()) and state != tuple(start.tolist()): 59 | env.add_state_samples(state) 60 | 61 | if not res: 62 | print('TestRRTStar, no path is available!') 63 | else: 64 | # visualize path 65 | env.add_state_path(shortest_path, interpolate=True) 66 | print('TestRRTStar, found path of len {}'.format(shortest_path_len)) 67 | 68 | env.render() 69 | -------------------------------------------------------------------------------- /examples/planning/path_planning/test_a_star.py: -------------------------------------------------------------------------------- 1 | import time 2 | import math 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.discrete_world_2d import GridWorldMaze 6 | from robotics_algorithm.planning.path_planning.a_star import AStar 7 | 8 | # Initialize environment 9 | env = GridWorldMaze() 10 | 11 | # -------- Settings ------------ 12 | FIX_MAZE = True 13 | 14 | 15 | # -------- Helper Functions ------------- 16 | def heuristic_func(v, goal): 17 | # simply the distance between v and goal 18 | v_x, v_y = v 19 | goal_x, goal_y = goal 20 | return math.sqrt((goal_x - v_x) ** 2 + (goal_y - v_y) ** 2) 21 | 22 | 23 | def state_key_func(state: np.ndarray): 24 | return tuple(state.tolist()) 25 | 26 | 27 | # -------- Main Code ---------- 28 | 29 | # add random obstacle to environment 30 | env.reset(random_env=not FIX_MAZE) 31 | env.render() 32 | 33 | 34 | # initialize planner 35 | planner = AStar(env, heuristic_func, state_key_func) 36 | 37 | # run path planner 38 | start = env.start_state 39 | goal = env.goal_state 40 | start_time = time.time() 41 | res, shortest_path, shortest_path_len = planner.run(start, goal) 42 | end_time = time.time() 43 | print("TestAStar, takes {} seconds".format(end_time - start_time)) 44 | 45 | if not res: 46 | print("TestAStar, no path is available!") 47 | else: 48 | print("TestAStar, found path of len {}".format(shortest_path_len)) 49 | # visualize path 50 | env.add_path(shortest_path) 51 | 52 | env.render() 53 | -------------------------------------------------------------------------------- /examples/planning/path_planning/test_dijkstra.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.discrete_world_2d import GridWorldMaze 6 | from robotics_algorithm.planning.path_planning.dijkstra import Dijkstra 7 | 8 | # Initialize environment 9 | env = GridWorldMaze() 10 | 11 | # -------- Settings ------------ 12 | FIX_MAZE = True 13 | 14 | 15 | # -------- Helper Functions ------------- 16 | def state_key_func(state: np.ndarray): 17 | return tuple(state.tolist()) 18 
| 19 | 20 | # -------- Main Code ---------- 21 | 22 | # add random obstacle to environment 23 | env.reset(random_env=not FIX_MAZE) 24 | env.render() 25 | 26 | # initialize planner 27 | planner = Dijkstra(env, state_key_func) 28 | 29 | # run path planner 30 | start = env.start_state 31 | goal = env.goal_state 32 | start_time = time.time() 33 | res, shortest_path, shortest_path_len = planner.run(start, goal) 34 | end_time = time.time() 35 | print("TestDijkstra, takes {} seconds".format(end_time - start_time)) 36 | 37 | if not res: 38 | print("TestDijkstra, no path is available!") 39 | else: 40 | print("TestDijkstra, found path of len {}".format(shortest_path_len)) 41 | # visualize path 42 | env.add_path(shortest_path) 43 | 44 | env.render() 45 | -------------------------------------------------------------------------------- /examples/planning/path_planning/test_hybrid_a_star.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | 4 | import numpy as np 5 | 6 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_planning import DiffDrive2DPlanning 7 | from robotics_algorithm.planning.path_planning.hybrid_a_star import HybridAStar 8 | 9 | # Initialize environment 10 | env = DiffDrive2DPlanning(discrete_action=True) # use discrete action for Hybrid A* 11 | 12 | # -------- Settings ------------ 13 | FIX_MAZE = True 14 | 15 | 16 | # -------- Helper Functions ------------- 17 | def heuristic_func(state, goal): 18 | # simply the Euclidean distance between v and goal, which is an underestimate of the actual SE2 distance 19 | v_x, v_y, _ = state 20 | goal_x, goal_y, _ = goal 21 | return math.sqrt((goal_x - v_x) ** 2 + (goal_y - v_y) ** 2) 22 | 23 | 24 | def state_key_func(state: np.ndarray): 25 | return env.calc_state_key(state) 26 | 27 | 28 | # -------- Main Code ---------- 29 | env.reset(random_env=not FIX_MAZE) 30 | start = env.start_state 31 | goal = env.goal_state 32 | env.render() 33 | 34 | # initialize planner 35 | planner = HybridAStar(env, heuristic_func, state_key_func) 36 | 37 | # run path planner 38 | start_time = time.time() 39 | res, shortest_path, shortest_path_len = planner.run(start, goal) 40 | end_time = time.time() 41 | print('TestHybridAStar, takes {} seconds'.format(end_time - start_time)) 42 | 43 | if not res: 44 | print('TestHybridAStar, no path is available!') 45 | else: 46 | print('TestHybridAStar, found path of len {}'.format(shortest_path_len)) 47 | # visualize path 48 | path = [] 49 | for v in shortest_path: 50 | path.append(v) 51 | 52 | env.add_action_path(path) 53 | 54 | env.render() 55 | -------------------------------------------------------------------------------- /examples/realisitic_examples/cost_aware_diff_drive_path_planning/path_with_cost_penalty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/examples/realisitic_examples/cost_aware_diff_drive_path_planning/path_with_cost_penalty.png -------------------------------------------------------------------------------- /examples/realisitic_examples/cost_aware_diff_drive_path_planning/path_without_cost_penalty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/examples/realisitic_examples/cost_aware_diff_drive_path_planning/path_without_cost_penalty.png 
-------------------------------------------------------------------------------- /examples/state_estimation/test_amcl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_localization import DiffDrive2DLocalization 5 | from robotics_algorithm.state_estimation.amcl import AMCL 6 | 7 | 8 | def spiral_velocity(spiral_radius, spiral_growth_rate, time, linear_velocity): 9 | """ 10 | Calculate the linear velocity and angular velocity of a differential robot to drive in a spiral. 11 | 12 | Args: 13 | spiral_radius (float): The initial radius of the spiral. 14 | spiral_growth_rate (float): The rate at which the spiral's radius grows. 15 | time (float): The current time. 16 | linear_velocity (float): The desired linear velocity of the robot. 17 | 18 | Returns: 19 | tuple: The linear velocity and angular velocity of the robot. 20 | """ 21 | # Calculate the current radius of the spiral 22 | current_radius = spiral_radius + spiral_growth_rate * time 23 | 24 | # Calculate the angular velocity 25 | angular_velocity = linear_velocity / current_radius 26 | 27 | return linear_velocity, angular_velocity 28 | 29 | 30 | # Initialize environment 31 | env = DiffDrive2DLocalization(action_dt=0.1, obs_noise_std=[0.1, 0.1, 0.1]) 32 | obs, _ = env.reset(empty=True) 33 | 34 | # Manually clamp env start state so that robot does not move outside of env when doing the spiral 35 | start_state = env.cur_state.copy() 36 | start_state[0] = min(max(env.size / 4.0, start_state[0]), env.size / 4.0 * 3.0) 37 | start_state[1] = min(max(env.size / 4.0, start_state[1]), env.size / 4.0 * 3.0) 38 | env.start_state = start_state 39 | env.cur_state = start_state 40 | obs = start_state 41 | env.render() 42 | 43 | # Initialize filter 44 | filter = AMCL(env, min_particles=50, max_particles=500) 45 | print(filter.num_of_cur_particles) 46 | # filter.set_initial_state(env.cur_state) 47 | 48 | # Add initial state 49 | # Step env with random actions 50 | true_states = [] 51 | filter_states = [] 52 | obss = [] 53 | true_states.append(env.cur_state) 54 | filter_states.append(filter.get_state()) 55 | obss.append(obs) 56 | 57 | max_steps = 500 58 | num_particles = [filter.num_of_cur_particles] 59 | for i in range(max_steps): 60 | print(f'step: {i}/{max_steps}') 61 | # action = [random.uniform(0.0, 0.5), random.uniform(0, 0.5)] 62 | action = spiral_velocity(1.0, 0.01, i * env.action_dt, 0.2) 63 | new_obs, reward, term, trunc, info = env.step(action) 64 | 65 | filter.run(action, new_obs) 66 | 67 | true_states.append(env.cur_state) 68 | filter_states.append(filter.get_state()) 69 | obss.append(new_obs) 70 | num_particles.append(filter.num_of_cur_particles) 71 | 72 | if term or trunc: 73 | break 74 | 75 | # print(true_states) 76 | # print(filter_states) 77 | 78 | # calculate RMSE 79 | true_states = np.array(true_states) 80 | filter_states = np.array(filter_states) 81 | 82 | rmse = np.sqrt(np.mean((true_states - filter_states) ** 2, axis=0)) 83 | print('RMSE: {}'.format(rmse)) 84 | 85 | env.add_state_path(true_states, id='groundtruth') 86 | env.add_state_path(obss, id='observed') 87 | env.add_state_path(filter_states, id='predicted') 88 | env.render() 89 | 90 | # Plot error and num_particles over time. 
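# The particle-count curve plotted below is the distinguishing feature of AMCL: the number of
# particles is adapted between min_particles and max_particles as the belief converges. A common
# way to choose the sample size is the KLD-sampling bound, sketched here for reference only
# (amcl.py may use a different criterion):
def kld_sample_bound(k_bins, epsilon=0.05, z_quantile=2.326):
    """Particles needed to keep the KL divergence between the sampled and true belief below
    epsilon with ~99% confidence (z_quantile is the corresponding standard-normal quantile)."""
    if k_bins <= 1:
        return 1
    a = 2.0 / (9.0 * (k_bins - 1))
    return int(np.ceil((k_bins - 1) / (2.0 * epsilon) * (1.0 - a + np.sqrt(a) * z_quantile) ** 3))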
91 | error = np.linalg.norm(true_states - filter_states, axis=-1) 92 | fig, ax = plt.subplots(2, 1) 93 | ax[0].plot(error) 94 | ax[0].set_title('Error over time') 95 | ax[1].plot(num_particles) 96 | ax[1].set_title('Number of particles over time') 97 | plt.show() 98 | -------------------------------------------------------------------------------- /examples/state_estimation/test_discrete_bayes_filter.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from robotics_algorithm.env.discrete_world_1d import DiscreteWorld1D 5 | from robotics_algorithm.state_estimation.discrete_bayes_filter import DiscreteBayesFilter 6 | 7 | # env 8 | env = DiscreteWorld1D() 9 | obs, _ = env.reset() 10 | env.render() 11 | 12 | # filter 13 | filter = DiscreteBayesFilter(env) 14 | filter.set_initial_state(env.cur_state) 15 | 16 | # Step env with random actions 17 | true_states = [] 18 | filter_states = [] 19 | obss = [] 20 | true_states.append(env.cur_state) 21 | filter_states.append(filter.get_state()) 22 | obss.append(obs) 23 | max_steps = 100 24 | for i in range(max_steps): 25 | action = env.action_space.sample() 26 | new_obs, reward, term, trunc, info = env.step(action) 27 | 28 | filter.run(action, new_obs) 29 | 30 | true_states.append(env.cur_state) 31 | filter_states.append(filter.get_state()) 32 | obss.append(new_obs) 33 | 34 | if term or trunc: 35 | break 36 | 37 | 38 | # calculate RMSE 39 | true_states = np.array(true_states) 40 | filter_states = np.array(filter_states) 41 | rmse = np.sqrt(np.mean((true_states - filter_states) ** 2, axis=0)) 42 | print("RMSE: {}".format(rmse)) 43 | 44 | # plot result 45 | env_steps = len(true_states) 46 | t = np.arange(env_steps) 47 | fig = plt.figure() 48 | plt.plot(t, [true_states[i][0] for i in range(env_steps)], "k", label="groundtruth") 49 | plt.plot(t, [filter_states[i][0] for i in range(env_steps)], "b", label="predicted") 50 | plt.plot(t, [obss[i][0] for i in range(env_steps)], "r", label="observed") 51 | plt.ylabel("position") 52 | plt.legend() 53 | plt.show() -------------------------------------------------------------------------------- /examples/state_estimation/test_extended_kalman_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_localization import DiffDrive2DLocalization 5 | from robotics_algorithm.state_estimation.extended_kalman_filter import ExtendedKalmanFilter 6 | 7 | 8 | def spiral_velocity(spiral_radius, spiral_growth_rate, time, linear_velocity): 9 | """ 10 | Calculate the linear velocity and angular velocity of a differential robot to drive in a spiral. 11 | 12 | Args: 13 | spiral_radius (float): The initial radius of the spiral. 14 | spiral_growth_rate (float): The rate at which the spiral's radius grows. 15 | time (float): The current time. 16 | linear_velocity (float): The desired linear velocity of the robot. 17 | 18 | Returns: 19 | tuple: The linear velocity and angular velocity of the robot. 
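For example (illustrative numbers): spiral_radius=1.0, spiral_growth_rate=0.01, time=10.0 and linear_velocity=0.2 give current_radius = 1.0 + 0.01 * 10.0 = 1.1, so the function returns (0.2, 0.2 / 1.1 ≈ 0.182).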
20 | """ 21 | # Calculate the current radius of the spiral 22 | current_radius = spiral_radius + spiral_growth_rate * time 23 | 24 | # Calculate the angular velocity 25 | angular_velocity = linear_velocity / current_radius 26 | 27 | return linear_velocity, angular_velocity 28 | 29 | 30 | # Initialize environment 31 | env = DiffDrive2DLocalization(action_dt=0.1, obs_noise_std=[0.1, 0.1, 0.1]) 32 | obs, _ = env.reset(empty=True) 33 | 34 | # Manually clamp env start state so that robot does not move outside of env when doing the spiral 35 | start_state = env.cur_state.copy() 36 | start_state[0] = min(max(env.size / 4.0, start_state[0]), env.size / 4.0 * 3.0) 37 | start_state[1] = min(max(env.size / 4.0, start_state[1]), env.size / 4.0 * 3.0) 38 | env.start_state = start_state 39 | env.cur_state = start_state 40 | obs = start_state 41 | env.render() 42 | 43 | # Initialize filter 44 | filter = ExtendedKalmanFilter(env) 45 | # filter.set_initial_state(env.cur_state) 46 | 47 | # Add initial state 48 | # Step env with random actions 49 | true_states = [] 50 | filter_states = [] 51 | obss = [] 52 | true_states.append(env.cur_state) 53 | filter_states.append(filter.get_state()) 54 | obss.append(obs) 55 | 56 | max_steps = 500 57 | for i in range(max_steps): 58 | print(f'step: {i}/{max_steps}') 59 | # action = [random.uniform(0.0, 0.5), random.uniform(0, 0.5)] 60 | action = spiral_velocity(1.0, 0.01, i * env.action_dt, 0.2) 61 | new_obs, reward, term, trunc, info = env.step(action) 62 | 63 | filter.run(action, new_obs) 64 | 65 | true_states.append(env.cur_state) 66 | filter_states.append(filter.get_state()) 67 | obss.append(new_obs) 68 | 69 | if term or trunc: 70 | break 71 | 72 | # print(true_states) 73 | # print(filter_states) 74 | 75 | # calculate RMSE 76 | true_states = np.array(true_states) 77 | filter_states = np.array(filter_states) 78 | 79 | rmse = np.sqrt(np.mean((true_states - filter_states) ** 2, axis=0)) 80 | print('RMSE: {}'.format(rmse)) 81 | 82 | env.add_state_path(true_states, id='groundtruth') 83 | env.add_state_path(obss, id='observed') 84 | env.add_state_path(filter_states, id='predicted') 85 | env.render() 86 | 87 | # Plot error over time. 
88 | error = np.linalg.norm(true_states - filter_states, axis=-1) 89 | plt.plot(error) 90 | plt.title('Error over time') 91 | plt.show() 92 | -------------------------------------------------------------------------------- /examples/state_estimation/test_kalman_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import matplotlib.pyplot as plt 4 | 5 | from robotics_algorithm.env.continuous_1d.double_integrator_env import DoubleIntegratorEnv 6 | from robotics_algorithm.state_estimation.kalman_filter import KalmanFilter 7 | 8 | # Env 9 | env = DoubleIntegratorEnv(observation_noise_std=0.1, state_transition_noise_std=0.1) 10 | obs, _ = env.reset() 11 | env.render() 12 | 13 | # Create filter 14 | filter = KalmanFilter(env) 15 | filter.set_initial_state(env.cur_state) 16 | 17 | # Step env with random actions 18 | true_states = [] 19 | filter_states = [] 20 | obss = [] 21 | true_states.append(env.cur_state) 22 | filter_states.append(filter.get_state()) 23 | obss.append(obs) 24 | max_steps = 100 25 | for i in range(max_steps): 26 | action = [random.uniform(-1.0, 1.0)] 27 | new_obs, reward, term, trunc, info = env.step(action) 28 | 29 | filter.run(action, new_obs) 30 | 31 | true_states.append(env.cur_state) 32 | filter_states.append(filter.get_state()) 33 | obss.append(new_obs) 34 | 35 | if term or trunc: 36 | break 37 | 38 | 39 | # calculate RMSE 40 | true_states = np.array(true_states) 41 | filter_states = np.array(filter_states) 42 | rmse = np.sqrt(np.mean((true_states - filter_states) ** 2, axis=0)) 43 | print("RMSE: {}".format(rmse)) 44 | 45 | # plot result 46 | env_steps = len(true_states) 47 | t = np.arange(env_steps) 48 | fig = plt.figure() 49 | plt.plot(t, [true_states[i][0] for i in range(env_steps)], "k", label="groundtruth") 50 | plt.plot(t, [filter_states[i][0] for i in range(env_steps)], "b", label="predicted") 51 | plt.plot(t, [obss[i][0] for i in range(env_steps)], "r", label="observed") 52 | plt.ylabel("position") 53 | plt.legend() 54 | plt.show() 55 | -------------------------------------------------------------------------------- /examples/state_estimation/test_particle_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_localization import DiffDrive2DLocalization 5 | from robotics_algorithm.state_estimation.particle_filter import ParticleFilter 6 | 7 | 8 | def spiral_velocity(spiral_radius, spiral_growth_rate, time, linear_velocity): 9 | """ 10 | Calculate the linear velocity and angular velocity of a differential robot to drive in a spiral. 11 | 12 | Args: 13 | spiral_radius (float): The initial radius of the spiral. 14 | spiral_growth_rate (float): The rate at which the spiral's radius grows. 15 | time (float): The current time. 16 | linear_velocity (float): The desired linear velocity of the robot. 17 | 18 | Returns: 19 | tuple: The linear velocity and angular velocity of the robot. 
20 | """ 21 | # Calculate the current radius of the spiral 22 | current_radius = spiral_radius + spiral_growth_rate * time 23 | 24 | # Calculate the angular velocity 25 | angular_velocity = linear_velocity / current_radius 26 | 27 | return linear_velocity, angular_velocity 28 | 29 | 30 | # Initialize environment 31 | env = DiffDrive2DLocalization(action_dt=0.1, obs_noise_std=[0.1, 0.1, 0.1]) 32 | obs, _ = env.reset(empty=True) 33 | 34 | # Manually clamp env start state so that robot does not move outside of env when doing the spiral 35 | start_state = env.cur_state.copy() 36 | start_state[0] = min(max(env.size / 4.0, start_state[0]), env.size / 4.0 * 3.0) 37 | start_state[1] = min(max(env.size / 4.0, start_state[1]), env.size / 4.0 * 3.0) 38 | env.start_state = start_state 39 | env.cur_state = start_state 40 | obs = start_state 41 | env.render() 42 | 43 | # Initialize filter 44 | filter = ParticleFilter(env, num_of_particles=250) 45 | # filter.set_initial_state(env.cur_state) 46 | 47 | # Add initial state 48 | # Step env with random actions 49 | true_states = [] 50 | filter_states = [] 51 | obss = [] 52 | true_states.append(env.cur_state) 53 | filter_states.append(filter.get_state()) 54 | obss.append(obs) 55 | 56 | max_steps = 500 57 | for i in range(max_steps): 58 | print(f'step: {i}/{max_steps}') 59 | # action = [random.uniform(0.0, 0.5), random.uniform(0, 0.5)] 60 | action = spiral_velocity(1.0, 0.01, i * env.action_dt, 0.2) 61 | new_obs, reward, term, trunc, info = env.step(action) 62 | 63 | filter.run(action, new_obs) 64 | 65 | true_states.append(env.cur_state) 66 | filter_states.append(filter.get_state()) 67 | obss.append(new_obs) 68 | 69 | if term or trunc: 70 | break 71 | 72 | # print(true_states) 73 | # print(filter_states) 74 | 75 | # calculate RMSE 76 | true_states = np.array(true_states) 77 | filter_states = np.array(filter_states) 78 | 79 | rmse = np.sqrt(np.mean((true_states - filter_states) ** 2, axis=0)) 80 | print('RMSE: {}'.format(rmse)) 81 | 82 | env.add_state_path(true_states, id='groundtruth') 83 | env.add_state_path(obss, id='observed') 84 | env.add_state_path(filter_states, id='predicted') 85 | env.render() 86 | 87 | # Plot error over time. 
88 | error = np.linalg.norm(true_states - filter_states, axis=-1) 89 | plt.plot(error) 90 | plt.title('Error over time') 91 | plt.show() 92 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "robotics_algorithm" 7 | version = "0.11.2" 8 | dependencies = [ 9 | "numpy", 10 | "matplotlib", 11 | "scikit-learn", 12 | "typing_extensions", 13 | "networkx", 14 | "pygame", 15 | "cvxpy", 16 | ] 17 | requires-python = ">=3.10" 18 | authors = [{ name = "Lu Yunfan", email = "luyunfan44@gmail.com" }] 19 | maintainers = [{ name = "Lu Yunfan", email = "luyunfan44@gmail.com" }] 20 | description = "A collection of robotics algorithms" 21 | readme = "README.md" 22 | license = "MIT" 23 | license-files = ["LICEN[CS]E.*"] 24 | 25 | [tool.ruff] 26 | line-length = 120 27 | 28 | [tool.ruff.format] 29 | quote-style = "single" 30 | indent-style = "space" 31 | docstring-code-format = true 32 | -------------------------------------------------------------------------------- /robotics_algorithm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/control/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/control/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/control/classical_control/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/control/classical_control/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/control/classical_control/pid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.base_env import BaseEnv, SpaceType 4 | 5 | 6 | class PID: 7 | """Implements a PID controller. 8 | 9 | The control output combines proportional, integral and derivative terms of the tracking error between the 10 | goal state and the current state: u = Kp * e + Ki * sum(e) + Kd * (e - e_prev). 11 | """ 12 | 13 | def __init__(self, env: BaseEnv, goal_state=None, Kp: float = 1.0, Ki: float = 0.0, Kd: float = 0.0): 14 | """ 15 | Constructor. If goal_state is not given, the env's goal state is used as the setpoint. 16 | """ 17 | assert env.state_space.type == SpaceType.CONTINUOUS.value 18 | assert env.action_space.type == SpaceType.CONTINUOUS.value 19 | 20 | self.env = env 21 | if goal_state is None: 22 | self.goal_state = np.array(env.goal_state, dtype=np.float32) 23 | else: 24 | self.goal_state = np.array(goal_state, dtype=np.float32) 25 | self.Kp = Kp 26 | self.Ki = Ki 27 | self.Kd = Kd 28 | 29 | self._prev_error = np.zeros_like(self.goal_state, dtype=np.float32) 30 | self._accum_error = np.zeros_like(self.goal_state, dtype=np.float32) 31 | 32 | def run(self, state: np.ndarray) -> np.ndarray: 33 | """Compute the current action based on the current state.
34 | 35 | Args: 36 | state (np.ndarray): current state. 37 | 38 | Returns: 39 | np.ndarray: current action 40 | """ 41 | state = np.array(state) 42 | 43 | error = self.goal_state - state 44 | delta_error = error - self._prev_error 45 | self._accum_error += error 46 | 47 | control = self.Kp * error + self.Kd * delta_error + self.Ki * self._accum_error 48 | 49 | self._prev_error = error 50 | 51 | return control 52 | -------------------------------------------------------------------------------- /robotics_algorithm/control/optimal_control/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/control/optimal_control/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/control/optimal_control/cem_mpc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import truncnorm 3 | 4 | from robotics_algorithm.env.base_env import BaseEnv, SpaceType 5 | 6 | 7 | class CEMMPC: 8 | """Implements Cross-entropy method for model-predictive control. 9 | 10 | Assuming a Gaussian policy, the general idea is to sample a bunch of trajectories using env state transition model 11 | and get their respective cost. Then take the top-K best trajectories and update the policy's mean and std based 12 | on these trajectories. We then sample another set of trajectories based on the updated Gaussian policy. We iterate 13 | this process for certain number of times and return the best action in the current best trajectory. 14 | """ 15 | 16 | def __init__( 17 | self, 18 | env: BaseEnv, 19 | num_traj_samples: int = 40, 20 | num_elites: int = 10, 21 | sample_traj_len: int = 20, 22 | action_mean: float | np.ndarray = 0.0, 23 | action_std: float | np.ndarray = 1.0, 24 | opt_iter: int = 5, 25 | alpha: float = 0.1, 26 | ): 27 | """Constructor 28 | 29 | Args: 30 | env (BaseEnv): the env. 31 | num_traj_samples (int, optional): number of rollout trajectories to sample. Defaults to 40. 32 | num_elites (int, optional): number of best trajectories to use for estimating the next mean and std. 33 | Defaults to 10. 34 | sample_traj_len (int, optional): the length of each rollout trajectory. Defaults to 20. 35 | action_mean (float | np.ndarray, optional): the nominal mean of sampled action. Defaults to 0.0. 36 | action_std (float | np.ndarray, optional): the nominal std of sampled action. Defaults to 1.0. 37 | opt_iter (int, optional): number of optimization iteration. Defaults to 5. 38 | alpha (float, optional): the low pass filter gain for updating mean and std in each optimization iteration. 39 | Defaults to 0.1. 
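Example (illustrative sketch; any env whose state and action spaces are continuous satisfies the asserts below, e.g. the path-follow env exercised by examples/control/path_follow_control/test_cem_path_follow.py):

    controller = CEMMPC(env, num_traj_samples=40, num_elites=10, sample_traj_len=20)
    action = controller.run(state)  # re-solved at every control step in receding-horizon fashion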
40 | """ 41 | assert env.state_space.type == SpaceType.CONTINUOUS.value 42 | assert env.action_space.type == SpaceType.CONTINUOUS.value 43 | 44 | self.env = env 45 | self.num_traj_samples = num_traj_samples 46 | self.num_elites = num_elites 47 | self.sample_traj_len = sample_traj_len 48 | self.opt_iter = opt_iter 49 | self.alpha = alpha 50 | 51 | self.nominal_action_mean = np.array(action_mean) 52 | self.nominal_action_std = np.array(action_std) 53 | self.action_seq_mean = np.tile(self.nominal_action_mean, (self.sample_traj_len, 1)) 54 | self.action_seq_std = np.tile(self.nominal_action_std, (self.sample_traj_len, 1)) 55 | 56 | def run(self, state: np.ndarray) -> np.ndarray: 57 | """Compute the current action given current state 58 | 59 | Args: 60 | state (np.ndarray): current state 61 | 62 | Returns: 63 | np.ndarray: current action 64 | """ 65 | for _ in range(self.opt_iter): 66 | # Sample trajectories 67 | all_costs = np.zeros((self.num_traj_samples)) 68 | all_traj_rollouts = np.zeros((self.num_traj_samples, *self.action_seq_mean.shape)) 69 | 70 | # Sample actions in a batch [K, N, Action_dim] 71 | sampled_action_seq = self._sample_action_seq() 72 | 73 | # Perform rollout 74 | for k in range(self.num_traj_samples): 75 | cur_state = state 76 | total_cost = 0 77 | for t in range(self.sample_traj_len): 78 | action = sampled_action_seq[k, t] 79 | new_state, reward, term, trunc, _ = self.env.sample_state_transition(cur_state, action) 80 | cost = -reward 81 | 82 | total_cost += cost 83 | cur_state = new_state 84 | all_traj_rollouts[k, t] = sampled_action_seq[k, t] 85 | 86 | if term or trunc: 87 | break 88 | 89 | all_costs[k] = total_cost 90 | 91 | # Update policy distribution based on elite trajectories 92 | self._update_distribution(all_traj_rollouts, all_costs) 93 | 94 | # After enough optimization iterations, 95 | # Get the best trajectory 96 | best_traj_idx = np.argmin(all_costs) 97 | self.best_traj = all_traj_rollouts[best_traj_idx] # For debug purpose 98 | 99 | # update previous control sequence, shift 1 step to the left 100 | action_mean = np.copy(self.action_seq_mean) 101 | self.action_seq_mean[:-1] = action_mean[1:] 102 | self.action_seq_mean[-1] = action_mean[-1] 103 | 104 | # reset std to nominal 105 | self.action_seq_std = np.copy(self.nominal_action_std) 106 | 107 | # Return the best traj's first action 108 | return self.best_traj[0] 109 | 110 | def _sample_action_seq(self): 111 | m = self.action_seq_mean[None, :] 112 | s = self.action_seq_std[None, :] 113 | samples = truncnorm.rvs( 114 | self.env.action_space.space[0], 115 | self.env.action_space.space[1], 116 | loc=m, 117 | scale=s, 118 | size=(self.num_traj_samples, *self.action_seq_mean.shape), 119 | ) 120 | # NOTE: truncnorm may still violate the bounds a little bit. 
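# (scipy's truncnorm defines its a and b cut-offs in standard-score units, i.e. (bound - loc) / scale, so passing
# the raw action-space bounds here is only exact when loc = 0 and scale = 1; the clip below restores feasibility.)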
121 | samples = np.clip(samples, self.env.action_space.space[0], self.env.action_space.space[1]) 122 | return samples # shape: [K, N, Action_dim] 123 | 124 | def _update_distribution(self, all_traj_rollouts: np.ndarray, all_costs: np.ndarray): 125 | # Get elite parameters 126 | elite_idxs = np.array(all_costs).argsort()[: self.num_elites] 127 | elite_sequences = all_traj_rollouts[elite_idxs] 128 | 129 | # fit around mean of elites 130 | new_mean = elite_sequences.mean(axis=0) 131 | new_std = elite_sequences.std(axis=0) 132 | 133 | # Low pass filter 134 | self.action_seq_mean = (1 - self.alpha) * new_mean + self.alpha * self.action_seq_mean # [h,d] 135 | self.action_seq_std = (1 - self.alpha) * new_std + self.alpha * self.action_seq_std 136 | -------------------------------------------------------------------------------- /robotics_algorithm/control/optimal_control/convex_mpc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cvxpy 3 | import time 4 | 5 | from robotics_algorithm.env.base_env import BaseEnv, FunctionType, SpaceType 6 | from .lqr import LQR 7 | 8 | 9 | class ConvexMPC: 10 | """Implements Convex Model Predictive Control 11 | 12 | Use convex optimization to explicitly solve the optimal sequence of action that minimizes cost of future trajectory, 13 | subject to linear state transition constraints. 14 | """ 15 | 16 | def __init__(self, env: BaseEnv, horizon=100): 17 | """ 18 | Constructor 19 | 20 | Args: 21 | env (BaseEnv): the env 22 | horizon (int, optional): the lookahead distance. Defaults to 100. 23 | 24 | Raises: 25 | AssertionError: if env does not satisfy the assumptions of ConvexMPC 26 | """ 27 | assert env.state_space.type == SpaceType.CONTINUOUS.value 28 | assert env.action_space.type == SpaceType.CONTINUOUS.value 29 | assert ( 30 | env.state_transition_func_type == FunctionType.LINEAR.value 31 | or env.state_transition_func_type == FunctionType.GENERAL.value 32 | ) 33 | assert env.reward_func_type == FunctionType.QUADRATIC.value 34 | 35 | self.env = env 36 | self.horizon = horizon 37 | 38 | # ! Use LQR to calculate terminal cost. This makes sense as we assume after horizon, the system is brought 39 | # ! to good state where all constraints should be satisfied. 40 | self._lqr = LQR(env) 41 | 42 | # By default, default reference state and action is env.goal and zero action 43 | self.A, self.B = self.env.linearize_state_transition(env.goal_state, env.goal_action) 44 | 45 | # Set up default constraints on input and state 46 | self.default_constraints = [] 47 | self.x = cvxpy.Variable((self.horizon + 1, self.env.state_space.state_size)) 48 | self.u = cvxpy.Variable((self.horizon + 1, self.env.action_space.state_size)) 49 | 50 | def set_ref_state_action(self, ref_state, ref_action): 51 | """ 52 | Set the reference state and action for the current iteration. 53 | 54 | Args: 55 | ref_state (np.ndarray): the reference state 56 | ref_action (np.ndarray): the reference action 57 | """ 58 | self.A, self.B = self.env.linearize_state_transition(ref_state, ref_action) 59 | 60 | def get_decision_variables(self): 61 | return self.x, self.u 62 | 63 | def add_constraints(self, constraint): 64 | if isinstance(constraint, list): 65 | self.default_constraints += constraint 66 | else: 67 | self.default_constraints.append(constraint) 68 | 69 | def run(self, state: np.ndarray) -> np.ndarray: 70 | """Compute the current action based on the current state. 71 | 72 | Args: 73 | state (np.ndarray): current state. 
74 | 75 | Returns: 76 | np.ndarray: current action 77 | """ 78 | A, B = self.A, self.B 79 | Q, R = self.env.Q, self.env.R 80 | 81 | # use lqr to get terminal cost 82 | P = self._lqr._solve_dare(A, B, Q, R) 83 | 84 | constr = self.default_constraints.copy() 85 | cost = 0.0 86 | for t in range(self.horizon): 87 | cost += cvxpy.quad_form(self.x[t], Q) 88 | cost += cvxpy.quad_form(self.u[t], R) 89 | constr += [self.x[t + 1].T == A @ self.x[t] + B @ self.u[t]] 90 | 91 | # default constraints on state space and action space size 92 | # constr += [self.x[t] <= self.env.state_space.high, self.x[t] >= self.env.state_space.low] 93 | # constr += [self.u[t] <= self.env.action_space.high, self.u[t] >= self.env.action_space.low] 94 | 95 | cost += cvxpy.quad_form(self.x[self.horizon], P) # LQR terminal cost 96 | constr += [self.x[0] == state] 97 | prob = cvxpy.Problem(cvxpy.Minimize(cost), constr) 98 | 99 | start = time.time() 100 | prob.solve(verbose=False) 101 | elapsed_time = time.time() - start 102 | print(f'calc time:{elapsed_time:.6f} [sec]') 103 | 104 | print('status:', prob.status) 105 | # print("optimal value", prob.value) 106 | # print('optimal var') 107 | # print(self.x.value) 108 | # print(self.u.value) 109 | 110 | return self.u.value[0] 111 | -------------------------------------------------------------------------------- /robotics_algorithm/control/optimal_control/lqr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import scipy.linalg 4 | 5 | from robotics_algorithm.env.base_env import BaseEnv, FunctionType, SpaceType 6 | 7 | 8 | class LQR: 9 | """Implements Linear-quadratic regulator. 10 | 11 | Given a system with linear state transition function and quadratic cost function, the optimal control strategy 12 | can be computed analytically and has the form: u = -Kx 13 | """ 14 | def __init__(self, env: BaseEnv, discrete_time=True, horizon=float("inf"), solve_by_iteration=False): 15 | """ 16 | State transition: 17 | x_dot = A * X + B * U 18 | y = x (full state feedback) 19 | J = sum_over_time(X_T*Q*X + U_T*R*U) 20 | """ 21 | assert env.state_space.type == SpaceType.CONTINUOUS.value 22 | assert env.action_space.type == SpaceType.CONTINUOUS.value 23 | # Can be general because we can linearize dynamics around fixed point. 24 | assert env.state_transition_func_type == FunctionType.LINEAR.value or env.state_transition_func_type == FunctionType.GENERAL.value 25 | assert env.reward_func_type == FunctionType.QUADRATIC.value 26 | 27 | self.env = env 28 | self.discrete_time = discrete_time 29 | self.horizon = horizon 30 | self.solve_by_iteration = solve_by_iteration 31 | 32 | # By default, default reference state and action is env.goal and zero action 33 | self.A, self.B = self.env.linearize_state_transition(env.goal_state, env.goal_action) 34 | 35 | def set_ref_state_action(self, ref_state, ref_action): 36 | self.A, self.B = self.env.linearize_state_transition(ref_state, ref_action) 37 | 38 | def run(self, state: np.ndarray) -> np.ndarray: 39 | """Compute the current action based on the current state. 40 | 41 | Args: 42 | state (np.ndarray): current state. 
43 | 44 | Returns: 45 | np.ndarray: current action 46 | """ 47 | A, B = self.A, self.B 48 | Q, R = self.env.Q, self.env.R 49 | state = np.array(state).reshape(-1, 1) 50 | 51 | if self.discrete_time: 52 | P = self._solve_dare(A, B, Q, R) 53 | K = np.linalg.inv(R + B.T @ P @ B) @ B.T @ P 54 | else: 55 | P = self._solve_care(A, B, Q, R) 56 | K = np.linalg.inv(R) @ B.T @ P 57 | 58 | u = -K @ state 59 | return u.reshape(-1) 60 | 61 | def _solve_care(self, A, B, Q, R): 62 | return scipy.linalg.solve_continuous_are(A, B, Q, R) 63 | 64 | def _solve_dare(self, A, B, Q, R): 65 | # we can call scipy library. 66 | if self.horizon == float("inf") or not self.solve_by_iteration: 67 | return scipy.linalg.solve_discrete_are(A, B, Q, R) 68 | 69 | # Alternatively, discrete-time algebraic Riccati equation can be solved by iteration 70 | 71 | # lists for P that contains P_t 72 | P = [Q] 73 | 74 | # work backwards in time from horizon to 0 75 | for _ in range(self.horizon): 76 | # calculate K_t 77 | # K_t = -(R+B^T*P_(t+1)*B)^(-1)*B^T*P_(t+1)*A 78 | K_t = np.linalg.inv(R + B.T @ P[-1] @ B) @ B.T @ P[-1] @ A 79 | 80 | # calculate P_t 81 | # P_t = Q + A^T*P_(t+1)*A - A^T*P_(t+1)*B*(R + B^T P_(t+1) B)^(-1) B^T*P_(t+1)*A 82 | P_t = Q + A.T @ P[-1] @ A - A.T @ P[-1] @ B @ K_t 83 | 84 | # add our calculations to the beginning of the lists (since we are working backwards) 85 | P.append(P_t) 86 | 87 | # P_N = Q_f 88 | # P.insert(0, Q) 89 | 90 | # K_N = 0 91 | # K.insert(0, 0) 92 | 93 | return P[-1] 94 | -------------------------------------------------------------------------------- /robotics_algorithm/control/path_follow_control/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/control/path_follow_control/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/control/path_follow_control/dwa.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 4 | 5 | 6 | class DWA: 7 | """Implements Dynamic Window Approach for path follow. 8 | 9 | paper https://www.ri.cmu.edu/pub_files/pub1/fox_dieter_1997_1/fox_dieter_1997_1.pdf 10 | 11 | The basic idea of the Dynamic Window Approach (DWA) algorithm is as follows: 12 | 13 | - Discretely sample in the robot's control space 14 | - For each sampled velocity, perform forward simulation from the robot's current state to predict what would happen if the sampled velocity were applied for some (short) period of time. 15 | - Evaluate (score) each trajectory resulting from the forward simulation. 16 | - Pick the highest-scoring trajectory and send the associated velocity to the mobile base. 17 | - Rinse and repeat. 18 | """ 19 | 20 | def __init__( 21 | self, 22 | env: DiffDrive2DControl, 23 | min_lin_vel: float = 0.0, 24 | max_lin_vel: float = 0.5, 25 | lin_vel_samples: int = 10, 26 | min_ang_vel: float = -1.0, 27 | max_ang_vel: float = 1.0, 28 | ang_vel_samples: int = 20, 29 | simulate_time: float = 0.5, 30 | ) -> None: 31 | """ 32 | Constructor 33 | 34 | Args: 35 | env (DiffDrive2DControl): The environment. 36 | min_lin_vel (float): Minimum linear velocity. 37 | max_lin_vel (float): Maximum linear velocity. 38 | lin_vel_samples (int): Number of linear velocity samples. 39 | min_ang_vel (float): Minimum angular velocity. 
40 | max_ang_vel (float): Maximum angular velocity. 41 | ang_vel_samples (int): Number of angular velocity samples. 42 | simulate_time (float): Simulation time. 43 | """ 44 | assert isinstance(env, DiffDrive2DControl), 'env must be a DiffDrive2DControl' 45 | 46 | self.env = env 47 | self._min_lin_vel = min_lin_vel 48 | self._max_lin_vel = max_lin_vel 49 | self._lin_vel_steps = (max_lin_vel - min_lin_vel) / lin_vel_samples 50 | self._min_ang_vel = min_ang_vel 51 | self._max_ang_vel = max_ang_vel 52 | self._ang_vel_steps = (max_ang_vel - min_ang_vel) / ang_vel_samples 53 | self._sim_steps = int(simulate_time / env.action_dt) 54 | 55 | self.sampled_action_seq = [] 56 | self._sample_action_seq() 57 | 58 | def _sample_action_seq(self): 59 | for lin_vel in np.arange(self._min_lin_vel, self._max_lin_vel, self._lin_vel_steps): 60 | for ang_vel in np.arange(self._min_ang_vel, self._max_ang_vel, self._ang_vel_steps): 61 | self.sampled_action_seq.append([lin_vel, ang_vel]) 62 | 63 | def run(self, state: np.ndarray) -> np.ndarray: 64 | """Compute the current action given current state 65 | 66 | This function uses Dynamic Window Approach to select the best action given the current state. 67 | 68 | Args: 69 | state (np.ndarray): current state 70 | 71 | Returns: 72 | np.ndarray: current action 73 | """ 74 | best_cost = np.inf 75 | best_action = np.array([0.0, 0.0]) 76 | self.best_traj = [state] 77 | 78 | # Iterate over all possible actions 79 | for lin_vel, ang_vel in self.sampled_action_seq: 80 | cur_state = state 81 | total_cost = 0 82 | 83 | # * The action stays constant for self._sim_steps because we do not enforce acceleration limits. 84 | # * If acceleration limits are to be enforced, here we need to slowly accelerate/decelerate to sampled 85 | # * velocity (like ROS2). 86 | action = np.array([lin_vel, ang_vel]) 87 | traj = [cur_state] 88 | for _ in range(self._sim_steps): 89 | # Take one step in the simulation 90 | new_state, reward, term, trunc, _ = self.env.sample_state_transition(cur_state, action) 91 | cost = -reward 92 | 93 | total_cost += cost 94 | cur_state = new_state 95 | traj.append(new_state) 96 | 97 | # If the simulation is terminated, break 98 | if term or trunc: 99 | break 100 | 101 | # Update the best cost and trajectory if the current one is better 102 | if total_cost < best_cost: 103 | best_cost = total_cost 104 | best_action = action 105 | self.best_traj = traj 106 | 107 | # Return the best action 108 | return best_action 109 | -------------------------------------------------------------------------------- /robotics_algorithm/env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/env/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/env/classic_mdp/cliff_walking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from matplotlib import colors 4 | from typing_extensions import override 5 | 6 | from robotics_algorithm.env.base_env import MDPEnv, DiscreteSpace, DistributionType 7 | 8 | GRID_HEIGHT = 4 9 | GRID_WIDTH = 9 10 | OBSTACLES = [(1, 0), (2, 0), (3, 0), (4, 0), (5, 0), (6, 0), (7, 0)] 11 | 12 | 13 | class CliffWalking(MDPEnv): 14 | """ 15 | A player is placed in grid world. The player should move from start to goal. 
If the player reaches the goal the 16 | episode ends. If the player moves to a cliff location the episode terminates with failure. 17 | During each move, the player has a chance of ending up in the left or right of the target grid. 18 | 19 | State: [x, y] 20 | Action: [up, down, left, right] 21 | 22 | Discrete state space. 23 | Discrete action space. 24 | Stochastic transition. 25 | Fully observable. 26 | """ 27 | 28 | def __init__( 29 | self, 30 | start: np.ndarray = np.array([0, 0]), 31 | goal: np.ndarray = np.array([8, 0]), 32 | obstacles: list[tuple] = OBSTACLES, 33 | dense_reward: bool = False, 34 | ): 35 | """Constructor. 36 | 37 | Args: 38 | start (tuple): the start position of agent. 39 | goal (tuple): the goal position. 40 | obstacles (list[tuple]): a list of obstacle positions. 41 | dense_reward (bool): whether to use dense reward for this env. 42 | """ 43 | super().__init__() 44 | 45 | # Define spaces 46 | self.state_space = DiscreteSpace([np.array([i, j]) for i in range(GRID_WIDTH) for j in range(GRID_HEIGHT)]) 47 | self.action_space = DiscreteSpace( 48 | [np.array([0]), np.array([1]), np.array([2]), np.array([3])] 49 | ) # action, 0: up, 1: right, 2: down, 3: left) 50 | self.state_transition_dist_type = DistributionType.CATEGORICAL.value 51 | 52 | self.start_state = start 53 | self.cur_state = start 54 | self.goal_state = goal 55 | self.obstacles = obstacles 56 | self.step_reward = -1 57 | self.obstacle_reward = -100 58 | self.goal_reward = 100 59 | self.path = [] 60 | self.dense_reward = dense_reward 61 | 62 | @override 63 | def reset(self): 64 | self.cur_state = self.start_state 65 | return self.cur_state, {} 66 | 67 | @override 68 | def state_transition_func(self, state: np.ndarray, action: np.ndarray) -> tuple[list[np.ndarray], list[float]]: 69 | i, j = state 70 | 71 | new_states = [] 72 | if action == 0: 73 | new_states.append(np.array([i, min(j + 1, GRID_HEIGHT - 1)])) 74 | new_states.append(np.array([max(0, i - 1), min(j + 1, GRID_HEIGHT - 1)])) 75 | new_states.append(np.array([min(i + 1, GRID_WIDTH - 1), min(j + 1, GRID_HEIGHT - 1)])) 76 | elif action == 1: 77 | new_states.append(np.array([min(i + 1, GRID_WIDTH - 1), j])) 78 | new_states.append(np.array([min(i + 1, GRID_WIDTH - 1), min(j + 1, GRID_HEIGHT - 1)])) 79 | new_states.append(np.array([min(i + 1, GRID_WIDTH - 1), max(0, j - 1)])) 80 | elif action == 2: 81 | new_states.append(np.array([i, max(0, j - 1)])) 82 | new_states.append(np.array([max(0, i - 1), max(0, j - 1)])) 83 | new_states.append(np.array([min(i + 1, GRID_WIDTH - 1), max(0, j - 1)])) 84 | elif action == 3: 85 | new_states.append(np.array([max(0, i - 1), j])) 86 | new_states.append(np.array([max(0, i - 1), min(j + 1, GRID_HEIGHT - 1)])) 87 | new_states.append(np.array([max(0, i - 1), max(0, j - 1)])) 88 | probs = [0.8, 0.1, 0.1] 89 | 90 | return new_states, probs 91 | 92 | @override 93 | def is_state_terminal(self, state): 94 | term = False 95 | if state[0] == self.goal_state[0] and state[1] == self.goal_state[1]: 96 | term = True 97 | 98 | elif tuple(state.tolist()) in self.obstacles: 99 | term = True 100 | 101 | return term 102 | 103 | @override 104 | def reward_func(self, state: np.ndarray, action: np.ndarray = None, new_state: np.ndarray = None) -> float: 105 | # R(s, s') 106 | # Transition to goal state gives goal reward. 107 | # Transition to obstacle gives obstacle reward. 108 | # Transition to free gives step reward. 
109 | if np.allclose(new_state, self.goal_state): 110 | reward = self.goal_reward 111 | elif tuple(new_state.tolist()) in self.obstacles: 112 | reward = self.obstacle_reward 113 | else: 114 | if not self.dense_reward: 115 | reward = self.step_reward 116 | else: 117 | # Negative of dist as penalty. 118 | # This encourages moving to goal. 119 | reward = -(abs(new_state[0] - self.goal_state[0]) + abs(new_state[1] - self.goal_state[1])) * 0.1 120 | 121 | return reward 122 | 123 | def add_path(self, path): 124 | self.path = path 125 | 126 | @override 127 | def render(self): 128 | _, ax = plt.subplots() 129 | self.gridworld = np.full((GRID_HEIGHT, GRID_WIDTH), 0) 130 | 131 | for obstacle in self.obstacles: 132 | self.gridworld[GRID_HEIGHT - obstacle[1] - 1][obstacle[0]] = 1 133 | 134 | self.gridworld[GRID_HEIGHT - self.start_state[1] - 1][self.start_state[0]] = 2 135 | self.gridworld[GRID_HEIGHT - self.goal_state[1] - 1][self.goal_state[0]] = 3 136 | 137 | if len(self.path) > 0: 138 | for state in self.path: 139 | self.gridworld[GRID_HEIGHT - state[1] - 1][state[0]] = 4 140 | else: 141 | self.gridworld[GRID_HEIGHT - self.cur_state[1] - 1][self.cur_state[0]] = 4 142 | 143 | self.colour_map = colors.ListedColormap(['white', 'black', 'yellow', 'red', 'green']) 144 | bounds = [0, 1, 2, 3, 4, 5] 145 | self.norm = colors.BoundaryNorm(bounds, self.colour_map.N) 146 | 147 | ax.imshow(self.gridworld, cmap=self.colour_map, norm=self.norm) 148 | 149 | # draw gridlines 150 | ax.grid(which='major', axis='both', linestyle='-', color='k', linewidth=1) 151 | ax.set_xticks(np.arange(GRID_WIDTH) - 0.5) 152 | ax.set_xticklabels(np.array([str(i) for i in range(GRID_WIDTH)])) 153 | ax.set_yticks(np.flip(np.arange(GRID_HEIGHT) + 0.5)) 154 | ax.set_yticklabels(np.array([str(i) for i in range(GRID_HEIGHT)])) 155 | plt.tick_params(axis='both', labelsize=5, length=0) 156 | 157 | plt.show() 158 | -------------------------------------------------------------------------------- /robotics_algorithm/env/classic_mdp/windy_grid_world.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from matplotlib import colors 4 | from typing_extensions import override 5 | 6 | from robotics_algorithm.env.base_env import DiscreteSpace, DistributionType, MDPEnv 7 | 8 | GRID_HEIGHT = 7 9 | GRID_WIDTH = 10 10 | 11 | 12 | class WindyGridWorld(MDPEnv): 13 | """ 14 | Windy Gridworld is a standard gridworld with start and goal states. The difference is that there is a crosswind 15 | running upward through the middle of the grid. Actions are the standard four: up, right, down, and left. 16 | In the middle region the resultant next states are shifted upward by the "wind" which strength varies from column 17 | to column. The reward is -1 until goal state is reached. 18 | 19 | State: [x, y] 20 | Action: [up, down, left, right] 21 | 22 | Discrete state space. 23 | Discrete action space. 24 | Deterministic transition. 25 | Fully observable. 26 | """ 27 | 28 | def __init__( 29 | self, 30 | start: np.ndarray = np.array([0, 3]), 31 | goal: np.ndarray = np.array([7, 3]), 32 | wind: np.ndarray = [0, 0, 0, 1, 1, 1, 2, 2, 1, 0], 33 | ): 34 | """Constructor. 35 | 36 | Args: 37 | start (np.ndarray): the start position of agent. 38 | goal (np.ndarray): the goal position. 39 | wind (np.ndarray): the strength of upward wind in each horizontal position. 40 | The number decides how many grid cell the agent will be pushed upward. 
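For example, with the default wind profile, taking action 1 (right) from column 3 moves the agent one column to the right and, because wind[4] == 1, also one row up (clamped at the grid edge).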
41 | """ 42 | super().__init__() 43 | 44 | # Define spaces 45 | self.state_space = DiscreteSpace([np.array([i, j]) for i in range(GRID_WIDTH) for j in range(GRID_HEIGHT)]) 46 | self.action_space = DiscreteSpace( 47 | [np.array([0]), np.array([1]), np.array([2]), np.array([3])] 48 | ) # 0: up, 1: right, 2: down, 3: left 49 | self.state_transition_dist_type = DistributionType.CATEGORICAL.value 50 | 51 | self.start_state = start 52 | self.cur_state = start 53 | self.goal_state = goal 54 | self.wind = wind 55 | self.step_reward = -1 56 | self.goal_reward = 10 57 | self.path = [] 58 | 59 | @override 60 | def reset(self): 61 | self.cur_state = self.start_state 62 | return self.cur_state, {} 63 | 64 | @override 65 | def state_transition_func(self, state: np.ndarray, action: np.ndarray) -> tuple[list[np.ndarray], list[float]]: 66 | # NOTE: This environment has deterministic transition 67 | i, j = state 68 | 69 | new_states = [] 70 | if action == 0: 71 | next_state = np.array([i, min(j + 1 + self.wind[i], GRID_HEIGHT - 1)]) 72 | elif action == 1: 73 | new_i = min(i + 1, GRID_WIDTH - 1) 74 | next_state = np.array([new_i, min(j + self.wind[new_i], GRID_HEIGHT - 1)]) 75 | elif action == 2: 76 | next_state = np.array([i, max(0, min(j - 1 + self.wind[i], GRID_HEIGHT - 1))]) 77 | elif action == 3: 78 | new_i = max(i - 1, 0) 79 | next_state = np.array([new_i, min(j + self.wind[new_i], GRID_HEIGHT - 1)]) 80 | 81 | new_states.append(next_state) 82 | probs = [1.0] 83 | 84 | return new_states, probs 85 | 86 | @override 87 | def reward_func(self, state: np.ndarray, action: np.ndarray = None, new_state: np.ndarray = None) -> float: 88 | # R(s, s') 89 | # Transition to goal state gives goal reward. 90 | # Transition to free gives step reward. 91 | if np.allclose(new_state, self.goal_state): 92 | reward = 100 93 | reward = self.goal_reward 94 | else: 95 | reward = self.step_reward 96 | 97 | return reward 98 | 99 | def add_path(self, path): 100 | self.path = path 101 | 102 | @override 103 | def render(self): 104 | _, ax = plt.subplots() 105 | self.gridworld = np.full((GRID_HEIGHT, GRID_WIDTH), 0) 106 | 107 | self.gridworld[GRID_HEIGHT - self.start_state[1] - 1][self.start_state[0]] = 2 108 | self.gridworld[GRID_HEIGHT - self.goal_state[1] - 1][self.goal_state[0]] = 3 109 | 110 | if len(self.path) > 0: 111 | for state in self.path: 112 | self.gridworld[GRID_HEIGHT - state[1] - 1][state[0]] = 4 113 | else: 114 | self.gridworld[GRID_HEIGHT - self.cur_state[1] - 1][self.cur_state[0]] = 4 115 | 116 | self.colour_map = colors.ListedColormap(['white', 'black', 'yellow', 'red', 'green']) 117 | bounds = [0, 1, 2, 3, 4, 5] 118 | self.norm = colors.BoundaryNorm(bounds, self.colour_map.N) 119 | 120 | ax.imshow(self.gridworld, cmap=self.colour_map, norm=self.norm) 121 | 122 | # draw gridlines 123 | ax.grid(which='major', axis='both', linestyle='-', color='k', linewidth=1) 124 | ax.set_xticks(np.arange(GRID_WIDTH) - 0.5) 125 | ax.set_xticklabels(np.array([str(i) for i in range(GRID_WIDTH)])) 126 | ax.set_yticks(np.flip(np.arange(GRID_HEIGHT) + 0.5)) 127 | ax.set_yticklabels(np.array([str(i) for i in range(GRID_HEIGHT)])) 128 | plt.tick_params(axis='both', labelsize=5, length=0) 129 | 130 | plt.show() 131 | -------------------------------------------------------------------------------- /robotics_algorithm/env/continuous_1d/double_integrator_env.py: -------------------------------------------------------------------------------- 1 | from typing_extensions import override 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy 
as np 5 | 6 | from robotics_algorithm.robot.double_integrator import DoubleIntegrator 7 | from robotics_algorithm.env.base_env import ( 8 | ContinuousSpace, 9 | StochasticEnv, 10 | PartiallyObservableEnv, 11 | FunctionType, 12 | DistributionType, 13 | ) 14 | 15 | 16 | class DoubleIntegratorEnv(StochasticEnv, PartiallyObservableEnv): 17 | """A DoubleIntegrator robot is tasked to stop at the goal state which is at zero position with zero velocity. 18 | 19 | State: [pos, vel] 20 | Action: [acceleration] 21 | 22 | Continuous state space. 23 | Continuous action space. 24 | Stochastic transition. 25 | Partially observable. 26 | 27 | Linear state transition function. 28 | Quadratic cost. 29 | """ 30 | 31 | def __init__(self, size=10, observation_noise_std=0.01, state_transition_noise_std=0.01): 32 | super().__init__() 33 | 34 | self.size = size 35 | 36 | self.robot_model = DoubleIntegrator() 37 | self.state_space = ContinuousSpace(low=[-self.size / 2, -1e10], high=[self.size / 2, 1e10]) # pos and vel 38 | self.action_space = ContinuousSpace(low=[-1e10], high=[1e10]) # accel 39 | 40 | # declare linear state transition 41 | # x_dot = Ax + Bu 42 | self.state_transition_func_type = FunctionType.LINEAR.value 43 | self.A = self.robot_model.A 44 | self.B = self.robot_model.B 45 | 46 | # declare quadratic cost 47 | # L = x.T @ Q @ x + u.T @ R @ u 48 | self.reward_func_type = FunctionType.QUADRATIC.value 49 | self.Q = np.array([[1, 0], [0, 1]]) # state cost matrix 50 | self.R = np.array([[1]]) # control cost matrix 51 | 52 | # declare linear observation 53 | self.observation_func_type = FunctionType.LINEAR.value 54 | self.H = np.eye(2, dtype=np.float32) 55 | 56 | # state transition noise model 57 | self.state_transition_dist_type = DistributionType.GAUSSIAN.value 58 | self.state_transition_noise_std = state_transition_noise_std 59 | self.state_transition_noise_var = state_transition_noise_std * state_transition_noise_std 60 | self.state_transition_covariance_matrix = np.array( 61 | [ 62 | [self.state_transition_noise_var, 0], # only have noise on position 63 | [0, 1e-10], 64 | ], 65 | dtype=np.float32, 66 | ) 67 | 68 | # observation noise model 69 | self.observation_dist_type = DistributionType.GAUSSIAN.value 70 | self.observation_noise_std = observation_noise_std 71 | var = observation_noise_std * observation_noise_std 72 | self.observation_covariance_matrix = np.array( 73 | [ 74 | [var, 0], # only have noise on position 75 | [0, 1e-10], 76 | ], 77 | dtype=np.float32, 78 | ) 79 | 80 | self.path = None 81 | 82 | @override 83 | def reset(self): 84 | self.start_state = self.random_state() 85 | self.start_state[1] = 0.0 # zero velocity 86 | self.goal_state = [0, 0] # fixed 87 | self.goal_action = [0] 88 | self.cur_state = np.copy(self.start_state) 89 | self.step_cnt = 0 90 | 91 | return self.sample_observation(self.cur_state), {} 92 | 93 | @override 94 | def state_transition_func( 95 | self, state: np.ndarray, action: np.ndarray 96 | ) -> tuple[np.ndarray, float, bool, bool, dict]: 97 | new_state_mean = self.robot_model.control(state, action, dt=0.01) 98 | new_state_var = [self.state_transition_noise_var, 1e-10] 99 | return new_state_mean, new_state_var 100 | 101 | @override 102 | def linearize_state_transition(self, state: np.ndarray, action: np.ndarray): 103 | return self.robot_model.linearize_state_transition(state, action) 104 | 105 | @override 106 | def linearize_observation(self, state, observation): 107 | return self.H 108 | 109 | @override 110 | def sample_observation(self, state): 111 | # simulate 
measuring the position with noise 112 | state = np.array(state).reshape(-1, 1) 113 | 114 | # noisy observation 115 | obs = self.H @ state + np.random.multivariate_normal( 116 | mean=[0, 0], cov=self.observation_covariance_matrix 117 | ).reshape(2, 1) 118 | 119 | return obs.reshape(-1) 120 | 121 | @override 122 | def reward_func(self, state: np.ndarray, action: np.ndarray = None, new_state: np.ndarray = None) -> float: 123 | # check bounds 124 | if ( 125 | state[0] <= self.state_space.space[0][0] 126 | or state[0] >= self.state_space.space[1][0] 127 | or state[1] <= self.state_space.space[0][1] 128 | or state[1] >= self.state_space.space[1][1] 129 | ): 130 | return -100 131 | 132 | # quadratic reward func 133 | state = np.array(state) 134 | action = np.array(action) 135 | 136 | cost = state.T @ self.Q @ state + action.T @ self.R @ action 137 | return -cost 138 | 139 | @override 140 | def is_state_terminal(self, state): 141 | term = False 142 | # Check bounds 143 | if ( 144 | state[0] <= self.state_space.space[0][0] 145 | or state[0] >= self.state_space.space[1][0] 146 | or state[1] <= self.state_space.space[0][1] 147 | or state[1] >= self.state_space.space[1][1] 148 | ): 149 | term = True 150 | 151 | # Check goal state reached for termination 152 | if np.allclose(state, self.goal_state, atol=1e-3): 153 | term = True 154 | 155 | return term 156 | 157 | @override 158 | def get_state_info(self, state): 159 | info = {'success': False} 160 | if np.allclose(state, self.goal_state, atol=1e-3): 161 | info = {'success': True} 162 | 163 | return info 164 | 165 | @override 166 | def render(self): 167 | fig, ax = plt.subplots(2, 1, dpi=100) 168 | ax[0].plot(0, self.start_state[0], 'o') 169 | ax[1].plot(0, self.start_state[1], 'o') 170 | 171 | if self.path: 172 | for i, state in enumerate(self.path): 173 | ax[0].plot(i, state[0], 'o') 174 | ax[1].plot(i, state[1], 'o') 175 | 176 | ax[0].set_ylim(-self.size / 2, self.size / 2) 177 | plt.show() 178 | 179 | def add_path(self, path): 180 | self.path = path 181 | -------------------------------------------------------------------------------- /robotics_algorithm/env/continuous_2d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/env/continuous_2d/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/env/continuous_2d/diff_drive_2d_env_base.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Any 3 | 4 | import numpy as np 5 | from matplotlib import colors 6 | from typing_extensions import override 7 | 8 | from robotics_algorithm.env.base_env import ( 9 | BaseEnv, 10 | ContinuousSpace, 11 | DiscreteSpace, 12 | ) 13 | from robotics_algorithm.robot.differential_drive import DiffDrive 14 | 15 | DEFAULT_OBSTACLES = [[2, 2, 0.5], [5, 5, 1], [3, 8, 0.5], [8, 3, 1]] 16 | DEFAULT_START = [0.5, 0.5, 0] 17 | DEFAULT_GOAL = [9.0, 9.0, math.radians(90)] 18 | 19 | 20 | class DiffDrive2DEnv(BaseEnv): 21 | """A differential drive robot must reach goal state in a 2d maze with obstacles. 22 | 23 | State: [x, y, theta] 24 | Action: [lin_vel, ang_vel] 25 | 26 | There are two modes. 27 | - If user does not set a reference path, the reward only encourages robot to reach goal as fast as possible. This 28 | environment hence behaves like a path planning environment. 
29 | - If user sets a reference path, the reward will encourage the robot to track the path to reach the goal. Hence, the 30 | environment behaves like a path following (control) environment. 31 | """ 32 | 33 | FREE_SPACE = 0 34 | OBSTACLE = 1 35 | START = 2 36 | GOAL = 3 37 | PATH = 4 38 | WAYPOINT = 5 39 | MAX_POINT_TYPE = 6 40 | 41 | def __init__(self, size=10, robot_radius=0.2, action_dt=1.0, discrete_action=False, has_kinematics_constraint=True): 42 | """ 43 | Initialize a differential drive robot environment. 44 | 45 | Args: 46 | size (int): size of the maze 47 | robot_radius (float): radius of the robot 48 | action_dt (float): time step for the robot actions. 49 | discrete_action (bool): whether the action space is discrete or continuous 50 | """ 51 | super().__init__() 52 | 53 | self.size = size 54 | self.maze = np.full((size, size), DiffDrive2DEnv.FREE_SPACE) 55 | 56 | self.state_space = ContinuousSpace(low=[0, 0, -math.pi], high=[self.size, self.size, math.pi]) 57 | if not discrete_action: 58 | if has_kinematics_constraint: 59 | self.action_space = ContinuousSpace(low=[0, -math.radians(30)], high=[0.5, math.radians(30)]) 60 | else: 61 | self.action_space = ContinuousSpace( 62 | low=[-float('inf'), -float('inf')], high=[float('inf'), float('inf')] 63 | ) 64 | else: 65 | self.action_space = DiscreteSpace( 66 | [ 67 | (0.5, 0), 68 | (0.5, math.radians(30)), 69 | (0.5, -math.radians(30)), 70 | (0.25, 0), 71 | (0.25, math.radians(30)), 72 | (0.25, -math.radians(30)), 73 | ] 74 | ) 75 | 76 | # self.colour_map = colors.ListedColormap(['white', 'black', 'red', 'blue', 'green', 'yellow']) 77 | # bounds = [ 78 | # DiffDrive2DEnv.FREE_SPACE, 79 | # DiffDrive2DEnv.OBSTACLE, 80 | # DiffDrive2DEnv.START, 81 | # DiffDrive2DEnv.GOAL, 82 | # DiffDrive2DEnv.PATH, 83 | # DiffDrive2DEnv.WAYPOINT, 84 | # DiffDrive2DEnv.MAX_POINT_TYPE, 85 | # ] 86 | # self.norm = colors.BoundaryNorm(bounds, self.colour_map.N) 87 | 88 | self.robot_model = DiffDrive(wheel_dist=0.2, wheel_radius=0.05) 89 | self.robot_radius = robot_radius 90 | self.action_dt = action_dt 91 | 92 | self.path = None 93 | self.path_dict = {} 94 | self.local_plan = None 95 | 96 | # others 97 | self.interactive_viz = False # Interactive viz 98 | self._fig_created = False 99 | 100 | @override 101 | def reset(self, empty=False, random_env=True): 102 | if random_env: 103 | self._random_obstacles() 104 | self.start_state = self._random_valid_state() 105 | self.goal_state = self._random_valid_state() 106 | else: 107 | self.obstacles = DEFAULT_OBSTACLES 108 | self.start_state = DEFAULT_START 109 | self.goal_state = DEFAULT_GOAL 110 | 111 | # no obstacles if empty 112 | if empty: 113 | self.obstacles = [] 114 | 115 | self.cur_state = self.start_state.copy() 116 | 117 | return self.sample_observation(self.cur_state), {} 118 | 119 | @override 120 | def state_transition_func(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 121 | # compute next state 122 | new_state = self.robot_model.control(state, action, dt=self.action_dt) 123 | 124 | return new_state 125 | 126 | @override 127 | def is_state_terminal(self, state): 128 | # Compute term and info 129 | term = False 130 | if state[0] <= 0 or state[0] >= self.size or state[1] <= 0 or state[1] >= self.size: 131 | term = True 132 | 133 | if not self.is_state_valid(state): 134 | term = True 135 | 136 | # Check goal state reached for termination 137 | if self.is_state_similar(state, self.goal_state): 138 | term = True 139 | 140 | return term 141 | 142 | @override 143 | def get_state_info(self, 
state): 144 | info = {'success': False} 145 | if self.is_state_similar(state, self.goal_state): 146 | info['success'] = True 147 | 148 | return info 149 | 150 | @override 151 | def is_state_valid(self, state: np.ndarray) -> bool: 152 | for obstacle in self.obstacles: 153 | if np.linalg.norm(state[:2] - np.array(obstacle[:2])) <= obstacle[2] + self.robot_radius: 154 | return False 155 | 156 | return True 157 | 158 | def is_state_similar(self, state1: np.ndarray, state2: np.ndarray) -> bool: 159 | return np.linalg.norm(state1 - state2) < 0.2 160 | 161 | def _random_obstacles(self, num_of_obstacles: int = 5): 162 | self.obstacles = [] 163 | for _ in range(num_of_obstacles): 164 | obstacle = np.random.uniform([0, 0, 0.1], [self.size, self.size, 1]) 165 | self.obstacles.append(obstacle.tolist()) 166 | 167 | def _random_valid_state(self): 168 | while True: 169 | robot_pos = np.random.uniform(self.state_space.space[0], self.state_space.space[1]) 170 | if self.is_state_valid(robot_pos): 171 | break 172 | 173 | return robot_pos 174 | -------------------------------------------------------------------------------- /robotics_algorithm/env/discrete_world_1d.py: -------------------------------------------------------------------------------- 1 | from typing_extensions import override 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | from robotics_algorithm.env.base_env import StochasticEnv, PartiallyObservableEnv, DiscreteSpace, DistributionType 7 | 8 | # center value being the probability of perfect control 9 | TRANSITION_PROB = [0.05, 0.1, 0.7, 0.1, 0.05] 10 | # center value being the probability of perfect measurement 11 | OBS_PROB = [0.05, 0.1, 0.7, 0.1, 0.05] 12 | 13 | 14 | class DiscreteWorld1D(StochasticEnv, PartiallyObservableEnv): 15 | """A robot moves on a straight line and can only end up in integer value positions. 16 | 17 | State: [pos] 18 | Action: steps to move 19 | 20 | Discrete state space. 21 | Discrete action space. 22 | Stochastic transition. 23 | Partially observable. 
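A minimal usage sketch (the same pattern as examples/state_estimation/test_discrete_bayes_filter.py):

    env = DiscreteWorld1D()
    obs, _ = env.reset()
    new_obs, reward, term, trunc, info = env.step(env.action_space.sample())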
24 | """ 25 | def __init__(self, size=100, action_size=5, state_transition_prob=TRANSITION_PROB, obs_prob=OBS_PROB): 26 | super().__init__() 27 | 28 | self.size = size 29 | 30 | self.state_space = DiscreteSpace(values=[[x] for x in range(size)]) 31 | self.action_space = DiscreteSpace(values=[[x] for x in range(-action_size, action_size + 1)]) 32 | self.observation_space = DiscreteSpace(values=[[x] for x in range(size)]) 33 | 34 | self.state_transition_prob = state_transition_prob 35 | self.obs_prob = obs_prob 36 | self.state_transition_dist_type = DistributionType.CATEGORICAL.value 37 | self.observation_dist_type = DistributionType.CATEGORICAL.value 38 | 39 | @override 40 | def reset(self): 41 | self.cur_state = self.state_space.sample() 42 | self.goal_state = [self.size // 2] 43 | 44 | return self.sample_observation(self.cur_state), {} 45 | 46 | @override 47 | def render(self): 48 | plt.figure(dpi=100) 49 | plt.plot(self.cur_state[0], 0, "o") 50 | plt.ylim(-1, 1) 51 | plt.xlim(0, self.size) 52 | plt.show() 53 | 54 | @override 55 | def state_transition_func(self, state: np.ndarray, action: np.ndarray) -> tuple[np.ndarray[tuple], np.ndarray[float]]: 56 | state = state[0] 57 | action = action[0] 58 | 59 | # move in the specified direction with some small chance of error 60 | tmp = int(len(self.state_transition_prob) // 2) 61 | new_state = state + action 62 | new_states = [[x] for x in range(new_state - tmp, new_state + tmp + 1)] 63 | probs = self.state_transition_prob 64 | 65 | for i in range(len(new_states)): 66 | # clamp state 67 | if new_states[i][0] < 0: 68 | new_states[i] = [0] 69 | if new_states[i][0] >= self.size: 70 | new_states[i] = [self.size - 1] 71 | 72 | return new_states, probs 73 | 74 | @override 75 | def reward_func(self, state: np.ndarray, action: np.ndarray = None, new_state: np.ndarray = None) -> float: 76 | # Check bounds 77 | if new_state[0] < 0 or new_state[0] >= self.size: 78 | return -100 79 | 80 | return -abs(new_state[0]) 81 | 82 | @override 83 | def observation_func(self, state: np.ndarray) -> tuple[np.ndarray[np.ndarray], np.ndarray[float]]: 84 | state = state[0] 85 | tmp = int(len(self.obs_prob) // 2) 86 | obss = [[x] for x in range(state - tmp, state + tmp + 1)] 87 | probs = self.obs_prob 88 | 89 | return obss, probs 90 | 91 | @override 92 | def is_state_terminal(self, state): 93 | term = False 94 | if state[0] < 0 or state[0] >= self.size: 95 | term = True 96 | 97 | if state[0] == self.goal_state[0]: 98 | term = True 99 | 100 | return term 101 | -------------------------------------------------------------------------------- /robotics_algorithm/env/inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from typing_extensions import override 6 | 7 | from robotics_algorithm.env.base_env import ContinuousSpace, DeterministicEnv, FullyObservableEnv, FunctionType 8 | from robotics_algorithm.robot.pendulum import Pendulum 9 | 10 | 11 | class InvertedPendulumEnv(DeterministicEnv, FullyObservableEnv): 12 | """Inverted pendulum environment. 
13 | 14 | State: [theta, theta_dot] 15 | Action: [torque] 16 | 17 | """ 18 | 19 | def __init__(self, quadratic_reward=True): 20 | super().__init__() 21 | 22 | self.state_transition_func_type = FunctionType.LINEAR.value 23 | 24 | # state and action space 25 | self.state_space = ContinuousSpace(low=[-float('inf'), -float('inf')], high=[float('inf'), float('inf')]) 26 | self.action_space = ContinuousSpace(low=[-1e10], high=[1e10]) # torque 27 | 28 | # reward 29 | self.quadratic_reward = quadratic_reward 30 | if quadratic_reward: 31 | self.reward_func_type = FunctionType.QUADRATIC.value 32 | self.Q = np.array([[10, 0], [0, 10]]) 33 | self.R = np.array([[1]]) 34 | 35 | # robot model 36 | self.robot_model = Pendulum() 37 | self.action_dt = self.robot_model.dt 38 | self._ref_state = np.array([-np.pi, 0]) 39 | 40 | self.theta_threshold_radians = 12 * 2 * math.pi / 360 41 | self.max_steps = 500 42 | 43 | # others 44 | self._fig_created = False 45 | 46 | @override 47 | def reset(self): 48 | self.cur_state = np.random.randn(2) * 0.1 # near-upright position 49 | self.goal_state = np.array([0, 0]) # upright position 50 | self.goal_action = np.zeros(1) 51 | self.step_cnt = 0 52 | 53 | return self.sample_observation(self.cur_state), {} 54 | 55 | @override 56 | def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]: 57 | new_state, reward, term, trunc, info = super().step(action) 58 | 59 | self.step_cnt += 1 60 | trunc = self.step_cnt > self.max_steps # set truncated flag 61 | 62 | return new_state, reward, term, trunc, info 63 | 64 | @override 65 | def state_transition_func(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 66 | # compute next state 67 | new_state = self.robot_model.control(state, action) 68 | return new_state 69 | 70 | @override 71 | def reward_func(self, state: np.ndarray, action: np.ndarray = None, new_state: np.ndarray = None) -> float: 72 | terminated = self.is_state_terminal(new_state) 73 | 74 | if not terminated: 75 | if self.quadratic_reward: 76 | reward = -new_state.T @ self.Q @ new_state - action.T @ self.R @ action 77 | else: 78 | reward = 1.0 79 | else: 80 | reward = -1.0 81 | 82 | return reward 83 | 84 | @override 85 | def is_state_terminal(self, state): 86 | theta = state[0] 87 | 88 | term = bool(theta < -self.theta_threshold_radians or theta > self.theta_threshold_radians) 89 | 90 | return term 91 | 92 | @override 93 | def linearize_state_transition(self, state, action): 94 | return self.robot_model.linearize_state_transition(state, action) 95 | 96 | @override 97 | def render(self): 98 | if not self._fig_created: 99 | # Create an animation of the pendulum 100 | plt.ion() 101 | plt.figure() 102 | 103 | self._fig_created = True 104 | 105 | plt.clf() 106 | theta = self.cur_state[0] + np.pi 107 | x_pendulum = self.robot_model.L * np.sin(theta) 108 | y_pendulum = -self.robot_model.L * np.cos(theta) 109 | plt.plot([0, x_pendulum], [0, y_pendulum], 'o-', lw=2) 110 | plt.xlim(-self.robot_model.L - 0.1, self.robot_model.L + 0.1) 111 | plt.ylim(-self.robot_model.L - 0.1, self.robot_model.L + 0.1) 112 | plt.pause(0.01) 113 | -------------------------------------------------------------------------------- /robotics_algorithm/learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/learning/__init__.py -------------------------------------------------------------------------------- 
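# Illustrative sketch (not part of the repository): balancing InvertedPendulumEnv above with a
# discrete-time LQR computed directly from the env's own linearization and cost matrices. The repo
# ships its own LQR class under control/optimal_control; this standalone loop only assumes the env
# API shown above plus scipy.linalg.solve_discrete_are, and that linearize_state_transition returns
# discrete-time (A, B) matrices about the supplied operating point.
import numpy as np
from scipy.linalg import solve_discrete_are

from robotics_algorithm.env.inverted_pendulum import InvertedPendulumEnv

env = InvertedPendulumEnv()
state, _ = env.reset()
A, B = env.linearize_state_transition(env.goal_state, env.goal_action)
P = solve_discrete_are(A, B, env.Q, env.R)              # solve the discrete-time algebraic Riccati equation
K = np.linalg.inv(env.R + B.T @ P @ B) @ (B.T @ P @ A)  # optimal state-feedback gain
for _ in range(300):
    action = -K @ state                                 # u = -K x regulates the state to the upright goal
    state, _, term, trunc, _ = env.step(action)
    if term or trunc:
        break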
/robotics_algorithm/learning/reinforcement_learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/learning/reinforcement_learning/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/learning/reinforcement_learning/mc_control_on_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from typing import Callable 4 | 5 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 6 | 7 | 8 | class MCControlOnPolicy: 9 | def __init__(self, env: MDPEnv, max_episode_len: int = 5000) -> None: 10 | """ 11 | Constructor. 12 | 13 | Args: 14 | env (MDPEnv): the env. 15 | """ 16 | assert env.state_space.type == SpaceType.DISCRETE.value 17 | assert env.action_space.type == SpaceType.DISCRETE.value 18 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 19 | 20 | self.env = env 21 | self._max_episode_len = max_episode_len 22 | 23 | def make_epsilon_greedy_policy(self, epsilon, num_actions): 24 | """ 25 | Creates an epsilon-greedy policy based on a given Q-function and epsilon. 26 | 27 | Args: 28 | epsilon: The probability to select a random action . float between 0 and 1. 29 | num_actions: Number of actions in the environment. 30 | 31 | Returns: 32 | A function that takes the observation as an argument and returns 33 | the probabilities for each action in the form of a numpy array of length num_actions. 34 | 35 | """ 36 | 37 | def policy_fn(state): 38 | # epsilon probability of picking a random action. 39 | 40 | action_prob = np.ones(num_actions, dtype=float) * epsilon / num_actions 41 | 42 | # (1 - epsilon) probability of picking the best action 43 | state_key = tuple(state.tolist()) 44 | best_action = np.argmax(self.Q[state_key]) 45 | action_prob[best_action] += 1 - epsilon 46 | return action_prob 47 | 48 | return policy_fn 49 | 50 | def run( 51 | self, num_episodes: int = 500, discount_factor: float = 0.95, epsilon: float = 0.1 52 | ) -> tuple[dict, Callable]: 53 | """ 54 | Monte Carlo Control using Epsilon-Greedy policies. 55 | Finds an optimal epsilon-greedy policy. 56 | 57 | Args: 58 | num_episodes (int): Number of episodes to sample. Defaults to 500. 59 | discount_factor (float, optional): Gamma discount factor. Defaults to 0.95. 60 | epsilon (float, optional): Chance the sample a random action. Float between 0 and 1. Defaults to 0.1. 61 | 62 | Returns: 63 | tuple[dict, Callable]: A tuple (Q, policy). 64 | Q is a dictionary mapping state -> action values. 65 | policy is a function that takes an observation as an argument and returns action probabilities. 66 | """ 67 | # Keeps track of sum and count of returns for each state 68 | # to calculate an average. We could use an array to save all 69 | # returns (like in the book) but that's memory inefficient. 70 | state_action_visit_cnt = defaultdict(int) 71 | 72 | all_actions = self.env.action_space.get_all() 73 | 74 | # for plotting 75 | self.episodes = [] 76 | self.cumulative_rewards = [] 77 | 78 | # The final action-value function. 79 | # A nested dictionary that maps state -> (action -> action-value). 
80 | self.Q = defaultdict(lambda: np.zeros(self.env.action_space.size)) 81 | 82 | # The policy we're following 83 | policy = self.make_epsilon_greedy_policy(epsilon, self.env.action_space.size) 84 | 85 | # Learn 86 | for i_episode in range(1, num_episodes + 1): 87 | # Generate an episode. 88 | # An episode is an array of (state, action, reward) tuples 89 | trajectory = [] 90 | cumulative_reward = 0 91 | state, _ = self.env.reset() 92 | 93 | # Run the episode 94 | for steps in range(self._max_episode_len): 95 | # choose action according to epsilon-greedy policy 96 | action_probs = policy(state) 97 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 98 | action = all_actions[action_idx] 99 | next_state, reward, term, trunc, _ = self.env.step(action) 100 | trajectory.append((tuple(state.tolist()), tuple(action.tolist()), reward)) 101 | 102 | steps += 1 103 | cumulative_reward += reward 104 | 105 | if term or trunc: 106 | break 107 | 108 | state = next_state 109 | 110 | # every-visit MC Prediction 111 | # Find all states the we've visited in this episode 112 | # We convert each state to a tuple so that we can use it as a dict key 113 | state_action_in_episode = set( 114 | [(tuple(x[0]), x[1]) for x in trajectory] 115 | ) # we use set because it is first-visit 116 | for state_action in state_action_in_episode: 117 | state, action = state_action 118 | 119 | # calculate total return for that state: 120 | first_idx = next(i for i, x in enumerate(trajectory) if x[0] == state and x[1] == action) 121 | # calculate total return 122 | total_return = sum([x[2] * (discount_factor**i) for i, x in enumerate(trajectory[first_idx:])]) 123 | # calculate average return in incremental fashion 124 | state_action_visit_cnt[state_action] += 1 125 | self.Q[state][action] = self.Q[state][action] + (1.0 / state_action_visit_cnt[state_action]) * ( 126 | total_return - self.Q[state][action] 127 | ) 128 | 129 | # improve policy based on updated Q function 130 | # NOTE: we do not need to call this because Q function value is already updated 131 | # policy = self.make_epsilon_greedy_policy(epsilon, self.env.action_space.size) 132 | 133 | # Print out which episode we're on, useful for debugging. 134 | if i_episode % 1 == 0: 135 | self.cumulative_rewards.append(cumulative_reward) 136 | self.episodes.append(i_episode) 137 | print('Episode {}/{}, reward : {}'.format(i_episode, num_episodes, cumulative_reward)) 138 | 139 | return self.Q, policy 140 | 141 | def get_learning_curve(self): 142 | return self.episodes, self.cumulative_rewards 143 | -------------------------------------------------------------------------------- /robotics_algorithm/learning/reinforcement_learning/q_learning.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from typing import Callable 4 | 5 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 6 | 7 | 8 | class QLearning: 9 | def __init__(self, env: MDPEnv, max_episode_len: int = 5000) -> None: 10 | """ 11 | Constructor. 12 | 13 | Args: 14 | env (MDPEnv): the env. 
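        Note:
            Q-learning is off-policy: actions are drawn from an epsilon-greedy behaviour policy,
            while the TD target in ``run`` bootstraps with ``max_a Q(s', a)``.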
15 | """ 16 | assert env.state_space.type == SpaceType.DISCRETE.value 17 | assert env.action_space.type == SpaceType.DISCRETE.value 18 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 19 | 20 | self.env = env 21 | self._max_episode_len = max_episode_len 22 | 23 | def make_epsilon_greedy_policy(self, epsilon, num_actions): 24 | """ 25 | Creates an epsilon-greedy policy based on a given Q-function and epsilon. 26 | 27 | Args: 28 | epsilon: The probability to select a random action . float between 0 and 1. 29 | num_actions: Number of actions in the environment. 30 | 31 | Returns: 32 | A function that takes the observation as an argument and returns 33 | the probabilities for each action in the form of a numpy array of length num_actions. 34 | 35 | """ 36 | 37 | def policy_fn(state): 38 | # epsilon probability of picking a random action. 39 | action_prob = np.ones(num_actions, dtype=float) * epsilon / num_actions 40 | 41 | # (1 - epsilon) probability of picking the best action 42 | state_key = tuple(state.tolist()) 43 | best_action = np.argmax(self.Q[state_key]) 44 | action_prob[best_action] += 1 - epsilon 45 | return action_prob 46 | 47 | return policy_fn 48 | 49 | def run( 50 | self, num_episodes: int = 500, discount_factor: float = 0.95, epsilon: float = 0.1, alpha=0.05 51 | ) -> tuple[dict, Callable]: 52 | """ 53 | Run the SARSA algorithm to learn the optimal policy. 54 | 55 | Args: 56 | num_episodes (int): Number of episodes to sample. Defaults to 500. 57 | discount_factor (float, optional): Gamma discount factor. Defaults to 0.95. 58 | epsilon (float, optional): Chance the sample a random action. Float betwen 0 and 1. Defaults to 0.1. 59 | alpha (float): TD Update rate. 60 | 61 | Returns: 62 | tuple[dict, Callable]: A tuple (Q, policy). 63 | Q is a dictionary mapping state -> action values. 64 | policy is a function that takes an observation as an argument and returns 65 | action probabilities 66 | """ 67 | all_actions = self.env.action_space.get_all() 68 | 69 | # for plotting 70 | self.episodes = [] 71 | self.cumulative_rewards = [] 72 | 73 | # The final action-value function. 74 | # A nested dictionary that maps state -> (action -> action-value). 75 | self.Q = defaultdict(lambda: np.zeros(self.env.action_space.size)) 76 | 77 | # The policy we're following 78 | behaviour_policy = self.make_epsilon_greedy_policy(epsilon, self.env.action_space.size) 79 | 80 | # Iterate over each episode 81 | for i_episode in range(1, num_episodes + 1): 82 | # Initialize the state and action 83 | state, _ = self.env.reset() 84 | 85 | cumulative_reward = 0 86 | # Run the episode 87 | for steps in range(self._max_episode_len): 88 | # Take action, observe reward and next state 89 | action_probs = behaviour_policy(state) 90 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 91 | action = all_actions[action_idx] 92 | next_state, reward, term, trunc, _ = self.env.step(action) 93 | cumulative_reward += reward 94 | 95 | # Perform TD Update 96 | # ! we perform TD update even if term or trunc is true. Although Q function for terminal 97 | # ! state does not make sense, this is needed so that the negative reward earned when transitioning 98 | # ! to terminal state is learned. 99 | # ! 
For q learning, next action is selected based on greedy policy, so here just use np.max 100 | state_key = tuple(state.tolist()) 101 | action_key = tuple(action.tolist()) 102 | next_state_key = tuple(next_state.tolist()) 103 | td_target = reward + discount_factor * np.max(self.Q[next_state_key]).item() 104 | td_delta = td_target - self.Q[state_key][action_key] 105 | self.Q[state_key][action_key] += alpha * td_delta 106 | 107 | # Print debug information every 1000 steps 108 | # if steps % 1000 == 0: 109 | # print(state) 110 | # print(action_probs) 111 | # print(steps) 112 | 113 | # Update state and action 114 | state = next_state 115 | 116 | # ! for q-learning, action must be sampled using behaviour policy 117 | # action = next_action 118 | 119 | # Check for termination 120 | if term or trunc: 121 | break 122 | 123 | # Log progress after each episode 124 | if i_episode % 1 == 0: 125 | self.cumulative_rewards.append(cumulative_reward) 126 | self.episodes.append(i_episode) 127 | print("Episode {}/{}, reward : {}".format(i_episode, num_episodes, cumulative_reward)) 128 | 129 | return self.Q, behaviour_policy 130 | 131 | def get_learning_curve(self): 132 | return self.episodes, self.cumulative_rewards 133 | -------------------------------------------------------------------------------- /robotics_algorithm/learning/reinforcement_learning/sarsa.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from typing import Callable 4 | 5 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 6 | 7 | 8 | class SARSA: 9 | def __init__(self, env: MDPEnv, max_episode_len: int = 5000) -> None: 10 | """ 11 | Constructor. 12 | 13 | Args: 14 | env (MDPEnv): the env. 15 | """ 16 | assert env.state_space.type == SpaceType.DISCRETE.value 17 | assert env.action_space.type == SpaceType.DISCRETE.value 18 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 19 | 20 | self.env = env 21 | self._max_episode_len = max_episode_len 22 | 23 | def make_epsilon_greedy_policy(self, epsilon, num_actions): 24 | """ 25 | Creates an epsilon-greedy policy based on a given Q-function and epsilon. 26 | 27 | Args: 28 | epsilon: The probability to select a random action . float between 0 and 1. 29 | num_actions: Number of actions in the environment. 30 | 31 | Returns: 32 | A function that takes the observation as an argument and returns 33 | the probabilities for each action in the form of a numpy array of length num_actions. 34 | 35 | """ 36 | 37 | def policy_fn(state): 38 | # epsilon probability of picking a random action. 39 | action_prob = np.ones(num_actions, dtype=float) * epsilon / num_actions 40 | 41 | # (1 - epsilon) probability of picking the best action 42 | state_key = tuple(state.tolist()) 43 | best_action = np.argmax(self.Q[state_key]) 44 | action_prob[best_action] += 1 - epsilon 45 | return action_prob 46 | 47 | return policy_fn 48 | 49 | def run( 50 | self, num_episodes: int = 500, discount_factor: float = 0.95, epsilon: float = 0.1, alpha=0.05 51 | ) -> tuple[dict, Callable]: 52 | """ 53 | Run the SARSA algorithm to learn the optimal policy. 54 | 55 | Args: 56 | num_episodes (int): Number of episodes to sample. Defaults to 500. 57 | discount_factor (float, optional): Gamma discount factor. Defaults to 0.95. 58 | epsilon (float, optional): Chance the sample a random action. Float betwen 0 and 1. Defaults to 0.1. 59 | alpha (float): TD Update rate. 
60 | 61 | Returns: 62 | tuple[dict, Callable]: A tuple (Q, policy). 63 | Q is a dictionary mapping state -> action values. 64 | policy is a function that takes an observation as an argument and returns 65 | action probabilities 66 | """ 67 | all_actions = self.env.action_space.get_all() 68 | 69 | # for plotting 70 | self.episodes = [] 71 | self.cumulative_rewards = [] 72 | 73 | # The final action-value function. 74 | # A nested dictionary that maps state -> (action -> action-value). 75 | self.Q = defaultdict(lambda: np.zeros(self.env.action_space.size)) 76 | 77 | # The policy we're following 78 | policy = self.make_epsilon_greedy_policy(epsilon, self.env.action_space.size) 79 | 80 | # Iterate over each episode 81 | for i_episode in range(1, num_episodes + 1): 82 | # Initialize the state and action 83 | state, _ = self.env.reset() 84 | 85 | cumulative_reward = 0 86 | # Run the episode 87 | action_probs = policy(state) 88 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 89 | action = all_actions[action_idx] 90 | for steps in range(self._max_episode_len): 91 | # Take action, observe reward and next state 92 | next_state, reward, term, trunc, _ = self.env.step(action) 93 | cumulative_reward += reward 94 | 95 | # Next state and next action 96 | next_action_probs = policy(next_state) 97 | next_action_idx = np.random.choice(np.arange(len(all_actions)), p=next_action_probs) # choose action 98 | next_action = all_actions[next_action_idx] 99 | 100 | # Perform TD Update 101 | # NOTE: we perform TD update even if term or trunc is true. Although Q function for terminal 102 | # state does not make sense, this is needed so that the negative reward earned when transitioning 103 | # to terminal state is learned. 104 | state_key = tuple(state.tolist()) 105 | action_key = tuple(action.tolist()) 106 | next_state_key = tuple(next_state.tolist()) 107 | next_action_key = tuple(next_action.tolist()) 108 | td_target = reward + discount_factor * self.Q[next_state_key][next_action_key] 109 | td_delta = td_target - self.Q[state_key][action_key] 110 | self.Q[state_key][action_key] += alpha * td_delta 111 | 112 | # Print debug information every 1000 steps 113 | # if steps % 1000 == 0: 114 | # print(state) 115 | # print(action_probs) 116 | # print(steps) 117 | 118 | # Update state and action 119 | state = next_state 120 | action = next_action 121 | 122 | # Check for termination 123 | if term or trunc: 124 | break 125 | 126 | # Log progress after each episode 127 | if i_episode % 1 == 0: 128 | self.cumulative_rewards.append(cumulative_reward) 129 | self.episodes.append(i_episode) 130 | print("Episode {}/{}, reward : {}".format(i_episode, num_episodes, cumulative_reward)) 131 | 132 | return self.Q, policy 133 | 134 | def get_learning_curve(self): 135 | return self.episodes, self.cumulative_rewards 136 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/planning/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/planning/mdp/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/planning/mdp/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/planning/mdp/policy_iteration.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | from collections import defaultdict 4 | from typing import Callable 5 | 6 | import numpy as np 7 | 8 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 9 | 10 | 11 | class PolicyIteration: 12 | def __init__(self, env: MDPEnv) -> None: 13 | assert env.state_space.type == SpaceType.DISCRETE.value 14 | assert env.action_space.type == SpaceType.DISCRETE.value 15 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 16 | 17 | self.env = env 18 | 19 | def make_greedy_policy(self, Q, num_actions): 20 | """ 21 | Creates an greedy policy based on a given Q-function. 22 | 23 | Args: 24 | Q: A dictionary that maps from state -> action-values. 25 | Each value is a numpy array of length nA (see below) 26 | num_action: Number of actions in the environment. 27 | 28 | Returns: 29 | A function that takes the observation as an argument and returns the action 30 | """ 31 | 32 | def policy_fn(state): 33 | action_prob = np.zeros(num_actions) 34 | state_key = tuple(state.tolist()) 35 | best_action_idx = np.argmax(Q[state_key], axis=-1) 36 | action_prob[best_action_idx] = 1.0 37 | return action_prob 38 | 39 | return policy_fn 40 | 41 | def run(self, discount_factor: float = 0.99) -> tuple[dict, Callable]: 42 | """Run algorithm. 43 | 44 | Args: 45 | discount_factor (float, optional): discount factor for future reward. Defaults to 0.99. 46 | 47 | Returns: 48 | Q (dict): Q function 49 | policy (Callable): policy constructed according to the Q function. 
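        Example (illustrative usage; ``env`` is any discrete, fully observable MDPEnv):
            planner = PolicyIteration(env)
            Q, policy = planner.run(discount_factor=0.99)
            action_probs = policy(state)  # one-hot greedy distribution over actions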
50 | """ 51 | print('PolicyIteration: plan!!') 52 | 53 | # random init Q 54 | Q = defaultdict(lambda: np.zeros(self.env.action_space.size)) 55 | 56 | iter = 0 57 | policy_converged = False 58 | while not policy_converged: 59 | print('PolicyIteration, iter {},'.format(iter)) 60 | prev_Q = copy.deepcopy(Q) 61 | prev_policy = self.make_greedy_policy(prev_Q, self.env.action_space.size) 62 | Q, policy_converged = self.policy_evaluation(self.env, Q, prev_policy, discount_factor) 63 | policy = self.policy_improvement(self.env, Q) 64 | iter += 1 65 | 66 | return Q, policy 67 | 68 | def policy_evaluation(self, env, Q, policy, discount_factor=0.99, max_steps=10, diff_threshold=0.01): 69 | # Value iteration operates on Bellman expectation equation 70 | # For state-action reward: 71 | # Q_pi(s, a) <- R(s, a) + discount * sum_s' [p(s' | s) * V_pi(s')] 72 | # For state-state reward: 73 | # Q_pi(s, a) = sum_s' [p(s' | s) * (R(s, s') + discount * V_pi(s'))] 74 | # V_pi(s) = sum_a [pi(a | s) * Q_pi(s, a)] 75 | 76 | self.terminal_states = set() 77 | states = env.state_space.get_all() 78 | actions = env.action_space.get_all() 79 | 80 | v_state = defaultdict(float) 81 | 82 | for state in states: 83 | action_probs = policy(state) 84 | state_key = tuple(state.tolist()) 85 | value = np.dot(action_probs, Q[state_key]) # calculate v_pi 86 | v_state[state_key] = value 87 | 88 | iter = 0 89 | max_change = np.inf 90 | while max_change > diff_threshold and iter < max_steps: 91 | v_state_new = copy.deepcopy(v_state) 92 | max_change = 0 93 | for state in states: 94 | state_key = tuple(state.tolist()) 95 | if state_key in self.terminal_states: 96 | continue 97 | 98 | for action in actions: 99 | action_key = tuple(action.tolist()) 100 | new_states, probs = env.state_transition_func(state, action) 101 | 102 | q_sa = 0 103 | for i, new_state in enumerate(new_states): 104 | new_state_key = tuple(new_state.tolist()) 105 | term = env.is_state_terminal(new_state) 106 | if term: 107 | self.terminal_states.add(new_state_key) 108 | 109 | reward = env.reward_func(state, action, new_state) 110 | q_sa += probs[i] * (reward + discount_factor * v_state[new_state_key]) 111 | 112 | # update Q(s,a) 113 | Q[state_key][action_key] = q_sa 114 | 115 | # Instead of using max, use action probability under the current policy 116 | action_probs = policy(state) 117 | value = np.dot(action_probs, Q[state_key]) # calculate v_pi 118 | 119 | # Check maximum change incurred. 120 | if v_state[state_key] != value: 121 | v_state_new[state_key] = value 122 | max_change = max(max_change, math.fabs(value - v_state[state_key])) 123 | 124 | v_state = v_state_new 125 | 126 | print('policy_evaluation, iter {}, max_change = {}'.format(iter, max_change)) 127 | iter += 1 128 | 129 | return Q, max_change < diff_threshold 130 | 131 | def policy_improvement(self, env, Q): 132 | policy = self.make_greedy_policy(Q, env.action_space.size) 133 | return policy 134 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/mdp/policy_tree_search.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 6 | 7 | 8 | class PolicyTreeSearch: 9 | def __init__(self, env: MDPEnv, max_depth: int = 5, discount_factor: float = 0.99): 10 | """Constructor. 11 | 12 | Args: 13 | env (MDPEnv): the env. 14 | max_depth (int, optional): maximum serach depth. Defaults to 5. 
15 | discount_factor (float, optional): the discount factor for future reward. Defaults to 0.99. 16 | """ 17 | assert env.state_space.type == SpaceType.DISCRETE.value 18 | assert env.action_space.type == SpaceType.DISCRETE.value 19 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 20 | 21 | self.env = env 22 | self.max_depth = max_depth 23 | self.discount_factor = discount_factor 24 | 25 | def run(self, state: np.ndarray) -> np.ndarray: 26 | """Compute best action for the current state. 27 | 28 | Args: 29 | state (np.ndarray): the current state 30 | 31 | Returns: 32 | action (np.ndarray): the best action 33 | """ 34 | print('Searching...') 35 | q, _ = self._compute_state_value(self.env, state, 0) 36 | print(q) 37 | actions = self.env.action_space.get_all() 38 | return actions[q.argmax()] 39 | 40 | def _compute_state_value(self, env: MDPEnv, state, cur_depth): 41 | q = np.zeros(env.action_space.size) 42 | 43 | # If maximum depth is not reached, recurse deeper 44 | if cur_depth <= self.max_depth: 45 | actions = env.action_space.get_all() 46 | for action_idx, action in enumerate(actions): 47 | new_states, probs = env.state_transition_func(state, action) 48 | 49 | # calculate Q values 50 | q_sa = 0 51 | for i, new_state in enumerate(new_states): 52 | # next_state, reward, term, trunc, info = result 53 | reward = self.env.reward_func(state, action, new_state) 54 | term = env.is_state_terminal(new_state) 55 | 56 | if not term: 57 | _, new_state_value = self._compute_state_value(env, new_state, cur_depth + 1) 58 | else: 59 | new_state_value = 0 60 | 61 | q_sa += probs[i] * (reward + self.discount_factor * new_state_value) 62 | 63 | # update Q(s,a) 64 | q[action_idx] = q_sa 65 | 66 | # else we do nothing, hence q and state_value will be zero. 67 | else: 68 | pass 69 | 70 | state_value = q.max() 71 | return q, state_value 72 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/mdp/value_iteration.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | from collections import defaultdict 4 | from typing import Callable 5 | 6 | import numpy as np 7 | 8 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 9 | 10 | 11 | class ValueIteration: 12 | def __init__(self, env: MDPEnv) -> None: 13 | assert env.state_space.type == SpaceType.DISCRETE.value 14 | assert env.action_space.type == SpaceType.DISCRETE.value 15 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 16 | 17 | self.env = env 18 | 19 | def make_greedy_policy(self, Q, num_actions): 20 | """ 21 | Creates an greedy policy based on a given Q-function. 22 | 23 | Args: 24 | Q: A dictionary that maps from state -> action-values. 25 | Each value is a numpy array of length nA (see below) 26 | num_action: Number of actions in the environment. 27 | 28 | Returns: 29 | A function that takes the observation as an argument and returns the action 30 | """ 31 | 32 | def policy_fn(state): 33 | action_prob = np.zeros(num_actions) 34 | state_key = tuple(state.tolist()) 35 | best_action_idx = np.argmax(Q[state_key], axis=-1) 36 | action_prob[best_action_idx] = 1.0 37 | return action_prob 38 | 39 | return policy_fn 40 | 41 | def run(self, discount_factor: float = 0.99, diff_threshold: float = 0.01) -> tuple[dict, Callable]: 42 | """Run algorithm. 43 | 44 | Args: 45 | discount_factor (float, optional): discount factor for future reward. Defaults to 0.99. 
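            diff_threshold (float, optional): convergence threshold on the largest per-state value change. Defaults to 0.01.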
46 | 47 | Returns: 48 | Q (dict): Q function 49 | policy (Callable): policy constructed according to the Q function. 50 | """ 51 | print('ValueIteration: plan!!') 52 | 53 | self.terminal_states = set() 54 | states = self.env.state_space.get_all() 55 | actions = self.env.action_space.get_all() 56 | 57 | v_state = defaultdict(float) 58 | Q = defaultdict(lambda: np.zeros(self.env.action_space.size)) 59 | 60 | # Iterate until convergence. 61 | # Value iteration operates on Bellman optimality equation 62 | # For state-action reward: 63 | # Q*(s, a) = R(s, a) + discount * sum_s' [p(s' | s) * V*(s')] 64 | # For state-state reward: 65 | # Q*(s, a) = sum_s' [p(s' | s) * (R(s, s') + discount * V*(s'))] 66 | # Note: R(s, a) = sum_s' [p(s' | s) * (R(s, s')] 67 | # V*(s) = max_a Q*(s, a) 68 | 69 | max_change = np.inf 70 | iter = 0 71 | while max_change > diff_threshold: 72 | iter += 1 73 | print('iter {}, max_change = {}'.format(iter, max_change)) 74 | 75 | v_state_new = copy.deepcopy(v_state) 76 | max_change = -np.inf 77 | for state in states: 78 | state_key = tuple(state.tolist()) 79 | if state_key in self.terminal_states: 80 | continue 81 | 82 | for action in actions: 83 | action_key = tuple(action.tolist()) 84 | new_states, probs = self.env.state_transition_func(state, action) 85 | 86 | # calculate Q values 87 | q_sa = 0 88 | for i, new_state in enumerate(new_states): 89 | new_state_key = tuple(new_state.tolist()) 90 | term = self.env.is_state_terminal(new_state) 91 | if term: 92 | self.terminal_states.add(tuple(new_state.tolist())) 93 | 94 | reward = self.env.reward_func(state, action, new_state) 95 | q_sa += probs[i] * (reward + discount_factor * v_state[new_state_key]) 96 | 97 | # update Q(s,a) 98 | Q[state_key][action_key] = q_sa 99 | 100 | # V(s) = max_a Q(s,a) 101 | value = Q[state_key].max(axis=-1) 102 | 103 | # Check maximum change incurred. 104 | if v_state[state_key] != value: 105 | v_state_new[state_key] = value 106 | max_change = max(max_change, math.fabs(value - v_state[state_key])) 107 | 108 | v_state = v_state_new 109 | 110 | policy = self.make_greedy_policy(Q, self.env.action_space.size) 111 | 112 | return Q, policy 113 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/motion_planning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/planning/motion_planning/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/planning/motion_planning/prm.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable 2 | 3 | from sklearn.neighbors import NearestNeighbors 4 | import numpy as np 5 | import networkx as nx 6 | 7 | from robotics_algorithm.env.base_env import BaseEnv, SpaceType, EnvType 8 | 9 | 10 | class ProbabilisticRoadmap: 11 | def __init__( 12 | self, 13 | env: BaseEnv, 14 | sample_func: Callable, 15 | state_col_check_func: Callable, 16 | edge_col_check_func: Callable, 17 | num_of_samples: int, 18 | num_neighbors: int, 19 | ): 20 | """_summary_ 21 | 22 | Args: 23 | env (BaseEnv): the env 24 | sample_func (Callable): the sampling function, returns a random point in space. 25 | state_col_check_func (Callable): a function that takes in a random point and returns whether it is in 26 | collision. 
27 | edge_col_check_func (Callable): a function that takes in two points and returns whether there is a 28 | collision-free simple path between them. 29 | num_of_samples (int): maximum of samples drawn during planning. 30 | num_neighbors (int): number of closest neighbors to attempt connection. 31 | """ 32 | assert env.state_space.type == SpaceType.CONTINUOUS.value 33 | assert env.action_space.type == SpaceType.CONTINUOUS.value 34 | assert env.state_transition_type == EnvType.DETERMINISTIC.value 35 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 36 | 37 | self.env = env 38 | self._sample_func = sample_func 39 | self._state_col_check_func = state_col_check_func 40 | self._edge_col_check_func = edge_col_check_func 41 | self.num_of_samples = num_of_samples 42 | self.num_neighbors = num_neighbors 43 | 44 | def compute_roadmap(self): 45 | """ 46 | Offline computation of roadmap by sampling points. 47 | """ 48 | self.all_samples = [] 49 | self.roadmap = nx.Graph() 50 | while len(self.all_samples) < self.num_of_samples: 51 | collision_free = False 52 | while not collision_free: 53 | sample = self._sample_func(self.env) 54 | collision_free = self._state_col_check_func(self.env, sample) 55 | 56 | self.all_samples.append(sample) 57 | 58 | print("PRM/compute_roadmap: finished adding {} of vertices".format(self.num_of_samples)) 59 | # print(self.V) 60 | 61 | for sample in self.all_samples: 62 | neighbors = self.get_nearest_neighbors(self.all_samples, sample, self.num_neighbors) 63 | # print("neighbours {}".format(neighbours)) 64 | for neighbor in neighbors: 65 | can_link, length = self._edge_col_check_func(self.env, sample, neighbor) 66 | if can_link: 67 | self.roadmap.add_edge(tuple(sample.tolist()), tuple(neighbor.tolist()), weight=length) 68 | 69 | def get_nearest_neighbors(self, all_vertices: np.ndarray[tuple], v: tuple, n_neighbors: int = 1) -> np.ndarray[tuple]: 70 | """ 71 | return the closest neighbors of v in all_vertices. 72 | 73 | Args: 74 | all_vertices (np.ndarray[tuple]): a np.ndarray of vertices 75 | v (tuple): the target vertex. 76 | n_neighbors (int): number of nearby neighbors. 77 | 78 | Returns: 79 | (np.ndarray[tuple]): a np.ndarray of nearby vertices 80 | """ 81 | n_neighbors = min(n_neighbors, len(all_vertices)) 82 | 83 | all_vertices = np.array(all_vertices) 84 | v = np.array(v).reshape(1, -1) 85 | 86 | nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm="ball_tree").fit(all_vertices) 87 | distances, indices = nbrs.kneighbors(v) 88 | # print("indices {}".format(indices)) 89 | nbr_vertices = np.take(np.array(all_vertices), indices.ravel(), axis=0) 90 | return nbr_vertices 91 | 92 | def get_path(self, start: np.ndarray, goal: np.ndarray) -> tuple[bool, np.ndarray[tuple], float]: 93 | """ 94 | Online computation of path. 95 | 96 | Args: 97 | start (tuple): the start state. 98 | goal (tuple): the goal state. 99 | 100 | Returns: 101 | success (boolean): return true if a path is found, return false otherwise 102 | shortest_path (np.ndarray[tuple]): a np.ndarray of vertices if shortest path is found 103 | shortest_path_len (float): the length of shortest path if found. 
104 | """ 105 | start_tup = tuple(start.tolist()) 106 | goal_tup = tuple(goal.tolist()) 107 | 108 | source_neighbors = self.get_nearest_neighbors(self.all_samples, start, self.num_neighbors) 109 | for neighbor in source_neighbors: 110 | can_link, length = self._edge_col_check_func(self.env, start, neighbor) 111 | if can_link: 112 | self.roadmap.add_edge(start_tup, tuple(neighbor.tolist()), weight=length) 113 | break 114 | else: 115 | print("can't connect start to roadmap!!!") 116 | return False, None, None 117 | 118 | goal_neighbors = self.get_nearest_neighbors(self.all_samples, goal, self.num_neighbors) 119 | for neighbor in goal_neighbors: 120 | can_link, length = self._edge_col_check_func(self.env, goal, neighbor) 121 | if can_link: 122 | self.roadmap.add_edge(goal_tup, tuple(neighbor.tolist()), weight=length) 123 | break 124 | else: 125 | print("can't connect goal to roadmap!!!") 126 | return False, None, None 127 | 128 | # path_exist, shortest_path, path_len = self._path_finder.run(self.adj_list, start, goal) 129 | path = nx.shortest_path(self.roadmap, start_tup, goal_tup, weight="weight") 130 | path_len = nx.shortest_path_length(self.roadmap, start_tup, goal_tup, weight="weight") 131 | print(path, path_len) 132 | return True, path, path_len 133 | 134 | def run(self, start: tuple, goal: tuple) -> tuple[bool, np.ndarray[tuple], float]: 135 | """ 136 | Run algorithm. 137 | 138 | Args: 139 | start (tuple): the start state. 140 | goal (tuple): the goal state. 141 | 142 | Returns: 143 | success (boolean): return true if a path is found, return false otherwise. 144 | shortest_path (np.ndarray[tuple]): a np.ndarray of vertices if shortest path is found. 145 | shortest_path_len (float): the length of shortest path if found. 146 | """ 147 | self.compute_roadmap() 148 | return self.get_path(start, goal) 149 | 150 | def get_roadmap(self): 151 | return self.roadmap 152 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/path_planning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/planning/path_planning/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/planning/path_planning/a_star.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | from typing import Any, Callable 3 | 4 | import numpy as np 5 | 6 | from robotics_algorithm.env.base_env import BaseEnv, EnvType, SpaceType 7 | 8 | 9 | class AStar: 10 | def __init__(self, env: BaseEnv, heuristic_func: Callable, state_key_func: Callable): 11 | """Constructor. 12 | 13 | Args: 14 | env (BaseEnv): A planning env. 15 | heuristic_func (Callable): a function to return estimated cost-to-go from a state to goal. 16 | state_key_func (Callable): a function to hash the state. 17 | """ 18 | assert env.action_space.type == SpaceType.DISCRETE.value 19 | assert env.state_space.type == SpaceType.DISCRETE.value 20 | assert env.state_transition_type == EnvType.DETERMINISTIC.value 21 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 22 | 23 | self.env = env 24 | self._heuristic_func = heuristic_func 25 | self._state_key_func = state_key_func 26 | 27 | def run(self, start: np.ndarray, goal: np.ndarray) -> tuple[bool, np.ndarray, float]: 28 | """Run algorithm. 
29 | 30 | Args: 31 | start (np.ndarray): the start state 32 | goal (np.ndarray): the goal state 33 | 34 | Returns: 35 | res (bool): return true if a path is found, return false otherwise. 36 | shortest_path (np.ndarray): a np.ndarray of state if shortest path is found. 37 | shortest_path_len (float): the length of shortest path if found. 38 | """ 39 | # Astar gist: for every state, f[v] = g(s, v) + h(v, g) 40 | 41 | # initialize 42 | unvisited_states = set() # OPEN set. Nodes not in this set is in CLOSE set 43 | priority_q = [] 44 | shortest_path = [] 45 | shortest_path_len = 0 46 | g = {} # cost-to-come from start to a state 47 | f = {} # cost-to-come + heuristic cost-to-go 48 | prev_state_dict = {} # used to extract shortest path 49 | state_dict = {} # key is some state key, value is the actual state. 50 | 51 | start_key = self._state_key_func(start) 52 | goal_key = self._state_key_func(goal) 53 | state_dict[start_key] = start 54 | state_dict[goal_key] = goal 55 | 56 | for state in self.env.state_space.get_all(): 57 | state_key = self._state_key_func(state) 58 | g[state_key] = float('inf') 59 | f[state_key] = float('inf') 60 | state_dict[state_key] = state 61 | unvisited_states.add(state_key) # All states are unvisited 62 | 63 | g[start_key] = 0 # distance to source is 0 64 | f[start_key] = self._heuristic_func(start, goal) # cost from source to goal = g(s, s) + h(s, g) = 0 + h(s, g) 65 | heapq.heappush(priority_q, (f[start_key], start)) 66 | 67 | # run algorithm 68 | path_exist = False 69 | while len(priority_q) > 0: 70 | # pop the best state found so far. 71 | _, best_state = heapq.heappop(priority_q) 72 | best_state = np.array(best_state) 73 | best_state_key = self._state_key_func(best_state) 74 | 75 | # filter out state visited before. This may happen if multiple path leads to the same state with 76 | # different values. 77 | if best_state_key not in unvisited_states: 78 | continue 79 | else: 80 | unvisited_states.remove(best_state_key) 81 | 82 | # If goal state has been added. 83 | if best_state_key == goal_key: 84 | path_exist = True 85 | break 86 | 87 | print(best_state, best_state_key, g[best_state_key]) 88 | # Find possible transitions from best_state, and add them to queue ranked by heuristics. 89 | actions = self.env.action_space.get_all() 90 | # print(best_state, available_actions) 91 | for action in actions: 92 | new_state, reward, term, _, info = self.env.sample_state_transition(best_state, action) 93 | cost = -reward 94 | 95 | # skip actions that result in failures 96 | if term and not info['success']: 97 | continue 98 | 99 | # if new_state has not been visited, 100 | new_state_key = self._state_key_func(new_state) 101 | if new_state_key in unvisited_states: 102 | g_new_state = g[best_state_key] + cost # cost-to-come 103 | h_new_state = self._heuristic_func(new_state, goal) # cost-to-go 104 | # print(new_state, h_new_state) 105 | if g_new_state < g[new_state_key]: 106 | g[new_state_key] = g_new_state 107 | f[new_state_key] = g_new_state + h_new_state 108 | # NOTE: here we do not need to remove the the previous stored new_state with old best-to-come 109 | # value because of new_state will be marked as visited when first poped with the best 110 | # cost-to-come value. 
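                        # (This is the standard lazy-deletion trick: stale queue entries are simply
                        # skipped at pop time by the unvisited_states check above, so no decrease-key
                        # operation on the heap is required.)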
111 | heapq.heappush(priority_q, (f[new_state_key], new_state.tolist())) 112 | prev_state_dict[new_state_key] = best_state_key 113 | 114 | if path_exist: 115 | # extract shortest path: 116 | shortest_path = [] 117 | state_key = goal_key 118 | while state_key in prev_state_dict: 119 | prev_state_key = prev_state_dict[state_key] 120 | shortest_path.append(state_dict[prev_state_key]) 121 | state_key = prev_state_key 122 | 123 | shortest_path = list(reversed(shortest_path)) 124 | shortest_path_len = g[goal_key] 125 | return (True, shortest_path, shortest_path_len) 126 | else: 127 | return (False, None, None) 128 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/path_planning/dijkstra.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | from typing import Any, Callable 3 | 4 | import numpy as np 5 | 6 | from robotics_algorithm.env.base_env import BaseEnv, EnvType, SpaceType 7 | 8 | 9 | class Dijkstra: 10 | def __init__(self, env: BaseEnv, state_key_func: Callable): 11 | """Constructor. 12 | 13 | Args: 14 | env (BaseEnv): A planning env. 15 | state_key_func (Callable): a function to hash the state. 16 | """ 17 | assert env.action_space.type == SpaceType.DISCRETE.value 18 | assert env.state_space.type == SpaceType.DISCRETE.value 19 | assert env.state_transition_type == EnvType.DETERMINISTIC.value 20 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 21 | 22 | self.env = env 23 | self._state_key_func = state_key_func 24 | 25 | def run(self, start: Any, goal: Any) -> tuple[bool, np.ndarray, float]: 26 | """Run algorithm. 27 | 28 | Args: 29 | start (Any): the start state 30 | goal (Any): the goal state 31 | 32 | Returns: 33 | res (bool): return true if a path is found, return false otherwise. 34 | shortest_path (np.ndarray): a np.ndarray of state if shortest path is found. 35 | shortest_path_len (float): the length of shortest path if found. 36 | """ 37 | # for every state, f[v] = g(s, v) 38 | 39 | # initialize 40 | unvisited_states = set() # OPEN set. Nodes not in this set is in CLOSE set 41 | priority_q = [] 42 | shortest_path = [] 43 | shortest_path_len = 0 44 | g = {} # cost-to-come from start to a state 45 | prev_state_dict = {} # used to extract shortest path 46 | state_dict = {} # key is some state key, value is the actual state. 47 | 48 | start_key = self._state_key_func(start) 49 | goal_key = self._state_key_func(goal) 50 | state_dict[start_key] = start 51 | state_dict[goal_key] = goal 52 | 53 | for state in self.env.state_space.get_all(): 54 | state_key = self._state_key_func(state) 55 | g[state_key] = float('inf') 56 | state_dict[state_key] = state 57 | unvisited_states.add(state_key) 58 | 59 | g[start_key] = 0 # distance to source is 0 60 | heapq.heappush(priority_q, (g[start_key], start)) 61 | 62 | # run algorithm 63 | path_exist = False 64 | while len(priority_q) > 0: 65 | # pop the best state found so far. 66 | _, best_state = heapq.heappop(priority_q) 67 | best_state = np.array(best_state) 68 | best_state_key = self._state_key_func(best_state) 69 | 70 | # filter out state visited before. This may happen if multiple path leads to the same state with 71 | # different values. 72 | if best_state_key not in unvisited_states: 73 | continue 74 | else: 75 | unvisited_states.remove(best_state_key) 76 | 77 | # If goal state has been added. 
78 | if best_state_key == goal_key: 79 | path_exist = True 80 | break 81 | 82 | print(best_state, best_state_key, g[best_state_key]) 83 | # Find possible transitions from best_state, and add them to queue 84 | actions = self.env.action_space.get_all() 85 | for action in actions: 86 | new_state, reward, term, _, info = self.env.sample_state_transition(best_state, action) 87 | cost = -reward 88 | 89 | # skip actions that result in failures 90 | if term and not info['success']: 91 | continue 92 | 93 | # if new_state has not been visited, 94 | new_state_key = self._state_key_func(new_state) 95 | if new_state_key in unvisited_states: 96 | g_new_state = g[best_state_key] + cost # cost-to-come 97 | if g_new_state < g[new_state_key]: 98 | g[new_state_key] = g_new_state 99 | # NOTE: here we do not need to remove the the previous stored new_state with old best-to-come 100 | # value because of new_state will be marked as visited when first poped with the best 101 | # cost-to-come value. 102 | heapq.heappush(priority_q, (g[new_state_key], new_state.tolist())) 103 | prev_state_dict[new_state_key] = best_state_key 104 | 105 | if path_exist: 106 | # extract shortest path: 107 | shortest_path = [] 108 | state_key = goal_key 109 | while state_key in prev_state_dict: 110 | prev_state_key = prev_state_dict[state_key] 111 | shortest_path.append(state_dict[prev_state_key]) 112 | state_key = prev_state_key 113 | 114 | shortest_path = list(reversed(shortest_path)) 115 | shortest_path_len = g[goal_key] 116 | return (True, shortest_path, shortest_path_len) 117 | else: 118 | return (False, None, None) 119 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/path_planning/hybrid_a_star.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Any 2 | import heapq 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.base_env import BaseEnv, SpaceType, EnvType 6 | 7 | 8 | class HybridAStar: 9 | def __init__(self, env: BaseEnv, heuristic_func: Callable, state_key_func: Callable): 10 | """Constructor. 11 | 12 | Args: 13 | env (BaseEnv): A planning env. 14 | heuristic_func (Callable): a function to return estimated cost-to-go from a state to goal 15 | state_key_func (Callable): a function to hash the state. 16 | """ 17 | assert env.action_space.type == SpaceType.DISCRETE.value 18 | assert env.state_space.type == SpaceType.CONTINUOUS.value 19 | assert env.state_transition_type == EnvType.DETERMINISTIC.value 20 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 21 | 22 | self.env = env 23 | self._heuristic_func = heuristic_func 24 | self._state_key_func = state_key_func 25 | 26 | def run(self, start: np.ndarray, goal: np.ndarray) -> tuple[bool, np.ndarray, float]: 27 | """Run algorithm. 28 | 29 | Args: 30 | start (np.ndarray): the start state 31 | goal (np.ndarray): the goal state 32 | 33 | Returns: 34 | res (bool): return true if a path is found, return false otherwise. 35 | shortest_path (np.ndarray): a np.ndarray of state if shortest path is found. 36 | shortest_path_len (float): the length of shortest path if found. 
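        Note:
            In this implementation the extracted ``shortest_path`` is the sequence of actions stored
            in ``prev_state_dict`` (one action per expansion) rather than a list of states; since the
            env is deterministic, the state path can be recovered by replaying the actions from ``start``.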
37 | """ 38 | 39 | # initialize 40 | # for every state, f[v] = g(s, v) + h(v, g) 41 | priority_q = [] 42 | shortest_path = [] 43 | shortest_path_len = 0 44 | g = {} # cost-to-come from start to a state 45 | f = {} # cost-to-come + heuristic cost-to-go 46 | prev_state_dict = {} # used to extract shortest path 47 | state_dict = {} # key is some discrete state key, value is the actual continuous state. 48 | 49 | start_key = self._state_key_func(start) 50 | goal_key = self._state_key_func(goal) 51 | state_dict[start_key] = start 52 | state_dict[goal_key] = goal 53 | 54 | g[start_key] = 0 55 | f[start_key] = self._heuristic_func(start, goal) # cost from source to goal = g(s, s) + h(s, g) = 0 + h(s, g) 56 | heapq.heappush(priority_q, (f[start_key], start_key)) 57 | open_set = set() 58 | close_set = set() 59 | open_set.add(start_key) 60 | 61 | # run algorithm 62 | path_exist = False 63 | while True: 64 | if not open_set: 65 | print("Error: Cannot find path, No open set") 66 | break 67 | 68 | # pop the best state found so far. 69 | _, best_state_key = heapq.heappop(priority_q) 70 | if best_state_key in open_set: 71 | open_set.remove(best_state_key) 72 | close_set.add(best_state_key) 73 | best_state = state_dict[best_state_key] 74 | else: 75 | continue # If best_state in close_set, just continue 76 | 77 | # If goal state has been added. 78 | if best_state_key == goal_key: 79 | path_exist = True 80 | break 81 | 82 | # print(f"close set num: {len(close_set)}/81000") 83 | print(best_state, best_state_key, g[best_state_key]) 84 | # Find possible transitions from best_state, and add them to queue ranked by heuristics. 85 | actions = self.env.action_space.get_all() 86 | for action in actions: 87 | new_state, reward, term, _, info = self.env.sample_state_transition(best_state, action) 88 | cost = -reward 89 | 90 | # skip actions that result in failures 91 | if term and not info["success"]: 92 | continue 93 | 94 | # print(new_state, best_state, action) 95 | 96 | # if new_state has not been visited, or a shorter path to new_state has been found. 
97 | new_state_key = self._state_key_func(new_state) 98 | if new_state_key not in close_set: 99 | g_new_state = g[best_state_key] + cost # cost-to-come 100 | 101 | if new_state_key not in open_set or g_new_state < g[new_state_key]: 102 | h_new_state = self._heuristic_func(new_state, goal) # cost-to-go 103 | # print(new_state_key, h_new_state) 104 | f[new_state_key] = g_new_state + h_new_state 105 | g[new_state_key] = g_new_state 106 | heapq.heappush(priority_q, (f[new_state_key], new_state_key)) 107 | state_dict[new_state_key] = new_state 108 | prev_state_dict[new_state_key] = (best_state_key, action) 109 | open_set.add(new_state_key) 110 | 111 | # print("adding ", best_state, action, new_state, new_state_key, h_new_state) 112 | # debug.append([new_action, new_state_tup]) 113 | 114 | # print(debug) 115 | 116 | if path_exist: 117 | # extract shortest path: 118 | shortest_path = [] 119 | state_key = goal_key 120 | while state_key in prev_state_dict: 121 | prev_state_key, prev_action = prev_state_dict[state_key] 122 | shortest_path.append(prev_action) 123 | state_key = prev_state_key 124 | 125 | shortest_path = list(reversed(shortest_path)) 126 | shortest_path_len = f[goal_key] 127 | return (True, shortest_path, shortest_path_len) 128 | else: 129 | return (False, None, None) 130 | -------------------------------------------------------------------------------- /robotics_algorithm/robot/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/robot/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/robot/cartpole.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing_extensions import override 3 | 4 | from robotics_algorithm.robot.robot import Robot 5 | 6 | 7 | class Cartpole(Robot): 8 | def __init__(self, dt=0.02): 9 | super().__init__(dt) 10 | 11 | self.kinematics_integrator = "euler" 12 | self.gravity = 9.8 13 | self.masscart = 1.0 14 | self.masspole = 0.1 15 | self.total_mass = self.masspole + self.masscart 16 | self.length = 0.5 # actually half the pole's length 17 | self.polemass_length = self.masspole * self.length 18 | self.force_mag = 10.0 19 | 20 | @override 21 | def control(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 22 | action = action.item() 23 | 24 | x, x_dot, theta, theta_dot = state 25 | 26 | # For the interested reader: 27 | # https://coneural.org/florian/papers/05_cart_pole.pdf 28 | costheta = np.cos(theta) 29 | sintheta = np.sin(theta) 30 | temp = (action + self.polemass_length * np.square(theta_dot) * sintheta) / self.total_mass 31 | thetaacc = (self.gravity * sintheta - costheta * temp) / ( 32 | self.length * (4.0 / 3.0 - self.masspole * np.square(costheta) / self.total_mass) 33 | ) 34 | xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass 35 | 36 | if self.kinematics_integrator == "euler": 37 | x = x + self.dt * x_dot 38 | x_dot = x_dot + self.dt * xacc 39 | theta = theta + self.dt * theta_dot 40 | theta_dot = theta_dot + self.dt * thetaacc 41 | else: # semi-implicit euler 42 | x_dot = x_dot + self.dt * xacc 43 | x = x + self.dt * x_dot 44 | theta_dot = theta_dot + self.dt * thetaacc 45 | theta = theta + self.dt * theta_dot 46 | 47 | return np.array([x, x_dot, theta, theta_dot]) 48 | 
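# Illustrative sketch (not part of the repository): a minimal forward simulation of the Cartpole
# model above, mirroring the __main__ demos in differential_drive.py and double_integrator.py below.
# The initial state and constant force are arbitrary, and the module's existing numpy import is reused.
if __name__ == "__main__":
    cartpole = Cartpole(dt=0.02)
    state = np.array([0.0, 0.0, 0.05, 0.0])  # [x, x_dot, theta, theta_dot], pole slightly tilted
    for _ in range(100):
        state = cartpole.control(state, np.array([1.0]))  # apply a constant 1 N force for 2 seconds
    print(state)  # final [x, x_dot, theta, theta_dot]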
-------------------------------------------------------------------------------- /robotics_algorithm/robot/differential_drive.py: --------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | from typing_extensions import override
4 | 
5 | from robotics_algorithm.robot.robot import Robot
6 | from robotics_algorithm.utils import math_utils
7 | 
8 | 
9 | class DiffDrive(Robot):
10 |     def __init__(self, wheel_radius: float, wheel_dist: float, dt=0.05):
11 |         super().__init__(dt)
12 | 
13 |         # Robot parameters
14 |         self.wheel_radius = wheel_radius  # meters
15 |         self.wheel_dist = wheel_dist  # meters
16 | 
17 |     def control_wheel_speed(self, state: np.ndarray, control: np.ndarray, dt: float) -> np.ndarray:
18 |         """Advance the robot state by applying left and right wheel speeds for duration dt.
19 | 
20 |         Args:
21 |             state (np.ndarray): [x, y, theta] robot's current state
22 |             control (np.ndarray): [v_l, v_r] left and right wheel angular velocities in rad/s
23 |             dt (float): time step
24 | 
25 |         Returns:
26 |             new state [x, y, theta]
27 |         """
28 |         v_l, v_r = control
29 |         lin_vel = self.wheel_radius * (v_r + v_l) / 2.0
30 |         ang_vel = self.wheel_radius * (v_r - v_l) / self.wheel_dist
31 | 
32 |         return self.control(state, np.array([lin_vel, ang_vel]), dt)
33 | 
34 |     @override
35 |     def control(self, state: np.ndarray, action: np.ndarray, dt: float) -> np.ndarray:
36 |         """
37 |         Update the robot state based on the differential drive kinematics.
38 | 
39 |         Args:
40 |             state (np.ndarray): [x, y, theta] robot's current state.
41 |             action (np.ndarray): [lin_vel, ang_vel]
42 |             dt (float): time step
43 | 
44 |         Returns:
45 |             new state [x, y, theta]
46 |         """
47 |         x, y, theta = state
48 |         lin_vel, ang_vel = action
49 | 
50 |         t = 0
51 |         while t < dt:
52 |             # Update the state
53 |             x_new = x + lin_vel * np.cos(theta) * self.dt
54 |             y_new = y + lin_vel * np.sin(theta) * self.dt
55 |             theta_new = theta + ang_vel * self.dt
56 |             theta_new = math_utils.normalize_angle(theta_new)
57 | 
58 |             t += self.dt
59 |             x, y, theta = x_new, y_new, theta_new
60 | 
61 |         return np.array([x_new, y_new, theta_new])
62 | 
63 |     @override
64 |     def linearize_state_transition(self, state, action):
65 |         # linearize dynamics around state in discrete time -> x_new = Ax + Bu
66 | 
67 |         x, y, theta = state
68 |         lin_vel, ang_vel = action
69 |         A = np.array(
70 |             [
71 |                 [1, 0, -lin_vel * np.sin(theta) * self.dt],
72 |                 [0, 1, lin_vel * np.cos(theta) * self.dt],
73 |                 [0, 0, 1]
74 |             ]
75 |         )
76 |         B = np.array([[np.cos(theta) * self.dt, 0], [np.sin(theta) * self.dt, 0], [0, self.dt]])
77 | 
78 |         return A, B
79 | 
80 | if __name__ == "__main__":
81 |     # Simulation parameters
82 |     dt = 1.0  # time step
83 |     total_time = 10.0  # total simulation time
84 |     num_steps = int(total_time / dt)
85 | 
86 |     # Initial state [x, y, theta]
87 |     state = [0.0, 0.0, 0.0]
88 | 
89 |     # Control inputs (left and right wheel velocities)
90 |     control_inputs = [0.2, 0.5]  # constant velocities
91 | 
92 |     # Record state history for plotting
93 |     state_history = [state]
94 | 
95 |     diff_drive_system = DiffDrive(wheel_radius=0.05, wheel_dist=0.2)
96 | 
97 |     # Run simulation
98 |     for _ in range(num_steps):
99 |         state1 = diff_drive_system.control_wheel_speed(state, control_inputs, dt)
100 | 
101 |         num_sub_steps = int(dt / diff_drive_system.dt)
102 |         for _ in range(num_sub_steps):
103 |             state = diff_drive_system.control_wheel_speed(state, control_inputs, dt=diff_drive_system.dt)
104 |             state_history.append(state)
105 | 
106 |         assert np.allclose(np.array(state1), np.array(state))
107 | 
108 |     # Convert state history to numpy array for easier
indexing 109 | state_history = np.array(state_history) 110 | print(state_history) 111 | 112 | # Plot results 113 | plt.figure() 114 | plt.plot(state_history[:, 0], state_history[:, 1], label="Robot Path") 115 | plt.xlabel("X position (m)") 116 | plt.ylabel("Y position (m)") 117 | plt.title("Differential Drive Robot Path") 118 | plt.legend() 119 | plt.axis("equal") 120 | plt.grid() 121 | plt.show() 122 | -------------------------------------------------------------------------------- /robotics_algorithm/robot/double_integrator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from typing_extensions import override 4 | 5 | from robotics_algorithm.robot.robot import Robot 6 | 7 | class DoubleIntegrator(Robot): 8 | def __init__(self, dt=0.01): 9 | # Robot parameters 10 | super().__init__(dt) 11 | 12 | use_discrete_time_model = True # Hardcode 13 | 14 | # x = [q, q_dot] 15 | # x_dot = [q_dot, q_dot_dot] = Ax + Bu 16 | if not use_discrete_time_model: 17 | self.A = np.array([[0, 1], [0, 0]]) 18 | self.B = np.array([[0, 1]]).T 19 | else: 20 | self.A = np.array([[1, dt], [0, 1]]) 21 | self.B = np.array([[0.5 * dt * dt], [dt]]) 22 | 23 | self.use_discrete_time_model = use_discrete_time_model 24 | 25 | @override 26 | def control(self, state: np.ndarray, control: np.ndarray, dt: float) -> np.ndarray: 27 | """Compute the end state given the current state and control. 28 | 29 | Args: 30 | state (np.ndarray): [q, q_dot] robot's current state 31 | control (np.ndarray): [q_dot_dot] acceleration input 32 | dt (float): time step 33 | 34 | Returns: 35 | new state [q, q_dot] 36 | """ 37 | state = np.array(state).reshape(-1, 1) 38 | control = np.array(control).reshape(-1, 1) 39 | 40 | if not self.use_discrete_time_model: 41 | # integrate continuous-time dynamics with forward Euler, assuming dt is small enough 42 | x_dot = self.A @ state + self.B @ control 43 | new_state = state + x_dot * dt 44 | else: 45 | # if discrete_time model is used, transition func directly gives new state 46 | new_state = self.A @ state + self.B @ control 47 | 48 | return new_state.reshape(-1) 49 | 50 | @override 51 | def linearize_state_transition(self, state, action): 52 | return self.A, self.B 53 | 54 | if __name__ == "__main__": 55 | # Simulation parameters 56 | dt = 0.01 # time step 57 | total_time = 10.0 # total simulation time 58 | num_steps = int(total_time / dt) 59 | 60 | # Initial state [q, q_dot] 61 | state = np.array([0.0, 0.0]) 62 | 63 | # Control inputs (accelerations) 64 | control_inputs = [0.1] 65 | 66 | # Record state history for plotting 67 | state_history = [state] 68 | 69 | system = DoubleIntegrator() 70 | 71 | # Run simulation 72 | time_history = [0] 73 | for time_step in range(num_steps): 74 | state = system.control(state, control_inputs, dt) 75 | state_history.append(state) 76 | time_history.append(dt + time_step * dt) 77 | 78 | # Convert state history to numpy array for easier indexing 79 | state_history = np.array(state_history) 80 | print(state_history) 81 | 82 | # Plot results 83 | plt.figure() 84 | plt.plot(time_history, state_history[:, 0], label="Robot Path") 85 | plt.xlabel("time (s)") 86 | plt.ylabel("position (m)") 87 | plt.title("Double integrator Robot Path") 88 | plt.legend() 89 | plt.grid() 90 | plt.show() 91 | -------------------------------------------------------------------------------- /robotics_algorithm/robot/pendulum.py: -------------------------------------------------------------------------------- 1 | 
import math 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from matplotlib.animation import FuncAnimation 6 | from typing_extensions import override 7 | 8 | from robotics_algorithm.robot.robot import Robot 9 | 10 | 11 | class Pendulum(Robot): 12 | def __init__(self, dt=0.01): 13 | super().__init__(dt) 14 | 15 | # Constants 16 | self.g = 9.81 # acceleration due to gravity, in m/s^2 17 | self.L = 1.0 # length of the pendulum, in m 18 | 19 | @override 20 | def control(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 21 | theta, theta_dot = state 22 | theta += np.pi 23 | 24 | theta_dot_dot = -self.g / self.L * math.sin(theta) + action.item() / self.L / self.L 25 | 26 | # Forward euler 27 | new_theta = theta + self.dt * theta_dot - np.pi 28 | new_theta_dot = theta_dot + self.dt * theta_dot_dot 29 | 30 | return np.array([new_theta, new_theta_dot]) 31 | 32 | @override 33 | def linearize_state_transition(self, state, action): 34 | # linearize dynamics around state in discrete time -> x_new = Ax + Bu 35 | 36 | # discrete-time case: x_t+1 = Ax + Bu 37 | theta = state[0] 38 | theta += np.pi 39 | A = np.array([[1, self.dt], [-self.g / self.L * math.cos(theta) * self.dt, 1]]) 40 | B = np.array([0, 1 / (self.L ** 2) * self.dt]).reshape(2, 1) 41 | return A, B 42 | 43 | 44 | if __name__ == "__main__": 45 | env = Pendulum() 46 | 47 | # Time vector 48 | t = np.arange(0, 10, env.dt) # simulate for 10 seconds 49 | 50 | # State vector [theta, omega] 51 | x = np.zeros((len(t), 2)) 52 | x[0] = [0, 0] 53 | 54 | # Control input (constant or time-varying) 55 | u = np.ones(len(t)) * 20 # example: no control input 56 | # u = np.sin(t) # example: sinusoidal control input 57 | 58 | for i in range(1, len(t)): 59 | x[i] = env.control(x[i-1], u[i-1]) 60 | 61 | # Extract theta and omega for plotting 62 | theta = x[:, 0] 63 | omega = x[:, 1] 64 | 65 | # Create an animation of the pendulum 66 | fig, ax = plt.subplots() 67 | ax.set_xlim(-env.L-0.1, env.L+0.1) 68 | ax.set_ylim(-env.L-0.1, env.L+0.1) 69 | ax.set_aspect('equal') 70 | line, = ax.plot([], [], 'o-', lw=2) 71 | 72 | def init(): 73 | line.set_data([], []) 74 | return line, 75 | 76 | def update(frame): 77 | x_pendulum = env.L * np.sin(theta[frame] + np.pi) 78 | y_pendulum = -env.L * np.cos(theta[frame] + np.pi) 79 | line.set_data([0, x_pendulum], [0, y_pendulum]) 80 | return line, 81 | 82 | ani = FuncAnimation(fig, update, frames=len(t), init_func=init, blit=True, interval=env.dt*1000) 83 | plt.show() -------------------------------------------------------------------------------- /robotics_algorithm/robot/planar_quadrotor.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from matplotlib.animation import FuncAnimation 6 | from typing_extensions import override 7 | 8 | from robotics_algorithm.robot.robot import Robot 9 | 10 | 11 | class PlanarQuadrotor(Robot): 12 | def __init__(self, dt=0.01): 13 | """Planar Quadrotor. 
14 | 15 | https://arxiv.org/pdf/2106.15134 16 | 17 | State: [x, z, theta, x_dot, z_dot, theta_dot] 18 | Action: [left_thrust, right_thrust], the individual rotor thrusts in N 19 | 20 | """ 21 | super().__init__(dt) 22 | 23 | # Constants 24 | self.g = 9.81 25 | self.m = 1.0 26 | self.L = 0.4 # length of the quadrotor body (distance between rotors), in m 27 | self.I = self.m * self.L * self.L / 12.0 # Using moment of inertia of a thin rod 28 | 29 | @override 30 | def control(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 31 | x, z, theta, x_vel, z_vel, theta_vel = state 32 | 33 | left_thrust, right_thrust = action 34 | net_thrust = left_thrust + right_thrust 35 | net_moment = self.L / 2.0 * (right_thrust - left_thrust) 36 | 37 | x_accel = -net_thrust * math.sin(theta) / self.m 38 | z_accel = net_thrust * math.cos(theta) / self.m - self.g 39 | theta_accel = net_moment / self.I 40 | 41 | # Forward euler simulation 42 | new_x = x + x_vel * self.dt 43 | new_z = z + z_vel * self.dt 44 | new_theta = theta + theta_vel * self.dt 45 | new_x_vel = x_vel + x_accel * self.dt 46 | new_z_vel = z_vel + z_accel * self.dt 47 | new_theta_vel = theta_vel + theta_accel * self.dt 48 | 49 | return np.array([new_x, new_z, new_theta, new_x_vel, new_z_vel, new_theta_vel]) 50 | 51 | # def control_thrust(self, state, left_thrust, right_thrust): 52 | # net_thrust = left_thrust + right_thrust 53 | # net_moment = self.L / 2.0 * (right_thrust - left_thrust) 54 | 55 | # return self.control(state, np.array([net_thrust, net_moment])) 56 | 57 | @override 58 | def linearize_state_transition(self, state, action): 59 | # linearize dynamics around state in discrete time -> x_new = Ax + Bu 60 | 61 | x, z, theta, x_vel, z_vel, theta_vel = state 62 | u1, u2 = action 63 | 64 | # discrete-time case: x_t+1 = Ax + Bu 65 | A = np.array( 66 | [ 67 | [1, 0, 0, self.dt, 0, 0], 68 | [0, 1, 0, 0, self.dt, 0], 69 | [0, 0, 1, 0, 0, self.dt], 70 | [0, 0, -(u1 + u2) * math.cos(theta) / self.m * self.dt, 1, 0, 0], 71 | [0, 0, -(u1 + u2) * math.sin(theta) / self.m * self.dt, 0, 1, 0], 72 | [0, 0, 0, 0, 0, 1], 73 | ] 74 | ) 75 | B = np.array( 76 | [ 77 | [0, 0], 78 | [0, 0], 79 | [0, 0], 80 | [-math.sin(theta) / self.m * self.dt, -math.sin(theta) / self.m * self.dt], 81 | [math.cos(theta) / self.m * self.dt, math.cos(theta) / self.m * self.dt], 82 | [-self.L / (2 * self.I) * self.dt, self.L / (2 * self.I) * self.dt], 83 | ] 84 | ) 85 | return A, B 86 | 87 | 88 | if __name__ == '__main__': 89 | env = PlanarQuadrotor() 90 | 91 | total_time = 10 92 | 93 | # Initial state [x, z, theta, x_dot, z_dot, theta_dot] 94 | state = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]) 95 | 96 | # Control inputs: constant thrusts 97 | u1 = 5.0 # Thrust from left rotor (N) 98 | u2 = 5.0 # Thrust from right rotor (N) 99 | 100 | # Simulation variables 101 | trajectory_x = [] 102 | trajectory_z = [] 103 | 104 | # Update function for integration 105 | def update_state(state, u1, u2, dt): 106 | """Update the state using numerical integration (Euler method).""" 107 | new_state = env.control(state, np.array([u1, u2])) 108 | return new_state 109 | 110 | # Set up the figure and animation 111 | fig, ax = plt.subplots(figsize=(8, 6)) 112 | 113 | # Limits for animation 114 | ax.set_xlim(-2, 2) 115 | ax.set_ylim(-1, 3) 116 | 117 | # Quadrotor body and thrusters 118 | (body,) = ax.plot([], [], 'o-', lw=4, markersize=10, label='Quadrotor Body') # Quadrotor body 119 | (trajectory,) = ax.plot([], [], 'b--', lw=1, label='Trajectory') # Trajectory of the center of mass 120 | 121 | # Text for simulation 
time 122 | time_text = ax.text(0.02, 0.95, '', transform=ax.transAxes) 123 | 124 | # Initialize the quadrotor drawing components 125 | def init(): 126 | body.set_data([], []) 127 | trajectory.set_data([], []) 128 | time_text.set_text('') 129 | return body, trajectory, time_text 130 | 131 | # Animation function 132 | def animate(i): 133 | global state, trajectory_x, trajectory_z 134 | 135 | # Update the state using Euler integration 136 | state = update_state(state, u1, u2, env.dt) 137 | 138 | # Extract positions and angles 139 | x, z, theta, _, _, _ = state 140 | 141 | # Compute quadrotor geometry 142 | rotor1_x = x - env.L * np.cos(theta) 143 | rotor1_z = z - env.L * np.sin(theta) 144 | rotor2_x = x + env.L * np.cos(theta) 145 | rotor2_z = z + env.L * np.sin(theta) 146 | 147 | # Update trajectory 148 | trajectory_x.append(x) 149 | trajectory_z.append(z) 150 | 151 | # Update the quadrotor body and trajectory data 152 | body.set_data([rotor1_x, rotor2_x], [rotor1_z, rotor2_z]) 153 | trajectory.set_data(trajectory_x, trajectory_z) 154 | 155 | # Update simulation time 156 | time_text.set_text(f'Time: {i * env.dt:.2f}s') 157 | 158 | return body, trajectory, time_text 159 | 160 | # Run the animation 161 | ani = FuncAnimation( 162 | fig, animate, frames=int(total_time / env.dt), interval=env.dt * 1000, init_func=init, blit=True 163 | ) 164 | plt.legend() 165 | plt.xlabel('X Position (m)') 166 | plt.ylabel('Z Position (m)') 167 | plt.title('Planar Quadrotor Simulation') 168 | plt.grid() 169 | plt.show() 170 | -------------------------------------------------------------------------------- /robotics_algorithm/robot/robot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Robot: 5 | def __init__(self, dt): 6 | self.dt = dt 7 | 8 | def control(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 9 | """Simulate robot kinematics/dynamics forward in time to get the next state 10 | 11 | Args: 12 | state (np.ndarray): previous state 13 | action (np.ndarray): action to apply 14 | 15 | Returns: 16 | np.ndarray: next state 17 | """ 18 | raise NotImplementedError() 19 | 20 | def linearize_state_transition(self, state: np.ndarray, action: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 21 | """Linearize dynamics in discrete time so that x_new = A @ x + B @ u 22 | 23 | Args: 24 | state (np.ndarray): previous state 25 | action (np.ndarray): action to apply 26 | 27 | Returns: 28 | tuple[np.ndarray, np.ndarray]: A and B 29 | """ 30 | raise NotImplementedError 31 | -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/state_estimation/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/amcl.py: -------------------------------------------------------------------------------- 1 | from typing_extensions import override 2 | import math 3 | import numpy as np 4 | from collections import defaultdict 5 | import random 6 | 7 | from robotics_algorithm.env.base_env import PartiallyObservableEnv 8 | from robotics_algorithm.state_estimation.particle_filter import ParticleFilter 9 | 10 | 11 | class AMCL(ParticleFilter): 12 | def __init__(self, env: PartiallyObservableEnv, min_particles= 500, 
max_particles: int = 10000): 13 | """Adaptive Monte-Carlo localisation. 14 | 15 | Particle filter with Augmentation and KLD resampling. 16 | Augmentation: Adjust number of random samples based on the measurement likelihood. 17 | KLD resampling: Adjust total number of particles based on KL divergence against true prior. 18 | 19 | Args: 20 | env (PartiallyObservableEnv): environment 21 | min_particles (int, optional): minimum number of samples. Defaults to 500. 22 | max_particles (int, optional): maximum number of samples. Defaults to 10000. 23 | """ 24 | super().__init__(env) 25 | 26 | self.hist_res = 0.1 27 | self.hist = defaultdict(int) 28 | self.min_samples = min_particles 29 | self.max_samples = max_particles 30 | 31 | # For KLD sampling 32 | # ! The statistical bound says: if we choose N number of samples, then we can guarantee that with probability quantile, 33 | # ! the KL-divergence between the MLE and the true distribution is less than epsilon. 34 | self.quantile = 0.99 35 | self.epsilon = 0.05 36 | 37 | # For Augmentation 38 | self.alpha_short_term = 0.1 39 | self.alpha_long_term = 0.001 40 | self.w_short_term = 0 41 | self.w_long_term = 0 42 | 43 | @override 44 | def update(self, observation): 45 | # Add particles until desired number is reached. 46 | """ 47 | @param observation, observation 48 | """ 49 | # calculate particle weights 50 | # - get_observation_prob may return pdf, need to normalize after. 51 | weights = [] 52 | w_avg = 0 53 | for particle in self.particles: 54 | weight = self.env.get_observation_prob(particle, observation) + 1e-300 # avoid round-off to zero 55 | weights.append(weight) 56 | w_avg += 1.0 / len(self.particles) * weight 57 | weights = np.array(weights) 58 | weights = weights / np.sum(weights) # normalize! 59 | 60 | # Calculate average likelihood of measurement over a large and short time periods 61 | # in order to determine whether the current measurement likelihood is too large 62 | self.w_short_term += self.alpha_short_term * (w_avg - self.w_short_term) 63 | self.w_long_term += self.alpha_long_term * (w_avg - self.w_long_term) 64 | 65 | # Augmentation + KLD resampling. 66 | # ! probability of choosing random sample is decided by the divergence between short-term likelihood and long-term likelihood 67 | # ! if short-term likelihood is worse (small), then random sample probability is higher 68 | random_sample_prob = max(0.0, 1.0 - self.w_short_term / self.w_long_term) 69 | # print(w_avg, self.w_short_term, self.w_long_term, random_sample_prob) 70 | particle_indices = np.arange(len(self.particles)) 71 | new_particles = [] 72 | while len(new_particles) < self._KLD_sample_limit(): # ! Keep adding particles until statistical bound is met 73 | x = random.uniform(0, 1) 74 | if x <= random_sample_prob: 75 | new_particle = self.env.random_state() 76 | else: 77 | new_particle_index = np.random.choice(particle_indices, replace=True, p=weights) # Draw 1 sample 78 | new_particle = self.particles[new_particle_index] 79 | new_particles.append(new_particle) 80 | 81 | # check sample falls into which bin, and increment bin count 82 | particle_key = tuple(np.floor(np.array(new_particle) / self.hist_res).astype(np.int32).tolist()) 83 | self.hist[particle_key] += 1 84 | 85 | self.particles = new_particles 86 | self.hist.clear() # ! Statistical bound is calculated every iteration. 87 | 88 | def _KLD_sample_limit(self): 89 | # Compute the required number of samples, given that there are k bins 90 | # with samples in them. This is taken directly from Fox et al. 
91 | k = len(self.hist) # How many bins are filled up 92 | if k <= 1: 93 | return self.max_samples 94 | 95 | # ! The statistical bound says: if we choose n number of samples, then we can guarantee that with probability quantile, 96 | # ! the KL-divergence between the MLE and the true distribution is less than epsilon. 97 | a = 1 98 | b = 2 / (9 * (k - 1)) 99 | c = math.sqrt(2 / (9 * (k - 1))) * self.quantile # Upper X quantile of normal distribution 100 | x = a - b + c 101 | n = math.ceil((k - 1) / (2 * self.epsilon) * x * x * x) # epsilon is the threshold of KL divergence. 102 | 103 | if n < self.min_samples: 104 | return self.min_samples 105 | 106 | if n > self.max_samples: 107 | return self.max_samples 108 | 109 | return n 110 | 111 | @property 112 | def num_of_cur_particles(self): 113 | return len(self.particles) -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/discrete_bayes_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.base_env import BaseEnv, SpaceType, EnvType 4 | 5 | 6 | class DiscreteBayesFilter: 7 | """Bayes filter for environment with discrete state and observations. 8 | 9 | Analytically computes the Bayes update rule by iterating over all possible states and observations. 10 | """ 11 | def __init__(self, env: BaseEnv): 12 | assert env.state_space.type == SpaceType.DISCRETE.value 13 | assert env.observation_space.type == SpaceType.DISCRETE.value 14 | assert env.state_transition_type == EnvType.STOCHASTIC.value 15 | assert env.observability == EnvType.PARTIALLY_OBSERVABLE.value 16 | 17 | self.env = env 18 | self._state_space_size = env.state_space.size 19 | 20 | # Uniformly distributed 21 | self.state_belief = {} 22 | for state in self.env.state_space.get_all(): 23 | self.state_belief[tuple(state)] = 1 / self._state_space_size 24 | 25 | def set_initial_state(self, state: np.ndarray): 26 | """ 27 | Set the initial state of filter. 28 | 29 | @param state, initial state 30 | """ 31 | self.state_belief[tuple(state)] += 0.05 # slightly increase the initial state belief 32 | self._normalize() 33 | 34 | def get_state(self) -> np.ndarray: 35 | # return the state with highest probability 36 | return max(self.state_belief, key=self.state_belief.get) 37 | 38 | def run(self, action: np.ndarray, obs: np.ndarray): 39 | """ 40 | Run one iteration of the filter. 41 | 42 | @param action, control 43 | @param obs, observation 44 | """ 45 | self.predict(action) 46 | self.update(obs) 47 | 48 | def predict(self, action: np.ndarray): 49 | """ 50 | @param action, action 51 | """ 52 | new_state_belief = dict.fromkeys(self.state_belief, 0.0) # start the predicted belief from zero 53 | 54 | # In the discrete case, the predicted belief of each new state is the sum, over all prior states, of the transition probability times the prior belief. 55 | for state in self.env.state_space.get_all(): 56 | new_states, probs = self.env.state_transition_func(state, action) 57 | for new_state, prob in zip(new_states, probs): 58 | new_state_belief[tuple(new_state)] += prob * self.state_belief[tuple(state)] 59 | 60 | self.state_belief = new_state_belief 61 | 62 | self._normalize() 63 | 64 | def update(self, obs: np.ndarray): 65 | """ 66 | @param obs, current observation. 
67 | """ 68 | for state in self.env.state_space.get_all(): 69 | obss, obs_probs = self.env.observation_func(state) 70 | 71 | obs_prob = 0 72 | if obs in obss: 73 | obs_prob = obs_probs[obss.index(obs)] 74 | 75 | self.state_belief[tuple(state)] *= obs_prob 76 | 77 | self._normalize() 78 | 79 | def _normalize(self): 80 | # normalize 81 | total_sum = sum(self.state_belief.values()) 82 | for state in self.state_belief.keys(): 83 | self.state_belief[state] /= total_sum 84 | -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/extended_kalman_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.base_env import BaseEnv, EnvType, FunctionType, DistributionType 4 | 5 | 6 | class ExtendedKalmanFilter: 7 | def __init__(self, env: BaseEnv): 8 | """ 9 | Extended Kalman filter. 10 | 11 | State Transition: X = transition_func(x, u) + sigma 12 | Measurement Z = measurement_func(x) + delta 13 | R: Process covariance matrix from sigma 14 | Q. Measurement covariance matrix from delta 15 | """ 16 | 17 | assert env.state_transition_type == EnvType.STOCHASTIC.value 18 | assert env.observability == EnvType.PARTIALLY_OBSERVABLE.value 19 | assert env.state_transition_func_type == FunctionType.GENERAL.value 20 | assert env.observation_func_type == FunctionType.GENERAL.value 21 | assert env.state_transition_dist_type == DistributionType.GAUSSIAN.value 22 | assert env.observation_dist_type == DistributionType.GAUSSIAN.value 23 | 24 | self.env = env 25 | self.state = np.array(env.state_space.sample()) 26 | self.covariance = np.eye(env.state_space.state_size) 27 | 28 | def set_initial_state(self, state: np.ndarray): 29 | """ 30 | Set the initial state of filter. 31 | 32 | @param state, initial state 33 | """ 34 | self.state = np.array(state) 35 | 36 | def get_state(self) -> np.ndarray: 37 | return self.state 38 | 39 | def run(self, action: np.ndarray, obs: np.ndarray): 40 | """ 41 | Run one iteration of the Kalman filter. 
42 | 43 | @param action, control 44 | @param obs, observation 45 | """ 46 | self.predict(action) 47 | self.update(obs) 48 | 49 | def predict(self, action: np.ndarray): 50 | """ 51 | @param control, control 52 | """ 53 | F, _ = self.env.linearize_state_transition(self.state, action) 54 | mean_new_state, _ = self.env.state_transition_func(self.state, action) # non-linear transition_func 55 | new_covariance = F @ self.covariance @ F.transpose() + self.env.state_transition_cov_matrix 56 | 57 | self.state = np.array(mean_new_state) 58 | self.covariance = new_covariance 59 | 60 | def update(self, obs: np.ndarray): 61 | """ 62 | @param obs, observation 63 | """ 64 | obs_np = np.array(obs) 65 | mean_obs, _ = self.env.observation_func(self.state) 66 | mean_obs_np = np.array(mean_obs) 67 | 68 | H = self.env.linearize_observation(self.state, obs) 69 | K = ( 70 | self.covariance 71 | @ H.transpose() 72 | @ np.linalg.inv(H @ self.covariance @ H.transpose() + self.env.obs_cov_matrix) 73 | ) 74 | new_state = self.state + K @ (obs_np - mean_obs_np) 75 | tmp_matrix = K @ H 76 | new_covariance = (np.eye(tmp_matrix.shape[0]) - tmp_matrix) @ self.covariance 77 | 78 | self.state = new_state 79 | self.covariance = new_covariance 80 | -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/kalman_filter.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | from robotics_algorithm.env.base_env import BaseEnv, EnvType, FunctionType, DistributionType 5 | 6 | 7 | class KalmanFilter: 8 | def __init__(self, env: BaseEnv): 9 | """ 10 | Kalman filter. 11 | 12 | When environment has linear state transtion and linear observation function with Gaussian noise, bayesian 13 | filter rule can be simplified. 14 | 15 | State Transition: X = AX + BU + sigma 16 | Measurement Z = HX + delta 17 | """ 18 | assert env.state_transition_type == EnvType.STOCHASTIC.value 19 | assert env.observability == EnvType.PARTIALLY_OBSERVABLE.value 20 | assert env.state_transition_func_type == FunctionType.LINEAR.value 21 | assert env.observation_func_type == FunctionType.LINEAR.value 22 | assert env.state_transition_dist_type == DistributionType.GAUSSIAN.value 23 | assert env.observation_dist_type == DistributionType.GAUSSIAN.value 24 | 25 | # state_transition_func: x = Ax + Bu + sigma 26 | self.A, self.B = env.linearize_state_transition(None, None) # As state transition is linear, A and B are constant 27 | self.R = env.state_transition_covariance_matrix 28 | 29 | # measurement function z = Hx + delta 30 | self.H = env.linearize_observation(None, None) # As observation is linear, H are constant 31 | self.Q = env.observation_covariance_matrix 32 | 33 | self.state = np.array(env.cur_state) 34 | self.covariance = np.eye(env.state_space.state_size) 35 | 36 | def set_initial_state(self, state: np.ndarray): 37 | """ 38 | Set the initial state of filter. 39 | @param state, initial state 40 | """ 41 | self.state = np.array(state) 42 | 43 | def get_state(self) -> np.ndarray: 44 | return self.state 45 | 46 | def run(self, action: np.ndarray, obs: np.ndarray): 47 | """ 48 | Run one iteration of the Kalman filter. 49 | 50 | @param action, control 51 | @param obs, observation 52 | """ 53 | self.predict(action) 54 | self.update(obs) 55 | 56 | def predict(self, action: np.ndarray): 57 | """ 58 | Predict the state of the Kalman filter. 
59 | 60 | @param action, applied action 61 | """ 62 | new_state = self.A @ self.state + self.B @ action 63 | new_covariance = self.A @ self.covariance @ self.A.transpose() + self.R 64 | 65 | self.state = new_state 66 | self.covariance = new_covariance 67 | 68 | def update(self, obs: np.ndarray): 69 | """ 70 | Update the state of the Kalman filter. 71 | 72 | @param obs, obtained observation. 73 | """ 74 | K = self.covariance @ self.H.transpose() @ np.linalg.inv(self.H @ self.covariance @ self.H.transpose() + self.Q) 75 | new_state = self.state + K @ (obs - self.H @ self.state) 76 | tmp_matrix = K @ self.H 77 | new_covariance = (np.eye(tmp_matrix.shape[0]) - tmp_matrix) @ self.covariance 78 | 79 | self.state = new_state 80 | self.covariance = new_covariance 81 | -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/particle_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.base_env import PartiallyObservableEnv, EnvType 4 | 5 | 6 | class ParticleFilter: 7 | def __init__( 8 | self, env: PartiallyObservableEnv, num_of_particles: int = 10000, uniform_particle_ratio: float = 0.05 9 | ): 10 | """Particle filter 11 | 12 | Args: 13 | env (BaseEnv): _description_ 14 | num_of_particles (int, optional): num of particles. Defaults to 10000. 15 | uniform_particle_ratio (float, optional): if set to non-zero, filter will add a small number of random 16 | particles after each iteration to tackle particle deprivation problem. Defaults to 0.05. 17 | """ 18 | 19 | # sanity check 20 | assert env.state_transition_type == EnvType.STOCHASTIC.value 21 | assert env.observability == EnvType.PARTIALLY_OBSERVABLE.value 22 | 23 | self.env = env 24 | 25 | # attributes 26 | self.num_of_particles = num_of_particles 27 | self.num_of_uniform_particles = int(num_of_particles * uniform_particle_ratio) 28 | 29 | self.particles = [] 30 | for _ in range(self.num_of_particles): 31 | particle = self.env.random_state() 32 | self.particles.append(particle) 33 | 34 | def set_initial_state(self, state: np.ndarray): 35 | """ 36 | Set the initial state of filter. 37 | 38 | @param state, initial state 39 | """ 40 | self.state = np.array(state) 41 | for i in range(self.num_of_uniform_particles, self.num_of_particles): 42 | self.particles[i] = state 43 | 44 | def get_state(self) -> np.ndarray: 45 | # simply average 46 | mean_state = np.mean(np.array(self.particles), axis=0) 47 | return mean_state 48 | 49 | def run(self, action: np.ndarray, observation: np.ndarray): 50 | """ 51 | Run one iteration of the filter. 52 | 53 | @param action, control 54 | @param observation, observation 55 | """ 56 | self.predict(action) 57 | self.update(observation) 58 | 59 | def predict(self, action): 60 | """ 61 | @param action, action 62 | """ 63 | # sample new particles according to state transition function 64 | new_particles = [] 65 | for particle in self.particles: 66 | # NOTE: Unlike kalman filter which assumes a Gaussian distribution, here we sample state transition instead 67 | # calling state_transition_function which returns the mean of the new state. 
68 | new_particle = self.env.sample_state_transition(particle, action)[0] 69 | new_particles.append(new_particle) 70 | 71 | self.particles = new_particles 72 | 73 | def update(self, observation): 74 | """ 75 | @param observation, observation 76 | """ 77 | # calculate particle weights 78 | # - get_observation_prob may return pdf, need to normalize after. 79 | weights = [] 80 | for particle in self.particles: 81 | weight = self.env.get_observation_prob(particle, observation) + 1e-300 # avoid round-off to zero 82 | weights.append(weight) 83 | weights = np.array(weights) 84 | weights = weights / np.sum(weights) # normalize! 85 | 86 | # importance sampling, sample with replacement according to weights 87 | particle_indices = np.arange(self.num_of_particles) 88 | new_particle_indices = np.random.choice(particle_indices, size=self.num_of_particles, replace=True, p=weights) 89 | new_particles = np.array(self.particles)[new_particle_indices].tolist() 90 | 91 | # Add random particles if required 92 | for i in range(self.num_of_uniform_particles): 93 | particle = self.env.random_state() 94 | new_particles[i] = particle 95 | 96 | self.particles = new_particles 97 | -------------------------------------------------------------------------------- /robotics_algorithm/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/utils/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/utils/math_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # import math 4 | 5 | 6 | def normalize_angle(angle): 7 | angle = (angle + 6 * np.pi) % (2 * np.pi) # normalize theta to [-pi, pi] 8 | if angle > np.pi: 9 | angle = angle - 2 * np.pi 10 | 11 | return angle 12 | 13 | 14 | def smooth(scalars: list[float], weight: float = 0.5) -> list[float]: # Weight between 0 and 1 15 | """ 16 | Smooth a list of scalars by applying exponential smoothing with a given weight. 17 | 18 | Args: 19 | scalars: List of scalars to be smoothed. 20 | weight: Weight between 0 and 1 for exponential smoothing. 21 | 22 | Return: 23 | List of smoothed values. 24 | """ 25 | last = scalars[0] # First value in the plot (first timestep) 26 | smoothed = list() 27 | for point in scalars: 28 | smoothed_val = last * weight + (1 - weight) * point # Calculate smoothed value 29 | smoothed.append(smoothed_val) # Save it 30 | last = smoothed_val # Anchor the last smoothed value 31 | 32 | return smoothed 33 | -------------------------------------------------------------------------------- /robotics_algorithm/utils/mdp_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def make_greedy_policy(Q, num_actions): 5 | """ 6 | Creates an greedy policy based on a given Q-function. 7 | 8 | Args: 9 | Q: A dictionary that maps from state -> action-values. 10 | Each value is a numpy array of length nA (see below) 11 | num_action: Number of actions in the environment. 
12 | 13 | Returns: 14 | A function that takes the observation as an argument and returns the action 15 | """ 16 | 17 | def policy_fn(state: np.ndarray): 18 | action_prob = np.zeros(num_actions) 19 | state_key = tuple(state.tolist()) 20 | best_action_idx = np.argmax(Q[state_key], axis=-1) 21 | action_prob[best_action_idx] = 1.0 22 | return action_prob 23 | 24 | return policy_fn 25 | -------------------------------------------------------------------------------- /robotics_algorithm/utils/models/attention.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class MultiHeadAttention(nn.Module): 9 | def __init__(self, embed_dim=512, n_heads=8): 10 | """ 11 | Args: 12 | embed_dim: dimension of embeding vector output 13 | n_heads: number of self attention heads 14 | """ 15 | super(MultiHeadAttention, self).__init__() 16 | 17 | self.embed_dim = embed_dim # 512 dim 18 | self.n_heads = n_heads # 8 19 | self.single_head_dim = int(self.embed_dim / self.n_heads) # 512/8 = 64. each key,query, value will be of 64d 20 | 21 | # key,query and value matrixes #64 x 64 22 | self.query_matrix = nn.Linear( 23 | self.single_head_dim, self.single_head_dim, bias=False 24 | ) # single key matrix for all 8 keys #512x512 25 | self.key_matrix = nn.Linear(self.single_head_dim, self.single_head_dim, bias=False) 26 | self.value_matrix = nn.Linear(self.single_head_dim, self.single_head_dim, bias=False) 27 | self.out = nn.Linear(self.n_heads * self.single_head_dim, self.embed_dim) 28 | 29 | def forward(self, key, query, value, mask=None): # batch_size x sequence_length x embedding_dim # 32 x 10 x 512 30 | """ 31 | Args: 32 | key : key vector 33 | query : query vector 34 | value : value vector 35 | mask: mask for decoder 36 | 37 | Returns: 38 | output vector from multihead attention 39 | """ 40 | batch_size = key.size(0) 41 | seq_length = key.size(1) 42 | 43 | # query dimension can change in decoder during inference. 
44 | # so we cant take general seq_length 45 | seq_length_query = query.size(1) 46 | 47 | # 32x10x512 48 | key = key.view( 49 | batch_size, seq_length, self.n_heads, self.single_head_dim 50 | ) # batch_size x sequence_length x n_heads x single_head_dim = (32x10x8x64) 51 | query = query.view(batch_size, seq_length_query, self.n_heads, self.single_head_dim) # (32x10x8x64) 52 | value = value.view(batch_size, seq_length, self.n_heads, self.single_head_dim) # (32x10x8x64) 53 | 54 | k = self.key_matrix(key) # (32x10x8x64) 55 | q = self.query_matrix(query) 56 | v = self.value_matrix(value) 57 | 58 | q = q.transpose(1, 2) # (batch_size, n_heads, seq_len, single_head_dim) # (32 x 8 x 10 x 64) 59 | k = k.transpose(1, 2) # (batch_size, n_heads, seq_len, single_head_dim) 60 | v = v.transpose(1, 2) # (batch_size, n_heads, seq_len, single_head_dim) 61 | 62 | # computes attention 63 | # adjust key for matrix multiplication 64 | k_adjusted = k.transpose(-1, -2) # (batch_size, n_heads, single_head_dim, seq_ken) #(32 x 8 x 64 x 10) 65 | product = torch.matmul(q, k_adjusted) # (32 x 8 x 10 x 64) x (32 x 8 x 64 x 10) = #(32x8x10x10) 66 | 67 | # fill those positions of product matrix as (-1e20) where mask positions are 0 68 | if mask is not None: 69 | product = product.masked_fill(mask == 0, float("-1e20")) 70 | 71 | # divising by square root of key dimension 72 | product = product / math.sqrt(self.single_head_dim) # / sqrt(64) 73 | 74 | # applying softmax 75 | scores = F.softmax(product, dim=-1) 76 | 77 | # mutiply with value matrix 78 | scores = torch.matmul(scores, v) ##(32x8x 10x 10) x (32 x 8 x 10 x 64) = (32 x 8 x 10 x 64) 79 | 80 | # concatenated output 81 | concat = ( 82 | scores.transpose(1, 2).contiguous().view(batch_size, seq_length_query, self.single_head_dim * self.n_heads) 83 | ) # (32x8x10x64) -> (32x10x8x64) -> (32,10,512) 84 | 85 | output = self.out(concat) # (32,10,512) -> (32,10,512) 86 | 87 | return output 88 | -------------------------------------------------------------------------------- /robotics_algorithm/utils/tree.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | 4 | class TreeNode: 5 | def __init__(self, nx_key, tree: nx.DiGraph): 6 | self.nx_key = nx_key 7 | 8 | self.children = [] 9 | self.parent = None 10 | 11 | self._tree = tree 12 | 13 | @property 14 | def attr(self): 15 | return self._tree.nodes[self.nx_key] 16 | 17 | def transition_attr(self, child): 18 | return self._tree.edges[self.nx_key, child.nx_key] 19 | 20 | def add_child(self, child, **kwargs): 21 | self._tree.add_edge(self.nx_key, child.nx_key, **kwargs) 22 | self.children.append(child) 23 | child.parent = self 24 | 25 | def delete_child(self, child): 26 | self._tree.remove_node(child.nx_key) 27 | self.children.remove(child) 28 | 29 | 30 | class Tree: 31 | def __init__(self): 32 | self._num_node = 0 33 | self._tree = nx.DiGraph() 34 | 35 | def add_node(self, **kwargs) -> TreeNode: 36 | # n_node = TreeNode(value) 37 | nx_key = self._tree.number_of_nodes() 38 | self._tree.add_node(nx_key, **kwargs) 39 | return TreeNode(nx_key, self._tree) 40 | 41 | def delete_node(self, node: TreeNode): 42 | def _delete_recur(n): 43 | for child in n.children: 44 | _delete_recur(child) 45 | 46 | self._tree.remove_node(n.nx_key) 47 | 48 | assert node.nx_key in self._tree.nodes 49 | 50 | # Delete subtree rooted at n 51 | _delete_recur(node) 52 | 53 | def add_child(self, parent: TreeNode, child: TreeNode, **kwargs): 54 | return parent.add_child(child, **kwargs) 55 | 56 | 
def delete_child( 57 | self, 58 | parent: TreeNode, 59 | child: TreeNode, 60 | ): 61 | return parent.delete_child(child) 62 | --------------------------------------------------------------------------------
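The `Tree` class above is a thin wrapper around `networkx.DiGraph` that keeps node and edge attributes next to explicit parent/child links, which is convenient for the sampling-based planners (RRT, RRT*, MCTS) elsewhere in the repo. Below is a minimal usage sketch assuming the package is importable; the `state`, `action`, and `cost` attribute names are purely illustrative, since `add_node` and `add_child` accept arbitrary keyword attributes.

```python
from robotics_algorithm.utils.tree import Tree

# Build a two-node tree the way a sampling-based planner might.
tree = Tree()
root = tree.add_node(state=(0.0, 0.0))
child = tree.add_node(state=(1.0, 0.5))
tree.add_child(root, child, action=(0.5, 0.1), cost=1.2)

print(child.parent.attr["state"])            # -> (0.0, 0.0)
print(root.transition_attr(child)["cost"])   # -> 1.2

tree.delete_node(child)  # removes the subtree rooted at `child` from the underlying DiGraph
```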