├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── doc ├── TIAGo_05.jpg └── logo.jpg ├── examples ├── control │ ├── classical_control │ │ └── test_pid.py │ ├── optimal_control │ │ ├── test_lqr_double_integrator.py │ │ ├── test_lqr_inverted_pendulum.py │ │ ├── test_lqr_planar_quadrotor.py │ │ ├── test_mpc_inverted_pendulum.py │ │ ├── test_mpc_planar_quadrotor.py │ │ └── test_mppi_cartpole.py │ └── path_follow_control │ │ ├── example_path.json │ │ ├── test_cem_path_follow.py │ │ ├── test_dwa_path_follow.py │ │ ├── test_lqr_path_follow.py │ │ ├── test_mpc_path_follow.py │ │ └── test_mppi_path_follow.py ├── learning │ ├── test_mc_control_on_policy.py │ ├── test_q_learning.py │ └── test_sarsa.py ├── planning │ ├── mdp │ │ ├── test_mcts.py │ │ ├── test_policy_iteration.py │ │ ├── test_policy_tree_search.py │ │ └── test_value_iteration.py │ ├── motion_planning │ │ ├── test_prm.py │ │ ├── test_rrt.py │ │ ├── test_rrt_connect.py │ │ └── test_rrt_star.py │ └── path_planning │ │ ├── test_a_star.py │ │ ├── test_dijkstra.py │ │ └── test_hybrid_a_star.py ├── realisitic_examples │ └── cost_aware_diff_drive_path_planning │ │ ├── cost_aware_diff_drive_path_planning.py │ │ ├── path_with_cost_penalty.png │ │ └── path_without_cost_penalty.png └── state_estimation │ ├── test_amcl.py │ ├── test_discrete_bayes_filter.py │ ├── test_extended_kalman_filter.py │ ├── test_kalman_filter.py │ └── test_particle_filter.py ├── pyproject.toml └── robotics_algorithm ├── __init__.py ├── control ├── __init__.py ├── classical_control │ ├── __init__.py │ └── pid.py ├── optimal_control │ ├── __init__.py │ ├── cem_mpc.py │ ├── convex_mpc.py │ ├── lqr.py │ └── mppi.py └── path_follow_control │ ├── __init__.py │ └── dwa.py ├── env ├── __init__.py ├── base_env.py ├── cartpole_balance.py ├── classic_mdp │ ├── cliff_walking.py │ ├── frozen_lake.py │ └── windy_grid_world.py ├── continuous_1d │ └── double_integrator_env.py ├── continuous_2d │ ├── __init__.py │ ├── diff_drive_2d_control.py │ ├── diff_drive_2d_env_base.py │ ├── diff_drive_2d_localization.py │ ├── diff_drive_2d_planning.py │ └── omni_2d_planning.py ├── discrete_world_1d.py ├── discrete_world_2d.py ├── inverted_pendulum.py └── planar_quadrotor_hover.py ├── learning ├── __init__.py └── reinforcement_learning │ ├── __init__.py │ ├── mc_control_on_policy.py │ ├── q_learning.py │ └── sarsa.py ├── planning ├── __init__.py ├── mdp │ ├── __init__.py │ ├── mcts.py │ ├── policy_iteration.py │ ├── policy_tree_search.py │ └── value_iteration.py ├── motion_planning │ ├── __init__.py │ ├── prm.py │ ├── rrt.py │ └── rrt_star.py └── path_planning │ ├── __init__.py │ ├── a_star.py │ ├── dijkstra.py │ └── hybrid_a_star.py ├── robot ├── __init__.py ├── cartpole.py ├── differential_drive.py ├── double_integrator.py ├── pendulum.py ├── planar_quadrotor.py └── robot.py ├── state_estimation ├── __init__.py ├── amcl.py ├── discrete_bayes_filter.py ├── extended_kalman_filter.py ├── kalman_filter.py └── particle_filter.py └── utils ├── __init__.py ├── math_utils.py ├── mdp_utils.py ├── models └── attention.py └── tree.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | 4 | .vscode/ -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [v0.11.2] 4 | 5 | ### Added 6 | 7 | - Added DWA 8 | 9 | ## [v0.11.1] 10 | 11 | ### Added 12 | 13 | - Added planar quadrotor 14 | - 
Added LQR and convex MPC example for planar quadrotor 15 | 16 | ### Changed 17 | 18 | - Changed all state, action, observation to be numpy array by default (cont). 19 | - Separated is_state_terminal() from get_state_info(). 20 | 21 | ## [v0.11.0] 22 | 23 | ### Added 24 | 25 | - Added convex MPC. 26 | - Added inverted pendulum environment. 27 | - Added a robot parent class for different robots. 28 | - Added LQR and convex MPC for path follow and inverted pendulum. 29 | 30 | ### Changed 31 | 32 | - Refactored folder structure to better group different components. 33 | - Changed all state, action, observation to be numpy array by default. 34 | 35 | ### Fixed 36 | 37 | - Fixed diff_drive_2d_planning traversal cost calculation so euclidean distance is properly underestimating true cost. 38 | 39 | ## [v0.10.0] 40 | 41 | ### Added 42 | 43 | - Added cost-aware differential drive path planning 44 | 45 | ### Fixed 46 | 47 | - Fixed a bug in Hybrid A-star 48 | 49 | ### Changed 50 | 51 | - Split continuous 2d env into separate classes for planning, control and localization, each with its own file. 52 | 53 | ## [v0.9.1] 54 | 55 | ### Added 56 | 57 | - Added changelog 58 | 59 | ### Changed 60 | 61 | - Minor comment and code change 62 | 63 | ### Removed 64 | 65 | - Trademark sign previously shown after the project description in version 66 | 0.3.0 67 | 68 | ## [v0.9.0] 69 | 70 | ### Added 71 | 72 | - Added AMCL 73 | 74 | ## [0.8.3] 75 | 76 | ### Added 77 | 78 | - Added RL tabular methods 79 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 lyfkyle 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # robotics_algorithms 2 | 3 | ![logo](/doc/TIAGo_05.jpg "logo") 4 | 5 | This repository contains my pure-python implementation of essential algorithms that make a mobile manipulator (and other robots) move. 6 | 7 | While it is true that a lot of algorithms have been implemented by other projects, this repo serves these main benefits: 8 | 9 | 1. It achieves a good balance of width and depth. It covers a wide range of topics, while focusing on only key algorithms in each field. 10 | 2. 
It is implemented with modular structure that cleanly separates algorithms from problems (like OMPL), at the same time emphasizing connection between different algorithms. For example, the design reflects that planing under uncertainties, optimal control and RL share the same underlying MDP formulation. 11 | 3. Serves as a single source of truth of various algorithms so that I no longer need to search all over the Internet. 12 | 13 | ## Scope 14 | 15 | It should include popular and representative algorithms from robot dynamics, state estimation, planning, control and 16 | learning. 17 | 18 | ## Requirement 19 | 20 | python 3.10 21 | 22 | ## How to use 23 | 24 | - Run `pip install -e .` 25 | - Run various scripts inside examples folder. 26 | 27 | For example, to run a\* to find the shortest path between start and goal in a grid world 28 | 29 | ```python 30 | python examples/planning/path_planning/test_a_star.py 31 | ``` 32 | 33 | ## News 34 | 35 | - 26/03/2025: Added DWA (v0.11.2) 36 | - 20/02/2025: Added LQR and convex MPC for planar quadrotor (v0.11.1). 37 | - 17/02/2025: Added convex MPC, inverted pendulum and more path following examples (v0.11.0). 38 | - 09/02/2025: Added cost-aware differential drive path planning (v0.10.0). 39 | 40 | ## Status 41 | 42 | This repository is undergoing significant development. Here is the status checklist. 43 | 44 | Algorithms 45 | 46 | - [ ] Robot Kinematic and Dynamics 47 | - [x] Differential drive 48 | - [x] Cartpole 49 | - [x] Double Integrator 50 | - [x] Inverted Pendulum 51 | - [ ] Arm 52 | - [ ] FK and IK 53 | - [ ] Car 54 | - [x] Planar Quadrotor 55 | - [ ] Quadrotor 56 | - [ ] Quadruped 57 | - [x] State Estimation 58 | - [x] Localizaion 59 | - [x] Discrete bayes filter 60 | - [x] Kalman filter 61 | - [x] Extended Kalman filter 62 | - [x] Particle filter (MCL) 63 | - [x] AMCL 64 | - [ ] SLAM 65 | - [ ] EKF SLAM 66 | - [ ] Fast SLAM 67 | - [ ] Graph-based SLAM 68 | - [ ] Planning 69 | - [x] Path Planning 70 | - [x] Dijkstra 71 | - [x] A-star 72 | - [x] Hybrid A-star 73 | - [ ] Motion Planning 74 | - [x] PRM 75 | - [x] RRT / RRT-Connect 76 | - [x] RRT\* 77 | - [ ] RRT\*-Connect 78 | - [ ] Informed RRT\* 79 | - [ ] BIT\* 80 | - [x] MDP 81 | - [x] Value iteration 82 | - [x] policy iteration 83 | - [x] Policy tree search 84 | - [x] MCTS 85 | - [ ] POMDP 86 | - [ ] Belief tree search 87 | - [ ] SARSOP 88 | - [ ] DESPOT 89 | - [ ] Control 90 | - [x] Classical control 91 | - [x] PID 92 | - [ ] Optimal Control 93 | - [x] LQR 94 | - [x] MPPI 95 | - [x] CEM-MPC 96 | - [x] Convex-MPC 97 | - [ ] Trajectory optimization 98 | - [ ] iLQR 99 | - [ ] Domain-specific Path Follow Control 100 | - [ ] Regulated Pure Pursuit 101 | - [x] Dynamic Window Approach 102 | - [ ] Time-elastic Band 103 | - [ ] Imitation learning 104 | - [ ] ACT 105 | - [ ] Diffusion-policy 106 | - [ ] Reinforcement learning 107 | - [ ] Tabular 108 | - [x] On-policy MC 109 | - [ ] Off-policy MC 110 | - [x] On-policy TD (SARSA) 111 | - [x] Off-policy TD (Q-learning) 112 | - [ ] Function approximation 113 | - [ ] Environments 114 | - [x] Frozen lake (MDP) 115 | - [x] Cliff walking (MDP) 116 | - [x] Windy gridworld (MDP) 117 | - [x] 1D navigation with double integrator 118 | - [x] Deterministic and fully-observable 119 | - [x] Stochastic and partially-observable 120 | - [x] 2D navigation with omni-directional robot 121 | - [x] Deterministic and fully-observable 122 | - [x] 2D navigation with differential drive 123 | - [x] Deterministic and fully-observable 124 | - [x] Stochastic and 
partially-observable 125 | - [x] With obstacle-distance cost 126 | - [x] 2D localization 127 | - [ ] 2D SLAM 128 | - [ ] Multi-arm bandits (POMDP) 129 | - [ ] Tiger (POMDP) 130 | 131 | In addition to the algorithm itself, also implement several realistic robotics problems that often require additional 132 | domain-specific components and strategies. 133 | 134 | - [x] Path planning for differential drive robot using Hybrid A Star with original heuristics and cost weighted distance measure. 135 | 136 | ## Known issues 137 | 138 | - [ ] EKF gives high localisation error at some instances. 139 | - [ ] MCTS is not stable. 140 | - [ ] Recursive feasibility in convex MPC 141 | -------------------------------------------------------------------------------- /doc/TIAGo_05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/doc/TIAGo_05.jpg -------------------------------------------------------------------------------- /doc/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/doc/logo.jpg -------------------------------------------------------------------------------- /examples/control/classical_control/test_pid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.cartpole_balance import CartPoleEnv 4 | from robotics_algorithm.control.classical_control.pid import PID 5 | 6 | # Initialize environment 7 | # NOTE: we can use PID for cartpole because when it is near the upright position, the system behaves like LTI system. 8 | # refer to https://ctms.engin.umich.edu/CTMS/index.php?example=InvertedPendulum§ion=SystemModeling for 9 | # the transfer function 10 | # Here we only control theta 11 | env = CartPoleEnv() 12 | env.reset() 13 | 14 | # These gains are tuned using Ziegler–Nichols method 15 | controller = PID(env, goal_state=env.goal_state[2], Kp=16.0, Kd=1.8) # we only control theta 16 | 17 | # run controller 18 | state = env.cur_state 19 | while True: 20 | action = controller.run(state[2]) # we only control theta 21 | action *= -1 # post-process action, needed because action and state are not in the same space. 
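    # For reference, the controller above is assumed to apply the standard discrete PID law on
    # the pendulum angle (a sketch only -- see pid.py for the actual implementation):
    #   error = goal_theta - theta
    #   u = Kp * error + Ki * sum_of_errors * dt + Kd * (error - prev_error) / dt
    # Only Kp and Kd are set above, so this run presumably relies on the P and D terms alone.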
22 | 23 | next_state, reward, term, trunc, info = env.step(action) 24 | print(state, action, next_state, reward) 25 | 26 | env.render() 27 | state = next_state 28 | 29 | if term or trunc: 30 | break 31 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_lqr_double_integrator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.continuous_1d.double_integrator_env import DoubleIntegratorEnv 4 | from robotics_algorithm.control.optimal_control.lqr import LQR 5 | 6 | # Test 1 7 | # discrete time model, solve dare using scipy 8 | env = DoubleIntegratorEnv(observation_noise_std=0, state_transition_noise_std=0) 9 | env.reset() 10 | print('cur_state: ', env.start_state) 11 | env.render() 12 | 13 | # initialize controller 14 | controller = LQR(env, discrete_time=True) 15 | 16 | # run controller 17 | state = env.start_state 18 | path = [state] 19 | while True: 20 | action = controller.run(state) 21 | next_state, reward, term, trunc, info = env.step(action) 22 | 23 | print(state, action, next_state) 24 | 25 | path.append(next_state) 26 | state = next_state 27 | 28 | if term or trunc: 29 | break 30 | 31 | env.add_path(path) 32 | env.render() 33 | 34 | # Test 2 35 | # Solve dare by iteration for finite-horizon case 36 | env = DoubleIntegratorEnv(observation_noise_std=0, state_transition_noise_std=0) 37 | 38 | env.reset() 39 | print('cur_state: ', env.start_state) 40 | env.render() 41 | 42 | # initialize controller 43 | controller = LQR(env, discrete_time=True, horizon=2000, solve_by_iteration=True) 44 | 45 | # run controller 46 | state = env.start_state 47 | path = [state] 48 | while True: 49 | action = controller.run(state) 50 | next_state, reward, term, trunc, info = env.step(action) 51 | 52 | print(state, action, next_state) 53 | 54 | path.append(next_state) 55 | state = next_state 56 | 57 | if term or trunc: 58 | break 59 | 60 | env.add_path(path) 61 | env.render() 62 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_lqr_inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.inverted_pendulum import InvertedPendulumEnv 4 | from robotics_algorithm.control.optimal_control.lqr import LQR 5 | 6 | # discrete time model 7 | env = InvertedPendulumEnv() 8 | env.reset() 9 | print('cur_state: ', env.cur_state) 10 | env.render() 11 | 12 | # initialize controller 13 | controller = LQR(env, discrete_time=True) 14 | 15 | # run controller 16 | state = env.cur_state 17 | path = [state] 18 | while True: 19 | action = controller.run(state) 20 | next_state, reward, term, trunc, info = env.step(action) 21 | 22 | print(state, action, next_state, reward, term, trunc) 23 | env.render() 24 | 25 | state = next_state 26 | 27 | if term or trunc: 28 | break 29 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_lqr_planar_quadrotor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.planar_quadrotor_hover import PlanarQuadrotorHoverEnv 4 | from robotics_algorithm.control.optimal_control.lqr import LQR 5 | 6 | # discrete time model 7 | env = PlanarQuadrotorHoverEnv( 8 | hover_pos=0.5, hover_height=1.0, quadratic_reward=True, 
term_if_constraints_violated=False 9 | ) # ! LQR can't deal with constraints 10 | env.reset() 11 | print('cur_state: ', env.cur_state) 12 | env.render() 13 | 14 | # ! As LQR can't deal with constraints, it is best to set theta cost to be extremely large so that the quadcopter 15 | # ! stays horizontal to minimize linearization error. 16 | env.Q = np.diag([10, 10, 1000, 1, 1, 10]) # Set theta cost to be extremely large 17 | 18 | # initialize controller 19 | controller = LQR(env, discrete_time=True) 20 | 21 | # run controller 22 | state = env.cur_state 23 | path = [state] 24 | while True: 25 | state_error = state - env.goal_state 26 | action = controller.run(state_error) 27 | action = env.goal_action + action 28 | 29 | next_state, reward, term, trunc, info = env.step(action) 30 | 31 | print(state, action, next_state, reward, term, trunc) 32 | 33 | # ! It is expected for LQR to print constraint violations warnings here. 34 | if info['constraint_violated']: 35 | print('WARNING: constraints are violated!!!!!!!!!!!!!!!!') 36 | 37 | env.render() 38 | 39 | state = next_state 40 | 41 | if term or trunc: 42 | break 43 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_mpc_inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.inverted_pendulum import InvertedPendulumEnv 4 | from robotics_algorithm.control.optimal_control.convex_mpc import ConvexMPC 5 | 6 | # discrete time model 7 | env = InvertedPendulumEnv() 8 | env.reset() 9 | print('cur_state: ', env.cur_state) 10 | env.render() 11 | 12 | # initialize controller 13 | controller = ConvexMPC(env, horizon=50) 14 | 15 | # run controller 16 | state = env.cur_state 17 | path = [state] 18 | while True: 19 | action = controller.run(state) 20 | next_state, reward, term, trunc, info = env.step(action) 21 | 22 | print(state, action, next_state, reward, term, trunc) 23 | env.render() 24 | 25 | state = next_state 26 | 27 | if term or trunc: 28 | break 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_mpc_planar_quadrotor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.planar_quadrotor_hover import PlanarQuadrotorHoverEnv 4 | from robotics_algorithm.control.optimal_control.convex_mpc import ConvexMPC 5 | 6 | # discrete time model 7 | env = PlanarQuadrotorHoverEnv( 8 | hover_pos=0.25, hover_height=1.0, quadratic_reward=True, term_if_constraints_violated=True 9 | ) 10 | env.reset() 11 | print('cur_state: ', env.cur_state) 12 | env.render() 13 | 14 | # initialize controller 15 | controller = ConvexMPC(env, horizon=20) 16 | 17 | # ! Add additional theta constraint so that linearization is always valid and thrust constraints are satisfied. 18 | # ! Compared to LQR, we don't have to manually increase theta cost in Q any more. 
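# Why a ~10 degree bound is reasonable: the hover dynamics are linearized about theta = 0, where
# sin(theta) ~= theta and cos(theta) ~= 1. A quick standalone check of the small-angle error at
# the bound used below (illustrative only, not used by the controller):
theta_bound = 0.174533  # ~10 degrees in radians
print('small-angle error at theta_bound:', abs(np.sin(theta_bound) - theta_bound))  # ~8.8e-4 rad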
19 | x, u = controller.get_decision_variables() 20 | state_low = env.state_space.low - env.goal_state + 1e-2 21 | state_high = env.state_space.high - env.goal_state - 1e-2 22 | action_low = env.action_space.low - env.goal_action + 1e-2 23 | action_high = env.action_space.high - env.goal_action - 1e-2 24 | constr = [] 25 | for t in range(controller.horizon + 1): 26 | constr += [x[t] <= state_high, x[t] >= state_low] 27 | constr += [u[t] <= action_high, u[t] >= action_low] 28 | 29 | constr += [ 30 | x[t, 2] <= 0.174533, # ~10 degree 31 | x[t, 2] >= -0.174533, # ~10 degree 32 | ] # We make theta tighter so that linearized dynamics is more valid 33 | 34 | controller.add_constraints(constr) 35 | 36 | # run controller 37 | state = env.cur_state 38 | path = [state] 39 | while True: 40 | state_error = state - env.goal_state 41 | action_error = controller.run(state_error) 42 | action = env.goal_action + action_error 43 | 44 | next_state, reward, term, trunc, info = env.step(action) 45 | 46 | print(state, action, next_state, reward, term, trunc) 47 | 48 | env.render() 49 | 50 | state = next_state 51 | 52 | if term or trunc: 53 | break 54 | -------------------------------------------------------------------------------- /examples/control/optimal_control/test_mppi_cartpole.py: -------------------------------------------------------------------------------- 1 | from robotics_algorithm.env.cartpole_balance import CartPoleEnv 2 | from robotics_algorithm.control.optimal_control.mppi import MPPI 3 | 4 | # Initialize environment 5 | env = CartPoleEnv() 6 | env.reset() 7 | 8 | controller = MPPI(env, action_mean=[0], action_std=[5.0]) 9 | 10 | # run controller 11 | state = env.cur_state 12 | while True: 13 | action = controller.run(state) 14 | 15 | next_state, reward, term, trunc, info = env.step(action) 16 | print(state, action, next_state, reward) 17 | 18 | env.render() 19 | state = next_state 20 | 21 | if term or trunc: 22 | break 23 | -------------------------------------------------------------------------------- /examples/control/path_follow_control/test_cem_path_follow.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import os.path as osp 4 | 5 | from robotics_algorithm.control.optimal_control.cem_mpc import CEMMPC 6 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 7 | 8 | CUR_DIR = osp.join(osp.dirname(osp.abspath(__file__))) 9 | 10 | # This path is computed using Hybrid A* 11 | PATH_DT = 0.1 12 | with open(osp.join(CUR_DIR, 'example_path.json'), 'r') as f: 13 | shortest_path = json.load(f) 14 | 15 | # Initialize environment 16 | env = DiffDrive2DControl(use_lookahead=False) 17 | env.reset(shortest_path, empty=False) 18 | 19 | controller = CEMMPC(env, action_mean=[0.25, 0], action_std=[0.25, math.radians(30)]) 20 | 21 | # debug 22 | env.interactive_viz = True 23 | 24 | # run controller 25 | state = env.start_state 26 | path = [state] 27 | while True: 28 | action = controller.run(state) 29 | 30 | # visualize local plan 31 | local_plan = [state] 32 | best_actions = controller.best_traj # debug 33 | new_state = env.sample_state_transition(state, action)[0] 34 | for future_action in best_actions: 35 | new_state = env.sample_state_transition(new_state, future_action)[0] 36 | local_plan.append(new_state) 37 | env.set_local_plan(local_plan) 38 | 39 | next_state, reward, term, trunc, info = env.step(action) 40 | print(state, action, next_state, reward) 41 | 42 | # nearest_idx = 
env._get_nearest_waypoint_to_state(next_state) 43 | # print(shortest_path[nearest_idx]) 44 | 45 | env.render() 46 | 47 | path.append(next_state) 48 | state = next_state 49 | 50 | if term or trunc: 51 | break 52 | -------------------------------------------------------------------------------- /examples/control/path_follow_control/test_dwa_path_follow.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import os.path as osp 4 | 5 | from robotics_algorithm.control.path_follow_control.dwa import DWA 6 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 7 | 8 | CUR_DIR = osp.join(osp.dirname(osp.abspath(__file__))) 9 | 10 | # This path is computed using Hybrid A* 11 | PATH_DT = 0.1 12 | with open(osp.join(CUR_DIR, 'example_path.json'), 'r') as f: 13 | shortest_path = json.load(f) 14 | 15 | # Initialize environment 16 | env = DiffDrive2DControl() 17 | env.reset(shortest_path, empty=False) 18 | 19 | controller = DWA(env) 20 | 21 | # debug 22 | env.interactive_viz = True 23 | 24 | # run controller 25 | state = env.start_state 26 | path = [state] 27 | while True: 28 | action = controller.run(state) 29 | 30 | # visualize local plan 31 | local_plan = controller.best_traj # debug 32 | env.set_local_plan(local_plan) 33 | 34 | next_state, reward, term, trunc, info = env.step(action) 35 | print(state, action, next_state, reward) 36 | 37 | # nearest_idx = env._get_nearest_waypoint_to_state(next_state) 38 | # print(shortest_path[nearest_idx]) 39 | 40 | env.render() 41 | 42 | path.append(next_state) 43 | state = next_state 44 | 45 | if term or trunc: 46 | break 47 | -------------------------------------------------------------------------------- /examples/control/path_follow_control/test_lqr_path_follow.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os.path as osp 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 6 | from robotics_algorithm.control.optimal_control.lqr import LQR 7 | 8 | CUR_DIR = osp.join(osp.dirname(osp.abspath(__file__))) 9 | 10 | # This path is computed using Hybrid A* 11 | PATH_DT = 0.1 12 | with open(osp.join(CUR_DIR, 'example_path.json'), 'r') as f: 13 | shortest_path = json.load(f) 14 | env = DiffDrive2DControl(lookahead_index=5, has_kinematics_constraint=False) # ! LQR can't deal with constraints 15 | env.reset(shortest_path, empty=False) 16 | env.interactive_viz = True 17 | 18 | # initialize controller 19 | controller = LQR(env, horizon=100, discrete_time=True, solve_by_iteration=True) 20 | 21 | state = env.cur_state 22 | # ! Here we set reference action to be zero velocity, implying that each time LQR is trying to bring robot to stop 23 | # ! at the lookahead state. Optionally, if planned path contains velocity information, reference action can be read 24 | # ! from the path. 25 | ref_action = np.zeros(env.action_space.state_size) 26 | env.render() 27 | 28 | # Following reference path falls into category of iterative time-varying local trajectory stabilization. 29 | # In this case, we iteratively use LQR to stabilize around a reference state that is changing as the robot moves. 30 | # In each iteration, LQR is solved with linearized dynamics around current reference state and reference action. 
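# With solve_by_iteration=True, the finite-horizon gain is presumably obtained via a backward
# Riccati recursion. A minimal standalone sketch of that recursion is given below for reference
# (hypothetical helper; the actual implementation lives in lqr.py and may differ in details):
def riccati_gain_sketch(A, B, Q, R, horizon):
    """Backward Riccati recursion for discrete-time finite-horizon LQR; returns K with u = K @ x_error."""
    P = Q
    K = np.zeros((B.shape[1], A.shape[0]))
    for _ in range(horizon):
        K = -np.linalg.solve(R + B.T @ P @ B, B.T @ P @ A)  # feedback gain at this step
        P = Q + A.T @ P @ (A + B @ K)  # cost-to-go update
    return K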
31 | while True: 32 | # Set current reference state and action to obtain new linearized dynamics 33 | cur_ref_pose = env.get_cur_lookahead_state() 34 | controller.set_ref_state_action(cur_ref_pose, ref_action) 35 | 36 | state_error = env.cur_state - cur_ref_pose 37 | action_error = controller.run(state_error) 38 | action = ref_action + action_error 39 | 40 | next_state, reward, term, trunc, info = env.step(action) 41 | 42 | print(state, action, next_state, reward, term, trunc) 43 | env.render() 44 | 45 | state = next_state 46 | 47 | if term or trunc: 48 | break 49 | -------------------------------------------------------------------------------- /examples/control/path_follow_control/test_mpc_path_follow.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os.path as osp 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 6 | from robotics_algorithm.control.optimal_control.convex_mpc import ConvexMPC 7 | 8 | CUR_DIR = osp.join(osp.dirname(osp.abspath(__file__))) 9 | 10 | PATH_DT = 0.1 11 | with open(osp.join(CUR_DIR, 'example_path.json'), 'r') as f: 12 | shortest_path = json.load(f) 13 | env = DiffDrive2DControl() 14 | env.reset(shortest_path, empty=False) 15 | env.interactive_viz = True 16 | env.render() 17 | 18 | # initialize controller 19 | controller = ConvexMPC(env, horizon=20) 20 | # TODO scipy.solve_dare fails in path follow case. We solve via DP with finite horizon 21 | controller._lqr.horizon=500 22 | controller._lqr.solve_by_iteration=True 23 | 24 | state = env.cur_state 25 | ref_action = np.zeros(env.action_space.state_size) 26 | 27 | 28 | # run controller 29 | while True: 30 | cur_ref_pose = env.get_cur_lookahead_state() 31 | controller.set_ref_state_action(cur_ref_pose, ref_action) 32 | 33 | state_error = env.cur_state - cur_ref_pose 34 | action_error = controller.run(state_error) 35 | action = ref_action + action_error 36 | 37 | next_state, reward, term, trunc, info = env.step(action) 38 | 39 | print(state, action, next_state, reward, term, trunc) 40 | env.render() 41 | 42 | state = next_state 43 | 44 | if term or trunc: 45 | break -------------------------------------------------------------------------------- /examples/control/path_follow_control/test_mppi_path_follow.py: -------------------------------------------------------------------------------- 1 | import math 2 | import json 3 | import os.path as osp 4 | 5 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 6 | from robotics_algorithm.control.optimal_control.mppi import MPPI 7 | 8 | CUR_DIR = osp.join(osp.dirname(osp.abspath(__file__))) 9 | 10 | # This path is computed using Hybrid A* 11 | PATH_DT = 0.1 12 | with open(osp.join(CUR_DIR, 'example_path.json'), 'r') as f: 13 | shortest_path = json.load(f) 14 | 15 | # Initialize environment 16 | env = DiffDrive2DControl(use_lookahead=False) 17 | env.reset(shortest_path, empty=False) 18 | env.interactive_viz = True 19 | 20 | # Init controller 21 | controller = MPPI(env, action_mean=[0.25, 0], action_std=[0.25, math.radians(30)]) 22 | 23 | # run controller 24 | state = env.start_state 25 | path = [state] 26 | while True: 27 | action = controller.run(state) 28 | 29 | # visualize local plan 30 | local_plan = [state] 31 | best_actions = controller.prev_actions # debug 32 | new_state = env.sample_state_transition(state, action)[0] 33 | for future_action in best_actions: 34 | new_state = 
env.sample_state_transition(new_state, future_action)[0] 35 | local_plan.append(new_state) 36 | env.set_local_plan(local_plan) 37 | 38 | next_state, reward, term, trunc, info = env.step(action) 39 | print(state, action, next_state, reward) 40 | 41 | # nearest_idx = env._get_nearest_waypoint_to_state(next_state) 42 | # print(shortest_path[nearest_idx]) 43 | 44 | env.render() 45 | 46 | path.append(next_state) 47 | state = next_state 48 | 49 | if term or trunc: 50 | break 51 | -------------------------------------------------------------------------------- /examples/learning/test_mc_control_on_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 5 | from robotics_algorithm.learning.reinforcement_learning.mc_control_on_policy import MCControlOnPolicy 6 | from robotics_algorithm.utils.math_utils import smooth 7 | from robotics_algorithm.utils.mdp_utils import make_greedy_policy 8 | 9 | env = FrozenLake(dense_reward=True) 10 | state, _ = env.reset() 11 | env.render() 12 | 13 | # Plan 14 | learner = MCControlOnPolicy(env) 15 | Q, _ = learner.run(num_episodes=10000) 16 | # Plot cumulative rewards over episodes 17 | episodes, learning_curve = learner.get_learning_curve() 18 | plt.plot(episodes, smooth(learning_curve, weight=0.95), label="mc") 19 | plt.ylabel("episode_reward") 20 | plt.xlabel("episodes") 21 | plt.show() 22 | print(Q) 23 | 24 | # Execute 25 | policy = make_greedy_policy(Q, env.action_space.size) # after learning, switch to use greedy policy 26 | state, _ = env.reset() 27 | path = [] 28 | all_actions = env.action_space.get_all() 29 | while True: 30 | # choose action according to epsilon-greedy policy 31 | action_probs = policy(state) 32 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 33 | action = all_actions[action_idx] 34 | next_state, reward, term, trunc, info = env.step(action) 35 | 36 | print(state, action, next_state, reward) 37 | env.render() 38 | 39 | path.append(state) 40 | state = next_state 41 | 42 | if term or trunc: 43 | break 44 | 45 | env.render() -------------------------------------------------------------------------------- /examples/learning/test_q_learning.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 5 | from robotics_algorithm.learning.reinforcement_learning.q_learning import QLearning 6 | from robotics_algorithm.utils.math_utils import smooth 7 | from robotics_algorithm.utils.mdp_utils import make_greedy_policy 8 | 9 | env = FrozenLake(dense_reward=True) 10 | state, _ = env.reset() 11 | env.render() 12 | 13 | # Plan 14 | learner = QLearning(env) 15 | Q, _ = learner.run(num_episodes=10000) 16 | # Plot cumulative rewards over episodes 17 | episodes, learning_curve = learner.get_learning_curve() 18 | plt.plot(episodes, smooth(learning_curve, weight=0.95), label='mc') 19 | plt.ylabel('episode_reward') 20 | plt.xlabel('episodes') 21 | plt.show() 22 | print(Q) 23 | 24 | # Execute 25 | policy = make_greedy_policy(Q, env.action_space.size) # after learning, switch to use greedy policy 26 | state, _ = env.reset() 27 | path = [] 28 | all_actions = env.action_space.get_all() 29 | while True: 30 | # choose action according to epsilon-greedy policy 31 | action_probs = policy(state) 32 
| action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 33 | action = all_actions[action_idx] 34 | next_state, reward, term, trunc, info = env.step(action) 35 | 36 | print(state, action, next_state, reward) 37 | env.render() 38 | 39 | path.append(state) 40 | state = next_state 41 | 42 | if term or trunc: 43 | break 44 | 45 | env.render() 46 | -------------------------------------------------------------------------------- /examples/learning/test_sarsa.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 5 | from robotics_algorithm.learning.reinforcement_learning.sarsa import SARSA 6 | from robotics_algorithm.utils.math_utils import smooth 7 | from robotics_algorithm.utils.mdp_utils import make_greedy_policy 8 | 9 | env = FrozenLake(dense_reward=True) 10 | state, _ = env.reset() 11 | env.render() 12 | 13 | # Plan 14 | learner = SARSA(env) 15 | Q, _ = learner.run(num_episodes=10000) 16 | # Plot cumulative rewards over episodes 17 | episodes, learning_curve = learner.get_learning_curve() 18 | plt.plot(episodes, smooth(learning_curve, weight=0.95), label='mc') 19 | plt.ylabel('episode_reward') 20 | plt.xlabel('episodes') 21 | plt.show() 22 | print(Q) 23 | 24 | # Execute 25 | policy = make_greedy_policy(Q, env.action_space.size) # after learning, switch to use greedy policy 26 | state, _ = env.reset() 27 | path = [] 28 | all_actions = env.action_space.get_all() 29 | while True: 30 | # choose action according to epsilon-greedy policy 31 | action_probs = policy(state) 32 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 33 | action = all_actions[action_idx] 34 | next_state, reward, term, trunc, info = env.step(action) 35 | 36 | print(state, action, next_state, reward) 37 | env.render() 38 | 39 | path.append(state) 40 | state = next_state 41 | 42 | if term or trunc: 43 | break 44 | 45 | env.render() 46 | -------------------------------------------------------------------------------- /examples/planning/mdp/test_mcts.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 4 | from robotics_algorithm.env.classic_mdp.cliff_walking import CliffWalking 5 | from robotics_algorithm.planning.mdp.mcts import MCTS 6 | 7 | env = FrozenLake(dense_reward=True) # For online tree search, dense reward needs to be enabled. 8 | # env = CliffWalking(dense_reward=True) # For online tree search, dense reward needs to be enabled. 
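# During the selection phase, MCTS typically scores children with a UCB1-style rule that trades
# off exploitation and exploration. A minimal sketch for reference (the exact rule inside mcts.py
# is not shown here and may differ):
def ucb1_score(mean_value, parent_visits, child_visits, c=1.414):
    """UCB1: average return plus an exploration bonus that shrinks as the child gets visited more."""
    return mean_value + c * np.sqrt(np.log(parent_visits) / child_visits)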
9 | state, _ = env.reset() 10 | env.render() 11 | 12 | planner = MCTS(env) 13 | 14 | # Execute 15 | path = [] 16 | while True: 17 | # Call tree search online 18 | action = planner.run(state) 19 | new_state, reward, term, trunc, info = env.step(action) 20 | 21 | print(state, action, new_state, reward) 22 | 23 | env.render() 24 | 25 | path.append(state) 26 | state = new_state 27 | 28 | if term or trunc: 29 | break 30 | 31 | # env.add_path(path) 32 | env.render() 33 | -------------------------------------------------------------------------------- /examples/planning/mdp/test_policy_iteration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.classic_mdp.windy_grid_world import WindyGridWorld 4 | from robotics_algorithm.env.classic_mdp.cliff_walking import CliffWalking 5 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 6 | from robotics_algorithm.planning.mdp.policy_iteration import PolicyIteration 7 | 8 | envs = [ 9 | FrozenLake(dense_reward=True), 10 | CliffWalking(), 11 | WindyGridWorld() 12 | ] 13 | 14 | for env in envs: 15 | state, _ = env.reset() 16 | env.render() 17 | 18 | # Plan 19 | planner = PolicyIteration(env) 20 | Q, policy = planner.run() 21 | 22 | print(Q) 23 | 24 | # Execute 25 | path = [] 26 | all_actions = env.action_space.get_all() 27 | while True: 28 | # choose action according to epsilon-greedy policy 29 | action_probs = policy(state) 30 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 31 | action = all_actions[action_idx] 32 | 33 | next_state, reward, term, trunc, info = env.step(action) 34 | env.render() 35 | 36 | print(state, action, reward) 37 | 38 | path.append(state) 39 | state = next_state 40 | 41 | if term or trunc: 42 | break 43 | 44 | # env.add_path(path) 45 | env.render() -------------------------------------------------------------------------------- /examples/planning/mdp/test_policy_tree_search.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 4 | from robotics_algorithm.planning.mdp.policy_tree_search import PolicyTreeSearch 5 | 6 | 7 | env = FrozenLake(dense_reward=True) 8 | state, _ = env.reset() 9 | env.render() 10 | 11 | # Plan 12 | planner = PolicyTreeSearch(env, max_depth=5) 13 | 14 | # Execute 15 | path = [] 16 | while True: 17 | # choose action according to the computed policy 18 | action = planner.run(state) 19 | next_state, reward, term, trunc, info = env.step(action) 20 | env.render() 21 | 22 | print(state, action, next_state, reward) 23 | 24 | path.append(state) 25 | state = next_state 26 | 27 | if term or trunc: 28 | break 29 | 30 | # env.add_path(path) 31 | env.render() 32 | -------------------------------------------------------------------------------- /examples/planning/mdp/test_value_iteration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.classic_mdp.windy_grid_world import WindyGridWorld 4 | from robotics_algorithm.env.classic_mdp.cliff_walking import CliffWalking 5 | from robotics_algorithm.env.classic_mdp.frozen_lake import FrozenLake 6 | from robotics_algorithm.planning.mdp.value_iteration import ValueIteration 7 | 8 | envs = [FrozenLake(dense_reward=True), CliffWalking(), WindyGridWorld()] 9 | 10 | for env in envs: 11 | state, _ = env.reset() 12 | 
env.render() 13 | 14 | # Plan 15 | planner = ValueIteration(env) 16 | Q, policy = planner.run() 17 | 18 | print(Q) 19 | 20 | # Execute 21 | path = [] 22 | all_actions = env.action_space.get_all() 23 | while True: 24 | # choose action according to epsilon-greedy policy 25 | action_probs = policy(state) 26 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 27 | action = all_actions[action_idx] 28 | 29 | next_state, reward, term, trunc, info = env.step(action) 30 | env.render() 31 | 32 | print(state, action, reward) 33 | 34 | path.append(state) 35 | state = next_state 36 | 37 | if term or trunc: 38 | break 39 | 40 | # env.add_path(path) 41 | env.render() 42 | -------------------------------------------------------------------------------- /examples/planning/motion_planning/test_prm.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.base_env import BaseEnv 6 | from robotics_algorithm.env.continuous_2d.omni_2d_planning import OmniDrive2DPlanning 7 | from robotics_algorithm.planning.motion_planning.prm import ProbabilisticRoadmap 8 | 9 | # Initialize environment 10 | env = OmniDrive2DPlanning() 11 | 12 | # -------- Settings ------------ 13 | FIX_MAZE = True 14 | 15 | 16 | # -------- Helper Functions ------------- 17 | def sample_func(env: BaseEnv): 18 | random_state = env.random_state() 19 | return random_state 20 | 21 | 22 | def is_state_valid(env: BaseEnv, state: np.ndarray): 23 | return env.is_state_valid(state) 24 | 25 | 26 | def is_edge_valid(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 27 | return env.is_state_transition_valid(state1, None, state2) 28 | 29 | 30 | # -------- Main Code ---------- 31 | # add default obstacles 32 | env.reset(random_env=not FIX_MAZE) 33 | env.render() 34 | 35 | # initialize planner 36 | planner = ProbabilisticRoadmap( 37 | env, sample_func, is_state_valid, is_edge_valid, num_of_samples=100, num_neighbors=10 38 | ) # 1000 samples out of total 2500 vertex. 
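# compute_roadmap() below is the offline phase of PRM: sample valid states, then try to connect
# each sample to its nearest neighbours with collision-free edges. A minimal standalone sketch
# with hypothetical names (the real logic lives in prm.py and may differ):
def build_roadmap_sketch(env, num_samples=100, k=10):
    samples = []
    while len(samples) < num_samples:
        s = sample_func(env)
        if is_state_valid(env, s):
            samples.append(np.array(s))
    edges = []
    for i, s in enumerate(samples):
        dists = [np.linalg.norm(s - other) for other in samples]
        for j in np.argsort(dists)[1 : k + 1]:  # index 0 is the sample itself
            if is_edge_valid(env, s, samples[int(j)]):
                edges.append((i, int(j)))
    return samples, edges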
39 | 40 | # offline portion of PRM 41 | start_time = time.time() 42 | planner.compute_roadmap() 43 | end_time = time.time() 44 | print('TestPRM, offline takes {} seconds'.format(end_time - start_time)) 45 | 46 | # run path planner 47 | start_time = time.time() 48 | start = env.start_state 49 | goal = env.goal_state 50 | res, shortest_path, shortest_path_len = planner.get_path(start, goal) 51 | end_time = time.time() 52 | print('TestPRM, online takes {} seconds'.format(end_time - start_time)) 53 | 54 | # visualize roadmap 55 | roadmap = planner.get_roadmap() 56 | for state in roadmap.nodes: 57 | if state != tuple(goal.tolist()) and state != tuple(start.tolist()): 58 | env.add_state_samples(state) 59 | 60 | if not res: 61 | print('TestPRM, no path is available!') 62 | else: 63 | # visualize path 64 | env.add_state_path(shortest_path, interpolate=True) 65 | print('TestPRM, found path of len {}'.format(shortest_path_len)) 66 | 67 | env.render() 68 | -------------------------------------------------------------------------------- /examples/planning/motion_planning/test_rrt.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | from robotics_algorithm.env.base_env import BaseEnv 5 | from robotics_algorithm.env.continuous_2d.omni_2d_planning import OmniDrive2DPlanning 6 | from robotics_algorithm.planning.motion_planning.rrt import RRT 7 | 8 | # Initialize environment 9 | env = OmniDrive2DPlanning() 10 | 11 | # -------- Settings ------------ 12 | FIX_MAZE = True 13 | 14 | 15 | # -------- Helper Functions ------------- 16 | def sample_func(env: BaseEnv): 17 | random_state = env.random_state() 18 | return random_state 19 | 20 | 21 | def is_state_valid(env: BaseEnv, state: np.ndarray): 22 | return env.is_state_valid(state) 23 | 24 | 25 | def is_edge_valid(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 26 | return env.is_state_transition_valid(state1, None, state2) 27 | 28 | 29 | def vertex_expand_func(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 30 | path = env.extend(np.array(state1), np.array(state2)) # TODO refactor. 
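    # extend() is assumed to steer from state1 towards state2 (possibly stopping early at a step
    # limit or an obstacle) and to return the intermediate states; the planner only needs the last
    # reached state and the distance travelled from state1, computed below.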
31 | path_len = np.linalg.norm(np.array(path[-1]) - np.array(state1)) 32 | return path[-1], path_len 33 | 34 | 35 | # -------- Main Code ---------- 36 | # add default obstacles 37 | env.reset(random_env=not FIX_MAZE) 38 | env.render() 39 | 40 | # initialize planner 41 | planner = RRT(env, sample_func, vertex_expand_func, num_of_samples=500) 42 | 43 | # run path planner 44 | start = env.start_state 45 | goal = env.goal_state 46 | start_time = time.time() 47 | res, shortest_path, shortest_path_len = planner.run(start, goal) 48 | end_time = time.time() 49 | print('TestRRT, online takes {} seconds'.format(end_time - start_time)) 50 | 51 | # visualize tree 52 | tree = planner.get_tree() 53 | for state in tree.nodes: 54 | if state != tuple(goal.tolist()) and state != tuple(start.tolist()): 55 | env.add_state_samples(state) 56 | 57 | if not res: 58 | print('TestRRT, no path is available!') 59 | else: 60 | # visualize path 61 | env.add_state_path(shortest_path, interpolate=True) 62 | print('TestRRT, found path of len {}'.format(shortest_path_len)) 63 | 64 | env.render() 65 | -------------------------------------------------------------------------------- /examples/planning/motion_planning/test_rrt_connect.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | from robotics_algorithm.env.base_env import BaseEnv 5 | from robotics_algorithm.env.continuous_2d.omni_2d_planning import OmniDrive2DPlanning 6 | from robotics_algorithm.planning.motion_planning.rrt import RRTConnect 7 | 8 | # Initialize environment 9 | env = OmniDrive2DPlanning() 10 | 11 | # -------- Settings ------------ 12 | FIX_MAZE = True 13 | 14 | 15 | # -------- Helper Functions ------------- 16 | def sample_func(env: BaseEnv): 17 | random_state = env.random_state() 18 | return random_state 19 | 20 | 21 | def is_state_valid(env: BaseEnv, state: np.ndarray): 22 | return env.is_state_valid(state) 23 | 24 | 25 | def is_edge_valid(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 26 | return env.is_state_transition_valid(state1, None, state2) 27 | 28 | 29 | def vertex_expand_func(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 30 | path = env.extend(np.array(state1), np.array(state2)) # TODO refactor. 
31 | path_len = np.linalg.norm(np.array(path[-1]) - np.array(state1)) 32 | return path[-1], path_len 33 | 34 | 35 | # -------- Main Code ---------- 36 | # add default obstacles 37 | env.reset(random_env=not FIX_MAZE) 38 | env.render() 39 | 40 | # initialize planner 41 | planner = RRTConnect(env, sample_func, vertex_expand_func, num_of_samples=500) 42 | 43 | # run path planner 44 | start = env.start_state 45 | goal = env.goal_state 46 | start_time = time.time() 47 | res, shortest_path, shortest_path_len = planner.run(start, goal) 48 | end_time = time.time() 49 | print('TestRRT, online takes {} seconds'.format(end_time - start_time)) 50 | 51 | # visualize tree 52 | tree = planner.get_tree() 53 | for state in tree.nodes: 54 | if state != tuple(goal.tolist()) and state != tuple(start.tolist()): 55 | env.add_state_samples(state) 56 | 57 | if not res: 58 | print('TestRRT, no path is available!') 59 | else: 60 | # visualize path 61 | env.add_state_path(shortest_path, interpolate=True) 62 | print('TestRRT, found path of len {}'.format(shortest_path_len)) 63 | 64 | env.render() 65 | -------------------------------------------------------------------------------- /examples/planning/motion_planning/test_rrt_star.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | from robotics_algorithm.env.base_env import BaseEnv 5 | from robotics_algorithm.env.continuous_2d.omni_2d_planning import OmniDrive2DPlanning 6 | from robotics_algorithm.planning.motion_planning.rrt_star import RRTStar 7 | 8 | # Initialize environment 9 | env = OmniDrive2DPlanning() 10 | 11 | # -------- Settings ------------ 12 | FIX_MAZE = True 13 | 14 | 15 | # -------- Helper Functions ------------- 16 | def sample_func(env: BaseEnv): 17 | random_state = env.random_state() 18 | return random_state 19 | 20 | 21 | def is_state_valid(env: BaseEnv, state: np.ndarray): 22 | return env.is_state_valid(state) 23 | 24 | 25 | def is_edge_valid(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 26 | return env.is_state_transition_valid(state1, None, state2) 27 | 28 | 29 | def vertex_expand_func(env: BaseEnv, state1: np.ndarray, state2: np.ndarray): 30 | path = env.extend(np.array(state1), np.array(state2)) # TODO refactor. 
31 | path_len = np.linalg.norm(np.array(path[-1]) - np.array(state1)) 32 | return path[-1], path_len 33 | 34 | 35 | def distance_func(env, state1, state2): 36 | return np.linalg.norm(np.array(state1) - np.array(state2)) 37 | 38 | 39 | # -------- Main Code ---------- 40 | # add default obstacles 41 | env.reset(random_env=not FIX_MAZE) 42 | env.render() 43 | 44 | # initialize planner 45 | planner = RRTStar(env, sample_func, vertex_expand_func, is_edge_valid, distance_func, num_of_samples=1000) 46 | 47 | # run path planner 48 | start = env.start_state 49 | goal = env.goal_state 50 | start_time = time.time() 51 | res, shortest_path, shortest_path_len = planner.run(start, goal) 52 | end_time = time.time() 53 | print('TestRRTStar, online takes {} seconds'.format(end_time - start_time)) 54 | 55 | # visualize tree 56 | tree = planner.get_tree() 57 | for state in tree.nodes: 58 | if state != tuple(goal.tolist()) and state != tuple(start.tolist()): 59 | env.add_state_samples(state) 60 | 61 | if not res: 62 | print('TestRRTStar, no path is available!') 63 | else: 64 | # visualize path 65 | env.add_state_path(shortest_path, interpolate=True) 66 | print('TestRRTStar, found path of len {}'.format(shortest_path_len)) 67 | 68 | env.render() 69 | -------------------------------------------------------------------------------- /examples/planning/path_planning/test_a_star.py: -------------------------------------------------------------------------------- 1 | import time 2 | import math 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.discrete_world_2d import GridWorldMaze 6 | from robotics_algorithm.planning.path_planning.a_star import AStar 7 | 8 | # Initialize environment 9 | env = GridWorldMaze() 10 | 11 | # -------- Settings ------------ 12 | FIX_MAZE = True 13 | 14 | 15 | # -------- Helper Functions ------------- 16 | def heuristic_func(v, goal): 17 | # simply the distance between v and goal 18 | v_x, v_y = v 19 | goal_x, goal_y = goal 20 | return math.sqrt((goal_x - v_x) ** 2 + (goal_y - v_y) ** 2) 21 | 22 | 23 | def state_key_func(state: np.ndarray): 24 | return tuple(state.tolist()) 25 | 26 | 27 | # -------- Main Code ---------- 28 | 29 | # add random obstacle to environment 30 | env.reset(random_env=not FIX_MAZE) 31 | env.render() 32 | 33 | 34 | # initialize planner 35 | planner = AStar(env, heuristic_func, state_key_func) 36 | 37 | # run path planner 38 | start = env.start_state 39 | goal = env.goal_state 40 | start_time = time.time() 41 | res, shortest_path, shortest_path_len = planner.run(start, goal) 42 | end_time = time.time() 43 | print("TestAStar, takes {} seconds".format(end_time - start_time)) 44 | 45 | if not res: 46 | print("TestAStar, no path is available!") 47 | else: 48 | print("TestAStar, found path of len {}".format(shortest_path_len)) 49 | # visualize path 50 | env.add_path(shortest_path) 51 | 52 | env.render() 53 | -------------------------------------------------------------------------------- /examples/planning/path_planning/test_dijkstra.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.discrete_world_2d import GridWorldMaze 6 | from robotics_algorithm.planning.path_planning.dijkstra import Dijkstra 7 | 8 | # Initialize environment 9 | env = GridWorldMaze() 10 | 11 | # -------- Settings ------------ 12 | FIX_MAZE = True 13 | 14 | 15 | # -------- Helper Functions ------------- 16 | def state_key_func(state: np.ndarray): 17 | return tuple(state.tolist()) 18 
| 19 | 20 | # -------- Main Code ---------- 21 | 22 | # add random obstacle to environment 23 | env.reset(random_env=not FIX_MAZE) 24 | env.render() 25 | 26 | # initialize planner 27 | planner = Dijkstra(env, state_key_func) 28 | 29 | # run path planner 30 | start = env.start_state 31 | goal = env.goal_state 32 | start_time = time.time() 33 | res, shortest_path, shortest_path_len = planner.run(start, goal) 34 | end_time = time.time() 35 | print("TestDijkstra, takes {} seconds".format(end_time - start_time)) 36 | 37 | if not res: 38 | print("TestDijkstra, no path is available!") 39 | else: 40 | print("TestDijkstra, found path of len {}".format(shortest_path_len)) 41 | # visualize path 42 | env.add_path(shortest_path) 43 | 44 | env.render() 45 | -------------------------------------------------------------------------------- /examples/planning/path_planning/test_hybrid_a_star.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | 4 | import numpy as np 5 | 6 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_planning import DiffDrive2DPlanning 7 | from robotics_algorithm.planning.path_planning.hybrid_a_star import HybridAStar 8 | 9 | # Initialize environment 10 | env = DiffDrive2DPlanning(discrete_action=True) # use discrete action for Hybrid A* 11 | 12 | # -------- Settings ------------ 13 | FIX_MAZE = True 14 | 15 | 16 | # -------- Helper Functions ------------- 17 | def heuristic_func(state, goal): 18 | # simply the Euclidean distance between v and goal, which is an underestimate of the actual SE2 distance 19 | v_x, v_y, _ = state 20 | goal_x, goal_y, _ = goal 21 | return math.sqrt((goal_x - v_x) ** 2 + (goal_y - v_y) ** 2) 22 | 23 | 24 | def state_key_func(state: np.ndarray): 25 | return env.calc_state_key(state) 26 | 27 | 28 | # -------- Main Code ---------- 29 | env.reset(random_env=not FIX_MAZE) 30 | start = env.start_state 31 | goal = env.goal_state 32 | env.render() 33 | 34 | # initialize planner 35 | planner = HybridAStar(env, heuristic_func, state_key_func) 36 | 37 | # run path planner 38 | start_time = time.time() 39 | res, shortest_path, shortest_path_len = planner.run(start, goal) 40 | end_time = time.time() 41 | print('TestHybridAStar, takes {} seconds'.format(end_time - start_time)) 42 | 43 | if not res: 44 | print('TestHybridAStar, no path is available!') 45 | else: 46 | print('TestHybridAStar, found path of len {}'.format(shortest_path_len)) 47 | # visualize path 48 | path = [] 49 | for v in shortest_path: 50 | path.append(v) 51 | 52 | env.add_action_path(path) 53 | 54 | env.render() 55 | -------------------------------------------------------------------------------- /examples/realisitic_examples/cost_aware_diff_drive_path_planning/path_with_cost_penalty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/examples/realisitic_examples/cost_aware_diff_drive_path_planning/path_with_cost_penalty.png -------------------------------------------------------------------------------- /examples/realisitic_examples/cost_aware_diff_drive_path_planning/path_without_cost_penalty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/examples/realisitic_examples/cost_aware_diff_drive_path_planning/path_without_cost_penalty.png 
-------------------------------------------------------------------------------- /examples/state_estimation/test_amcl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_localization import DiffDrive2DLocalization 5 | from robotics_algorithm.state_estimation.amcl import AMCL 6 | 7 | 8 | def spiral_velocity(spiral_radius, spiral_growth_rate, time, linear_velocity): 9 | """ 10 | Calculate the linear velocity and angular velocity of a differential robot to drive in a spiral. 11 | 12 | Args: 13 | spiral_radius (float): The initial radius of the spiral. 14 | spiral_growth_rate (float): The rate at which the spiral's radius grows. 15 | time (float): The current time. 16 | linear_velocity (float): The desired linear velocity of the robot. 17 | 18 | Returns: 19 | tuple: The linear velocity and angular velocity of the robot. 20 | """ 21 | # Calculate the current radius of the spiral 22 | current_radius = spiral_radius + spiral_growth_rate * time 23 | 24 | # Calculate the angular velocity 25 | angular_velocity = linear_velocity / current_radius 26 | 27 | return linear_velocity, angular_velocity 28 | 29 | 30 | # Initialize environment 31 | env = DiffDrive2DLocalization(action_dt=0.1, obs_noise_std=[0.1, 0.1, 0.1]) 32 | obs, _ = env.reset(empty=True) 33 | 34 | # Manually clamp env start state so that robot does not move outside of env when doing the spiral 35 | start_state = env.cur_state.copy() 36 | start_state[0] = min(max(env.size / 4.0, start_state[0]), env.size / 4.0 * 3.0) 37 | start_state[1] = min(max(env.size / 4.0, start_state[1]), env.size / 4.0 * 3.0) 38 | env.start_state = start_state 39 | env.cur_state = start_state 40 | obs = start_state 41 | env.render() 42 | 43 | # Initialize filter 44 | filter = AMCL(env, min_particles=50, max_particles=500) 45 | print(filter.num_of_cur_particles) 46 | # filter.set_initial_state(env.cur_state) 47 | 48 | # Add initial state 49 | # Step env with random actions 50 | true_states = [] 51 | filter_states = [] 52 | obss = [] 53 | true_states.append(env.cur_state) 54 | filter_states.append(filter.get_state()) 55 | obss.append(obs) 56 | 57 | max_steps = 500 58 | num_particles = [filter.num_of_cur_particles] 59 | for i in range(max_steps): 60 | print(f'step: {i}/{max_steps}') 61 | # action = [random.uniform(0.0, 0.5), random.uniform(0, 0.5)] 62 | action = spiral_velocity(1.0, 0.01, i * env.action_dt, 0.2) 63 | new_obs, reward, term, trunc, info = env.step(action) 64 | 65 | filter.run(action, new_obs) 66 | 67 | true_states.append(env.cur_state) 68 | filter_states.append(filter.get_state()) 69 | obss.append(new_obs) 70 | num_particles.append(filter.num_of_cur_particles) 71 | 72 | if term or trunc: 73 | break 74 | 75 | # print(true_states) 76 | # print(filter_states) 77 | 78 | # calculate RMSE 79 | true_states = np.array(true_states) 80 | filter_states = np.array(filter_states) 81 | 82 | rmse = np.sqrt(np.mean((true_states - filter_states) ** 2, axis=0)) 83 | print('RMSE: {}'.format(rmse)) 84 | 85 | env.add_state_path(true_states, id='groundtruth') 86 | env.add_state_path(obss, id='observed') 87 | env.add_state_path(filter_states, id='predicted') 88 | env.render() 89 | 90 | # Plot error and num_particles over time. 
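# The particle-count curve plotted below is the distinguishing feature of AMCL: the number of
# particles is adapted between min_particles and max_particles as the belief converges. A common
# way to choose the sample size is the KLD-sampling bound, sketched here for reference only
# (amcl.py may use a different criterion):
def kld_sample_bound(k_bins, epsilon=0.05, z_quantile=2.326):
    """Particles needed to keep the KL divergence between the sampled and true belief below
    epsilon with ~99% confidence (z_quantile is the corresponding standard-normal quantile)."""
    if k_bins <= 1:
        return 1
    a = 2.0 / (9.0 * (k_bins - 1))
    return int(np.ceil((k_bins - 1) / (2.0 * epsilon) * (1.0 - a + np.sqrt(a) * z_quantile) ** 3))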
91 | error = np.linalg.norm(true_states - filter_states, axis=-1) 92 | fig, ax = plt.subplots(2, 1) 93 | ax[0].plot(error) 94 | ax[0].set_title('Error over time') 95 | ax[1].plot(num_particles) 96 | ax[1].set_title('Number of particles over time') 97 | plt.show() 98 | -------------------------------------------------------------------------------- /examples/state_estimation/test_discrete_bayes_filter.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from robotics_algorithm.env.discrete_world_1d import DiscreteWorld1D 5 | from robotics_algorithm.state_estimation.discrete_bayes_filter import DiscreteBayesFilter 6 | 7 | # env 8 | env = DiscreteWorld1D() 9 | obs, _ = env.reset() 10 | env.render() 11 | 12 | # filter 13 | filter = DiscreteBayesFilter(env) 14 | filter.set_initial_state(env.cur_state) 15 | 16 | # Step env with random actions 17 | true_states = [] 18 | filter_states = [] 19 | obss = [] 20 | true_states.append(env.cur_state) 21 | filter_states.append(filter.get_state()) 22 | obss.append(obs) 23 | max_steps = 100 24 | for i in range(max_steps): 25 | action = env.action_space.sample() 26 | new_obs, reward, term, trunc, info = env.step(action) 27 | 28 | filter.run(action, new_obs) 29 | 30 | true_states.append(env.cur_state) 31 | filter_states.append(filter.get_state()) 32 | obss.append(new_obs) 33 | 34 | if term or trunc: 35 | break 36 | 37 | 38 | # calculate RMSE 39 | true_states = np.array(true_states) 40 | filter_states = np.array(filter_states) 41 | rmse = np.sqrt(np.mean((true_states - filter_states) ** 2, axis=0)) 42 | print("RMSE: {}".format(rmse)) 43 | 44 | # plot result 45 | env_steps = len(true_states) 46 | t = np.arange(env_steps) 47 | fig = plt.figure() 48 | plt.plot(t, [true_states[i][0] for i in range(env_steps)], "k", label="groundtruth") 49 | plt.plot(t, [filter_states[i][0] for i in range(env_steps)], "b", label="predicted") 50 | plt.plot(t, [obss[i][0] for i in range(env_steps)], "r", label="observed") 51 | plt.ylabel("position") 52 | plt.legend() 53 | plt.show() -------------------------------------------------------------------------------- /examples/state_estimation/test_extended_kalman_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_localization import DiffDrive2DLocalization 5 | from robotics_algorithm.state_estimation.extended_kalman_filter import ExtendedKalmanFilter 6 | 7 | 8 | def spiral_velocity(spiral_radius, spiral_growth_rate, time, linear_velocity): 9 | """ 10 | Calculate the linear velocity and angular velocity of a differential robot to drive in a spiral. 11 | 12 | Args: 13 | spiral_radius (float): The initial radius of the spiral. 14 | spiral_growth_rate (float): The rate at which the spiral's radius grows. 15 | time (float): The current time. 16 | linear_velocity (float): The desired linear velocity of the robot. 17 | 18 | Returns: 19 | tuple: The linear velocity and angular velocity of the robot. 
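For example (illustrative numbers): spiral_radius=1.0, spiral_growth_rate=0.01, time=10.0 and linear_velocity=0.2 give current_radius = 1.0 + 0.01 * 10.0 = 1.1, so the function returns (0.2, 0.2 / 1.1 ≈ 0.182).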
20 | """ 21 | # Calculate the current radius of the spiral 22 | current_radius = spiral_radius + spiral_growth_rate * time 23 | 24 | # Calculate the angular velocity 25 | angular_velocity = linear_velocity / current_radius 26 | 27 | return linear_velocity, angular_velocity 28 | 29 | 30 | # Initialize environment 31 | env = DiffDrive2DLocalization(action_dt=0.1, obs_noise_std=[0.1, 0.1, 0.1]) 32 | obs, _ = env.reset(empty=True) 33 | 34 | # Manually clamp env start state so that robot does not move outside of env when doing the spiral 35 | start_state = env.cur_state.copy() 36 | start_state[0] = min(max(env.size / 4.0, start_state[0]), env.size / 4.0 * 3.0) 37 | start_state[1] = min(max(env.size / 4.0, start_state[1]), env.size / 4.0 * 3.0) 38 | env.start_state = start_state 39 | env.cur_state = start_state 40 | obs = start_state 41 | env.render() 42 | 43 | # Initialize filter 44 | filter = ExtendedKalmanFilter(env) 45 | # filter.set_initial_state(env.cur_state) 46 | 47 | # Add initial state 48 | # Step env with random actions 49 | true_states = [] 50 | filter_states = [] 51 | obss = [] 52 | true_states.append(env.cur_state) 53 | filter_states.append(filter.get_state()) 54 | obss.append(obs) 55 | 56 | max_steps = 500 57 | for i in range(max_steps): 58 | print(f'step: {i}/{max_steps}') 59 | # action = [random.uniform(0.0, 0.5), random.uniform(0, 0.5)] 60 | action = spiral_velocity(1.0, 0.01, i * env.action_dt, 0.2) 61 | new_obs, reward, term, trunc, info = env.step(action) 62 | 63 | filter.run(action, new_obs) 64 | 65 | true_states.append(env.cur_state) 66 | filter_states.append(filter.get_state()) 67 | obss.append(new_obs) 68 | 69 | if term or trunc: 70 | break 71 | 72 | # print(true_states) 73 | # print(filter_states) 74 | 75 | # calculate RMSE 76 | true_states = np.array(true_states) 77 | filter_states = np.array(filter_states) 78 | 79 | rmse = np.sqrt(np.mean((true_states - filter_states) ** 2, axis=0)) 80 | print('RMSE: {}'.format(rmse)) 81 | 82 | env.add_state_path(true_states, id='groundtruth') 83 | env.add_state_path(obss, id='observed') 84 | env.add_state_path(filter_states, id='predicted') 85 | env.render() 86 | 87 | # Plot error over time. 
88 | error = np.linalg.norm(true_states - filter_states, axis=-1) 89 | plt.plot(error) 90 | plt.title('Error over time') 91 | plt.show() 92 | -------------------------------------------------------------------------------- /examples/state_estimation/test_kalman_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import matplotlib.pyplot as plt 4 | 5 | from robotics_algorithm.env.continuous_1d.double_integrator_env import DoubleIntegratorEnv 6 | from robotics_algorithm.state_estimation.kalman_filter import KalmanFilter 7 | 8 | # Env 9 | env = DoubleIntegratorEnv(observation_noise_std=0.1, state_transition_noise_std=0.1) 10 | obs, _ = env.reset() 11 | env.render() 12 | 13 | # Create filter 14 | filter = KalmanFilter(env) 15 | filter.set_initial_state(env.cur_state) 16 | 17 | # Step env with random actions 18 | true_states = [] 19 | filter_states = [] 20 | obss = [] 21 | true_states.append(env.cur_state) 22 | filter_states.append(filter.get_state()) 23 | obss.append(obs) 24 | max_steps = 100 25 | for i in range(max_steps): 26 | action = [random.uniform(-1.0, 1.0)] 27 | new_obs, reward, term, trunc, info = env.step(action) 28 | 29 | filter.run(action, new_obs) 30 | 31 | true_states.append(env.cur_state) 32 | filter_states.append(filter.get_state()) 33 | obss.append(new_obs) 34 | 35 | if term or trunc: 36 | break 37 | 38 | 39 | # calculate RMSE 40 | true_states = np.array(true_states) 41 | filter_states = np.array(filter_states) 42 | rmse = np.sqrt(np.mean((true_states - filter_states) ** 2, axis=0)) 43 | print("RMSE: {}".format(rmse)) 44 | 45 | # plot result 46 | env_steps = len(true_states) 47 | t = np.arange(env_steps) 48 | fig = plt.figure() 49 | plt.plot(t, [true_states[i][0] for i in range(env_steps)], "k", label="groundtruth") 50 | plt.plot(t, [filter_states[i][0] for i in range(env_steps)], "b", label="predicted") 51 | plt.plot(t, [obss[i][0] for i in range(env_steps)], "r", label="observed") 52 | plt.ylabel("position") 53 | plt.legend() 54 | plt.show() 55 | -------------------------------------------------------------------------------- /examples/state_estimation/test_particle_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_localization import DiffDrive2DLocalization 5 | from robotics_algorithm.state_estimation.particle_filter import ParticleFilter 6 | 7 | 8 | def spiral_velocity(spiral_radius, spiral_growth_rate, time, linear_velocity): 9 | """ 10 | Calculate the linear velocity and angular velocity of a differential robot to drive in a spiral. 11 | 12 | Args: 13 | spiral_radius (float): The initial radius of the spiral. 14 | spiral_growth_rate (float): The rate at which the spiral's radius grows. 15 | time (float): The current time. 16 | linear_velocity (float): The desired linear velocity of the robot. 17 | 18 | Returns: 19 | tuple: The linear velocity and angular velocity of the robot. 
20 | """ 21 | # Calculate the current radius of the spiral 22 | current_radius = spiral_radius + spiral_growth_rate * time 23 | 24 | # Calculate the angular velocity 25 | angular_velocity = linear_velocity / current_radius 26 | 27 | return linear_velocity, angular_velocity 28 | 29 | 30 | # Initialize environment 31 | env = DiffDrive2DLocalization(action_dt=0.1, obs_noise_std=[0.1, 0.1, 0.1]) 32 | obs, _ = env.reset(empty=True) 33 | 34 | # Manually clamp env start state so that robot does not move outside of env when doing the spiral 35 | start_state = env.cur_state.copy() 36 | start_state[0] = min(max(env.size / 4.0, start_state[0]), env.size / 4.0 * 3.0) 37 | start_state[1] = min(max(env.size / 4.0, start_state[1]), env.size / 4.0 * 3.0) 38 | env.start_state = start_state 39 | env.cur_state = start_state 40 | obs = start_state 41 | env.render() 42 | 43 | # Initialize filter 44 | filter = ParticleFilter(env, num_of_particles=250) 45 | # filter.set_initial_state(env.cur_state) 46 | 47 | # Add initial state 48 | # Step env with random actions 49 | true_states = [] 50 | filter_states = [] 51 | obss = [] 52 | true_states.append(env.cur_state) 53 | filter_states.append(filter.get_state()) 54 | obss.append(obs) 55 | 56 | max_steps = 500 57 | for i in range(max_steps): 58 | print(f'step: {i}/{max_steps}') 59 | # action = [random.uniform(0.0, 0.5), random.uniform(0, 0.5)] 60 | action = spiral_velocity(1.0, 0.01, i * env.action_dt, 0.2) 61 | new_obs, reward, term, trunc, info = env.step(action) 62 | 63 | filter.run(action, new_obs) 64 | 65 | true_states.append(env.cur_state) 66 | filter_states.append(filter.get_state()) 67 | obss.append(new_obs) 68 | 69 | if term or trunc: 70 | break 71 | 72 | # print(true_states) 73 | # print(filter_states) 74 | 75 | # calculate RMSE 76 | true_states = np.array(true_states) 77 | filter_states = np.array(filter_states) 78 | 79 | rmse = np.sqrt(np.mean((true_states - filter_states) ** 2, axis=0)) 80 | print('RMSE: {}'.format(rmse)) 81 | 82 | env.add_state_path(true_states, id='groundtruth') 83 | env.add_state_path(obss, id='observed') 84 | env.add_state_path(filter_states, id='predicted') 85 | env.render() 86 | 87 | # Plot error over time. 
88 | error = np.linalg.norm(true_states - filter_states, axis=-1) 89 | plt.plot(error) 90 | plt.title('Error over time') 91 | plt.show() 92 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "robotics_algorithm" 7 | version = "0.11.2" 8 | dependencies = [ 9 | "numpy", 10 | "matplotlib", 11 | "scikit-learn", 12 | "typing_extensions", 13 | "networkx", 14 | "pygame", 15 | "cvxpy", 16 | ] 17 | requires-python = ">=3.10" 18 | authors = [{ name = "Lu Yunfan", email = "luyunfan44@gmail.com" }] 19 | maintainers = [{ name = "Lu Yunfan", email = "luyunfan44@gmail.com" }] 20 | description = "A collection of robotics algorithms" 21 | readme = "README.md" 22 | license = "MIT" 23 | license-files = ["LICEN[CS]E.*"] 24 | 25 | [tool.ruff] 26 | line-length = 120 27 | 28 | [tool.ruff.format] 29 | quote-style = "single" 30 | indent-style = "space" 31 | docstring-code-format = true 32 | -------------------------------------------------------------------------------- /robotics_algorithm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/control/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/control/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/control/classical_control/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/control/classical_control/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/control/classical_control/pid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.base_env import BaseEnv, SpaceType 4 | 5 | 6 | class PID: 7 | """Implements a PID controller. 8 | 9 | The control output combines proportional, integral and derivative terms of the tracking error between the 10 | goal state and the current state: u = Kp * e + Ki * sum(e) + Kd * (e - e_prev). 11 | """ 12 | 13 | def __init__(self, env: BaseEnv, goal_state=None, Kp: float = 1.0, Ki: float = 0.0, Kd: float = 0.0): 14 | """ 15 | Constructor. If goal_state is not given, the env's goal state is used as the setpoint. 16 | """ 17 | assert env.state_space.type == SpaceType.CONTINUOUS.value 18 | assert env.action_space.type == SpaceType.CONTINUOUS.value 19 | 20 | self.env = env 21 | if goal_state is None: 22 | self.goal_state = np.array(env.goal_state, dtype=np.float32) 23 | else: 24 | self.goal_state = np.array(goal_state, dtype=np.float32) 25 | self.Kp = Kp 26 | self.Ki = Ki 27 | self.Kd = Kd 28 | 29 | self._prev_error = np.zeros_like(self.goal_state, dtype=np.float32) 30 | self._accum_error = np.zeros_like(self.goal_state, dtype=np.float32) 31 | 32 | def run(self, state: np.ndarray) -> np.ndarray: 33 | """Compute the current action based on the current state.
34 | 35 | Args: 36 | state (np.ndarray): current state. 37 | 38 | Returns: 39 | np.ndarray: current action 40 | """ 41 | state = np.array(state) 42 | 43 | error = self.goal_state - state 44 | delta_error = error - self._prev_error 45 | self._accum_error += error 46 | 47 | control = self.Kp * error + self.Kd * delta_error + self.Ki * self._accum_error 48 | 49 | self._prev_error = error 50 | 51 | return control 52 | -------------------------------------------------------------------------------- /robotics_algorithm/control/optimal_control/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/control/optimal_control/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/control/optimal_control/cem_mpc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import truncnorm 3 | 4 | from robotics_algorithm.env.base_env import BaseEnv, SpaceType 5 | 6 | 7 | class CEMMPC: 8 | """Implements Cross-entropy method for model-predictive control. 9 | 10 | Assuming a Gaussian policy, the general idea is to sample a bunch of trajectories using env state transition model 11 | and get their respective cost. Then take the top-K best trajectories and update the policy's mean and std based 12 | on these trajectories. We then sample another set of trajectories based on the updated Gaussian policy. We iterate 13 | this process for certain number of times and return the best action in the current best trajectory. 14 | """ 15 | 16 | def __init__( 17 | self, 18 | env: BaseEnv, 19 | num_traj_samples: int = 40, 20 | num_elites: int = 10, 21 | sample_traj_len: int = 20, 22 | action_mean: float | np.ndarray = 0.0, 23 | action_std: float | np.ndarray = 1.0, 24 | opt_iter: int = 5, 25 | alpha: float = 0.1, 26 | ): 27 | """Constructor 28 | 29 | Args: 30 | env (BaseEnv): the env. 31 | num_traj_samples (int, optional): number of rollout trajectories to sample. Defaults to 40. 32 | num_elites (int, optional): number of best trajectories to use for estimating the next mean and std. 33 | Defaults to 10. 34 | sample_traj_len (int, optional): the length of each rollout trajectory. Defaults to 20. 35 | action_mean (float | np.ndarray, optional): the nominal mean of sampled action. Defaults to 0.0. 36 | action_std (float | np.ndarray, optional): the nominal std of sampled action. Defaults to 1.0. 37 | opt_iter (int, optional): number of optimization iteration. Defaults to 5. 38 | alpha (float, optional): the low pass filter gain for updating mean and std in each optimization iteration. 39 | Defaults to 0.1. 
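Example (illustrative sketch; any env whose state and action spaces are continuous satisfies the asserts below, e.g. the path-follow env exercised by examples/control/path_follow_control/test_cem_path_follow.py):

    controller = CEMMPC(env, num_traj_samples=40, num_elites=10, sample_traj_len=20)
    action = controller.run(state)  # re-solved at every control step in receding-horizon fashion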
40 | """ 41 | assert env.state_space.type == SpaceType.CONTINUOUS.value 42 | assert env.action_space.type == SpaceType.CONTINUOUS.value 43 | 44 | self.env = env 45 | self.num_traj_samples = num_traj_samples 46 | self.num_elites = num_elites 47 | self.sample_traj_len = sample_traj_len 48 | self.opt_iter = opt_iter 49 | self.alpha = alpha 50 | 51 | self.nominal_action_mean = np.array(action_mean) 52 | self.nominal_action_std = np.array(action_std) 53 | self.action_seq_mean = np.tile(self.nominal_action_mean, (self.sample_traj_len, 1)) 54 | self.action_seq_std = np.tile(self.nominal_action_std, (self.sample_traj_len, 1)) 55 | 56 | def run(self, state: np.ndarray) -> np.ndarray: 57 | """Compute the current action given current state 58 | 59 | Args: 60 | state (np.ndarray): current state 61 | 62 | Returns: 63 | np.ndarray: current action 64 | """ 65 | for _ in range(self.opt_iter): 66 | # Sample trajectories 67 | all_costs = np.zeros((self.num_traj_samples)) 68 | all_traj_rollouts = np.zeros((self.num_traj_samples, *self.action_seq_mean.shape)) 69 | 70 | # Sample actions in a batch [K, N, Action_dim] 71 | sampled_action_seq = self._sample_action_seq() 72 | 73 | # Perform rollout 74 | for k in range(self.num_traj_samples): 75 | cur_state = state 76 | total_cost = 0 77 | for t in range(self.sample_traj_len): 78 | action = sampled_action_seq[k, t] 79 | new_state, reward, term, trunc, _ = self.env.sample_state_transition(cur_state, action) 80 | cost = -reward 81 | 82 | total_cost += cost 83 | cur_state = new_state 84 | all_traj_rollouts[k, t] = sampled_action_seq[k, t] 85 | 86 | if term or trunc: 87 | break 88 | 89 | all_costs[k] = total_cost 90 | 91 | # Update policy distribution based on elite trajectories 92 | self._update_distribution(all_traj_rollouts, all_costs) 93 | 94 | # After enough optimization iterations, 95 | # Get the best trajectory 96 | best_traj_idx = np.argmin(all_costs) 97 | self.best_traj = all_traj_rollouts[best_traj_idx] # For debug purpose 98 | 99 | # update previous control sequence, shift 1 step to the left 100 | action_mean = np.copy(self.action_seq_mean) 101 | self.action_seq_mean[:-1] = action_mean[1:] 102 | self.action_seq_mean[-1] = action_mean[-1] 103 | 104 | # reset std to nominal 105 | self.action_seq_std = np.copy(self.nominal_action_std) 106 | 107 | # Return the best traj's first action 108 | return self.best_traj[0] 109 | 110 | def _sample_action_seq(self): 111 | m = self.action_seq_mean[None, :] 112 | s = self.action_seq_std[None, :] 113 | samples = truncnorm.rvs( 114 | self.env.action_space.space[0], 115 | self.env.action_space.space[1], 116 | loc=m, 117 | scale=s, 118 | size=(self.num_traj_samples, *self.action_seq_mean.shape), 119 | ) 120 | # NOTE: truncnorm may still violate the bounds a little bit. 
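# (scipy's truncnorm defines its a and b cut-offs in standard-score units, i.e. (bound - loc) / scale, so passing
# the raw action-space bounds here is only exact when loc = 0 and scale = 1; the clip below restores feasibility.)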
121 | samples = np.clip(samples, self.env.action_space.space[0], self.env.action_space.space[1]) 122 | return samples # shape: [K, N, Action_dim] 123 | 124 | def _update_distribution(self, all_traj_rollouts: np.ndarray, all_costs: np.ndarray): 125 | # Get elite parameters 126 | elite_idxs = np.array(all_costs).argsort()[: self.num_elites] 127 | elite_sequences = all_traj_rollouts[elite_idxs] 128 | 129 | # fit around mean of elites 130 | new_mean = elite_sequences.mean(axis=0) 131 | new_std = elite_sequences.std(axis=0) 132 | 133 | # Low pass filter 134 | self.action_seq_mean = (1 - self.alpha) * new_mean + self.alpha * self.action_seq_mean # [h,d] 135 | self.action_seq_std = (1 - self.alpha) * new_std + self.alpha * self.action_seq_std 136 | -------------------------------------------------------------------------------- /robotics_algorithm/control/optimal_control/convex_mpc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cvxpy 3 | import time 4 | 5 | from robotics_algorithm.env.base_env import BaseEnv, FunctionType, SpaceType 6 | from .lqr import LQR 7 | 8 | 9 | class ConvexMPC: 10 | """Implements Convex Model Predictive Control 11 | 12 | Use convex optimization to explicitly solve the optimal sequence of action that minimizes cost of future trajectory, 13 | subject to linear state transition constraints. 14 | """ 15 | 16 | def __init__(self, env: BaseEnv, horizon=100): 17 | """ 18 | Constructor 19 | 20 | Args: 21 | env (BaseEnv): the env 22 | horizon (int, optional): the lookahead distance. Defaults to 100. 23 | 24 | Raises: 25 | AssertionError: if env does not satisfy the assumptions of ConvexMPC 26 | """ 27 | assert env.state_space.type == SpaceType.CONTINUOUS.value 28 | assert env.action_space.type == SpaceType.CONTINUOUS.value 29 | assert ( 30 | env.state_transition_func_type == FunctionType.LINEAR.value 31 | or env.state_transition_func_type == FunctionType.GENERAL.value 32 | ) 33 | assert env.reward_func_type == FunctionType.QUADRATIC.value 34 | 35 | self.env = env 36 | self.horizon = horizon 37 | 38 | # ! Use LQR to calculate terminal cost. This makes sense as we assume after horizon, the system is brought 39 | # ! to good state where all constraints should be satisfied. 40 | self._lqr = LQR(env) 41 | 42 | # By default, default reference state and action is env.goal and zero action 43 | self.A, self.B = self.env.linearize_state_transition(env.goal_state, env.goal_action) 44 | 45 | # Set up default constraints on input and state 46 | self.default_constraints = [] 47 | self.x = cvxpy.Variable((self.horizon + 1, self.env.state_space.state_size)) 48 | self.u = cvxpy.Variable((self.horizon + 1, self.env.action_space.state_size)) 49 | 50 | def set_ref_state_action(self, ref_state, ref_action): 51 | """ 52 | Set the reference state and action for the current iteration. 53 | 54 | Args: 55 | ref_state (np.ndarray): the reference state 56 | ref_action (np.ndarray): the reference action 57 | """ 58 | self.A, self.B = self.env.linearize_state_transition(ref_state, ref_action) 59 | 60 | def get_decision_variables(self): 61 | return self.x, self.u 62 | 63 | def add_constraints(self, constraint): 64 | if isinstance(constraint, list): 65 | self.default_constraints += constraint 66 | else: 67 | self.default_constraints.append(constraint) 68 | 69 | def run(self, state: np.ndarray) -> np.ndarray: 70 | """Compute the current action based on the current state. 71 | 72 | Args: 73 | state (np.ndarray): current state. 
74 | 75 | Returns: 76 | np.ndarray: current action 77 | """ 78 | A, B = self.A, self.B 79 | Q, R = self.env.Q, self.env.R 80 | 81 | # use lqr to get terminal cost 82 | P = self._lqr._solve_dare(A, B, Q, R) 83 | 84 | constr = self.default_constraints.copy() 85 | cost = 0.0 86 | for t in range(self.horizon): 87 | cost += cvxpy.quad_form(self.x[t], Q) 88 | cost += cvxpy.quad_form(self.u[t], R) 89 | constr += [self.x[t + 1].T == A @ self.x[t] + B @ self.u[t]] 90 | 91 | # default constraints on state space and action space size 92 | # constr += [self.x[t] <= self.env.state_space.high, self.x[t] >= self.env.state_space.low] 93 | # constr += [self.u[t] <= self.env.action_space.high, self.u[t] >= self.env.action_space.low] 94 | 95 | cost += cvxpy.quad_form(self.x[self.horizon], P) # LQR terminal cost 96 | constr += [self.x[0] == state] 97 | prob = cvxpy.Problem(cvxpy.Minimize(cost), constr) 98 | 99 | start = time.time() 100 | prob.solve(verbose=False) 101 | elapsed_time = time.time() - start 102 | print(f'calc time:{elapsed_time:.6f} [sec]') 103 | 104 | print('status:', prob.status) 105 | # print("optimal value", prob.value) 106 | # print('optimal var') 107 | # print(self.x.value) 108 | # print(self.u.value) 109 | 110 | return self.u.value[0] 111 | -------------------------------------------------------------------------------- /robotics_algorithm/control/optimal_control/lqr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import scipy.linalg 4 | 5 | from robotics_algorithm.env.base_env import BaseEnv, FunctionType, SpaceType 6 | 7 | 8 | class LQR: 9 | """Implements Linear-quadratic regulator. 10 | 11 | Given a system with linear state transition function and quadratic cost function, the optimal control strategy 12 | can be computed analytically and has the form: u = -Kx 13 | """ 14 | def __init__(self, env: BaseEnv, discrete_time=True, horizon=float("inf"), solve_by_iteration=False): 15 | """ 16 | State transition: 17 | x_dot = A * X + B * U 18 | y = x (full state feedback) 19 | J = sum_over_time(X_T*Q*X + U_T*R*U) 20 | """ 21 | assert env.state_space.type == SpaceType.CONTINUOUS.value 22 | assert env.action_space.type == SpaceType.CONTINUOUS.value 23 | # Can be general because we can linearize dynamics around fixed point. 24 | assert env.state_transition_func_type == FunctionType.LINEAR.value or env.state_transition_func_type == FunctionType.GENERAL.value 25 | assert env.reward_func_type == FunctionType.QUADRATIC.value 26 | 27 | self.env = env 28 | self.discrete_time = discrete_time 29 | self.horizon = horizon 30 | self.solve_by_iteration = solve_by_iteration 31 | 32 | # By default, default reference state and action is env.goal and zero action 33 | self.A, self.B = self.env.linearize_state_transition(env.goal_state, env.goal_action) 34 | 35 | def set_ref_state_action(self, ref_state, ref_action): 36 | self.A, self.B = self.env.linearize_state_transition(ref_state, ref_action) 37 | 38 | def run(self, state: np.ndarray) -> np.ndarray: 39 | """Compute the current action based on the current state. 40 | 41 | Args: 42 | state (np.ndarray): current state. 
43 | 44 | Returns: 45 | np.ndarray: current action 46 | """ 47 | A, B = self.A, self.B 48 | Q, R = self.env.Q, self.env.R 49 | state = np.array(state).reshape(-1, 1) 50 | 51 | if self.discrete_time: 52 | P = self._solve_dare(A, B, Q, R) 53 | K = np.linalg.inv(R + B.T @ P @ B) @ B.T @ P 54 | else: 55 | P = self._solve_care(A, B, Q, R) 56 | K = np.linalg.inv(R) @ B.T @ P 57 | 58 | u = -K @ state 59 | return u.reshape(-1) 60 | 61 | def _solve_care(self, A, B, Q, R): 62 | return scipy.linalg.solve_continuous_are(A, B, Q, R) 63 | 64 | def _solve_dare(self, A, B, Q, R): 65 | # we can call scipy library. 66 | if self.horizon == float("inf") or not self.solve_by_iteration: 67 | return scipy.linalg.solve_discrete_are(A, B, Q, R) 68 | 69 | # Alternatively, discrete-time algebraic Riccati equation can be solved by iteration 70 | 71 | # lists for P that contains P_t 72 | P = [Q] 73 | 74 | # work backwards in time from horizon to 0 75 | for _ in range(self.horizon): 76 | # calculate K_t 77 | # K_t = -(R+B^T*P_(t+1)*B)^(-1)*B^T*P_(t+1)*A 78 | K_t = np.linalg.inv(R + B.T @ P[-1] @ B) @ B.T @ P[-1] @ A 79 | 80 | # calculate P_t 81 | # P_t = Q + A^T*P_(t+1)*A - A^T*P_(t+1)*B*(R + B^T P_(t+1) B)^(-1) B^T*P_(t+1)*A 82 | P_t = Q + A.T @ P[-1] @ A - A.T @ P[-1] @ B @ K_t 83 | 84 | # add our calculations to the beginning of the lists (since we are working backwards) 85 | P.append(P_t) 86 | 87 | # P_N = Q_f 88 | # P.insert(0, Q) 89 | 90 | # K_N = 0 91 | # K.insert(0, 0) 92 | 93 | return P[-1] 94 | -------------------------------------------------------------------------------- /robotics_algorithm/control/path_follow_control/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/control/path_follow_control/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/control/path_follow_control/dwa.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.continuous_2d.diff_drive_2d_control import DiffDrive2DControl 4 | 5 | 6 | class DWA: 7 | """Implements Dynamic Window Approach for path follow. 8 | 9 | paper https://www.ri.cmu.edu/pub_files/pub1/fox_dieter_1997_1/fox_dieter_1997_1.pdf 10 | 11 | The basic idea of the Dynamic Window Approach (DWA) algorithm is as follows: 12 | 13 | - Discretely sample in the robot's control space 14 | - For each sampled velocity, perform forward simulation from the robot's current state to predict what would happen if the sampled velocity were applied for some (short) period of time. 15 | - Evaluate (score) each trajectory resulting from the forward simulation. 16 | - Pick the highest-scoring trajectory and send the associated velocity to the mobile base. 17 | - Rinse and repeat. 18 | """ 19 | 20 | def __init__( 21 | self, 22 | env: DiffDrive2DControl, 23 | min_lin_vel: float = 0.0, 24 | max_lin_vel: float = 0.5, 25 | lin_vel_samples: int = 10, 26 | min_ang_vel: float = -1.0, 27 | max_ang_vel: float = 1.0, 28 | ang_vel_samples: int = 20, 29 | simulate_time: float = 0.5, 30 | ) -> None: 31 | """ 32 | Constructor 33 | 34 | Args: 35 | env (DiffDrive2DControl): The environment. 36 | min_lin_vel (float): Minimum linear velocity. 37 | max_lin_vel (float): Maximum linear velocity. 38 | lin_vel_samples (int): Number of linear velocity samples. 39 | min_ang_vel (float): Minimum angular velocity. 
40 | max_ang_vel (float): Maximum angular velocity. 41 | ang_vel_samples (int): Number of angular velocity samples. 42 | simulate_time (float): Simulation time. 43 | """ 44 | assert isinstance(env, DiffDrive2DControl), 'env must be a DiffDrive2DControl' 45 | 46 | self.env = env 47 | self._min_lin_vel = min_lin_vel 48 | self._max_lin_vel = max_lin_vel 49 | self._lin_vel_steps = (max_lin_vel - min_lin_vel) / lin_vel_samples 50 | self._min_ang_vel = min_ang_vel 51 | self._max_ang_vel = max_ang_vel 52 | self._ang_vel_steps = (max_ang_vel - min_ang_vel) / ang_vel_samples 53 | self._sim_steps = int(simulate_time / env.action_dt) 54 | 55 | self.sampled_action_seq = [] 56 | self._sample_action_seq() 57 | 58 | def _sample_action_seq(self): 59 | for lin_vel in np.arange(self._min_lin_vel, self._max_lin_vel, self._lin_vel_steps): 60 | for ang_vel in np.arange(self._min_ang_vel, self._max_ang_vel, self._ang_vel_steps): 61 | self.sampled_action_seq.append([lin_vel, ang_vel]) 62 | 63 | def run(self, state: np.ndarray) -> np.ndarray: 64 | """Compute the current action given current state 65 | 66 | This function uses Dynamic Window Approach to select the best action given the current state. 67 | 68 | Args: 69 | state (np.ndarray): current state 70 | 71 | Returns: 72 | np.ndarray: current action 73 | """ 74 | best_cost = np.inf 75 | best_action = np.array([0.0, 0.0]) 76 | self.best_traj = [state] 77 | 78 | # Iterate over all possible actions 79 | for lin_vel, ang_vel in self.sampled_action_seq: 80 | cur_state = state 81 | total_cost = 0 82 | 83 | # * The action stays constant for self._sim_steps because we do not enforce acceleration limits. 84 | # * If acceleration limits are to be enforced, here we need to slowly accelerate/decelerate to sampled 85 | # * velocity (like ROS2). 86 | action = np.array([lin_vel, ang_vel]) 87 | traj = [cur_state] 88 | for _ in range(self._sim_steps): 89 | # Take one step in the simulation 90 | new_state, reward, term, trunc, _ = self.env.sample_state_transition(cur_state, action) 91 | cost = -reward 92 | 93 | total_cost += cost 94 | cur_state = new_state 95 | traj.append(new_state) 96 | 97 | # If the simulation is terminated, break 98 | if term or trunc: 99 | break 100 | 101 | # Update the best cost and trajectory if the current one is better 102 | if total_cost < best_cost: 103 | best_cost = total_cost 104 | best_action = action 105 | self.best_traj = traj 106 | 107 | # Return the best action 108 | return best_action 109 | -------------------------------------------------------------------------------- /robotics_algorithm/env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/env/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/env/classic_mdp/cliff_walking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from matplotlib import colors 4 | from typing_extensions import override 5 | 6 | from robotics_algorithm.env.base_env import MDPEnv, DiscreteSpace, DistributionType 7 | 8 | GRID_HEIGHT = 4 9 | GRID_WIDTH = 9 10 | OBSTACLES = [(1, 0), (2, 0), (3, 0), (4, 0), (5, 0), (6, 0), (7, 0)] 11 | 12 | 13 | class CliffWalking(MDPEnv): 14 | """ 15 | A player is placed in grid world. The player should move from start to goal. 
If the player reaches the goal the 16 | episode ends. If the player moves to a cliff location the episode terminates with failure. 17 | During each move, the player has a chance of ending up in the left or right of the target grid. 18 | 19 | State: [x, y] 20 | Action: [up, down, left, right] 21 | 22 | Discrete state space. 23 | Discrete action space. 24 | Stochastic transition. 25 | Fully observable. 26 | """ 27 | 28 | def __init__( 29 | self, 30 | start: np.ndarray = np.array([0, 0]), 31 | goal: np.ndarray = np.array([8, 0]), 32 | obstacles: list[tuple] = OBSTACLES, 33 | dense_reward: bool = False, 34 | ): 35 | """Constructor. 36 | 37 | Args: 38 | start (tuple): the start position of agent. 39 | goal (tuple): the goal position. 40 | obstacles (list[tuple]): a list of obstacle positions. 41 | dense_reward (bool): whether to use dense reward for this env. 42 | """ 43 | super().__init__() 44 | 45 | # Define spaces 46 | self.state_space = DiscreteSpace([np.array([i, j]) for i in range(GRID_WIDTH) for j in range(GRID_HEIGHT)]) 47 | self.action_space = DiscreteSpace( 48 | [np.array([0]), np.array([1]), np.array([2]), np.array([3])] 49 | ) # action, 0: up, 1: right, 2: down, 3: left) 50 | self.state_transition_dist_type = DistributionType.CATEGORICAL.value 51 | 52 | self.start_state = start 53 | self.cur_state = start 54 | self.goal_state = goal 55 | self.obstacles = obstacles 56 | self.step_reward = -1 57 | self.obstacle_reward = -100 58 | self.goal_reward = 100 59 | self.path = [] 60 | self.dense_reward = dense_reward 61 | 62 | @override 63 | def reset(self): 64 | self.cur_state = self.start_state 65 | return self.cur_state, {} 66 | 67 | @override 68 | def state_transition_func(self, state: np.ndarray, action: np.ndarray) -> tuple[list[np.ndarray], list[float]]: 69 | i, j = state 70 | 71 | new_states = [] 72 | if action == 0: 73 | new_states.append(np.array([i, min(j + 1, GRID_HEIGHT - 1)])) 74 | new_states.append(np.array([max(0, i - 1), min(j + 1, GRID_HEIGHT - 1)])) 75 | new_states.append(np.array([min(i + 1, GRID_WIDTH - 1), min(j + 1, GRID_HEIGHT - 1)])) 76 | elif action == 1: 77 | new_states.append(np.array([min(i + 1, GRID_WIDTH - 1), j])) 78 | new_states.append(np.array([min(i + 1, GRID_WIDTH - 1), min(j + 1, GRID_HEIGHT - 1)])) 79 | new_states.append(np.array([min(i + 1, GRID_WIDTH - 1), max(0, j - 1)])) 80 | elif action == 2: 81 | new_states.append(np.array([i, max(0, j - 1)])) 82 | new_states.append(np.array([max(0, i - 1), max(0, j - 1)])) 83 | new_states.append(np.array([min(i + 1, GRID_WIDTH - 1), max(0, j - 1)])) 84 | elif action == 3: 85 | new_states.append(np.array([max(0, i - 1), j])) 86 | new_states.append(np.array([max(0, i - 1), min(j + 1, GRID_HEIGHT - 1)])) 87 | new_states.append(np.array([max(0, i - 1), max(0, j - 1)])) 88 | probs = [0.8, 0.1, 0.1] 89 | 90 | return new_states, probs 91 | 92 | @override 93 | def is_state_terminal(self, state): 94 | term = False 95 | if state[0] == self.goal_state[0] and state[1] == self.goal_state[1]: 96 | term = True 97 | 98 | elif tuple(state.tolist()) in self.obstacles: 99 | term = True 100 | 101 | return term 102 | 103 | @override 104 | def reward_func(self, state: np.ndarray, action: np.ndarray = None, new_state: np.ndarray = None) -> float: 105 | # R(s, s') 106 | # Transition to goal state gives goal reward. 107 | # Transition to obstacle gives obstacle reward. 108 | # Transition to free gives step reward. 
109 | if np.allclose(new_state, self.goal_state): 110 | reward = self.goal_reward 111 | elif tuple(new_state.tolist()) in self.obstacles: 112 | reward = self.obstacle_reward 113 | else: 114 | if not self.dense_reward: 115 | reward = self.step_reward 116 | else: 117 | # Negative of dist as penalty. 118 | # This encourages moving to goal. 119 | reward = -(abs(new_state[0] - self.goal_state[0]) + abs(new_state[1] - self.goal_state[1])) * 0.1 120 | 121 | return reward 122 | 123 | def add_path(self, path): 124 | self.path = path 125 | 126 | @override 127 | def render(self): 128 | _, ax = plt.subplots() 129 | self.gridworld = np.full((GRID_HEIGHT, GRID_WIDTH), 0) 130 | 131 | for obstacle in self.obstacles: 132 | self.gridworld[GRID_HEIGHT - obstacle[1] - 1][obstacle[0]] = 1 133 | 134 | self.gridworld[GRID_HEIGHT - self.start_state[1] - 1][self.start_state[0]] = 2 135 | self.gridworld[GRID_HEIGHT - self.goal_state[1] - 1][self.goal_state[0]] = 3 136 | 137 | if len(self.path) > 0: 138 | for state in self.path: 139 | self.gridworld[GRID_HEIGHT - state[1] - 1][state[0]] = 4 140 | else: 141 | self.gridworld[GRID_HEIGHT - self.cur_state[1] - 1][self.cur_state[0]] = 4 142 | 143 | self.colour_map = colors.ListedColormap(['white', 'black', 'yellow', 'red', 'green']) 144 | bounds = [0, 1, 2, 3, 4, 5] 145 | self.norm = colors.BoundaryNorm(bounds, self.colour_map.N) 146 | 147 | ax.imshow(self.gridworld, cmap=self.colour_map, norm=self.norm) 148 | 149 | # draw gridlines 150 | ax.grid(which='major', axis='both', linestyle='-', color='k', linewidth=1) 151 | ax.set_xticks(np.arange(GRID_WIDTH) - 0.5) 152 | ax.set_xticklabels(np.array([str(i) for i in range(GRID_WIDTH)])) 153 | ax.set_yticks(np.flip(np.arange(GRID_HEIGHT) + 0.5)) 154 | ax.set_yticklabels(np.array([str(i) for i in range(GRID_HEIGHT)])) 155 | plt.tick_params(axis='both', labelsize=5, length=0) 156 | 157 | plt.show() 158 | -------------------------------------------------------------------------------- /robotics_algorithm/env/classic_mdp/windy_grid_world.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from matplotlib import colors 4 | from typing_extensions import override 5 | 6 | from robotics_algorithm.env.base_env import DiscreteSpace, DistributionType, MDPEnv 7 | 8 | GRID_HEIGHT = 7 9 | GRID_WIDTH = 10 10 | 11 | 12 | class WindyGridWorld(MDPEnv): 13 | """ 14 | Windy Gridworld is a standard gridworld with start and goal states. The difference is that there is a crosswind 15 | running upward through the middle of the grid. Actions are the standard four: up, right, down, and left. 16 | In the middle region the resultant next states are shifted upward by the "wind" which strength varies from column 17 | to column. The reward is -1 until goal state is reached. 18 | 19 | State: [x, y] 20 | Action: [up, down, left, right] 21 | 22 | Discrete state space. 23 | Discrete action space. 24 | Deterministic transition. 25 | Fully observable. 26 | """ 27 | 28 | def __init__( 29 | self, 30 | start: np.ndarray = np.array([0, 3]), 31 | goal: np.ndarray = np.array([7, 3]), 32 | wind: np.ndarray = [0, 0, 0, 1, 1, 1, 2, 2, 1, 0], 33 | ): 34 | """Constructor. 35 | 36 | Args: 37 | start (np.ndarray): the start position of agent. 38 | goal (np.ndarray): the goal position. 39 | wind (np.ndarray): the strength of upward wind in each horizontal position. 40 | The number decides how many grid cell the agent will be pushed upward. 
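For example, with the default wind profile, taking action 1 (right) from column 3 moves the agent one column to the right and, because wind[4] == 1, also one row up (clamped at the grid edge).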
41 | """ 42 | super().__init__() 43 | 44 | # Define spaces 45 | self.state_space = DiscreteSpace([np.array([i, j]) for i in range(GRID_WIDTH) for j in range(GRID_HEIGHT)]) 46 | self.action_space = DiscreteSpace( 47 | [np.array([0]), np.array([1]), np.array([2]), np.array([3])] 48 | ) # 0: up, 1: right, 2: down, 3: left 49 | self.state_transition_dist_type = DistributionType.CATEGORICAL.value 50 | 51 | self.start_state = start 52 | self.cur_state = start 53 | self.goal_state = goal 54 | self.wind = wind 55 | self.step_reward = -1 56 | self.goal_reward = 10 57 | self.path = [] 58 | 59 | @override 60 | def reset(self): 61 | self.cur_state = self.start_state 62 | return self.cur_state, {} 63 | 64 | @override 65 | def state_transition_func(self, state: np.ndarray, action: np.ndarray) -> tuple[list[np.ndarray], list[float]]: 66 | # NOTE: This environment has deterministic transition 67 | i, j = state 68 | 69 | new_states = [] 70 | if action == 0: 71 | next_state = np.array([i, min(j + 1 + self.wind[i], GRID_HEIGHT - 1)]) 72 | elif action == 1: 73 | new_i = min(i + 1, GRID_WIDTH - 1) 74 | next_state = np.array([new_i, min(j + self.wind[new_i], GRID_HEIGHT - 1)]) 75 | elif action == 2: 76 | next_state = np.array([i, max(0, min(j - 1 + self.wind[i], GRID_HEIGHT - 1))]) 77 | elif action == 3: 78 | new_i = max(i - 1, 0) 79 | next_state = np.array([new_i, min(j + self.wind[new_i], GRID_HEIGHT - 1)]) 80 | 81 | new_states.append(next_state) 82 | probs = [1.0] 83 | 84 | return new_states, probs 85 | 86 | @override 87 | def reward_func(self, state: np.ndarray, action: np.ndarray = None, new_state: np.ndarray = None) -> float: 88 | # R(s, s') 89 | # Transition to goal state gives goal reward. 90 | # Transition to free gives step reward. 91 | if np.allclose(new_state, self.goal_state): 92 | reward = 100 93 | reward = self.goal_reward 94 | else: 95 | reward = self.step_reward 96 | 97 | return reward 98 | 99 | def add_path(self, path): 100 | self.path = path 101 | 102 | @override 103 | def render(self): 104 | _, ax = plt.subplots() 105 | self.gridworld = np.full((GRID_HEIGHT, GRID_WIDTH), 0) 106 | 107 | self.gridworld[GRID_HEIGHT - self.start_state[1] - 1][self.start_state[0]] = 2 108 | self.gridworld[GRID_HEIGHT - self.goal_state[1] - 1][self.goal_state[0]] = 3 109 | 110 | if len(self.path) > 0: 111 | for state in self.path: 112 | self.gridworld[GRID_HEIGHT - state[1] - 1][state[0]] = 4 113 | else: 114 | self.gridworld[GRID_HEIGHT - self.cur_state[1] - 1][self.cur_state[0]] = 4 115 | 116 | self.colour_map = colors.ListedColormap(['white', 'black', 'yellow', 'red', 'green']) 117 | bounds = [0, 1, 2, 3, 4, 5] 118 | self.norm = colors.BoundaryNorm(bounds, self.colour_map.N) 119 | 120 | ax.imshow(self.gridworld, cmap=self.colour_map, norm=self.norm) 121 | 122 | # draw gridlines 123 | ax.grid(which='major', axis='both', linestyle='-', color='k', linewidth=1) 124 | ax.set_xticks(np.arange(GRID_WIDTH) - 0.5) 125 | ax.set_xticklabels(np.array([str(i) for i in range(GRID_WIDTH)])) 126 | ax.set_yticks(np.flip(np.arange(GRID_HEIGHT) + 0.5)) 127 | ax.set_yticklabels(np.array([str(i) for i in range(GRID_HEIGHT)])) 128 | plt.tick_params(axis='both', labelsize=5, length=0) 129 | 130 | plt.show() 131 | -------------------------------------------------------------------------------- /robotics_algorithm/env/continuous_1d/double_integrator_env.py: -------------------------------------------------------------------------------- 1 | from typing_extensions import override 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy 
as np 5 | 6 | from robotics_algorithm.robot.double_integrator import DoubleIntegrator 7 | from robotics_algorithm.env.base_env import ( 8 | ContinuousSpace, 9 | StochasticEnv, 10 | PartiallyObservableEnv, 11 | FunctionType, 12 | DistributionType, 13 | ) 14 | 15 | 16 | class DoubleIntegratorEnv(StochasticEnv, PartiallyObservableEnv): 17 | """A DoubleIntegrator robot is tasked to stop at the goal state which is at zero position with zero velocity. 18 | 19 | State: [pos, vel] 20 | Action: [acceleration] 21 | 22 | Continuous state space. 23 | Continuous action space. 24 | Stochastic transition. 25 | Partially observable. 26 | 27 | Linear state transition function. 28 | Quadratic cost. 29 | """ 30 | 31 | def __init__(self, size=10, observation_noise_std=0.01, state_transition_noise_std=0.01): 32 | super().__init__() 33 | 34 | self.size = size 35 | 36 | self.robot_model = DoubleIntegrator() 37 | self.state_space = ContinuousSpace(low=[-self.size / 2, -1e10], high=[self.size / 2, 1e10]) # pos and vel 38 | self.action_space = ContinuousSpace(low=[-1e10], high=[1e10]) # accel 39 | 40 | # declare linear state transition 41 | # x_dot = Ax + Bu 42 | self.state_transition_func_type = FunctionType.LINEAR.value 43 | self.A = self.robot_model.A 44 | self.B = self.robot_model.B 45 | 46 | # declare quadratic cost 47 | # L = x.T @ Q @ x + u.T @ R @ u 48 | self.reward_func_type = FunctionType.QUADRATIC.value 49 | self.Q = np.array([[1, 0], [0, 1]]) # state cost matrix 50 | self.R = np.array([[1]]) # control cost matrix 51 | 52 | # declare linear observation 53 | self.observation_func_type = FunctionType.LINEAR.value 54 | self.H = np.eye(2, dtype=np.float32) 55 | 56 | # state transition noise model 57 | self.state_transition_dist_type = DistributionType.GAUSSIAN.value 58 | self.state_transition_noise_std = state_transition_noise_std 59 | self.state_transition_noise_var = state_transition_noise_std * state_transition_noise_std 60 | self.state_transition_covariance_matrix = np.array( 61 | [ 62 | [self.state_transition_noise_var, 0], # only have noise on position 63 | [0, 1e-10], 64 | ], 65 | dtype=np.float32, 66 | ) 67 | 68 | # observation noise model 69 | self.observation_dist_type = DistributionType.GAUSSIAN.value 70 | self.observation_noise_std = observation_noise_std 71 | var = observation_noise_std * observation_noise_std 72 | self.observation_covariance_matrix = np.array( 73 | [ 74 | [var, 0], # only have noise on position 75 | [0, 1e-10], 76 | ], 77 | dtype=np.float32, 78 | ) 79 | 80 | self.path = None 81 | 82 | @override 83 | def reset(self): 84 | self.start_state = self.random_state() 85 | self.start_state[1] = 0.0 # zero velocity 86 | self.goal_state = [0, 0] # fixed 87 | self.goal_action = [0] 88 | self.cur_state = np.copy(self.start_state) 89 | self.step_cnt = 0 90 | 91 | return self.sample_observation(self.cur_state), {} 92 | 93 | @override 94 | def state_transition_func( 95 | self, state: np.ndarray, action: np.ndarray 96 | ) -> tuple[np.ndarray, float, bool, bool, dict]: 97 | new_state_mean = self.robot_model.control(state, action, dt=0.01) 98 | new_state_var = [self.state_transition_noise_var, 1e-10] 99 | return new_state_mean, new_state_var 100 | 101 | @override 102 | def linearize_state_transition(self, state: np.ndarray, action: np.ndarray): 103 | return self.robot_model.linearize_state_transition(state, action) 104 | 105 | @override 106 | def linearize_observation(self, state, observation): 107 | return self.H 108 | 109 | @override 110 | def sample_observation(self, state): 111 | # simulate 
measuring the position with noise 112 | state = np.array(state).reshape(-1, 1) 113 | 114 | # noisy observation 115 | obs = self.H @ state + np.random.multivariate_normal( 116 | mean=[0, 0], cov=self.observation_covariance_matrix 117 | ).reshape(2, 1) 118 | 119 | return obs.reshape(-1) 120 | 121 | @override 122 | def reward_func(self, state: np.ndarray, action: np.ndarray = None, new_state: np.ndarray = None) -> float: 123 | # check bounds 124 | if ( 125 | state[0] <= self.state_space.space[0][0] 126 | or state[0] >= self.state_space.space[1][0] 127 | or state[1] <= self.state_space.space[0][1] 128 | or state[1] >= self.state_space.space[1][1] 129 | ): 130 | return -100 131 | 132 | # quadratic reward func 133 | state = np.array(state) 134 | action = np.array(action) 135 | 136 | cost = state.T @ self.Q @ state + action.T @ self.R @ action 137 | return -cost 138 | 139 | @override 140 | def is_state_terminal(self, state): 141 | term = False 142 | # Check bounds 143 | if ( 144 | state[0] <= self.state_space.space[0][0] 145 | or state[0] >= self.state_space.space[1][0] 146 | or state[1] <= self.state_space.space[0][1] 147 | or state[1] >= self.state_space.space[1][1] 148 | ): 149 | term = True 150 | 151 | # Check goal state reached for termination 152 | if np.allclose(state, self.goal_state, atol=1e-3): 153 | term = True 154 | 155 | return term 156 | 157 | @override 158 | def get_state_info(self, state): 159 | info = {'success': False} 160 | if np.allclose(state, self.goal_state, atol=1e-3): 161 | info = {'success': True} 162 | 163 | return info 164 | 165 | @override 166 | def render(self): 167 | fig, ax = plt.subplots(2, 1, dpi=100) 168 | ax[0].plot(0, self.start_state[0], 'o') 169 | ax[1].plot(0, self.start_state[1], 'o') 170 | 171 | if self.path: 172 | for i, state in enumerate(self.path): 173 | ax[0].plot(i, state[0], 'o') 174 | ax[1].plot(i, state[1], 'o') 175 | 176 | ax[0].set_ylim(-self.size / 2, self.size / 2) 177 | plt.show() 178 | 179 | def add_path(self, path): 180 | self.path = path 181 | -------------------------------------------------------------------------------- /robotics_algorithm/env/continuous_2d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/env/continuous_2d/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/env/continuous_2d/diff_drive_2d_env_base.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Any 3 | 4 | import numpy as np 5 | from matplotlib import colors 6 | from typing_extensions import override 7 | 8 | from robotics_algorithm.env.base_env import ( 9 | BaseEnv, 10 | ContinuousSpace, 11 | DiscreteSpace, 12 | ) 13 | from robotics_algorithm.robot.differential_drive import DiffDrive 14 | 15 | DEFAULT_OBSTACLES = [[2, 2, 0.5], [5, 5, 1], [3, 8, 0.5], [8, 3, 1]] 16 | DEFAULT_START = [0.5, 0.5, 0] 17 | DEFAULT_GOAL = [9.0, 9.0, math.radians(90)] 18 | 19 | 20 | class DiffDrive2DEnv(BaseEnv): 21 | """A differential drive robot must reach goal state in a 2d maze with obstacles. 22 | 23 | State: [x, y, theta] 24 | Action: [lin_vel, ang_vel] 25 | 26 | There are two modes. 27 | - If user does not set a reference path, the reward only encourages robot to reach goal as fast as possible. This 28 | environment hence behaves like a path planning environment. 
29 | - If user sets a reference path, the reward will encourage the robot to track the path to reach the goal. Hence, the 30 | environment behaves like a path following (control) environment. 31 | """ 32 | 33 | FREE_SPACE = 0 34 | OBSTACLE = 1 35 | START = 2 36 | GOAL = 3 37 | PATH = 4 38 | WAYPOINT = 5 39 | MAX_POINT_TYPE = 6 40 | 41 | def __init__(self, size=10, robot_radius=0.2, action_dt=1.0, discrete_action=False, has_kinematics_constraint=True): 42 | """ 43 | Initialize a differential drive robot environment. 44 | 45 | Args: 46 | size (int): size of the maze 47 | robot_radius (float): radius of the robot 48 | action_dt (float): time step for the robot actions. 49 | discrete_action (bool): whether the action space is discrete or continuous 50 | """ 51 | super().__init__() 52 | 53 | self.size = size 54 | self.maze = np.full((size, size), DiffDrive2DEnv.FREE_SPACE) 55 | 56 | self.state_space = ContinuousSpace(low=[0, 0, -math.pi], high=[self.size, self.size, math.pi]) 57 | if not discrete_action: 58 | if has_kinematics_constraint: 59 | self.action_space = ContinuousSpace(low=[0, -math.radians(30)], high=[0.5, math.radians(30)]) 60 | else: 61 | self.action_space = ContinuousSpace( 62 | low=[-float('inf'), -float('inf')], high=[float('inf'), float('inf')] 63 | ) 64 | else: 65 | self.action_space = DiscreteSpace( 66 | [ 67 | (0.5, 0), 68 | (0.5, math.radians(30)), 69 | (0.5, -math.radians(30)), 70 | (0.25, 0), 71 | (0.25, math.radians(30)), 72 | (0.25, -math.radians(30)), 73 | ] 74 | ) 75 | 76 | # self.colour_map = colors.ListedColormap(['white', 'black', 'red', 'blue', 'green', 'yellow']) 77 | # bounds = [ 78 | # DiffDrive2DEnv.FREE_SPACE, 79 | # DiffDrive2DEnv.OBSTACLE, 80 | # DiffDrive2DEnv.START, 81 | # DiffDrive2DEnv.GOAL, 82 | # DiffDrive2DEnv.PATH, 83 | # DiffDrive2DEnv.WAYPOINT, 84 | # DiffDrive2DEnv.MAX_POINT_TYPE, 85 | # ] 86 | # self.norm = colors.BoundaryNorm(bounds, self.colour_map.N) 87 | 88 | self.robot_model = DiffDrive(wheel_dist=0.2, wheel_radius=0.05) 89 | self.robot_radius = robot_radius 90 | self.action_dt = action_dt 91 | 92 | self.path = None 93 | self.path_dict = {} 94 | self.local_plan = None 95 | 96 | # others 97 | self.interactive_viz = False # Interactive viz 98 | self._fig_created = False 99 | 100 | @override 101 | def reset(self, empty=False, random_env=True): 102 | if random_env: 103 | self._random_obstacles() 104 | self.start_state = self._random_valid_state() 105 | self.goal_state = self._random_valid_state() 106 | else: 107 | self.obstacles = DEFAULT_OBSTACLES 108 | self.start_state = DEFAULT_START 109 | self.goal_state = DEFAULT_GOAL 110 | 111 | # no obstacles if empty 112 | if empty: 113 | self.obstacles = [] 114 | 115 | self.cur_state = self.start_state.copy() 116 | 117 | return self.sample_observation(self.cur_state), {} 118 | 119 | @override 120 | def state_transition_func(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 121 | # compute next state 122 | new_state = self.robot_model.control(state, action, dt=self.action_dt) 123 | 124 | return new_state 125 | 126 | @override 127 | def is_state_terminal(self, state): 128 | # Compute term and info 129 | term = False 130 | if state[0] <= 0 or state[0] >= self.size or state[1] <= 0 or state[1] >= self.size: 131 | term = True 132 | 133 | if not self.is_state_valid(state): 134 | term = True 135 | 136 | # Check goal state reached for termination 137 | if self.is_state_similar(state, self.goal_state): 138 | term = True 139 | 140 | return term 141 | 142 | @override 143 | def get_state_info(self, 
state): 144 | info = {'success': False} 145 | if self.is_state_similar(state, self.goal_state): 146 | info['success'] = True 147 | 148 | return info 149 | 150 | @override 151 | def is_state_valid(self, state: np.ndarray) -> bool: 152 | for obstacle in self.obstacles: 153 | if np.linalg.norm(state[:2] - np.array(obstacle[:2])) <= obstacle[2] + self.robot_radius: 154 | return False 155 | 156 | return True 157 | 158 | def is_state_similar(self, state1: np.ndarray, state2: np.ndarray) -> bool: 159 | return np.linalg.norm(state1 - state2) < 0.2 160 | 161 | def _random_obstacles(self, num_of_obstacles: int = 5): 162 | self.obstacles = [] 163 | for _ in range(num_of_obstacles): 164 | obstacle = np.random.uniform([0, 0, 0.1], [self.size, self.size, 1]) 165 | self.obstacles.append(obstacle.tolist()) 166 | 167 | def _random_valid_state(self): 168 | while True: 169 | robot_pos = np.random.uniform(self.state_space.space[0], self.state_space.space[1]) 170 | if self.is_state_valid(robot_pos): 171 | break 172 | 173 | return robot_pos 174 | -------------------------------------------------------------------------------- /robotics_algorithm/env/discrete_world_1d.py: -------------------------------------------------------------------------------- 1 | from typing_extensions import override 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | from robotics_algorithm.env.base_env import StochasticEnv, PartiallyObservableEnv, DiscreteSpace, DistributionType 7 | 8 | # center value being the probability of perfect control 9 | TRANSITION_PROB = [0.05, 0.1, 0.7, 0.1, 0.05] 10 | # center value being the probability of perfect measurement 11 | OBS_PROB = [0.05, 0.1, 0.7, 0.1, 0.05] 12 | 13 | 14 | class DiscreteWorld1D(StochasticEnv, PartiallyObservableEnv): 15 | """A robot moves on a straight line and can only end up in integer value positions. 16 | 17 | State: [pos] 18 | Action: steps to move 19 | 20 | Discrete state space. 21 | Discrete action space. 22 | Stochastic transition. 23 | Partially observable. 
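A minimal usage sketch (the same pattern as examples/state_estimation/test_discrete_bayes_filter.py):

    env = DiscreteWorld1D()
    obs, _ = env.reset()
    new_obs, reward, term, trunc, info = env.step(env.action_space.sample())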
24 | """ 25 | def __init__(self, size=100, action_size=5, state_transition_prob=TRANSITION_PROB, obs_prob=OBS_PROB): 26 | super().__init__() 27 | 28 | self.size = size 29 | 30 | self.state_space = DiscreteSpace(values=[[x] for x in range(size)]) 31 | self.action_space = DiscreteSpace(values=[[x] for x in range(-action_size, action_size + 1)]) 32 | self.observation_space = DiscreteSpace(values=[[x] for x in range(size)]) 33 | 34 | self.state_transition_prob = state_transition_prob 35 | self.obs_prob = obs_prob 36 | self.state_transition_dist_type = DistributionType.CATEGORICAL.value 37 | self.observation_dist_type = DistributionType.CATEGORICAL.value 38 | 39 | @override 40 | def reset(self): 41 | self.cur_state = self.state_space.sample() 42 | self.goal_state = [self.size // 2] 43 | 44 | return self.sample_observation(self.cur_state), {} 45 | 46 | @override 47 | def render(self): 48 | plt.figure(dpi=100) 49 | plt.plot(self.cur_state[0], 0, "o") 50 | plt.ylim(-1, 1) 51 | plt.xlim(0, self.size) 52 | plt.show() 53 | 54 | @override 55 | def state_transition_func(self, state: np.ndarray, action: np.ndarray) -> tuple[np.ndarray[tuple], np.ndarray[float]]: 56 | state = state[0] 57 | action = action[0] 58 | 59 | # move in the specified direction with some small chance of error 60 | tmp = int(len(self.state_transition_prob) // 2) 61 | new_state = state + action 62 | new_states = [[x] for x in range(new_state - tmp, new_state + tmp + 1)] 63 | probs = self.state_transition_prob 64 | 65 | for i in range(len(new_states)): 66 | # clamp state 67 | if new_states[i][0] < 0: 68 | new_states[i] = [0] 69 | if new_states[i][0] >= self.size: 70 | new_states[i] = [self.size - 1] 71 | 72 | return new_states, probs 73 | 74 | @override 75 | def reward_func(self, state: np.ndarray, action: np.ndarray = None, new_state: np.ndarray = None) -> float: 76 | # Check bounds 77 | if new_state[0] < 0 or new_state[0] >= self.size: 78 | return -100 79 | 80 | return -abs(new_state[0]) 81 | 82 | @override 83 | def observation_func(self, state: np.ndarray) -> tuple[np.ndarray[np.ndarray], np.ndarray[float]]: 84 | state = state[0] 85 | tmp = int(len(self.obs_prob) // 2) 86 | obss = [[x] for x in range(state - tmp, state + tmp + 1)] 87 | probs = self.obs_prob 88 | 89 | return obss, probs 90 | 91 | @override 92 | def is_state_terminal(self, state): 93 | term = False 94 | if state[0] < 0 or state[0] >= self.size: 95 | term = True 96 | 97 | if state[0] == self.goal_state[0]: 98 | term = True 99 | 100 | return term 101 | -------------------------------------------------------------------------------- /robotics_algorithm/env/inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from typing_extensions import override 6 | 7 | from robotics_algorithm.env.base_env import ContinuousSpace, DeterministicEnv, FullyObservableEnv, FunctionType 8 | from robotics_algorithm.robot.pendulum import Pendulum 9 | 10 | 11 | class InvertedPendulumEnv(DeterministicEnv, FullyObservableEnv): 12 | """Inverted pendulum environment. 
13 | 14 | State: [theta, theta_dot] 15 | Action: [torque] 16 | 17 | """ 18 | 19 | def __init__(self, quadratic_reward=True): 20 | super().__init__() 21 | 22 | self.state_transition_func_type = FunctionType.LINEAR.value 23 | 24 | # state and action space 25 | self.state_space = ContinuousSpace(low=[-float('inf'), -float('inf')], high=[float('inf'), float('inf')]) 26 | self.action_space = ContinuousSpace(low=[-1e10], high=[1e10]) # torque 27 | 28 | # reward 29 | self.quadratic_reward = quadratic_reward 30 | if quadratic_reward: 31 | self.reward_func_type = FunctionType.QUADRATIC.value 32 | self.Q = np.array([[10, 0], [0, 10]]) 33 | self.R = np.array([[1]]) 34 | 35 | # robot model 36 | self.robot_model = Pendulum() 37 | self.action_dt = self.robot_model.dt 38 | self._ref_state = np.array([-np.pi, 0]) 39 | 40 | self.theta_threshold_radians = 12 * 2 * math.pi / 360 41 | self.max_steps = 500 42 | 43 | # others 44 | self._fig_created = False 45 | 46 | @override 47 | def reset(self): 48 | self.cur_state = np.random.randn(2) * 0.1 # near-upright position 49 | self.goal_state = np.array([0, 0]) # upright position 50 | self.goal_action = np.zeros(1) 51 | self.step_cnt = 0 52 | 53 | return self.sample_observation(self.cur_state), {} 54 | 55 | @override 56 | def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]: 57 | new_state, reward, term, trunc, info = super().step(action) 58 | 59 | self.step_cnt += 1 60 | trunc = self.step_cnt > self.max_steps # set truncated flag 61 | 62 | return new_state, reward, term, trunc, info 63 | 64 | @override 65 | def state_transition_func(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 66 | # compute next state 67 | new_state = self.robot_model.control(state, action) 68 | return new_state 69 | 70 | @override 71 | def reward_func(self, state: np.ndarray, action: np.ndarray = None, new_state: np.ndarray = None) -> float: 72 | terminated = self.is_state_terminal(new_state) 73 | 74 | if not terminated: 75 | if self.quadratic_reward: 76 | reward = -new_state.T @ self.Q @ new_state - action.T @ self.R @ action 77 | else: 78 | reward = 1.0 79 | else: 80 | reward = -1.0 81 | 82 | return reward 83 | 84 | @override 85 | def is_state_terminal(self, state): 86 | theta = state[0] 87 | 88 | term = bool(theta < -self.theta_threshold_radians or theta > self.theta_threshold_radians) 89 | 90 | return term 91 | 92 | @override 93 | def linearize_state_transition(self, state, action): 94 | return self.robot_model.linearize_state_transition(state, action) 95 | 96 | @override 97 | def render(self): 98 | if not self._fig_created: 99 | # Create an animation of the pendulum 100 | plt.ion() 101 | plt.figure() 102 | 103 | self._fig_created = True 104 | 105 | plt.clf() 106 | theta = self.cur_state[0] + np.pi 107 | x_pendulum = self.robot_model.L * np.sin(theta) 108 | y_pendulum = -self.robot_model.L * np.cos(theta) 109 | plt.plot([0, x_pendulum], [0, y_pendulum], 'o-', lw=2) 110 | plt.xlim(-self.robot_model.L - 0.1, self.robot_model.L + 0.1) 111 | plt.ylim(-self.robot_model.L - 0.1, self.robot_model.L + 0.1) 112 | plt.pause(0.01) 113 | -------------------------------------------------------------------------------- /robotics_algorithm/learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/learning/__init__.py -------------------------------------------------------------------------------- 
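# Illustrative sketch (not part of the repository): balancing InvertedPendulumEnv above with a
# discrete-time LQR computed directly from the env's own linearization and cost matrices. The repo
# ships its own LQR class under control/optimal_control; this standalone loop only assumes the env
# API shown above plus scipy.linalg.solve_discrete_are, and that linearize_state_transition returns
# discrete-time (A, B) matrices about the supplied operating point.
import numpy as np
from scipy.linalg import solve_discrete_are

from robotics_algorithm.env.inverted_pendulum import InvertedPendulumEnv

env = InvertedPendulumEnv()
state, _ = env.reset()
A, B = env.linearize_state_transition(env.goal_state, env.goal_action)
P = solve_discrete_are(A, B, env.Q, env.R)              # solve the discrete-time algebraic Riccati equation
K = np.linalg.inv(env.R + B.T @ P @ B) @ (B.T @ P @ A)  # optimal state-feedback gain
for _ in range(300):
    action = -K @ state                                 # u = -K x regulates the state to the upright goal
    state, _, term, trunc, _ = env.step(action)
    if term or trunc:
        break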
/robotics_algorithm/learning/reinforcement_learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/learning/reinforcement_learning/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/learning/reinforcement_learning/mc_control_on_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from typing import Callable 4 | 5 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 6 | 7 | 8 | class MCControlOnPolicy: 9 | def __init__(self, env: MDPEnv, max_episode_len: int = 5000) -> None: 10 | """ 11 | Constructor. 12 | 13 | Args: 14 | env (MDPEnv): the env. 15 | """ 16 | assert env.state_space.type == SpaceType.DISCRETE.value 17 | assert env.action_space.type == SpaceType.DISCRETE.value 18 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 19 | 20 | self.env = env 21 | self._max_episode_len = max_episode_len 22 | 23 | def make_epsilon_greedy_policy(self, epsilon, num_actions): 24 | """ 25 | Creates an epsilon-greedy policy based on a given Q-function and epsilon. 26 | 27 | Args: 28 | epsilon: The probability to select a random action . float between 0 and 1. 29 | num_actions: Number of actions in the environment. 30 | 31 | Returns: 32 | A function that takes the observation as an argument and returns 33 | the probabilities for each action in the form of a numpy array of length num_actions. 34 | 35 | """ 36 | 37 | def policy_fn(state): 38 | # epsilon probability of picking a random action. 39 | 40 | action_prob = np.ones(num_actions, dtype=float) * epsilon / num_actions 41 | 42 | # (1 - epsilon) probability of picking the best action 43 | state_key = tuple(state.tolist()) 44 | best_action = np.argmax(self.Q[state_key]) 45 | action_prob[best_action] += 1 - epsilon 46 | return action_prob 47 | 48 | return policy_fn 49 | 50 | def run( 51 | self, num_episodes: int = 500, discount_factor: float = 0.95, epsilon: float = 0.1 52 | ) -> tuple[dict, Callable]: 53 | """ 54 | Monte Carlo Control using Epsilon-Greedy policies. 55 | Finds an optimal epsilon-greedy policy. 56 | 57 | Args: 58 | num_episodes (int): Number of episodes to sample. Defaults to 500. 59 | discount_factor (float, optional): Gamma discount factor. Defaults to 0.95. 60 | epsilon (float, optional): Chance the sample a random action. Float between 0 and 1. Defaults to 0.1. 61 | 62 | Returns: 63 | tuple[dict, Callable]: A tuple (Q, policy). 64 | Q is a dictionary mapping state -> action values. 65 | policy is a function that takes an observation as an argument and returns action probabilities. 66 | """ 67 | # Keeps track of sum and count of returns for each state 68 | # to calculate an average. We could use an array to save all 69 | # returns (like in the book) but that's memory inefficient. 70 | state_action_visit_cnt = defaultdict(int) 71 | 72 | all_actions = self.env.action_space.get_all() 73 | 74 | # for plotting 75 | self.episodes = [] 76 | self.cumulative_rewards = [] 77 | 78 | # The final action-value function. 79 | # A nested dictionary that maps state -> (action -> action-value). 
80 | self.Q = defaultdict(lambda: np.zeros(self.env.action_space.size)) 81 | 82 | # The policy we're following 83 | policy = self.make_epsilon_greedy_policy(epsilon, self.env.action_space.size) 84 | 85 | # Learn 86 | for i_episode in range(1, num_episodes + 1): 87 | # Generate an episode. 88 | # An episode is an array of (state, action, reward) tuples 89 | trajectory = [] 90 | cumulative_reward = 0 91 | state, _ = self.env.reset() 92 | 93 | # Run the episode 94 | for steps in range(self._max_episode_len): 95 | # choose action according to epsilon-greedy policy 96 | action_probs = policy(state) 97 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 98 | action = all_actions[action_idx] 99 | next_state, reward, term, trunc, _ = self.env.step(action) 100 | trajectory.append((tuple(state.tolist()), tuple(action.tolist()), reward)) 101 | 102 | steps += 1 103 | cumulative_reward += reward 104 | 105 | if term or trunc: 106 | break 107 | 108 | state = next_state 109 | 110 | # every-visit MC Prediction 111 | # Find all states the we've visited in this episode 112 | # We convert each state to a tuple so that we can use it as a dict key 113 | state_action_in_episode = set( 114 | [(tuple(x[0]), x[1]) for x in trajectory] 115 | ) # we use set because it is first-visit 116 | for state_action in state_action_in_episode: 117 | state, action = state_action 118 | 119 | # calculate total return for that state: 120 | first_idx = next(i for i, x in enumerate(trajectory) if x[0] == state and x[1] == action) 121 | # calculate total return 122 | total_return = sum([x[2] * (discount_factor**i) for i, x in enumerate(trajectory[first_idx:])]) 123 | # calculate average return in incremental fashion 124 | state_action_visit_cnt[state_action] += 1 125 | self.Q[state][action] = self.Q[state][action] + (1.0 / state_action_visit_cnt[state_action]) * ( 126 | total_return - self.Q[state][action] 127 | ) 128 | 129 | # improve policy based on updated Q function 130 | # NOTE: we do not need to call this because Q function value is already updated 131 | # policy = self.make_epsilon_greedy_policy(epsilon, self.env.action_space.size) 132 | 133 | # Print out which episode we're on, useful for debugging. 134 | if i_episode % 1 == 0: 135 | self.cumulative_rewards.append(cumulative_reward) 136 | self.episodes.append(i_episode) 137 | print('Episode {}/{}, reward : {}'.format(i_episode, num_episodes, cumulative_reward)) 138 | 139 | return self.Q, policy 140 | 141 | def get_learning_curve(self): 142 | return self.episodes, self.cumulative_rewards 143 | -------------------------------------------------------------------------------- /robotics_algorithm/learning/reinforcement_learning/q_learning.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from typing import Callable 4 | 5 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 6 | 7 | 8 | class QLearning: 9 | def __init__(self, env: MDPEnv, max_episode_len: int = 5000) -> None: 10 | """ 11 | Constructor. 12 | 13 | Args: 14 | env (MDPEnv): the env. 
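        Note:
            Q-learning is off-policy: actions are drawn from an epsilon-greedy behaviour policy,
            while the TD target in ``run`` bootstraps with ``max_a Q(s', a)``.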
15 | """ 16 | assert env.state_space.type == SpaceType.DISCRETE.value 17 | assert env.action_space.type == SpaceType.DISCRETE.value 18 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 19 | 20 | self.env = env 21 | self._max_episode_len = max_episode_len 22 | 23 | def make_epsilon_greedy_policy(self, epsilon, num_actions): 24 | """ 25 | Creates an epsilon-greedy policy based on a given Q-function and epsilon. 26 | 27 | Args: 28 | epsilon: The probability to select a random action . float between 0 and 1. 29 | num_actions: Number of actions in the environment. 30 | 31 | Returns: 32 | A function that takes the observation as an argument and returns 33 | the probabilities for each action in the form of a numpy array of length num_actions. 34 | 35 | """ 36 | 37 | def policy_fn(state): 38 | # epsilon probability of picking a random action. 39 | action_prob = np.ones(num_actions, dtype=float) * epsilon / num_actions 40 | 41 | # (1 - epsilon) probability of picking the best action 42 | state_key = tuple(state.tolist()) 43 | best_action = np.argmax(self.Q[state_key]) 44 | action_prob[best_action] += 1 - epsilon 45 | return action_prob 46 | 47 | return policy_fn 48 | 49 | def run( 50 | self, num_episodes: int = 500, discount_factor: float = 0.95, epsilon: float = 0.1, alpha=0.05 51 | ) -> tuple[dict, Callable]: 52 | """ 53 | Run the SARSA algorithm to learn the optimal policy. 54 | 55 | Args: 56 | num_episodes (int): Number of episodes to sample. Defaults to 500. 57 | discount_factor (float, optional): Gamma discount factor. Defaults to 0.95. 58 | epsilon (float, optional): Chance the sample a random action. Float betwen 0 and 1. Defaults to 0.1. 59 | alpha (float): TD Update rate. 60 | 61 | Returns: 62 | tuple[dict, Callable]: A tuple (Q, policy). 63 | Q is a dictionary mapping state -> action values. 64 | policy is a function that takes an observation as an argument and returns 65 | action probabilities 66 | """ 67 | all_actions = self.env.action_space.get_all() 68 | 69 | # for plotting 70 | self.episodes = [] 71 | self.cumulative_rewards = [] 72 | 73 | # The final action-value function. 74 | # A nested dictionary that maps state -> (action -> action-value). 75 | self.Q = defaultdict(lambda: np.zeros(self.env.action_space.size)) 76 | 77 | # The policy we're following 78 | behaviour_policy = self.make_epsilon_greedy_policy(epsilon, self.env.action_space.size) 79 | 80 | # Iterate over each episode 81 | for i_episode in range(1, num_episodes + 1): 82 | # Initialize the state and action 83 | state, _ = self.env.reset() 84 | 85 | cumulative_reward = 0 86 | # Run the episode 87 | for steps in range(self._max_episode_len): 88 | # Take action, observe reward and next state 89 | action_probs = behaviour_policy(state) 90 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 91 | action = all_actions[action_idx] 92 | next_state, reward, term, trunc, _ = self.env.step(action) 93 | cumulative_reward += reward 94 | 95 | # Perform TD Update 96 | # ! we perform TD update even if term or trunc is true. Although Q function for terminal 97 | # ! state does not make sense, this is needed so that the negative reward earned when transitioning 98 | # ! to terminal state is learned. 99 | # ! 
For q learning, next action is selected based on greedy policy, so here just use np.max 100 | state_key = tuple(state.tolist()) 101 | action_key = tuple(action.tolist()) 102 | next_state_key = tuple(next_state.tolist()) 103 | td_target = reward + discount_factor * np.max(self.Q[next_state_key]).item() 104 | td_delta = td_target - self.Q[state_key][action_key] 105 | self.Q[state_key][action_key] += alpha * td_delta 106 | 107 | # Print debug information every 1000 steps 108 | # if steps % 1000 == 0: 109 | # print(state) 110 | # print(action_probs) 111 | # print(steps) 112 | 113 | # Update state and action 114 | state = next_state 115 | 116 | # ! for q-learning, action must be sampled using behaviour policy 117 | # action = next_action 118 | 119 | # Check for termination 120 | if term or trunc: 121 | break 122 | 123 | # Log progress after each episode 124 | if i_episode % 1 == 0: 125 | self.cumulative_rewards.append(cumulative_reward) 126 | self.episodes.append(i_episode) 127 | print("Episode {}/{}, reward : {}".format(i_episode, num_episodes, cumulative_reward)) 128 | 129 | return self.Q, behaviour_policy 130 | 131 | def get_learning_curve(self): 132 | return self.episodes, self.cumulative_rewards 133 | -------------------------------------------------------------------------------- /robotics_algorithm/learning/reinforcement_learning/sarsa.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from typing import Callable 4 | 5 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 6 | 7 | 8 | class SARSA: 9 | def __init__(self, env: MDPEnv, max_episode_len: int = 5000) -> None: 10 | """ 11 | Constructor. 12 | 13 | Args: 14 | env (MDPEnv): the env. 15 | """ 16 | assert env.state_space.type == SpaceType.DISCRETE.value 17 | assert env.action_space.type == SpaceType.DISCRETE.value 18 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 19 | 20 | self.env = env 21 | self._max_episode_len = max_episode_len 22 | 23 | def make_epsilon_greedy_policy(self, epsilon, num_actions): 24 | """ 25 | Creates an epsilon-greedy policy based on a given Q-function and epsilon. 26 | 27 | Args: 28 | epsilon: The probability to select a random action . float between 0 and 1. 29 | num_actions: Number of actions in the environment. 30 | 31 | Returns: 32 | A function that takes the observation as an argument and returns 33 | the probabilities for each action in the form of a numpy array of length num_actions. 34 | 35 | """ 36 | 37 | def policy_fn(state): 38 | # epsilon probability of picking a random action. 39 | action_prob = np.ones(num_actions, dtype=float) * epsilon / num_actions 40 | 41 | # (1 - epsilon) probability of picking the best action 42 | state_key = tuple(state.tolist()) 43 | best_action = np.argmax(self.Q[state_key]) 44 | action_prob[best_action] += 1 - epsilon 45 | return action_prob 46 | 47 | return policy_fn 48 | 49 | def run( 50 | self, num_episodes: int = 500, discount_factor: float = 0.95, epsilon: float = 0.1, alpha=0.05 51 | ) -> tuple[dict, Callable]: 52 | """ 53 | Run the SARSA algorithm to learn the optimal policy. 54 | 55 | Args: 56 | num_episodes (int): Number of episodes to sample. Defaults to 500. 57 | discount_factor (float, optional): Gamma discount factor. Defaults to 0.95. 58 | epsilon (float, optional): Chance the sample a random action. Float betwen 0 and 1. Defaults to 0.1. 59 | alpha (float): TD Update rate. 
60 | 61 | Returns: 62 | tuple[dict, Callable]: A tuple (Q, policy). 63 | Q is a dictionary mapping state -> action values. 64 | policy is a function that takes an observation as an argument and returns 65 | action probabilities 66 | """ 67 | all_actions = self.env.action_space.get_all() 68 | 69 | # for plotting 70 | self.episodes = [] 71 | self.cumulative_rewards = [] 72 | 73 | # The final action-value function. 74 | # A nested dictionary that maps state -> (action -> action-value). 75 | self.Q = defaultdict(lambda: np.zeros(self.env.action_space.size)) 76 | 77 | # The policy we're following 78 | policy = self.make_epsilon_greedy_policy(epsilon, self.env.action_space.size) 79 | 80 | # Iterate over each episode 81 | for i_episode in range(1, num_episodes + 1): 82 | # Initialize the state and action 83 | state, _ = self.env.reset() 84 | 85 | cumulative_reward = 0 86 | # Run the episode 87 | action_probs = policy(state) 88 | action_idx = np.random.choice(np.arange(len(all_actions)), p=action_probs) # choose action 89 | action = all_actions[action_idx] 90 | for steps in range(self._max_episode_len): 91 | # Take action, observe reward and next state 92 | next_state, reward, term, trunc, _ = self.env.step(action) 93 | cumulative_reward += reward 94 | 95 | # Next state and next action 96 | next_action_probs = policy(next_state) 97 | next_action_idx = np.random.choice(np.arange(len(all_actions)), p=next_action_probs) # choose action 98 | next_action = all_actions[next_action_idx] 99 | 100 | # Perform TD Update 101 | # NOTE: we perform TD update even if term or trunc is true. Although Q function for terminal 102 | # state does not make sense, this is needed so that the negative reward earned when transitioning 103 | # to terminal state is learned. 104 | state_key = tuple(state.tolist()) 105 | action_key = tuple(action.tolist()) 106 | next_state_key = tuple(next_state.tolist()) 107 | next_action_key = tuple(next_action.tolist()) 108 | td_target = reward + discount_factor * self.Q[next_state_key][next_action_key] 109 | td_delta = td_target - self.Q[state_key][action_key] 110 | self.Q[state_key][action_key] += alpha * td_delta 111 | 112 | # Print debug information every 1000 steps 113 | # if steps % 1000 == 0: 114 | # print(state) 115 | # print(action_probs) 116 | # print(steps) 117 | 118 | # Update state and action 119 | state = next_state 120 | action = next_action 121 | 122 | # Check for termination 123 | if term or trunc: 124 | break 125 | 126 | # Log progress after each episode 127 | if i_episode % 1 == 0: 128 | self.cumulative_rewards.append(cumulative_reward) 129 | self.episodes.append(i_episode) 130 | print("Episode {}/{}, reward : {}".format(i_episode, num_episodes, cumulative_reward)) 131 | 132 | return self.Q, policy 133 | 134 | def get_learning_curve(self): 135 | return self.episodes, self.cumulative_rewards 136 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/planning/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/planning/mdp/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/planning/mdp/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/planning/mdp/policy_iteration.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | from collections import defaultdict 4 | from typing import Callable 5 | 6 | import numpy as np 7 | 8 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 9 | 10 | 11 | class PolicyIteration: 12 | def __init__(self, env: MDPEnv) -> None: 13 | assert env.state_space.type == SpaceType.DISCRETE.value 14 | assert env.action_space.type == SpaceType.DISCRETE.value 15 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 16 | 17 | self.env = env 18 | 19 | def make_greedy_policy(self, Q, num_actions): 20 | """ 21 | Creates an greedy policy based on a given Q-function. 22 | 23 | Args: 24 | Q: A dictionary that maps from state -> action-values. 25 | Each value is a numpy array of length nA (see below) 26 | num_action: Number of actions in the environment. 27 | 28 | Returns: 29 | A function that takes the observation as an argument and returns the action 30 | """ 31 | 32 | def policy_fn(state): 33 | action_prob = np.zeros(num_actions) 34 | state_key = tuple(state.tolist()) 35 | best_action_idx = np.argmax(Q[state_key], axis=-1) 36 | action_prob[best_action_idx] = 1.0 37 | return action_prob 38 | 39 | return policy_fn 40 | 41 | def run(self, discount_factor: float = 0.99) -> tuple[dict, Callable]: 42 | """Run algorithm. 43 | 44 | Args: 45 | discount_factor (float, optional): discount factor for future reward. Defaults to 0.99. 46 | 47 | Returns: 48 | Q (dict): Q function 49 | policy (Callable): policy constructed according to the Q function. 
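        Example (illustrative usage; ``env`` is any discrete, fully observable MDPEnv):
            planner = PolicyIteration(env)
            Q, policy = planner.run(discount_factor=0.99)
            action_probs = policy(state)  # one-hot greedy distribution over actions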
50 | """ 51 | print('PolicyIteration: plan!!') 52 | 53 | # random init Q 54 | Q = defaultdict(lambda: np.zeros(self.env.action_space.size)) 55 | 56 | iter = 0 57 | policy_converged = False 58 | while not policy_converged: 59 | print('PolicyIteration, iter {},'.format(iter)) 60 | prev_Q = copy.deepcopy(Q) 61 | prev_policy = self.make_greedy_policy(prev_Q, self.env.action_space.size) 62 | Q, policy_converged = self.policy_evaluation(self.env, Q, prev_policy, discount_factor) 63 | policy = self.policy_improvement(self.env, Q) 64 | iter += 1 65 | 66 | return Q, policy 67 | 68 | def policy_evaluation(self, env, Q, policy, discount_factor=0.99, max_steps=10, diff_threshold=0.01): 69 | # Value iteration operates on Bellman expectation equation 70 | # For state-action reward: 71 | # Q_pi(s, a) <- R(s, a) + discount * sum_s' [p(s' | s) * V_pi(s')] 72 | # For state-state reward: 73 | # Q_pi(s, a) = sum_s' [p(s' | s) * (R(s, s') + discount * V_pi(s'))] 74 | # V_pi(s) = sum_a [pi(a | s) * Q_pi(s, a)] 75 | 76 | self.terminal_states = set() 77 | states = env.state_space.get_all() 78 | actions = env.action_space.get_all() 79 | 80 | v_state = defaultdict(float) 81 | 82 | for state in states: 83 | action_probs = policy(state) 84 | state_key = tuple(state.tolist()) 85 | value = np.dot(action_probs, Q[state_key]) # calculate v_pi 86 | v_state[state_key] = value 87 | 88 | iter = 0 89 | max_change = np.inf 90 | while max_change > diff_threshold and iter < max_steps: 91 | v_state_new = copy.deepcopy(v_state) 92 | max_change = 0 93 | for state in states: 94 | state_key = tuple(state.tolist()) 95 | if state_key in self.terminal_states: 96 | continue 97 | 98 | for action in actions: 99 | action_key = tuple(action.tolist()) 100 | new_states, probs = env.state_transition_func(state, action) 101 | 102 | q_sa = 0 103 | for i, new_state in enumerate(new_states): 104 | new_state_key = tuple(new_state.tolist()) 105 | term = env.is_state_terminal(new_state) 106 | if term: 107 | self.terminal_states.add(new_state_key) 108 | 109 | reward = env.reward_func(state, action, new_state) 110 | q_sa += probs[i] * (reward + discount_factor * v_state[new_state_key]) 111 | 112 | # update Q(s,a) 113 | Q[state_key][action_key] = q_sa 114 | 115 | # Instead of using max, use action probability under the current policy 116 | action_probs = policy(state) 117 | value = np.dot(action_probs, Q[state_key]) # calculate v_pi 118 | 119 | # Check maximum change incurred. 120 | if v_state[state_key] != value: 121 | v_state_new[state_key] = value 122 | max_change = max(max_change, math.fabs(value - v_state[state_key])) 123 | 124 | v_state = v_state_new 125 | 126 | print('policy_evaluation, iter {}, max_change = {}'.format(iter, max_change)) 127 | iter += 1 128 | 129 | return Q, max_change < diff_threshold 130 | 131 | def policy_improvement(self, env, Q): 132 | policy = self.make_greedy_policy(Q, env.action_space.size) 133 | return policy 134 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/mdp/policy_tree_search.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 6 | 7 | 8 | class PolicyTreeSearch: 9 | def __init__(self, env: MDPEnv, max_depth: int = 5, discount_factor: float = 0.99): 10 | """Constructor. 11 | 12 | Args: 13 | env (MDPEnv): the env. 14 | max_depth (int, optional): maximum serach depth. Defaults to 5. 
15 | discount_factor (float, optional): the discount factor for future reward. Defaults to 0.99. 16 | """ 17 | assert env.state_space.type == SpaceType.DISCRETE.value 18 | assert env.action_space.type == SpaceType.DISCRETE.value 19 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 20 | 21 | self.env = env 22 | self.max_depth = max_depth 23 | self.discount_factor = discount_factor 24 | 25 | def run(self, state: np.ndarray) -> np.ndarray: 26 | """Compute best action for the current state. 27 | 28 | Args: 29 | state (np.ndarray): the current state 30 | 31 | Returns: 32 | action (np.ndarray): the best action 33 | """ 34 | print('Searching...') 35 | q, _ = self._compute_state_value(self.env, state, 0) 36 | print(q) 37 | actions = self.env.action_space.get_all() 38 | return actions[q.argmax()] 39 | 40 | def _compute_state_value(self, env: MDPEnv, state, cur_depth): 41 | q = np.zeros(env.action_space.size) 42 | 43 | # If maximum depth is not reached, recurse deeper 44 | if cur_depth <= self.max_depth: 45 | actions = env.action_space.get_all() 46 | for action_idx, action in enumerate(actions): 47 | new_states, probs = env.state_transition_func(state, action) 48 | 49 | # calculate Q values 50 | q_sa = 0 51 | for i, new_state in enumerate(new_states): 52 | # next_state, reward, term, trunc, info = result 53 | reward = self.env.reward_func(state, action, new_state) 54 | term = env.is_state_terminal(new_state) 55 | 56 | if not term: 57 | _, new_state_value = self._compute_state_value(env, new_state, cur_depth + 1) 58 | else: 59 | new_state_value = 0 60 | 61 | q_sa += probs[i] * (reward + self.discount_factor * new_state_value) 62 | 63 | # update Q(s,a) 64 | q[action_idx] = q_sa 65 | 66 | # else we do nothing, hence q and state_value will be zero. 67 | else: 68 | pass 69 | 70 | state_value = q.max() 71 | return q, state_value 72 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/mdp/value_iteration.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | from collections import defaultdict 4 | from typing import Callable 5 | 6 | import numpy as np 7 | 8 | from robotics_algorithm.env.base_env import MDPEnv, SpaceType, EnvType 9 | 10 | 11 | class ValueIteration: 12 | def __init__(self, env: MDPEnv) -> None: 13 | assert env.state_space.type == SpaceType.DISCRETE.value 14 | assert env.action_space.type == SpaceType.DISCRETE.value 15 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 16 | 17 | self.env = env 18 | 19 | def make_greedy_policy(self, Q, num_actions): 20 | """ 21 | Creates an greedy policy based on a given Q-function. 22 | 23 | Args: 24 | Q: A dictionary that maps from state -> action-values. 25 | Each value is a numpy array of length nA (see below) 26 | num_action: Number of actions in the environment. 27 | 28 | Returns: 29 | A function that takes the observation as an argument and returns the action 30 | """ 31 | 32 | def policy_fn(state): 33 | action_prob = np.zeros(num_actions) 34 | state_key = tuple(state.tolist()) 35 | best_action_idx = np.argmax(Q[state_key], axis=-1) 36 | action_prob[best_action_idx] = 1.0 37 | return action_prob 38 | 39 | return policy_fn 40 | 41 | def run(self, discount_factor: float = 0.99, diff_threshold: float = 0.01) -> tuple[dict, Callable]: 42 | """Run algorithm. 43 | 44 | Args: 45 | discount_factor (float, optional): discount factor for future reward. Defaults to 0.99. 
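            diff_threshold (float, optional): convergence threshold on the largest per-state value change. Defaults to 0.01.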
46 | 47 | Returns: 48 | Q (dict): Q function 49 | policy (Callable): policy constructed according to the Q function. 50 | """ 51 | print('ValueIteration: plan!!') 52 | 53 | self.terminal_states = set() 54 | states = self.env.state_space.get_all() 55 | actions = self.env.action_space.get_all() 56 | 57 | v_state = defaultdict(float) 58 | Q = defaultdict(lambda: np.zeros(self.env.action_space.size)) 59 | 60 | # Iterate until convergence. 61 | # Value iteration operates on Bellman optimality equation 62 | # For state-action reward: 63 | # Q*(s, a) = R(s, a) + discount * sum_s' [p(s' | s) * V*(s')] 64 | # For state-state reward: 65 | # Q*(s, a) = sum_s' [p(s' | s) * (R(s, s') + discount * V*(s'))] 66 | # Note: R(s, a) = sum_s' [p(s' | s) * (R(s, s')] 67 | # V*(s) = max_a Q*(s, a) 68 | 69 | max_change = np.inf 70 | iter = 0 71 | while max_change > diff_threshold: 72 | iter += 1 73 | print('iter {}, max_change = {}'.format(iter, max_change)) 74 | 75 | v_state_new = copy.deepcopy(v_state) 76 | max_change = -np.inf 77 | for state in states: 78 | state_key = tuple(state.tolist()) 79 | if state_key in self.terminal_states: 80 | continue 81 | 82 | for action in actions: 83 | action_key = tuple(action.tolist()) 84 | new_states, probs = self.env.state_transition_func(state, action) 85 | 86 | # calculate Q values 87 | q_sa = 0 88 | for i, new_state in enumerate(new_states): 89 | new_state_key = tuple(new_state.tolist()) 90 | term = self.env.is_state_terminal(new_state) 91 | if term: 92 | self.terminal_states.add(tuple(new_state.tolist())) 93 | 94 | reward = self.env.reward_func(state, action, new_state) 95 | q_sa += probs[i] * (reward + discount_factor * v_state[new_state_key]) 96 | 97 | # update Q(s,a) 98 | Q[state_key][action_key] = q_sa 99 | 100 | # V(s) = max_a Q(s,a) 101 | value = Q[state_key].max(axis=-1) 102 | 103 | # Check maximum change incurred. 104 | if v_state[state_key] != value: 105 | v_state_new[state_key] = value 106 | max_change = max(max_change, math.fabs(value - v_state[state_key])) 107 | 108 | v_state = v_state_new 109 | 110 | policy = self.make_greedy_policy(Q, self.env.action_space.size) 111 | 112 | return Q, policy 113 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/motion_planning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/planning/motion_planning/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/planning/motion_planning/prm.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable 2 | 3 | from sklearn.neighbors import NearestNeighbors 4 | import numpy as np 5 | import networkx as nx 6 | 7 | from robotics_algorithm.env.base_env import BaseEnv, SpaceType, EnvType 8 | 9 | 10 | class ProbabilisticRoadmap: 11 | def __init__( 12 | self, 13 | env: BaseEnv, 14 | sample_func: Callable, 15 | state_col_check_func: Callable, 16 | edge_col_check_func: Callable, 17 | num_of_samples: int, 18 | num_neighbors: int, 19 | ): 20 | """_summary_ 21 | 22 | Args: 23 | env (BaseEnv): the env 24 | sample_func (Callable): the sampling function, returns a random point in space. 25 | state_col_check_func (Callable): a function that takes in a random point and returns whether it is in 26 | collision. 
27 | edge_col_check_func (Callable): a function that takes in two points and returns whether there is a 28 | collision-free simple path between them. 29 | num_of_samples (int): maximum of samples drawn during planning. 30 | num_neighbors (int): number of closest neighbors to attempt connection. 31 | """ 32 | assert env.state_space.type == SpaceType.CONTINUOUS.value 33 | assert env.action_space.type == SpaceType.CONTINUOUS.value 34 | assert env.state_transition_type == EnvType.DETERMINISTIC.value 35 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 36 | 37 | self.env = env 38 | self._sample_func = sample_func 39 | self._state_col_check_func = state_col_check_func 40 | self._edge_col_check_func = edge_col_check_func 41 | self.num_of_samples = num_of_samples 42 | self.num_neighbors = num_neighbors 43 | 44 | def compute_roadmap(self): 45 | """ 46 | Offline computation of roadmap by sampling points. 47 | """ 48 | self.all_samples = [] 49 | self.roadmap = nx.Graph() 50 | while len(self.all_samples) < self.num_of_samples: 51 | collision_free = False 52 | while not collision_free: 53 | sample = self._sample_func(self.env) 54 | collision_free = self._state_col_check_func(self.env, sample) 55 | 56 | self.all_samples.append(sample) 57 | 58 | print("PRM/compute_roadmap: finished adding {} of vertices".format(self.num_of_samples)) 59 | # print(self.V) 60 | 61 | for sample in self.all_samples: 62 | neighbors = self.get_nearest_neighbors(self.all_samples, sample, self.num_neighbors) 63 | # print("neighbours {}".format(neighbours)) 64 | for neighbor in neighbors: 65 | can_link, length = self._edge_col_check_func(self.env, sample, neighbor) 66 | if can_link: 67 | self.roadmap.add_edge(tuple(sample.tolist()), tuple(neighbor.tolist()), weight=length) 68 | 69 | def get_nearest_neighbors(self, all_vertices: np.ndarray[tuple], v: tuple, n_neighbors: int = 1) -> np.ndarray[tuple]: 70 | """ 71 | return the closest neighbors of v in all_vertices. 72 | 73 | Args: 74 | all_vertices (np.ndarray[tuple]): a np.ndarray of vertices 75 | v (tuple): the target vertex. 76 | n_neighbors (int): number of nearby neighbors. 77 | 78 | Returns: 79 | (np.ndarray[tuple]): a np.ndarray of nearby vertices 80 | """ 81 | n_neighbors = min(n_neighbors, len(all_vertices)) 82 | 83 | all_vertices = np.array(all_vertices) 84 | v = np.array(v).reshape(1, -1) 85 | 86 | nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm="ball_tree").fit(all_vertices) 87 | distances, indices = nbrs.kneighbors(v) 88 | # print("indices {}".format(indices)) 89 | nbr_vertices = np.take(np.array(all_vertices), indices.ravel(), axis=0) 90 | return nbr_vertices 91 | 92 | def get_path(self, start: np.ndarray, goal: np.ndarray) -> tuple[bool, np.ndarray[tuple], float]: 93 | """ 94 | Online computation of path. 95 | 96 | Args: 97 | start (tuple): the start state. 98 | goal (tuple): the goal state. 99 | 100 | Returns: 101 | success (boolean): return true if a path is found, return false otherwise 102 | shortest_path (np.ndarray[tuple]): a np.ndarray of vertices if shortest path is found 103 | shortest_path_len (float): the length of shortest path if found. 
104 | """ 105 | start_tup = tuple(start.tolist()) 106 | goal_tup = tuple(goal.tolist()) 107 | 108 | source_neighbors = self.get_nearest_neighbors(self.all_samples, start, self.num_neighbors) 109 | for neighbor in source_neighbors: 110 | can_link, length = self._edge_col_check_func(self.env, start, neighbor) 111 | if can_link: 112 | self.roadmap.add_edge(start_tup, tuple(neighbor.tolist()), weight=length) 113 | break 114 | else: 115 | print("can't connect start to roadmap!!!") 116 | return False, None, None 117 | 118 | goal_neighbors = self.get_nearest_neighbors(self.all_samples, goal, self.num_neighbors) 119 | for neighbor in goal_neighbors: 120 | can_link, length = self._edge_col_check_func(self.env, goal, neighbor) 121 | if can_link: 122 | self.roadmap.add_edge(goal_tup, tuple(neighbor.tolist()), weight=length) 123 | break 124 | else: 125 | print("can't connect goal to roadmap!!!") 126 | return False, None, None 127 | 128 | # path_exist, shortest_path, path_len = self._path_finder.run(self.adj_list, start, goal) 129 | path = nx.shortest_path(self.roadmap, start_tup, goal_tup, weight="weight") 130 | path_len = nx.shortest_path_length(self.roadmap, start_tup, goal_tup, weight="weight") 131 | print(path, path_len) 132 | return True, path, path_len 133 | 134 | def run(self, start: tuple, goal: tuple) -> tuple[bool, np.ndarray[tuple], float]: 135 | """ 136 | Run algorithm. 137 | 138 | Args: 139 | start (tuple): the start state. 140 | goal (tuple): the goal state. 141 | 142 | Returns: 143 | success (boolean): return true if a path is found, return false otherwise. 144 | shortest_path (np.ndarray[tuple]): a np.ndarray of vertices if shortest path is found. 145 | shortest_path_len (float): the length of shortest path if found. 146 | """ 147 | self.compute_roadmap() 148 | return self.get_path(start, goal) 149 | 150 | def get_roadmap(self): 151 | return self.roadmap 152 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/path_planning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/planning/path_planning/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/planning/path_planning/a_star.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | from typing import Any, Callable 3 | 4 | import numpy as np 5 | 6 | from robotics_algorithm.env.base_env import BaseEnv, EnvType, SpaceType 7 | 8 | 9 | class AStar: 10 | def __init__(self, env: BaseEnv, heuristic_func: Callable, state_key_func: Callable): 11 | """Constructor. 12 | 13 | Args: 14 | env (BaseEnv): A planning env. 15 | heuristic_func (Callable): a function to return estimated cost-to-go from a state to goal. 16 | state_key_func (Callable): a function to hash the state. 17 | """ 18 | assert env.action_space.type == SpaceType.DISCRETE.value 19 | assert env.state_space.type == SpaceType.DISCRETE.value 20 | assert env.state_transition_type == EnvType.DETERMINISTIC.value 21 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 22 | 23 | self.env = env 24 | self._heuristic_func = heuristic_func 25 | self._state_key_func = state_key_func 26 | 27 | def run(self, start: np.ndarray, goal: np.ndarray) -> tuple[bool, np.ndarray, float]: 28 | """Run algorithm. 
29 | 30 | Args: 31 | start (np.ndarray): the start state 32 | goal (np.ndarray): the goal state 33 | 34 | Returns: 35 | res (bool): return true if a path is found, return false otherwise. 36 | shortest_path (np.ndarray): a np.ndarray of state if shortest path is found. 37 | shortest_path_len (float): the length of shortest path if found. 38 | """ 39 | # Astar gist: for every state, f[v] = g(s, v) + h(v, g) 40 | 41 | # initialize 42 | unvisited_states = set() # OPEN set. Nodes not in this set is in CLOSE set 43 | priority_q = [] 44 | shortest_path = [] 45 | shortest_path_len = 0 46 | g = {} # cost-to-come from start to a state 47 | f = {} # cost-to-come + heuristic cost-to-go 48 | prev_state_dict = {} # used to extract shortest path 49 | state_dict = {} # key is some state key, value is the actual state. 50 | 51 | start_key = self._state_key_func(start) 52 | goal_key = self._state_key_func(goal) 53 | state_dict[start_key] = start 54 | state_dict[goal_key] = goal 55 | 56 | for state in self.env.state_space.get_all(): 57 | state_key = self._state_key_func(state) 58 | g[state_key] = float('inf') 59 | f[state_key] = float('inf') 60 | state_dict[state_key] = state 61 | unvisited_states.add(state_key) # All states are unvisited 62 | 63 | g[start_key] = 0 # distance to source is 0 64 | f[start_key] = self._heuristic_func(start, goal) # cost from source to goal = g(s, s) + h(s, g) = 0 + h(s, g) 65 | heapq.heappush(priority_q, (f[start_key], start)) 66 | 67 | # run algorithm 68 | path_exist = False 69 | while len(priority_q) > 0: 70 | # pop the best state found so far. 71 | _, best_state = heapq.heappop(priority_q) 72 | best_state = np.array(best_state) 73 | best_state_key = self._state_key_func(best_state) 74 | 75 | # filter out state visited before. This may happen if multiple path leads to the same state with 76 | # different values. 77 | if best_state_key not in unvisited_states: 78 | continue 79 | else: 80 | unvisited_states.remove(best_state_key) 81 | 82 | # If goal state has been added. 83 | if best_state_key == goal_key: 84 | path_exist = True 85 | break 86 | 87 | print(best_state, best_state_key, g[best_state_key]) 88 | # Find possible transitions from best_state, and add them to queue ranked by heuristics. 89 | actions = self.env.action_space.get_all() 90 | # print(best_state, available_actions) 91 | for action in actions: 92 | new_state, reward, term, _, info = self.env.sample_state_transition(best_state, action) 93 | cost = -reward 94 | 95 | # skip actions that result in failures 96 | if term and not info['success']: 97 | continue 98 | 99 | # if new_state has not been visited, 100 | new_state_key = self._state_key_func(new_state) 101 | if new_state_key in unvisited_states: 102 | g_new_state = g[best_state_key] + cost # cost-to-come 103 | h_new_state = self._heuristic_func(new_state, goal) # cost-to-go 104 | # print(new_state, h_new_state) 105 | if g_new_state < g[new_state_key]: 106 | g[new_state_key] = g_new_state 107 | f[new_state_key] = g_new_state + h_new_state 108 | # NOTE: here we do not need to remove the the previous stored new_state with old best-to-come 109 | # value because of new_state will be marked as visited when first poped with the best 110 | # cost-to-come value. 
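                        # (This is the standard lazy-deletion trick: stale queue entries are simply
                        # skipped at pop time by the unvisited_states check above, so no decrease-key
                        # operation on the heap is required.)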
111 | heapq.heappush(priority_q, (f[new_state_key], new_state.tolist())) 112 | prev_state_dict[new_state_key] = best_state_key 113 | 114 | if path_exist: 115 | # extract shortest path: 116 | shortest_path = [] 117 | state_key = goal_key 118 | while state_key in prev_state_dict: 119 | prev_state_key = prev_state_dict[state_key] 120 | shortest_path.append(state_dict[prev_state_key]) 121 | state_key = prev_state_key 122 | 123 | shortest_path = list(reversed(shortest_path)) 124 | shortest_path_len = g[goal_key] 125 | return (True, shortest_path, shortest_path_len) 126 | else: 127 | return (False, None, None) 128 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/path_planning/dijkstra.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | from typing import Any, Callable 3 | 4 | import numpy as np 5 | 6 | from robotics_algorithm.env.base_env import BaseEnv, EnvType, SpaceType 7 | 8 | 9 | class Dijkstra: 10 | def __init__(self, env: BaseEnv, state_key_func: Callable): 11 | """Constructor. 12 | 13 | Args: 14 | env (BaseEnv): A planning env. 15 | state_key_func (Callable): a function to hash the state. 16 | """ 17 | assert env.action_space.type == SpaceType.DISCRETE.value 18 | assert env.state_space.type == SpaceType.DISCRETE.value 19 | assert env.state_transition_type == EnvType.DETERMINISTIC.value 20 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 21 | 22 | self.env = env 23 | self._state_key_func = state_key_func 24 | 25 | def run(self, start: Any, goal: Any) -> tuple[bool, np.ndarray, float]: 26 | """Run algorithm. 27 | 28 | Args: 29 | start (Any): the start state 30 | goal (Any): the goal state 31 | 32 | Returns: 33 | res (bool): return true if a path is found, return false otherwise. 34 | shortest_path (np.ndarray): a np.ndarray of state if shortest path is found. 35 | shortest_path_len (float): the length of shortest path if found. 36 | """ 37 | # for every state, f[v] = g(s, v) 38 | 39 | # initialize 40 | unvisited_states = set() # OPEN set. Nodes not in this set is in CLOSE set 41 | priority_q = [] 42 | shortest_path = [] 43 | shortest_path_len = 0 44 | g = {} # cost-to-come from start to a state 45 | prev_state_dict = {} # used to extract shortest path 46 | state_dict = {} # key is some state key, value is the actual state. 47 | 48 | start_key = self._state_key_func(start) 49 | goal_key = self._state_key_func(goal) 50 | state_dict[start_key] = start 51 | state_dict[goal_key] = goal 52 | 53 | for state in self.env.state_space.get_all(): 54 | state_key = self._state_key_func(state) 55 | g[state_key] = float('inf') 56 | state_dict[state_key] = state 57 | unvisited_states.add(state_key) 58 | 59 | g[start_key] = 0 # distance to source is 0 60 | heapq.heappush(priority_q, (g[start_key], start)) 61 | 62 | # run algorithm 63 | path_exist = False 64 | while len(priority_q) > 0: 65 | # pop the best state found so far. 66 | _, best_state = heapq.heappop(priority_q) 67 | best_state = np.array(best_state) 68 | best_state_key = self._state_key_func(best_state) 69 | 70 | # filter out state visited before. This may happen if multiple path leads to the same state with 71 | # different values. 72 | if best_state_key not in unvisited_states: 73 | continue 74 | else: 75 | unvisited_states.remove(best_state_key) 76 | 77 | # If goal state has been added. 
78 | if best_state_key == goal_key: 79 | path_exist = True 80 | break 81 | 82 | print(best_state, best_state_key, g[best_state_key]) 83 | # Find possible transitions from best_state, and add them to queue 84 | actions = self.env.action_space.get_all() 85 | for action in actions: 86 | new_state, reward, term, _, info = self.env.sample_state_transition(best_state, action) 87 | cost = -reward 88 | 89 | # skip actions that result in failures 90 | if term and not info['success']: 91 | continue 92 | 93 | # if new_state has not been visited, 94 | new_state_key = self._state_key_func(new_state) 95 | if new_state_key in unvisited_states: 96 | g_new_state = g[best_state_key] + cost # cost-to-come 97 | if g_new_state < g[new_state_key]: 98 | g[new_state_key] = g_new_state 99 | # NOTE: here we do not need to remove the the previous stored new_state with old best-to-come 100 | # value because of new_state will be marked as visited when first poped with the best 101 | # cost-to-come value. 102 | heapq.heappush(priority_q, (g[new_state_key], new_state.tolist())) 103 | prev_state_dict[new_state_key] = best_state_key 104 | 105 | if path_exist: 106 | # extract shortest path: 107 | shortest_path = [] 108 | state_key = goal_key 109 | while state_key in prev_state_dict: 110 | prev_state_key = prev_state_dict[state_key] 111 | shortest_path.append(state_dict[prev_state_key]) 112 | state_key = prev_state_key 113 | 114 | shortest_path = list(reversed(shortest_path)) 115 | shortest_path_len = g[goal_key] 116 | return (True, shortest_path, shortest_path_len) 117 | else: 118 | return (False, None, None) 119 | -------------------------------------------------------------------------------- /robotics_algorithm/planning/path_planning/hybrid_a_star.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Any 2 | import heapq 3 | import numpy as np 4 | 5 | from robotics_algorithm.env.base_env import BaseEnv, SpaceType, EnvType 6 | 7 | 8 | class HybridAStar: 9 | def __init__(self, env: BaseEnv, heuristic_func: Callable, state_key_func: Callable): 10 | """Constructor. 11 | 12 | Args: 13 | env (BaseEnv): A planning env. 14 | heuristic_func (Callable): a function to return estimated cost-to-go from a state to goal 15 | state_key_func (Callable): a function to hash the state. 16 | """ 17 | assert env.action_space.type == SpaceType.DISCRETE.value 18 | assert env.state_space.type == SpaceType.CONTINUOUS.value 19 | assert env.state_transition_type == EnvType.DETERMINISTIC.value 20 | assert env.observability == EnvType.FULLY_OBSERVABLE.value 21 | 22 | self.env = env 23 | self._heuristic_func = heuristic_func 24 | self._state_key_func = state_key_func 25 | 26 | def run(self, start: np.ndarray, goal: np.ndarray) -> tuple[bool, np.ndarray, float]: 27 | """Run algorithm. 28 | 29 | Args: 30 | start (np.ndarray): the start state 31 | goal (np.ndarray): the goal state 32 | 33 | Returns: 34 | res (bool): return true if a path is found, return false otherwise. 35 | shortest_path (np.ndarray): a np.ndarray of state if shortest path is found. 36 | shortest_path_len (float): the length of shortest path if found. 
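        Note:
            In this implementation the extracted ``shortest_path`` is the sequence of actions stored
            in ``prev_state_dict`` (one action per expansion) rather than a list of states; since the
            env is deterministic, the state path can be recovered by replaying the actions from ``start``.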
37 | """ 38 | 39 | # initialize 40 | # for every state, f[v] = g(s, v) + h(v, g) 41 | priority_q = [] 42 | shortest_path = [] 43 | shortest_path_len = 0 44 | g = {} # cost-to-come from start to a state 45 | f = {} # cost-to-come + heuristic cost-to-go 46 | prev_state_dict = {} # used to extract shortest path 47 | state_dict = {} # key is some discrete state key, value is the actual continuous state. 48 | 49 | start_key = self._state_key_func(start) 50 | goal_key = self._state_key_func(goal) 51 | state_dict[start_key] = start 52 | state_dict[goal_key] = goal 53 | 54 | g[start_key] = 0 55 | f[start_key] = self._heuristic_func(start, goal) # cost from source to goal = g(s, s) + h(s, g) = 0 + h(s, g) 56 | heapq.heappush(priority_q, (f[start_key], start_key)) 57 | open_set = set() 58 | close_set = set() 59 | open_set.add(start_key) 60 | 61 | # run algorithm 62 | path_exist = False 63 | while True: 64 | if not open_set: 65 | print("Error: Cannot find path, No open set") 66 | break 67 | 68 | # pop the best state found so far. 69 | _, best_state_key = heapq.heappop(priority_q) 70 | if best_state_key in open_set: 71 | open_set.remove(best_state_key) 72 | close_set.add(best_state_key) 73 | best_state = state_dict[best_state_key] 74 | else: 75 | continue # If best_state in close_set, just continue 76 | 77 | # If goal state has been added. 78 | if best_state_key == goal_key: 79 | path_exist = True 80 | break 81 | 82 | # print(f"close set num: {len(close_set)}/81000") 83 | print(best_state, best_state_key, g[best_state_key]) 84 | # Find possible transitions from best_state, and add them to queue ranked by heuristics. 85 | actions = self.env.action_space.get_all() 86 | for action in actions: 87 | new_state, reward, term, _, info = self.env.sample_state_transition(best_state, action) 88 | cost = -reward 89 | 90 | # skip actions that result in failures 91 | if term and not info["success"]: 92 | continue 93 | 94 | # print(new_state, best_state, action) 95 | 96 | # if new_state has not been visited, or a shorter path to new_state has been found. 
97 | new_state_key = self._state_key_func(new_state) 98 | if new_state_key not in close_set: 99 | g_new_state = g[best_state_key] + cost # cost-to-come 100 | 101 | if new_state_key not in open_set or g_new_state < g[new_state_key]: 102 | h_new_state = self._heuristic_func(new_state, goal) # cost-to-go 103 | # print(new_state_key, h_new_state) 104 | f[new_state_key] = g_new_state + h_new_state 105 | g[new_state_key] = g_new_state 106 | heapq.heappush(priority_q, (f[new_state_key], new_state_key)) 107 | state_dict[new_state_key] = new_state 108 | prev_state_dict[new_state_key] = (best_state_key, action) 109 | open_set.add(new_state_key) 110 | 111 | # print("adding ", best_state, action, new_state, new_state_key, h_new_state) 112 | # debug.append([new_action, new_state_tup]) 113 | 114 | # print(debug) 115 | 116 | if path_exist: 117 | # extract shortest path: 118 | shortest_path = [] 119 | state_key = goal_key 120 | while state_key in prev_state_dict: 121 | prev_state_key, prev_action = prev_state_dict[state_key] 122 | shortest_path.append(prev_action) 123 | state_key = prev_state_key 124 | 125 | shortest_path = list(reversed(shortest_path)) 126 | shortest_path_len = f[goal_key] 127 | return (True, shortest_path, shortest_path_len) 128 | else: 129 | return (False, None, None) 130 | -------------------------------------------------------------------------------- /robotics_algorithm/robot/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/robot/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/robot/cartpole.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing_extensions import override 3 | 4 | from robotics_algorithm.robot.robot import Robot 5 | 6 | 7 | class Cartpole(Robot): 8 | def __init__(self, dt=0.02): 9 | super().__init__(dt) 10 | 11 | self.kinematics_integrator = "euler" 12 | self.gravity = 9.8 13 | self.masscart = 1.0 14 | self.masspole = 0.1 15 | self.total_mass = self.masspole + self.masscart 16 | self.length = 0.5 # actually half the pole's length 17 | self.polemass_length = self.masspole * self.length 18 | self.force_mag = 10.0 19 | 20 | @override 21 | def control(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 22 | action = action.item() 23 | 24 | x, x_dot, theta, theta_dot = state 25 | 26 | # For the interested reader: 27 | # https://coneural.org/florian/papers/05_cart_pole.pdf 28 | costheta = np.cos(theta) 29 | sintheta = np.sin(theta) 30 | temp = (action + self.polemass_length * np.square(theta_dot) * sintheta) / self.total_mass 31 | thetaacc = (self.gravity * sintheta - costheta * temp) / ( 32 | self.length * (4.0 / 3.0 - self.masspole * np.square(costheta) / self.total_mass) 33 | ) 34 | xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass 35 | 36 | if self.kinematics_integrator == "euler": 37 | x = x + self.dt * x_dot 38 | x_dot = x_dot + self.dt * xacc 39 | theta = theta + self.dt * theta_dot 40 | theta_dot = theta_dot + self.dt * thetaacc 41 | else: # semi-implicit euler 42 | x_dot = x_dot + self.dt * xacc 43 | x = x + self.dt * x_dot 44 | theta_dot = theta_dot + self.dt * thetaacc 45 | theta = theta + self.dt * theta_dot 46 | 47 | return np.array([x, x_dot, theta, theta_dot]) 48 | 
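# Illustrative sketch (not part of the repository): a minimal forward simulation of the Cartpole
# model above, mirroring the __main__ demos in differential_drive.py and double_integrator.py below.
# The initial state and constant force are arbitrary, and the module's existing numpy import is reused.
if __name__ == "__main__":
    cartpole = Cartpole(dt=0.02)
    state = np.array([0.0, 0.0, 0.05, 0.0])  # [x, x_dot, theta, theta_dot], pole slightly tilted
    for _ in range(100):
        state = cartpole.control(state, np.array([1.0]))  # apply a constant 1 N force for 2 seconds
    print(state)  # final [x, x_dot, theta, theta_dot]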
-------------------------------------------------------------------------------- /robotics_algorithm/robot/differential_drive.py: --------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | from typing_extensions import override
4 | 
5 | from robotics_algorithm.robot.robot import Robot
6 | from robotics_algorithm.utils import math_utils
7 | 
8 | 
9 | class DiffDrive(Robot):
10 |     def __init__(self, wheel_radius: float, wheel_dist: float, dt=0.05):
11 |         super().__init__(dt)
12 | 
13 |         # Robot parameters
14 |         self.wheel_radius = wheel_radius  # meters
15 |         self.wheel_dist = wheel_dist  # meters
16 | 
17 |     def control_wheel_speed(self, state: np.ndarray, control: np.ndarray, dt: float) -> np.ndarray:
18 |         """Advance the robot state by applying left and right wheel speeds for duration dt.
19 | 
20 |         Args:
21 |             state (np.ndarray): [x, y, theta] robot's current state
22 |             control (np.ndarray): [v_l, v_r] left and right wheel angular velocities in rad/s
23 |             dt (float): time step
24 | 
25 |         Returns:
26 |             new state [x, y, theta]
27 |         """
28 |         v_l, v_r = control
29 |         lin_vel = self.wheel_radius * (v_r + v_l) / 2.0
30 |         ang_vel = self.wheel_radius * (v_r - v_l) / self.wheel_dist
31 | 
32 |         return self.control(state, np.array([lin_vel, ang_vel]), dt)
33 | 
34 |     @override
35 |     def control(self, state: np.ndarray, action: np.ndarray, dt: float) -> np.ndarray:
36 |         """
37 |         Update the robot state based on the differential drive kinematics.
38 | 
39 |         Args:
40 |             state (np.ndarray): [x, y, theta] robot's current state.
41 |             action (np.ndarray): [lin_vel, ang_vel]
42 |             dt (float): time step
43 | 
44 |         Returns:
45 |             new state [x, y, theta]
46 |         """
47 |         x, y, theta = state
48 |         lin_vel, ang_vel = action
49 | 
50 |         t = 0
51 |         while t < dt:
52 |             # Update the state
53 |             x_new = x + lin_vel * np.cos(theta) * self.dt
54 |             y_new = y + lin_vel * np.sin(theta) * self.dt
55 |             theta_new = theta + ang_vel * self.dt
56 |             theta_new = math_utils.normalize_angle(theta_new)
57 | 
58 |             t += self.dt
59 |             x, y, theta = x_new, y_new, theta_new
60 | 
61 |         return np.array([x_new, y_new, theta_new])
62 | 
63 |     @override
64 |     def linearize_state_transition(self, state, action):
65 |         # linearize dynamics around state in discrete time -> x_new = Ax + Bu
66 | 
67 |         x, y, theta = state
68 |         lin_vel, ang_vel = action
69 |         A = np.array(
70 |             [
71 |                 [1, 0, -lin_vel * np.sin(theta) * self.dt],
72 |                 [0, 1, lin_vel * np.cos(theta) * self.dt],
73 |                 [0, 0, 1]
74 |             ]
75 |         )
76 |         B = np.array([[np.cos(theta) * self.dt, 0], [np.sin(theta) * self.dt, 0], [0, self.dt]])
77 | 
78 |         return A, B
79 | 
80 | if __name__ == "__main__":
81 |     # Simulation parameters
82 |     dt = 1.0  # time step
83 |     total_time = 10.0  # total simulation time
84 |     num_steps = int(total_time / dt)
85 | 
86 |     # Initial state [x, y, theta]
87 |     state = [0.0, 0.0, 0.0]
88 | 
89 |     # Control inputs (left and right wheel velocities)
90 |     control_inputs = [0.2, 0.5]  # constant velocities
91 | 
92 |     # Record state history for plotting
93 |     state_history = [state]
94 | 
95 |     diff_drive_system = DiffDrive(wheel_radius=0.05, wheel_dist=0.2)
96 | 
97 |     # Run simulation
98 |     for _ in range(num_steps):
99 |         state1 = diff_drive_system.control_wheel_speed(state, control_inputs, dt)
100 | 
101 |         num_sub_steps = int(dt / diff_drive_system.dt)
102 |         for _ in range(num_sub_steps):
103 |             state = diff_drive_system.control_wheel_speed(state, control_inputs, dt=diff_drive_system.dt)
104 |             state_history.append(state)
105 | 
106 |         assert np.allclose(np.array(state1), np.array(state))
107 | 
108 |     # Convert state history to numpy array for easier
indexing 109 | state_history = np.array(state_history) 110 | print(state_history) 111 | 112 | # Plot results 113 | plt.figure() 114 | plt.plot(state_history[:, 0], state_history[:, 1], label="Robot Path") 115 | plt.xlabel("X position (m)") 116 | plt.ylabel("Y position (m)") 117 | plt.title("Differential Drive Robot Path") 118 | plt.legend() 119 | plt.axis("equal") 120 | plt.grid() 121 | plt.show() 122 | -------------------------------------------------------------------------------- /robotics_algorithm/robot/double_integrator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from typing_extensions import override 4 | 5 | from robotics_algorithm.robot.robot import Robot 6 | 7 | class DoubleIntegrator(Robot): 8 | def __init__(self, dt=0.01): 9 | # Robot parameters 10 | super().__init__(dt) 11 | 12 | use_discrete_time_model = True # Hardcode 13 | 14 | # x = [q, q_dot] 15 | # x_dot = [q_dot, q_dot_dot] = Ax + Bu 16 | if not use_discrete_time_model: 17 | self.A = np.array([[0, 1], [0, 0]]) 18 | self.B = np.array([[0, 1]]).T 19 | else: 20 | self.A = np.array([[1, dt], [0, 1]]) 21 | self.B = np.array([[0.5 * dt * dt], [dt]]) 22 | 23 | self.use_discrete_time_model = use_discrete_time_model 24 | 25 | @override 26 | def control(self, state: np.ndarray, control: np.ndarray, dt: float) -> np.ndarray: 27 | """Compute the end state given the current state and control. 28 | 29 | Args: 30 | state (np.ndarray): [q, q_dot] robot's current state 31 | control (np.ndarray): [q_dot_dot] acceleration input 32 | dt (float): time step 33 | 34 | Returns: 35 | new state [q, q_dot] 36 | """ 37 | state = np.array(state).reshape(-1, 1) 38 | control = np.array(control).reshape(-1, 1) 39 | 40 | if not self.use_discrete_time_model: 41 | # integrate continuous-time dynamics with forward Euler, assuming dt is small enough 42 | x_dot = self.A @ state + self.B @ control 43 | new_state = state + x_dot * dt 44 | else: 45 | # if discrete_time model is used, transition func directly gives new state 46 | new_state = self.A @ state + self.B @ control 47 | 48 | return new_state.reshape(-1) 49 | 50 | @override 51 | def linearize_state_transition(self, state, action): 52 | return self.A, self.B 53 | 54 | if __name__ == "__main__": 55 | # Simulation parameters 56 | dt = 0.01 # time step 57 | total_time = 10.0 # total simulation time 58 | num_steps = int(total_time / dt) 59 | 60 | # Initial state [q, q_dot] 61 | state = np.array([0.0, 0.0]) 62 | 63 | # Control inputs (accelerations) 64 | control_inputs = [0.1] 65 | 66 | # Record state history for plotting 67 | state_history = [state] 68 | 69 | system = DoubleIntegrator() 70 | 71 | # Run simulation 72 | time_history = [0] 73 | for time_step in range(num_steps): 74 | state = system.control(state, control_inputs, dt) 75 | state_history.append(state) 76 | time_history.append(dt + time_step * dt) 77 | 78 | # Convert state history to numpy array for easier indexing 79 | state_history = np.array(state_history) 80 | print(state_history) 81 | 82 | # Plot results 83 | plt.figure() 84 | plt.plot(time_history, state_history[:, 0], label="Robot Path") 85 | plt.xlabel("time (s)") 86 | plt.ylabel("position (m)") 87 | plt.title("Double integrator Robot Path") 88 | plt.legend() 89 | plt.grid() 90 | plt.show() 91 | -------------------------------------------------------------------------------- /robotics_algorithm/robot/pendulum.py: -------------------------------------------------------------------------------- 1 | 
import math 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from matplotlib.animation import FuncAnimation 6 | from typing_extensions import override 7 | 8 | from robotics_algorithm.robot.robot import Robot 9 | 10 | 11 | class Pendulum(Robot): 12 | def __init__(self, dt=0.01): 13 | super().__init__(dt) 14 | 15 | # Constants 16 | self.g = 9.81 # acceleration due to gravity, in m/s^2 17 | self.L = 1.0 # length of the pendulum, in m 18 | 19 | @override 20 | def control(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 21 | theta, theta_dot = state 22 | theta += np.pi 23 | 24 | theta_dot_dot = -self.g / self.L * math.sin(theta) + action.item() / self.L / self.L 25 | 26 | # Forward euler 27 | new_theta = theta + self.dt * theta_dot - np.pi 28 | new_theta_dot = theta_dot + self.dt * theta_dot_dot 29 | 30 | return np.array([new_theta, new_theta_dot]) 31 | 32 | @override 33 | def linearize_state_transition(self, state, action): 34 | # linearize dynamics around state in discrete time -> x_new = Ax + Bu 35 | 36 | # discrete-time case: x_t+1 = Ax + Bu 37 | theta = state[0] 38 | theta += np.pi 39 | A = np.array([[1, self.dt], [-self.g / self.L * math.cos(theta) * self.dt, 1]]) 40 | B = np.array([0, 1 / (self.L ** 2) * self.dt]).reshape(2, 1) 41 | return A, B 42 | 43 | 44 | if __name__ == "__main__": 45 | env = Pendulum() 46 | 47 | # Time vector 48 | t = np.arange(0, 10, env.dt) # simulate for 10 seconds 49 | 50 | # State vector [theta, omega] 51 | x = np.zeros((len(t), 2)) 52 | x[0] = [0, 0] 53 | 54 | # Control input (constant or time-varying) 55 | u = np.ones(len(t)) * 20 # example: no control input 56 | # u = np.sin(t) # example: sinusoidal control input 57 | 58 | for i in range(1, len(t)): 59 | x[i] = env.control(x[i-1], u[i-1]) 60 | 61 | # Extract theta and omega for plotting 62 | theta = x[:, 0] 63 | omega = x[:, 1] 64 | 65 | # Create an animation of the pendulum 66 | fig, ax = plt.subplots() 67 | ax.set_xlim(-env.L-0.1, env.L+0.1) 68 | ax.set_ylim(-env.L-0.1, env.L+0.1) 69 | ax.set_aspect('equal') 70 | line, = ax.plot([], [], 'o-', lw=2) 71 | 72 | def init(): 73 | line.set_data([], []) 74 | return line, 75 | 76 | def update(frame): 77 | x_pendulum = env.L * np.sin(theta[frame] + np.pi) 78 | y_pendulum = -env.L * np.cos(theta[frame] + np.pi) 79 | line.set_data([0, x_pendulum], [0, y_pendulum]) 80 | return line, 81 | 82 | ani = FuncAnimation(fig, update, frames=len(t), init_func=init, blit=True, interval=env.dt*1000) 83 | plt.show() -------------------------------------------------------------------------------- /robotics_algorithm/robot/planar_quadrotor.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from matplotlib.animation import FuncAnimation 6 | from typing_extensions import override 7 | 8 | from robotics_algorithm.robot.robot import Robot 9 | 10 | 11 | class PlanarQuadrotor(Robot): 12 | def __init__(self, dt=0.01): 13 | """Planar Quadrotor. 
14 | 15 | https://arxiv.org/pdf/2106.15134 16 | 17 | State: [x, z, theta, x_dot, z_dot, theta_dot] 18 | Action: [left_thrust, right_thrust], the individual rotor thrusts in N 19 | 20 | """ 21 | super().__init__(dt) 22 | 23 | # Constants 24 | self.g = 9.81 25 | self.m = 1.0 26 | self.L = 0.4 # length of the quadrotor body (distance between rotors), in m 27 | self.I = self.m * self.L * self.L / 12.0 # Using moment of inertia of a thin rod 28 | 29 | @override 30 | def control(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 31 | x, z, theta, x_vel, z_vel, theta_vel = state 32 | 33 | left_thrust, right_thrust = action 34 | net_thrust = left_thrust + right_thrust 35 | net_moment = self.L / 2.0 * (right_thrust - left_thrust) 36 | 37 | x_accel = -net_thrust * math.sin(theta) / self.m 38 | z_accel = net_thrust * math.cos(theta) / self.m - self.g 39 | theta_accel = net_moment / self.I 40 | 41 | # Forward euler simulation 42 | new_x = x + x_vel * self.dt 43 | new_z = z + z_vel * self.dt 44 | new_theta = theta + theta_vel * self.dt 45 | new_x_vel = x_vel + x_accel * self.dt 46 | new_z_vel = z_vel + z_accel * self.dt 47 | new_theta_vel = theta_vel + theta_accel * self.dt 48 | 49 | return np.array([new_x, new_z, new_theta, new_x_vel, new_z_vel, new_theta_vel]) 50 | 51 | # def control_thrust(self, state, left_thrust, right_thrust): 52 | # net_thrust = left_thrust + right_thrust 53 | # net_moment = self.L / 2.0 * (right_thrust - left_thrust) 54 | 55 | # return self.control(state, np.array([net_thrust, net_moment])) 56 | 57 | @override 58 | def linearize_state_transition(self, state, action): 59 | # linearize dynamics around state in discrete time -> x_new = Ax + Bu 60 | 61 | x, z, theta, x_vel, z_vel, theta_vel = state 62 | u1, u2 = action 63 | 64 | # discrete-time case: x_t+1 = Ax + Bu 65 | A = np.array( 66 | [ 67 | [1, 0, 0, self.dt, 0, 0], 68 | [0, 1, 0, 0, self.dt, 0], 69 | [0, 0, 1, 0, 0, self.dt], 70 | [0, 0, -(u1 + u2) * math.cos(theta) / self.m * self.dt, 1, 0, 0], 71 | [0, 0, -(u1 + u2) * math.sin(theta) / self.m * self.dt, 0, 1, 0], 72 | [0, 0, 0, 0, 0, 1], 73 | ] 74 | ) 75 | B = np.array( 76 | [ 77 | [0, 0], 78 | [0, 0], 79 | [0, 0], 80 | [-math.sin(theta) / self.m * self.dt, -math.sin(theta) / self.m * self.dt], 81 | [math.cos(theta) / self.m * self.dt, math.cos(theta) / self.m * self.dt], 82 | [-self.L / (2 * self.I) * self.dt, self.L / (2 * self.I) * self.dt], 83 | ] 84 | ) 85 | return A, B 86 | 87 | 88 | if __name__ == '__main__': 89 | env = PlanarQuadrotor() 90 | 91 | total_time = 10 92 | 93 | # Initial state [x, z, theta, x_dot, z_dot, theta_dot] 94 | state = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]) 95 | 96 | # Control inputs: constant thrusts 97 | u1 = 5.0 # Thrust from left rotor (N) 98 | u2 = 5.0 # Thrust from right rotor (N) 99 | 100 | # Simulation variables 101 | trajectory_x = [] 102 | trajectory_z = [] 103 | 104 | # Update function for integration 105 | def update_state(state, u1, u2, dt): 106 | """Update the state using numerical integration (Euler method).""" 107 | new_state = env.control(state, np.array([u1, u2])) 108 | return new_state 109 | 110 | # Set up the figure and animation 111 | fig, ax = plt.subplots(figsize=(8, 6)) 112 | 113 | # Limits for animation 114 | ax.set_xlim(-2, 2) 115 | ax.set_ylim(-1, 3) 116 | 117 | # Quadrotor body and thrusters 118 | (body,) = ax.plot([], [], 'o-', lw=4, markersize=10, label='Quadrotor Body') # Quadrotor body 119 | (trajectory,) = ax.plot([], [], 'b--', lw=1, label='Trajectory') # Trajectory of the center of mass 120 | 121 | # Text for simulation 
time 122 | time_text = ax.text(0.02, 0.95, '', transform=ax.transAxes) 123 | 124 | # Initialize the quadrotor drawing components 125 | def init(): 126 | body.set_data([], []) 127 | trajectory.set_data([], []) 128 | time_text.set_text('') 129 | return body, trajectory, time_text 130 | 131 | # Animation function 132 | def animate(i): 133 | global state, trajectory_x, trajectory_z 134 | 135 | # Update the state using Euler integration 136 | state = update_state(state, u1, u2, env.dt) 137 | 138 | # Extract positions and angles 139 | x, z, theta, _, _, _ = state 140 | 141 | # Compute quadrotor geometry 142 | rotor1_x = x - env.L * np.cos(theta) 143 | rotor1_z = z - env.L * np.sin(theta) 144 | rotor2_x = x + env.L * np.cos(theta) 145 | rotor2_z = z + env.L * np.sin(theta) 146 | 147 | # Update trajectory 148 | trajectory_x.append(x) 149 | trajectory_z.append(z) 150 | 151 | # Update the quadrotor body and trajectory data 152 | body.set_data([rotor1_x, rotor2_x], [rotor1_z, rotor2_z]) 153 | trajectory.set_data(trajectory_x, trajectory_z) 154 | 155 | # Update simulation time 156 | time_text.set_text(f'Time: {i * env.dt:.2f}s') 157 | 158 | return body, trajectory, time_text 159 | 160 | # Run the animation 161 | ani = FuncAnimation( 162 | fig, animate, frames=int(total_time / env.dt), interval=env.dt * 1000, init_func=init, blit=True 163 | ) 164 | plt.legend() 165 | plt.xlabel('X Position (m)') 166 | plt.ylabel('Z Position (m)') 167 | plt.title('Planar Quadrotor Simulation') 168 | plt.grid() 169 | plt.show() 170 | -------------------------------------------------------------------------------- /robotics_algorithm/robot/robot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Robot: 5 | def __init__(self, dt): 6 | self.dt = dt 7 | 8 | def control(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: 9 | """Simulate robot kinematics/dynamics forward in time to get the next state 10 | 11 | Args: 12 | state (np.ndarray): previous state 13 | action (np.ndarray): action to apply 14 | 15 | Returns: 16 | np.ndarray: next state 17 | """ 18 | raise NotImplementedError() 19 | 20 | def linearize_state_transition(self, state: np.ndarray, action: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 21 | """Linearize dynamics in discrete time so that x_new = A @ x + B @ u 22 | 23 | Args: 24 | state (np.ndarray): previous state 25 | action (np.ndarray): action to apply 26 | 27 | Returns: 28 | tuple[np.ndarray, np.ndarray]: A and B 29 | """ 30 | raise NotImplementedError 31 | -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/state_estimation/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/amcl.py: -------------------------------------------------------------------------------- 1 | from typing_extensions import override 2 | import math 3 | import numpy as np 4 | from collections import defaultdict 5 | import random 6 | 7 | from robotics_algorithm.env.base_env import PartiallyObservableEnv 8 | from robotics_algorithm.state_estimation.particle_filter import ParticleFilter 9 | 10 | 11 | class AMCL(ParticleFilter): 12 | def __init__(self, env: PartiallyObservableEnv, min_particles= 500, 
max_particles: int = 10000): 13 | """Adaptive Monte-Carlo localisation. 14 | 15 | Particle filter with Augmentation and KLD resampling. 16 | Augmentation: Adjust number of random samples based on the measurement likelihood. 17 | KLD resampling: Adjust total number of particles based on KL divergence against true prior. 18 | 19 | Args: 20 | env (PartiallyObservableEnv): environment 21 | min_particles (int, optional): minimum number of samples. Defaults to 500. 22 | max_particles (int, optional): maximum number of samples. Defaults to 10000. 23 | """ 24 | super().__init__(env) 25 | 26 | self.hist_res = 0.1 27 | self.hist = defaultdict(int) 28 | self.min_samples = min_particles 29 | self.max_samples = max_particles 30 | 31 | # For KLD sampling 32 | # ! The statistical bound says: if we choose N number of samples, then we can guarantee that with probability quantile, 33 | # ! the KL-divergence between the MLE and the true distribution is less than epsilon. 34 | self.quantile = 0.99 35 | self.epsilon = 0.05 36 | 37 | # For Augmentation 38 | self.alpha_short_term = 0.1 39 | self.alpha_long_term = 0.001 40 | self.w_short_term = 0 41 | self.w_long_term = 0 42 | 43 | @override 44 | def update(self, observation): 45 | # Add particles until desired number is reached. 46 | """ 47 | @param observation, observation 48 | """ 49 | # calculate particle weights 50 | # - get_observation_prob may return pdf, need to normalize after. 51 | weights = [] 52 | w_avg = 0 53 | for particle in self.particles: 54 | weight = self.env.get_observation_prob(particle, observation) + 1e-300 # avoid round-off to zero 55 | weights.append(weight) 56 | w_avg += 1.0 / len(self.particles) * weight 57 | weights = np.array(weights) 58 | weights = weights / np.sum(weights) # normalize! 59 | 60 | # Calculate average likelihood of measurement over a large and short time periods 61 | # in order to determine whether the current measurement likelihood is too large 62 | self.w_short_term += self.alpha_short_term * (w_avg - self.w_short_term) 63 | self.w_long_term += self.alpha_long_term * (w_avg - self.w_long_term) 64 | 65 | # Augmentation + KLD resampling. 66 | # ! probability of choosing random sample is decided by the divergence between short-term likelihood and long-term likelihood 67 | # ! if short-term likelihood is worse (small), then random sample probability is higher 68 | random_sample_prob = max(0.0, 1.0 - self.w_short_term / self.w_long_term) 69 | # print(w_avg, self.w_short_term, self.w_long_term, random_sample_prob) 70 | particle_indices = np.arange(len(self.particles)) 71 | new_particles = [] 72 | while len(new_particles) < self._KLD_sample_limit(): # ! Keep adding particles until statistical bound is met 73 | x = random.uniform(0, 1) 74 | if x <= random_sample_prob: 75 | new_particle = self.env.random_state() 76 | else: 77 | new_particle_index = np.random.choice(particle_indices, replace=True, p=weights) # Draw 1 sample 78 | new_particle = self.particles[new_particle_index] 79 | new_particles.append(new_particle) 80 | 81 | # check sample falls into which bin, and increment bin count 82 | particle_key = tuple(np.floor(np.array(new_particle) / self.hist_res).astype(np.int32).tolist()) 83 | self.hist[particle_key] += 1 84 | 85 | self.particles = new_particles 86 | self.hist.clear() # ! Statistical bound is calculated every iteration. 87 | 88 | def _KLD_sample_limit(self): 89 | # Compute the required number of samples, given that there are k bins 90 | # with samples in them. This is taken directly from Fox et al. 
91 | k = len(self.hist) # How many bins are filled up 92 | if k <= 1: 93 | return self.max_samples 94 | 95 | # ! The statistical bound says: if we choose n number of samples, then we can guarantee that with probability quantile, 96 | # ! the KL-divergence between the MLE and the true distribution is less than epsilon. 97 | a = 1 98 | b = 2 / (9 * (k - 1)) 99 | c = math.sqrt(2 / (9 * (k - 1))) * self.quantile # Upper X quantile of normal distribution 100 | x = a - b + c 101 | n = math.ceil((k - 1) / (2 * self.epsilon) * x * x * x) # epsilon is the threshold of KL divergence. 102 | 103 | if n < self.min_samples: 104 | return self.min_samples 105 | 106 | if n > self.max_samples: 107 | return self.max_samples 108 | 109 | return n 110 | 111 | @property 112 | def num_of_cur_particles(self): 113 | return len(self.particles) -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/discrete_bayes_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.base_env import BaseEnv, SpaceType, EnvType 4 | 5 | 6 | class DiscreteBayesFilter: 7 | """Bayes filter for environment with discrete state and observations. 8 | 9 | Analytically computes the Bayes update rule by iterating over all possible states and observations. 10 | """ 11 | def __init__(self, env: BaseEnv): 12 | assert env.state_space.type == SpaceType.DISCRETE.value 13 | assert env.observation_space.type == SpaceType.DISCRETE.value 14 | assert env.state_transition_type == EnvType.STOCHASTIC.value 15 | assert env.observability == EnvType.PARTIALLY_OBSERVABLE.value 16 | 17 | self.env = env 18 | self._state_space_size = env.state_space.size 19 | 20 | # Uniformly distributed 21 | self.state_belief = {} 22 | for state in self.env.state_space.get_all(): 23 | self.state_belief[tuple(state)] = 1 / self._state_space_size 24 | 25 | def set_initial_state(self, state: np.ndarray): 26 | """ 27 | Set the initial state of filter. 28 | 29 | @param state, initial state 30 | """ 31 | self.state_belief[tuple(state)] += 0.05 # slightly increase the initial state belief 32 | self._normalize() 33 | 34 | def get_state(self) -> np.ndarray: 35 | # return the state with highest probability 36 | return max(self.state_belief, key=self.state_belief.get) 37 | 38 | def run(self, action: np.ndarray, obs: np.ndarray): 39 | """ 40 | Run one iteration of the filter. 41 | 42 | @param action, control 43 | @param obs, observation 44 | """ 45 | self.predict(action) 46 | self.update(obs) 47 | 48 | def predict(self, action: np.ndarray): 49 | """ 50 | @param action, action 51 | """ 52 | new_state_belief = dict.fromkeys(self.state_belief, 0.0) # start the predicted belief from zero 53 | 54 | # In the discrete case, the predicted belief of each new state is the sum, over all prior states, of the transition probability times the prior belief. 55 | for state in self.env.state_space.get_all(): 56 | new_states, probs = self.env.state_transition_func(state, action) 57 | for new_state, prob in zip(new_states, probs): 58 | new_state_belief[tuple(new_state)] += prob * self.state_belief[tuple(state)] 59 | 60 | self.state_belief = new_state_belief 61 | 62 | self._normalize() 63 | 64 | def update(self, obs: np.ndarray): 65 | """ 66 | @param obs, current observation. 
67 | """ 68 | for state in self.env.state_space.get_all(): 69 | obss, obs_probs = self.env.observation_func(state) 70 | 71 | obs_prob = 0 72 | if obs in obss: 73 | obs_prob = obs_probs[obss.index(obs)] 74 | 75 | self.state_belief[tuple(state)] *= obs_prob 76 | 77 | self._normalize() 78 | 79 | def _normalize(self): 80 | # normalize 81 | total_sum = sum(self.state_belief.values()) 82 | for state in self.state_belief.keys(): 83 | self.state_belief[state] /= total_sum 84 | -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/extended_kalman_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.base_env import BaseEnv, EnvType, FunctionType, DistributionType 4 | 5 | 6 | class ExtendedKalmanFilter: 7 | def __init__(self, env: BaseEnv): 8 | """ 9 | Extended Kalman filter. 10 | 11 | State Transition: X = transition_func(x, u) + sigma 12 | Measurement Z = measurement_func(x) + delta 13 | R: Process covariance matrix from sigma 14 | Q. Measurement covariance matrix from delta 15 | """ 16 | 17 | assert env.state_transition_type == EnvType.STOCHASTIC.value 18 | assert env.observability == EnvType.PARTIALLY_OBSERVABLE.value 19 | assert env.state_transition_func_type == FunctionType.GENERAL.value 20 | assert env.observation_func_type == FunctionType.GENERAL.value 21 | assert env.state_transition_dist_type == DistributionType.GAUSSIAN.value 22 | assert env.observation_dist_type == DistributionType.GAUSSIAN.value 23 | 24 | self.env = env 25 | self.state = np.array(env.state_space.sample()) 26 | self.covariance = np.eye(env.state_space.state_size) 27 | 28 | def set_initial_state(self, state: np.ndarray): 29 | """ 30 | Set the initial state of filter. 31 | 32 | @param state, initial state 33 | """ 34 | self.state = np.array(state) 35 | 36 | def get_state(self) -> np.ndarray: 37 | return self.state 38 | 39 | def run(self, action: np.ndarray, obs: np.ndarray): 40 | """ 41 | Run one iteration of the Kalman filter. 
42 | 43 | @param action, control 44 | @param obs, observation 45 | """ 46 | self.predict(action) 47 | self.update(obs) 48 | 49 | def predict(self, action: np.ndarray): 50 | """ 51 | @param control, control 52 | """ 53 | F, _ = self.env.linearize_state_transition(self.state, action) 54 | mean_new_state, _ = self.env.state_transition_func(self.state, action) # non-linear transition_func 55 | new_covariance = F @ self.covariance @ F.transpose() + self.env.state_transition_cov_matrix 56 | 57 | self.state = np.array(mean_new_state) 58 | self.covariance = new_covariance 59 | 60 | def update(self, obs: np.ndarray): 61 | """ 62 | @param obs, observation 63 | """ 64 | obs_np = np.array(obs) 65 | mean_obs, _ = self.env.observation_func(self.state) 66 | mean_obs_np = np.array(mean_obs) 67 | 68 | H = self.env.linearize_observation(self.state, obs) 69 | K = ( 70 | self.covariance 71 | @ H.transpose() 72 | @ np.linalg.inv(H @ self.covariance @ H.transpose() + self.env.obs_cov_matrix) 73 | ) 74 | new_state = self.state + K @ (obs_np - mean_obs_np) 75 | tmp_matrix = K @ H 76 | new_covariance = (np.eye(tmp_matrix.shape[0]) - tmp_matrix) @ self.covariance 77 | 78 | self.state = new_state 79 | self.covariance = new_covariance 80 | -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/kalman_filter.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | from robotics_algorithm.env.base_env import BaseEnv, EnvType, FunctionType, DistributionType 5 | 6 | 7 | class KalmanFilter: 8 | def __init__(self, env: BaseEnv): 9 | """ 10 | Kalman filter. 11 | 12 | When environment has linear state transtion and linear observation function with Gaussian noise, bayesian 13 | filter rule can be simplified. 14 | 15 | State Transition: X = AX + BU + sigma 16 | Measurement Z = HX + delta 17 | """ 18 | assert env.state_transition_type == EnvType.STOCHASTIC.value 19 | assert env.observability == EnvType.PARTIALLY_OBSERVABLE.value 20 | assert env.state_transition_func_type == FunctionType.LINEAR.value 21 | assert env.observation_func_type == FunctionType.LINEAR.value 22 | assert env.state_transition_dist_type == DistributionType.GAUSSIAN.value 23 | assert env.observation_dist_type == DistributionType.GAUSSIAN.value 24 | 25 | # state_transition_func: x = Ax + Bu + sigma 26 | self.A, self.B = env.linearize_state_transition(None, None) # As state transition is linear, A and B are constant 27 | self.R = env.state_transition_covariance_matrix 28 | 29 | # measurement function z = Hx + delta 30 | self.H = env.linearize_observation(None, None) # As observation is linear, H are constant 31 | self.Q = env.observation_covariance_matrix 32 | 33 | self.state = np.array(env.cur_state) 34 | self.covariance = np.eye(env.state_space.state_size) 35 | 36 | def set_initial_state(self, state: np.ndarray): 37 | """ 38 | Set the initial state of filter. 39 | @param state, initial state 40 | """ 41 | self.state = np.array(state) 42 | 43 | def get_state(self) -> np.ndarray: 44 | return self.state 45 | 46 | def run(self, action: np.ndarray, obs: np.ndarray): 47 | """ 48 | Run one iteration of the Kalman filter. 49 | 50 | @param action, control 51 | @param obs, observation 52 | """ 53 | self.predict(action) 54 | self.update(obs) 55 | 56 | def predict(self, action: np.ndarray): 57 | """ 58 | Predict the state of the Kalman filter. 
59 | 60 | @param action, applied action 61 | """ 62 | new_state = self.A @ self.state + self.B @ action 63 | new_covariance = self.A @ self.covariance @ self.A.transpose() + self.R 64 | 65 | self.state = new_state 66 | self.covariance = new_covariance 67 | 68 | def update(self, obs: np.ndarray): 69 | """ 70 | Update the state of the Kalman filter. 71 | 72 | @param obs, obtained observation. 73 | """ 74 | K = self.covariance @ self.H.transpose() @ np.linalg.inv(self.H @ self.covariance @ self.H.transpose() + self.Q) 75 | new_state = self.state + K @ (obs - self.H @ self.state) 76 | tmp_matrix = K @ self.H 77 | new_covariance = (np.eye(tmp_matrix.shape[0]) - tmp_matrix) @ self.covariance 78 | 79 | self.state = new_state 80 | self.covariance = new_covariance 81 | -------------------------------------------------------------------------------- /robotics_algorithm/state_estimation/particle_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from robotics_algorithm.env.base_env import PartiallyObservableEnv, EnvType 4 | 5 | 6 | class ParticleFilter: 7 | def __init__( 8 | self, env: PartiallyObservableEnv, num_of_particles: int = 10000, uniform_particle_ratio: float = 0.05 9 | ): 10 | """Particle filter 11 | 12 | Args: 13 | env (BaseEnv): _description_ 14 | num_of_particles (int, optional): num of particles. Defaults to 10000. 15 | uniform_particle_ratio (float, optional): if set to non-zero, filter will add a small number of random 16 | particles after each iteration to tackle particle deprivation problem. Defaults to 0.05. 17 | """ 18 | 19 | # sanity check 20 | assert env.state_transition_type == EnvType.STOCHASTIC.value 21 | assert env.observability == EnvType.PARTIALLY_OBSERVABLE.value 22 | 23 | self.env = env 24 | 25 | # attributes 26 | self.num_of_particles = num_of_particles 27 | self.num_of_uniform_particles = int(num_of_particles * uniform_particle_ratio) 28 | 29 | self.particles = [] 30 | for _ in range(self.num_of_particles): 31 | particle = self.env.random_state() 32 | self.particles.append(particle) 33 | 34 | def set_initial_state(self, state: np.ndarray): 35 | """ 36 | Set the initial state of filter. 37 | 38 | @param state, initial state 39 | """ 40 | self.state = np.array(state) 41 | for i in range(self.num_of_uniform_particles, self.num_of_particles): 42 | self.particles[i] = state 43 | 44 | def get_state(self) -> np.ndarray: 45 | # simply average 46 | mean_state = np.mean(np.array(self.particles), axis=0) 47 | return mean_state 48 | 49 | def run(self, action: np.ndarray, observation: np.ndarray): 50 | """ 51 | Run one iteration of the filter. 52 | 53 | @param action, control 54 | @param observation, observation 55 | """ 56 | self.predict(action) 57 | self.update(observation) 58 | 59 | def predict(self, action): 60 | """ 61 | @param action, action 62 | """ 63 | # sample new particles according to state transition function 64 | new_particles = [] 65 | for particle in self.particles: 66 | # NOTE: Unlike kalman filter which assumes a Gaussian distribution, here we sample state transition instead 67 | # calling state_transition_function which returns the mean of the new state. 
68 | new_particle = self.env.sample_state_transition(particle, action)[0] 69 | new_particles.append(new_particle) 70 | 71 | self.particles = new_particles 72 | 73 | def update(self, observation): 74 | """ 75 | @param observation, observation 76 | """ 77 | # calculate particle weights 78 | # - get_observation_prob may return pdf, need to normalize after. 79 | weights = [] 80 | for particle in self.particles: 81 | weight = self.env.get_observation_prob(particle, observation) + 1e-300 # avoid round-off to zero 82 | weights.append(weight) 83 | weights = np.array(weights) 84 | weights = weights / np.sum(weights) # normalize! 85 | 86 | # importance sampling, sample with replacement according to weights 87 | particle_indices = np.arange(self.num_of_particles) 88 | new_particle_indices = np.random.choice(particle_indices, size=self.num_of_particles, replace=True, p=weights) 89 | new_particles = np.array(self.particles)[new_particle_indices].tolist() 90 | 91 | # Add random particles if required 92 | for i in range(self.num_of_uniform_particles): 93 | particle = self.env.random_state() 94 | new_particles[i] = particle 95 | 96 | self.particles = new_particles 97 | -------------------------------------------------------------------------------- /robotics_algorithm/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lyfkyle/robotics_algorithms/f456f516f7c569aa6ecf9979348c9adb407b1dd1/robotics_algorithm/utils/__init__.py -------------------------------------------------------------------------------- /robotics_algorithm/utils/math_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # import math 4 | 5 | 6 | def normalize_angle(angle): 7 | angle = (angle + 6 * np.pi) % (2 * np.pi) # normalize theta to [-pi, pi] 8 | if angle > np.pi: 9 | angle = angle - 2 * np.pi 10 | 11 | return angle 12 | 13 | 14 | def smooth(scalars: list[float], weight: float = 0.5) -> list[float]: # Weight between 0 and 1 15 | """ 16 | Smooth a list of scalars by applying exponential smoothing with a given weight. 17 | 18 | Args: 19 | scalars: List of scalars to be smoothed. 20 | weight: Weight between 0 and 1 for exponential smoothing. 21 | 22 | Return: 23 | List of smoothed values. 24 | """ 25 | last = scalars[0] # First value in the plot (first timestep) 26 | smoothed = list() 27 | for point in scalars: 28 | smoothed_val = last * weight + (1 - weight) * point # Calculate smoothed value 29 | smoothed.append(smoothed_val) # Save it 30 | last = smoothed_val # Anchor the last smoothed value 31 | 32 | return smoothed 33 | -------------------------------------------------------------------------------- /robotics_algorithm/utils/mdp_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def make_greedy_policy(Q, num_actions): 5 | """ 6 | Creates an greedy policy based on a given Q-function. 7 | 8 | Args: 9 | Q: A dictionary that maps from state -> action-values. 10 | Each value is a numpy array of length nA (see below) 11 | num_action: Number of actions in the environment. 
12 | 13 | Returns: 14 | A function that takes the observation as an argument and returns the action 15 | """ 16 | 17 | def policy_fn(state: np.ndarray): 18 | action_prob = np.zeros(num_actions) 19 | state_key = tuple(state.tolist()) 20 | best_action_idx = np.argmax(Q[state_key], axis=-1) 21 | action_prob[best_action_idx] = 1.0 22 | return action_prob 23 | 24 | return policy_fn 25 | -------------------------------------------------------------------------------- /robotics_algorithm/utils/models/attention.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class MultiHeadAttention(nn.Module): 9 | def __init__(self, embed_dim=512, n_heads=8): 10 | """ 11 | Args: 12 | embed_dim: dimension of embeding vector output 13 | n_heads: number of self attention heads 14 | """ 15 | super(MultiHeadAttention, self).__init__() 16 | 17 | self.embed_dim = embed_dim # 512 dim 18 | self.n_heads = n_heads # 8 19 | self.single_head_dim = int(self.embed_dim / self.n_heads) # 512/8 = 64. each key,query, value will be of 64d 20 | 21 | # key,query and value matrixes #64 x 64 22 | self.query_matrix = nn.Linear( 23 | self.single_head_dim, self.single_head_dim, bias=False 24 | ) # single key matrix for all 8 keys #512x512 25 | self.key_matrix = nn.Linear(self.single_head_dim, self.single_head_dim, bias=False) 26 | self.value_matrix = nn.Linear(self.single_head_dim, self.single_head_dim, bias=False) 27 | self.out = nn.Linear(self.n_heads * self.single_head_dim, self.embed_dim) 28 | 29 | def forward(self, key, query, value, mask=None): # batch_size x sequence_length x embedding_dim # 32 x 10 x 512 30 | """ 31 | Args: 32 | key : key vector 33 | query : query vector 34 | value : value vector 35 | mask: mask for decoder 36 | 37 | Returns: 38 | output vector from multihead attention 39 | """ 40 | batch_size = key.size(0) 41 | seq_length = key.size(1) 42 | 43 | # query dimension can change in decoder during inference. 
44 | # so we cant take general seq_length 45 | seq_length_query = query.size(1) 46 | 47 | # 32x10x512 48 | key = key.view( 49 | batch_size, seq_length, self.n_heads, self.single_head_dim 50 | ) # batch_size x sequence_length x n_heads x single_head_dim = (32x10x8x64) 51 | query = query.view(batch_size, seq_length_query, self.n_heads, self.single_head_dim) # (32x10x8x64) 52 | value = value.view(batch_size, seq_length, self.n_heads, self.single_head_dim) # (32x10x8x64) 53 | 54 | k = self.key_matrix(key) # (32x10x8x64) 55 | q = self.query_matrix(query) 56 | v = self.value_matrix(value) 57 | 58 | q = q.transpose(1, 2) # (batch_size, n_heads, seq_len, single_head_dim) # (32 x 8 x 10 x 64) 59 | k = k.transpose(1, 2) # (batch_size, n_heads, seq_len, single_head_dim) 60 | v = v.transpose(1, 2) # (batch_size, n_heads, seq_len, single_head_dim) 61 | 62 | # computes attention 63 | # adjust key for matrix multiplication 64 | k_adjusted = k.transpose(-1, -2) # (batch_size, n_heads, single_head_dim, seq_ken) #(32 x 8 x 64 x 10) 65 | product = torch.matmul(q, k_adjusted) # (32 x 8 x 10 x 64) x (32 x 8 x 64 x 10) = #(32x8x10x10) 66 | 67 | # fill those positions of product matrix as (-1e20) where mask positions are 0 68 | if mask is not None: 69 | product = product.masked_fill(mask == 0, float("-1e20")) 70 | 71 | # divising by square root of key dimension 72 | product = product / math.sqrt(self.single_head_dim) # / sqrt(64) 73 | 74 | # applying softmax 75 | scores = F.softmax(product, dim=-1) 76 | 77 | # mutiply with value matrix 78 | scores = torch.matmul(scores, v) ##(32x8x 10x 10) x (32 x 8 x 10 x 64) = (32 x 8 x 10 x 64) 79 | 80 | # concatenated output 81 | concat = ( 82 | scores.transpose(1, 2).contiguous().view(batch_size, seq_length_query, self.single_head_dim * self.n_heads) 83 | ) # (32x8x10x64) -> (32x10x8x64) -> (32,10,512) 84 | 85 | output = self.out(concat) # (32,10,512) -> (32,10,512) 86 | 87 | return output 88 | -------------------------------------------------------------------------------- /robotics_algorithm/utils/tree.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | 4 | class TreeNode: 5 | def __init__(self, nx_key, tree: nx.DiGraph): 6 | self.nx_key = nx_key 7 | 8 | self.children = [] 9 | self.parent = None 10 | 11 | self._tree = tree 12 | 13 | @property 14 | def attr(self): 15 | return self._tree.nodes[self.nx_key] 16 | 17 | def transition_attr(self, child): 18 | return self._tree.edges[self.nx_key, child.nx_key] 19 | 20 | def add_child(self, child, **kwargs): 21 | self._tree.add_edge(self.nx_key, child.nx_key, **kwargs) 22 | self.children.append(child) 23 | child.parent = self 24 | 25 | def delete_child(self, child): 26 | self._tree.remove_node(child.nx_key) 27 | self.children.remove(child) 28 | 29 | 30 | class Tree: 31 | def __init__(self): 32 | self._num_node = 0 33 | self._tree = nx.DiGraph() 34 | 35 | def add_node(self, **kwargs) -> TreeNode: 36 | # n_node = TreeNode(value) 37 | nx_key = self._tree.number_of_nodes() 38 | self._tree.add_node(nx_key, **kwargs) 39 | return TreeNode(nx_key, self._tree) 40 | 41 | def delete_node(self, node: TreeNode): 42 | def _delete_recur(n): 43 | for child in n.children: 44 | _delete_recur(child) 45 | 46 | self._tree.remove_node(n.nx_key) 47 | 48 | assert node.nx_key in self._tree.nodes 49 | 50 | # Delete subtree rooted at n 51 | _delete_recur(node) 52 | 53 | def add_child(self, parent: TreeNode, child: TreeNode, **kwargs): 54 | return parent.add_child(child, **kwargs) 55 | 56 | 
def delete_child( 57 | self, 58 | parent: TreeNode, 59 | child: TreeNode, 60 | ): 61 | return parent.delete_child(child) 62 | --------------------------------------------------------------------------------
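The `Tree` class above is a thin wrapper around `networkx.DiGraph` that keeps node and edge attributes next to explicit parent/child links, which is convenient for the sampling-based planners (RRT, RRT*, MCTS) elsewhere in the repo. Below is a minimal usage sketch assuming the package is importable; the `state`, `action`, and `cost` attribute names are purely illustrative, since `add_node` and `add_child` accept arbitrary keyword attributes.

```python
from robotics_algorithm.utils.tree import Tree

# Build a two-node tree the way a sampling-based planner might.
tree = Tree()
root = tree.add_node(state=(0.0, 0.0))
child = tree.add_node(state=(1.0, 0.5))
tree.add_child(root, child, action=(0.5, 0.1), cost=1.2)

print(child.parent.attr["state"])            # -> (0.0, 0.0)
print(root.transition_attr(child)["cost"])   # -> 1.2

tree.delete_node(child)  # removes the subtree rooted at `child` from the underlying DiGraph
```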