├── EOM ├── __init__.py ├── __pycache__ │ ├── eom.cpython-37.pyc │ ├── rk4.cpython-37.pyc │ └── __init__.cpython-37.pyc ├── rk4.py └── eom.py ├── Reward ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── rewardfuncs.cpython-37.pyc └── rewardfuncs.py ├── Save_Gif ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── save_gif.cpython-37.pyc └── save_gif.py ├── Environments ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── crazyflie.cpython-37.pyc └── crazyflie.py ├── Eval_Environments ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── crazyflie.cpython-37.pyc └── crazyflie.py ├── Gifs ├── Gif_landing.gif ├── Gif_setpoint.gif └── CF_3d_setpoint_PPO_1000000Timesteps.gif ├── README.md └── main.py /EOM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Reward/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Save_Gif/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Environments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eval_Environments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Gifs/Gif_landing.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Gifs/Gif_landing.gif -------------------------------------------------------------------------------- 
/Gifs/Gif_setpoint.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Gifs/Gif_setpoint.gif -------------------------------------------------------------------------------- /EOM/__pycache__/eom.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/EOM/__pycache__/eom.cpython-37.pyc -------------------------------------------------------------------------------- /EOM/__pycache__/rk4.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/EOM/__pycache__/rk4.cpython-37.pyc -------------------------------------------------------------------------------- /EOM/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/EOM/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Gifs/CF_3d_setpoint_PPO_1000000Timesteps.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Gifs/CF_3d_setpoint_PPO_1000000Timesteps.gif -------------------------------------------------------------------------------- /Reward/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Reward/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Save_Gif/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Save_Gif/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Save_Gif/__pycache__/save_gif.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Save_Gif/__pycache__/save_gif.cpython-37.pyc -------------------------------------------------------------------------------- /Reward/__pycache__/rewardfuncs.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Reward/__pycache__/rewardfuncs.cpython-37.pyc -------------------------------------------------------------------------------- /Environments/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Environments/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Environments/__pycache__/crazyflie.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Environments/__pycache__/crazyflie.cpython-37.pyc -------------------------------------------------------------------------------- /Eval_Environments/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Eval_Environments/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Eval_Environments/__pycache__/crazyflie.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Jacobkooi/InclinedDroneLander/HEAD/Eval_Environments/__pycache__/crazyflie.cpython-37.pyc -------------------------------------------------------------------------------- /EOM/rk4.py: -------------------------------------------------------------------------------- 1 | 2 | # Runge Kutta taking the EOM as an argument 3 | def runge_kutta4(x, u, eom, param, t_s): 4 | 5 | k1 = eom(x, u, param) 6 | k2 = eom(x + 0.5 * t_s * k1, u, param) 7 | k3 = eom(x + 0.5 * t_s * k2, u, param) 8 | k4 = eom(x + t_s * k3, u, param) 9 | return (t_s / 6) * (k1 + 2 * k2 + 2 * k3 + k4) 10 | -------------------------------------------------------------------------------- /Save_Gif/save_gif.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | from matplotlib import animation 4 | 5 | # Nice piece of code adopted from: https://gist.github.com/botforge/64cbb71780e6208172bbf03cd9293553 6 | 7 | def save_frames_as_gif(frames, path='.', filename='gym_animation.gif'): 8 | 9 | # Mess with this to change frame size 10 | fig = plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi=72) 11 | fig.tight_layout() 12 | 13 | patch = plt.imshow(frames[0]) 14 | plt.axis('off') 15 | 16 | def animate(i): 17 | patch.set_data(frames[i]) 18 | 19 | anim = animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=50) 20 | anim.save('%s/Gifs/%s' % (path, filename), writer='imagemagick', fps=50) 21 | print('Gif saved to %s/Gifs/%s' % (path, filename)) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Welcome to the repository for the Python code used for the paper: 2 | 3 | "Inclined Quadrotor Landing using Deep Reinforcement Learning" 4 | 5 | By Jacob Kooi and Robert Babuska. 6 | 7 | More documentation will be added soon. 
8 | 9 | 10 | # InclinedDroneLander 11 | 12 | Model-free training of 2d inclined landing and 3d setpoint-tracking for the Crazyflie 2.1 Nano-UAV. The resulting policies are transferable to use on a real crazyflie. (See https://youtu.be/53YaqfwUIFU) 13 | 14 | ## Usage 15 | 16 | - In main.py, select either 2d inclined landing or 3d setpoint training. 17 | - When choosing setpoint tracking, one can use SAC,TD3,A2C, PPO etc. 18 | - When choosing inclined landing, use the on-policy PPO algorithm. 19 | - Select the thrust action space, taken as a PWM signal from hover (around 42000 for m = 0.3303). Also select the timesteps to train for and the sampling time (1/50s recommended). 20 | - run main.py 21 | 22 | 23 | 24 | ## Dependencies 25 | Trained and tested on: 26 | ``` 27 | Python 3.7 28 | PyTorch 1.5.0 29 | Numpy 1.18.4 30 | StableBaselines3 0.6.0 31 | gym 0.17.3 32 | ``` 33 | 34 | ## Results 35 | 36 | PPO Sparse Inclined Landing (3000000 timesteps) | PPO Euclidean Setpoint Tracking (1000000 timesteps) (Left is xz-plane, right is yz-plane) 37 | :-------------------------:|:-------------------------: 38 | ![](https://github.com/Jacobkooi/InclinedDroneLander/blob/master/Gifs/Gif_landing.gif) | ![](https://github.com/Jacobkooi/InclinedDroneLander/blob/master/Gifs/Gif_setpoint.gif) 39 | 40 | ## Questions 41 | 42 | For any questions, errors or suggestions contact me at jacobkooi92@gmail.com. 43 | 44 | ## Citing 45 | 46 | When using this code or referring to our work please cite the paper at: 47 | 48 | https://ieeexplore.ieee.org/document/9636096/citations?tabFilter=papers#citations 49 | 50 | Or use this bibtex: 51 | 52 | @INPROCEEDINGS{9636096, 53 | 54 | author={Kooi, Jacob E. 
and Babuška, Robert}, 55 | 56 | booktitle={2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, 57 | 58 | title={Inclined Quadrotor Landing using Deep Reinforcement Learning}, 59 | 60 | year={2021}, 61 | 62 | volume={}, 63 | 64 | number={}, 65 | 66 | pages={2361-2368}, 67 | 68 | doi={10.1109/IROS51168.2021.9636096}} 69 | 70 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from Environments.crazyflie import Crazyflie_3d_setpoint, Crazyflie_2d_inclined 2 | import numpy as np 3 | import torch 4 | from Reward.rewardfuncs import sparse_reward2d, euclidean_reward3d 5 | from math import pi 6 | from Save_Gif.save_gif import save_frames_as_gif 7 | from stable_baselines3.common.env_checker import check_env 8 | from stable_baselines3 import PPO, SAC, A2C, TD3 9 | import random 10 | import os 11 | 12 | # Use the GPU for training 13 | if not torch.cuda.is_available(): 14 | raise NotImplementedError("No GPU") 15 | else: 16 | device = torch.device('cuda') 17 | 18 | if __name__ == '__main__': 19 | 20 | environment = 'CF_2d_inclined' 21 | algorithm = 'PPO' # PPO is fast, robust, and on-policy for curriculum learning 22 | training_timesteps = 3000000 # Total amount of Timesteps to train for 23 | pwm = 16000 # PWM from theoretical hover Pwm, in the minus or plus direction. 
24 | t_s = 1/50 # seconds 25 | 26 | if environment == 'CF_2d_inclined': 27 | env = Crazyflie_2d_inclined(t_s, rewardfunc=sparse_reward2d, max_pwm_from_hover=pwm) 28 | 29 | elif environment == 'CF_3d_setpoint': 30 | env = Crazyflie_3d_setpoint(t_s, rewardfunc=euclidean_reward3d, max_pwm_from_hover=pwm) 31 | 32 | # Check if the environment is working right 33 | check_env(env) 34 | 35 | # Set seeds to be able to reproduce results 36 | seed = 1 37 | os.environ['PYTHONHASHSEED'] = str(seed) 38 | env.seed(seed) 39 | random.seed(seed) 40 | torch.manual_seed(seed) 41 | np.random.seed(seed) 42 | 43 | # Select the algorithm from Stable Baselines 3 44 | if algorithm == 'PPO': 45 | model = PPO('MlpPolicy', env, verbose=1, gamma=0.97, seed=seed) # Try 0.995 for inclined landing 46 | elif algorithm == 'SAC': 47 | model = SAC('MlpPolicy', env, verbose=1, gamma=0.97, seed=seed) 48 | elif algorithm == 'A2C': 49 | model = A2C('MlpPolicy', env, verbose=1, seed=seed) 50 | elif algorithm == 'TD3': 51 | model = TD3('MlpPolicy', env, verbose=1, gamma=0.97, seed=seed) 52 | 53 | model.learn(training_timesteps) 54 | 55 | # Name the pytorch model 56 | run_name = environment+"_"+algorithm+"_"+str(training_timesteps)+"Timesteps" 57 | 58 | # Save the pytorch model 59 | torch.save(model.policy.state_dict(), run_name + 'state_dict.pt') 60 | 61 | # Render and save the gif afterwards 62 | obs = env.reset() 63 | frames = [] 64 | 65 | # Render 66 | for i in range(1800): 67 | action, _states = model.predict(obs, deterministic=True) 68 | obs, reward, done, info = env.step(action) 69 | frames.append(env.render(mode='rgb_array')) 70 | if done: 71 | obs = env.reset() 72 | # Save Gif 73 | save_frames_as_gif(frames, filename=run_name+'.gif') 74 | env.close() 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /Reward/rewardfuncs.py: -------------------------------------------------------------------------------- 1 | from math import sqrt, 
pi 2 | from shapely.geometry import Point 3 | from math import cos, sin 4 | 5 | # Sparse reward Function used for Inclined landing and Euclidean reward Function used for set-point tracking 6 | 7 | 8 | def sparse_reward2d(next_state, goal_state, observation_space, goal_range, polygon): 9 | 10 | # goal_range is the vector of goal threshold ranges 11 | # polygon represents the polygon of the landing platform 12 | 13 | quad_arm_length = 0.10 14 | 15 | goal_reward = -1 * int((abs(next_state[0] - goal_state[0]) > goal_range or 16 | abs(next_state[1] - goal_state[1]) > goal_range or 17 | abs(next_state[2] - goal_state[2]) > min(10*goal_range, 1.5) or 18 | abs(next_state[3] - goal_state[3]) > min(10*goal_range, 1.5) or 19 | abs(next_state[4] - goal_state[4]) > goal_range/4)) 20 | 21 | bounds_reward = -1 * int((abs(next_state[0] - observation_space.high[0]) < 0.05 or 22 | abs(next_state[0] - observation_space.low[0]) < 0.05 or 23 | abs(next_state[1] - observation_space.high[1]) < 0.05 or 24 | abs(next_state[1] - observation_space.low[1]) < 0.05)) 25 | 26 | # Check if it hits the landing platform 27 | right_point = Point(next_state[0]+cos(next_state[4])*quad_arm_length, 28 | next_state[1] - sin(next_state[4])*quad_arm_length) 29 | left_point = Point(next_state[0]-cos(next_state[4])*quad_arm_length, 30 | next_state[1] + sin(next_state[4])*quad_arm_length) 31 | middle_point = Point(next_state[0], next_state[1]) 32 | if polygon.contains(middle_point) or polygon.contains(right_point) or polygon.contains(left_point): 33 | obstacle_reward = -6 34 | else: 35 | obstacle_reward = 0 36 | 37 | done = (goal_reward == 0) 38 | 39 | total_reward = goal_reward + bounds_reward + obstacle_reward 40 | 41 | return total_reward, done 42 | 43 | 44 | def euclidean_reward3d(next_state, goal_state, observation_space, action): 45 | 46 | # Position cost 47 | error_position = sqrt((next_state[0] - goal_state[0])**2 + (next_state[1] - goal_state[1])**2 48 | + (next_state[2] - goal_state[2])**2) 49 | # 
Velocity cost 50 | error_velocity = sqrt((next_state[3] - goal_state[3])**2 + (next_state[4] - goal_state[4])**2 51 | + (next_state[5] - goal_state[5])**2) 52 | # Orientation cost 53 | error_angle = sqrt((next_state[6] - goal_state[6])**2 + (next_state[7] - goal_state[7])**2) 54 | # Action cost 55 | error_action = 0.1*(action[1]**2+action[2]**2)*1/(max(error_position, 0.001)) 56 | # Bounds cost 57 | bounds_reward = -1 * int((abs(next_state[0] - observation_space.high[0]) < 0.05 or 58 | abs(next_state[0] - observation_space.low[0]) < 0.05 or 59 | abs(next_state[1] - observation_space.high[1]) < 0.05 or 60 | abs(next_state[1] - observation_space.low[1]) < 0.05 or 61 | abs(next_state[2] - observation_space.high[2]) < 0.05 or 62 | abs(next_state[2] - observation_space.low[2]) < 0.05)) 63 | # Total cost 64 | total_reward = -1 * error_position + -0.2 * error_velocity - 0.1 * error_angle - error_action + bounds_reward 65 | 66 | done = False 67 | reward = total_reward 68 | 69 | return reward, done 70 | -------------------------------------------------------------------------------- /EOM/eom.py: -------------------------------------------------------------------------------- 1 | from numba import jit 2 | import numpy as np 3 | from math import cos, sin, pi, sqrt 4 | 5 | # Equations of motion used for the 2d and 3d Crazyflie 2.1 quadrotor 6 | # Use jit decorator to greatly increase function speed 7 | 8 | @jit 9 | def pwm_to_force(pwm): 10 | # Modified formula from Julian Forster's Crazyflie identification 11 | force = 4*(2.130295e-11*(pwm**2) + 1.032633e-6*pwm+5.485e-4) 12 | return force 13 | 14 | @jit 15 | def force_to_pwm(force): 16 | # Just the inversion of pwm_to_force 17 | a = 4*2.130295e-11 18 | b = 4*1.032633e-6 19 | c = 5.485e-4 - force 20 | d = b**2 - 4*a*c 21 | pwm = (-b + sqrt(d))/(2*a) 22 | return pwm 23 | 24 | @jit 25 | def eom2d_crazyflie_closedloop(x, u, param): 26 | 27 | # States are: [x, z, x_dot. 
z_dot, Theta, thrust_state] 28 | # u = [PWM_c, Theta_c] = [10000 to 60000, -1 to 1] 29 | # param = [mass, gain constant, time constant] 30 | 31 | pwm_commanded = u[0] 32 | a_ss = -15.4666 # State space A 33 | b_ss = 1 # State space B 34 | c_ss = 3.5616e-5 # State space C 35 | d_ss = 7.2345e-8 # State space AD 36 | force = 4*(c_ss*x[5] + d_ss*pwm_commanded) # Thrust force 37 | # force *= 0.88 # This takes care of the motor thrust gap sim2real 38 | pwm_drag = force_to_pwm(force) # Symbolic PWM to approximate rotor drag 39 | dragx = 9.1785e-7*4*(0.04076521*pwm_drag + 380.8359) # Fa,x 40 | dragz = 10.311e-7*4*(0.04076521*pwm_drag + 380.8359) # Fa,z 41 | theta_commanded = u[1]*pi/6 # Commanded theta in radians 42 | dx = np.array([x[2], # x_dot 43 | x[3], # z_dot 44 | (sin(x[4])*(force-dragx*x[2]))/param[0], # x_ddot 45 | (cos(x[4])*(force-dragz*x[3]))/param[0] - 9.81, # z_ddot 46 | (param[1]*theta_commanded - x[4])/param[2], # Theta_dot 47 | a_ss*x[5] + b_ss*pwm_commanded], # Thrust_state dot 48 | dtype =np.float32) 49 | return dx 50 | 51 | @jit 52 | def eom3d_crazyflie_closedloop(x, u, param): 53 | 54 | # States are: [x, y, z, x_dot, y_dot, z_dot, phi, theta, thrust_state] 55 | # u = [PWM_c, Phi_c, Theta_c] = [10000 to 60000, -1 to 1, -1 to 1] 56 | # param = [mass, gain constant, time constant] 57 | # Note that we use the rotation matrices convention according to the paper, with yaw = 0. 
58 | 59 | pwm_commanded = u[0] 60 | a_ss = -15.4666 # State space A 61 | b_ss = 1 # State space B 62 | c_ss = 3.5616e-5 # State space C 63 | d_ss = 7.2345e-8 # State space AD 64 | force = 4*(c_ss*x[8] + d_ss*pwm_commanded) # Thrust force 65 | # force *= 0.88 # This takes care of the motor thrust gap sim2real 66 | pwm_drag = force_to_pwm(force) # Symbolic PWM to approximate rotor drag 67 | dragxy = 9.1785e-7*4*(0.04076521*pwm_drag + 380.8359) # Fa,xy 68 | dragz = 10.311e-7*4*(0.04076521*pwm_drag + 380.8359) # Fa,z 69 | phi_commanded = u[1]*pi/6 # Commanded phi in radians 70 | theta_commanded = u[2]*pi/6 # Commanded theta in radians 71 | dx = np.array([x[3], # x_dot 72 | x[4], # y_dot 73 | x[5], # z_dot 74 | (sin(x[7]))*(force - dragxy*x[3])/param[0], # x_ddot 75 | (sin(x[6])*cos(x[7])) * (force - dragxy*x[4])/param[0], # y_ddot 76 | (cos(x[6])*cos(x[7])) * (force - dragz*x[5])/param[0] - 9.81, # z_ddot 77 | (param[1]*phi_commanded - x[6])/param[2], # Phi_dot 78 | (param[1]*theta_commanded - x[7]) / param[2], # Theta_dot 79 | a_ss*x[8] + b_ss*pwm_commanded], # Thrust_state dot 80 | dtype=np.float32) 81 | return dx 82 | -------------------------------------------------------------------------------- /Eval_Environments/crazyflie.py: -------------------------------------------------------------------------------- 1 | 2 | # Custom gym-like Environment classes for the Crazyflie quadrotor 3 | 4 | from EOM.eom import pwm_to_force, force_to_pwm, eom2d_crazyflie_closedloop, eom3d_crazyflie_closedloop 5 | from EOM.rk4 import runge_kutta4 6 | import random as r 7 | import numpy as np 8 | import gym 9 | from os import path 10 | from gym.utils import seeding 11 | from gym import spaces 12 | from math import pi, cos, sin, tan 13 | from Reward.rewardfuncs import sparse_reward2d, euclidean_reward3d 14 | from shapely.geometry import Point 15 | from shapely.geometry.polygon import Polygon 16 | 17 | 18 | class Crazyflie_2d_inclined_eval(gym.Env): 19 | metadata = {'render.modes': 
['human']} 20 | 21 | # state = [x, z, xdot, zdot, theta], action = [Thrust, Theta_commanded] 22 | 23 | def __init__(self, t_s, goal_state=np.array([0, 1.25, 0, 0, -pi/7], dtype=float), 24 | episode_steps=300, rewardfunc=sparse_reward2d, eom=eom2d_crazyflie_closedloop, 25 | max_pwm_from_hover=15000, param=np.array([0.03303, 1.1094, 0.183806]), rk4=runge_kutta4): 26 | super(Crazyflie_2d_inclined_eval, self).__init__() 27 | 28 | # Construct the landing polygon 29 | self.landing_angle = -pi/7 30 | self.platform_center = goal_state[0] 31 | self.platform_center_height = 1.15 32 | self.platform_width = 0.8 33 | self.landing_polygon = Polygon([(self.platform_center-0.5*self.platform_width, self.platform_center_height- 34 | tan(abs(self.landing_angle))*0.5*self.platform_width), 35 | (self.platform_center+0.5*self.platform_width, 36 | self.platform_center_height + 37 | tan(abs(self.landing_angle))*0.5*self.platform_width), 38 | (self.platform_center+0.5*self.platform_width, 0), 39 | (self.platform_center-0.5*self.platform_width, 0)]) 40 | 41 | self.quad_arms = 0.2 42 | self.obstacle_range = self.quad_arms-0.15 43 | 44 | self.viewer = None 45 | self.episode_steps = episode_steps 46 | self.param = param 47 | self.rewardfunc = rewardfunc 48 | self.EOM = eom 49 | self.RK4 = rk4 50 | self.T_s = t_s 51 | self.Timesteps = 0 52 | self.goal_state = goal_state 53 | 54 | # Used for simulations 55 | self.thrust_state = np.array([0]) 56 | self.real_action = np.array([0, 0]) 57 | self.hover_pwm = force_to_pwm(self.param[0]*9.81) # Calculate the theoretical hover thrust 58 | self.max_pwm_from_hover = max_pwm_from_hover 59 | 60 | self.episode_counter = 0 61 | self.goal_range = 0.10 62 | self.action_space = spaces.Box(low=np.array([-1, -1]), 63 | high=np.array([1, 1]), dtype=np.float) 64 | # States are: [x, z, x_dot. 
z_dot, Theta, Theta_dot] 65 | self.observation_space = spaces.Box(low=np.array([-3.4, 0, -10, -10, -pi/3]), 66 | high=np.array([3.4, 2.4, 10, 10, pi/3]), dtype=np.float) 67 | self.reward_range = (-float("inf"), float("inf")) 68 | self.agent_pos = [] 69 | self.reset() 70 | self.seed() 71 | self.counter = 0 72 | 73 | def seed(self, seed=None): 74 | self.np_random, seed = seeding.np_random(seed) 75 | return [seed] 76 | 77 | def step(self, action): 78 | # Fix PWM increments from its theoretical Hover PWM 79 | pwm_commanded = self.hover_pwm + action[0]*self.max_pwm_from_hover 80 | # Compute the action vector that goes into the Equations of Motion 81 | self.real_action = np.array([pwm_commanded, action[1]], dtype=float) 82 | # Add the physically unobservable thrust state for simulation purposes 83 | extended_state = np.concatenate((self.agent_pos, self.thrust_state)) 84 | # Simulate the agent with 1 time step using Runge Kutta 4 and the EOM 85 | extended_state = extended_state + self.RK4(extended_state, self.real_action, self.EOM, self.param, self.T_s) 86 | # Subtract the thrust state to form the actual states we want to use in training 87 | self.agent_pos = extended_state[0:-1] 88 | self.thrust_state = np.array([extended_state[-1]]) 89 | # Clip the agent's position so it doesn't leave the simulation bounds 90 | self.agent_pos = np.clip(self.agent_pos, self.observation_space.low, self.observation_space.high) 91 | observation = self.agent_pos 92 | reward, done = self.rewardfunc(observation, self.goal_state, self.observation_space, self.goal_range, 93 | self.landing_polygon) 94 | # Check if the quadrotor touches the landing polygon and diminish horizontal velocity if true 95 | point = Point(self.agent_pos[0], self.agent_pos[1]) 96 | if self.landing_polygon.contains(point): 97 | self.agent_pos[2] = 0 98 | 99 | self.counter += 1 100 | self.Timesteps += 1 101 | 102 | if self.counter == self.episode_steps: 103 | done = True 104 | 105 | info = {} 106 | 107 | return observation, 
reward, done, info 108 | 109 | def reset(self): 110 | 111 | # We use the reset function to achieve a form of continuous curriculum learning 112 | 113 | self.episode_counter += 1 114 | 115 | # Start episodes within a box around the goal state 116 | self.agent_pos = np.array([r.uniform(self.goal_state[0]-3, 117 | self.goal_state[0]+3), 118 | r.uniform(self.goal_state[1]-0.3, 119 | self.goal_state[1]+1), 0, 0, 0], dtype=np.float32) 120 | 121 | while self.landing_polygon.contains(Point(self.agent_pos[0], self.agent_pos[1])): 122 | self.agent_pos = np.array([np.clip(r.uniform(self.goal_state[0] - 3, self.goal_state[0] + 3), 123 | self.observation_space.low[0], self.observation_space.high[0]), 124 | np.clip(r.uniform(self.goal_state[1] - 0.3, self.goal_state[1] + 1), 125 | self.observation_space.low[1], self.observation_space.high[1]), 0, 0, 0], 126 | dtype=np.float32) 127 | 128 | # Clip position to be in the bounds of the Optitrack Stadium 129 | self.agent_pos[0] = np.clip(self.agent_pos[0], self.observation_space.low[0] + 0.1, 130 | self.observation_space.high[0] - 0.1) 131 | self.agent_pos[1] = np.clip(self.agent_pos[1], self.observation_space.low[1] + 1, 132 | self.observation_space.high[1] - 0.1) 133 | 134 | self.counter = 0 135 | 136 | return self.agent_pos 137 | 138 | def render(self, mode='human'): 139 | 140 | # Rendering function 141 | 142 | if self.viewer is None: 143 | from gym.envs.classic_control import rendering 144 | self.viewer = rendering.Viewer(740, 288) 145 | self.viewer.set_bounds(-3.5, 3.5, -0.5, 2.5) 146 | 147 | rod_quad1 = rendering.make_capsule(self.quad_arms, 0.05) 148 | rod_quad2 = rendering.make_capsule(self.quad_arms, 0.05) 149 | rod_goalquad1 = rendering.make_capsule(self.quad_arms, 0.05) 150 | rod_goalquad2 = rendering.make_capsule(self.quad_arms, 0.05) 151 | 152 | obstacle_1_top = rendering.Line((self.landing_polygon.exterior.coords.xy[0][0], 153 | self.landing_polygon.exterior.coords.xy[1][0]), 154 | 
(self.landing_polygon.exterior.coords.xy[0][1], 155 | self.landing_polygon.exterior.coords.xy[1][1])) 156 | obstacle_1_right = rendering.Line((self.landing_polygon.exterior.coords.xy[0][1], 157 | self.landing_polygon.exterior.coords.xy[1][1]), 158 | (self.landing_polygon.exterior.coords.xy[0][2], 159 | self.landing_polygon.exterior.coords.xy[1][2])) 160 | obstacle_1_bottom = rendering.Line((self.landing_polygon.exterior.coords.xy[0][2], 161 | self.landing_polygon.exterior.coords.xy[1][2]), 162 | (self.landing_polygon.exterior.coords.xy[0][3], 163 | self.landing_polygon.exterior.coords.xy[1][3])) 164 | obstacle_1_left = rendering.Line((self.landing_polygon.exterior.coords.xy[0][3], 165 | self.landing_polygon.exterior.coords.xy[1][3]), 166 | (self.landing_polygon.exterior.coords.xy[0][4], 167 | self.landing_polygon.exterior.coords.xy[1][4])) 168 | 169 | xaxis = rendering.Line((-3.2, 0), (3.2, 0)) 170 | xmetermin3 = rendering.Line((-3, 0), (-3, 0.05)) 171 | xmetermin2 = rendering.Line((-2, 0), (-2, 0.05)) 172 | xmetermin1 = rendering.Line((-1, 0), (-1, 0.05)) 173 | xmeter0 = rendering.Line((0, -0.10), (0, 0.10)) 174 | xmeter1 = rendering.Line((1, 0), (1, 0.05)) 175 | xmeter2 = rendering.Line((2, 0), (2, 0.05)) 176 | xmeter3 = rendering.Line((3, 0), (3, 0.05)) 177 | 178 | zaxis = rendering.Line((-3.2, 0), (-3.2, 2.4)) 179 | zmeter1 = rendering.Line((-3.2, 1), (-3.15, 1)) 180 | zmeter2 = rendering.Line((-3.2, 2), (-3.15, 2)) 181 | 182 | rod_quad1.set_color(.8, .3, .3) 183 | rod_quad2.set_color(.8, .3, .3) 184 | rod_goalquad1.set_color(.4, .3, .3) 185 | rod_goalquad2.set_color(.4, .3, .3) 186 | 187 | quadcirc1 = rendering.make_capsule(.03, 0.05) 188 | quadcirc2 = rendering.make_capsule(.03, 0.05) 189 | quadcirc3 = rendering.make_capsule(.03, 0.05) 190 | goalquadcirc1 = rendering.make_capsule(.03, 0.05) 191 | goalquadcirc2 = rendering.make_capsule(.03, 0.05) 192 | goalquadcirc3 = rendering.make_capsule(.03, 0.05) 193 | 194 | quadcirc1.set_color(.3, .6, .3) 195 | 
quadcirc2.set_color(.3, .6, .3) 196 | quadcirc3.set_color(.3, .6, .3) 197 | goalquadcirc1.set_color(.3, .3, .3) 198 | goalquadcirc2.set_color(.3, .3, .3) 199 | goalquadcirc3.set_color(.3, .3, .3) 200 | 201 | self.pole_transform = rendering.Transform() 202 | self.pole2_transform = rendering.Transform() 203 | self.goalpole_transform = rendering.Transform() 204 | self.goalpole2_transform = rendering.Transform() 205 | 206 | self.circ1_transform = rendering.Transform() 207 | self.circ2_transform = rendering.Transform() 208 | self.circ3_transform = rendering.Transform() 209 | self.goalcirc1_transform = rendering.Transform() 210 | self.goalcirc2_transform = rendering.Transform() 211 | self.goalcirc3_transform = rendering.Transform() 212 | 213 | rod_quad1.add_attr(self.pole_transform) 214 | rod_quad2.add_attr(self.pole2_transform) 215 | rod_goalquad1.add_attr(self.goalpole_transform) 216 | rod_goalquad2.add_attr(self.goalpole2_transform) 217 | 218 | quadcirc1.add_attr(self.circ1_transform) 219 | quadcirc2.add_attr(self.circ2_transform) 220 | quadcirc3.add_attr(self.circ3_transform) 221 | goalquadcirc1.add_attr(self.goalcirc1_transform) 222 | goalquadcirc2.add_attr(self.goalcirc2_transform) 223 | goalquadcirc3.add_attr(self.goalcirc3_transform) 224 | 225 | self.viewer.add_geom(rod_quad1) 226 | self.viewer.add_geom(rod_quad2) 227 | self.viewer.add_geom(rod_goalquad1) 228 | self.viewer.add_geom(rod_goalquad2) 229 | self.viewer.add_geom(xaxis) 230 | self.viewer.add_geom(xmetermin3) 231 | self.viewer.add_geom(xmetermin2) 232 | self.viewer.add_geom(xmetermin1) 233 | self.viewer.add_geom(xmeter0) 234 | self.viewer.add_geom(xmeter1) 235 | self.viewer.add_geom(xmeter2) 236 | self.viewer.add_geom(xmeter3) 237 | self.viewer.add_geom(zaxis) 238 | self.viewer.add_geom(zmeter1) 239 | self.viewer.add_geom(zmeter2) 240 | 241 | self.viewer.add_geom(quadcirc1) 242 | self.viewer.add_geom(quadcirc2) 243 | self.viewer.add_geom(quadcirc3) 244 | self.viewer.add_geom(goalquadcirc1) 245 | 
self.viewer.add_geom(goalquadcirc2) 246 | self.viewer.add_geom(goalquadcirc3) 247 | 248 | self.viewer.add_geom(obstacle_1_right) 249 | self.viewer.add_geom(obstacle_1_bottom) 250 | self.viewer.add_geom(obstacle_1_left) 251 | self.viewer.add_geom(obstacle_1_top) 252 | 253 | 254 | self.pole_transform.set_rotation(-self.agent_pos[4]) 255 | self.pole_transform.set_translation(self.agent_pos[0], self.agent_pos[1]) 256 | self.pole2_transform.set_rotation(-self.agent_pos[4] + pi) 257 | self.pole2_transform.set_translation(self.agent_pos[0], self.agent_pos[1]) 258 | 259 | self.goalpole_transform.set_rotation(-self.goal_state[4]) 260 | self.goalpole_transform.set_translation(self.goal_state[0], self.goal_state[1]) 261 | self.goalpole2_transform.set_rotation(-self.goal_state[4] + pi) 262 | self.goalpole2_transform.set_translation(self.goal_state[0], self.goal_state[1]) 263 | 264 | self.circ1_transform.set_translation(self.agent_pos[0] + self.quad_arms*cos(self.agent_pos[4]), 265 | self.agent_pos[1] - self.quad_arms*sin(self.agent_pos[4])) 266 | self.circ2_transform.set_translation(self.agent_pos[0], 267 | self.agent_pos[1]) 268 | self.circ3_transform.set_translation(self.agent_pos[0] - self.quad_arms*cos(self.agent_pos[4]), 269 | self.agent_pos[1] + self.quad_arms*sin(self.agent_pos[4])) 270 | self.circ1_transform.set_rotation(-self.agent_pos[4] -pi/2) 271 | self.circ2_transform.set_rotation(-self.agent_pos[4] -pi/2) 272 | self.circ3_transform.set_rotation(-self.agent_pos[4] -pi/2) 273 | 274 | self.goalcirc1_transform.set_translation(self.goal_state[0] + self.quad_arms * cos(self.goal_state[4]), 275 | self.goal_state[1] - self.quad_arms * sin(self.goal_state[4])) 276 | self.goalcirc2_transform.set_translation(self.goal_state[0], 277 | self.goal_state[1]) 278 | self.goalcirc3_transform.set_translation(self.goal_state[0] - self.quad_arms * cos(self.goal_state[4]), 279 | self.goal_state[1] + self.quad_arms * sin(self.goal_state[4])) 280 | 
self.goalcirc1_transform.set_rotation(-self.goal_state[4] - pi / 2) 281 | self.goalcirc2_transform.set_rotation(-self.goal_state[4] - pi / 2) 282 | self.goalcirc3_transform.set_rotation(-self.goal_state[4] - pi / 2) 283 | 284 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 285 | 286 | def close(self): 287 | if self.viewer: 288 | self.viewer.close() 289 | self.viewer = None 290 | 291 | 292 | class Crazyflie_3d_setpoint_eval(gym.Env): 293 | metadata = {'render.modes': ['human']} 294 | 295 | # state = [x, y, z, xdot, ydot, zdot, phi, theta], action = [Thrust, Phi_commanded, Theta_commanded] 296 | def __init__(self, t_s, goal_state=np.array([0, 0, 1.2, 0, 0, 0, 0, 0]), episode_steps=300, 297 | rewardfunc=euclidean_reward3d, eom=eom3d_crazyflie_closedloop, max_pwm_from_hover=13000, 298 | param=np.array([0.03303, 1.1094, 0.183806]), rk4=runge_kutta4): 299 | super(Crazyflie_3d_setpoint_eval, self).__init__() 300 | 301 | self.quad_arms = 0.2 302 | self.obstacle_range = self.quad_arms - 0.15 303 | self.viewer = None 304 | 305 | self.episode_steps = episode_steps 306 | self.counter = 0 307 | self.param = param 308 | self.rewardfunc = rewardfunc 309 | self.EOM = eom 310 | self.RK4 = rk4 311 | self.real_action = np.array([0, 0]) 312 | self.T_s = t_s 313 | self.goal_state = goal_state 314 | self.episode_counter = 0 315 | self.goal_range = 0.25 316 | 317 | self.x_spawn_radius = 0.2 318 | self.y_spawn_radius = 0.2 319 | self.z_spawn_radius = 0.2 320 | self.spawn_increment = 1/6000 321 | 322 | self.thrust_state = np.array([0]) 323 | self.hover_pwm = force_to_pwm(self.param[0]*9.81) 324 | self.max_pwm_from_hover = max_pwm_from_hover 325 | 326 | self.action_space = spaces.Box(low=np.array([-1, -1, -1]), 327 | high=np.array([1, 1, 1]), dtype=np.float32) 328 | # States are: [x, z, x_dot. 
z_dot, Theta, Theta_dot] 329 | self.observation_space = spaces.Box(low=np.array([-3.4, -1.4, 0, -10, -10, -10, -pi/3, -pi/3]), 330 | high=np.array([3.4, 1.4, 2.4, 10, 10, 10, pi/3, pi/3]), dtype=np.float32) 331 | self.reward_range = (-float("inf"), float("inf")) 332 | self.agent_pos = [] 333 | self.reset() 334 | self.seed() 335 | 336 | def seed(self, seed=None): 337 | self.np_random, seed = seeding.np_random(seed) 338 | return [seed] 339 | 340 | def step(self, action): 341 | 342 | # Fix PWM increments from its theoretical Hover PWM 343 | PWM_commanded_new = self.hover_pwm + action[0]*self.max_pwm_from_hover 344 | # Compute the action vector that goes into the Equations of Motion [PWM, clipped, clipped] 345 | self.real_action = np.array([PWM_commanded_new, action[1], action[2]], dtype=float) 346 | # Add the physically unobservable thrust state for simulation purposes 347 | extended_state = np.concatenate((self.agent_pos, self.thrust_state)) 348 | # Simulate the agent for 1 time step using Runge Kutta 4 and the EOM 349 | extended_state = extended_state + self.RK4(extended_state, self.real_action, self.EOM, self.param, self.T_s) 350 | # Subtract the thrust state to form the actual states we want to use in training 351 | self.agent_pos = extended_state[0:-1] 352 | self.thrust_state = np.array([extended_state[-1]]) 353 | # Clip the agent's position so it doesn't leave the simulation bounds 354 | self.agent_pos = np.clip(self.agent_pos, self.observation_space.low, self.observation_space.high) 355 | 356 | observation = self.agent_pos 357 | reward, done = self.rewardfunc(observation, self.goal_state, self.observation_space, self.real_action) 358 | 359 | self.counter += 1 360 | 361 | if self.counter == self.episode_steps: 362 | done = True 363 | 364 | info = {} 365 | 366 | return observation, reward, done, info 367 | 368 | def reset(self): 369 | # Use the reset function to achieve a form of continuous curriculum learning 370 | 371 | # Pick the box of starting positions 
around the euclidean goal 372 | self.agent_pos = np.array([r.uniform(self.goal_state[0]-2.5, 373 | self.goal_state[0]+2.5), 374 | r.uniform(self.goal_state[1] - 1.3, 375 | self.goal_state[1] + 1.3), 376 | r.uniform(self.goal_state[2]-1, 377 | self.goal_state[2]+1), 0, 0, 0, 0, 0], dtype=np.float32) 378 | 379 | # Clip starting position to be in the bounds of the Optitrack Stadium 380 | self.agent_pos[0] = np.clip(self.agent_pos[0], self.observation_space.low[0] + 0.1, 381 | self.observation_space.high[0] - 0.1) 382 | self.agent_pos[1] = np.clip(self.agent_pos[1], self.observation_space.low[1] + 0.1, 383 | self.observation_space.high[1] - 0.1) 384 | self.agent_pos[2] = np.clip(self.agent_pos[2], self.observation_space.low[2] + 0.1, 385 | self.observation_space.high[2] - 0.1) 386 | self.counter = 0 387 | 388 | return self.agent_pos 389 | 390 | def render(self, mode='human'): 391 | 392 | # Rendering for 3-D performance. Since pyglet did not support 3D we made two 2D views. 393 | if self.viewer is None: 394 | from gym.envs.classic_control import rendering 395 | self.viewer = rendering.Viewer(720, 288) 396 | self.viewer.set_bounds(-3.5*2, 3.6, -0.5*2, 2.5) 397 | 398 | # Define the rods for making quadrotor bodies 399 | rod_quad1 = rendering.make_capsule(self.quad_arms, 0.05) 400 | rod_quad2 = rendering.make_capsule(self.quad_arms, 0.05) 401 | rod_goalquad1 = rendering.make_capsule(self.quad_arms, 0.05) 402 | rod_goalquad2 = rendering.make_capsule(self.quad_arms, 0.05) 403 | # Roll is for the y-z plane 404 | rod_quadroll1 = rendering.make_capsule(self.quad_arms, 0.05) 405 | rod_quadroll2 = rendering.make_capsule(self.quad_arms, 0.05) 406 | rod_goalquadroll1 = rendering.make_capsule(self.quad_arms, 0.05) 407 | rod_goalquadroll2 = rendering.make_capsule(self.quad_arms, 0.05) 408 | 409 | # Draw the axis 410 | xaxis = rendering.Line((-6.8, 0), (0, 0)) 411 | xmetermin3 = rendering.Line((-6.4, 0), (-6.4, 0.05)) 412 | xmetermin2 = rendering.Line((-5.4, 0), (-5.4, 0.05)) 413 | 
xmetermin1 = rendering.Line((-4.4, 0), (-4.4, 0.05)) 414 | xmeter0 = rendering.Line((-3.4, -0.10), (-3.4, 0.10)) 415 | xmeter1 = rendering.Line((-2.4, 0), (-2.4, 0.05)) 416 | xmeter2 = rendering.Line((-1.4, 0), (-1.4, 0.05)) 417 | xmeter3 = rendering.Line((-0.4, 0), (-0.4, 0.05)) 418 | 419 | xaxisroll = rendering.Line((0, 0), (6.8, 0)) 420 | xmetermin1roll = rendering.Line((1, 0), (1, 0.05)) 421 | xmeter0roll = rendering.Line((2, -0.10), (2, 0.10)) 422 | xmeter1roll = rendering.Line((3, 0), (3, 0.05)) 423 | 424 | zaxis = rendering.Line((-6.8, 0), (-6.8, 2.4)) 425 | zmeter1 = rendering.Line((-6.8, 1), (-6.75, 1)) 426 | zmeter2 = rendering.Line((-6.8, 2), (-6.75, 2)) 427 | 428 | zaxisroll = rendering.Line((0.6, 0), (0.6, 2.4)) 429 | zmeter1roll = rendering.Line((0.6, 1), (0.65, 1)) 430 | zmeter2roll = rendering.Line((0.6, 2), (0.65, 2)) 431 | 432 | # Define the color for the quadrotor bodies 433 | rod_quad1.set_color(.8, .3, .3) 434 | rod_quad2.set_color(.8, .3, .3) 435 | rod_goalquad1.set_color(.4, .3, .3) 436 | rod_goalquad2.set_color(.4, .3, .3) 437 | 438 | rod_quadroll1.set_color(.4, .3, .3) 439 | rod_quadroll2.set_color(.4, .3, .3) 440 | rod_goalquadroll1.set_color(.2, .3, .3) 441 | rod_goalquadroll2.set_color(.2, .3, .3) 442 | 443 | # Define the circles on top of the rods 444 | quadcirc1 = rendering.make_capsule(.03, 0.05) 445 | quadcirc2 = rendering.make_capsule(.03, 0.05) 446 | quadcirc3 = rendering.make_capsule(.03, 0.05) 447 | goalquadcirc1 = rendering.make_capsule(.03, 0.05) 448 | goalquadcirc2 = rendering.make_capsule(.03, 0.05) 449 | goalquadcirc3 = rendering.make_capsule(.03, 0.05) 450 | 451 | quadcirc1roll = rendering.make_capsule(.03, 0.05) 452 | quadcirc2roll = rendering.make_capsule(.03, 0.05) 453 | quadcirc3roll = rendering.make_capsule(.03, 0.05) 454 | goalquadcirc1roll = rendering.make_capsule(.03, 0.05) 455 | goalquadcirc2roll = rendering.make_capsule(.03, 0.05) 456 | goalquadcirc3roll = rendering.make_capsule(.03, 0.05) 457 | 458 | # Define the 
circle colors on top of the rod 459 | quadcirc1.set_color(.3, .6, .3) 460 | quadcirc2.set_color(.3, .6, .3) 461 | quadcirc3.set_color(.3, .6, .3) 462 | goalquadcirc1.set_color(.3, .3, .3) 463 | goalquadcirc2.set_color(.3, .3, .3) 464 | goalquadcirc3.set_color(.3, .3, .3) 465 | 466 | quadcirc1roll.set_color(.3, .6, .3) 467 | quadcirc2roll.set_color(.3, .6, .3) 468 | quadcirc3roll.set_color(.3, .6, .3) 469 | goalquadcirc1roll.set_color(.3, .3, .3) 470 | goalquadcirc2roll.set_color(.3, .3, .3) 471 | goalquadcirc3roll.set_color(.3, .3, .3) 472 | 473 | # Define the transformations that need to happen at every timestep 474 | self.pole_transform = rendering.Transform() 475 | self.pole2_transform = rendering.Transform() 476 | self.goalpole_transform = rendering.Transform() 477 | self.goalpole2_transform = rendering.Transform() 478 | 479 | self.poleroll_transform = rendering.Transform() 480 | self.pole2roll_transform = rendering.Transform() 481 | self.goalpoleroll_transform = rendering.Transform() 482 | self.goalpole2roll_transform = rendering.Transform() 483 | 484 | self.circ1_transform = rendering.Transform() 485 | self.circ2_transform = rendering.Transform() 486 | self.circ3_transform = rendering.Transform() 487 | self.goalcirc1_transform = rendering.Transform() 488 | self.goalcirc2_transform = rendering.Transform() 489 | self.goalcirc3_transform = rendering.Transform() 490 | 491 | self.circ1roll_transform = rendering.Transform() 492 | self.circ2roll_transform = rendering.Transform() 493 | self.circ3roll_transform = rendering.Transform() 494 | self.goalcirc1roll_transform = rendering.Transform() 495 | self.goalcirc2roll_transform = rendering.Transform() 496 | self.goalcirc3roll_transform = rendering.Transform() 497 | 498 | # Assign the transformations to the defined rods and circles 499 | rod_quad1.add_attr(self.pole_transform) 500 | rod_quad2.add_attr(self.pole2_transform) 501 | rod_goalquad1.add_attr(self.goalpole_transform) 502 | 
rod_goalquad2.add_attr(self.goalpole2_transform) 503 | 504 | rod_quadroll1.add_attr(self.poleroll_transform) 505 | rod_quadroll2.add_attr(self.pole2roll_transform) 506 | rod_goalquadroll1.add_attr(self.goalpoleroll_transform) 507 | rod_goalquadroll2.add_attr(self.goalpole2roll_transform) 508 | 509 | quadcirc1.add_attr(self.circ1_transform) 510 | quadcirc2.add_attr(self.circ2_transform) 511 | quadcirc3.add_attr(self.circ3_transform) 512 | goalquadcirc1.add_attr(self.goalcirc1_transform) 513 | goalquadcirc2.add_attr(self.goalcirc2_transform) 514 | goalquadcirc3.add_attr(self.goalcirc3_transform) 515 | 516 | quadcirc1roll.add_attr(self.circ1roll_transform) 517 | quadcirc2roll.add_attr(self.circ2roll_transform) 518 | quadcirc3roll.add_attr(self.circ3roll_transform) 519 | goalquadcirc1roll.add_attr(self.goalcirc1roll_transform) 520 | goalquadcirc2roll.add_attr(self.goalcirc2roll_transform) 521 | goalquadcirc3roll.add_attr(self.goalcirc3roll_transform) 522 | 523 | # Draw the rods, circles and axis in the window defined at the beginning 524 | self.viewer.add_geom(rod_quad1) 525 | self.viewer.add_geom(rod_quad2) 526 | self.viewer.add_geom(rod_goalquad1) 527 | self.viewer.add_geom(rod_goalquad2) 528 | self.viewer.add_geom(xaxis) 529 | self.viewer.add_geom(xmetermin3) 530 | self.viewer.add_geom(xmetermin2) 531 | self.viewer.add_geom(xmetermin1) 532 | self.viewer.add_geom(xmeter0) 533 | self.viewer.add_geom(xmeter1) 534 | self.viewer.add_geom(xmeter2) 535 | self.viewer.add_geom(xmeter3) 536 | self.viewer.add_geom(zaxis) 537 | self.viewer.add_geom(zmeter1) 538 | self.viewer.add_geom(zmeter2) 539 | 540 | self.viewer.add_geom(rod_quadroll1) 541 | self.viewer.add_geom(rod_quadroll2) 542 | self.viewer.add_geom(rod_goalquadroll1) 543 | self.viewer.add_geom(rod_goalquadroll2) 544 | self.viewer.add_geom(xaxisroll) 545 | self.viewer.add_geom(xmetermin1roll) 546 | self.viewer.add_geom(xmeter0roll) 547 | self.viewer.add_geom(xmeter1roll) 548 | self.viewer.add_geom(zaxisroll) 549 | 
self.viewer.add_geom(zmeter1roll) 550 | self.viewer.add_geom(zmeter2roll) 551 | 552 | self.viewer.add_geom(quadcirc1) 553 | self.viewer.add_geom(quadcirc2) 554 | self.viewer.add_geom(quadcirc3) 555 | self.viewer.add_geom(goalquadcirc1) 556 | self.viewer.add_geom(goalquadcirc2) 557 | self.viewer.add_geom(goalquadcirc3) 558 | 559 | self.viewer.add_geom(quadcirc1roll) 560 | self.viewer.add_geom(quadcirc2roll) 561 | self.viewer.add_geom(quadcirc3roll) 562 | self.viewer.add_geom(goalquadcirc1roll) 563 | self.viewer.add_geom(goalquadcirc2roll) 564 | self.viewer.add_geom(goalquadcirc3roll) 565 | 566 | # Define how the rods and circles are transformed at every timestep 567 | self.pole_transform.set_rotation(-self.agent_pos[7]) 568 | self.pole_transform.set_translation(self.agent_pos[0]-3.4, self.agent_pos[2]) 569 | self.pole2_transform.set_rotation(-self.agent_pos[7] + pi) 570 | self.pole2_transform.set_translation(self.agent_pos[0]-3.4, self.agent_pos[2]) 571 | 572 | self.goalpole_transform.set_rotation(-self.goal_state[7]) 573 | self.goalpole_transform.set_translation(self.goal_state[0]-3.4, self.goal_state[2]) 574 | self.goalpole2_transform.set_rotation(-self.goal_state[7] + pi) 575 | self.goalpole2_transform.set_translation(self.goal_state[0]-3.4, self.goal_state[2]) 576 | 577 | self.poleroll_transform.set_rotation(-self.agent_pos[6]) 578 | self.poleroll_transform.set_translation(self.agent_pos[1]+2, self.agent_pos[2]) 579 | self.pole2roll_transform.set_rotation(-self.agent_pos[6] + pi) 580 | self.pole2roll_transform.set_translation(self.agent_pos[1]+2, self.agent_pos[2]) 581 | 582 | self.goalpoleroll_transform.set_rotation(-self.goal_state[6]) 583 | self.goalpoleroll_transform.set_translation(self.goal_state[1]+2, self.goal_state[2]) 584 | self.goalpole2roll_transform.set_rotation(-self.goal_state[6] + pi) 585 | self.goalpole2roll_transform.set_translation(self.goal_state[1]+2, self.goal_state[2]) 586 | 587 | self.circ1_transform.set_translation(self.agent_pos[0]-3.4 
+ self.quad_arms*cos(self.agent_pos[7]), 588 | self.agent_pos[2] - self.quad_arms*sin(self.agent_pos[7])) 589 | self.circ2_transform.set_translation(self.agent_pos[0]-3.4, 590 | self.agent_pos[2]) 591 | self.circ3_transform.set_translation(self.agent_pos[0]-3.4 - self.quad_arms*cos(self.agent_pos[7]), 592 | self.agent_pos[2] + self.quad_arms*sin(self.agent_pos[7])) 593 | self.circ1_transform.set_rotation(-self.agent_pos[7] - pi/2) 594 | self.circ2_transform.set_rotation(-self.agent_pos[7] - pi/2) 595 | self.circ3_transform.set_rotation(-self.agent_pos[7] - pi/2) 596 | 597 | self.goalcirc1_transform.set_translation(self.goal_state[0]-3.4 + self.quad_arms * cos(self.goal_state[7]), 598 | self.goal_state[2] - self.quad_arms * sin(self.goal_state[7])) 599 | self.goalcirc2_transform.set_translation(self.goal_state[0]-3.4, 600 | self.goal_state[2]) 601 | self.goalcirc3_transform.set_translation(self.goal_state[0]-3.4 - self.quad_arms * cos(self.goal_state[7]), 602 | self.goal_state[2] + self.quad_arms * sin(self.goal_state[7])) 603 | self.goalcirc1_transform.set_rotation(-self.goal_state[7] - pi / 2) 604 | self.goalcirc2_transform.set_rotation(-self.goal_state[7] - pi / 2) 605 | self.goalcirc3_transform.set_rotation(-self.goal_state[7] - pi / 2) 606 | 607 | self.circ1roll_transform.set_translation(self.agent_pos[1]+2 + self.quad_arms * cos(self.agent_pos[6]), 608 | self.agent_pos[2] - self.quad_arms * sin(self.agent_pos[6])) 609 | self.circ2roll_transform.set_translation(self.agent_pos[1] + 2, 610 | self.agent_pos[2]) 611 | self.circ3roll_transform.set_translation(self.agent_pos[1] + 2 - self.quad_arms * cos(self.agent_pos[6]), 612 | self.agent_pos[2] + self.quad_arms * sin(self.agent_pos[6])) 613 | self.circ1roll_transform.set_rotation(-self.agent_pos[6] - pi / 2) 614 | self.circ2roll_transform.set_rotation(-self.agent_pos[6] - pi / 2) 615 | self.circ3roll_transform.set_rotation(-self.agent_pos[6] - pi / 2) 616 | 617 | 
self.goalcirc1roll_transform.set_translation(self.goal_state[1] + 2 + self.quad_arms * cos(self.goal_state[6]), 618 | self.goal_state[2] - self.quad_arms * sin(self.goal_state[6])) 619 | self.goalcirc2roll_transform.set_translation(self.goal_state[1] + 2, 620 | self.goal_state[2]) 621 | self.goalcirc3roll_transform.set_translation(self.goal_state[1]+2 - self.quad_arms * cos(self.goal_state[6]), 622 | self.goal_state[2] + self.quad_arms * sin(self.goal_state[6])) 623 | self.goalcirc1roll_transform.set_rotation(-self.goal_state[6] - pi / 2) 624 | self.goalcirc2roll_transform.set_rotation(-self.goal_state[6] - pi / 2) 625 | self.goalcirc3roll_transform.set_rotation(-self.goal_state[6] - pi / 2) 626 | 627 | return self.viewer.render(return_rgb_array=mode == 'rgb_array') 628 | 629 | def close(self): 630 | if self.viewer: 631 | self.viewer.close() 632 | self.viewer = None 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | -------------------------------------------------------------------------------- /Environments/crazyflie.py: -------------------------------------------------------------------------------- 1 | 2 | # Custom gym-like Environment classes for the Crazyflie quadrotor 3 | 4 | from EOM.eom import pwm_to_force, force_to_pwm, eom2d_crazyflie_closedloop, eom3d_crazyflie_closedloop 5 | from EOM.rk4 import runge_kutta4 6 | import random as r 7 | import numpy as np 8 | import gym 9 | from os import path 10 | from gym.utils import seeding 11 | from gym import spaces 12 | from math import pi, cos, sin, tan 13 | from Reward.rewardfuncs import sparse_reward2d, euclidean_reward3d 14 | from shapely.geometry import Point 15 | from shapely.geometry.polygon import Polygon 16 | 17 | 18 | class Crazyflie_2d_inclined(gym.Env): 19 | metadata = {'render.modes': ['human']} 20 | 21 | # state = [x, z, xdot, zdot, theta], action = [Thrust, Theta_commanded], param = [mass, gain_const, time_const] 22 | 23 | def __init__(self, t_s, goal_state=np.array([0, 1.25, 0, 0, 0], dtype=float), 24 
| episode_steps=300, rewardfunc=sparse_reward2d, eom=eom2d_crazyflie_closedloop, 25 | max_pwm_from_hover=15000, param=np.array([0.03303, 1.1094, 0.183806]), rk4=runge_kutta4): 26 | super(Crazyflie_2d_inclined, self).__init__() 27 | 28 | # Construct the landing polygon 29 | self.landing_angle = -pi/7 30 | self.platform_center = goal_state[0] 31 | self.platform_center_height = 1.15 32 | self.platform_width = 0.8 33 | self.landing_polygon = Polygon([(1, 0), (1, 0.1), (-1, 0.1), (-1, 0)]) 34 | self.final_polygon = Polygon([(self.platform_center-0.5*self.platform_width, self.platform_center_height- 35 | tan(abs(self.landing_angle))*0.5*self.platform_width), 36 | (self.platform_center+0.5*self.platform_width, 37 | self.platform_center_height + 38 | tan(abs(self.landing_angle))*0.5*self.platform_width), 39 | (self.platform_center+0.5*self.platform_width, 0), 40 | (self.platform_center-0.5*self.platform_width, 0)]) 41 | 42 | self.quad_arms = 0.2 43 | self.obstacle_range = self.quad_arms-0.15 44 | 45 | self.viewer = None 46 | self.episode_steps = episode_steps 47 | self.param = param 48 | self.rewardfunc = rewardfunc 49 | self.EOM = eom 50 | self.RK4 = rk4 51 | self.T_s = t_s 52 | self.Timesteps = 0 53 | self.goal_state = goal_state 54 | 55 | # Used for simulations 56 | self.thrust_state = np.array([0]) 57 | self.real_action = np.array([0, 0]) 58 | self.hover_pwm = force_to_pwm(self.param[0]*9.81) # Calculate the theoretical hover thrust 59 | self.max_pwm_from_hover = max_pwm_from_hover 60 | 61 | self.episode_counter = 0 62 | self.goal_range = 0.25 # Starting goal range 63 | self.horizontal_spawn_radius = 0.2 # Starting x-ais spawn distance around goal 64 | self.vertical_spawn_radius = 0.2 # Starting z-axis spawn distance around goal 65 | self.spawn_increment = 1/6000 66 | self.tilt_goal_increment = (abs(self.landing_angle)/6000) 67 | self.action_space = spaces.Box(low=np.array([-1, -1]), 68 | high=np.array([1, 1]), dtype=np.float) 69 | # States are: [x, z, x_dot. 
z_dot, Theta, Theta_dot] 70 | self.observation_space = spaces.Box(low=np.array([-3.4, 0, -10, -10, -pi/3]), 71 | high=np.array([3.4, 2.4, 10, 10, pi/3]), dtype=np.float) 72 | self.reward_range = (-float("inf"), float("inf")) 73 | self.agent_pos = [] 74 | self.reset() 75 | self.seed() 76 | self.counter = 0 77 | 78 | def seed(self, seed=None): 79 | self.np_random, seed = seeding.np_random(seed) 80 | return [seed] 81 | 82 | def step(self, action): 83 | # Fix PWM increments from its theoretical Hover PWM 84 | pwm_commanded = self.hover_pwm + action[0]*self.max_pwm_from_hover 85 | # Compute the action vector that goes into the Equations of Motion 86 | self.real_action = np.array([pwm_commanded, action[1]], dtype=float) 87 | # Add the physically unobservable thrust state for simulation purposes 88 | extended_state = np.concatenate((self.agent_pos, self.thrust_state)) 89 | # Simulate the agent with 1 time step using Runge Kutta 4 and the EOM 90 | extended_state = extended_state + self.RK4(extended_state, self.real_action, self.EOM, self.param, self.T_s) 91 | # Subtract the thrust state to form the actual states we want to use in training 92 | self.agent_pos = extended_state[0:-1] 93 | self.thrust_state = np.array([extended_state[-1]]) 94 | # Clip the agent's position so it doesn't leave the simulation bounds 95 | self.agent_pos = np.clip(self.agent_pos, self.observation_space.low, self.observation_space.high) 96 | observation = self.agent_pos 97 | reward, done = self.rewardfunc(observation, self.goal_state, self.observation_space, self.goal_range, 98 | self.landing_polygon) 99 | # Check if the quadrotor touches the landing polygon and diminish horizontal velocity if true 100 | point = Point(self.agent_pos[0], self.agent_pos[1]) 101 | if self.landing_polygon.contains(point): 102 | self.agent_pos[2] = 0 103 | 104 | self.counter += 1 105 | self.Timesteps += 1 106 | 107 | if self.counter == self.episode_steps: 108 | done = True 109 | 110 | info = {} 111 | 112 | return 
observation, reward, done, info 113 | 114 | def reset(self): 115 | 116 | # We use the reset function to achieve a form of continuous curriculum learning 117 | 118 | self.episode_counter += 1 119 | 120 | # Start episodes within a box around the goal state 121 | self.agent_pos = np.array([r.uniform(self.goal_state[0]-self.horizontal_spawn_radius, 122 | self.goal_state[0]+self.horizontal_spawn_radius), 123 | r.uniform(self.goal_state[1]-self.vertical_spawn_radius, 124 | self.goal_state[1]+self.vertical_spawn_radius), 0, 0, 0], dtype=np.float32) 125 | 126 | while self.landing_polygon.contains(Point(self.agent_pos[0], self.agent_pos[1])): 127 | self.agent_pos = np.array([np.clip(r.uniform(self.goal_state[0] - self.horizontal_spawn_radius, 128 | self.goal_state[0] + self.horizontal_spawn_radius), 129 | self.observation_space.low[0], self.observation_space.high[0]), 130 | np.clip(r.uniform(self.goal_state[1] - self.vertical_spawn_radius, 131 | self.goal_state[1] + self.vertical_spawn_radius), 132 | self.observation_space.low[1]+0.5, self.observation_space.high[1]), 0, 0, 0], 133 | dtype=np.float32) 134 | # Spawn Radius Increase 135 | self.horizontal_spawn_radius += self.spawn_increment 136 | if self.vertical_spawn_radius <= 1: 137 | self.vertical_spawn_radius += 0.75 * self.spawn_increment 138 | 139 | # Gradually decrease the goal threshold 140 | if 7500 >= self.episode_counter >= 2500: 141 | self.goal_range -= 0.15/5000 142 | 143 | # Slowly incline the goal state 144 | if self.Timesteps >= 400000 and self.goal_state[4] >= self.landing_angle: 145 | self.goal_state[4] -= self.tilt_goal_increment 146 | 147 | # Place the landing platform once the quad learned to reach inclined goals 148 | if self.Timesteps >= 800000: 149 | self.landing_polygon = self.final_polygon 150 | 151 | # Clip position to be in the bounds of the Optitrack Stadium 152 | self.agent_pos[0] = np.clip(self.agent_pos[0], self.observation_space.low[0] + 0.1, 153 | self.observation_space.high[0] - 0.1) 154 | 
self.agent_pos[1] = np.clip(self.agent_pos[1], self.observation_space.low[1] + 1, 155 | self.observation_space.high[1] - 0.1) 156 | 157 | self.counter = 0 158 | 159 | return self.agent_pos 160 | 161 | def render(self, mode='human'): 162 | 163 | # Rendering function 164 | 165 | if self.viewer is None: 166 | from gym.envs.classic_control import rendering 167 | self.viewer = rendering.Viewer(740, 288) 168 | self.viewer.set_bounds(-3.5, 3.5, -0.5, 2.5) 169 | 170 | rod_quad1 = rendering.make_capsule(self.quad_arms, 0.05) 171 | rod_quad2 = rendering.make_capsule(self.quad_arms, 0.05) 172 | rod_goalquad1 = rendering.make_capsule(self.quad_arms, 0.05) 173 | rod_goalquad2 = rendering.make_capsule(self.quad_arms, 0.05) 174 | 175 | obstacle_1_top = rendering.Line((self.landing_polygon.exterior.coords.xy[0][0], 176 | self.landing_polygon.exterior.coords.xy[1][0]), 177 | (self.landing_polygon.exterior.coords.xy[0][1], 178 | self.landing_polygon.exterior.coords.xy[1][1])) 179 | obstacle_1_right = rendering.Line((self.landing_polygon.exterior.coords.xy[0][1], 180 | self.landing_polygon.exterior.coords.xy[1][1]), 181 | (self.landing_polygon.exterior.coords.xy[0][2], 182 | self.landing_polygon.exterior.coords.xy[1][2])) 183 | obstacle_1_bottom = rendering.Line((self.landing_polygon.exterior.coords.xy[0][2], 184 | self.landing_polygon.exterior.coords.xy[1][2]), 185 | (self.landing_polygon.exterior.coords.xy[0][3], 186 | self.landing_polygon.exterior.coords.xy[1][3])) 187 | obstacle_1_left = rendering.Line((self.landing_polygon.exterior.coords.xy[0][3], 188 | self.landing_polygon.exterior.coords.xy[1][3]), 189 | (self.landing_polygon.exterior.coords.xy[0][4], 190 | self.landing_polygon.exterior.coords.xy[1][4])) 191 | 192 | xaxis = rendering.Line((-3.2, 0), (3.2, 0)) 193 | xmetermin3 = rendering.Line((-3, 0), (-3, 0.05)) 194 | xmetermin2 = rendering.Line((-2, 0), (-2, 0.05)) 195 | xmetermin1 = rendering.Line((-1, 0), (-1, 0.05)) 196 | xmeter0 = rendering.Line((0, -0.10), (0, 0.10)) 
197 | xmeter1 = rendering.Line((1, 0), (1, 0.05)) 198 | xmeter2 = rendering.Line((2, 0), (2, 0.05)) 199 | xmeter3 = rendering.Line((3, 0), (3, 0.05)) 200 | 201 | zaxis = rendering.Line((-3.2, 0), (-3.2, 2.4)) 202 | zmeter1 = rendering.Line((-3.2, 1), (-3.15, 1)) 203 | zmeter2 = rendering.Line((-3.2, 2), (-3.15, 2)) 204 | 205 | rod_quad1.set_color(.8, .3, .3) 206 | rod_quad2.set_color(.8, .3, .3) 207 | rod_goalquad1.set_color(.4, .3, .3) 208 | rod_goalquad2.set_color(.4, .3, .3) 209 | 210 | quadcirc1 = rendering.make_capsule(.03, 0.05) 211 | quadcirc2 = rendering.make_capsule(.03, 0.05) 212 | quadcirc3 = rendering.make_capsule(.03, 0.05) 213 | goalquadcirc1 = rendering.make_capsule(.03, 0.05) 214 | goalquadcirc2 = rendering.make_capsule(.03, 0.05) 215 | goalquadcirc3 = rendering.make_capsule(.03, 0.05) 216 | 217 | quadcirc1.set_color(.3, .6, .3) 218 | quadcirc2.set_color(.3, .6, .3) 219 | quadcirc3.set_color(.3, .6, .3) 220 | goalquadcirc1.set_color(.3, .3, .3) 221 | goalquadcirc2.set_color(.3, .3, .3) 222 | goalquadcirc3.set_color(.3, .3, .3) 223 | 224 | self.pole_transform = rendering.Transform() 225 | self.pole2_transform = rendering.Transform() 226 | self.goalpole_transform = rendering.Transform() 227 | self.goalpole2_transform = rendering.Transform() 228 | 229 | self.circ1_transform = rendering.Transform() 230 | self.circ2_transform = rendering.Transform() 231 | self.circ3_transform = rendering.Transform() 232 | self.goalcirc1_transform = rendering.Transform() 233 | self.goalcirc2_transform = rendering.Transform() 234 | self.goalcirc3_transform = rendering.Transform() 235 | 236 | rod_quad1.add_attr(self.pole_transform) 237 | rod_quad2.add_attr(self.pole2_transform) 238 | rod_goalquad1.add_attr(self.goalpole_transform) 239 | rod_goalquad2.add_attr(self.goalpole2_transform) 240 | 241 | quadcirc1.add_attr(self.circ1_transform) 242 | quadcirc2.add_attr(self.circ2_transform) 243 | quadcirc3.add_attr(self.circ3_transform) 244 | 
goalquadcirc1.add_attr(self.goalcirc1_transform) 245 | goalquadcirc2.add_attr(self.goalcirc2_transform) 246 | goalquadcirc3.add_attr(self.goalcirc3_transform) 247 | 248 | self.viewer.add_geom(rod_quad1) 249 | self.viewer.add_geom(rod_quad2) 250 | self.viewer.add_geom(rod_goalquad1) 251 | self.viewer.add_geom(rod_goalquad2) 252 | self.viewer.add_geom(xaxis) 253 | self.viewer.add_geom(xmetermin3) 254 | self.viewer.add_geom(xmetermin2) 255 | self.viewer.add_geom(xmetermin1) 256 | self.viewer.add_geom(xmeter0) 257 | self.viewer.add_geom(xmeter1) 258 | self.viewer.add_geom(xmeter2) 259 | self.viewer.add_geom(xmeter3) 260 | self.viewer.add_geom(zaxis) 261 | self.viewer.add_geom(zmeter1) 262 | self.viewer.add_geom(zmeter2) 263 | 264 | self.viewer.add_geom(quadcirc1) 265 | self.viewer.add_geom(quadcirc2) 266 | self.viewer.add_geom(quadcirc3) 267 | self.viewer.add_geom(goalquadcirc1) 268 | self.viewer.add_geom(goalquadcirc2) 269 | self.viewer.add_geom(goalquadcirc3) 270 | 271 | self.viewer.add_geom(obstacle_1_right) 272 | self.viewer.add_geom(obstacle_1_bottom) 273 | self.viewer.add_geom(obstacle_1_left) 274 | self.viewer.add_geom(obstacle_1_top) 275 | 276 | 277 | self.pole_transform.set_rotation(-self.agent_pos[4]) 278 | self.pole_transform.set_translation(self.agent_pos[0], self.agent_pos[1]) 279 | self.pole2_transform.set_rotation(-self.agent_pos[4] + pi) 280 | self.pole2_transform.set_translation(self.agent_pos[0], self.agent_pos[1]) 281 | 282 | self.goalpole_transform.set_rotation(-self.goal_state[4]) 283 | self.goalpole_transform.set_translation(self.goal_state[0], self.goal_state[1]) 284 | self.goalpole2_transform.set_rotation(-self.goal_state[4] + pi) 285 | self.goalpole2_transform.set_translation(self.goal_state[0], self.goal_state[1]) 286 | 287 | self.circ1_transform.set_translation(self.agent_pos[0] + self.quad_arms*cos(self.agent_pos[4]), 288 | self.agent_pos[1] - self.quad_arms*sin(self.agent_pos[4])) 289 | 
        # --- continuation of the preceding (2-D) environment's render(): per-frame
        # placement of the rotor-tip circles for the agent and the goal pose.
        # Indices here: [0]=x, [1]=z(-ish vertical), [4]=pitch angle — 2-D state layout.
        self.circ2_transform.set_translation(self.agent_pos[0],
                                             self.agent_pos[1])
        self.circ3_transform.set_translation(self.agent_pos[0] - self.quad_arms*cos(self.agent_pos[4]),
                                             self.agent_pos[1] + self.quad_arms*sin(self.agent_pos[4]))
        # Rotate circles with the body; -pi/2 offset aligns the capsule across the rod.
        self.circ1_transform.set_rotation(-self.agent_pos[4] - pi/2)
        self.circ2_transform.set_rotation(-self.agent_pos[4] - pi/2)
        self.circ3_transform.set_rotation(-self.agent_pos[4] - pi/2)

        # Same placement for the (static) goal marker quadrotor.
        self.goalcirc1_transform.set_translation(self.goal_state[0] + self.quad_arms * cos(self.goal_state[4]),
                                                 self.goal_state[1] - self.quad_arms * sin(self.goal_state[4]))
        self.goalcirc2_transform.set_translation(self.goal_state[0],
                                                 self.goal_state[1])
        self.goalcirc3_transform.set_translation(self.goal_state[0] - self.quad_arms * cos(self.goal_state[4]),
                                                 self.goal_state[1] + self.quad_arms * sin(self.goal_state[4]))
        self.goalcirc1_transform.set_rotation(-self.goal_state[4] - pi / 2)
        self.goalcirc2_transform.set_rotation(-self.goal_state[4] - pi / 2)
        self.goalcirc3_transform.set_rotation(-self.goal_state[4] - pi / 2)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Dispose of the rendering window, if one was ever opened."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None


class Crazyflie_3d_setpoint(gym.Env):
    """3-D Crazyflie setpoint-reaching environment.

    State: [x, y, z, xdot, ydot, zdot, phi, theta]
    Action (normalized to [-1, 1]): [Thrust, Phi_commanded, Theta_commanded]

    Dynamics are integrated with Runge-Kutta 4 over the closed-loop equations
    of motion; the reward is computed by the injected ``rewardfunc``.
    """

    metadata = {'render.modes': ['human']}

    # state = [x, y, z, xdot, ydot, zdot, phi, theta], action = [Thrust, Phi_commanded, Theta_commanded]
    def __init__(self, t_s, goal_state=np.array([0, 0, 1.2, 0, 0, 0, 0, 0]), episode_steps=300,
                 rewardfunc=euclidean_reward3d, eom=eom3d_crazyflie_closedloop, max_pwm_from_hover=17000,
                 param=np.array([0.03303, 1.1094, 0.183806]), rk4=runge_kutta4):
        """Create the environment.

        :param t_s: integration time step (seconds) passed to the RK4 solver.
        :param goal_state: 8-element target state the agent should reach.
        :param episode_steps: steps after which an episode is truncated (done=True).
        :param rewardfunc: callable (obs, goal, obs_space, action) -> (reward, done).
        :param eom: equations-of-motion function handed to the integrator.
        :param max_pwm_from_hover: max PWM deviation from the hover PWM for action[0]=+/-1.
        :param param: physical parameters; param[0] is the mass (kg) used for hover thrust.
        :param rk4: integrator function (state, action, eom, param, t_s) -> state increment.
        """
        super(Crazyflie_3d_setpoint, self).__init__()

        self.quad_arms = 0.2                         # rotor arm length (m), used for rendering geometry
        self.obstacle_range = self.quad_arms - 0.15  # NOTE(review): = 0.05; not used in this chunk — verify callers
        self.viewer = None                           # lazily created pyglet viewer (see render())

        self.episode_steps = episode_steps
        self.counter = 0                             # step counter within the current episode
        self.param = param
        self.rewardfunc = rewardfunc
        self.EOM = eom
        self.RK4 = rk4
        # NOTE(review): initialized with 2 elements but step() assigns a 3-element
        # vector for this 3-action env — presumably copied from the 2-D env; confirm.
        self.real_action = np.array([0, 0])
        self.T_s = t_s
        self.goal_state = goal_state
        self.episode_counter = 0
        self.goal_range = 0.25

        # Spawn-box parameters; spawn_increment suggests a curriculum that widens
        # the spawn region over time (not applied in this chunk's reset()).
        self.x_spawn_radius = 0.2
        self.y_spawn_radius = 0.2
        self.z_spawn_radius = 0.2
        self.spawn_increment = 1/6000

        # Extra, physically unobservable thrust state carried alongside agent_pos
        # purely for simulation (see step()).
        self.thrust_state = np.array([0])
        self.hover_pwm = force_to_pwm(self.param[0]*9.81)  # PWM that balances gravity (mass * g)
        self.max_pwm_from_hover = max_pwm_from_hover

        self.action_space = spaces.Box(low=np.array([-1, -1, -1]),
                                       high=np.array([1, 1, 1]), dtype=np.float32)
        # Observation bounds for [x, y, z, x_dot, y_dot, z_dot, phi, theta]
        self.observation_space = spaces.Box(low=np.array([-3.4, -1.4, 0, -10, -10, -10, -pi/3, -pi/3]),
                                            high=np.array([3.4, 1.4, 2.4, 10, 10, 10, pi/3, pi/3]), dtype=np.float32)
        self.reward_range = (-float("inf"), float("inf"))
        self.agent_pos = []
        self.reset()
        self.seed()

    def seed(self, seed=None):
        """Seed the gym RNG; returns the list of seeds per the gym.Env contract."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Advance the simulation one time step.

        :param action: [thrust, phi_cmd, theta_cmd], each in [-1, 1].
        :return: (observation, reward, done, info) per the gym.Env contract.
        """
        # Fix PWM increments from its theoretical Hover PWM
        PWM_commanded_new = self.hover_pwm + action[0]*self.max_pwm_from_hover
        # Compute the action vector that goes into the Equations of Motion [PWM, clipped, clipped]
        self.real_action = np.array([PWM_commanded_new, action[1], action[2]], dtype=float)
        # Add the physically unobservable thrust state for simulation purposes
        extended_state = np.concatenate((self.agent_pos, self.thrust_state))
        # Simulate the agent for 1 time step using Runge Kutta 4 and the EOM
        extended_state = extended_state + self.RK4(extended_state, self.real_action, self.EOM, self.param, self.T_s)
        # Subtract the thrust state to form the actual states we want to use in training
        self.agent_pos = extended_state[0:-1]
        self.thrust_state = np.array([extended_state[-1]])
        # Clip the agent's position so it doesn't leave the simulation bounds
        self.agent_pos = np.clip(self.agent_pos, self.observation_space.low, self.observation_space.high)

        observation = self.agent_pos
        # rewardfunc may set done (e.g. on reaching the goal or leaving bounds)
        reward, done = self.rewardfunc(observation, self.goal_state, self.observation_space, self.real_action)

        self.counter += 1

        # Truncate the episode after the configured number of steps.
        if self.counter == self.episode_steps:
            done = True

        info = {}

        return observation, reward, done, info

    def reset(self):
        """Respawn the agent at a random position around the goal and return the state."""
        # Use the reset function to achieve a form of continuous curriculum learning

        # Pick the box of starting positions around the euclidean goal
        # (velocities and angles always start at zero).
        self.agent_pos = np.array([r.uniform(self.goal_state[0]-2.5,
                                             self.goal_state[0]+2.5),
                                   r.uniform(self.goal_state[1] - 1.3,
                                             self.goal_state[1] + 1.3),
                                   r.uniform(self.goal_state[2]-1,
                                             self.goal_state[2]+1), 0, 0, 0, 0, 0], dtype=np.float32)

        # Clip starting position to be in the bounds of the Optitrack Stadium
        # (0.1 m margin from the observation-space walls).
        self.agent_pos[0] = np.clip(self.agent_pos[0], self.observation_space.low[0] + 0.1,
                                    self.observation_space.high[0] - 0.1)
        self.agent_pos[1] = np.clip(self.agent_pos[1], self.observation_space.low[1] + 0.1,
                                    self.observation_space.high[1] - 0.1)
        self.agent_pos[2] = np.clip(self.agent_pos[2], self.observation_space.low[2] + 0.1,
                                    self.observation_space.high[2] - 0.1)
        self.counter = 0

        return self.agent_pos

    def render(self, mode='human'):
        """Draw the quadrotor and goal as two side-by-side 2-D projections.

        Left view: x-z plane (pitch, state indices 0/2/7); right view: y-z plane
        (roll, indices 1/2/6). All geometry is built once on the first call and
        only the transforms are updated afterwards.
        """
        # Rendering for 3-D performance. Since pyglet did not support 3D we made two 2D views.
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(720, 288)
            self.viewer.set_bounds(-3.5*2, 3.6, -0.5*2, 2.5)

            # Define the rods for making quadrotor bodies
            rod_quad1 = rendering.make_capsule(self.quad_arms, 0.05)
            rod_quad2 = rendering.make_capsule(self.quad_arms, 0.05)
            rod_goalquad1 = rendering.make_capsule(self.quad_arms, 0.05)
            rod_goalquad2 = rendering.make_capsule(self.quad_arms, 0.05)
            # Roll is for the y-z plane
            rod_quadroll1 = rendering.make_capsule(self.quad_arms, 0.05)
            rod_quadroll2 = rendering.make_capsule(self.quad_arms, 0.05)
            rod_goalquadroll1 = rendering.make_capsule(self.quad_arms, 0.05)
            rod_goalquadroll2 = rendering.make_capsule(self.quad_arms, 0.05)

            # Draw the axis (left view is shifted by -3.4 in world x; see transforms below)
            xaxis = rendering.Line((-6.8, 0), (0, 0))
            xmetermin3 = rendering.Line((-6.4, 0), (-6.4, 0.05))
            xmetermin2 = rendering.Line((-5.4, 0), (-5.4, 0.05))
            xmetermin1 = rendering.Line((-4.4, 0), (-4.4, 0.05))
            xmeter0 = rendering.Line((-3.4, -0.10), (-3.4, 0.10))
            xmeter1 = rendering.Line((-2.4, 0), (-2.4, 0.05))
            xmeter2 = rendering.Line((-1.4, 0), (-1.4, 0.05))
            xmeter3 = rendering.Line((-0.4, 0), (-0.4, 0.05))

            xaxisroll = rendering.Line((0, 0), (6.8, 0))
            xmetermin1roll = rendering.Line((1, 0), (1, 0.05))
            xmeter0roll = rendering.Line((2, -0.10), (2, 0.10))
            xmeter1roll = rendering.Line((3, 0), (3, 0.05))

            zaxis = rendering.Line((-6.8, 0), (-6.8, 2.4))
            zmeter1 = rendering.Line((-6.8, 1), (-6.75, 1))
            zmeter2 = rendering.Line((-6.8, 2), (-6.75, 2))

            zaxisroll = rendering.Line((0.6, 0), (0.6, 2.4))
            zmeter1roll = rendering.Line((0.6, 1), (0.65, 1))
            zmeter2roll = rendering.Line((0.6, 2), (0.65, 2))

            # Define the color for the quadrotor bodies (agent red-ish, goal grey-ish)
            rod_quad1.set_color(.8, .3, .3)
            rod_quad2.set_color(.8, .3, .3)
            rod_goalquad1.set_color(.4, .3, .3)
            rod_goalquad2.set_color(.4, .3, .3)

            rod_quadroll1.set_color(.4, .3, .3)
            rod_quadroll2.set_color(.4, .3, .3)
            rod_goalquadroll1.set_color(.2, .3, .3)
            rod_goalquadroll2.set_color(.2, .3, .3)

            # Define the circles on top of the rods (rotor-tip markers)
            quadcirc1 = rendering.make_capsule(.03, 0.05)
            quadcirc2 = rendering.make_capsule(.03, 0.05)
            quadcirc3 = rendering.make_capsule(.03, 0.05)
            goalquadcirc1 = rendering.make_capsule(.03, 0.05)
            goalquadcirc2 = rendering.make_capsule(.03, 0.05)
            goalquadcirc3 = rendering.make_capsule(.03, 0.05)

            quadcirc1roll = rendering.make_capsule(.03, 0.05)
            quadcirc2roll = rendering.make_capsule(.03, 0.05)
            quadcirc3roll = rendering.make_capsule(.03, 0.05)
            goalquadcirc1roll = rendering.make_capsule(.03, 0.05)
            goalquadcirc2roll = rendering.make_capsule(.03, 0.05)
            goalquadcirc3roll = rendering.make_capsule(.03, 0.05)

            # Define the circle colors on top of the rod (agent green, goal grey)
            quadcirc1.set_color(.3, .6, .3)
            quadcirc2.set_color(.3, .6, .3)
            quadcirc3.set_color(.3, .6, .3)
            goalquadcirc1.set_color(.3, .3, .3)
            goalquadcirc2.set_color(.3, .3, .3)
            goalquadcirc3.set_color(.3, .3, .3)

            quadcirc1roll.set_color(.3, .6, .3)
            quadcirc2roll.set_color(.3, .6, .3)
            quadcirc3roll.set_color(.3, .6, .3)
            goalquadcirc1roll.set_color(.3, .3, .3)
            goalquadcirc2roll.set_color(.3, .3, .3)
            goalquadcirc3roll.set_color(.3, .3, .3)

            # Define the transformations that need to happen at every timestep;
            # kept on self so later render() calls can move the one-time geometry.
            self.pole_transform = rendering.Transform()
            self.pole2_transform = rendering.Transform()
            self.goalpole_transform = rendering.Transform()
            self.goalpole2_transform = rendering.Transform()

            self.poleroll_transform = rendering.Transform()
            self.pole2roll_transform = rendering.Transform()
            self.goalpoleroll_transform = rendering.Transform()
            self.goalpole2roll_transform = rendering.Transform()

            self.circ1_transform = rendering.Transform()
            self.circ2_transform = rendering.Transform()
            self.circ3_transform = rendering.Transform()
            self.goalcirc1_transform = rendering.Transform()
            self.goalcirc2_transform = rendering.Transform()
            self.goalcirc3_transform = rendering.Transform()

            self.circ1roll_transform = rendering.Transform()
            self.circ2roll_transform = rendering.Transform()
            self.circ3roll_transform = rendering.Transform()
            self.goalcirc1roll_transform = rendering.Transform()
            self.goalcirc2roll_transform = rendering.Transform()
            self.goalcirc3roll_transform = rendering.Transform()

            # Assign the transformations to the defined rods and circles
            rod_quad1.add_attr(self.pole_transform)
            rod_quad2.add_attr(self.pole2_transform)
            rod_goalquad1.add_attr(self.goalpole_transform)
            rod_goalquad2.add_attr(self.goalpole2_transform)

            rod_quadroll1.add_attr(self.poleroll_transform)
            rod_quadroll2.add_attr(self.pole2roll_transform)
            rod_goalquadroll1.add_attr(self.goalpoleroll_transform)
            rod_goalquadroll2.add_attr(self.goalpole2roll_transform)

            quadcirc1.add_attr(self.circ1_transform)
            quadcirc2.add_attr(self.circ2_transform)
            quadcirc3.add_attr(self.circ3_transform)
            goalquadcirc1.add_attr(self.goalcirc1_transform)
            goalquadcirc2.add_attr(self.goalcirc2_transform)
            goalquadcirc3.add_attr(self.goalcirc3_transform)

            quadcirc1roll.add_attr(self.circ1roll_transform)
            quadcirc2roll.add_attr(self.circ2roll_transform)
            quadcirc3roll.add_attr(self.circ3roll_transform)
            goalquadcirc1roll.add_attr(self.goalcirc1roll_transform)
            goalquadcirc2roll.add_attr(self.goalcirc2roll_transform)
            goalquadcirc3roll.add_attr(self.goalcirc3roll_transform)

            # Draw the rods, circles and axis in the window defined at the beginning
            self.viewer.add_geom(rod_quad1)
            self.viewer.add_geom(rod_quad2)
            self.viewer.add_geom(rod_goalquad1)
            self.viewer.add_geom(rod_goalquad2)
            self.viewer.add_geom(xaxis)
            self.viewer.add_geom(xmetermin3)
            self.viewer.add_geom(xmetermin2)
            self.viewer.add_geom(xmetermin1)
            self.viewer.add_geom(xmeter0)
            self.viewer.add_geom(xmeter1)
            self.viewer.add_geom(xmeter2)
            self.viewer.add_geom(xmeter3)
            self.viewer.add_geom(zaxis)
            self.viewer.add_geom(zmeter1)
            self.viewer.add_geom(zmeter2)

            self.viewer.add_geom(rod_quadroll1)
            self.viewer.add_geom(rod_quadroll2)
            self.viewer.add_geom(rod_goalquadroll1)
            self.viewer.add_geom(rod_goalquadroll2)
            self.viewer.add_geom(xaxisroll)
            self.viewer.add_geom(xmetermin1roll)
            self.viewer.add_geom(xmeter0roll)
            self.viewer.add_geom(xmeter1roll)
            self.viewer.add_geom(zaxisroll)
            self.viewer.add_geom(zmeter1roll)
            self.viewer.add_geom(zmeter2roll)

            self.viewer.add_geom(quadcirc1)
            self.viewer.add_geom(quadcirc2)
            self.viewer.add_geom(quadcirc3)
            self.viewer.add_geom(goalquadcirc1)
            self.viewer.add_geom(goalquadcirc2)
            self.viewer.add_geom(goalquadcirc3)

            self.viewer.add_geom(quadcirc1roll)
            self.viewer.add_geom(quadcirc2roll)
            self.viewer.add_geom(quadcirc3roll)
            self.viewer.add_geom(goalquadcirc1roll)
            self.viewer.add_geom(goalquadcirc2roll)
            self.viewer.add_geom(goalquadcirc3roll)

        # Define how the rods and circles are transformed at every timestep.
        # Left (pitch) view: world x shifted by -3.4; theta is agent_pos[7].
        self.pole_transform.set_rotation(-self.agent_pos[7])
        self.pole_transform.set_translation(self.agent_pos[0]-3.4, self.agent_pos[2])
        self.pole2_transform.set_rotation(-self.agent_pos[7] + pi)
        self.pole2_transform.set_translation(self.agent_pos[0]-3.4, self.agent_pos[2])

        self.goalpole_transform.set_rotation(-self.goal_state[7])
        self.goalpole_transform.set_translation(self.goal_state[0]-3.4, self.goal_state[2])
        self.goalpole2_transform.set_rotation(-self.goal_state[7] + pi)
        self.goalpole2_transform.set_translation(self.goal_state[0]-3.4, self.goal_state[2])

        # Right (roll) view: world y shifted by +2; phi is agent_pos[6].
        self.poleroll_transform.set_rotation(-self.agent_pos[6])
        self.poleroll_transform.set_translation(self.agent_pos[1]+2, self.agent_pos[2])
        self.pole2roll_transform.set_rotation(-self.agent_pos[6] + pi)
        self.pole2roll_transform.set_translation(self.agent_pos[1]+2, self.agent_pos[2])

        self.goalpoleroll_transform.set_rotation(-self.goal_state[6])
        self.goalpoleroll_transform.set_translation(self.goal_state[1]+2, self.goal_state[2])
        self.goalpole2roll_transform.set_rotation(-self.goal_state[6] + pi)
        self.goalpole2roll_transform.set_translation(self.goal_state[1]+2, self.goal_state[2])

        # Rotor-tip circles: offset by +/- quad_arms along the tilted body axis.
        self.circ1_transform.set_translation(self.agent_pos[0]-3.4 + self.quad_arms*cos(self.agent_pos[7]),
                                             self.agent_pos[2] - self.quad_arms*sin(self.agent_pos[7]))
        self.circ2_transform.set_translation(self.agent_pos[0]-3.4,
                                             self.agent_pos[2])
        self.circ3_transform.set_translation(self.agent_pos[0]-3.4 - self.quad_arms*cos(self.agent_pos[7]),
                                             self.agent_pos[2] + self.quad_arms*sin(self.agent_pos[7]))
        self.circ1_transform.set_rotation(-self.agent_pos[7] - pi/2)
        self.circ2_transform.set_rotation(-self.agent_pos[7] - pi/2)
        self.circ3_transform.set_rotation(-self.agent_pos[7] - pi/2)

        self.goalcirc1_transform.set_translation(self.goal_state[0]-3.4 + self.quad_arms * cos(self.goal_state[7]),
                                                 self.goal_state[2] - self.quad_arms * sin(self.goal_state[7]))
        self.goalcirc2_transform.set_translation(self.goal_state[0]-3.4,
                                                 self.goal_state[2])
        self.goalcirc3_transform.set_translation(self.goal_state[0]-3.4 - self.quad_arms * cos(self.goal_state[7]),
                                                 self.goal_state[2] + self.quad_arms * sin(self.goal_state[7]))
        self.goalcirc1_transform.set_rotation(-self.goal_state[7] - pi / 2)
        self.goalcirc2_transform.set_rotation(-self.goal_state[7] - pi / 2)
        self.goalcirc3_transform.set_rotation(-self.goal_state[7] - pi / 2)

        self.circ1roll_transform.set_translation(self.agent_pos[1]+2 + self.quad_arms * cos(self.agent_pos[6]),
                                                 self.agent_pos[2] - self.quad_arms * sin(self.agent_pos[6]))
        self.circ2roll_transform.set_translation(self.agent_pos[1] + 2,
                                                 self.agent_pos[2])
        self.circ3roll_transform.set_translation(self.agent_pos[1] + 2 - self.quad_arms * cos(self.agent_pos[6]),
                                                 self.agent_pos[2] + self.quad_arms * sin(self.agent_pos[6]))
        self.circ1roll_transform.set_rotation(-self.agent_pos[6] - pi / 2)
        self.circ2roll_transform.set_rotation(-self.agent_pos[6] - pi / 2)
        self.circ3roll_transform.set_rotation(-self.agent_pos[6] - pi / 2)

        self.goalcirc1roll_transform.set_translation(self.goal_state[1] + 2 + self.quad_arms * cos(self.goal_state[6]),
                                                     self.goal_state[2] - self.quad_arms * sin(self.goal_state[6]))
        self.goalcirc2roll_transform.set_translation(self.goal_state[1] + 2,
                                                     self.goal_state[2])
        self.goalcirc3roll_transform.set_translation(self.goal_state[1]+2 - self.quad_arms * cos(self.goal_state[6]),
                                                     self.goal_state[2] + self.quad_arms * sin(self.goal_state[6]))
        self.goalcirc1roll_transform.set_rotation(-self.goal_state[6] - pi / 2)
        self.goalcirc2roll_transform.set_rotation(-self.goal_state[6] - pi / 2)
        self.goalcirc3roll_transform.set_rotation(-self.goal_state[6] - pi / 2)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Dispose of the rendering window, if one was ever opened."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None