├── Carsim.py ├── CarsimRL.mp4 ├── Carsimenv.slx ├── LICENSE ├── README.md ├── Readme.md ├── SimuCarsim.m ├── World.py ├── env.py ├── main.py └── requirements.txt /Carsim.py: -------------------------------------------------------------------------------- 1 | import matlab.engine # Import MATLAB engine for Python-MATLAB integration 2 | import numpy as np # Import NumPy for numerical operations 3 | 4 | 5 | class Carsim_world: 6 | """ 7 | Class representing the Carsim simulation world integrated with MATLAB Simulink. 8 | """ 9 | 10 | def __init__(self): 11 | """ 12 | Initialize the Carsim simulation environment. 13 | """ 14 | # Initial control values 15 | self.INI_CONTROL = np.array([0, 0, 0]) 16 | self.a_ini = self.INI_CONTROL.tolist() 17 | 18 | # Flag indicating whether Simulink is running 19 | self.simulink = False 20 | 21 | # Start MATLAB engine 22 | self.eng = matlab.engine.start_matlab() 23 | self.simulink = True 24 | 25 | # Initialize simulation state variables 26 | self.px = 0 # Longitudinal position 27 | self.py = 0 # Lateral position 28 | self.vx = 60 # Longitudinal velocity 29 | self.vy = 0 # Lateral velocity 30 | self.t = 0 # Simulation time 31 | self.r = 0 # Yaw rate 32 | self.yaw = 0 # Yaw angle 33 | self.done = 0 # Simulation done flag 34 | self.control = [] # Last applied control inputs 35 | 36 | def run(self): 37 | """ 38 | Stop the Simulink simulation. 39 | 40 | This method calls the Simulink model with the initial control values and a step flag set to 0. 41 | """ 42 | self.eng.SimuCarsim(self.a_ini, 0, nargout=0) 43 | 44 | def get_simulink(self): 45 | """ 46 | Load the necessary Simulink files and initialize the MATLAB engine. 47 | """ 48 | self.eng = matlab.engine.start_matlab() 49 | self.eng.load('Shanghai_center.mat') # Load the required data file 50 | 51 | def set_ini(self): 52 | """ 53 | Reset the Simulink model to its initial state. 54 | 55 | This method stops the Simulink simulation by applying zero control inputs. 
56 | """ 57 | self.eng.SimuCarsim([0, 0, 0], 0, nargout=0) 58 | 59 | def apply_control(self, control): 60 | """ 61 | Step the Simulink model with the specified control inputs. 62 | 63 | Args: 64 | control: A list of control inputs [steer, throttle, brake]. 65 | """ 66 | (self.s, self.l, self.vx, self.vy, self.yaw, 67 | self.r, self.t, self.done) = self.eng.SimuCarsim(control, 1, nargout=8) 68 | self.control = control 69 | 70 | def get_location(self): 71 | """ 72 | Get the current position of the vehicle. 73 | 74 | Returns: 75 | tuple: Longitudinal and lateral positions (s, l). 76 | """ 77 | return self.s, self.l 78 | 79 | def get_velocity(self): 80 | """ 81 | Get the current velocity of the vehicle. 82 | 83 | Returns: 84 | tuple: Longitudinal and lateral velocities (vx, vy). 85 | """ 86 | return self.vx, self.vy 87 | 88 | def get_time(self): 89 | """ 90 | Get the current simulation time. 91 | 92 | Returns: 93 | float: Current simulation time. 94 | """ 95 | return self.t 96 | 97 | def get_yaw(self): 98 | """ 99 | Get the current yaw angle of the vehicle. 100 | 101 | Returns: 102 | float: Current yaw angle in radians. 103 | """ 104 | return self.yaw 105 | 106 | def get_yawrate(self): 107 | """ 108 | Get the current yaw rate of the vehicle. 109 | 110 | Returns: 111 | float: Current yaw rate in radians per second. 112 | """ 113 | return self.r 114 | 115 | def get_control(self): 116 | """ 117 | Get the last applied control inputs. 118 | 119 | Returns: 120 | list: The last applied control inputs. 121 | """ 122 | return self.control 123 | 124 | def get_done(self): 125 | """ 126 | Check if the simulation has reached a terminal state. 127 | 128 | Returns: 129 | bool: True if the simulation is done, False otherwise. 130 | """ 131 | return self.done == 1 132 | 133 | def tick(self): 134 | """ 135 | Placeholder method for future implementation of simulation ticking. 
136 | """ 137 | pass 138 | -------------------------------------------------------------------------------- /CarsimRL.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sean-shiyuez/Carsim_python_RL/6d87ef278a4f4200ddba1b8b46136ee7cfd13fee/CarsimRL.mp4 -------------------------------------------------------------------------------- /Carsimenv.slx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sean-shiyuez/Carsim_python_RL/6d87ef278a4f4200ddba1b8b46136ee7cfd13fee/Carsimenv.slx -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2024 SEAN ZHAO 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with this program. If not, see . 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Readme 2 | 3 | --- 4 | 5 | # Carsim Python-MATLAB Integrated Simulation Environment 6 | 7 | This project integrates Python and MATLAB to create a simulation environment for vehicle dynamics using Carsim and Simulink. 
It allows reinforcement learning algorithms to interact with Carsim simulations seamlessly. 8 | 9 | ## Features 10 | 11 | - **Python-MATLAB Integration**: Utilizes MATLAB Engine for Python to communicate with Carsim simulations. 12 | - **Reinforcement Learning (RL) Support**: Compatible with stable-baselines3 for RL algorithms like SAC. 13 | - **Custom Environment**: Implements a custom Gym environment (`Drift_env`) for training vehicle control policies. 14 | - **Simulink Interface**: Facilitates real-time simulation and control using MATLAB's Simulink. 15 | - **High-Precision Dynamics**: Leverages Carsim's high-fidelity vehicle dynamics model for RL simulations. 16 | 17 | --- 18 | 19 | ## Requirements 20 | 21 | ### Python Dependencies 22 | 23 | The required Python packages are listed in `requirements.txt`: 24 | 25 | ``` 26 | numpy==1.21.0 27 | matplotlib==3.5.0 28 | pandas==1.3.0 29 | gym==0.21.0 30 | stable-baselines3==1.6.0 31 | ``` 32 | 33 | ### MATLAB Engine for Python 34 | 35 | You must install MATLAB Engine for Python to enable communication with MATLAB. Refer to the **Installation Instructions** below. 36 | 37 | --- 38 | 39 | ## Installation 40 | 41 | ### Step 1: Clone the Repository 42 | 43 | ```bash 44 | git clone 45 | cd carsim-simulation 46 | ``` 47 | 48 | ### Step 2: Install Python Dependencies 49 | 50 | Use pip to install the dependencies: 51 | 52 | ```bash 53 | pip install -r requirements.txt 54 | ``` 55 | 56 | ### Step 3: Install MATLAB Engine for Python 57 | 58 | 1. Navigate to the MATLAB installation directory: 59 | - **Windows**: `C:\\Program Files\\MATLAB\\R\\extern\\engines\\python` 60 | - **macOS/Linux**: `/usr/local/MATLAB/R/extern/engines/python` 61 | 2. Run the installation script: 62 | 63 | ```bash 64 | python setup.py install 65 | ``` 66 | 67 | 3. 
Verify the installation: 68 | 69 | ```python 70 | import matlab.engine 71 | eng = matlab.engine.start_matlab() 72 | print("MATLAB Engine installed and connected successfully!") 73 | eng.quit() 74 | ``` 75 | 76 | 77 | --- 78 | 79 | ## Usage 80 | 81 | ### Running the Simulation 82 | 83 | 1. Ensure Carsim and Simulink models are configured properly (e.g., `Carsimenv` is loaded). 84 | 2. Run the main Python script: 85 | 86 | ```bash 87 | python main.py 88 | ``` 89 | 90 | 91 | ### Training RL Models 92 | 93 | The script integrates `stable-baselines3` for training RL policies. Modify the parameters in `main.py` to customize the training setup (e.g., learning rate, batch size, etc.). 94 | 95 | ### Simulink Requirements 96 | 97 | Ensure the Simulink model (`Carsimenv`) is correctly linked to the Carsim S-Function. Troubleshoot any missing paths or files in Simulink if errors occur. 98 | 99 | --- 100 | 101 | ## Known Issues 102 | 103 | - **MATLAB-Python Interaction Speed**: The interaction speed between MATLAB and Python might be a bottleneck for real-time applications. Contributions to optimize this aspect are welcome. 104 | - **Timestamp Alignment**: Ensure time synchronization between Python and Simulink when processing simulation results. 105 | 106 | --- 107 | 108 | ## File Structure 109 | 110 | ``` 111 | ├── env.py # Custom Gym environment 112 | ├── world.py # Simulation environment setup 113 | ├── Carsim.py # Carsim and MATLAB interaction 114 | ├── main.py # Main script to run the simulation 115 | ├── requirements.txt # Python dependencies 116 | ├── README.md # Project documentation 117 | └── Carsimenv.slx # Simulink model file 118 | ``` 119 | 120 | --- 121 | 122 | ## Example 123 | 124 | A video demonstrating the simulation process is provided. Ensure to check the timestamp alignment between Carsim and Python outputs. 
125 | 126 | --- 127 | 128 | ## Credits 129 | 130 | While this project partially modifies the code from [auto-drift](https://github.com/angloth/auto-drift), its primary purpose differs: 131 | 132 | - **Focus on High-Precision Dynamics**: This project emphasizes the use of Carsim's high-precision vehicle dynamics for reinforcement learning simulations, whereas `auto-drift` focuses more on drift control scenarios. 133 | - **Integration with MATLAB and Simulink**: This project integrates MATLAB and Simulink, enabling advanced simulation and control capabilities not present in the original project. 134 | - **Versatile RL Applications**: Beyond drift scenarios, this project supports broader applications in vehicle control and safety-critical scenarios. 135 | 136 | --- 137 | 138 | ## Contributing 139 | 140 | 1. Fork the repository. 141 | 2. Create a feature branch (`git checkout -b feature-name`). 142 | 3. Commit changes (`git commit -m "Add feature"`). 143 | 4. Push to your branch (`git push origin feature-name`). 144 | 5. Open a Pull Request. 145 | 146 | --- 147 | 148 | ## License 149 | 150 | This project is licensed under the GPL License. See the `LICENSE` file for details. 151 | 152 | --- 153 | 154 | ## Acknowledgements 155 | 156 | - [Carsim](https://www.carsim.com/) for providing advanced vehicle simulation. 157 | - [MATLAB](https://www.mathworks.com/products/matlab.html) for its robust engineering tools. 158 | - [stable-baselines3](https://github.com/DLR-RM/stable-baselines3) for reinforcement learning support. 159 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Readme 2 | 3 | --- 4 | 5 | # Carsim Python-MATLAB Integrated Simulation Environment 6 | 7 | This project integrates Python and MATLAB to create a simulation environment for vehicle dynamics using Carsim and Simulink. 
It allows reinforcement learning algorithms to interact with Carsim simulations seamlessly. 8 | 9 | ## Features 10 | 11 | - **Python-MATLAB Integration**: Utilizes MATLAB Engine for Python to communicate with Carsim simulations. 12 | - **Reinforcement Learning (RL) Support**: Compatible with stable-baselines3 for RL algorithms like SAC. 13 | - **Custom Environment**: Implements a custom Gym environment (`Drift_env`) for training vehicle control policies. 14 | - **Simulink Interface**: Facilitates real-time simulation and control using MATLAB's Simulink. 15 | - **High-Precision Dynamics**: Leverages Carsim's high-fidelity vehicle dynamics model for RL simulations. 16 | 17 | --- 18 | 19 | ## Requirements 20 | 21 | ### Python Dependencies 22 | 23 | The required Python packages are listed in `requirements.txt`: 24 | 25 | ``` 26 | numpy==1.21.0 27 | matplotlib==3.5.0 28 | pandas==1.3.0 29 | gym==0.21.0 30 | stable-baselines3==1.6.0 31 | ``` 32 | 33 | ### MATLAB Engine for Python 34 | 35 | You must install MATLAB Engine for Python to enable communication with MATLAB. Refer to the **Installation Instructions** below. 36 | 37 | --- 38 | 39 | ## Installation 40 | 41 | ### Step 1: Clone the Repository 42 | 43 | ```bash 44 | git clone 45 | cd carsim-simulation 46 | ``` 47 | 48 | ### Step 2: Install Python Dependencies 49 | 50 | Use pip to install the dependencies: 51 | 52 | ```bash 53 | pip install -r requirements.txt 54 | ``` 55 | 56 | ### Step 3: Install MATLAB Engine for Python 57 | 58 | 1. Navigate to the MATLAB installation directory: 59 | - **Windows**: `C:\\Program Files\\MATLAB\\R\\extern\\engines\\python` 60 | - **macOS/Linux**: `/usr/local/MATLAB/R/extern/engines/python` 61 | 2. Run the installation script: 62 | 63 | ```bash 64 | python setup.py install 65 | ``` 66 | 67 | 3. 
Verify the installation: 68 | 69 | ```python 70 | import matlab.engine 71 | eng = matlab.engine.start_matlab() 72 | print("MATLAB Engine installed and connected successfully!") 73 | eng.quit() 74 | ``` 75 | 76 | 77 | --- 78 | 79 | ## Usage 80 | 81 | ### Running the Simulation 82 | 83 | 1. Ensure Carsim and Simulink models are configured properly (e.g., `Carsimenv` is loaded). 84 | 2. Run the main Python script: 85 | 86 | ```bash 87 | python main.py 88 | ``` 89 | 90 | 91 | ### Training RL Models 92 | 93 | The script integrates `stable-baselines3` for training RL policies. Modify the parameters in `main.py` to customize the training setup (e.g., learning rate, batch size, etc.). 94 | 95 | ### Simulink Requirements 96 | 97 | Ensure the Simulink model (`Carsimenv`) is correctly linked to the Carsim S-Function. Troubleshoot any missing paths or files in Simulink if errors occur. 98 | 99 | --- 100 | 101 | ## Known Issues 102 | 103 | - **MATLAB-Python Interaction Speed**: The interaction speed between MATLAB and Python might be a bottleneck for real-time applications. Contributions to optimize this aspect are welcome. 104 | - **Timestamp Alignment**: Ensure time synchronization between Python and Simulink when processing simulation results. 105 | 106 | --- 107 | 108 | ## File Structure 109 | 110 | ``` 111 | ├── env.py # Custom Gym environment 112 | ├── world.py # Simulation environment setup 113 | ├── Carsim.py # Carsim and MATLAB interaction 114 | ├── main.py # Main script to run the simulation 115 | ├── requirements.txt # Python dependencies 116 | ├── README.md # Project documentation 117 | └── Carsimenv.slx # Simulink model file 118 | ``` 119 | 120 | --- 121 | 122 | ## Example 123 | 124 | A video demonstrating the simulation process is provided. Ensure to check the timestamp alignment between Carsim and Python outputs. 

---

## Credits

While this project partially modifies the code from [auto-drift](https://github.com/angloth/auto-drift), its primary purpose differs:

- **Focus on High-Precision Dynamics**: This project emphasizes the use of Carsim's high-precision vehicle dynamics for reinforcement learning simulations, whereas `auto-drift` focuses more on drift control scenarios.
- **Integration with MATLAB and Simulink**: This project integrates MATLAB and Simulink, enabling advanced simulation and control capabilities not present in the original project.
- **Versatile RL Applications**: Beyond drift scenarios, this project supports broader applications in vehicle control and safety-critical scenarios.

---

## Contributing

1. Fork the repository.
2. Create a feature branch (`git checkout -b feature-name`).
3. Commit changes (`git commit -m "Add feature"`).
4. Push to your branch (`git push origin feature-name`).
5. Open a Pull Request.

---

## License

This project is licensed under the GPL License. See the `LICENSE` file for details.

---

## Reference:

If you're exploring the theoretical background, consider reading **S. Zhao et al., "A Harmonized Approach: Beyond-the-Limit Control for Autonomous Vehicles Balancing Performance and Safety in Unpredictable Environments," in IEEE Transactions on Intelligent Transportation Systems, vol. 25, no. 11, pp. 15827-15840, Nov. 2024, doi: 10.1109/TITS.2024.3419108.**

## Acknowledgements

- [Carsim](https://www.carsim.com/) for providing advanced vehicle simulation.
- [MATLAB](https://www.mathworks.com/products/matlab.html) for its robust engineering tools.
- [stable-baselines3](https://github.com/DLR-RM/stable-baselines3) for reinforcement learning support.
--------------------------------------------------------------------------------
/SimuCarsim.m:
--------------------------------------------------------------------------------
function [s, l, vx, vy, yaw, r, t, done] = SimuCarsim(action, cmd)
% SimuCarsim  Communicate with the Simulink environment for Carsim simulation.
%
% Fix: the primary function is now named after the file (SimuCarsim.m).
% It was previously declared as "carenv"; MATLAB dispatches on the file
% name, so callers (Carsim.py uses eng.SimuCarsim) are unaffected, but the
% declaration should match the file name by MATLAB convention.
%
% Args:
%   action: Control action [steering, throttle] (cell array, converted to matrix)
%   cmd:    Command flag (0 for reset, 1 for step simulation)
%
% Returns:
%   s:    Longitudinal position
%   l:    Lateral position
%   vx:   Longitudinal velocity
%   vy:   Lateral velocity
%   yaw:  Yaw angle
%   r:    Yaw rate
%   t:    Simulation time
%   done: Flag (1/0) indicating if the simulation is complete

action = cell2mat(action); % Convert cell array (from MATLAB Engine) to matrix
validate_action(action);   % Validate action inputs

%% Reset simulation if cmd == 0
if cmd == 0
    [clock, done_br] = reset_simulation();
else
    [clock, done_br] = step_simulation(action);
end

% Extract simulation state variables
px = clock(1);  % Longitudinal position
py = clock(2);  % Lateral position
vx = clock(3);  % Longitudinal velocity
vy = clock(4);  % Lateral velocity
yaw = clock(5); % Yaw angle
r = clock(6);   % Yaw rate
t = clock(7);   % Simulation time

% Convert Cartesian coordinates to longitudinal and lateral positions
[s, l] = xy2sl(px, py);

% Determine if the simulation is complete
done = calculate_done(done_br, t, l, vx, vy);
end

function validate_action(action)
% Validate action inputs to ensure they are within the expected range
if any(action < -1) || any(action > 1)
    error('Action inputs must be between -1 and 1.');
end
end

function [clock, done_br] = reset_simulation()
% Reset the Simulink simulation environment
global info;
info = [0, 0, 0, 0, -1, 0, 0, 0]; % Initialize state info
pause(5); % Pause for stability

try
    load_system('Carsimenv'); % Load Simulink system
    set_param('Carsimenv', 'SimulationCommand', 'stop');  % Stop simulation
    set_param('Carsimenv', 'SimulationCommand', 'start'); % Start simulation

    % Retry if the simulation does not start properly
    if info(end, 7) ~= 0.1
        pause(0.5);
        set_param('Carsimenv', 'SimulationCommand', 'start');
    end
    done_br = 0;
    clock = [0, 0, 0, 0, -1, 0, 0, 0]; % Reset state clock
catch ME
    error('Failed to reset Simulink environment: %s', ME.message);
end
end

function [clock, done_br] = step_simulation(action)
% Step the Simulink simulation with the given action inputs
%
% NOTE(review): 'info' is declared global in reset_simulation, yet it is
% read here via evalin('caller', ...). That only works if the caller's
% workspace actually holds 'info'; verify whether evalin('base', ...) was
% intended (Simulink sinks typically write to the base workspace).
t_be = evalin('caller', 'info(end, 7)'); % Retrieve previous simulation time
set_param('Carsimenv/F_angle', 'Value', num2str(action(1)));      % Set steering input
set_param('Carsimenv/Speedcontrol', 'Value', num2str(action(2))); % Set throttle input
set_param('Carsimenv', 'SimulationCommand', 'continue'); % Step simulation

% Wait for simulation to advance
t_now = evalin('caller', 'info(end, 7)');
paus = 0;
done_br = 0;

while t_now == t_be
    t_now = evalin('caller', 'info(end, 7)');
    pause(0.1);
    paus = paus + 0.1;
    if paus >= 30
        % Stop simulation if it hangs
        set_param('Carsimenv', 'SimulationCommand', 'stop');
        done_br = 1;
        break;
    end
end

clock = evalin('caller', 'info(end, :)'); % Retrieve current state info
end

function done = calculate_done(done_br, t, l, vx, vy)
% Determine if the simulation is complete
MAX_TIME = 15;              % Maximum simulation time
MAX_LATERAL_DEVIATION = 10; % Maximum lateral deviation
MIN_SPEED = 1;              % Minimum allowable speed

if done_br == 1 || t >= MAX_TIME || abs(l) >= MAX_LATERAL_DEVIATION || sqrt(vx^2 + vy^2) <= MIN_SPEED
    done = 1;
else
    done = 0;
end
end
--------------------------------------------------------------------------------
/World.py:
--------------------------------------------------------------------------------
import numpy as np  # NumPy for numerical operations


# ==============================================================================
# -- Helper Functions ----------------------------------------------------------
# ==============================================================================

def get_actor_display_name(actor, truncate=250):
    """
    Get the display name of an actor.

    Args:
        actor: The actor object.
        truncate: Maximum length of the display name.

    Returns:
        str: Truncated display name of the actor.
    """
    name = ' '.join(actor.type_id.replace('_', '.').title().split('.')[1:])
    return (name[:truncate - 1] + u'\u2026') if len(name) > truncate else name


# ==============================================================================
# -- Carsim World --------------------------------------------------------------
# ==============================================================================

class world1:
    """
    Class representing the surrounding simulation environment.
    """

    def __init__(self, carsim_world):
        """
        Initialize the Carsim world.

        Args:
            carsim_world: The Carsim simulation world instance.
38 | """ 39 | self.INI_CONTROL = np.array([0, 0, 0]) # Initial control values 40 | self.ini_a = self.INI_CONTROL.tolist() # Convert initial control to list 41 | self.spawn_point = None # Spawn point for the player (not used currently) 42 | self.world = carsim_world # Reference to the Carsim world 43 | self.player = None # Placeholder for the player object 44 | self.recording_enabled = False # Flag for recording state 45 | self.recording_start = 0 # Starting point for recording 46 | self.name = 'SimulCarsim1' # Name of the simulation environment 47 | 48 | # Initialize the environment 49 | self.restart() 50 | 51 | def reset_player(self): 52 | """ 53 | Reset the player in the simulation. 54 | 55 | Stops the Simulink simulation and applies initial control to restart. 56 | """ 57 | if self.player: 58 | self.player.run() # Ensure the simulation is running 59 | self.player.apply_control(self.ini_a) # Apply initial control 60 | 61 | def restart(self): 62 | """ 63 | Restart the simulation environment. 64 | 65 | Spawns the player object and links it to the simulation world. 
66 | """ 67 | if self.player is not None: 68 | self.reset_player() 69 | 70 | # Ensure the player is initialized 71 | while self.player is None: 72 | # Attempt to link the player with the simulation world 73 | self.world.get_simulink(self) # Get Simulink instance 74 | 75 | # Assign Carsim world to the player 76 | self.player = self.world() 77 | 78 | # ============================================================================== 79 | -------------------------------------------------------------------------------- /env.py: -------------------------------------------------------------------------------- 1 | import gym # Import the OpenAI Gym library for environment creation 2 | import wandb # Import the Weights and Biases library for logging (optional, currently unused) 3 | from gym import spaces # Import spaces for defining action and observation spaces 4 | from math import sin, cos, pi, acos, radians, degrees, exp, pow # Import mathematical functions 5 | import numpy as np # Import NumPy for numerical operations 6 | import time # Import time for managing delays and ticks 7 | import pandas as pd # Import pandas for handling logs and dataframes 8 | from statistics import mean # Import mean calculation from statistics module 9 | import matplotlib.pyplot as plt # Import Matplotlib for plotting 10 | 11 | # Define constants for the state space 12 | NUM_OBS = 6 # Number of observations 13 | MAX_YAW = 2 * pi # Maximum yaw angle (radians) 14 | MAX_T = 130 # Maximum time 15 | MAX_R = 2 # Maximum yaw rate (radians/second) 16 | MAX_S = 800 # Maximum longitudinal position 17 | MAX_L = 6 # Maximum lateral position 18 | MAX_VX = 150 # Maximum longitudinal velocity 19 | MAX_VY = 80 # Maximum lateral velocity 20 | MAX_ACC = 20 # Maximum acceleration 21 | MAX_STEER = 90 # Maximum steering angle (degrees) 22 | 23 | MIN_S = 0 # Minimum longitudinal position 24 | MIN_L = -6 # Minimum lateral position 25 | MIN_VX = 0 # Minimum longitudinal velocity 26 | MIN_VY = -80 # Minimum lateral 
velocity 27 | MIN_YAW = -2 * pi # Minimum yaw angle 28 | MIN_R = -2 # Minimum yaw rate 29 | 30 | # Define constants for the reward system 31 | R_OOB = 5000 # Reward for successfully completing the episode 32 | R_OOF = -500 # Penalty for going out of bounds 33 | R_OOT = 0 # Reward for other situations 34 | 35 | # Define a helper constant 36 | PI_2 = 2 * pi # Two times pi 37 | 38 | # Define the custom environment class 39 | class Drift_env(gym.Env): 40 | metadata = {'render.modes': ['gui', 'none']} # Metadata for rendering modes 41 | 42 | def __init__(self, world, v_ini, L_road, max_episode_iters=1000): 43 | """ 44 | Initialize the environment. 45 | 46 | Args: 47 | world: Simulation world instance. 48 | v_ini: Initial velocity. 49 | L_road: Lateral road tolerance. 50 | max_episode_iters: Maximum number of iterations per episode. 51 | """ 52 | self._world = world 53 | self.v_ini = v_ini 54 | self.py_tol = L_road 55 | 56 | # Define action space: [steering, throttle] 57 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) 58 | 59 | # Define observation space: [vx, vy, yaw, r, px, py] 60 | self.observation_space = spaces.Box(low=-1, high=1, shape=(NUM_OBS,)) 61 | 62 | # Define observation range 63 | self._obs_max_vals = np.array([MAX_S, MAX_L, MAX_VX, MAX_VY, MAX_YAW, MAX_R]) 64 | self._obs_min_vals = np.array([MIN_S, MIN_L, MIN_VX, MIN_VY, MIN_YAW, MIN_R]) 65 | 66 | # Initialize variables for tracking state and rewards 67 | self._velocity = None 68 | self._delta_phi = None 69 | self._reward = None 70 | self._py = None 71 | 72 | # Episode management 73 | self.max_episode_iters = max_episode_iters 74 | self.iters = 0 75 | self.global_tick = 0 76 | 77 | # Logging data 78 | self.ticks = [] 79 | self.rewards = [] 80 | self.velocities = [] 81 | 82 | # Recent values for smoothing logs 83 | self.last_10_betas = [] 84 | self.last_10_epsilons = [] 85 | self.last_10_rewards = [] 86 | self.last_10_velocities = [] 87 | 88 | # Training mode flag 89 | self.is_training = False 
90 | self.control = [] 91 | self._action1 = [1, 1] 92 | 93 | # Reset the environment 94 | self.reset() 95 | 96 | def log(self): 97 | """ Log the recent rewards and velocities. """ 98 | self.last_10_rewards.append(self._reward) 99 | self.last_10_velocities.append(self._velocity) 100 | 101 | if len(self.last_10_betas) == 10: 102 | self.ticks.append(self.global_tick) 103 | 104 | self.rewards.append(mean(self.last_10_rewards)) 105 | self.velocities.append(mean(self.last_10_velocities)) 106 | 107 | self.last_10_rewards = [] 108 | self.last_10_velocities = [] 109 | 110 | def plot_logs(self): 111 | """ Plot the logged rewards and velocities over time. """ 112 | vals = [self.rewards, self.velocities] 113 | names = ["Reward", "Velocity"] 114 | 115 | for i in range(2): 116 | t = self.ticks 117 | s = vals[i] 118 | name = names[i] 119 | 120 | fig, ax = plt.subplots() 121 | ax.plot(t, s) 122 | ax.set(xlabel='Timestep', title=name) 123 | ax.grid() 124 | 125 | fig.savefig(name + ".png") 126 | 127 | def save_logs(self): 128 | """ Save the logged data to a CSV file. """ 129 | d = {'tick': self.ticks, 'rewards': self.rewards, 'vels': self.velocities} 130 | df = pd.DataFrame(data=d) 131 | df.to_csv("data.csv") 132 | 133 | def step(self, action): 134 | """ 135 | Execute a single step in the environment. 136 | 137 | Args: 138 | action: The action to apply. 139 | 140 | Returns: 141 | obs: The new observation. 142 | reward: The reward for the step. 143 | done: Whether the episode has finished. 144 | info: Additional information. 
145 | """ 146 | self._apply_action(action) 147 | self._action1 = action 148 | 149 | obs = self._extract_obs() 150 | reward = self._calc_reward() 151 | self._reward = reward 152 | 153 | done = self._calc_done() 154 | info = {} 155 | 156 | self.iters += 1 157 | self.global_tick += 1 158 | 159 | if self.is_training: 160 | self.log() 161 | 162 | print('Reward:', reward) 163 | 164 | return obs, reward, done, info 165 | 166 | def _apply_action(self, action): 167 | """ Apply the given action to the simulation world. """ 168 | self.control = action.tolist() 169 | self._world.player.apply_control(self.control) 170 | 171 | def _extract_obs(self, debug=True): 172 | """ Extract observations from the simulation world. """ 173 | Carsim_pos = self._world.player.get_location() 174 | Carsim_vel = self._world.player.get_velocity() 175 | Carsim_t = self._world.player.get_time() 176 | Carsim_r = self._world.player.get_yawrate() 177 | Carsim_rot = self._world.player.get_yaw() 178 | Carsim_done = self._world.player.get_done() 179 | 180 | pos_global = np.array([Carsim_pos[0], Carsim_pos[1]]) 181 | vel_global = np.array([Carsim_vel[0], Carsim_vel[1]]) 182 | yaw_global = radians(Carsim_rot) 183 | r = radians(Carsim_r) 184 | t_global = np.array(Carsim_t) 185 | 186 | s, l = pos_global 187 | v_y, v_x = vel_global 188 | yaw = yaw_global 189 | done = Carsim_done 190 | 191 | obs = np.array([s, l, v_x, v_y, yaw, r]) 192 | 193 | norm_obs = np.array([-1 + 2 * (obs[i] - self._obs_min_vals[i]) / (self._obs_max_vals[i] - self._obs_min_vals[i]) for i in range(NUM_OBS)]) 194 | 195 | self._velocity = np.linalg.norm(vel_global) 196 | self._s = s 197 | self._l = l 198 | self._vx = v_x 199 | self._vy = v_y 200 | self._yaw = yaw 201 | self._t = t_global 202 | self._done = done 203 | 204 | return norm_obs 205 | 206 | def _calc_reward(self): 207 | """ Calculate the reward for the current state. 
""" 208 | pass # Define your reward calculation logic here 209 | 210 | def _calc_done(self): 211 | """ Determine if the episode is complete. """ 212 | return self._done 213 | 214 | def reset(self): 215 | """ Reset the environment for a new episode. """ 216 | self.iters = 0 217 | self._world.reset_player() 218 | 219 | return self._extract_obs() 220 | 221 | def render(self, mode='none', close=False): 222 | """ Render the environment. """ 223 | if mode == 'none': 224 | pass 225 | elif mode == 'gui': 226 | pass # Implement GUI rendering if needed 227 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from env import Drift_env # Import the custom drift environment class 2 | from World import world1 # Import the function to generate the world 3 | from Carsim import Carsim_world # Import the CarSim simulation world 4 | from stable_baselines3 import SAC # Import the SAC algorithm 5 | from stable_baselines3.common.callbacks import CheckpointCallback # Import the callback for saving model checkpoints 6 | 7 | # Define the main function 8 | def main(): 9 | # Initialize the simulation world 10 | # world1 creates a world instance integrated with CarSim 11 | world = world1(Carsim_world) 12 | 13 | # Create the reinforcement learning environment 14 | # Drift_env is a custom environment that simulates 60 seconds with a 10ms step size 15 | env = Drift_env(world, simulation_time=60, step_time=10) 16 | 17 | # Initialize the reinforcement learning model 18 | # SAC uses a multilayer perceptron policy ("MlpPolicy") with specified parameters 19 | model = SAC( 20 | "MlpPolicy", # Type of policy 21 | env, # The associated environment 22 | verbose=0, # Log output level, 0 means no output 23 | device="auto", # Automatically choose GPU or CPU 24 | learning_rate=1e-5, # Learning rate 25 | tensorboard_log="./SAC_tensorboard/", # Path to save TensorBoard logs 26 | 
batch_size=256 # Batch size for training 27 | ) 28 | 29 | # Create a callback for saving model checkpoints 30 | # Save the model every 5000 timesteps 31 | checkpoint_callback = CheckpointCallback( 32 | save_freq=5000, # Frequency of saving 33 | save_path='./models/', # Path to save models 34 | name_prefix='rl_model' # Prefix for model filenames 35 | ) 36 | 37 | # Start training the model 38 | model.learn( 39 | total_timesteps=int(1e5), # Total training timesteps 40 | callback=checkpoint_callback, # Callback function 41 | reset_num_timesteps=False, # Do not reset the timestep counter 42 | tb_log_name="1124_run" # Name for TensorBoard logs 43 | ) 44 | 45 | # Save the final trained model 46 | model.save("Vehicle_Drift_test_1") 47 | 48 | # Save training logs 49 | env.save_logs() 50 | 51 | # Set the training flag to False 52 | env.is_training = False 53 | 54 | # Reset the environment for testing 55 | obs = env.reset() # Reset the drift environment, restarting the world 56 | 57 | # Test the trained model 58 | while True: 59 | action, _states = model.predict(obs, deterministic=False) # Predict actions using the model 60 | obs, rewards, done, info = env.step(action) # Take action and get new state 61 | env.render() # Render the environment 62 | if done: # Check if the episode is finished 63 | obs = env.reset() # Reset the environment after completion 64 | break 65 | 66 | # Ensure the script executes the main function only when run directly 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.0 2 | matplotlib==3.5.0 3 | pandas==1.3.0 4 | gym==0.21.0 5 | stable-baselines3==1.6.0 6 | 7 | # MATLAB Engine for Python (installation guide provided separately) 8 | --------------------------------------------------------------------------------