├── .all-contributorsrc
├── README.md
├── emitterReceiverSchemeTutorial
│   ├── README.md
│   ├── full_project
│   │   ├── controllers
│   │   │   ├── robot_controller
│   │   │   │   └── robot_controller.py
│   │   │   └── supervisor_controller
│   │   │       ├── PPO_agent.py
│   │   │       ├── cartpole_robot_definition.txt
│   │   │       ├── supervisor_controller.py
│   │   │       └── utilities.py
│   │   └── worlds
│   │       └── cartpole_world.wbt
│   └── images
│       ├── 10_new_controllers_created.png
│       ├── 11_assign_supervisor_controller_1.png
│       ├── 12_assign_supervisor_controller_2.png
│       ├── 13_workflow_diagram.png
│       ├── 14_click_play.png
│       ├── 1_new_proj_menu.png
│       ├── 2_world_settings.png
│       ├── 3_project_created.png
│       ├── 4_reload_world.png
│       ├── 5_add_new_object.png
│       ├── 6_add_robot_node.png
│       ├── 7_set_supervisor_true.png
│       ├── 8_click_save_button.png
│       ├── 9_new_controller_menu.png
│       └── cartPoleWorld.gif
└── robotSupervisorSchemeTutorial
    ├── README.md
    ├── full_project
    │   ├── controllers
    │   │   └── robot_supervisor_controller
    │   │       ├── PPO_agent.py
    │   │       ├── cartpole_robot_definition.txt
    │   │       ├── robot_supervisor_controller.py
    │   │       └── utilities.py
    │   └── worlds
    │       └── cartpole_world.wbt
    └── images
        ├── 10_assign_supervisor_controller_2.png
        ├── 11_click_play.png
        ├── 1_new_proj_menu.png
        ├── 2_world_settings.png
        ├── 3_project_created.png
        ├── 4_reload_world.png
        ├── 5_set_supervisor_true.png
        ├── 6_click_save_button.png
        ├── 7_new_controller_menu.png
        ├── 8_new_controllers_created.png
        ├── 9_assign_supervisor_controller_1.png
        └── cartPoleWorld.gif
/.all-contributorsrc:
--------------------------------------------------------------------------------
1 | {
2 | "files": [
3 | "README.md"
4 | ],
5 | "imageSize": 100,
6 | "commit": false,
7 | "contributors": [
8 | {
9 | "login": "tsampazk",
10 | "name": "Kostas Tsampazis",
11 | "avatar_url": "https://avatars.githubusercontent.com/u/27914645?v=4",
12 | "profile": "https://github.com/tsampazk",
13 | "contributions": [
14 | "tutorial",
15 | "projectManagement",
16 | "maintenance",
17 | "ideas"
18 | ]
19 | },
20 | {
21 | "login": "DataDrover214530",
22 | "name": "DataDrover214530",
23 | "avatar_url": "https://avatars.githubusercontent.com/u/29627216?v=4",
24 | "profile": "https://github.com/DataDrover214530",
25 | "contributions": [
26 | "bug"
27 | ]
28 | },
29 | {
30 | "login": "JadarTheObscurity",
31 | "name": "JadarTheObscurity",
32 | "avatar_url": "https://avatars.githubusercontent.com/u/62043377?v=4",
33 | "profile": "https://github.com/JadarTheObscurity",
34 | "contributions": [
35 | "ideas"
36 | ]
37 | },
38 | {
39 | "login": "ManosMagnus",
40 | "name": "Manos Kirtas",
41 | "avatar_url": "https://avatars.githubusercontent.com/u/10010230?v=4",
42 | "profile": "http://eakirtas.webpages.auth.gr/",
43 | "contributions": [
44 | "projectManagement",
45 | "maintenance",
46 | "ideas"
47 | ]
48 | },
49 | {
50 | "login": "Piyush-555",
51 | "name": "Piyush Raikwar",
52 | "avatar_url": "https://avatars.githubusercontent.com/u/34499999?v=4",
53 | "profile": "http://piyush-555.github.io",
54 | "contributions": [
55 | "bug"
56 | ]
57 | },
58 | {
59 | "login": "caozx1110",
60 | "name": "生石灰",
61 | "avatar_url": "https://avatars.githubusercontent.com/u/78025946?v=4",
62 | "profile": "https://github.com/caozx1110",
63 | "contributions": [
64 | "bug"
65 | ]
66 | }
67 | ],
68 | "contributorsPerLine": 7,
69 | "projectName": "deepbots-tutorials",
70 | "projectOwner": "aidudezzz",
71 | "repoType": "github",
72 | "repoHost": "https://github.com",
73 | "skipCi": true
74 | }
75 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | [](#contributors-)
7 |
8 |
9 | This repository contains all official tutorials created for the
10 | [deepbots framework](https://github.com/aidudezzz/deepbots) listed below:
11 |
12 | - [CartPole beginner tutorial using the robot-supervisor scheme](/robotSupervisorSchemeTutorial/README.md):
13 |
13 | Illustrates step-by-step the creation of a [CartPole](https://www.gymlibrary.dev/environments/classic_control/cart_pole/) problem in
14 | [Webots](https://cyberbotics.com/) and solves it by using the
15 | [deepbots framework](https://github.com/aidudezzz/deepbots) and the
16 | [Proximal Policy Optimization](https://openai.com/blog/openai-baselines-ppo/) Reinforcement Learning algorithm.
17 | This tutorial uses the [robot-supervisor scheme](https://github.com/aidudezzz/deepbots#combined-robot-supervisor-scheme).
18 | - [CartPole tutorial using the emitter-receiver scheme](/emitterReceiverSchemeTutorial/README.md):
19 |
19 | Follow this tutorial to get started with the
20 | [emitter-receiver scheme](https://github.com/aidudezzz/deepbots#emitter---receiver-scheme)
21 | which is useful for setting up more
22 | complicated examples, such as having a single RL agent controlling multiple robots. This scheme separates the robot and
23 | the RL agent in different Webots nodes.
24 |
25 | ## Contributors ✨
26 |
27 | Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
28 |
29 |
30 |
31 |
32 |
42 |
43 |
44 |
45 |
46 |
47 |
48 | This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
49 |
50 | Special thanks to Papanikolaou Evangelia for designing the project's logo!
51 |
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/README.md:
--------------------------------------------------------------------------------
1 | # CartPole Emitter-Receiver Scheme Tutorial
2 |
3 | 
4 |
5 | This tutorial explains how to use the [*deepbots framework*](https://github.com/aidudezzz/deepbots) by setting
6 | up a simple problem. We will use the
7 | [emitter-receiver scheme](https://github.com/aidudezzz/deepbots#emitter---receiver-scheme) which is appropriate for
8 | more complicated use-cases, such as setting up multiple robots. For simple use cases of a single robot, please use the
9 | [robot-supervisor scheme tutorial](https://github.com/aidudezzz/deepbots-tutorials/tree/master/robotSupervisorSchemeTutorial).
10 |
11 |
12 | ~~Moreover, we will implement a custom reset procedure, which is not actually needed as seen in the robot-supervisor
13 | scheme tutorial, but might be useful for some use-cases.~~
14 |
15 | _Since the original tutorial was written, Webots was updated and now provides
16 | [more options to reset the world](https://cyberbotics.com/doc/reference/supervisor?tab-language=python#resetreload-matrix),
17 | and thus the old custom reset procedure is not needed anymore.
18 | The reset method provided by the deepbots framework is enough for most use-cases. Of course, deepbots design philosophy allows
19 | you to override the reset method and provide your own implementation that fits your problem best. We do it ourselves in some of
20 | our examples in [deepworlds](https://github.com/aidudezzz/deepworlds)! You can check out a pretty involved custom reset method
21 | that entirely overrides the built-in reset method
22 | [here.](https://github.com/aidudezzz/deepworlds/blob/f3f286d5c3df5ca858745a40111a2834001e15e7/examples/find_and_avoid_v2/controllers/robot_supervisor_manager/find_and_avoid_v2_robot_supervisor.py#L713-L802)_
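
For reference, here is a minimal, hypothetical sketch of what such an override could look like with the supervisor class used later in this tutorial (the class name and the custom logic are placeholders; the built-in reset is usually all you need):

```python
from deepbots.supervisor.controllers.csv_supervisor_env import CSVSupervisorEnv


class CustomResetSupervisor(CSVSupervisorEnv):
    def reset(self):
        # Run the framework's default reset first...
        default_observation = super().reset()
        # ...then apply any extra re-initialization your problem needs,
        # e.g. randomizing object positions via supervisor methods.
        return default_observation
```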
23 |
24 | Keep in mind that the tutorial is very detailed and many parts can be completed really fast by an
25 | experienced user. The tutorial assumes no familiarity with the [Webots](https://cyberbotics.com/) simulator.
26 |
27 | We will recreate the [CartPole](https://www.gymlibrary.dev/environments/classic_control/cart_pole/) problem step-by-step in
28 | [Webots](https://cyberbotics.com/), and solve it with the
29 | [Proximal Policy Optimization](https://openai.com/blog/openai-baselines-ppo/) (PPO)
30 | Reinforcement Learning (RL) algorithm, using [PyTorch](https://pytorch.org/) as our neural network backend library.
31 |
32 | We will focus on creating the project, the world and the controller scripts and how to use the *deepbots framework*.
33 | For the purposes of the tutorial, a basic implementation of the PPO algorithm, together with a custom CartPole robot
34 | node contained within the world are supplied. For guides on how to construct a custom robot, please visit the official Webots
35 | [tutorial](https://cyberbotics.com/doc/guide/tutorial-6-4-wheels-robot).
36 |
37 | You can check the complete example [here](/emitterReceiverSchemeTutorial/full_project) with all the scripts and nodes used
38 | in this tutorial.
39 | The CartPole example is available (with some added code for plots/monitoring and keyboard controls) on the
40 | [deepworlds](https://github.com/aidudezzz/deepworlds/) repository.
41 |
42 |
43 | ## Prerequisites
44 |
45 | Before starting, several prerequisites should be met. Follow the [installation section on the deepbots framework main
46 | repository](https://github.com/aidudezzz/deepbots#installation).
47 |
48 | For this tutorial you will also need to [install PyTorch](https://pytorch.org/get-started/locally/)
49 | (no CUDA/GPU support needed for this simple example as the very small neural networks used are sufficient to solve the task).
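
After installing, you can optionally verify that both packages import correctly (a quick sanity check, nothing tutorial-specific):

```python
# Should run without errors if deepbots and PyTorch are installed
import deepbots
import torch

print(torch.__version__)
```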
50 |
51 | ## CartPole
52 | ### Creating the project
53 |
54 | Now we are ready to start working on the *CartPole* problem. First of all, we should create a new project.
55 |
56 | 1. Open Webots and on the menu bar, click *"File -> New -> New Project Directory..."*\
57 | 
58 | 2. Select a directory of your choice
59 | 3. On world settings **all** boxes should be ticked\
60 | 
61 | 4. Give your world a name, e.g. "cartpole_world.wbt"
62 | 5. Press Finish
63 |
64 | You should end up with:\
65 | 
66 |
67 |
68 | ### Adding a *robot node* and a *supervisor robot* node in the world
69 |
70 | First of all, we will download the *CartPole robot node* definition that is supplied for the purposes of the tutorial;
71 | we will later add it into the world.
72 |
73 | 1. Right-click on
74 | [this link](https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/master/emitterReceiverSchemeTutorial/full_project/controllers/supervisor_controller/cartpole_robot_definition.txt)
75 | and click *Save link as...* to download the CartPole robot definition
76 | 2. Save the .txt file in a directory of your choice
77 | 3. Navigate to the directory and open the downloaded file with a text editor
78 | 4. Select everything and copy it
79 |
80 | Now we need to add the *CartPole robot* into the world:
81 |
82 | 1. Navigate to your project's directory, open `/worlds` and edit the `.wbt` world file with a text editor
83 | 2. Navigate to the end of the file and paste the contents of the `cartpole_robot_definition.txt` file we downloaded earlier
84 | 3. Now all you need to do is reload the world and the robot will appear in it!
85 | 
86 |
87 | The *CartPole robot node* comes ready along with the communication devices needed (an emitter and a receiver) to send and receive data.
88 | Ignore the warning that appears in the console as we will later add a *robot controller* script to control the robot.
89 |
90 | Now that the *CartPole robot node* is added, we are going to create another special kind of *robot*, called
91 | a *supervisor*. Later, we will add the *supervisor controller* script, through which we will be able to handle several
92 | aspects of the simulation.
93 |
94 | _(Make sure the simulation is stopped and reset to its original state, by pressing the pause button and then the reset button)_
95 |
96 | 1. Click on the *Add a new object or import an object* button\
97 | 
98 | (If the button is grayed out, left-click on the last node on the tree (the robot node) so the *add* button becomes active again.)
99 | 2. Expand the *Base nodes* and left-click on *Robot*\
100 | 
101 | 3. Click *Add*. Now on the left side of the screen, under the previously added *Robot* node, you can see a new *Robot* node
102 | 4. Click on the new *Robot* node and set its DEF field below to "SUPERVISOR". From now on we are going to refer to this robot
103 | as the *supervisor*
104 | 5. Double-click on the new *Robot* node to expand it
105 | 6. Scroll down to find the *supervisor* field and set it to TRUE\
106 | 
107 | 7. On the *children* field, right-click and select *Add new*
108 | 8. Expand the *Base nodes* and find *Emitter*
109 | 9. Select it and on the lower right press *Add*
110 | 10. Repeat from step 7, but this time add the *Receiver* node
111 | 11. Click *Save*\
112 | 
113 |
114 |
115 | ### Adding the controllers
116 |
117 | Now we will create the two basic controller scripts needed to control the *supervisor* and the *robot* nodes.
118 | Then we are going to assign the *supervisor controller* script to the *supervisor* node and the *robot controller*
119 | to the *robot* created before.
120 | Note that the *CartPole robot* node was already added to the world earlier, but we still need to create its
121 | controller and assign it.
122 |
123 | Creating the *supervisor controller* and *robot controller* scripts:
124 | 1. On the *menu bar*, click *"File -> New -> New Robot Controller..."*\
125 | 
126 | 2. On *Language selection*, select *Python*
127 | 3. Give it the name "*supervisor_controller*"
128 | 4. Press *Finish*
129 | 5. Repeat from step 1, but on step 3 give the name "*robot_controller*"
130 |
131 | *If you are using an external IDE:*
132 | 1. Un-tick the "open ... in Text Editor" boxes and press *Finish*
133 | 2. Navigate to the project directory, inside the *controllers/controllerName/* directory
134 | 3. Open the controller script with your IDE
135 |
136 | Two new Python controller scripts should be created and opened in Webots text editor looking like this:\
137 | 
138 |
139 | _(Make sure the simulation is stopped and reset to its original state, by pressing the pause button and then the reset button)_
140 |
141 | Assigning the *supervisor_controller* to the *supervisor robot* node *controller* field:
142 | 1. Expand the *supervisor* node created earlier and scroll down to find the *controller* field
143 | 2. Click on the *controller* field and press the "*Select...*" button below\
144 | 
145 | 3. Find the "*supervisor_controller*" controller from the list and select it\
146 | 
147 | 4. Click *OK*
148 | 5. Click *Save*
149 |
150 | Follow the same steps for the *robot* and the *robot controller* created earlier.
151 |
152 | ### Code overview
153 |
154 | Before delving into writing code, we take a look at the general workflow of the framework. We will create two classes
155 | that inherit the *deepbots framework* classes and write implementations for several key methods, specific for the
156 | *CartPole* problem.
157 |
158 | We will implement the basic methods *get_observations*, *get_default_observation*, *get_reward*, *is_done* and *solved*,
159 | used for RL, based on the [Gym](https://www.gymlibrary.dev/) framework logic, that will be contained in the
160 | *supervisor controller*.
161 | These methods will compose the *environment* for the RL algorithm. Within the *supervisor controller*, below the environment
162 | class we will also add the RL *agent* that will receive *observations* and output *actions* and the RL training loop.
163 |
164 | For the *robot controller* script we will implement a couple of basic methods to enable it to send and receive data from
165 | the *supervisor*. We will initialize the motors and the pole position sensor. The motors will have their speeds set depending
166 | on the action the robot receives from the *supervisor* at each simulation step. The position sensor data will be sent to the
167 | *supervisor* for it to compose the *agent's* observation.
168 |
169 | The following diagram loosely defines the general workflow:\
170 | 
171 |
172 | The *robot controller* will gather data from the *robot's* sensors and send it to the *supervisor controller*. The
173 | *supervisor controller* will use the data received and extra data to compose the *observation* for the agent. Then,
174 | using the *observation* the *agent* will perform a forward pass and return an *action*. Then the *supervisor controller*
175 | will send the *action* back to the *robot controller*, which will perform the *action* on the *robot*. This closes the
176 | loop, that repeats until a termination condition is met, defined in the *is_done* method.
177 |
178 |
179 | ### Writing the scripts
180 |
181 | Now we are ready to start writing the *robot controller* and *supervisor controller* scripts.
182 | It is recommended to delete all the contents of the two scripts that were automatically generated.
183 |
184 | ### Robot controller script
185 |
186 | First, we will start with the simpler *robot controller* script. In this script we will import the *CSVRobot*
187 | class from the *deepbots framework* and inherit it into our own *CartPoleRobot* class. Then, we are going to
188 | implement the two basic framework methods *create_message* and *use_message_data*. The former gathers data from the
189 | *robot's* sensors and packs it into a string message to be sent to the *supervisor controller* script. The latter
190 | unpacks messages sent by the *supervisor* that contain the next action, and uses the data to move the *CartPoleRobot*
191 | forward and backward by setting the motors' speeds.
192 |
193 | The only import we are going to need is the *CSVRobot* class.
194 | ```python
195 | from deepbots.robots.controllers.csv_robot import CSVRobot
196 | ```
197 | Then we define our class, inheriting the imported one.
198 | ```python
199 | class CartpoleRobot(CSVRobot):
200 | def __init__(self):
201 | super().__init__()
202 | ```
203 | Then we initialize the position sensor that reads the pole's angle, needed for the agent's observation.
204 | ```python
205 | self.position_sensor = self.getDevice("polePosSensor")
206 | self.position_sensor.enable(self.timestep)
207 | ```
208 | Finally, we initialize the four motors completing our `__init__()` method.
209 | ```python
210 | self.wheels = []
211 | for wheel_name in ['wheel1', 'wheel2', 'wheel3', 'wheel4']:
212 | wheel = self.getDevice(wheel_name) # Get the wheel handle
213 | wheel.setPosition(float('inf')) # Set starting position
214 | wheel.setVelocity(0.0) # Zero out starting velocity
215 | self.wheels.append(wheel)
216 | ```
217 | After the initialization method is done we move on to the `create_message()` method implementation, used to pack the
218 | value read by the sensor into a string, so it can be sent to the *supervisor controller*.
219 |
220 | _(mind the indentation, the following two methods belong to the *CartpoleRobot* class)_
221 |
222 | ```python
223 | def create_message(self):
224 | # Read the sensor value, convert to string and save it in a list
225 | message = [str(self.position_sensor.getValue())]
226 | return message
227 | ```
228 | Finally, we implement the `use_message_data()` method, which unpacks the message received from the
229 | *supervisor controller*, that contains the next action. Then we implement what the *action* actually means for the
230 | *CartPoleRobot*, i.e. moving forward and backward using its motors.
231 | ```python
232 | def use_message_data(self, message):
233 | action = int(message[0]) # Convert the string message into an action integer
234 |
235 | if action == 0:
236 | motor_speed = 5.0
237 | elif action == 1:
238 | motor_speed = -5.0
239 | else:
240 | motor_speed = 0.0
241 |
242 | # Set the motors' velocities based on the action received
243 | for i in range(len(self.wheels)):
244 | self.wheels[i].setPosition(float('inf'))
245 | self.wheels[i].setVelocity(motor_speed)
246 | ```
247 | That's the *CartpoleRobot* class complete. Now all that's left, is to add (outside the class scope, mind the
248 | indentation) the code that runs the controller.
249 |
250 | ```python
251 | # Create the robot controller object and run it
252 | robot_controller = CartpoleRobot()
253 | robot_controller.run() # Run method is implemented by the framework, just need to call it
254 | ```
255 |
256 | Now the *robot controller* script is complete! We move on to the *supervisor controller* script.
257 |
258 | ### Supervisor controller script
259 | Before we start coding, we should add two scripts, one that contains the RL PPO agent,
260 | and the other containing utility functions that we are going to need.
261 |
262 | Save both files inside the project directory, under controllers/supervisor_controller/
263 | 1. Right-click on [this link](https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/master/emitterReceiverSchemeTutorial/full_project/controllers/supervisor_controller/PPO_agent.py) and click *Save link as...* to download the PPO agent
264 | 2. Right-click on [this link](https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/master/emitterReceiverSchemeTutorial/full_project/controllers/supervisor_controller/utilities.py) and click *Save link as...* to download the utilities script
265 |
266 | Now for the imports, we are going to need the numpy library, the deepbots CSVSupervisorEnv class, the PPO agent and the
267 | utilities.
268 |
269 | ```python
270 | import numpy as np
271 | from deepbots.supervisor.controllers.csv_supervisor_env import CSVSupervisorEnv
272 | from PPO_agent import PPOAgent, Transition
273 | from utilities import normalize_to_range
274 | ```
275 |
276 | Then we define our class inheriting the imported one, also defining the observation and action spaces.
277 | Here, the observation space is basically the number of the neural network's inputs, so it's defined simply as an
278 | integer.
279 |
280 | Num | Observation | Min | Max
281 | ----|-------------|-----|----
282 | 0 | Cart Position x axis | -0.4 | 0.4
283 | 1 | Cart Velocity | -Inf | Inf
284 | 2 | Pole Angle | -1.3 rad | 1.3 rad
285 | 3 | Pole Velocity at Tip | -Inf | Inf
286 |
287 | The action space defines the outputs of the neural network, which are 2. One for the forward movement
288 | and one for the backward movement of the robot.
289 |
290 | ```python
291 | class CartPoleSupervisor(CSVSupervisorEnv):
292 | def __init__(self):
293 | super().__init__()
294 | self.observation_space = 4 # The agent has 4 inputs
295 | self.action_space = 2 # The agent can perform 2 actions
296 | ```
297 | Then we initialize the `self.robot` variable which will hold a reference to the *CartPole robot* node.
298 |
299 | We also get a reference for the *pole endpoint* node, which is a child node of the *CartPole robot node* and is going
300 | to be useful for getting the pole tip velocity.
301 | ```python
302 | self.robot = self.getFromDef("ROBOT")
303 | self.pole_endpoint = self.getFromDef("POLE_ENDPOINT")
304 | self.message_received = None # Variable to save the messages received from the robot
305 | ```
306 | Finally, we initialize several variables used during training. Note that the `self.steps_per_episode` is set to `200`
307 | based on the problem's definition. Feel free to change the `self.episode_limit` variable.
308 |
309 | ```python
310 | self.episode_count = 0 # Episode counter
311 | self.episode_limit = 10000 # Max number of episodes allowed
312 | self.steps_per_episode = 200 # Max number of steps per episode
313 | self.episode_score = 0 # Score accumulated during an episode
314 | self.episode_score_list = [] # A list to save all the episode scores, used to check if task is solved
315 | ```
316 |
317 | Now it's time for us to implement the base environment methods that a regular Gym environment uses and
318 | most RL algorithm implementations (agents) expect.
319 | These base methods are *get_observations()*, *get_reward()*, *is_done()*, *get_info()* and *render()*. Additionally, we are
320 | going to implement the *get_default_observation()* method which is used internally by deepbots and the *solved()* method
321 | that will help us determine when to stop training.
322 |
323 | Let's start with the `get_observations()` method, which builds the agent's observation (i.e. the neural network's input)
324 | for each step. This method also normalizes the values in the [-1.0, 1.0] range as appropriate, using the
325 | `normalize_to_range()` utility method.
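
To get a feel for what `normalize_to_range()` does (its implementation is in the `utilities.py` script downloaded above), here is a tiny standalone example, run from the controller directory where `utilities.py` was saved:

```python
from utilities import normalize_to_range

# A pole angle of 0.115 rad within [-0.23, 0.23] maps to 0.5 in [-1.0, 1.0]
print(normalize_to_range(0.115, -0.23, 0.23, -1.0, 1.0, clip=True))  # 0.5
```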
326 |
327 | We will start by getting the *CartPole robot* node position and velocity on the x-axis. The x-axis is the direction of
328 | its forward/backward movement. We will also get the pole tip velocity from the *pole endpoint* node we defined earlier.
329 | ```python
330 | def get_observations(self):
331 | # Position on x-axis, first (0) element of the getPosition vector
332 | cart_position = normalize_to_range(self.robot.getPosition()[0], -0.4, 0.4, -1.0, 1.0)
333 | # Linear velocity on x-axis
334 | cart_velocity = normalize_to_range(self.robot.getVelocity()[0], -0.2, 0.2, -1.0, 1.0, clip=True)
335 | # Angular velocity y of endpoint
336 | endpoint_velocity = normalize_to_range(self.pole_endpoint.getVelocity()[4], -1.5, 1.5, -1.0, 1.0, clip=True)
337 | ```
338 | Now all that's missing is the pole angle off vertical, which will be provided by the robot's sensor. We don't have access to
339 | sensor values of other nodes, so the robot needs to actually send the value.
340 | To get it, we will need to call the `handle_receiver()` method which deepbots provides to get the message sent by the robot
341 | into the `self.message_received` variable. The message received, as defined in the robot's `create_message()` method, is a
342 | string which, here, gets converted back into a single float value.
343 |
344 | ```python
345 | # Update self.message_received received from robot, which contains pole angle
346 | self.message_received = self.handle_receiver()
347 | if self.message_received is not None:
348 | pole_angle = normalize_to_range(float(self.message_received[0]), -0.23, 0.23, -1.0, 1.0, clip=True)
349 | else:
350 | # Method is called before self.message_received is initialized
351 | pole_angle = 0.0
352 | ```
353 |
354 | Finally, we return a list containing all four values we created earlier.
355 |
356 | ```python
357 | return [cart_position, cart_velocity, pole_angle, endpoint_velocity]
358 | ```
359 |
360 | Let's also define the *get_default_observation()* method that is used internally by deepbots when a new training episode starts:
361 | ```python
362 | def get_default_observation(self):
363 | # This method just returns a zero vector as a default observation
364 | return [0.0 for _ in range(self.observation_space)]
365 | ```
366 |
367 | Now for something simpler, we will define the `get_reward()` method, which simply returns
368 | `1` for each step. Usually reward functions are more elaborate, but for this problem it is simply
369 | defined as the agent getting a +1 reward for each step it manages to keep the pole from falling.
370 |
371 | ```python
372 | def get_reward(self, action=None):
373 | return 1
374 | ```
375 |
376 | Moving on, we define the *is_done()* method, which contains the episode termination conditions:
377 | - Episode terminates if the pole has fallen beyond an angle from which it cannot realistically recover (±15 degrees)
378 | - Episode terminates if the episode score is over 195
379 | - Episode terminates if the robot hits the walls by moving into them, which is detected based on its position on the x-axis
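
As a quick check, the `0.261799388` threshold used in the code below is just the 15 degrees of the first condition converted to radians:

```python
import math

# 15 degrees expressed in radians, matching the pole angle threshold in is_done()
print(math.radians(15))  # ~0.261799388
```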
380 |
381 | ```python
382 | def is_done(self):
383 | if self.message_received is not None:
384 | pole_angle = round(float(self.message_received[0]), 2)
385 | else:
386 | # method is called before self.message_received is initialized
387 | pole_angle = 0.0
388 | if abs(pole_angle) > 0.261799388: # more than 15 degrees off vertical (defined in radians)
389 | return True
390 |
391 | if self.episode_score > 195.0:
392 | return True
393 |
394 | cart_position = round(self.robot.getPosition()[0], 2) # Position on x-axis
395 | if abs(cart_position) > 0.39:
396 | return True
397 |
398 | return False
399 | ```
400 |
401 | We separate the *solved* condition into another method, the `solved()` method, because it requires different handling.
402 | The *solved* condition depends on the agent completing consecutive episodes successfully and consistently. We measure this
403 | by taking the average episode score of the last 100 episodes and checking whether it's over 195.
404 |
405 | ```python
406 | def solved(self):
407 | if len(self.episode_score_list) > 100: # Over 100 trials thus far
408 | if np.mean(self.episode_score_list[-100:]) > 195.0: # Last 100 episodes' scores average value
409 | return True
410 | return False
411 | ```
412 |
413 | Lastly, we add dummy implementations of the `get_info()` and `render()` methods, because in this example they are not actually
414 | used, but they are required to define a proper Gym environment.
415 |
416 | ```python
417 | def get_info(self):
418 | return None
419 |
420 | def render(self, mode="human"):
421 | pass
422 | ```
423 |
424 | This concludes the *CartPoleSupervisor* class, that now contains all required methods to run an RL training loop!
425 |
426 | ### RL Training Loop
427 |
428 | Finally, it all comes together inside the RL training loop. Now we initialize the RL agent and create the
429 | *CartPoleSupervisor* class object, i.e. the RL environment, with which the agent gets trained to solve the problem,
430 | maximizing the reward received from our reward function and achieving the solved condition we defined.
431 |
432 | **Note that popular frameworks like [stable-baselines3](https://stable-baselines3.readthedocs.io/en/master/) contain the
433 | RL training loop within their *learn* method or similar. Frameworks like *sb3* are fully compatible with *deepbots*, as
434 | *deepbots* defines Gym environments and interfaces them with Webots, saving you a lot of trouble; the resulting
435 | environments can then be supplied to frameworks like *sb3*.**
436 |
437 | For this tutorial we follow a more hands-on approach to get a better grasp of how RL works. Also feel free to check out
438 | the simple PPO agent implementation we provide.
439 |
440 | First we create a supervisor object and then initialize the PPO agent, providing it with the observation and action
441 | spaces.
442 |
443 | ```python
444 | env = CartPoleSupervisor()
445 | agent = PPOAgent(env.observation_space, env.action_space)
446 | ```
447 |
448 | Then we set the `solved` flag to `False`. This flag is used to terminate the training loop.
449 | ```python
450 | solved = False
451 | ```
452 |
453 | Now we define the outer training loop which runs the number of episodes defined in the supervisor class
454 | and resets the world to get the starting observation. We also reset the episode score to zero.
455 |
456 | _(please be mindful of the indentation on the following code, because we are about to define several levels of nested
457 | loops and ifs)_
458 | ```python
459 | # Run outer loop until the episodes limit is reached or the task is solved
460 | while not solved and env.episode_count < env.episode_limit:
461 | observation = env.reset() # Reset robot and get starting observation
462 | env.episode_score = 0
463 | ```
464 |
465 | Inside the outer loop defined above we define the inner loop which runs for the course of an episode. This loop
466 | runs for a maximum number of steps defined by the problem. Here, the RL agent - environment loop takes place.
467 |
468 | We start by calling the `agent.work()` method, providing it with the current observation, which for the first step
469 | is the zero vector returned by the `reset()` method, through the `get_default_observation()` method we defined.
470 | The `work()` method implements the forward pass of the agent's actor neural network, providing us with the next action.
471 | As the comment suggests the PPO algorithm implements exploration by sampling the probability distribution the
472 | agent outputs from its actor's softmax output layer.
473 |
474 | ```python
475 | for step in range(env.steps_per_episode):
476 | # In training mode the agent samples from the probability distribution, naturally implementing exploration
477 | selected_action, action_prob = agent.work(observation, type_="selectAction")
478 | ```
479 |
480 | The next part contains the call to the `step()` method which is defined internally in deepbots. This method calls most of
481 | the methods we implemented earlier (`get_observations()`, `get_reward()`, `is_done()` and `get_info()`), steps the Webots
482 | controller and sends the action that the agent selected to the robot for execution. Step returns the new observation,
483 | the reward for the previous action and whether the episode is terminated (info is not implemented in this example).
484 |
485 | Then, we create the `Transition`, which is a named tuple that contains, as the name suggests, the transition between
486 | the previous `observation` (or `state`) to the `new_observation` (or `new_state`). This is needed by the agent for its training
487 | procedure, so we call the agent's `store_transition()` method to save it to its buffer. Most RL algorithms require a
488 | similar procedure and have similar methods to do it.
489 |
490 | ```python
491 | # Step the env to get the current selected_action's reward, the new observation and whether we reached
492 | # the done condition
493 | new_observation, reward, done, info = env.step([selected_action])
494 |
495 | # Save the current state transition in agent's memory
496 | trans = Transition(observation, selected_action, action_prob, reward, new_observation)
497 | agent.store_transition(trans)
498 | ```
499 |
500 | Finally, we check whether the episode is terminated and if it is, we save the episode score, run a training step
501 | for the agent giving the number of steps taken in the episode as batch size, and check whether the problem is solved
502 | via the `solved()` method and break.
503 |
504 | If not, we add the step reward to the `episode_score` accumulator, save the `new_observation` as `observation` and loop
505 | onto the next episode step.
506 |
507 | **Note that in more realistic training procedures, the training step might not run for each episode. Depending on the problem
508 | you might need to run the training procedure multiple times per episode or once per multiple episodes. This is set as `n_steps`
509 | or similar in frameworks like *sb3*. Moreover, changing the batch size along with `n_steps` might greatly influence the
510 | training results and whether the agent actually converges to a solution, so they are crucial parameters.**
511 |
512 | ```python
513 | if done:
514 | # Save the episode's score
515 | env.episode_score_list.append(env.episode_score)
516 | agent.train_step(batch_size=step + 1)
517 | solved = env.solved() # Check whether the task is solved
518 | break
519 |
520 | env.episode_score += reward # Accumulate episode reward
521 | observation = new_observation # observation for next step is current step's new_observation
522 | ```
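
To make the note about *sb3* above more concrete, here is a rough, illustrative sketch of where `n_steps` and `batch_size` appear in *sb3*'s PPO constructor. It uses the standalone Gym "CartPole-v1" environment purely as an example, not this tutorial's Webots environment:

```python
# Illustrative only, not part of this tutorial's code; requires stable-baselines3
from stable_baselines3 import PPO

# n_steps: how many environment steps are collected before each policy update
# batch_size: the minibatch size used within each update
model = PPO("MlpPolicy", "CartPole-v1", n_steps=2048, batch_size=64, verbose=0)
model.learn(total_timesteps=10_000)  # sb3's learn() contains the whole training loop
```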
523 |
524 | That completes the inner loop. Now we add a print statement and increment the episode counter to finalize the outer
525 | loop.
526 |
527 | (note that the following code snippet is part of the outer loop)
528 | ```python
529 | print("Episode #", env.episode_count, "score:", env.episode_score)
530 | env.episode_count += 1 # Increment episode counter
531 | ```
532 |
533 | With the outer loop done, the training procedure is complete. Now all that's left is the testing loop, which is a
534 | barebones, simpler version of the training loop. First we print a message on whether the task is solved or not (i.e.
535 | reached the episode limit without satisfying the solved condition) and call the `reset()` method. Then, we create a
536 | `while True` loop that runs the agent's forward method, but this time selecting the action with the max probability
537 | out of the actor's softmax output, eliminating exploration/randomness. Finally, the `step()` method is called, but
538 | this time we keep only the observation it returns to keep the environment - agent loop running. If the *done* flag is true, we
539 | reset the environment to start over.
540 |
541 | ```python
542 | if not solved:
543 | print("Task is not solved, deploying agent for testing...")
544 | elif solved:
545 | print("Task is solved, deploying agent for testing...")
546 |
547 | observation = env.reset()
548 | env.episode_score = 0.0
549 | while True:
550 | selected_action, action_prob = agent.work(observation, type_="selectActionMax")
551 | observation, _, done, _ = env.step([selected_action])
552 | if done:
553 | observation = env.reset()
554 | ```
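
If you want to keep the trained models around, the supplied PPO agent also provides `save()` and `load()` helpers (see `PPO_agent.py`). A minimal sketch, assuming you save right after training with a file path of your choice:

```python
# Saves <path>_actor.pkl and <path>_critic.pkl next to the controller script
agent.save("./cartpole_ppo")
# Later, a fresh agent can restore the weights with:
# agent.load("./cartpole_ppo")
```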
555 |
556 | ### Conclusion
557 |
558 | Now with the coding done you can click on the *Run the simulation* button and watch the training run!
559 |
560 | \
561 | Webots allows you to speed up the simulation, or even run it without graphics, so the training shouldn't take long, at
562 | least to see the agent becoming visibly better at moving under the pole to balance it. It takes a while for it to
563 | achieve the *solved* condition, but when it does, it becomes quite good at balancing the pole! You can even apply forces
564 | in real time by Alt + left-clicking and dragging on the robot or the pole.
565 |
566 | That's it for this tutorial! :)
567 |
568 | **_We welcome you to leave comments and feedback for the tutorial on the relevant
569 | [discussions page](https://github.com/aidudezzz/deepbots-tutorials/discussions/15?sort=new) or to open an issue for any
570 | problem you find in it!_**
571 |
572 | 
573 |
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/full_project/controllers/robot_controller/robot_controller.py:
--------------------------------------------------------------------------------
1 | from deepbots.robots.controllers.csv_robot import CSVRobot
2 |
3 |
4 | class CartpoleRobot(CSVRobot):
5 | def __init__(self):
6 | super().__init__()
7 | self.position_sensor = self.getDevice("polePosSensor")
8 | self.position_sensor.enable(self.timestep)
9 | self.wheels = []
10 | for wheel_name in ['wheel1', 'wheel2', 'wheel3', 'wheel4']:
11 | wheel = self.getDevice(wheel_name) # Get the wheel handle
12 | wheel.setPosition(float('inf')) # Set starting position
13 | wheel.setVelocity(0.0) # Zero out starting velocity
14 | self.wheels.append(wheel)
15 |
16 | def create_message(self):
17 | # Read the sensor value, convert to string and save it in a list
18 | message = [str(self.position_sensor.getValue())]
19 | return message
20 |
21 | def use_message_data(self, message):
22 | action = int(message[0]) # Convert the string message into an action integer
23 |
24 | if action == 0:
25 | motor_speed = 5.0
26 | elif action == 1:
27 | motor_speed = -5.0
28 | else:
29 | motor_speed = 0.0
30 |
31 | # Set the motors' velocities based on the action received
32 | for i in range(len(self.wheels)):
33 | self.wheels[i].setPosition(float('inf'))
34 | self.wheels[i].setVelocity(motor_speed)
35 |
36 |
37 | # Create the robot controller object and run it
38 | robot_controller = CartpoleRobot()
39 | robot_controller.run() # Run method is implemented by the framework, just need to call it
40 |
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/full_project/controllers/supervisor_controller/PPO_agent.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | import torch.optim as optim
4 | from torch.distributions import Categorical
5 | from torch import from_numpy, no_grad, save, load, tensor, clamp
6 | from torch import float as torch_float
7 | from torch import long as torch_long
8 | from torch import min as torch_min
9 | from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
10 | import numpy as np
11 | from torch import manual_seed
12 | from collections import namedtuple
13 |
14 | Transition = namedtuple('Transition', ['state', 'action', 'a_log_prob', 'reward', 'next_state'])
15 |
16 |
17 | class PPOAgent:
18 | """
19 | PPOAgent implements the PPO RL algorithm (https://arxiv.org/abs/1707.06347).
20 | It works with a set of discrete actions.
21 | It uses the Actor and Critic neural network classes defined below.
22 | """
23 |
24 | def __init__(self, number_of_inputs, number_of_actor_outputs, clip_param=0.2, max_grad_norm=0.5, ppo_update_iters=5,
25 | batch_size=8, gamma=0.99, use_cuda=False, actor_lr=0.001, critic_lr=0.003, seed=None):
26 | super().__init__()
27 | if seed is not None:
28 | manual_seed(seed)
29 |
30 | # Hyper-parameters
31 | self.clip_param = clip_param
32 | self.max_grad_norm = max_grad_norm
33 | self.ppo_update_iters = ppo_update_iters
34 | self.batch_size = batch_size
35 | self.gamma = gamma
36 | self.use_cuda = use_cuda
37 |
38 | # models
39 | self.actor_net = Actor(number_of_inputs, number_of_actor_outputs)
40 | self.critic_net = Critic(number_of_inputs)
41 |
42 | if self.use_cuda:
43 | self.actor_net.cuda()
44 | self.critic_net.cuda()
45 |
46 | # Create the optimizers
47 | self.actor_optimizer = optim.Adam(self.actor_net.parameters(), actor_lr)
48 | self.critic_net_optimizer = optim.Adam(self.critic_net.parameters(), critic_lr)
49 |
50 | # Training stats
51 | self.buffer = []
52 |
53 | def work(self, agent_input, type_="simple"):
54 | """
55 | type_ == "simple"
56 | Implementation for a simple forward pass.
57 | type_ == "selectAction"
58 | Implementation for the forward pass, that returns a selected action according to the probability
59 | distribution and its probability.
60 | type_ == "selectActionMax"
61 | Implementation for the forward pass, that returns the max selected action.
62 | """
63 | agent_input = from_numpy(np.array(agent_input)).float().unsqueeze(0)
64 | if self.use_cuda:
65 | agent_input = agent_input.cuda()
66 | with no_grad():
67 | action_prob = self.actor_net(agent_input)
68 |
69 | if type_ == "simple":
70 | output = [action_prob[0][i].data.tolist() for i in range(len(action_prob[0]))]
71 | return output
72 | elif type_ == "selectAction":
73 | c = Categorical(action_prob)
74 | action = c.sample()
75 | return action.item(), action_prob[:, action.item()].item()
76 | elif type_ == "selectActionMax":
77 | return np.argmax(action_prob).item(), 1.0
78 | else:
79 | raise Exception("Wrong type in agent.work(), returning input")
80 |
81 | def get_value(self, state):
82 | """
83 | Gets the value of the current state according to the critic model.
84 |
85 | :param state: The current state
86 | :return: state's value
87 | """
88 | state = from_numpy(state)
89 | with no_grad():
90 | value = self.critic_net(state)
91 | return value.item()
92 |
93 | def save(self, path):
94 | """
95 | Save actor and critic models in the path provided.
96 | :param path: path to save the models
97 | :return: None
98 | """
99 | save(self.actor_net.state_dict(), path + '_actor.pkl')
100 | save(self.critic_net.state_dict(), path + '_critic.pkl')
101 |
102 | def load(self, path):
103 | """
104 | Load actor and critic models from the path provided.
105 | :param path: path where the models are saved
106 | :return: None
107 | """
108 | actor_state_dict = load(path + '_actor.pkl')
109 | critic_state_dict = load(path + '_critic.pkl')
110 | self.actor_net.load_state_dict(actor_state_dict)
111 | self.critic_net.load_state_dict(critic_state_dict)
112 |
113 | def store_transition(self, transition):
114 | """
115 | Stores a transition in the buffer to be used later.
116 |
117 | :param transition: state, action, action_prob, reward, next_state
118 | :return: None
119 | """
120 | self.buffer.append(transition)
121 |
122 | def train_step(self, batch_size=None):
123 | """
124 | Performs a training step or update for the actor and critic models, based on transitions gathered in the
125 | buffer. It then resets the buffer.
126 | If provided with a batch_size, this is used instead of default self.batch_size
127 |
128 | :param: batch_size: int
129 | :return: None
130 | """
131 | if batch_size is None:
132 | if len(self.buffer) < self.batch_size:
133 | return
134 | batch_size = self.batch_size
135 |
136 | state = tensor([t.state for t in self.buffer], dtype=torch_float)
137 | action = tensor([t.action for t in self.buffer], dtype=torch_long).view(-1, 1)
138 | reward = [t.reward for t in self.buffer]
139 | old_action_log_prob = tensor([t.a_log_prob for t in self.buffer], dtype=torch_float).view(-1, 1)
140 |
141 | # Unroll rewards
142 | R = 0
143 | Gt = []
144 | for r in reward[::-1]:
145 | R = r + self.gamma * R
146 | Gt.insert(0, R)
147 | Gt = tensor(Gt, dtype=torch_float)
148 |
149 | if self.use_cuda:
150 | state, action, old_action_log_prob = state.cuda(), action.cuda(), old_action_log_prob.cuda()
151 | Gt = Gt.cuda()
152 |
153 | for _ in range(self.ppo_update_iters):
154 | for index in BatchSampler(SubsetRandomSampler(range(len(self.buffer))), batch_size, False):
155 | # Calculate the advantage at each step
156 | Gt_index = Gt[index].view(-1, 1)
157 | V = self.critic_net(state[index])
158 | delta = Gt_index - V
159 | advantage = delta.detach()
160 |
161 | # Get the current prob
162 | action_prob = self.actor_net(state[index]).gather(1, action[index]) # new policy
163 |
164 | # PPO
165 | ratio = (action_prob / old_action_log_prob[index])
166 | surr1 = ratio * advantage
167 | surr2 = clamp(ratio, 1 - self.clip_param, 1 + self.clip_param) * advantage
168 |
169 | # update actor network
170 | action_loss = -torch_min(surr1, surr2).mean() # MAX->MIN descent
171 | self.actor_optimizer.zero_grad()
172 | action_loss.backward()
173 | nn.utils.clip_grad_norm_(self.actor_net.parameters(), self.max_grad_norm)
174 | self.actor_optimizer.step()
175 |
176 | # update critic network
177 | value_loss = F.mse_loss(Gt_index, V)
178 | self.critic_net_optimizer.zero_grad()
179 | value_loss.backward()
180 | nn.utils.clip_grad_norm_(self.critic_net.parameters(), self.max_grad_norm)
181 | self.critic_net_optimizer.step()
182 |
183 | del self.buffer[:]
184 |
185 |
186 | class Actor(nn.Module):
187 | def __init__(self, number_of_inputs, number_of_outputs):
188 | super(Actor, self).__init__()
189 | self.fc1 = nn.Linear(number_of_inputs, 10)
190 | self.fc2 = nn.Linear(10, 10)
191 | self.action_head = nn.Linear(10, number_of_outputs)
192 |
193 | def forward(self, x):
194 | x = F.relu(self.fc1(x))
195 | x = F.relu(self.fc2(x))
196 | action_prob = F.softmax(self.action_head(x), dim=1)
197 | return action_prob
198 |
199 |
200 | class Critic(nn.Module):
201 | def __init__(self, number_of_inputs):
202 | super(Critic, self).__init__()
203 | self.fc1 = nn.Linear(number_of_inputs, 10)
204 | self.fc2 = nn.Linear(10, 10)
205 | self.state_value = nn.Linear(10, 1)
206 |
207 | def forward(self, x):
208 | x = F.relu(self.fc1(x))
209 | x = F.relu(self.fc2(x))
210 | value = self.state_value(x)
211 | return value
212 |
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/full_project/controllers/supervisor_controller/cartpole_robot_definition.txt:
--------------------------------------------------------------------------------
1 | DEF ROBOT Robot {
2 | translation 0 0 0.04
3 | rotation 1 0 0 1.5708
4 | children [
5 | DEF EMITTER Emitter {
6 | }
7 | DEF RECEIVER Receiver {
8 | }
9 | DEF HINGE_COVER Solid {
10 | translation 0 0.03 -3.469446951953614e-18
11 | rotation 0 1 0 -1.5707953071795862
12 | children [
13 | Shape {
14 | appearance PBRAppearance {
15 | baseColor 0 0.6509803921568628 1
16 | }
17 | geometry Box {
18 | size 0.030000000000000002 0.019999999999999997 0.05
19 | }
20 | }
21 | ]
22 | name "hingeCover"
23 | }
24 | DEF BODY Shape {
25 | appearance PBRAppearance {
26 | baseColor 0.917647 0.145098 0.145098
27 | roughness 1
28 | metalness 0
29 | }
30 | geometry Box {
31 | size 0.2 0.05 0.08
32 | }
33 | }
34 | DEF WHEEL1 HingeJoint {
35 | jointParameters HingeJointParameters {
36 | position 6.723540149111039e-17
37 | axis 0 0 1
38 | anchor 0.06 0 0.05
39 | }
40 | device [
41 | RotationalMotor {
42 | name "wheel1"
43 | }
44 | ]
45 | endPoint Solid {
46 | translation 0.060001004644818584 1.339276918416963e-05 0.050000010543824816
47 | rotation 1.7671082538995992e-08 -1.7671248834016876e-08 0.9999999999999998 1.5708000430119498
48 | children [
49 | DEF WHEEL Shape {
50 | appearance PBRAppearance {
51 | baseColor 0.305882 0.898039 0.25098
52 | roughness 1
53 | metalness 0
54 | }
55 | geometry Cylinder {
56 | height 0.02
57 | radius 0.04
58 | subdivision 24
59 | }
60 | }
61 | ]
62 | boundingObject USE WHEEL
63 | physics Physics {
64 | }
65 | }
66 | }
67 | DEF WHEEL2 HingeJoint {
68 | jointParameters HingeJointParameters {
69 | position 6.670640318240083e-17
70 | axis 0 0 1
71 | anchor -0.06 0 0.05
72 | }
73 | device [
74 | RotationalMotor {
75 | name "wheel2"
76 | }
77 | ]
78 | endPoint Solid {
79 | translation -0.060001095993954395 7.616338723251721e-07 0.05000000952472374
80 | rotation 1.76096540431902e-08 -1.7609826183889808e-08 0.9999999999999997 1.5707999569897093
81 | children [
82 | USE WHEEL
83 | ]
84 | name "solid(1)"
85 | boundingObject USE WHEEL
86 | physics Physics {
87 | }
88 | }
89 | }
90 | DEF WHEEL3 HingeJoint {
91 | jointParameters HingeJointParameters {
92 | position 1.0324509589061627e-16
93 | axis 0 0 1
94 | anchor 0.06 0 -0.05
95 | }
96 | device [
97 | RotationalMotor {
98 | name "wheel3"
99 | }
100 | ]
101 | endPoint Solid {
102 | translation 0.06000100379909951 1.3392659934531687e-05 -0.05000000738328558
103 | rotation -1.7671380018270767e-08 1.7671539284228046e-08 0.9999999999999998 1.5708000430119113
104 | children [
105 | USE WHEEL
106 | ]
107 | name "solid(2)"
108 | boundingObject USE WHEEL
109 | physics Physics {
110 | }
111 | }
112 | }
113 | DEF WHEEL4 HingeJoint {
114 | jointParameters HingeJointParameters {
115 | position -2.6895122773572173e-17
116 | axis 0 0 1
117 | anchor -0.06 0 -0.05
118 | }
119 | device [
120 | RotationalMotor {
121 | name "wheel4"
122 | }
123 | ]
124 | endPoint Solid {
125 | translation -0.060001096839924606 7.615246225854488e-07 -0.05000000839401549
126 | rotation -1.761000837263908e-08 1.761018020913388e-08 0.9999999999999997 1.5707999569897055
127 | children [
128 | USE WHEEL
129 | ]
130 | name "solid(3)"
131 | boundingObject USE WHEEL
132 | physics Physics {
133 | }
134 | }
135 | }
136 | DEF POLE HingeJoint {
137 | jointParameters HingeJointParameters {
138 | position -4.812689688184746e-16
139 | axis 0 0 1
140 | anchor 0 0.03000000000047226 0
141 | minStop -1.3
142 | maxStop 1.3
143 | }
144 | device [
145 | DEF POLE_POS_SENSOR PositionSensor {
146 | name "polePosSensor"
147 | }
148 | ]
149 | endPoint Solid {
150 | translation -0.0004655326854983616 0.5300037594690288 -5.369584240411801e-07
151 | rotation -0.57698744109641 -0.577531387769005 0.5775318077462263 2.0949379676602047
152 | children [
153 | DEF POLE_ENDPOINT Solid {
154 | translation 0.5000000000000002 0 0
155 | scale 0.1 0.1 0.1
156 | children [
157 | DEF BALL Shape {
158 | appearance PBRAppearance {
159 | baseColor 0.03137254901960784 0.047058823529411764 1
160 | metalness 0
161 | }
162 | geometry Sphere {
163 | radius 0.1
164 | }
165 | }
166 | ]
167 | boundingObject USE BALL
168 | physics Physics {
169 | density -1
170 | mass 0.02
171 | }
172 | }
173 | DEF POLE Shape {
174 | appearance PBRAppearance {
175 | baseColor 1 0.1568627450980392 0.1568627450980392
176 | roughness 1
177 | metalness 0
178 | }
179 | geometry Box {
180 | size 1 0.020000000000000004 0.019999999999999997
181 | }
182 | }
183 | ]
184 | name "pole"
185 | boundingObject USE POLE
186 | physics Physics {
187 | density -1
188 | mass 0.2
189 | }
190 | }
191 | }
192 | ]
193 | boundingObject USE BODY
194 | physics Physics {
195 | density -1
196 | mass 2
197 | }
198 | controller ""
199 | }
200 |
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/full_project/controllers/supervisor_controller/supervisor_controller.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from deepbots.supervisor.controllers.csv_supervisor_env import CSVSupervisorEnv
3 | from PPO_agent import PPOAgent, Transition
4 | from utilities import normalize_to_range
5 |
6 |
7 | class CartPoleSupervisor(CSVSupervisorEnv):
8 | def __init__(self):
9 | super().__init__()
10 | self.observation_space = 4 # The agent has 4 inputs
11 | self.action_space = 2 # The agent can perform 2 actions
12 |
13 | self.robot = self.getFromDef("ROBOT")
14 | self.pole_endpoint = self.getFromDef("POLE_ENDPOINT")
15 | self.message_received = None # Variable to save the messages received from the robot
16 |
17 | self.episode_count = 0 # Episode counter
18 | self.episode_limit = 10000 # Max number of episodes allowed
19 | self.steps_per_episode = 200 # Max number of steps per episode
20 | self.episode_score = 0 # Score accumulated during an episode
21 | self.episode_score_list = [] # A list to save all the episode scores, used to check if task is solved
22 |
23 | def get_observations(self):
24 | # Position on x-axis, first (0) element of the getPosition vector
25 | cart_position = normalize_to_range(self.robot.getPosition()[0], -0.4, 0.4, -1.0, 1.0)
26 | # Linear velocity on x-axis
27 | cart_velocity = normalize_to_range(self.robot.getVelocity()[0], -0.2, 0.2, -1.0, 1.0, clip=True)
28 | # Angular velocity y of endpoint
29 | endpoint_velocity = normalize_to_range(self.pole_endpoint.getVelocity()[4], -1.5, 1.5, -1.0, 1.0, clip=True)
30 |
31 | # Update self.message_received received from robot, which contains pole angle
32 | self.message_received = self.handle_receiver()
33 | if self.message_received is not None:
34 | pole_angle = normalize_to_range(float(self.message_received[0]), -0.23, 0.23, -1.0, 1.0, clip=True)
35 | else:
36 | # Method is called before self.message_received is initialized
37 | pole_angle = 0.0
38 |
39 | return [cart_position, cart_velocity, pole_angle, endpoint_velocity]
40 |
41 | def get_default_observation(self):
42 | # This method just returns a zero vector as a default observation
43 | return [0.0 for _ in range(self.observation_space)]
44 |
45 | def get_reward(self, action=None):
46 | # Reward is +1 for every step the episode hasn't ended
47 | return 1
48 |
49 | def is_done(self):
50 | if self.message_received is not None:
51 | pole_angle = round(float(self.message_received[0]), 2)
52 | else:
53 | # method is called before self.message_received is initialized
54 | pole_angle = 0.0
55 | if abs(pole_angle) > 0.261799388: # more than 15 degrees off vertical (defined in radians)
56 | return True
57 |
58 | if self.episode_score > 195.0:
59 | return True
60 |
61 | cart_position = round(self.robot.getPosition()[0], 2) # Position on x-axis
62 | if abs(cart_position) > 0.39:
63 | return True
64 |
65 | return False
66 |
67 | def solved(self):
68 | if len(self.episode_score_list) > 100: # Over 100 trials thus far
69 | if np.mean(self.episode_score_list[-100:]) > 195.0: # Last 100 episodes' scores average value
70 | return True
71 | return False
72 |
73 | def get_info(self):
74 | return None
75 |
76 | def render(self, mode="human"):
77 | pass
78 |
79 |
80 | env = CartPoleSupervisor()
81 | agent = PPOAgent(env.observation_space, env.action_space)
82 |
83 | solved = False
84 | # Run outer loop until the episodes limit is reached or the task is solved
85 | while not solved and env.episode_count < env.episode_limit:
86 | observation = env.reset() # Reset robot and get starting observation
87 | env.episode_score = 0
88 |
89 | for step in range(env.steps_per_episode):
90 | # In training mode the agent samples from the probability distribution, naturally implementing exploration
91 | selected_action, action_prob = agent.work(observation, type_="selectAction")
92 | # Step the env to get the current selected_action's reward, the new observation and whether we reached
93 | # the done condition
94 | new_observation, reward, done, info = env.step([selected_action])
95 |
96 | # Save the current state transition in agent's memory
97 | trans = Transition(observation, selected_action, action_prob, reward, new_observation)
98 | agent.store_transition(trans)
99 |
100 | if done:
101 | # Save the episode's score
102 | env.episode_score_list.append(env.episode_score)
103 | agent.train_step(batch_size=step + 1)
104 | solved = env.solved() # Check whether the task is solved
105 | break
106 |
107 | env.episode_score += reward # Accumulate episode reward
108 | observation = new_observation # observation for next step is current step's new_observation
109 |
110 | print("Episode #", env.episode_count, "score:", env.episode_score)
111 | env.episode_count += 1 # Increment episode counter
112 |
113 | if not solved:
114 | print("Task is not solved, deploying agent for testing...")
115 | elif solved:
116 | print("Task is solved, deploying agent for testing...")
117 |
118 | observation = env.reset()
119 | env.episode_score = 0.0
120 | while True:
121 | selected_action, action_prob = agent.work(observation, type_="selectActionMax")
122 | observation, _, done, _ = env.step([selected_action])
123 | if done:
124 | observation = env.reset()
125 |
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/full_project/controllers/supervisor_controller/utilities.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def normalize_to_range(value, min_val, max_val, new_min, new_max, clip=False):
5 | """
6 | Normalize value to a specified new range by supplying the current range.
7 |
8 | :param value: value to be normalized
9 | :param min_val: value's min value, value ∈ [min_val, max_val]
10 | :param max_val: value's max value, value ∈ [min_val, max_val]
11 | :param new_min: normalized range min value
12 | :param new_max: normalized range max value
13 | :param clip: whether to clip normalized value to new range or not
14 | :return: normalized value ∈ [new_min, new_max]
15 | """
16 | value = float(value)
17 | min_val = float(min_val)
18 | max_val = float(max_val)
19 | new_min = float(new_min)
20 | new_max = float(new_max)
21 |
22 | if clip:
23 | return np.clip((new_max - new_min) / (max_val - min_val) * (value - max_val) + new_max, new_min, new_max)
24 | else:
25 | return (new_max - new_min) / (max_val - min_val) * (value - max_val) + new_max
26 |
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/full_project/worlds/cartpole_world.wbt:
--------------------------------------------------------------------------------
1 | #VRML_SIM R2023a utf8
2 |
3 | EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2023a/projects/objects/backgrounds/protos/TexturedBackground.proto"
4 | EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2023a/projects/objects/backgrounds/protos/TexturedBackgroundLight.proto"
5 | EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2023a/projects/objects/floors/protos/RectangleArena.proto"
6 |
7 | WorldInfo {
8 | }
9 | Viewpoint {
10 | orientation -0.1849332562219514 0.10769485053314654 0.9768323857816086 1.9029469581134206
11 | position 1.835319026945999 -4.251256239319215 2.283828860202701
12 | }
13 | TexturedBackground {
14 | }
15 | TexturedBackgroundLight {
16 | }
17 | RectangleArena {
18 | }
19 | DEF SUPERVISOR Robot {
20 | children [
21 | DEF EMITTER Emitter {
22 | }
23 | DEF RECEIVER Receiver {
24 | }
25 | ]
26 | name "supervisor"
27 | controller "supervisorController"
28 | supervisor TRUE
29 | }
30 | DEF ROBOT Robot {
31 | translation 0 0 0.04
32 | rotation 1 0 0 1.5708
33 | children [
34 | DEF EMITTER Emitter {
35 | }
36 | DEF RECEIVER Receiver {
37 | }
38 | DEF HINGE_COVER Solid {
39 | translation 0 0.03 -3.469446951953614e-18
40 | rotation 0 1 0 -1.5707953071795862
41 | children [
42 | Shape {
43 | appearance PBRAppearance {
44 | baseColor 0 0.6509803921568628 1
45 | }
46 | geometry Box {
47 | size 0.030000000000000002 0.019999999999999997 0.05
48 | }
49 | }
50 | ]
51 | name "hingeCover"
52 | }
53 | DEF BODY Shape {
54 | appearance PBRAppearance {
55 | baseColor 0.917647 0.145098 0.145098
56 | roughness 1
57 | metalness 0
58 | }
59 | geometry Box {
60 | size 0.2 0.05 0.08
61 | }
62 | }
63 | DEF WHEEL1 HingeJoint {
64 | jointParameters HingeJointParameters {
65 | position 6.723540149111039e-17
66 | axis 0 0 1
67 | anchor 0.06 0 0.05
68 | }
69 | device [
70 | RotationalMotor {
71 | name "wheel1"
72 | }
73 | ]
74 | endPoint Solid {
75 | translation 0.060001004644818584 1.3392769184169005e-05 0.050000010543824816
76 | rotation 1.767108253899599e-08 -1.7671248834016873e-08 0.9999999999999998 1.5708000430119504
77 | children [
78 | DEF WHEEL Shape {
79 | appearance PBRAppearance {
80 | baseColor 0.305882 0.898039 0.25098
81 | roughness 1
82 | metalness 0
83 | }
84 | geometry Cylinder {
85 | height 0.02
86 | radius 0.04
87 | subdivision 24
88 | }
89 | }
90 | ]
91 | boundingObject USE WHEEL
92 | physics Physics {
93 | }
94 | }
95 | }
96 | DEF WHEEL2 HingeJoint {
97 | jointParameters HingeJointParameters {
98 | position 6.670640318240083e-17
99 | axis 0 0 1
100 | anchor -0.06 0 0.05
101 | }
102 | device [
103 | RotationalMotor {
104 | name "wheel2"
105 | }
106 | ]
107 | endPoint Solid {
108 | translation -0.060001095993954395 7.616338723243336e-07 0.05000000952472374
109 | rotation 1.760965404319019e-08 -1.7609826183889798e-08 0.9999999999999997 1.5707999569897086
110 | children [
111 | USE WHEEL
112 | ]
113 | name "solid(1)"
114 | boundingObject USE WHEEL
115 | physics Physics {
116 | }
117 | }
118 | }
119 | DEF WHEEL3 HingeJoint {
120 | jointParameters HingeJointParameters {
121 | position 1.0324509589061627e-16
122 | axis 0 0 1
123 | anchor 0.06 0 -0.05
124 | }
125 | device [
126 | RotationalMotor {
127 | name "wheel3"
128 | }
129 | ]
130 | endPoint Solid {
131 | translation 0.06000100379909951 1.3392659934531789e-05 -0.05000000738328558
132 | rotation -1.7671380018270767e-08 1.7671539284228043e-08 0.9999999999999998 1.5708000430119118
133 | children [
134 | USE WHEEL
135 | ]
136 | name "solid(2)"
137 | boundingObject USE WHEEL
138 | physics Physics {
139 | }
140 | }
141 | }
142 | DEF WHEEL4 HingeJoint {
143 | jointParameters HingeJointParameters {
144 | position -2.6895122773572173e-17
145 | axis 0 0 1
146 | anchor -0.06 0 -0.05
147 | }
148 | device [
149 | RotationalMotor {
150 | name "wheel4"
151 | }
152 | ]
153 | endPoint Solid {
154 | translation -0.060001096839924606 7.615246225860199e-07 -0.05000000839401549
155 | rotation -1.761000837263908e-08 1.7610180209133882e-08 0.9999999999999997 1.5707999569897055
156 | children [
157 | USE WHEEL
158 | ]
159 | name "solid(3)"
160 | boundingObject USE WHEEL
161 | physics Physics {
162 | }
163 | }
164 | }
165 | DEF POLE HingeJoint {
166 | jointParameters HingeJointParameters {
167 | position -4.812689688184746e-16
168 | axis 0 0 1
169 | anchor 0 0.03000000000047226 0
170 | minStop -1.3
171 | maxStop 1.3
172 | }
173 | device [
174 | DEF POLE_POS_SENSOR PositionSensor {
175 | name "polePosSensor"
176 | }
177 | ]
178 | endPoint Solid {
179 | translation -0.00046553268549740846 0.5300037594690286 -5.369584240411798e-07
180 | rotation -0.5769874410964105 -0.5775313877690047 0.577531807746226 2.0949379676602047
181 | children [
182 | DEF POLE_ENDPOINT Solid {
183 | translation 0.5000000000000002 0 0
184 | scale 0.1 0.1 0.1
185 | children [
186 | DEF BALL Shape {
187 | appearance PBRAppearance {
188 | baseColor 0.03137254901960784 0.047058823529411764 1
189 | metalness 0
190 | }
191 | geometry Sphere {
192 | radius 0.1
193 | }
194 | }
195 | ]
196 | boundingObject USE BALL
197 | physics Physics {
198 | density -1
199 | mass 0.02
200 | }
201 | }
202 | DEF POLE Shape {
203 | appearance PBRAppearance {
204 | baseColor 1 0.1568627450980392 0.1568627450980392
205 | roughness 1
206 | metalness 0
207 | }
208 | geometry Box {
209 | size 1 0.020000000000000004 0.019999999999999997
210 | }
211 | }
212 | ]
213 | name "pole"
214 | boundingObject USE POLE
215 | physics Physics {
216 | density -1
217 | mass 0.2
218 | }
219 | }
220 | }
221 | ]
222 | boundingObject USE BODY
223 | physics Physics {
224 | density -1
225 | mass 2
226 | }
227 | controller "robotController"
228 | }
229 |
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/10_new_controllers_created.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/10_new_controllers_created.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/11_assign_supervisor_controller_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/11_assign_supervisor_controller_1.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/12_assign_supervisor_controller_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/12_assign_supervisor_controller_2.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/13_workflow_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/13_workflow_diagram.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/14_click_play.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/14_click_play.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/1_new_proj_menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/1_new_proj_menu.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/2_world_settings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/2_world_settings.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/3_project_created.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/3_project_created.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/4_reload_world.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/4_reload_world.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/5_add_new_object.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/5_add_new_object.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/6_add_robot_node.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/6_add_robot_node.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/7_set_supervisor_true.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/7_set_supervisor_true.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/8_click_save_button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/8_click_save_button.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/9_new_controller_menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/9_new_controller_menu.png
--------------------------------------------------------------------------------
/emitterReceiverSchemeTutorial/images/cartPoleWorld.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/emitterReceiverSchemeTutorial/images/cartPoleWorld.gif
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/README.md:
--------------------------------------------------------------------------------
1 | # CartPole Beginner Robot-Supervisor Scheme Tutorial
2 |
3 | 
4 |
5 | This tutorial explains how to use the [*deepbots framework*](https://github.com/aidudezzz/deepbots) by setting
6 | up a simple problem. We will use the
7 | [robot-supervisor scheme](https://github.com/aidudezzz/deepbots#combined-robot-supervisor-scheme)
8 | which combines the gym environment and the robot controller in one script, forgoing the emitter and receiver communication.
9 |
10 | This tutorial can get you started for use cases that use a single robot, which should cover most needs.
11 | If you desire to set up a more complicated example that might use multiple robots or similar, refer to the
12 | [emitter-receiver tutorial](https://github.com/aidudezzz/deepbots-tutorials/tree/master/emitterReceiverSchemeTutorial)
13 | to get started.
14 |
15 | Keep in mind that the tutorial is very detailed and many parts can be completed really fast by an
16 | experienced user. The tutorial assumes no familiarity with the [Webots](https://cyberbotics.com/) simulator.
17 |
18 | We will recreate the [CartPole](https://www.gymlibrary.dev/environments/classic_control/cart_pole/) problem step-by-step in
19 | [Webots](https://cyberbotics.com/), and solve it with the
20 | [Proximal Policy Optimization](https://openai.com/blog/openai-baselines-ppo/) (PPO)
21 | Reinforcement Learning (RL) algorithm, using [PyTorch](https://pytorch.org/) as our neural network backend library.
22 |
23 | We will focus on creating the project, the world and the controller scripts and how to use the *deepbots framework*.
24 | For the purposes of the tutorial, a basic implementation of the PPO algorithm and a custom CartPole robot
25 | node (contained within the world) are supplied. For guides on how to construct a custom robot, please visit the official Webots
26 | [tutorial](https://cyberbotics.com/doc/guide/tutorial-6-4-wheels-robot).
27 |
28 | You can check the complete example [here](/robotSupervisorSchemeTutorial/full_project) with all the scripts and nodes
29 | used in this tutorial.
30 | The CartPole example is available (with some added code for plots/monitoring and keyboard controls) on the
31 | [deepworlds](https://github.com/aidudezzz/deepworlds/) repository.
32 |
33 | ## Prerequisites
34 |
35 | Before starting, several prerequisites should be met. Follow the [installation section on the deepbots framework main
36 | repository](https://github.com/aidudezzz/deepbots#installation).
37 |
38 | For this tutorial you will also need to [install PyTorch](https://pytorch.org/get-started/locally/)
39 | (no CUDA/GPU support needed for this simple example as the very small neural networks used are sufficient to solve the task).
40 |
41 |
42 | ## CartPole
43 | ### Creating the project
44 |
45 | Now we are ready to start working on the *CartPole* problem. First of all, we should create a new project.
46 |
47 | 1. Open Webots and on the menu bar, click *"File -> New -> New Project Directory..."*\
48 | 
49 | 2. Select a directory of your choice
50 | 3. On world settings **all** boxes should be ticked\
51 | 
52 | 4. Give your world a name, e.g. "cartpole_world.wbt"
53 | 5. Press Finish
54 |
55 | You should end up with:\
56 | 
57 |
58 |
59 | ### Adding a *supervisor robot* node in the world
60 |
61 | First of all, we will download the *CartPole robot node* definition that is supplied for the purposes of the tutorial;
62 | we will add it into the world later.
63 |
64 | 1. Right-click on
65 | [this link](https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/master/robotSupervisorSchemeTutorial/full_project/controllers/robot_supervisor_controller/cartpole_robot_definition.txt)
66 | and click *Save link as...* to download the CartPole robot definition
67 | 2. Save the .txt file in a directory of your choice
68 | 3. Navigate to the directory and open the downloaded file with a text editor
69 | 4. Select everything and copy it
70 |
71 | Now we need to add the *CartPole robot* into the world:
72 |
73 | 1. Navigate to your project's directory, open `/worlds` and edit the `.wbt` world file with a text editor
74 | 2. Navigate to the end of the file and paste the contents of the `cartpole_robot_definition.txt` file we downloaded earlier
75 | 3. Now all you need to do is reload the world and the robot will appear in it!
76 | 
77 |
78 | Ignore the warning that appears in the console as we will later add a *robot controller* script to control the robot.
79 |
80 | Now that the project and the starting world are created and the robot added to the world, we also need to give *supervisor*
81 | privileges to the robot, so it can get various values from the simulation and control it.
82 | This will be done through the *robot controller* script which we will add later. Through this script we will be able to
83 | handle several aspects of the simulation needed for RL, as well as control the robot with the actions produced by the RL agent.
84 |
85 | Now let's go ahead and give the *supervisor* privileges to the robot:
86 |
87 | _(Make sure the simulation is stopped and reset to its original state, by pressing the pause button and then the reset button)_
88 |
89 | 1. Double-click on the new *Robot* node to expand it
90 | 2. Scroll down to find the *supervisor* field and set it to TRUE\
91 | 
92 | 3. Click *Save*\
93 | \
94 | *If the save button is grayed out, move the camera a bit in the 3D view and it should be enabled*
95 |
96 |
97 | ### Adding the controllers
98 |
99 | Now we will create the controller script that contains the environment and the robot controls.
100 | Then we are going to assign the *robot controller* script to the *robot* node we created before.
101 |
102 | Creating the *robot supervisor controller* script:
103 | 1. On the *menu bar*, click *"File -> New -> New Robot Controller..."*\
104 | 
105 | 2. On *Language selection*, select *Python*
106 | 3. Give it the name "*robot_supervisor_controller*"
107 | 4. Press *Finish*
108 |
109 | *If you are using an external IDE:*
110 | 1. Un-tick the "open ... in Text Editor" boxes and press *Finish*
111 | 2. Navigate to the project directory, inside the *controllers/controllerName/* directory
112 | 3. Open the controller script with your IDE
113 |
114 | The new Python controller script should be created and opened in the Webots text editor, looking like this:\
115 | 
116 |
117 | Assigning the *robot_supervisor_controller* to the *robot* node *controller* field:
118 | 1. Expand the *robot* node and scroll down to find the *controller* field
119 | 2. Click on the *controller* field and press the "*Select...*" button below\
120 | 
121 | 3. Find the "*robot_supervisor_controller*" controller in the list and click it\
122 | 
123 | 4. Click *OK*
124 | 5. Click *Save*
125 |
126 | ### Code overview
127 |
128 | Before delving into writing code, we take a look at the general workflow of the framework. We will create a class
129 | that inherits a *deepbots framework* class and write implementations for several key methods specific to the
130 | *CartPole* problem.
131 |
132 | We will be implementing the basic methods *get_observations*, *get_default_observation*, *get_reward*, *is_done* and *solved*,
133 | used for RL, based on the [Gym](https://www.gymlibrary.dev/) framework logic, that will be contained in the
134 | *robot supervisor controller*.
135 | These methods will compose the *environment* for the RL algorithm. Within the *robot supervisor controller*, below the
136 | environment class, we will also add the RL *agent*, which receives *observations* and outputs *actions*, as well as the
137 | RL training loop.
138 |
139 | The *robot supervisor controller* will also gather data from the *robot's* sensors and pack it into the *observation*
140 | for the agent using the *get_observations* method that we will implement. Then, using the *observation*, the *agent* will
141 | perform a forward pass and return an *action*. The *robot supervisor controller* will then pass the *action* to the
142 | *apply_action* method, which performs it on the *robot*.
143 | This closes the loop, which repeats until a termination condition is met, as defined in the *is_done* method.
144 |
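To make this workflow more concrete, here is a rough conceptual sketch of that loop. This is not the code we will actually write (deepbots performs these calls for us inside its `step()` method), and the `agent` object with its `choose_action` method is just a hypothetical stand-in for any RL agent:

```python
# Conceptual sketch only, assuming an `env` (our environment class) and some RL `agent` exist
observation = env.reset()                      # internally uses get_default_observation()
done = False
while not done:
    action = agent.choose_action(observation)  # forward pass of the agent (hypothetical method)
    env.apply_action([action])                 # turn the action into robot motion
    observation = env.get_observations()       # pack sensor data into the agent's next input
    reward = env.get_reward(action)            # +1 per step for CartPole
    done = env.is_done()                       # check the termination conditions
```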
145 |
146 | ### Writing the script
147 |
148 | Now we are ready to start writing the *robot supervisor controller* script.
149 | It is recommended to delete the contents of the script that were automatically generated.
150 |
151 | ### Robot supervisor controller script
152 |
153 | In this script we will import the *RobotSupervisorEnv* class from the *deepbots framework* and inherit it into our own
154 | *CartpoleRobot* class. Then, we are going to implement the various basic framework methods (a bare skeleton of the class is sketched right after this list):
155 | 1. *get_observations* which will create the *observation* for our agent in each step
156 | 2. *get_default_observation* which is used by the *reset* method that the framework implements
157 | 3. *get_reward* which will return the reward for the agent at each step
158 | 4. *is_done* which will look for the episode done condition
159 | 5. *solved* which will look for a condition that shows that the agent is fully trained and able to solve the problem
160 | adequately (note that this method is not required by the framework, we just add it for convenience)
161 | 6. *apply_action* which will take the action provided by the agent and apply it to the robot by setting its
162 | motors' speeds
163 | 7. dummy implementations for *get_info* and *render* required to have a complete Gym environment
164 |
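For orientation, here is a bare skeleton of the class we are about to fill in, with the method bodies intentionally left out; each one is implemented step by step in the following sections:

```python
from deepbots.supervisor.controllers.robot_supervisor_env import RobotSupervisorEnv


class CartpoleRobot(RobotSupervisorEnv):
    def __init__(self):
        super().__init__()
        # observation/action spaces, device references and episode bookkeeping go here

    def get_observations(self): ...          # agent input for the current step
    def get_default_observation(self): ...   # used by the framework's reset()
    def get_reward(self, action=None): ...   # +1 per step for CartPole
    def is_done(self): ...                   # episode termination condition
    def solved(self): ...                    # convenience method, not required by the framework
    def apply_action(self, action): ...      # turn the agent's action into motor commands
    def get_info(self): ...                  # dummy, required for a complete Gym environment
    def render(self, mode="human"): ...      # dummy, required for a complete Gym environment
```
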
165 | Before we start coding, we should add two scripts, one that contains the RL PPO agent,
166 | and the other containing utility functions that we are going to need.
167 |
168 | Save both files inside the project directory, under `controllers/robot_supervisor_controller/`:
169 | 1. Right-click on [this link](https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/master/robotSupervisorSchemeTutorial/full_project/controllers/robot_supervisor_controller/PPO_agent.py) and click *Save link as...* to download the PPO agent
170 | 2. Right-click on [this link](https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/master/robotSupervisorSchemeTutorial/full_project/controllers/robot_supervisor_controller/utilities.py) and click *Save link as...* to download the utilities script
171 |
172 | Starting with the imports, first we are going to need the *RobotSupervisorEnv* class and then
173 | a couple of utility functions, the PPO agent implementation, the gym spaces to define the action and observation spaces
174 | and finally numpy, which is installed as a dependency of the libraries we already installed.
175 | ```python
176 | from deepbots.supervisor.controllers.robot_supervisor_env import RobotSupervisorEnv
177 | from utilities import normalize_to_range
178 | from PPO_agent import PPOAgent, Transition
179 |
180 | from gym.spaces import Box, Discrete
181 | import numpy as np
182 | ```
183 | Then we define our class, inheriting the imported one.
184 | ```python
185 | class CartpoleRobot(RobotSupervisorEnv):
186 | def __init__(self):
187 | super().__init__()
188 | ```
189 | Then we set up the observation and action spaces.
190 |
191 | The observation space defines the agent's (i.e. the neural network's) input size and
192 | the range of values each input can take, as shown in the table below:
193 |
194 | Num | Observation | Min | Max
195 | ----|-------------|-----|----
196 | 0 | Cart Position x axis | -0.4 | 0.4
197 | 1 | Cart Velocity | -Inf | Inf
198 | 2 | Pole Angle | -1.3 rad | 1.3 rad
199 | 3 | Pole Velocity at Tip | -Inf | Inf
200 |
201 | The action space defines the outputs of the neural network, which are two: one for the forward movement
202 | and one for the backward movement of the robot.
203 |
204 | ```python
205 | self.observation_space = Box(low=np.array([-0.4, -np.inf, -1.3, -np.inf]),
206 | high=np.array([0.4, np.inf, 1.3, np.inf]),
207 | dtype=np.float64)
208 | self.action_space = Discrete(2)
209 | ```
210 | We then get a reference to the robot node, initialize the pole sensor, get a reference for the pole endpoint and
211 | initialize the wheel motors.
212 | ```python
213 | self.robot = self.getSelf() # Grab the robot reference from the supervisor to access various robot methods
214 | self.position_sensor = self.getDevice("polePosSensor")
215 | self.position_sensor.enable(self.timestep)
216 | self.pole_endpoint = self.getFromDef("POLE_ENDPOINT")
217 |
218 | self.wheels = []
219 | for wheel_name in ['wheel1', 'wheel2', 'wheel3', 'wheel4']:
220 | wheel = self.getDevice(wheel_name) # Get the wheel handle
221 | wheel.setPosition(float('inf')) # Set starting position
222 | wheel.setVelocity(0.0) # Zero out starting velocity
223 | self.wheels.append(wheel)
224 | ```
225 | Finally, we initialize several variables used during training. Note that the `self.steps_per_episode` is set to `200`
226 | based on the problem's definition. This concludes the `__init__()` method.
227 | ```python
228 | self.steps_per_episode = 200 # Max number of steps per episode
229 | self.episode_score = 0 # Score accumulated during an episode
230 | self.episode_score_list = [] # A list to save all the episode scores, used to check if task is solved
231 | ```
232 |
233 | After the initialization we start implementing the various methods needed. We start with the `get_observations()`
234 | method, which creates the agent's input from various information observed from the Webots world and returns it. We use
235 | the `normalize_to_range()` utility method to normalize the values into the `[-1.0, 1.0]` range.
236 |
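As a quick sanity check of what `normalize_to_range()` does (see the provided `utilities.py`), here are two worked examples with illustrative values:

```python
from utilities import normalize_to_range

# 0.2 sits three quarters of the way through [-0.4, 0.4], so it maps to 0.5 in [-1.0, 1.0]
print(normalize_to_range(0.2, -0.4, 0.4, -1.0, 1.0))             # 0.5
# With clip=True, values outside the original range are clamped to the new range's bounds
print(normalize_to_range(0.6, -0.4, 0.4, -1.0, 1.0, clip=True))  # 1.0
```
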
237 | We will start by getting the cartpole robot node's position and velocity on the x-axis. The x-axis is the direction of
238 | its forward/backward movement. We then read the position sensor value, which returns the pole's angle off vertical.
239 | Finally, we get the pole tip velocity from the POLE_ENDPOINT node we defined earlier. The values are packed in a list
240 | and returned.
241 |
242 | (mind the indentation, the following methods belong to the *CartpoleRobot* class)
243 | ```python
244 | def get_observations(self):
245 | # Position on x-axis
246 | cart_position = normalize_to_range(self.robot.getPosition()[0], -0.4, 0.4, -1.0, 1.0)
247 | # Linear velocity on x-axis
248 | cart_velocity = normalize_to_range(self.robot.getVelocity()[0], -0.2, 0.2, -1.0, 1.0, clip=True)
249 | # Pole angle off vertical
250 | pole_angle = normalize_to_range(self.position_sensor.getValue(), -0.23, 0.23, -1.0, 1.0, clip=True)
251 | # Angular velocity y of endpoint
252 | endpoint_velocity = normalize_to_range(self.pole_endpoint.getVelocity()[4], -1.5, 1.5, -1.0, 1.0, clip=True)
253 |
254 | return [cart_position, cart_velocity, pole_angle, endpoint_velocity]
255 | ```
256 |
257 | Let's also define the *get_default_observation()* method that is used internally by deepbots when a new training episode starts:
258 | ```python
259 | def get_default_observation(self):
260 | # This method just returns a zero vector as a default observation
261 | return [0.0 for _ in range(self.observation_space.shape[0])]
262 | ```
263 |
264 | Now for something simpler, we will define the `get_reward()` method, which simply returns
265 | `1` for each step. Reward functions are usually more elaborate, but for this problem the reward is
266 | defined as the agent getting +1 for each step it manages to keep the pole from falling.
267 |
268 | ```python
269 | def get_reward(self, action=None):
270 | return 1
271 | ```
272 | Moving on, we define the *is_done()* method, which contains the episode termination conditions:
273 | - Episode terminates if episode score is over 195
274 | - Episode terminates if the pole has fallen beyond an angle which can be realistically recovered (±15 degrees)
275 | - Episode terminates if the robot hits the walls by moving into them, which is detected based on its position on the x-axis
276 | ```python
277 | def is_done(self):
278 | if self.episode_score > 195.0:
279 | return True
280 |
281 | pole_angle = round(self.position_sensor.getValue(), 2)
282 | if abs(pole_angle) > 0.261799388: # more than 15 degrees off vertical (defined in radians)
283 | return True
284 |
285 | cart_position = round(self.robot.getPosition()[0], 2) # Position on x-axis
286 | if abs(cart_position) > 0.39:
287 | return True
288 |
289 | return False
290 | ```
291 | We separate the *solved* condition into its own method, `solved()`, because it requires different handling.
292 | The *solved* condition depends on the agent consistently completing consecutive episodes successfully. We measure this
293 | by taking the average episode score of the last 100 episodes and checking whether it's over 195.
294 | ```python
295 | def solved(self):
296 | if len(self.episode_score_list) > 100: # Over 100 trials thus far
297 | if np.mean(self.episode_score_list[-100:]) > 195.0: # Last 100 episodes' scores average value
298 | return True
299 | return False
300 | ```
301 |
302 | To complete the Gym environment, we add dummy implementations of the `get_info()` and `render()` methods;
303 | they are not actually used in this example, but are required by the framework and Gym in the background.
304 |
305 | ```python
306 | def get_info(self):
307 | return None
308 |
309 | def render(self, mode='human'):
310 | pass
311 | ```
312 |
313 | Lastly, this controller actually controls the robot itself, so we need to define the `apply_action()` method which gets the
314 | action that the agent outputs and turns it into physical motion of the robot. For this tutorial we use a discrete action space,
315 | and thus the agent outputs an integer that is either `0` or `1` denoting forward or backward motion using the robot's motors.
316 | ```python
317 | def apply_action(self, action):
318 | action = int(action[0])
319 |
320 | if action == 0:
321 | motor_speed = 5.0
322 | else:
323 | motor_speed = -5.0
324 |
325 | for i in range(len(self.wheels)):
326 | self.wheels[i].setPosition(float('inf'))
327 | self.wheels[i].setVelocity(motor_speed)
328 | ```
329 | That completes the *CartpoleRobot* class, which now contains all the methods required to run an RL training loop, get
330 | information from the simulation and the robot's sensors, and control the robot!
331 | Now all that's left is to add (outside the class scope, mind the indentation) the code that runs the RL loop.
332 |
333 | ### RL Training Loop
334 |
335 | Finally, it all comes together inside the RL training loop. Now we initialize the RL agent and create the
336 | *CartpoleRobot* class object, i.e. the RL environment, in which the agent gets trained to solve the problem by
337 | maximizing the reward given by our reward function, until it achieves the solved condition we defined.
338 |
339 | **Note that popular frameworks like [stable-baselines3](https://stable-baselines3.readthedocs.io/en/master/) contain the
340 | RL training loop within their *learn* method or similar. Frameworks like *sb3* are fully compatible with *deepbots*, as
341 | *deepbots* defines Gym environments and interfaces them with Webots, saving you a lot of trouble; the resulting environment
342 | can then be supplied to *sb3* or any other RL framework of your choice.**
343 |
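Purely to illustrate that compatibility (we do not use it in this tutorial), training with *sb3* instead of the hand-rolled loop below could look roughly like the following sketch, assuming `stable-baselines3` is installed and its version is compatible with the Gym API used by your *deepbots*/*gym* versions:

```python
from stable_baselines3 import PPO

# `env` would be an instance of our CartpoleRobot environment defined above
model = PPO("MlpPolicy", env, verbose=1)     # sb3 contains the RL training loop internally
model.learn(total_timesteps=100_000)         # illustrative timestep budget
model.save("ppo_cartpole")                   # hypothetical output filename
```
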
344 | For this tutorial we follow a more hands-on approach to get a better grasp of how RL works. Also feel free to check out
345 | the simple PPO agent implementation we provide.
346 |
347 | First we create the environment object and then initialize the PPO agent, providing it with the observation and action
348 | spaces. Note that we extract the number 4 as _number_of_inputs_ and number 2 as _number_of_actor_outputs_ from the gym spaces,
349 | because the algorithm implementation expects integers for these arguments to initialize the neural network's input and
350 | output neurons.
351 |
352 | ```python
353 | env = CartpoleRobot()
354 | agent = PPOAgent(number_of_inputs=env.observation_space.shape[0], number_of_actor_outputs=env.action_space.n)
355 | ```
356 |
357 | Then we set the `solved` flag to `False`. This flag is used to terminate the training loop when the solved condition is met.
358 | ```python
359 | solved = False
360 | ```
361 | Before setting up the RL loop, we define an episode counter and a limit for the number of episodes to run.
362 | ```python
363 | episode_count = 0
364 | episode_limit = 2000
365 | ```
366 | Now we define the outer loop, which runs for at most the number of episodes we just defined and resets the world to get the
367 | starting observation. We also reset the episode score to zero.
368 |
369 | _(please be mindful of the indentation on the following code, because we are about to define several levels of nested
370 | loops and ifs)_
371 | ```python
372 | # Run outer loop until the episodes limit is reached or the task is solved
373 | while not solved and episode_count < episode_limit:
374 | observation = env.reset() # Reset robot and get starting observation
375 | env.episode_score = 0
376 | ```
377 |
378 | Inside the outer loop defined above, we define the inner loop which runs for the course of an episode. This loop
379 | runs for a maximum number of steps defined by the problem. Here, the RL agent-environment loop takes place.
380 |
381 | We start by calling the `agent.work()` method, providing it with the current observation, which for the first step
382 | is the zero vector that the `reset()` method returns through the `get_default_observation()` method we defined
383 | earlier.
384 | The `work()` method implements the forward pass of the agent's actor neural network, providing us with the next action.
385 | As the comment suggests, the PPO algorithm implements exploration by sampling from the probability distribution the agent
386 | outputs from its actor's softmax output layer.
387 |
388 | ```python
389 | for step in range(env.steps_per_episode):
390 | # In training mode the agent samples from the probability distribution, naturally implementing exploration
391 | selected_action, action_prob = agent.work(observation, type_="selectAction")
392 | ```
393 |
394 | The next part contains the call to the `step()` method, which is defined internally in deepbots. This method calls most of
395 | the methods we implemented earlier (`get_observations()`, `get_reward()`, `is_done()` and `get_info()`), steps the Webots
396 | controller and applies the action that the agent selected on the robot with the `apply_action()` method we defined.
397 | `step()` returns the new observation, the reward for the previous action and whether the episode is
398 | terminated (info is not implemented in this example).
399 |
400 | Then, we create the `Transition`, which is a named tuple that contains, as the name suggests, the transition from
401 | the previous `observation` (or `state`) to the `new_observation` (or `new_state`). This is needed by the agent for its training
402 | procedure, so we call the agent's `store_transition()` method to save it to its buffer. Most RL algorithms require a
403 | similar procedure and have similar methods to do it.
404 |
405 | ```python
406 | # Step the supervisor to get the current selected_action's reward, the new observation and whether we reached
407 | # the done condition
408 | new_observation, reward, done, info = env.step([selected_action])
409 |
410 | # Save the current state transition in agent's memory
411 | trans = Transition(observation, selected_action, action_prob, reward, new_observation)
412 | agent.store_transition(trans)
413 | ```
414 |
415 | Finally, we check whether the episode is terminated and, if it is, we save the episode score, run a training step
416 | for the agent giving the number of steps taken in the episode as the batch size, check whether the problem is solved
417 | via the `solved()` method and break out of the inner loop.
418 |
419 | If not, we add the step reward to the `episode_score` accumulator, save the `new_observation` as `observation` and move
420 | on to the next episode step.
421 |
422 | **Note that in more realistic training procedures, the training step might not run after every episode. Depending on the problem
423 | you might need to run the training procedure multiple times per episode or once every several episodes. This is set as `n_steps`
424 | or similar in frameworks like *sb3*. Moreover, changing the batch size along with `n_steps` might greatly influence the
425 | training results and whether the agent actually converges to a solution, so they are crucial parameters.**
426 |
427 | ```python
428 | if done:
429 | # Save the episode's score
430 | env.episode_score_list.append(env.episode_score)
431 | agent.train_step(batch_size=step + 1)
432 | solved = env.solved() # Check whether the task is solved
433 | break
434 |
435 | env.episode_score += reward # Accumulate episode reward
436 | observation = new_observation # observation for next step is current step's new_observation
437 | ```
438 |
439 | This completes the inner loop; now we add a print statement and increment the episode counter to finalize the outer
440 | loop.
441 |
442 | (note that the following code snippet is part of the outer loop)
443 | ```python
444 | print("Episode #", episode_count, "score:", env.episode_score)
445 | episode_count += 1 # Increment episode counter
446 | ```
447 |
448 | With the outer loop done, the training procedure is complete. Now all that's left is the testing loop, which is a
449 | barebones, simpler version of the training loop. First we print a message on whether the task is solved or not (i.e.
450 | whether the episode limit was reached without satisfying the solved condition) and call the `reset()` method. Then, we create a
451 | `while True` loop that runs the agent's forward method, but this time selecting the action with the max probability
452 | out of the actor's softmax output, eliminating exploration/randomness. Finally, the `step()` method is called, but
453 | this time we keep only the observation it returns to keep the environment-agent loop running. If the *done* flag is true, we
454 | reset the environment to start over.
455 |
456 | ```python
457 | if not solved:
458 | print("Task is not solved, deploying agent for testing...")
459 | elif solved:
460 | print("Task is solved, deploying agent for testing...")
461 |
462 | observation = env.reset()
463 | env.episode_score = 0.0
464 | while True:
465 | selected_action, action_prob = agent.work(observation, type_="selectActionMax")
466 | observation, _, done, _ = env.step([selected_action])
467 | if done:
468 | observation = env.reset()
469 | ```
470 |
471 | ### Conclusion
472 |
473 | Now with the coding done you can click on the *Run the simulation* button and watch the training run!
474 |
475 | \
476 | Webots allows you to speed up the simulation, or even run it without graphics, so the training shouldn't take long, at
477 | least to see the agent becoming visibly better at moving under the pole to balance it. It takes a while for it to
478 | achieve the *solved* condition, but when it does, it becomes quite good at balancing the pole! You can even apply forces
479 | in real time by holding Alt and left-clicking and dragging on the robot or the pole.
480 |
481 | That's it for this tutorial! :)
482 |
483 | **_We welcome you to leave comments and feedback for the tutorial on the relevant
484 | [discussions page](https://github.com/aidudezzz/deepbots-tutorials/discussions/12?sort=new) or to open an issue for any
485 | problem you find in it!_**
486 |
487 | 
488 |
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/full_project/controllers/robot_supervisor_controller/PPO_agent.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | import torch.optim as optim
4 | from torch.distributions import Categorical
5 | from torch import from_numpy, no_grad, save, load, tensor, clamp
6 | from torch import float as torch_float
7 | from torch import long as torch_long
8 | from torch import min as torch_min
9 | from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
10 | import numpy as np
11 | from torch import manual_seed
12 | from collections import namedtuple
13 |
14 | Transition = namedtuple('Transition', ['state', 'action', 'a_log_prob', 'reward', 'next_state'])
15 |
16 |
17 | class PPOAgent:
18 | """
19 | PPOAgent implements the PPO RL algorithm (https://arxiv.org/abs/1707.06347).
20 | It works with a set of discrete actions.
21 | It uses the Actor and Critic neural network classes defined below.
22 | """
23 |
24 | def __init__(self, number_of_inputs, number_of_actor_outputs, clip_param=0.2, max_grad_norm=0.5, ppo_update_iters=5,
25 | batch_size=8, gamma=0.99, use_cuda=False, actor_lr=0.001, critic_lr=0.003, seed=None):
26 | super().__init__()
27 | if seed is not None:
28 | manual_seed(seed)
29 |
30 | # Hyper-parameters
31 | self.clip_param = clip_param
32 | self.max_grad_norm = max_grad_norm
33 | self.ppo_update_iters = ppo_update_iters
34 | self.batch_size = batch_size
35 | self.gamma = gamma
36 | self.use_cuda = use_cuda
37 |
38 | # models
39 | self.actor_net = Actor(number_of_inputs, number_of_actor_outputs)
40 | self.critic_net = Critic(number_of_inputs)
41 |
42 | if self.use_cuda:
43 | self.actor_net.cuda()
44 | self.critic_net.cuda()
45 |
46 | # Create the optimizers
47 | self.actor_optimizer = optim.Adam(self.actor_net.parameters(), actor_lr)
48 | self.critic_net_optimizer = optim.Adam(self.critic_net.parameters(), critic_lr)
49 |
50 | # Training stats
51 | self.buffer = []
52 |
53 | def work(self, agent_input, type_="simple"):
54 | """
55 | type_ == "simple"
56 | Implementation for a simple forward pass.
57 | type_ == "selectAction"
58 | Implementation for the forward pass, that returns a selected action according to the probability
59 | distribution and its probability.
60 | type_ == "selectActionMax"
61 | Implementation for the forward pass, that returns the max selected action.
62 | """
63 | agent_input = from_numpy(np.array(agent_input)).float().unsqueeze(0) # Add batch dimension with unsqueeze
64 | if self.use_cuda:
65 | agent_input = agent_input.cuda()
66 | with no_grad():
67 | action_prob = self.actor_net(agent_input)
68 |
69 | if type_ == "simple":
70 | output = [action_prob[0][i].data.tolist() for i in range(len(action_prob[0]))]
71 | return output
72 | elif type_ == "selectAction":
73 | c = Categorical(action_prob)
74 | action = c.sample()
75 | return action.item(), action_prob[:, action.item()].item()
76 | elif type_ == "selectActionMax":
77 | return np.argmax(action_prob).item(), 1.0
78 | else:
79 | raise Exception("Wrong type in agent.work(), returning input")
80 |
81 | def get_value(self, state):
82 | """
83 | Gets the value of the current state according to the critic model.
84 |
85 | :param state: The current state
86 | :return: state's value
87 | """
88 | state = from_numpy(state)
89 | with no_grad():
90 | value = self.critic_net(state)
91 | return value.item()
92 |
93 | def save(self, path):
94 | """
95 | Save actor and critic models in the path provided.
96 |
97 | :param path: path to save the models
98 | :type path: str
99 | """
100 | save(self.actor_net.state_dict(), path + '_actor.pkl')
101 | save(self.critic_net.state_dict(), path + '_critic.pkl')
102 |
103 | def load(self, path):
104 | """
105 | Load actor and critic models from the path provided.
106 |
107 | :param path: path where the models are saved
108 | :type path: str
109 | """
110 | actor_state_dict = load(path + '_actor.pkl')
111 | critic_state_dict = load(path + '_critic.pkl')
112 | self.actor_net.load_state_dict(actor_state_dict)
113 | self.critic_net.load_state_dict(critic_state_dict)
114 |
115 | def store_transition(self, transition):
116 | """
117 | Stores a transition in the buffer to be used later.
118 |
119 | :param transition: contains state, action, action_prob, reward, next_state
120 | :type transition: namedtuple('Transition', ['state', 'action', 'a_log_prob', 'reward', 'next_state'])
121 | """
122 | self.buffer.append(transition)
123 |
124 | def train_step(self, batch_size=None):
125 | """
126 | Performs a training step or update for the actor and critic models, based on transitions gathered in the
127 | buffer. It then resets the buffer.
128 | If provided with a batch_size, this is used instead of default self.batch_size
129 |
130 | :param: batch_size: int
131 | :return: None
132 | """
133 | # Default behaviour waits for buffer to collect at least one batch_size of transitions
134 | if batch_size is None:
135 | if len(self.buffer) < self.batch_size:
136 | return
137 | batch_size = self.batch_size
138 |
139 | # Extract states, actions, rewards and action probabilities from transitions in buffer
140 | state = tensor([t.state for t in self.buffer], dtype=torch_float)
141 | action = tensor([t.action for t in self.buffer], dtype=torch_long).view(-1, 1)
142 | reward = [t.reward for t in self.buffer]
143 | old_action_log_prob = tensor([t.a_log_prob for t in self.buffer], dtype=torch_float).view(-1, 1)
144 |
145 | # Unroll rewards
146 | R = 0
147 | Gt = []
148 | for r in reward[::-1]:
149 | R = r + self.gamma * R
150 | Gt.insert(0, R)
151 | Gt = tensor(Gt, dtype=torch_float)
152 |
153 | # Send everything to cuda if used
154 | if self.use_cuda:
155 | state, action, old_action_log_prob = state.cuda(), action.cuda(), old_action_log_prob.cuda()
156 | Gt = Gt.cuda()
157 |
158 | # Repeat the update procedure for ppo_update_iters
159 | for _ in range(self.ppo_update_iters):
160 | # Create randomly ordered batches of size batch_size from buffer
161 | for index in BatchSampler(SubsetRandomSampler(range(len(self.buffer))), batch_size, False):
162 | # Calculate the advantage at each step
163 | Gt_index = Gt[index].view(-1, 1)
164 | V = self.critic_net(state[index])
165 | delta = Gt_index - V
166 | advantage = delta.detach()
167 |
168 | # Get the current probabilities
169 | # Apply past actions with .gather()
170 | action_prob = self.actor_net(state[index]).gather(1, action[index]) # new policy
171 |
172 | # PPO
173 | ratio = (action_prob / old_action_log_prob[index]) # Ratio between current and old policy probabilities
174 | surr1 = ratio * advantage
175 | surr2 = clamp(ratio, 1 - self.clip_param, 1 + self.clip_param) * advantage
176 |
177 | # update actor network
178 | action_loss = -torch_min(surr1, surr2).mean() # MAX->MIN descent
179 | self.actor_optimizer.zero_grad() # Delete old gradients
180 | action_loss.backward() # Perform backward step to compute new gradients
181 | nn.utils.clip_grad_norm_(self.actor_net.parameters(), self.max_grad_norm) # Clip gradients
182 | self.actor_optimizer.step() # Perform training step based on gradients
183 |
184 | # update critic network
185 | value_loss = F.mse_loss(Gt_index, V)
186 | self.critic_net_optimizer.zero_grad()
187 | value_loss.backward()
188 | nn.utils.clip_grad_norm_(self.critic_net.parameters(), self.max_grad_norm)
189 | self.critic_net_optimizer.step()
190 |
191 | # After each training step, the buffer is cleared
192 | del self.buffer[:]
193 |
194 |
195 | class Actor(nn.Module):
196 | def __init__(self, number_of_inputs, number_of_outputs):
197 | super(Actor, self).__init__()
198 | self.fc1 = nn.Linear(number_of_inputs, 10)
199 | self.fc2 = nn.Linear(10, 10)
200 | self.action_head = nn.Linear(10, number_of_outputs)
201 |
202 | def forward(self, x):
203 | x = F.relu(self.fc1(x))
204 | x = F.relu(self.fc2(x))
205 | action_prob = F.softmax(self.action_head(x), dim=1)
206 | return action_prob
207 |
208 |
209 | class Critic(nn.Module):
210 | def __init__(self, number_of_inputs):
211 | super(Critic, self).__init__()
212 | self.fc1 = nn.Linear(number_of_inputs, 10)
213 | self.fc2 = nn.Linear(10, 10)
214 | self.state_value = nn.Linear(10, 1)
215 |
216 | def forward(self, x):
217 | x = F.relu(self.fc1(x))
218 | x = F.relu(self.fc2(x))
219 | value = self.state_value(x)
220 | return value
221 |
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/full_project/controllers/robot_supervisor_controller/cartpole_robot_definition.txt:
--------------------------------------------------------------------------------
1 | DEF ROBOT Robot {
2 | translation 0 0 0.04
3 | rotation 1 0 0 1.5708
4 | children [
5 | DEF HINGE_COVER Solid {
6 | translation 0 0.03 -3.469446951953614e-18
7 | rotation 0 1 0 -1.5707953071795862
8 | children [
9 | Shape {
10 | appearance PBRAppearance {
11 | baseColor 0 0.6509803921568628 1
12 | }
13 | geometry Box {
14 | size 0.030000000000000002 0.019999999999999997 0.05
15 | }
16 | }
17 | ]
18 | name "hingeCover"
19 | }
20 | DEF BODY Shape {
21 | appearance PBRAppearance {
22 | baseColor 0.917647 0.145098 0.145098
23 | roughness 1
24 | metalness 0
25 | }
26 | geometry Box {
27 | size 0.2 0.05 0.08
28 | }
29 | }
30 | DEF WHEEL1 HingeJoint {
31 | jointParameters HingeJointParameters {
32 | position 6.723540149111039e-17
33 | axis 0 0 1
34 | anchor 0.06 0 0.05
35 | }
36 | device [
37 | RotationalMotor {
38 | name "wheel1"
39 | }
40 | ]
41 | endPoint Solid {
42 | translation 0.060001004644818584 1.339276918416963e-05 0.050000010543824816
43 | rotation 1.7671082538995992e-08 -1.7671248834016876e-08 0.9999999999999998 1.5708000430119498
44 | children [
45 | DEF WHEEL Shape {
46 | appearance PBRAppearance {
47 | baseColor 0.305882 0.898039 0.25098
48 | roughness 1
49 | metalness 0
50 | }
51 | geometry Cylinder {
52 | height 0.02
53 | radius 0.04
54 | subdivision 24
55 | }
56 | }
57 | ]
58 | boundingObject USE WHEEL
59 | physics Physics {
60 | }
61 | }
62 | }
63 | DEF WHEEL2 HingeJoint {
64 | jointParameters HingeJointParameters {
65 | position 6.670640318240083e-17
66 | axis 0 0 1
67 | anchor -0.06 0 0.05
68 | }
69 | device [
70 | RotationalMotor {
71 | name "wheel2"
72 | }
73 | ]
74 | endPoint Solid {
75 | translation -0.060001095993954395 7.616338723251721e-07 0.05000000952472374
76 | rotation 1.76096540431902e-08 -1.7609826183889808e-08 0.9999999999999997 1.5707999569897093
77 | children [
78 | USE WHEEL
79 | ]
80 | name "solid(1)"
81 | boundingObject USE WHEEL
82 | physics Physics {
83 | }
84 | }
85 | }
86 | DEF WHEEL3 HingeJoint {
87 | jointParameters HingeJointParameters {
88 | position 1.0324509589061627e-16
89 | axis 0 0 1
90 | anchor 0.06 0 -0.05
91 | }
92 | device [
93 | RotationalMotor {
94 | name "wheel3"
95 | }
96 | ]
97 | endPoint Solid {
98 | translation 0.06000100379909951 1.3392659934531687e-05 -0.05000000738328558
99 | rotation -1.7671380018270767e-08 1.7671539284228046e-08 0.9999999999999998 1.5708000430119113
100 | children [
101 | USE WHEEL
102 | ]
103 | name "solid(2)"
104 | boundingObject USE WHEEL
105 | physics Physics {
106 | }
107 | }
108 | }
109 | DEF WHEEL4 HingeJoint {
110 | jointParameters HingeJointParameters {
111 | position -2.6895122773572173e-17
112 | axis 0 0 1
113 | anchor -0.06 0 -0.05
114 | }
115 | device [
116 | RotationalMotor {
117 | name "wheel4"
118 | }
119 | ]
120 | endPoint Solid {
121 | translation -0.060001096839924606 7.615246225854488e-07 -0.05000000839401549
122 | rotation -1.761000837263908e-08 1.761018020913388e-08 0.9999999999999997 1.5707999569897055
123 | children [
124 | USE WHEEL
125 | ]
126 | name "solid(3)"
127 | boundingObject USE WHEEL
128 | physics Physics {
129 | }
130 | }
131 | }
132 | DEF POLE HingeJoint {
133 | jointParameters HingeJointParameters {
134 | position -4.812689688184746e-16
135 | axis 0 0 1
136 | anchor 0 0.03000000000047226 0
137 | minStop -1.3
138 | maxStop 1.3
139 | }
140 | device [
141 | DEF POLE_POS_SENSOR PositionSensor {
142 | name "polePosSensor"
143 | }
144 | ]
145 | endPoint Solid {
146 | translation -0.0004655326854983616 0.5300037594690288 -5.369584240411801e-07
147 | rotation -0.57698744109641 -0.577531387769005 0.5775318077462263 2.0949379676602047
148 | children [
149 | DEF POLE_ENDPOINT Solid {
150 | translation 0.5000000000000002 0 0
151 | scale 0.1 0.1 0.1
152 | children [
153 | DEF BALL Shape {
154 | appearance PBRAppearance {
155 | baseColor 0.03137254901960784 0.047058823529411764 1
156 | metalness 0
157 | }
158 | geometry Sphere {
159 | radius 0.1
160 | }
161 | }
162 | ]
163 | boundingObject USE BALL
164 | physics Physics {
165 | density -1
166 | mass 0.02
167 | }
168 | }
169 | DEF POLE Shape {
170 | appearance PBRAppearance {
171 | baseColor 1 0.1568627450980392 0.1568627450980392
172 | roughness 1
173 | metalness 0
174 | }
175 | geometry Box {
176 | size 1 0.020000000000000004 0.019999999999999997
177 | }
178 | }
179 | ]
180 | name "pole"
181 | boundingObject USE POLE
182 | physics Physics {
183 | density -1
184 | mass 0.2
185 | }
186 | }
187 | }
188 | ]
189 | boundingObject USE BODY
190 | physics Physics {
191 | density -1
192 | mass 2
193 | }
194 | controller ""
195 | }
196 |
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/full_project/controllers/robot_supervisor_controller/robot_supervisor_controller.py:
--------------------------------------------------------------------------------
1 | from deepbots.supervisor.controllers.robot_supervisor_env import RobotSupervisorEnv
2 | from utilities import normalize_to_range
3 | from PPO_agent import PPOAgent, Transition
4 |
5 | from gym.spaces import Box, Discrete
6 | import numpy as np
7 |
8 |
9 | class CartpoleRobot(RobotSupervisorEnv):
10 | def __init__(self):
11 | super().__init__()
12 | # Define agent's observation space using Gym's Box, setting the lowest and highest possible values
13 | self.observation_space = Box(low=np.array([-0.4, -np.inf, -1.3, -np.inf]),
14 | high=np.array([0.4, np.inf, 1.3, np.inf]),
15 | dtype=np.float64)
16 | # Define agent's action space using Gym's Discrete
17 | self.action_space = Discrete(2)
18 |
19 | self.robot = self.getSelf() # Grab the robot reference from the supervisor to access various robot methods
20 | self.position_sensor = self.getDevice("polePosSensor")
21 | self.position_sensor.enable(self.timestep)
22 |
23 | self.pole_endpoint = self.getFromDef("POLE_ENDPOINT")
24 | self.wheels = []
25 | for wheel_name in ['wheel1', 'wheel2', 'wheel3', 'wheel4']:
26 | wheel = self.getDevice(wheel_name) # Get the wheel handle
27 | wheel.setPosition(float('inf')) # Set starting position
28 | wheel.setVelocity(0.0) # Zero out starting velocity
29 | self.wheels.append(wheel)
30 | self.steps_per_episode = 200 # Max number of steps per episode
31 | self.episode_score = 0 # Score accumulated during an episode
32 | self.episode_score_list = [] # A list to save all the episode scores, used to check if task is solved
33 |
34 | def get_observations(self):
35 | # Position on x-axis
36 | cart_position = normalize_to_range(self.robot.getPosition()[0], -0.4, 0.4, -1.0, 1.0)
37 | # Linear velocity on x-axis
38 | cart_velocity = normalize_to_range(self.robot.getVelocity()[0], -0.2, 0.2, -1.0, 1.0, clip=True)
39 | # Pole angle off vertical
40 | pole_angle = normalize_to_range(self.position_sensor.getValue(), -0.23, 0.23, -1.0, 1.0, clip=True)
41 | # Angular velocity y of endpoint
42 | endpoint_velocity = normalize_to_range(self.pole_endpoint.getVelocity()[4], -1.5, 1.5, -1.0, 1.0, clip=True)
43 |
44 | return [cart_position, cart_velocity, pole_angle, endpoint_velocity]
45 |
46 | def get_default_observation(self):
47 | # This method just returns a zero vector as a default observation
48 | return [0.0 for _ in range(self.observation_space.shape[0])]
49 |
50 | def get_reward(self, action=None):
51 | # Reward is +1 for every step the episode hasn't ended
52 | return 1
53 |
54 | def is_done(self):
55 |         if self.episode_score > 195.0:  # Terminate the episode early once the target score is exceeded
56 | return True
57 |
58 | pole_angle = round(self.position_sensor.getValue(), 2)
59 | if abs(pole_angle) > 0.261799388: # more than 15 degrees off vertical (defined in radians)
60 | return True
61 |
62 | cart_position = round(self.robot.getPosition()[0], 2) # Position on x-axis
63 | if abs(cart_position) > 0.39:
64 | return True
65 |
66 | return False
67 |
68 | def solved(self):
69 | if len(self.episode_score_list) > 100: # Over 100 trials thus far
70 | if np.mean(self.episode_score_list[-100:]) > 195.0: # Last 100 episodes' scores average value
71 | return True
72 | return False
73 |
74 | def get_info(self):
75 | return None
76 |
77 | def render(self, mode='human'):
78 | pass
79 |
80 | def apply_action(self, action):
81 |         action = int(action[0])  # The action arrives as a list containing a single integer
82 |
83 |         if action == 0:
84 |             motor_speed = 5.0  # Action 0: positive wheel speed
85 |         else:
86 |             motor_speed = -5.0  # Action 1: negative wheel speed
87 |
88 |         for i in range(len(self.wheels)):
89 |             self.wheels[i].setPosition(float('inf'))  # Keep the motors in velocity control mode
90 |             self.wheels[i].setVelocity(motor_speed)  # Apply the selected speed to all four wheels
91 |
92 |
93 | env = CartpoleRobot()
94 | agent = PPOAgent(number_of_inputs=env.observation_space.shape[0], number_of_actor_outputs=env.action_space.n)
95 |
96 | solved = False
97 | episode_count = 0
98 | episode_limit = 2000
99 | # Run outer loop until the episodes limit is reached or the task is solved
100 | while not solved and episode_count < episode_limit:
101 | observation = env.reset() # Reset robot and get starting observation
102 | env.episode_score = 0
103 |
104 | for step in range(env.steps_per_episode):
105 | # In training mode the agent samples from the probability distribution, naturally implementing exploration
106 | selected_action, action_prob = agent.work(observation, type_="selectAction")
107 | # Step the supervisor to get the current selected_action's reward, the new observation and whether we reached
108 | # the done condition
109 | new_observation, reward, done, info = env.step([selected_action])
110 |
111 | # Save the current state transition in agent's memory
112 | trans = Transition(observation, selected_action, action_prob, reward, new_observation)
113 | agent.store_transition(trans)
114 |
115 | if done:
116 | # Save the episode's score
117 | env.episode_score_list.append(env.episode_score)
118 | agent.train_step(batch_size=step + 1)
119 | solved = env.solved() # Check whether the task is solved
120 | break
121 |
122 | env.episode_score += reward # Accumulate episode reward
123 | observation = new_observation # observation for next step is current step's new_observation
124 |
125 | print("Episode #", episode_count, "score:", env.episode_score)
126 | episode_count += 1 # Increment episode counter
127 |
128 | if not solved:
129 | print("Task is not solved, deploying agent for testing...")
130 | else:
131 | print("Task is solved, deploying agent for testing...")
132 |
133 | observation = env.reset()
134 | env.episode_score = 0.0
135 | while True:
136 |     selected_action, action_prob = agent.work(observation, type_="selectActionMax")  # Always pick the most probable action
137 | observation, _, done, _ = env.step([selected_action])
138 | if done:
139 |         observation = env.reset()  # Start a new episode and keep the trained agent running
140 |
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/full_project/controllers/robot_supervisor_controller/utilities.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def normalize_to_range(value, min_val, max_val, new_min, new_max, clip=False):
5 | """
6 | Normalize value to a specified new range by supplying the current range.
7 |
8 | :param value: value to be normalized
9 | :param min_val: value's min value, value ∈ [min_val, max_val]
10 | :param max_val: value's max value, value ∈ [min_val, max_val]
11 | :param new_min: normalized range min value
12 | :param new_max: normalized range max value
13 | :param clip: whether to clip normalized value to new range or not
14 | :return: normalized value ∈ [new_min, new_max]
15 | """
16 | value = float(value)
17 | min_val = float(min_val)
18 | max_val = float(max_val)
19 | new_min = float(new_min)
20 | new_max = float(new_max)
21 |
22 | if clip:
23 | return np.clip((new_max - new_min) / (max_val - min_val) * (value - max_val) + new_max, new_min, new_max)
24 | else:
25 | return (new_max - new_min) / (max_val - min_val) * (value - max_val) + new_max
26 |
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/full_project/worlds/cartpole_world.wbt:
--------------------------------------------------------------------------------
1 | #VRML_SIM R2023b utf8
2 |
3 | EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2023a/projects/objects/backgrounds/protos/TexturedBackground.proto"
4 | EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2023a/projects/objects/backgrounds/protos/TexturedBackgroundLight.proto"
5 | EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2023a/projects/objects/floors/protos/RectangleArena.proto"
6 |
7 | WorldInfo {
8 | }
9 | Viewpoint {
10 | orientation -0.1849332562219514 0.10769485053314654 0.9768323857816086 1.9029469581134206
11 | position 1.835319026945999 -4.251256239319215 2.283828860202701
12 | }
13 | TexturedBackground {
14 | }
15 | TexturedBackgroundLight {
16 | }
17 | RectangleArena {
18 | }
19 | DEF ROBOT Robot {
20 | translation 0 0 0.04
21 | rotation 1 0 0 1.5708
22 | children [
23 | DEF HINGE_COVER Solid {
24 | translation 0 0.03 -3.469446951953614e-18
25 | rotation 0 1 0 -1.5707953071795862
26 | children [
27 | Shape {
28 | appearance PBRAppearance {
29 | baseColor 0 0.6509803921568628 1
30 | }
31 | geometry Box {
32 | size 0.030000000000000002 0.019999999999999997 0.05
33 | }
34 | }
35 | ]
36 | name "hingeCover"
37 | }
38 | DEF BODY Shape {
39 | appearance PBRAppearance {
40 | baseColor 0.917647 0.145098 0.145098
41 | roughness 1
42 | metalness 0
43 | }
44 | geometry Box {
45 | size 0.2 0.05 0.08
46 | }
47 | }
48 | DEF WHEEL1 HingeJoint {
49 | jointParameters HingeJointParameters {
50 | position 6.723540149111039e-17
51 | axis 0 0 1
52 | anchor 0.06 0 0.05
53 | }
54 | device [
55 | RotationalMotor {
56 | name "wheel1"
57 | }
58 | ]
59 | endPoint Solid {
60 | translation 0.060001004644818584 1.339276918417074e-05 0.050000010543824816
61 | rotation 1.7671082538995516e-08 -1.76712488340164e-08 0.9999999999999998 1.5708000430119473
62 | children [
63 | DEF WHEEL Shape {
64 | appearance PBRAppearance {
65 | baseColor 0.305882 0.898039 0.25098
66 | roughness 1
67 | metalness 0
68 | }
69 | geometry Cylinder {
70 | height 0.02
71 | radius 0.04
72 | subdivision 24
73 | }
74 | }
75 | ]
76 | boundingObject USE WHEEL
77 | physics Physics {
78 | }
79 | }
80 | }
81 | DEF WHEEL2 HingeJoint {
82 | jointParameters HingeJointParameters {
83 | position 6.670640318240083e-17
84 | axis 0 0 1
85 | anchor -0.06 0 0.05
86 | }
87 | device [
88 | RotationalMotor {
89 | name "wheel2"
90 | }
91 | ]
92 | endPoint Solid {
93 | translation -0.060001095993954395 7.616338723219541e-07 0.05000000952472374
94 | rotation 1.760965404318995e-08 -1.760982618388955e-08 0.9999999999999997 1.5707999569897069
95 | children [
96 | USE WHEEL
97 | ]
98 | name "solid(1)"
99 | boundingObject USE WHEEL
100 | physics Physics {
101 | }
102 | }
103 | }
104 | DEF WHEEL3 HingeJoint {
105 | jointParameters HingeJointParameters {
106 | position 1.0324509589061627e-16
107 | axis 0 0 1
108 | anchor 0.06 0 -0.05
109 | }
110 | device [
111 | RotationalMotor {
112 | name "wheel3"
113 | }
114 | ]
115 | endPoint Solid {
116 | translation 0.06000100379909951 1.3392659934527935e-05 -0.05000000738328558
117 | rotation -1.767138001827027e-08 1.767153928422755e-08 0.9999999999999998 1.570800043011908
118 | children [
119 | USE WHEEL
120 | ]
121 | name "solid(2)"
122 | boundingObject USE WHEEL
123 | physics Physics {
124 | }
125 | }
126 | }
127 | DEF WHEEL4 HingeJoint {
128 | jointParameters HingeJointParameters {
129 | position -2.6895122773572173e-17
130 | axis 0 0 1
131 | anchor -0.06 0 -0.05
132 | }
133 | device [
134 | RotationalMotor {
135 | name "wheel4"
136 | }
137 | ]
138 | endPoint Solid {
139 | translation -0.060001096839924606 7.615246225881513e-07 -0.05000000839401549
140 | rotation -1.761000837263886e-08 1.7610180209133664e-08 0.9999999999999997 1.570799956989704
141 | children [
142 | USE WHEEL
143 | ]
144 | name "solid(3)"
145 | boundingObject USE WHEEL
146 | physics Physics {
147 | }
148 | }
149 | }
150 | DEF POLE HingeJoint {
151 | jointParameters HingeJointParameters {
152 | position -4.812689688184746e-16
153 | axis 0 0 1
154 | anchor 0 0.03000000000047226 0
155 | minStop -1.3
156 | maxStop 1.3
157 | }
158 | device [
159 | DEF POLE_POS_SENSOR PositionSensor {
160 | name "polePosSensor"
161 | }
162 | ]
163 | endPoint Solid {
164 | translation -0.00046553268549694605 0.5300037594690283 -5.369584240411796e-07
165 | rotation -0.576987441096411 -0.5775313877690046 0.5775318077462258 2.0949379676602042
166 | children [
167 | DEF POLE_ENDPOINT Solid {
168 | translation 0.5000000000000002 0 0
169 | children [
170 | DEF BALL Shape {
171 | appearance PBRAppearance {
172 | baseColor 0.03137254901960784 0.047058823529411764 1
173 | metalness 0
174 | }
175 | geometry Sphere {
176 | radius 0.01
177 | }
178 | }
179 | ]
180 | boundingObject USE BALL
181 | physics Physics {
182 | density -1
183 | mass 0.02
184 | }
185 | }
186 | DEF POLE Shape {
187 | appearance PBRAppearance {
188 | baseColor 1 0.1568627450980392 0.1568627450980392
189 | roughness 1
190 | metalness 0
191 | }
192 | geometry Box {
193 | size 1 0.020000000000000004 0.019999999999999997
194 | }
195 | }
196 | ]
197 | name "pole"
198 | boundingObject USE POLE
199 | physics Physics {
200 | density -1
201 | mass 0.2
202 | }
203 | }
204 | }
205 | ]
206 | boundingObject USE BODY
207 | physics Physics {
208 | density -1
209 | mass 2
210 | }
211 | controller "robot_supervisor_controller"
212 | supervisor TRUE
213 | }
214 |
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/10_assign_supervisor_controller_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/10_assign_supervisor_controller_2.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/11_click_play.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/11_click_play.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/1_new_proj_menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/1_new_proj_menu.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/2_world_settings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/2_world_settings.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/3_project_created.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/3_project_created.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/4_reload_world.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/4_reload_world.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/5_set_supervisor_true.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/5_set_supervisor_true.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/6_click_save_button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/6_click_save_button.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/7_new_controller_menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/7_new_controller_menu.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/8_new_controllers_created.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/8_new_controllers_created.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/9_assign_supervisor_controller_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/9_assign_supervisor_controller_1.png
--------------------------------------------------------------------------------
/robotSupervisorSchemeTutorial/images/cartPoleWorld.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aidudezzz/deepbots-tutorials/e1b48286527824c29a4c1f76cc489ed92ce6bb0d/robotSupervisorSchemeTutorial/images/cartPoleWorld.gif
--------------------------------------------------------------------------------