├── DQN
├── DQN-CartPoleStab
│ ├── DQN.py
│ ├── README.md
│ ├── config.yml
│ ├── storage
│ │ ├── README.md
│ │ ├── config-1.yml
│ │ ├── config-2.yml
│ │ ├── config-3.yml
│ │ ├── config-4.yml
│ │ ├── exp_4.ckpt
│ │ ├── loss-1.png
│ │ ├── loss-2.png
│ │ ├── loss-3.png
│ │ ├── loss-4.png
│ │ ├── loss-5.png
│ │ ├── loss-6.png
│ │ ├── reward-1.png
│ │ ├── reward-2.png
│ │ ├── reward-3.png
│ │ ├── reward-4.png
│ │ ├── reward-5.png
│ │ └── reward-6.png
│ ├── test.py
│ ├── test_rr.py
│ ├── train.py
│ └── utils.py
├── DQN-Double
│ ├── DQN.py
│ ├── README.md
│ ├── config.yml
│ ├── storage
│ │ ├── README.md
│ │ ├── config-0.yml
│ │ ├── config-1.yml
│ │ ├── config-10.yml
│ │ ├── config-11.yml
│ │ ├── config-12.yml
│ │ ├── config-13.yml
│ │ ├── config-2.yml
│ │ ├── config-3.yml
│ │ ├── config-4.yml
│ │ ├── config-5.yml
│ │ ├── config-6.yml
│ │ ├── config-7.yml
│ │ ├── config-8.yml
│ │ ├── config-9.yml
│ │ ├── exp_4.ckpt
│ │ ├── loss-0.png
│ │ ├── loss-1.png
│ │ ├── loss-10.png
│ │ ├── loss-11.png
│ │ ├── loss-12.png
│ │ ├── loss-13.png
│ │ ├── loss-2.png
│ │ ├── loss-3.png
│ │ ├── loss-4.png
│ │ ├── loss-5.png
│ │ ├── loss-6.png
│ │ ├── loss-7.png
│ │ ├── loss-8.png
│ │ ├── loss-9.png
│ │ ├── reward-0.png
│ │ ├── reward-1.png
│ │ ├── reward-10.png
│ │ ├── reward-11.png
│ │ ├── reward-12.png
│ │ ├── reward-13.png
│ │ ├── reward-2.png
│ │ ├── reward-3.png
│ │ ├── reward-4.png
│ │ ├── reward-5.png
│ │ ├── reward-6.png
│ │ ├── reward-7.png
│ │ ├── reward-8.png
│ │ └── reward-9.png
│ ├── test.py
│ ├── train.py
│ └── utils.py
├── DQN-Qube
│ ├── DQN.py
│ ├── README.md
│ ├── config.yml
│ ├── storage
│ │ ├── .~lock.Parameters.ods#
│ │ ├── Parameters.ods
│ │ ├── README.md
│ │ ├── config-1.yml
│ │ ├── config-2.yml
│ │ ├── config-3.yml
│ │ ├── config-5.yml
│ │ ├── config-6.yml
│ │ ├── config-7.yml
│ │ ├── config-8.yml
│ │ ├── config-9.yml
│ │ ├── data_real_world.pkl
│ │ ├── exp_6.ckpt
│ │ ├── loss-1.png
│ │ ├── loss-2.png
│ │ ├── loss-3.png
│ │ ├── loss-4.png
│ │ ├── loss-5.png
│ │ ├── loss-6.png
│ │ ├── loss-7.png
│ │ ├── loss-8.png
│ │ ├── loss-9.png
│ │ ├── reward-1.png
│ │ ├── reward-2.png
│ │ ├── reward-3.png
│ │ ├── reward-4.png
│ │ ├── reward-5.png
│ │ ├── reward-6-real-world.png
│ │ ├── reward-6.png
│ │ ├── reward-7.png
│ │ ├── reward-8.png
│ │ ├── reward-9.png
│ │ ├── simulatedModelOnRealPlatform-2.png
│ │ ├── simulatedModelOnRealPlatform-3.png
│ │ ├── simulatedModelOnRealPlatform-4.png
│ │ └── simulatedModelOnRealPlatform.png
│ ├── test.py
│ ├── test_on_real_platform.py
│ ├── train.py
│ └── utils.py
├── DQN-Swing
│ ├── DQN.py
│ ├── README.md
│ ├── config.yml
│ ├── storage
│ │ ├── README.md
│ │ ├── config-0.yml
│ │ ├── config-1.yml
│ │ ├── config-2.yml
│ │ ├── config-3.yml
│ │ ├── exp_0.ckpt
│ │ ├── exp_1_best.ckpt
│ │ ├── loss-0.png
│ │ ├── loss-1-find-best.png
│ │ ├── loss-1.png
│ │ ├── loss-2.png
│ │ ├── loss-3.png
│ │ ├── reward-0.png
│ │ ├── reward-1-find-best.png
│ │ ├── reward-1.png
│ │ ├── reward-2.png
│ │ └── reward-3.png
│ ├── test.py
│ ├── test_rr.py
│ ├── train.py
│ └── utils.py
└── README.md
├── LICENSE
├── MPC
├── MPC-CartPoleStab
│ ├── .idea
│ │ ├── MPC qube.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── .ipynb_checkpoints
│ │ └── example-checkpoint.ipynb
│ ├── Hive
│ │ ├── Hive.py
│ │ ├── README.md
│ │ ├── SelectionMethods.py
│ │ ├── Utilities.py
│ │ └── __init__.py
│ ├── README.md
│ ├── config.yml
│ ├── controller.py
│ ├── dynamics.py
│ ├── example.ipynb
│ ├── run.py
│ ├── storage
│ │ ├── config-1.yml
│ │ ├── config-2.yml
│ │ ├── exp_1.ckpt
│ │ ├── loss-1.png
│ │ ├── loss-2.png
│ │ ├── model_error_exp_1.png
│ │ ├── reward-1.png
│ │ └── reward-2.png
│ └── utils.py
├── MPC-CartPoleSwing
│ ├── Hive
│ │ ├── Hive.py
│ │ ├── README.md
│ │ ├── SelectionMethods.py
│ │ ├── Utilities.py
│ │ └── __init__.py
│ ├── README.md
│ ├── config.yml
│ ├── controller.py
│ ├── dynamics.py
│ ├── run.py
│ ├── storage
│ │ ├── config-1.yml
│ │ ├── config-2.yml
│ │ ├── config-3.yml
│ │ ├── config-4.yml
│ │ ├── config-5.yml
│ │ ├── loss-1.png
│ │ ├── loss-2.png
│ │ ├── model_error_exp_1.png
│ │ ├── model_error_exp_2.png
│ │ ├── reward-1.png
│ │ └── reward-2.png
│ └── utils.py
├── MPC-Double
│ ├── Hive
│ │ ├── Hive.py
│ │ ├── README.md
│ │ ├── SelectionMethods.py
│ │ ├── Utilities.py
│ │ └── __init__.py
│ ├── README.md
│ ├── config.yml
│ ├── controller.py
│ ├── dynamics.py
│ ├── run.py
│ ├── storage
│ │ ├── config-1.yml
│ │ ├── config-2.yml
│ │ ├── config-3.yml
│ │ ├── config-4.yml
│ │ ├── loss-1.png
│ │ ├── loss-2.png
│ │ ├── loss-3.png
│ │ ├── loss-4.png
│ │ ├── model_error_exp_1.png
│ │ ├── reward-1.png
│ │ ├── reward-2.png
│ │ ├── reward-3.png
│ │ └── reward-4.png
│ └── utils.py
├── MPC-Qube
│ ├── Hive
│ │ ├── Hive.py
│ │ ├── README.md
│ │ ├── SelectionMethods.py
│ │ ├── Utilities.py
│ │ └── __init__.py
│ ├── README.md
│ ├── config.yml
│ ├── controller.py
│ ├── dynamics.py
│ ├── run.py
│ ├── storage
│ │ ├── Angle Error h_0 100.png
│ │ ├── State Error h_0 100.png
│ │ ├── config-1.yml
│ │ ├── config-2.yml
│ │ ├── config-4.yml
│ │ ├── config-5.yml
│ │ ├── config-6.yml
│ │ ├── config-7.yml
│ │ ├── config_3.yml
│ │ ├── loss-1.png
│ │ ├── loss-2.png
│ │ ├── loss-3.png
│ │ ├── loss-4.png
│ │ ├── loss-5.png
│ │ ├── loss-6.png
│ │ ├── loss-7.png
│ │ ├── mpc.png
│ │ ├── reward-1.png
│ │ ├── reward-2.png
│ │ ├── reward-3.png
│ │ ├── reward-4.png
│ │ ├── reward-5.png
│ │ ├── reward-6.png
│ │ └── reward-7.png
│ ├── test.py
│ └── utils.py
└── README.md
├── README.md
├── Resources
├── DQN
│ ├── Playing Atari with Deep Reinforcement Learning.pdf
│ ├── Q-Learning in Continuous State Action Spaces.pdf
│ └── README.md
├── MPC
│ ├── Approximate Dynamic Programming with Gaussian Processes.pdf
│ ├── Constrained model predictive control: Stability and optimality.pdf
│ ├── Neural Network Dynamics for Model based Deep Rl with Model free fine tuning.pdf
│ └── README.md
├── README.md
└── figures
│ ├── README.md
│ ├── qube-after-fine-tuning.gif
│ ├── qube-before-fine-tuning.gif
│ ├── qube.gif
│ ├── stabe.gif
│ ├── swing.gif
│ └── swing_interesting.gif
└── environment.yaml
/DQN/DQN-CartPoleStab/README.md:
--------------------------------------------------------------------------------
1 | # DQN - CartPoleStab
2 |
3 | This folder contains the implementation of the DQN algorithm and its evaluation on the CartPoleStab environment.
4 | 
5 | All the hyper-parameters and experiment settings are stored in the ```config.yml``` file.
6 | 
7 | All the results (figures and models) will be stored in the ```./storage``` folder by default.
8 | 
9 | If you are not familiar with this environment, you can use the `analyze_env()` function in `utils.py` to quickly inspect the environment's state space, action space, reward range, etc. (a minimal sketch of such an inspection is given at the end of this file).
10 |
11 | ## How to run
12 |
13 | ### Test the pre-trained model
14 | 
15 | To try our pre-trained model, simply run
16 | 
17 | ```bash
18 | python test.py
19 | ```
20 |
21 | The script will load the model from the path specified in the ```config.yml``` file.
22 | 
23 | ### Train your own model
24 | 
25 | To train your own model, change the hyper-parameters in the ```config.yml``` file as you like,
26 | and then run
27 | 
28 | ```bash
29 | python train.py
30 | ```
31 | 
32 | The script will load the configurations from the ```config.yml``` file and begin training.
33 |
34 | ### Configuration parameter explanation
35 |
36 | In the ```config.yml``` file, there are two sets of configurations.
37 | The first, `model_config`, describes the neural network architecture;
38 | the second, `training_config`, controls the training process.
39 | 
40 | The `exp_number` parameter in the `training_config` is the number of your experiment. The names of the figures saved in the `./storage` folder are derived from this parameter.
41 | 
42 | If you want to train your model from scratch, set the `load_model` parameter to `False`. If it is set to `True`, the trainer will load the model from `model_path`.
43 | 
44 | If your training process is not stable and you want to keep the model with the best performance, set the `save_best` parameter to `True`.
45 |
46 |
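47 | ### Inspecting the environment
48 | 
49 | The actual inspection logic lives in `analyze_env()` in `utils.py`; the snippet below is only a minimal, illustrative sketch of that kind of inspection, using the same `CartpoleStabShort-v0` environment id as `test.py`:
50 | 
51 | ```python
52 | import gym
53 | from quanser_robots.common import GentlyTerminating  # importing quanser_robots also registers its gym environments
54 | 
55 | env = GentlyTerminating(gym.make("CartpoleStabShort-v0"))
56 | print("observation space:", env.observation_space)  # e.g. cart position, pole-angle terms and velocities
57 | print("action space:     ", env.action_space)       # continuous command; the DQN agent discretizes it into n_actions values
58 | print("reward range:     ", env.reward_range)
59 | env.close()
60 | ```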
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/config.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 |   load_model: False # If set to true, you must specify the model path; otherwise a new model is trained
3 | model_path: "storage/exp_4.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: False
8 |
9 | training_config:
10 | render: True # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.0001
13 | batch_size: 32
14 | gamma: 0.98
15 |   n_update_target: 4 # how many episodes between target network updates
16 | memory_size: 100000 # replay memory buffer size
17 | max_episode_step: 2000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/test.ckpt" # the path to save the model
20 |   use_fix_epsilon: True # set true to use a fixed epsilon, otherwise epsilon will decay
21 |   fix_epsilon: 0.1
22 |   epsilon_start: 0.5 # epsilon decay start value
23 |   epsilon_final: 0.2 # epsilon decay end value
24 |   epsilon_decay: 200 # decay bandwidth (controls how fast epsilon decays)
25 | exp_number: 7 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/README.md:
--------------------------------------------------------------------------------
1 | # DQN - Experiment Results
2 |
3 | This folder contains the experiment results on the CartPoleStab environment.
4 | 
5 | The number in each file name represents the experiment number.
6 | 
7 | For example, `config-1.yml` contains the configuration parameters of the first experiment.
8 | 
9 | We only store the best model, the configuration files and the figures here to save space.
10 | For the complete results, please see [https://github.com/liuzuxin/RL_Project_Results](https://github.com/liuzuxin/RL_Project_Results)
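11 | 
12 | A purely illustrative sketch (not part of the repo) for collecting every artifact that belongs to one experiment number, assuming the naming convention described above and that it is run from this folder:
13 | 
14 | ```python
15 | from pathlib import Path
16 | 
17 | exp_number = 1  # hypothetical example value
18 | # matches config-1.yml, loss-1.png and reward-1.png in this folder
19 | for path in sorted(Path(".").glob(f"*-{exp_number}.*")):
20 |     print(path.name)
21 | ```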
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/config-1.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_1.ckpt" # the path to load the model
4 | n_actions: 7
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.98
15 | n_update_target: 2 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 4000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_1.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.2 # episilon decay end
24 | epsilon_decay: 300 # bandwidth
25 | exp_number: 1 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/config-2.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_2.ckpt" # the path to load the model
4 | n_actions: 5
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 64 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.98
15 | n_update_target: 2 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 4000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_2.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.3 # episilon decay end
24 | epsilon_decay: 500 # bandwidth
25 | exp_number: 2 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/config-3.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_3.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.995
15 | n_update_target: 2 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 4000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_3.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.3 # episilon decay end
24 | epsilon_decay: 500 # bandwidth
25 | exp_number: 3 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/config-4.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_4.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.0001
13 | batch_size: 32
14 | gamma: 0.98
15 | n_update_target: 2 # how many episode to update the target network
16 | memory_size: 50000 # replay memory buffer size
17 | max_episode_step: 4000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_4.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.5 # episilon decay start
23 | epsilon_final: 0.2 # episilon decay end
24 | epsilon_decay: 200 # bandwidth
25 | exp_number: 4 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/exp_4.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/exp_4.ckpt
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/loss-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/loss-1.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/loss-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/loss-2.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/loss-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/loss-3.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/loss-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/loss-4.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/loss-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/loss-5.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/loss-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/loss-6.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/reward-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/reward-1.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/reward-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/reward-2.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/reward-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/reward-3.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/reward-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/reward-4.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/reward-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/reward-5.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/storage/reward-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-CartPoleStab/storage/reward-6.png
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/test.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from DQN import *
4 | import gym
5 | from quanser_robots.common import GentlyTerminating
6 | import time
7 |
8 |
9 | def test():
10 | config_path = "config.yml"
11 | print_config(config_path)
12 | config = load_config(config_path)
13 | training_config = config["training_config"]
14 | config["model_config"]["load_model"] = True
15 |
16 | env_id = "CartpoleStabShort-v0"
17 | env = GentlyTerminating(gym.make(env_id))
18 |
19 | n_episodes = 10
20 | max_episode_step = 10000
21 | print("*********************************************")
22 | print("Testing the model for 10 episodes with 10000 maximum steps per episode")
23 | print("*********************************************")
24 |
25 | policy = Policy(env,config)
26 |
27 | losses = []
28 | all_rewards = []
29 | avg_rewards = []
30 | epsilons = []
31 | for i_episode in range(n_episodes):
32 | episode_reward = 0
33 | state = env.reset()
34 | epsilon = 0
35 | epsilons.append(epsilon)
36 | for step in range(max_episode_step):
37 | env.render()
38 | time.sleep(0.003)
39 | action = policy.act(state, epsilon)
40 |
41 |             f_action = 12*(action-(policy.n_actions-1)/2)/((policy.n_actions-1)/2)  # map the discrete action index to a command in [-12, 12] (e.g. 9 actions -> {-12, -9, ..., 9, 12})
42 | next_state, reward, done, _ = env.step(f_action)
43 |
44 | policy.replay_buffer.push(state, action[0], reward, next_state, done)
45 |
46 | state = next_state
47 | episode_reward += reward
48 |
49 | if done:
50 | break
51 | print(" episode: %s, episode reward: %s" % (i_episode, episode_reward))
52 | all_rewards.append(episode_reward)
53 | avg_rewards.append(np.mean(all_rewards[-3:]))
54 |
55 | env.close()
56 | plot_fig(n_episodes, all_rewards,avg_rewards, losses)
57 |
58 | if __name__ == "__main__":
59 | test()
60 |
--------------------------------------------------------------------------------
/DQN/DQN-CartPoleStab/test_rr.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from DQN import *
4 | import argparse
5 |
6 |
7 | use_plot = True
8 | render = True
9 |
10 | window = 500
11 | collect_fr = 10
12 | plot_fr = 10
13 | render_fr = 10
14 |
15 | if use_plot:
16 | plt.ion()
17 | plot = PlotSignal(window=window)
18 |
19 | # Initialize Controller & Environment:
20 | env, ctrl = get_env_and_controller(long_pendulum=False, simulation=True, swinging=False, mouse_control=False)
21 |
22 |
23 | config_path = "config.yml"
24 | print_config(config_path)
25 | config = load_config(config_path)
26 | training_config = config["training_config"]
27 | config["model_config"]["load_model"] = True
28 |
29 | n_episodes = 10
30 | max_episode_step = 100000
31 | print("*********************************************")
32 | print("Testing the model on real platform for 10 episodes with 100000 maximum steps per episode")
33 | print("*********************************************")
34 |
35 | policy = Policy(env,config)
36 | losses = []
37 | all_rewards = []
38 | avg_rewards = []
39 | epsilons = []
40 |
41 |
42 | for i in range(n_episodes):
43 | print("\n\n###############################")
44 |     print("Episode {0}".format(i))
45 |
46 | # Reset the environment:
47 | env.reset()
48 | obs, reward, done, _ = env.step(np.zeros(1))
49 | # Start the Control Loop:
50 | print("\nStart Controller:\t\t\t", end="")
51 | for n in range(max_episode_step):
52 | action = policy.act(obs, 0)
53 | f_action = 12 * (action - (policy.n_actions - 1) / 2) / ((policy.n_actions - 1) / 2)
54 | obs, reward, done, _ = env.step(f_action)
55 | all_rewards.append(reward)
56 | if done:
57 | print("Physical Limits or End of Time reached")
58 | break
59 |
60 | if render and np.mod(n, render_fr) == 0:
61 | env.render()
62 |
63 | if use_plot and np.mod(n, collect_fr) == 0:
64 | alpha, theta = get_angles(obs[1], obs[2])
65 | plot.update(theta=theta, alpha=alpha, theta_dt=obs[4], volt=f_action, u=0, x=obs[0])
66 | env.render()
67 |
68 | if use_plot and np.mod(n, plot_fr) == 0:
69 | plot.plot_signal()
70 |
71 | # Stop the cart:
72 | env.step(np.zeros(1))
73 |
74 | print("avg reward: ",np.mean(all_rewards))
75 | print("rewards: ", all_rewards)
76 | env.close()
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/README.md:
--------------------------------------------------------------------------------
1 | # DQN - DoublePendulum
2 |
3 | This folder contains the implementation of the DQN algorithm and its evaluation on the DoublePendulum environment.
4 | 
5 | All the hyper-parameters and experiment settings are stored in the ```config.yml``` file.
6 | 
7 | All the results (figures and models) will be stored in the ```./storage``` folder by default.
8 | 
9 | If you are not familiar with this environment, you can use the `analyze_env()` function in `utils.py` to quickly inspect the environment's state space, action space, reward range, etc.
10 |
11 | ## How to run
12 |
13 | ### Test the pre-trained model
14 | 
15 | To try our pre-trained model, simply run
16 | 
17 | ```bash
18 | python test.py
19 | ```
20 |
21 | The script will load the model from the path specified in the ```config.yml``` file.
22 | 
23 | ### Train your own model
24 | 
25 | To train your own model, change the hyper-parameters in the ```config.yml``` file as you like,
26 | and then run
27 | 
28 | ```bash
29 | python train.py
30 | ```
31 | 
32 | The script will load the configurations from the ```config.yml``` file and begin training.
33 |
34 | ### Configuration parameter explanation
35 |
36 | In the ```config.yml``` file, there are two sets of configurations.
37 | The first, `model_config`, describes the neural network architecture;
38 | the second, `training_config`, controls the training process, including the epsilon-greedy exploration parameters (an illustrative sketch of the decay schedule is given at the end of this file).
39 | 
40 | The `exp_number` parameter in the `training_config` is the number of your experiment. The names of the figures saved in the `./storage` folder are derived from this parameter.
41 | 
42 | If you want to train your model from scratch, set the `load_model` parameter to `False`. If it is set to `True`, the trainer will load the model from `model_path`.
43 | 
44 | If your training process is not stable and you want to keep the model with the best performance, set the `save_best` parameter to `True`.
45 |
46 |
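47 | ### Exploration schedule (illustrative)
48 | 
49 | The `epsilon_start`, `epsilon_final` and `epsilon_decay` parameters in `training_config` suggest an exponentially decaying epsilon-greedy schedule when `use_fix_epsilon` is `False`. The exact formula is implemented in `DQN.py`; the sketch below is only an assumption-based illustration of such a schedule, with `epsilon_decay` acting as the decay bandwidth:
50 | 
51 | ```python
52 | import math
53 | 
54 | def epsilon_by_episode(episode, epsilon_start=0.9, epsilon_final=0.1, epsilon_decay=1000):
55 |     # larger epsilon_decay means a slower transition from epsilon_start to epsilon_final
56 |     return epsilon_final + (epsilon_start - epsilon_final) * math.exp(-episode / epsilon_decay)
57 | 
58 | # with the values from config.yml, epsilon moves from 0.9 towards 0.1
59 | print([round(epsilon_by_episode(e), 3) for e in (0, 1000, 5000, 20000)])
60 | ```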
--------------------------------------------------------------------------------
/DQN/DQN-Double/config.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 |   load_model: False # If set to true, you must specify the model path; otherwise a new model is trained
3 | model_path: "storage/exp_4.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.99 # discount factor
15 |   n_update_target: 8 # how many episodes between target network updates
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/test.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise epsilon will decay
21 |   fix_epsilon: 0.3
22 |   epsilon_start: 0.9 # epsilon decay start value
23 |   epsilon_final: 0.1 # epsilon decay end value
24 |   epsilon_decay: 1000 # decay bandwidth (controls how fast epsilon decays)
25 | exp_number: 14 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/README.md:
--------------------------------------------------------------------------------
1 | # DQN - Experiment Results
2 |
3 | This folder contains the experiment results on the DoublePendulum environment.
4 | 
5 | The number in each file name represents the experiment number.
6 | 
7 | For example, `config-1.yml` contains the configuration parameters of the first experiment.
8 | 
9 | We only store the best model, the configuration files and the figures here to save space.
10 | For the complete results, please see [https://github.com/liuzuxin/RL_Project_Results](https://github.com/liuzuxin/RL_Project_Results)
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-0.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_0.ckpt" # the path to load the model
4 | n_actions: 7
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 64 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 15000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.995
15 | n_update_target: 10 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_0.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.1 # episilon decay end
24 | epsilon_decay: 2000 # bandwidth
25 | exp_number: 0 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-1.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_1.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.99
15 | n_update_target: 8 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_10.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.1 # episilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 1 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-10.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_10.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.9
15 | n_update_target: 8 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_10.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.2 # episilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 10 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-11.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_11.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.9
15 | n_update_target: 8 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_11.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.2 # episilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 11 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-12.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_12.ckpt" # the path to load the model
4 | n_actions: 15
5 | n_hidden: 3 # hidden layer number
6 | size_hidden: 64 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 32
14 | gamma: 0.95
15 | n_update_target: 8 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_12.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.05 # episilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 12 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-13.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_13.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 3 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.0001
13 | batch_size: 32
14 | gamma: 0.95
15 | n_update_target: 8 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_13.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.1 # episilon decay end
24 | epsilon_decay: 2000 # bandwidth
25 | exp_number: 13 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-2.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_2.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.99
15 | n_update_target: 8 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_2.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.1 # episilon decay end
24 | epsilon_decay: 2000 # bandwidth
25 | exp_number: 2 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-3.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_3.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.99
15 | n_update_target: 8 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_3.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.1 # episilon decay end
24 | epsilon_decay: 3000 # bandwidth
25 | exp_number: 3 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-4.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_4.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.0001
13 | batch_size: 64
14 | gamma: 0.95
15 | n_update_target: 5 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_4.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.1 # episilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 5 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-5.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_5.ckpt" # the path to load the model
4 | n_actions: 17
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.0001
13 | batch_size: 64
14 | gamma: 0.95
15 | n_update_target: 5 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_5.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.1 # episilon decay end
24 | epsilon_decay: 2000 # bandwidth
25 | exp_number: 5 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-6.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_6.ckpt" # the path to load the model
4 | n_actions: 13
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.0001
13 | batch_size: 64
14 | gamma: 0.95
15 | n_update_target: 5 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_5.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.1 # episilon decay end
24 | epsilon_decay: 2000 # bandwidth
25 | exp_number: 5 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-7.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_7.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.0001
13 | batch_size: 64
14 | gamma: 0.9
15 | n_update_target: 5 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_7.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.5 # episilon decay start
23 | epsilon_final: 0.2 # episilon decay end
24 | epsilon_decay: 5000 # bandwidth
25 | exp_number: 7 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-8.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_8.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.01
13 | batch_size: 64
14 | gamma: 0.9
15 | n_update_target: 5 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_8.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.9 # episilon decay start
23 | epsilon_final: 0.1 # episilon decay end
24 | epsilon_decay: 5000 # bandwidth
25 | exp_number: 8 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/config-9.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_9.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.0001
13 | batch_size: 64
14 | gamma: 0.9
15 | n_update_target: 5 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 3000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_9.ckpt" # the path to save the model
20 | use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the episilon will decay
21 | fix_epsilon: 0.3
22 | epsilon_start: 0.5 # episilon decay start
23 | epsilon_final: 0.2 # episilon decay end
24 | epsilon_decay: 3000 # bandwidth
25 | exp_number: 9 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/exp_4.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/exp_4.ckpt
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-0.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-1.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-10.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-11.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-12.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-13.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-2.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-3.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-4.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-5.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-6.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-7.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-8.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/loss-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/loss-9.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-0.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-1.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-10.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-11.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-12.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-13.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-2.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-3.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-4.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-5.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-6.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-7.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-8.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/storage/reward-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Double/storage/reward-9.png
--------------------------------------------------------------------------------
/DQN/DQN-Double/test.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from DQN import *
4 | import gym
5 | from quanser_robots.common import GentlyTerminating
6 | import time
7 |
8 | def test():
9 | config_path = "config.yml"
10 | print_config(config_path)
11 | config = load_config(config_path)
12 | training_config = config["training_config"]
13 | config["model_config"]["load_model"] = True
14 |
15 | env_id = "DoublePendulum-v0"
16 | env = GentlyTerminating(gym.make(env_id))
17 |
18 | n_episodes = 10
19 | max_episode_step = 10000
20 | print("*********************************************")
21 | print("Testing the model for 10 episodes with 10000 maximum steps per episode")
22 | print("*********************************************")
23 |
24 | policy = Policy(env,config)
25 |
26 | losses = []
27 | all_rewards = []
28 | avg_rewards = []
29 | epsilons = []
30 | for i_episode in range(n_episodes):
31 | episode_reward = 0
32 | state = env.reset()
33 | state[4]/=10
34 | epsilon = 0
35 | epsilons.append(epsilon)
36 | for step in range(max_episode_step):
37 | env.render()
38 | time.sleep(0.01)
39 | action = policy.act(state, epsilon)
40 |
41 | f_action = 6*(action-(policy.n_actions-1)/2)/((policy.n_actions-1)/2)
42 | next_state, reward, done, _ = env.step(f_action)
43 | reward = 10*reward
44 | next_state[4]/=10
45 |
46 | policy.replay_buffer.push(state, action[0], reward, next_state, done)
47 |
48 | state = next_state
49 | episode_reward += reward
50 |
51 | if done:
52 | break
53 | print(" episode: %s, episode reward: %s" % (i_episode, episode_reward))
54 | all_rewards.append(episode_reward)
55 | avg_rewards.append(np.mean(all_rewards[-3:]))
56 |
57 | env.close()
58 | plot_fig(n_episodes, all_rewards,avg_rewards, losses)
59 |
60 | if __name__ =="__main__":
61 | test()
62 |
63 |
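The inline expression `f_action = 6*(action-(policy.n_actions-1)/2)/((policy.n_actions-1)/2)` above maps the discrete action index chosen by the Q-network onto an evenly spaced, symmetric continuous command. The same mapping, restated as a small helper for clarity (the function and parameter names are mine, added purely for illustration):

```python
def index_to_command(index, n_actions, max_u=6.0):
    # Map a discrete index in {0, ..., n_actions-1} to an evenly spaced command
    # in [-max_u, max_u]; max_u is 6 V here, while the Qube and Swing scripts use 5.
    half = (n_actions - 1) / 2
    return max_u * (index - half) / half

# For example, with 7 discrete actions the command grid is [-6, -4, -2, 0, 2, 4, 6]:
print([index_to_command(i, 7) for i in range(7)])
```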
--------------------------------------------------------------------------------
/DQN/DQN-Qube/README.md:
--------------------------------------------------------------------------------
1 | # DQN - Qube
2 |
3 | This folder contains the implementation of the DQN algorithm and its evaluation on the Qube environment
4 |
5 | All the hyper-parameters and experiment settings are stored in the ```config.yml``` file
6 |
7 | All the results (figures and models) will be stored in the ```./storage``` folder by default
8 |
9 | If you are not familiar with this environment, you can use the `analyze_env()` function in `utils.py` to quickly inspect the environment's state space, action space, reward range, etc. (see the sketch at the end of this README)
10 |
11 | ## How to run
12 |
13 | ### Test the pre-trained model
14 |
15 | To try our pre-trained model, simply run
16 |
17 | ```bash
18 | python test.py
19 | ```
20 |
21 | The script will load the model from the path specified in the ```config.yml``` file
22 |
23 | ### Train your own model
24 |
25 | To train your own model, you can change the hyper-parameters in the ```config.yml``` to whatever you want,
26 | and then run
27 |
28 | ```bash
29 | python train.py
30 | ```
31 |
32 | The script will load the configurations in the ```config.yml``` file and begin to train
33 |
34 | ### Configuration parameter explanation
35 |
36 | In the ```config.yml``` file, there are two sets of configuration.
37 | The first, `model_config`, describes the neural network architecture;
38 | the second, `training_config`, describes the training process.
39 |
40 | The `exp_number` parameter in the `training_config` is the number of your experiment. The names of the figures saved in the `./storage` folder are determined by this parameter.
41 |
42 | If you want to train your model from scratch, then set the `load_model` parameter to `False`. If set to `True`, the trainer will load the model from `model_path`.
43 |
44 | If your training process is not stable and you want to keep the model at its best performance, set the `save_best` parameter to `True`.
45 |
46 |
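As referenced above, a minimal sketch of how `analyze_env()` could be called to inspect the environment before training. The exact signature lives in `utils.py`, which is not reproduced in this section, so the call below assumes it simply takes a gym environment:

```python
import gym
from quanser_robots.common import GentlyTerminating
from utils import analyze_env  # assumed signature: analyze_env(env)

env = GentlyTerminating(gym.make("Qube-v0"))
analyze_env(env)  # prints the state space, action space, reward range, ...
env.close()
```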
--------------------------------------------------------------------------------
/DQN/DQN-Qube/config.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: True # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_6.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 25000 # how many episodes to train
12 | learning_rate: 0.0001
13 | batch_size: 64
14 | gamma: 0.99
15 | n_update_target: 6 # how many episodes to update the target network
16 | memory_size: 100000 # replay memory buffer size
17 | max_episode_step: 500 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/test.ckpt" # the path to save the model
20 |   use_fix_epsilon: True # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.1
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.05 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 11 # experiment number
26 | save_best: False
27 | save_thres: 510
28 |
29 |
30 |
31 |
32 |
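The epsilon parameters above feed the exploration schedule implemented in `DQN.py` (not shown in this section). For illustration only, assuming the commonly used exponential form, the schedule implied by `epsilon_start`, `epsilon_final` and `epsilon_decay` would be:

```python
import math

def epsilon_by_episode(episode, start=0.9, final=0.05, decay=1000):
    # Assumed exponential schedule: begins at `start`, converges to `final`,
    # with `decay` acting as the time constant (the "bandwidth" in the comments above).
    return final + (start - final) * math.exp(-episode / decay)
```

When `use_fix_epsilon` is `True`, as in this file, the agent would instead use `fix_epsilon` (0.1) throughout.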
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/.~lock.Parameters.ods#:
--------------------------------------------------------------------------------
1 | ,lambert,lambert-Alienware-15-R3,09.03.2019 21:02,file:///home/lambert/.config/libreoffice/4;
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/Parameters.ods:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/Parameters.ods
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/README.md:
--------------------------------------------------------------------------------
1 | # DQN - Experiment Results
2 |
3 | This folder contains the experiment results on the Qube environment
4 |
5 | The number in each file name represents the experiment number.
6 |
7 | For example, `config-1.yml` represents the configuration parameters in the first experiment.
8 |
9 | We only store the best model, the configuration files and the figures here to save space;
10 | for the complete results, please see [https://github.com/liuzuxin/RL_Project_Results](https://github.com/liuzuxin/RL_Project_Results)
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/config-1.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/1000_5000_0.001.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 5000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 50
14 | gamma: 0.99
15 | n_update_target: 6 # how many episodes to update the target network
16 | memory_size: 100000 # replay memory buffer size
17 | max_episode_step: 500 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/1000_5000_0.001.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.1
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.05 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 1 # experiment number
26 | save_best: False
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/config-2.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/1000_5000_0.0001_2.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 5000 # how many episodes to train
12 | learning_rate: 0.00001
13 | batch_size: 50
14 | gamma: 0.99
15 | n_update_target: 6 # how many episodes to update the target network
16 | memory_size: 100000 # replay memory buffer size
17 | max_episode_step: 500 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/1000_5000_0.0001_2.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.1
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.05 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 2 # experiment number
26 | save_best: False
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/config-3.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/1000_5000_0.00001_3.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 5000 # how many episodes to train
12 | learning_rate: 0.000001
13 | batch_size: 50
14 | gamma: 0.99
15 | n_update_target: 6 # how many episodes to update the target network
16 | memory_size: 100000 # replay memory buffer size
17 | max_episode_step: 500 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/1000_5000_0.00001_3.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.1
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.05 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 3 # experiment number
26 | save_best: False
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/config-5.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/5000_5000_0.001_5.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 5000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 50
14 | gamma: 0.99
15 | n_update_target: 6 # how many episodes to update the target network
16 | memory_size: 100000 # replay memory buffer size
17 | max_episode_step: 500 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/5000_5000_0.001_5.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.1
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.05 # epsilon decay end
24 | epsilon_decay: 5000 # bandwidth
25 | exp_number: 5 # experiment number
26 | save_best: False
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/config-6.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_6.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 5000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 30
14 | gamma: 0.99
15 | n_update_target: 6 # how many episodes to update the target network
16 | memory_size: 100000 # replay memory buffer size
17 | max_episode_step: 500 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_6.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.1
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.05 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 6 # experiment number
26 | save_best: False
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/config-7.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/1000_5000_0.001_7.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 5000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 70
14 | gamma: 0.99
15 | n_update_target: 6 # how many episodes to update the target network
16 | memory_size: 100000 # replay memory buffer size
17 | max_episode_step: 500 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/1000_5000_0.001_7.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.1
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.05 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 7 # experiment number
26 | save_best: False
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/config-8.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/1000_5000_0.001_8.ckpt" # the path to load the model
4 | n_actions: 27
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 5000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 30
14 | gamma: 0.99
15 | n_update_target: 6 # how many episodes to update the target network
16 | memory_size: 100000 # replay memory buffer size
17 | max_episode_step: 500 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/1000_20000_0.001_8.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.1
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.05 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 8 # experiment number
26 | save_best: False
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/config-9.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/1000_20000_0.001_9.ckpt" # the path to load the model
4 | n_actions: 45
5 | n_hidden: 1 # hidden layer number
6 | size_hidden: 256 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 20000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 30
14 | gamma: 0.99
15 | n_update_target: 6 # how many episodes to update the target network
16 | memory_size: 100000 # replay memory buffer size
17 | max_episode_step: 500 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/1000_20000_0.001_9.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.1
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.05 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 9 # experiment number
26 | save_best: False
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/data_real_world.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/data_real_world.pkl
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/exp_6.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/exp_6.ckpt
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/loss-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/loss-1.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/loss-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/loss-2.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/loss-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/loss-3.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/loss-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/loss-4.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/loss-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/loss-5.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/loss-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/loss-6.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/loss-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/loss-7.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/loss-8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/loss-8.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/loss-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/loss-9.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/reward-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/reward-1.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/reward-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/reward-2.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/reward-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/reward-3.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/reward-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/reward-4.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/reward-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/reward-5.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/reward-6-real-world.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/reward-6-real-world.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/reward-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/reward-6.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/reward-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/reward-7.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/reward-8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/reward-8.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/reward-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/reward-9.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/simulatedModelOnRealPlatform-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/simulatedModelOnRealPlatform-2.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/simulatedModelOnRealPlatform-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/simulatedModelOnRealPlatform-3.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/simulatedModelOnRealPlatform-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/simulatedModelOnRealPlatform-4.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/storage/simulatedModelOnRealPlatform.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Qube/storage/simulatedModelOnRealPlatform.png
--------------------------------------------------------------------------------
/DQN/DQN-Qube/test.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from DQN import *
4 | import gym
5 | from quanser_robots.common import GentlyTerminating
6 |
7 | def test():
8 | config_path = "config.yml"
9 | print_config(config_path)
10 | config = load_config(config_path)
11 | training_config = config["training_config"]
12 | config["model_config"]["load_model"] = True
13 |
14 | env_id ="Qube-v0"
15 | env = GentlyTerminating(gym.make(env_id))
16 |
17 | n_episodes = 10
18 | max_episode_step = 10000
19 | print("*********************************************")
20 | print("Testing the model for 10 episodes with 10000 maximum steps per episode")
21 | print("*********************************************")
22 |
23 | policy = Policy(env,config)
24 |
25 | losses = []
26 | all_rewards = []
27 | avg_rewards = []
28 | epsilons = []
29 | for i_episode in range(n_episodes):
30 | episode_reward = 0
31 | state = env.reset()
32 | state[4:6]/=20
33 | epsilon = 0
34 | epsilons.append(epsilon)
35 | for step in range(max_episode_step):
36 | env.render()
37 | action = policy.act(state, epsilon)
38 | f_action = 5*(action-(policy.n_actions-1)/2)/((policy.n_actions-1)/2)
39 | next_state, reward, done, _ = env.step(f_action)
40 | reward = 100*reward
41 | next_state[4:6]/=20
42 | policy.replay_buffer.push(state, action[0], reward, next_state, done)
43 | state = next_state
44 | episode_reward += reward
45 | if done:
46 | break
47 | all_rewards.append(episode_reward)
48 | avg_rewards.append(np.mean(all_rewards[-3:]))
49 | plot_fig(n_episodes, all_rewards,avg_rewards, losses)
50 | env.close()
51 |
52 | if __name__ =="__main__":
53 | test()
54 |
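Note the preprocessing inside the loop above: the angular-velocity entries of the state (indices 4 and 5) are divided by 20 and the reward is multiplied by 100, presumably to match the scaling used during training. The same two operations, restated as a helper (the names are mine, for illustration only):

```python
import numpy as np

def preprocess(state, reward, vel_scale=20.0, reward_scale=100.0):
    # Mirrors the inline operations in test.py: shrink the velocity components
    # of the observation and enlarge the reward before it is stored/accumulated.
    state = np.array(state, dtype=float)
    state[4:6] /= vel_scale
    return state, reward * reward_scale
```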
--------------------------------------------------------------------------------
/DQN/DQN-Qube/test_on_real_platform.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from DQN import *
4 | import argparse
5 | from quanser_robots import GentlyTerminating
6 |
7 | plt.style.use('seaborn')
8 | env = GentlyTerminating(gym.make('QubeRR-v0'))
9 |
10 | config_path = "config.yml"
11 | print_config(config_path)
12 | config = load_config(config_path)
13 | training_config = config["training_config"]
14 | config["model_config"]["load_model"] = True
15 |
16 | n_episodes = 10
17 | max_episode_step = 10000
18 | print("*********************************************")
19 | print("Testing the model for 10 episodes with 10000 maximum steps per episode")
20 | print("*********************************************")
21 |
22 | policy = Policy(env,config)
23 |
24 | losses = []
25 | all_rewards = []
26 | avg_rewards = []
27 | epsilons = []
28 |
29 | s_all = []
30 | a_all = []
31 |
32 | for i in range(n_episodes):
33 |     print("Testing episode %s" % i)
34 | obs_old = env.reset()
35 | obs_old[4:6] /= 20
36 | done = False
37 | while not done:
38 | env.render()
39 | action = policy.act(obs_old, 0.0)
40 | f_action = 5 * (action - (policy.n_actions - 1) / 2) / ((policy.n_actions - 1) / 2)
41 | obs_new, reward, done, info = env.step(f_action)
42 | reward = 100*reward
43 | all_rewards.append(reward)
44 | obs_new[4:6] /= 20
45 | obs_old = obs_new
46 | s_all.append(info['s'])
47 | a_all.append(info['a'])
48 |
49 | print("avg reward: ",np.mean(all_rewards))
50 | print("rewards: ", all_rewards)
51 | env.close()
52 |
53 | fig, axes = plt.subplots(5, 1, figsize=(5, 8), tight_layout=True)
54 |
55 | s_all = np.stack(s_all)
56 | a_all = np.stack(a_all)
57 |
58 | n_points = s_all.shape[0]
59 | t = np.linspace(0, n_points * env.unwrapped.timing.dt_ctrl, n_points)
60 | for i in range(4):
61 | state_labels = env.unwrapped.state_space.labels[i]
62 | axes[i].plot(t, s_all.T[i], label=state_labels, c='C{}'.format(i))
63 | axes[i].legend(loc='lower right')
64 | action_labels = env.unwrapped.action_space.labels[0]
65 | axes[4].plot(t, a_all.T[0], label=action_labels, c='C{}'.format(4))
66 | axes[4].legend(loc='lower right')
67 |
68 | axes[0].set_ylabel('ang pos [rad]')
69 | axes[1].set_ylabel('ang pos [rad]')
70 | axes[2].set_ylabel('ang vel [rad/s]')
71 | axes[3].set_ylabel('ang vel [rad/s]')
72 | axes[4].set_ylabel('voltage [V]')
73 | axes[4].set_xlabel('time [seconds]')
74 | plt.show()
75 |
76 |
77 |
--------------------------------------------------------------------------------
/DQN/DQN-Swing/README.md:
--------------------------------------------------------------------------------
1 | # DQN - CartPoleSwing
2 |
3 | This folder contains the implementation of the DQN algorithm and its evaluation on the CartPoleSwing environment
4 |
5 | All the hyper-parameters and experiment settings are stored in the ```config.yml``` file
6 |
7 | All the results (figures and models) will be stored in the ```./storage``` folder by default
8 |
9 | If you are not familiar with this environment, you can use the `analyze_env()` function in `utils.py` to quickly inspect the environment's state space, action space, reward range, etc.
10 |
11 | ## How to run
12 |
13 | ### Test the pre-trained model
14 |
15 | To try our pre-trained model, simply run
16 |
17 | ```bash
18 | python test.py
19 | ```
20 |
21 | The script will load the model from the path specified in the ```config.yml``` file
22 |
23 | ### Train your own model
24 |
25 | To train your own model, you can change the hyper-parameters in the ```config.yml``` to whatever you want,
26 | and then run
27 |
28 | ```bash
29 | python train.py
30 | ```
31 |
32 | The script will load the configurations in the ```config.yml``` file and begin to train
33 |
34 | ### Configuration parameter explanation
35 |
36 | In the ```config.yml``` file, there are two sets of configuration.
37 | The first, `model_config`, describes the neural network architecture;
38 | the second, `training_config`, describes the training process.
39 |
40 | The `exp_number` parameter in the `training_config` is the number of your experiment. The names of the figures saved in the `./storage` folder are determined by this parameter.
41 |
42 | If you want to train your model from scratch, then set the `load_model` parameter to `False`. If set to `True`, the trainer will load the model from `model_path`.
43 |
44 | If your training process is not stable and you want to keep the model at its best performance, set the `save_best` parameter to `True` (a rough sketch of this bookkeeping follows at the end of this README).
45 |
46 |
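The `save_best`/`save_thres` bookkeeping referenced above lives in the training code, which is not reproduced in this section. A rough sketch of the usual pattern, under the assumption that `save_thres` is the episode reward a run must exceed before checkpointing starts and that the model is a PyTorch module (as the `.ckpt` files suggest); this is an illustration, not the repository's actual implementation:

```python
import torch

best_reward = float("-inf")

def maybe_save_best(model, episode_reward, save_thres, path="storage/exp_best.ckpt"):
    # Keep a checkpoint only once the episode reward passes save_thres
    # and improves on the best reward seen so far.
    global best_reward
    if episode_reward > save_thres and episode_reward > best_reward:
        best_reward = episode_reward
        torch.save(model.state_dict(), path)
```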
--------------------------------------------------------------------------------
/DQN/DQN-Swing/config.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_1_best.ckpt" # the path to load the model
4 | n_actions: 7
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 64 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 15000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.99
15 | n_update_target: 3 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 4000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/test.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.3
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.1 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 4 # experiment number
26 | save_best: False
27 | save_thres: 5000
28 |
29 |
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/README.md:
--------------------------------------------------------------------------------
1 | # DQN - Experiment Results
2 |
3 | This folder contains the experiment results on the CartPoleSwing environment
4 |
5 | The number in each file name represents the experiment number.
6 |
7 | For example, `config-1.yml` represents the configuration parameters in the first experiment.
8 |
9 | We only store the best model, the configuration files and the figures here to save space;
10 | for the complete results, please see [https://github.com/liuzuxin/RL_Project_Results](https://github.com/liuzuxin/RL_Project_Results)
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/config-0.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/epsilon_decay_4000.ckpt" # the path to load the model
4 | n_actions: 9
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 128 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 15000 # how many episodes to train
12 | learning_rate: 0.0003
13 | batch_size: 64
14 | gamma: 0.995
15 | n_update_target: 4 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 4000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/epsilon_decay_4000.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.3
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.05 # epsilon decay end
24 | epsilon_decay: 4000 # bandwidth
25 | exp_number: 0 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/config-1.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_1.ckpt" # the path to load the model
4 | n_actions: 7
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 64 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 15000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.995
15 | n_update_target: 3 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 4000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_1.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.3
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.1 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 1 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/config-2.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_2.ckpt" # the path to load the model
4 | n_actions: 5
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 64 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 15000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.999
15 | n_update_target: 3 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 4000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_2.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.3
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.1 # epsilon decay end
24 | epsilon_decay: 2000 # bandwidth
25 | exp_number: 2 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/config-3.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_2.ckpt" # the path to load the model
4 | n_actions: 11
5 | n_hidden: 2 # hidden layer number
6 | size_hidden: 64 # hidden layer size
7 | use_cuda: True
8 |
9 | training_config:
10 | render: False # render the environment, set false to accelerate training. test.py does not need this parameter
11 | n_episodes: 15000 # how many episodes to train
12 | learning_rate: 0.001
13 | batch_size: 64
14 | gamma: 0.999
15 | n_update_target: 3 # how many episode to update the target network
16 | memory_size: 1000000 # replay memory buffer size
17 | max_episode_step: 4000 # maximum steps per episode
18 | random_seed: 1234 # do not have to change this parameter
19 | save_model_path: "storage/exp_3.ckpt" # the path to save the model
20 |   use_fix_epsilon: False # set true to use a fixed epsilon, otherwise the epsilon will decay
21 |   fix_epsilon: 0.3
22 |   epsilon_start: 0.9 # epsilon decay start
23 |   epsilon_final: 0.1 # epsilon decay end
24 | epsilon_decay: 1000 # bandwidth
25 | exp_number: 3 # experiment number
26 | save_best: False
27 |
28 |
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/exp_0.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/exp_0.ckpt
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/exp_1_best.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/exp_1_best.ckpt
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/loss-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/loss-0.png
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/loss-1-find-best.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/loss-1-find-best.png
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/loss-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/loss-1.png
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/loss-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/loss-2.png
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/loss-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/loss-3.png
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/reward-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/reward-0.png
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/reward-1-find-best.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/reward-1-find-best.png
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/reward-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/reward-1.png
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/reward-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/reward-2.png
--------------------------------------------------------------------------------
/DQN/DQN-Swing/storage/reward-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/DQN/DQN-Swing/storage/reward-3.png
--------------------------------------------------------------------------------
/DQN/DQN-Swing/test.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from DQN import *
4 | import gym
5 | from quanser_robots.common import GentlyTerminating
6 | import time
7 |
8 | def test():
9 | config_path = "config.yml"
10 | print_config(config_path)
11 | config = load_config(config_path)
12 | training_config = config["training_config"]
13 | config["model_config"]["load_model"] = True
14 |
15 | env_id = "CartpoleSwingShort-v0"
16 | env = GentlyTerminating(gym.make(env_id))
17 |
18 | n_episodes = 10
19 | max_episode_step = 10000
20 | print("*********************************************")
21 | print("Testing the model for 10 episodes with 10000 maximum steps per episode")
22 | print("*********************************************")
23 |
24 | policy = Policy(env,config)
25 | losses = []
26 | all_rewards = []
27 | avg_rewards = []
28 | epsilons = []
29 | for i_episode in range(n_episodes):
30 | episode_reward = 0
31 | state = env.reset()
32 | state[4]/=10
33 | epsilon = 0
34 | epsilons.append(epsilon)
35 | for step in range(max_episode_step):
36 | env.render()
37 | time.sleep(0.001)
38 | action = policy.act(state, epsilon)
39 | f_action = 5*(action-(policy.n_actions-1)/2)/((policy.n_actions-1)/2)
40 | next_state, reward, done, _ = env.step(f_action)
41 | next_state[4]/=10
42 | policy.replay_buffer.push(state, action[0], reward, next_state, done)
43 | state = next_state
44 | episode_reward += reward
45 | if done:
46 | break
47 | print(" episode: %s, episode reward: %s" % (i_episode, episode_reward))
48 | all_rewards.append(episode_reward)
49 | avg_rewards.append(np.mean(all_rewards[-3:]))
50 |
51 | env.close()
52 | plot_fig(n_episodes, all_rewards,avg_rewards, losses)
53 |
54 | if __name__ =="__main__":
55 | test()
56 |
--------------------------------------------------------------------------------
/DQN/DQN-Swing/test_rr.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from DQN import *
4 |
5 |
6 | use_plot = True
7 | render = True
8 |
9 | window = 500
10 | collect_fr = 10
11 | plot_fr = 10
12 | render_fr = 10
13 |
14 | if use_plot:
15 | plt.ion()
16 | plot = PlotSignal(window=window)
17 |
18 | # Initialize Controller & Environment:
19 | env, ctrl = get_env_and_controller(long_pendulum=False, simulation=True, swinging=True, mouse_control=False)
20 |
21 |
22 | config_path = "config.yml"
23 | print_config(config_path)
24 | config = load_config(config_path)
25 | training_config = config["training_config"]
26 | config["model_config"]["load_model"] = True
27 |
28 | n_episodes = 10
29 | max_episode_step = 100000
30 | print("*********************************************")
31 | print("Testing the model on real platform for 10 episodes with 100000 maximum steps per episode")
32 | print("*********************************************")
33 |
34 | policy = Policy(env,config)
35 | losses = []
36 | all_rewards = []
37 | avg_rewards = []
38 | epsilons = []
39 |
40 |
41 | for i in range(n_episodes):
42 | print("\n\n###############################")
43 |     print("Episode {0}".format(i))
44 |
45 | # Reset the environment:
46 | env.reset()
47 | obs, reward, done, _ = env.step(np.zeros(1))
48 | # Start the Control Loop:
49 | print("\nStart Controller:\t\t\t", end="")
50 | for n in range(max_episode_step):
51 | obs[4] /= 10
52 | action = policy.act(obs, 0)
53 | f_action = 5 * (action - (policy.n_actions - 1) / 2) / ((policy.n_actions - 1) / 2)
54 | obs, reward, done, _ = env.step(f_action)
55 | all_rewards.append(reward)
56 | if done:
57 | print("Physical Limits or End of Time reached")
58 | break
59 |
60 | if render and np.mod(n, render_fr) == 0:
61 | env.render()
62 |
63 | if use_plot and np.mod(n, collect_fr) == 0:
64 | alpha, theta = get_angles(obs[1], obs[2])
65 | plot.update(theta=theta, alpha=alpha, theta_dt=obs[4], volt=f_action, u=0, x=obs[0])
66 | env.render()
67 |
68 | if use_plot and np.mod(n, plot_fr) == 0:
69 | plot.plot_signal()
70 |
71 | # Stop the cart:
72 | env.step(np.zeros(1))
73 |
74 | print("avg reward: ",np.mean(all_rewards))
75 | print("rewards: ", all_rewards)
76 | env.close()
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/DQN/README.md:
--------------------------------------------------------------------------------
1 | # DQN - Deep Q-Network
2 |
3 | This folder contains the implementation of the DQN algorithm and its evaluation.
4 |
5 | For more details on DQN, see the paper [here](https://arxiv.org/abs/1312.5602)
6 |
7 | Choose an environment folder and follow the instructions in its README to run everything.
8 |
9 | ## Overview of the experiment results:
10 |
11 | The best experiment parameters in the different environments:
12 |
13 | | Environment | Learning Rate | Epsilon Decay | Batch Size | Action Number | Gamma | average episode reward |
14 | | -------- | -----: | :----: | :----: | :----: | :----: | :----: |
15 | | Qube | 0.001 | 1000 | 50 | 9 | 0.99 | 410 |
16 | | CartPole Swingup | 0.001 | 1000 | 64 | 7 | 0.995 | 4126 |
17 | | CartPole Stab | 0.001 | 500 | 64 | 9 | 0.995 | 1535 |
18 | | Double CartPole | 0.001 | 2000 | 64 | 7 | 0.995 | 383 |
19 |
20 |
21 |
22 |
23 | ### CartpoleStabShort-v0
24 | episode_rewards:
25 |
26 | learning_rate: 3e-5
27 |
28 | networks architecture:
29 |
30 | gamma: 0.98
31 |
32 | batch size: 20
33 |
34 | weight_decay: 1e-4
35 |
36 | num_epochs: 2000
37 |
38 | ### Qube-v0:
39 | episode_rewards:
40 |
41 | learning_rate: 3e-5
42 |
43 | networks architecture:
44 |
45 | gamma: 0.98
46 |
47 | batch size: 20
48 |
49 | weight_decay: 1e-4
50 |
51 | num_epochs: 2000
52 |
53 |
54 | ### DoublePendulum-v0
55 | episode_rewards:
56 |
57 | learning_rate: 3e-5
58 |
59 | networks architecture:
60 |
61 | gamma: 0.99
62 |
63 | batch size: 20
64 |
65 | weight_decay: 1e-4
66 |
67 | num_epochs: 2000
68 |
69 | ### CartpoleSwingShort-v0
70 | learning_rate: 3e-5
71 |
72 | networks architecture:
73 |
74 | gamma: 0.98
75 |
76 | batch size: 20
77 |
78 | weight_decay: 1e-4
79 |
80 | num_epochs: 2000
81 |
82 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 liuzuxin
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/.idea/MPC qube.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/Hive/SelectionMethods.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- SELECTION METHODS
4 |
5 | __all__ = ["tournament", "disruptive"]
6 |
7 | # ---- MODULE DOCSTRING
8 |
9 | __doc__ = """
10 |
11 | (C) Hive, Romain Wuilbercq, 2017
12 | _
13 | /_/_ .'''.
14 | =O(_)))) ...' `.
15 | \_\ `. .'''X
16 | `..'
17 | .---. .---..-./`) ,---. ,---. .-''-.
18 | | | |_ _|\ .-.')| / | | .'_ _ \
19 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
20 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
21 | | (_,_) .---. | _( )_ || (_,_)___|
22 | | _ _--. | | | \ (_ o._) /' \ .---.
23 | |( ' ) | | | | \ (_,_) / \ `-' /
24 | (_{;}_)| | | | \ / \ /
25 | '(_,_) '---' '---' `---` `'-..-'
26 |
27 | Description:
28 | -----------
29 |
30 | SelectionMethods.py
31 |
32 | Defines a collection of selection methods to be used with Hive.
33 |
34 | """
35 |
36 | # ---- IMPORT MODULES
37 |
38 | import random
39 |
40 | import numpy as np
41 |
42 | # ---- SELECTION METHOD(S)
43 |
44 | def tournament(values, crowd_size=None):
45 | """
46 |
47 | Defines a selection process whereby a number of individuals
48 | from a colony/generation are selected to compete.
49 |
50 | Individuals with greater fitness values compared to the rest
51 | have higher chance to be kept for the next cycle/generation
52 | - i.e. survival of the fittest. This method prones elitism.
53 |
54 | A solution compete with a fixed number of randomly chosen individuals
55 | (i.e. "crowd_size") from the population.
56 |
57 | This function uses the "random.sample" function from the python base
58 | "random" module and the "np.where" function from the "numpy" module.
59 |
60 | Parameters:
61 | ----------
62 |
63 | :param int crowd_size: number of individuals competing
64 |
65 | """
66 |
67 | # computes battle score metrics
68 | scores = []
69 | for i in range(len(values)):
70 |
71 | # selects a pool of opponents randomly
72 | if (crowd_size != None) and (type(crowd_size) is int):
73 | opponents = random.sample(values, crowd_size)
74 | else:
75 | opponents = values
76 |
77 | # battles against opponents
78 | scores.append( sum(np.where(values[i]>opponents, 1, 0)) )
79 |
80 | # returns an array of normalized scores
81 | return scores / sum(scores)
82 |
83 | def disruptive(values):
84 | """
85 |
86 | Defines a selection process whereby a better chance is given to
87 | individuals with the highest and lowest fitness values - i.e. those
88 | further away from a "norm".
89 |
90 | This method represents a good mechanism by which diversity can
91 | be passed onto the next generation/cycle and avoid too-early
92 | convergence - i.e. improves the exploration of the search domain.
93 |
94 | This function uses the "np.mean" function from the "numpy" module.
95 |
96 | """
97 |
98 | # computes mean fitness of population
99 | mean_ = np.mean(values)
100 |
101 | # computes score metrics
102 | scores = []
103 | for i in range(len(values)):
104 | scores.append(abs(values[i] - mean_))
105 |
106 | # returns an array of normalized scores
107 | return scores / sum(scores)
108 |
109 | # ---- END
110 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/Hive/Utilities.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- MODULE DOCSTRING
4 |
5 | __doc__ = """
6 |
7 | (C) Hive, Romain Wuilbercq, 2017
8 | _
9 | /_/_ .'''.
10 | =O(_)))) ...' `.
11 | \_\ `. .'''X
12 | `..'
13 | .---. .---..-./`) ,---. ,---. .-''-.
14 | | | |_ _|\ .-.')| / | | .'_ _ \
15 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
16 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
17 | | (_,_) .---. | _( )_ || (_,_)___|
18 | | _ _--. | | | \ (_ o._) /' \ .---.
19 | |( ' ) | | | | \ (_,_) / \ `-' /
20 | (_{;}_)| | | | \ / \ /
21 | '(_,_) '---' '---' `---` `'-..-'
22 |
23 | Description:
24 | -----------
25 |
26 | A series of utility functions (such as plotting function etc...).
27 |
28 | """
29 |
30 | # ---- IMPORT MODULES
31 |
32 | try:
33 | import matplotlib.pyplot as plt
34 | from matplotlib.font_manager import FontProperties
35 | except:
36 | raise ImportError("Install 'matplotlib' to plot convergence results.")
37 |
38 | # ---- CONVERGENCE PLOT
39 |
40 | def ConvergencePlot(cost):
41 | """
42 |
43 | Monitors convergence.
44 |
45 | Parameters:
46 | ----------
47 |
48 | :param dict cost: mean and best cost over cycles/generations as returned
49 | by an optimiser.
50 |
51 | """
52 | plt.rc('font',family= 'Tibetan Machine Uni')
53 | fs=15
54 | font = FontProperties();
55 | font.set_size( fs) #'larger');
56 | labels = ["Best Bee's Cost", "Mean Bees' Cost"]
57 | plt.figure(figsize=(7, 4.5));
58 | plt.plot(range(len(cost["best"])), cost["best"], label=labels[0]);
59 | plt.scatter(range(len(cost["mean"])), cost["mean"], color='red', label=labels[1]);
60 | plt.xlabel("Iteration",fontsize=fs);
61 | plt.ylabel("Cost",fontsize=fs);
62 | plt.xticks(fontsize=10)
63 | plt.yticks(fontsize=10)
64 | plt.legend(loc="best", prop = font);
65 | plt.xlim([0,len(cost["mean"])]);
66 | plt.grid();
67 | plt.show();
68 |
69 | # ---- END
70 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/Hive/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- MODULE DOCSTRING
4 |
5 | __doc__ = """
6 |
7 | (C) Hive, Romain Wuilbercq, 2017
8 | _
9 | /_/_ .'''.
10 | =O(_)))) ...' `.
11 | \_\ `. .'''X
12 | `..'
13 | .---. .---..-./`) ,---. ,---. .-''-.
14 | | | |_ _|\ .-.')| / | | .'_ _ \
15 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
16 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
17 | | (_,_) .---. | _( )_ || (_,_)___|
18 | | _ _--. | | | \ (_ o._) /' \ .---.
19 | |( ' ) | | | | \ (_,_) / \ `-' /
20 | (_{;}_)| | | | \ / \ /
21 | '(_,_) '---' '---' `---` `'-..-'
22 |
23 | **Hive** is a simple implementation of a swarm-based optimisation
24 | algorithm called the Artificial Bee Colony (ABC) algorithm.
25 |
26 | The Artificial Bee Colony (ABC) algorithm is based on the intelligent foraging
27 | behaviour of honey bee swarm, proposed by Karaboga in 2005.
28 |
29 | """
30 |
31 | __author__ = "Romain Wuilbercq"
32 |
33 | # ---- END
34 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/README.md:
--------------------------------------------------------------------------------
1 | # MPC - CartPoleStab
2 | This folder contains the implementation of the MPC algorithm and its evaluation on the CartPoleStab environment
3 |
4 | The implementation mainly follows the paper [here](https://ieeexplore.ieee.org/abstract/document/8463189)
5 |
6 | To optimize the MPC controller, we use the [Artificial Bee Colony](https://en.wikipedia.org/wiki/Artificial_bee_colony_algorithm) (ABC) optimization algorithm,
7 | instead of the original random shooting method in the paper. The implementation of the ABC algorithm is based on this repo: [https://github.com/rwuilbercq/Hive](https://github.com/rwuilbercq/Hive)
8 |
9 | All the hyper-parameters and experiment settings are stored in the ```config.yml``` file
10 |
11 | All the results (figures and models) will be stored in the ```./storage``` folder by default
12 |
13 | If you are not familiar with this environment, you can use the `analyze_env()` function in `utils.py` to quickly inspect the environment's state space, action space, reward range, etc., as in the sketch below.
14 |
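For example, a quick inspection could look like the following (a minimal sketch; it assumes `analyze_env()` simply takes the environment instance, since `utils.py` is not reproduced here):

```python
import gym
from quanser_robots.common import GentlyTerminating
from utils import analyze_env  # helper described above, defined in utils.py

# Same environment id as in run.py
env = GentlyTerminating(gym.make("CartpoleStabShort-v0"))
analyze_env(env)  # assumed call: prints state space, action space, reward range, etc.
```
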
15 | ### How to run
16 |
17 | To start training (or to try our pre-trained model by setting `load_model` to `True`), simply run
18 |
19 | ```bash
20 | python run.py --path config.yml
21 | ```
22 | The script will load the configuration from the ```config.yml``` file and begin training
23 |
24 | Note that because every action requires an ABC optimization, boosting the dataset with the MPC controller takes a long time
25 |
26 | If you want to load a saved dataset and a pre-trained dynamic model, note that you should normalize the dataset first, because the dynamic model needs the data distribution statistics.
27 | You can use the `norm_train_data()` method of the `DynamicModel` class.
28 |
29 | You can also see some results in the Jupyter Notebook ```example.ipynb```
30 |
31 | ### Configuration explanation
32 |
33 | In the ```config.yml``` file, there are four sets of configuration parameters.
34 |
35 | The `model_config` part contains the parameters that determine the neural network architecture and the basic environment properties (state and action dimensions).
36 |
37 | The `training_config` part is the configuration of the training process parameters.
38 |
39 | The `dataset_config` part is the configuration of the dataset parameters.
40 |
41 | The `mpc_config` part is the configuration of the MPC algorithm parameters.
42 |
43 | The `exp_number` parameter in `training_config` is your experiment number; it determines the file names of the figures saved in the `./storage` folder.
44 |
45 | If you want to train your model from scratch, then set the `load_model` parameter to `False`. If set to `True`, the trainer will load the model from `model_path`.
46 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/config.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_1.ckpt" # the path to load the model
4 | n_states: 5 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 1 # hidden layer number
7 | size_hidden: 512 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 60 # how many epochs to train the dynamic model
12 | learning_rate: 0.001
13 | batch_size: 256
14 | save_model_flag: True
15 | save_model_path: "storage/exp_1.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 10 # save the loss figure every this many epochs
18 | exp_number: 1 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_1.pkl"
23 | n_max_steps: 1000 # maximum steps per episode
24 | n_random_episodes: 800 # how many random-policy episodes are collected to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 4 # how many episodes of data are collected with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # how many reinforce iterations (rounds of MPC data collection and retraining) to perform
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_1.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 12 # length of the MPC prediction horizon (in steps)
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 20 # max iterations for the ABC optimization
38 | gamma: 0.99 # reward discount coefficient
39 | action_low: -12 # lower bound of the solution space
40 | action_high: 12 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/controller.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from Hive import Hive
3 | from Hive import Utilities
4 |
5 |
6 | class MPC(object):
7 | def __init__(self, env, config):
8 | self.env = env
9 | mpc_config = config["mpc_config"]
10 | self.horizon = mpc_config["horizon"]
11 | self.numb_bees = mpc_config["numb_bees"]
12 | self.max_itrs = mpc_config["max_itrs"]
13 | self.gamma = mpc_config["gamma"]
14 | self.action_low = mpc_config["action_low"]
15 | self.action_high = mpc_config["action_high"]
16 | self.evaluator = Evaluator(self.gamma)
17 |
18 | def act(self, state, dynamic_model):
19 | '''
20 | Optimize the action by Artificial Bee Colony algorithm
21 | :param state: (numpy array) current state
22 | :param dynamic_model: system dynamic model
23 | :return: (float) optimal action
24 | '''
25 | self.evaluator.update(state, dynamic_model)
26 | optimizer = Hive.BeeHive( lower = [float(self.action_low)] * self.horizon,
27 | upper = [float(self.action_high)] * self.horizon,
28 | fun = self.evaluator.evaluate,
29 | numb_bees = self.numb_bees,
30 | max_itrs = self.max_itrs,
31 | verbose=False)
32 | cost = optimizer.run()
33 | #print("Solution: ",optimizer.solution[0])
34 | #print("Fitness Value ABC: {0}".format(optimizer.best))
35 | # Uncomment this if you want to see the performance of the optimizer
36 | #Utilities.ConvergencePlot(cost)
37 | return optimizer.solution[0]
38 |
39 | class Evaluator(object):
40 | def __init__(self, gamma=0.8):
41 | self.gamma = gamma
42 |
43 | def update(self, state, dynamic_model):
44 | self.state = state
45 | self.dynamic_model = dynamic_model
46 |
47 | def evaluate(self, actions):
48 | actions = np.array(actions)
49 | horizon = actions.shape[0]
50 | rewards = 0
51 | state_tmp = self.state.copy()
52 | for j in range(horizon):
53 | input_data = np.concatenate( (state_tmp,[actions[j]]) )
54 | state_dt = self.dynamic_model.predict(input_data)
55 | state_tmp = state_tmp + state_dt[0]
56 | rewards -= (self.gamma ** j) * self.get_reward(state_tmp, actions[j])
57 | return rewards
58 |
59 | def get_reward(self,obs, action_n):
60 | '''
61 | Overwrite this function according to different environment
62 | '''
63 | x, sin_th, cos_th, x_dot, theta_dot = obs
64 | cos_th = min(max(cos_th, -1), 1)
65 | reward = -cos_th + 1
66 | return reward
67 |
68 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/run.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import gym
3 | import argparse
4 | from dynamics import *
5 | from controller import *
6 | from utils import *
7 | from quanser_robots.common import GentlyTerminating
8 | import time
9 |
10 | parser = argparse.ArgumentParser(description='Specify the configuration file path')
11 | parser.add_argument('--path', required=False, type=str, default='config.yml',
12 |                     help='Specify the configuration file path')
13 |
14 |
15 | args = parser.parse_args()
16 |
17 | config_path = args.path # "config.yml"
18 | config = load_config(config_path)
19 | print_config(config_path)
20 |
21 | env_id = "CartpoleStabShort-v0"
22 | env = GentlyTerminating(gym.make(env_id))
23 |
24 | model = DynamicModel(config)
25 |
26 | data_fac = DatasetFactory(env,config)
27 |
28 | data_fac.collect_random_dataset()
29 |
30 | '''Train on the randomly collected dataset'''
31 | loss = model.train(data_fac.random_trainset,data_fac.random_testset)
32 |
33 | mpc = MPC(env,config)
34 |
35 | rewards_list = []
36 | for itr in range(config["dataset_config"]["n_mpc_itrs"]):
37 | t = time.time()
38 | print("**********************************************")
39 | print("The reinforce process [%s], collecting data ..." % itr)
40 | rewards = data_fac.collect_mpc_dataset(mpc, model)
41 | trainset, testset = data_fac.make_dataset()
42 | rewards_list += rewards
43 |
44 | plt.close("all")
45 | plt.figure(figsize=(12, 5))
46 | plt.title('Reward Trend with %s iteration' % itr)
47 | plt.plot(rewards_list)
48 | plt.savefig("storage/reward-" + str(model.exp_number) + ".png")
49 | print("Consume %s s in this iteration" % (time.time() - t))
50 | loss = model.train(trainset, testset)
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/storage/config-1.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_1.ckpt" # the path to load the model
4 | n_states: 5 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 1 # hidden layer number
7 | size_hidden: 512 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 60 # how many epoches to train the dynamic model
12 | learning_rate: 0.001
13 | batch_size: 256
14 | save_model_flag: True
15 | save_model_path: "storage/exp_1.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
18 | exp_number: 1 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_1.pkl"
23 | n_max_steps: 1000 # maximum steps per episode
24 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 4 # how many episodes data sampled with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # the number to perform reinforce iteration
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_1.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 12 # how long of the horizon to predict
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 20 # max iterations for the ABC optimization
38 | gamma: 0.99 # reward discount coefficient
39 | action_low: -12 # lower bound of the solution space
40 | action_high: 12 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/storage/config-2.yml:
--------------------------------------------------------------------------------
1 | # change the mpc horizon w.r.t. config 1 to compare
2 |
3 | model_config:
4 | load_model: False # If set true, you must specify the model path, otherwise train a new model
5 | model_path: "storage/exp_2.ckpt" # the path to load the model
6 | n_states: 5 # environment states
7 | n_actions: 1 # how many controls we need
8 | n_hidden: 1 # hidden layer number
9 | size_hidden: 512 # hidden layer size
10 | use_cuda: True
11 |
12 | training_config:
13 | n_epochs: 60 # how many epoches to train the dynamic model
14 | learning_rate: 0.001
15 | batch_size: 256
16 | save_model_flag: True
17 | save_model_path: "storage/exp_2.ckpt" # the path to save the model
18 | save_loss_fig: True
19 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
20 | exp_number: 2 # experiment number
21 |
22 | dataset_config:
23 | load_flag: False
24 | load_path: "storage/data_exp_2.pkl"
25 | n_max_steps: 1000 # maximum steps per episode
26 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
27 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
28 | n_mpc_episodes: 4 # how many episodes data sampled with the MPC controller
29 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
30 | min_train_samples: 6000
31 | n_mpc_itrs: 100 # the number to perform reinforce iteration
32 | save_flag: True # set True if you want to save all the dataset
33 | save_path: "storage/data_exp_2.pkl"
34 |
35 | # MPC controller configuration
36 | mpc_config:
37 | horizon: 5 # how long of the horizon to predict
38 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
39 | max_itrs: 20 # max iterations for the ABC optimization
40 | gamma: 0.99 # reward discount coefficient
41 | action_low: -12 # lower bound of the solution space
42 | action_high: 12 # upper bound of the solution space
43 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/storage/exp_1.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleStab/storage/exp_1.ckpt
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/storage/loss-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleStab/storage/loss-1.png
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/storage/loss-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleStab/storage/loss-2.png
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/storage/model_error_exp_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleStab/storage/model_error_exp_1.png
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/storage/reward-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleStab/storage/reward-1.png
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleStab/storage/reward-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleStab/storage/reward-2.png
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/Hive/Utilities.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- MODULE DOCSTRING
4 |
5 | __doc__ = """
6 |
7 | (C) Hive, Romain Wuilbercq, 2017
8 | _
9 | /_/_ .'''.
10 | =O(_)))) ...' `.
11 | \_\ `. .'''X
12 | `..'
13 | .---. .---..-./`) ,---. ,---. .-''-.
14 | | | |_ _|\ .-.')| / | | .'_ _ \
15 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
16 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
17 | | (_,_) .---. | _( )_ || (_,_)___|
18 | | _ _--. | | | \ (_ o._) /' \ .---.
19 | |( ' ) | | | | \ (_,_) / \ `-' /
20 | (_{;}_)| | | | \ / \ /
21 | '(_,_) '---' '---' `---` `'-..-'
22 |
23 | Description:
24 | -----------
25 |
26 | A series of utility functions (such as plotting function etc...).
27 |
28 | """
29 |
30 | # ---- IMPORT MODULES
31 |
32 | try:
33 | import matplotlib.pyplot as plt
34 | from matplotlib.font_manager import FontProperties
35 | except:
36 | raise ImportError("Install 'matplotlib' to plot convergence results.")
37 |
38 | # ---- CONVERGENCE PLOT
39 |
40 | def ConvergencePlot(cost):
41 | """
42 |
43 | Monitors convergence.
44 |
45 | Parameters:
46 | ----------
47 |
48 | :param dict cost: mean and best cost over cycles/generations as returned
49 | by an optimiser.
50 |
51 | """
52 | plt.rc('font',family= 'Tibetan Machine Uni')
53 | fs=15
54 | font = FontProperties();
55 | font.set_size( fs) #'larger');
56 | labels = ["Best Bee's Cost", "Mean Bees' Cost"]
57 | plt.figure(figsize=(7, 4.5));
58 | plt.plot(range(len(cost["best"])), cost["best"], label=labels[0]);
59 | plt.scatter(range(len(cost["mean"])), cost["mean"], color='red', label=labels[1]);
60 | plt.xlabel("Iteration",fontsize=fs);
61 | plt.ylabel("Cost",fontsize=fs);
62 | plt.xticks(fontsize=10)
63 | plt.yticks(fontsize=10)
64 | plt.legend(loc="best", prop = font);
65 | plt.xlim([0,len(cost["mean"])]);
66 | plt.grid();
67 | plt.show();
68 |
69 | # ---- END
70 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/Hive/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- MODULE DOCSTRING
4 |
5 | __doc__ = """
6 |
7 | (C) Hive, Romain Wuilbercq, 2017
8 | _
9 | /_/_ .'''.
10 | =O(_)))) ...' `.
11 | \_\ `. .'''X
12 | `..'
13 | .---. .---..-./`) ,---. ,---. .-''-.
14 | | | |_ _|\ .-.')| / | | .'_ _ \
15 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
16 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
17 | | (_,_) .---. | _( )_ || (_,_)___|
18 | | _ _--. | | | \ (_ o._) /' \ .---.
19 | |( ' ) | | | | \ (_,_) / \ `-' /
20 | (_{;}_)| | | | \ / \ /
21 | '(_,_) '---' '---' `---` `'-..-'
22 |
23 | **Hive** is a simple implementation of a swarm-based optimisation
24 | algorithm called the Artificial Bee Colony (ABC) algorithm.
25 |
26 | The Artificial Bee Colony (ABC) algorithm is based on the intelligent foraging
27 | behaviour of honey bee swarm, proposed by Karaboga in 2005.
28 |
29 | """
30 |
31 | __author__ = "Romain Wuilbercq"
32 |
33 | # ---- END
34 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/README.md:
--------------------------------------------------------------------------------
1 | # MPC - CartPoleSwing
2 | This folder contains the implementation of the MPC algorithm and its evaluation on the CartPoleSwing environment
3 |
4 | The implementation mainly follows the paper [here](https://ieeexplore.ieee.org/abstract/document/8463189)
5 |
6 | To optimize the MPC controller, we use the [Artificial Bee Colony](https://en.wikipedia.org/wiki/Artificial_bee_colony_algorithm) (ABC) optimization algorithm,
7 | instead of the original random shooting method in the paper. The implementation of the ABC algorithm is based on this repo: [https://github.com/rwuilbercq/Hive](https://github.com/rwuilbercq/Hive)
8 |
9 | All the hyper-parameters and experiment settings are stored in the ```config.yml``` file
10 |
11 | All the results (figures and models) will be stored in the ```./storage``` folder by default
12 |
13 | If you are not familiar with this environment, you can use the `analyze_env()` function in `utils.py` to quickly inspect the environment's state space, action space, reward range, etc.
14 |
15 | ### How to run
16 |
17 | To start training (or to try our pre-trained model by setting `load_model` to `True`), simply run
18 |
19 | ```bash
20 | python run.py --path config.yml
21 | ```
22 | The script will load the configuration from the ```config.yml``` file and begin training
23 |
24 | Note that because every action requires an ABC optimization, boosting the dataset with the MPC controller takes a long time
25 |
26 | If you want to load a saved dataset and a pre-trained dynamic model, note that you should normalize the dataset first, because the dynamic model needs the data distribution statistics.
27 | You can use the `norm_train_data()` method of the `DynamicModel` class, as in the sketch below.
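
A minimal sketch of that workflow (the placeholder arrays and the exact `norm_train_data()` arguments are assumptions; check the `DynamicModel` class for the real signature):

```python
import numpy as np
from dynamics import DynamicModel
from utils import load_config

config = load_config("config.yml")   # with load_model set to True to reuse a trained net
model = DynamicModel(config)

# Placeholder arrays standing in for the loaded dataset: 5 state dims + 1 action
# dim as inputs, 5 state-difference dims as labels (matching n_states / n_actions).
train_x = np.zeros((100, 6))
train_y = np.zeros((100, 5))

# Assumed call: recompute the normalization statistics the dynamics network
# expects before it is used for prediction.
model.norm_train_data(train_x, train_y)
```
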
28 | ### Configuration explanation
29 |
30 | In the ```config.yml``` file, there are four sets of configuration parameters.
31 |
32 | The `model_config` part contains the parameters that determine the neural network architecture and the basic environment properties (state and action dimensions).
33 |
34 | The `training_config` part is the configuration of the training process parameters.
35 |
36 | The `dataset_config` part is the configuration of the dataset parameters.
37 |
38 | The `mpc_config` part is the configuration of the MPC algorithm parameters.
39 |
40 | The `exp_number` parameter in `training_config` is your experiment number; it determines the file names of the figures saved in the `./storage` folder.
41 |
42 | If you want to train your model from scratch, then set the `load_model` parameter to `False`. If set to `True`, the trainer will load the model from `model_path`.
43 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/config.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_1.ckpt" # the path to load the model
4 | n_states: 5 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 2 # hidden layer number
7 | size_hidden: 512 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 100 # how many epochs to train the dynamic model
12 | learning_rate: 0.001
13 | batch_size: 512
14 | save_model_flag: True
15 | save_model_path: "storage/exp_1.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 10 # save the loss figure every this many epochs
18 | exp_number: 1 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_1.pkl"
23 | n_max_steps: 2000 # maximum steps per episode
24 | n_random_episodes: 80 # how many random-policy episodes are collected to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 2 # how many episodes of data are collected with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # how many reinforce iterations (rounds of MPC data collection and retraining) to perform
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_1.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 12 # length of the MPC prediction horizon (in steps)
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 20 # max iterations for the ABC optimization
38 | gamma: 0.99 # reward discount coefficient
39 | action_low: -12 # lower bound of the solution space
40 | action_high: 12 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/controller.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from Hive import Hive
3 | from Hive import Utilities
4 |
5 |
6 | class MPC(object):
7 | def __init__(self, env, config):
8 | self.env = env
9 | mpc_config = config["mpc_config"]
10 | self.horizon = mpc_config["horizon"]
11 | self.numb_bees = mpc_config["numb_bees"]
12 | self.max_itrs = mpc_config["max_itrs"]
13 | self.gamma = mpc_config["gamma"]
14 | self.action_low = mpc_config["action_low"]
15 | self.action_high = mpc_config["action_high"]
16 | self.evaluator = Evaluator(self.gamma)
17 |
18 | def act(self, state, dynamic_model):
19 | '''
20 | Optimize the action by Artificial Bee Colony algorithm
21 | :param state: (numpy array) current state
22 | :param dynamic_model: system dynamic model
23 | :return: (float) optimal action
24 | '''
25 | self.evaluator.update(state, dynamic_model)
26 | optimizer = Hive.BeeHive( lower = [float(self.action_low)] * self.horizon,
27 | upper = [float(self.action_high)] * self.horizon,
28 | fun = self.evaluator.evaluate,
29 | numb_bees = self.numb_bees,
30 | max_itrs = self.max_itrs,
31 | verbose=False)
32 | cost = optimizer.run()
33 | #print("Solution: ",optimizer.solution[0])
34 | #print("Fitness Value ABC: {0}".format(optimizer.best))
35 | # Uncomment this if you want to see the performance of the optimizer
36 | #Utilities.ConvergencePlot(cost)
37 | return optimizer.solution[0]
38 |
39 | class Evaluator(object):
40 | def __init__(self, gamma=0.8):
41 | self.gamma = gamma
42 |
43 | def update(self, state, dynamic_model):
44 | self.state = state
45 | self.dynamic_model = dynamic_model
46 |
47 | def evaluate(self, actions):
48 | actions = np.array(actions)
49 | horizon = actions.shape[0]
50 | rewards = 0
51 | state_tmp = self.state.copy()
52 | for j in range(horizon):
53 | input_data = np.concatenate( (state_tmp,[actions[j]]) )
54 | state_dt = self.dynamic_model.predict(input_data)
55 | state_tmp = state_tmp + state_dt[0]
56 | rewards -= (self.gamma ** j) * self.get_reward(state_tmp, actions[j])
57 | return rewards
58 |
59 | # need to change this function according to different environment
60 | def get_reward(self,obs, action_n):
61 | x, sin_th, cos_th, x_dot, theta_dot = obs
62 | cos_th = min(max(cos_th, -1), 1)
63 | reward = -cos_th + 1
64 | return reward
65 |
66 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/run.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import gym
3 | import argparse
4 | from dynamics import *
5 | from controller import *
6 | from utils import *
7 | from quanser_robots.common import GentlyTerminating
8 | import time
9 |
10 | parser = argparse.ArgumentParser(description='Specify the configuration file path')
11 | parser.add_argument('--path', required=False, type=str, default='config.yml',
12 |                     help='Specify the configuration file path')
13 |
14 |
15 | args = parser.parse_args()
16 |
17 | config_path = args.path # "config.yml"
18 | config = load_config(config_path)
19 | print_config(config_path)
20 |
21 | env_id = "CartpoleSwingShort-v0"
22 | env = GentlyTerminating(gym.make(env_id))
23 |
24 | model = DynamicModel(config)
25 |
26 | data_fac = DatasetFactory(env,config)
27 | data_fac.collect_random_dataset()
28 |
29 | loss = model.train(data_fac.random_trainset,data_fac.random_testset)
30 | model.plot_model_validation(env,n_sample=200)
31 | mpc = MPC(env,config)
32 |
33 | rewards_list = []
34 | for itr in range(config["dataset_config"]["n_mpc_itrs"]):
35 | t = time.time()
36 | print("**********************************************")
37 | print("The reinforce process [%s], collecting data ..." % itr)
38 | rewards = data_fac.collect_mpc_dataset(mpc, model)
39 | trainset, testset = data_fac.make_dataset()
40 | rewards_list += rewards
41 |
42 | plt.close("all")
43 | plt.figure(figsize=(12, 5))
44 | plt.title('Reward Trend with %s iteration' % itr)
45 | plt.plot(rewards_list)
46 | plt.savefig("storage/reward-" + str(model.exp_number) + ".png")
47 | print("Consume %s s in this iteration" % (time.time() - t))
48 | loss = model.train(trainset, testset)
49 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/config-1.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_1.ckpt" # the path to load the model
4 | n_states: 5 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 1 # hidden layer number
7 | size_hidden: 512 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 60 # how many epoches to train the dynamic model
12 | learning_rate: 0.001
13 | batch_size: 256
14 | save_model_flag: True
15 | save_model_path: "storage/exp_1.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
18 | exp_number: 1 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_1.pkl"
23 | n_max_steps: 1000 # maximum steps per episode
24 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 4 # how many episodes data sampled with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # the number to perform reinforce iteration
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_1.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 12 # how long of the horizon to predict
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 20 # max iterations for the ABC optimization
38 | gamma: 0.99 # reward discount coefficient
39 | action_low: -12 # lower bound of the solution space
40 | action_high: 12 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/config-2.yml:
--------------------------------------------------------------------------------
1 | # change the mpc horizon and network architecture w.r.t. config 1 to compare
2 |
3 | model_config:
4 | load_model: False # If set true, you must specify the model path, otherwise train a new model
5 | model_path: "storage/exp_2.ckpt" # the path to load the model
6 | n_states: 5 # environment states
7 | n_actions: 1 # how many controls we need
8 | n_hidden: 1 # hidden layer number
9 | size_hidden: 512 # hidden layer size
10 | use_cuda: True
11 |
12 | training_config:
13 | n_epochs: 60 # how many epoches to train the dynamic model
14 | learning_rate: 0.001
15 | batch_size: 256
16 | save_model_flag: True
17 | save_model_path: "storage/exp_2.ckpt" # the path to save the model
18 | save_loss_fig: True
19 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
20 | exp_number: 2 # experiment number
21 |
22 | dataset_config:
23 | load_flag: False
24 | load_path: "storage/data_exp_2.pkl"
25 | n_max_steps: 2000 # maximum steps per episode
26 | n_random_episodes: 80 # how many random episodes' data to fit the initial dynamic model
27 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
28 | n_mpc_episodes: 2 # how many episodes data sampled with the MPC controller
29 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
30 | min_train_samples: 6000
31 | n_mpc_itrs: 100 # the number to perform reinforce iteration
32 | save_flag: True # set True if you want to save all the dataset
33 | save_path: "storage/data_exp_2.pkl"
34 |
35 | # MPC controller configuration
36 | mpc_config:
37 | horizon: 20 # how long of the horizon to predict
38 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
39 | max_itrs: 20 # max iterations for the ABC optimization
40 | gamma: 0.99 # reward discount coefficient
41 | action_low: -12 # lower bound of the solution space
42 | action_high: 12 # upper bound of the solution space
43 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/config-3.yml:
--------------------------------------------------------------------------------
1 | # change the n_mpc_episodes w.r.t. config 1 to compare
2 |
3 | model_config:
4 | load_model: False # If set true, you must specify the model path, otherwise train a new model
5 | model_path: "storage/exp_3.ckpt" # the path to load the model
6 | n_states: 5 # environment states
7 | n_actions: 1 # how many controls we need
8 | n_hidden: 1 # hidden layer number
9 | size_hidden: 512 # hidden layer size
10 | use_cuda: True
11 |
12 | training_config:
13 | n_epochs: 60 # how many epoches to train the dynamic model
14 | learning_rate: 0.001
15 | batch_size: 256
16 | save_model_flag: True
17 | save_model_path: "storage/exp_3.ckpt" # the path to save the model
18 | save_loss_fig: True
19 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
20 | exp_number: 3 # experiment number
21 |
22 | dataset_config:
23 | load_flag: False
24 | load_path: "storage/data_exp_3.pkl"
25 | n_max_steps: 2000 # maximum steps per episode
26 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
27 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
28 | n_mpc_episodes: 4 # how many episodes data sampled with the MPC controller
29 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
30 | min_train_samples: 6000
31 | n_mpc_itrs: 100 # the number to perform reinforce iteration
32 | save_flag: True # set True if you want to save all the dataset
33 | save_path: "storage/data_exp_3.pkl"
34 |
35 | # MPC controller configuration
36 | mpc_config:
37 | horizon: 12 # how long of the horizon to predict
38 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
39 | max_itrs: 20 # max iterations for the ABC optimization
40 | gamma: 0.99 # reward discount coefficient
41 | action_low: -12 # lower bound of the solution space
42 | action_high: 12 # upper bound of the solution space
43 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/config-4.yml:
--------------------------------------------------------------------------------
1 | # change the mpc_dataset_split w.r.t. config 3 to compare
2 |
3 | model_config:
4 | load_model: False # If set true, you must specify the model path, otherwise train a new model
5 | model_path: "storage/exp_4.ckpt" # the path to load the model
6 | n_states: 5 # environment states
7 | n_actions: 1 # how many controls we need
8 | n_hidden: 1 # hidden layer number
9 | size_hidden: 512 # hidden layer size
10 | use_cuda: True
11 |
12 | training_config:
13 | n_epochs: 60 # how many epoches to train the dynamic model
14 | learning_rate: 0.001
15 | batch_size: 256
16 | save_model_flag: True
17 | save_model_path: "storage/exp_4.ckpt" # the path to save the model
18 | save_loss_fig: True
19 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
20 | exp_number: 4 # experiment number
21 |
22 | dataset_config:
23 | load_flag: False
24 | load_path: "storage/data_exp_3.pkl"
25 | n_max_steps: 2000 # maximum steps per episode
26 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
27 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
28 | n_mpc_episodes: 4 # how many episodes data sampled with the MPC controller
29 | mpc_dataset_split: 0.8 # mpc dataset's portion in the training set
30 | min_train_samples: 6000
31 | n_mpc_itrs: 100 # the number to perform reinforce iteration
32 | save_flag: True # set True if you want to save all the dataset
33 | save_path: "storage/data_exp_4.pkl"
34 |
35 | # MPC controller configuration
36 | mpc_config:
37 | horizon: 12 # how long of the horizon to predict
38 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
39 | max_itrs: 20 # max iterations for the ABC optimization
40 | gamma: 0.99 # reward discount coefficient
41 | action_low: -12 # lower bound of the solution space
42 | action_high: 12 # upper bound of the solution space
43 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/config-5.yml:
--------------------------------------------------------------------------------
1 | # change the mpc horizon w.r.t. config 1 to compare
2 |
3 | model_config:
4 | load_model: False # If set true, you must specify the model path, otherwise train a new model
5 | model_path: "storage/exp_5.ckpt" # the path to load the model
6 | n_states: 5 # environment states
7 | n_actions: 1 # how many controls we need
8 | n_hidden: 1 # hidden layer number
9 | size_hidden: 512 # hidden layer size
10 | use_cuda: True
11 |
12 | training_config:
13 | n_epochs: 60 # how many epoches to train the dynamic model
14 | learning_rate: 0.001
15 | batch_size: 256
16 | save_model_flag: True
17 | save_model_path: "storage/exp_5.ckpt" # the path to save the model
18 | save_loss_fig: True
19 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
20 | exp_number: 5 # experiment number
21 |
22 | dataset_config:
23 | load_flag: False
24 | load_path: "storage/data_exp_5.pkl"
25 | n_max_steps: 2000 # maximum steps per episode
26 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
27 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
28 | n_mpc_episodes: 2 # how many episodes data sampled with the MPC controller
29 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
30 | min_train_samples: 6000
31 | n_mpc_itrs: 100 # the number to perform reinforce iteration
32 | save_flag: True # set True if you want to save all the dataset
33 | save_path: "storage/data_exp_5.pkl"
34 |
35 | # MPC controller configuration
36 | mpc_config:
37 | horizon: 25 # how long of the horizon to predict
38 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
39 | max_itrs: 20 # max iterations for the ABC optimization
40 | gamma: 0.99 # reward discount coefficient
41 | action_low: -12 # lower bound of the solution space
42 | action_high: 12 # upper bound of the solution space
43 |
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/loss-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleSwing/storage/loss-1.png
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/loss-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleSwing/storage/loss-2.png
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/model_error_exp_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleSwing/storage/model_error_exp_1.png
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/model_error_exp_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleSwing/storage/model_error_exp_2.png
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/reward-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleSwing/storage/reward-1.png
--------------------------------------------------------------------------------
/MPC/MPC-CartPoleSwing/storage/reward-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-CartPoleSwing/storage/reward-2.png
--------------------------------------------------------------------------------
/MPC/MPC-Double/Hive/SelectionMethods.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- SELECTION METHODS
4 |
5 | __all__ = ["tournament", "disruptive"]
6 |
7 | # ---- MODULE DOCSTRING
8 |
9 | __doc__ = """
10 |
11 | (C) Hive, Romain Wuilbercq, 2017
12 | _
13 | /_/_ .'''.
14 | =O(_)))) ...' `.
15 | \_\ `. .'''X
16 | `..'
17 | .---. .---..-./`) ,---. ,---. .-''-.
18 | | | |_ _|\ .-.')| / | | .'_ _ \
19 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
20 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
21 | | (_,_) .---. | _( )_ || (_,_)___|
22 | | _ _--. | | | \ (_ o._) /' \ .---.
23 | |( ' ) | | | | \ (_,_) / \ `-' /
24 | (_{;}_)| | | | \ / \ /
25 | '(_,_) '---' '---' `---` `'-..-'
26 |
27 | Description:
28 | -----------
29 |
30 | SelectionMethods.py
31 |
32 | Defines a collection of selection methods to be used with Hive.
33 |
34 | """
35 |
36 | # ---- IMPORT MODULES
37 |
38 | import random
39 |
40 | import numpy as np
41 |
42 | # ---- SELECTION METHOD(S)
43 |
44 | def tournament(values, crowd_size=None):
45 | """
46 |
47 | Defines a selection process whereby a number of individuals
48 | from a colony/generation are selected to compete.
49 |
50 | Individuals with greater fitness values compared to the rest
51 | have higher chance to be kept for the next cycle/generation
52 | - i.e. survival of the fittest. This method prones elitism.
53 |
54 | A solution compete with a fixed number of randomly chosen individuals
55 | (i.e. "crowd_size") from the population.
56 |
57 | This function uses the "random.sample" function from the python base
58 | "random" module and the "np.where" function from the "numpy" module.
59 |
60 | Parameters:
61 | ----------
62 |
63 | :param int crowd_size: number of individuals competing
64 |
65 | """
66 |
67 | # computes battle score metrics
68 | scores = []
69 | for i in range(len(values)):
70 |
71 | # selects a pool of opponents randomly
72 | if (crowd_size != None) and (type(crowd_size) is int):
73 | opponents = random.sample(values, crowd_size)
74 | else:
75 | opponents = values
76 |
77 | # battles against opponents
78 | scores.append( sum(np.where(values[i]>opponents, 1, 0)) )
79 |
80 | # returns an array of normalized scores
81 | return scores / sum(scores)
82 |
83 | def disruptive(values):
84 | """
85 |
86 | Defines a selection process whereby a better chance is given to
87 | individuals with the highest and lowest fitness values - i.e. those
88 | further away from a "norm".
89 |
90 | This method represents a good mechanism by which diversity can
91 | be passed onto the next generation/cycle and avoid too-early
92 | convergence - i.e. improves the exploration of the search domain.
93 |
94 | This function uses the "np.mean" function from the "numpy" module.
95 |
96 | """
97 |
98 | # computes mean fitness of population
99 | mean_ = np.mean(values)
100 |
101 | # computes score metrics
102 | scores = []
103 | for i in range(len(values)):
104 | scores.append(abs(values[i] - mean_))
105 |
106 | # returns an array of normalized scores
107 | return scores / sum(scores)
108 |
109 | # ---- END
110 |
--------------------------------------------------------------------------------
/MPC/MPC-Double/Hive/Utilities.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- MODULE DOCSTRING
4 |
5 | __doc__ = """
6 |
7 | (C) Hive, Romain Wuilbercq, 2017
8 | _
9 | /_/_ .'''.
10 | =O(_)))) ...' `.
11 | \_\ `. .'''X
12 | `..'
13 | .---. .---..-./`) ,---. ,---. .-''-.
14 | | | |_ _|\ .-.')| / | | .'_ _ \
15 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
16 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
17 | | (_,_) .---. | _( )_ || (_,_)___|
18 | | _ _--. | | | \ (_ o._) /' \ .---.
19 | |( ' ) | | | | \ (_,_) / \ `-' /
20 | (_{;}_)| | | | \ / \ /
21 | '(_,_) '---' '---' `---` `'-..-'
22 |
23 | Description:
24 | -----------
25 |
26 | A series of utility functions (such as plotting function etc...).
27 |
28 | """
29 |
30 | # ---- IMPORT MODULES
31 |
32 | try:
33 | import matplotlib.pyplot as plt
34 | from matplotlib.font_manager import FontProperties
35 | except:
36 | raise ImportError("Install 'matplotlib' to plot convergence results.")
37 |
38 | # ---- CONVERGENCE PLOT
39 |
40 | def ConvergencePlot(cost):
41 | """
42 |
43 | Monitors convergence.
44 |
45 | Parameters:
46 | ----------
47 |
48 | :param dict cost: mean and best cost over cycles/generations as returned
49 | by an optimiser.
50 |
51 | """
52 | plt.rc('font',family= 'Tibetan Machine Uni')
53 | fs=15
54 | font = FontProperties();
55 | font.set_size( fs) #'larger');
56 | labels = ["Best Bee's Cost", "Mean Bees' Cost"]
57 | plt.figure(figsize=(7, 4.5));
58 | plt.plot(range(len(cost["best"])), cost["best"], label=labels[0]);
59 | plt.scatter(range(len(cost["mean"])), cost["mean"], color='red', label=labels[1]);
60 | plt.xlabel("Iteration",fontsize=fs);
61 | plt.ylabel("Cost",fontsize=fs);
62 | plt.xticks(fontsize=10)
63 | plt.yticks(fontsize=10)
64 | plt.legend(loc="best", prop = font);
65 | plt.xlim([0,len(cost["mean"])]);
66 | plt.grid();
67 | plt.show();
68 |
69 | # ---- END
70 |
--------------------------------------------------------------------------------
/MPC/MPC-Double/Hive/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- MODULE DOCSTRING
4 |
5 | __doc__ = """
6 |
7 | (C) Hive, Romain Wuilbercq, 2017
8 | _
9 | /_/_ .'''.
10 | =O(_)))) ...' `.
11 | \_\ `. .'''X
12 | `..'
13 | .---. .---..-./`) ,---. ,---. .-''-.
14 | | | |_ _|\ .-.')| / | | .'_ _ \
15 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
16 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
17 | | (_,_) .---. | _( )_ || (_,_)___|
18 | | _ _--. | | | \ (_ o._) /' \ .---.
19 | |( ' ) | | | | \ (_,_) / \ `-' /
20 | (_{;}_)| | | | \ / \ /
21 | '(_,_) '---' '---' `---` `'-..-'
22 |
23 | **Hive** is a simple implementation of a swarm-based optimisation
24 | algorithm called the Artificial Bee Colony (ABC) algorithm.
25 |
26 | The Artificial Bee Colony (ABC) algorithm is based on the intelligent foraging
27 | behaviour of honey bee swarm, proposed by Karaboga in 2005.
28 |
29 | """
30 |
31 | __author__ = "Romain Wuilbercq"
32 |
33 | # ---- END
34 |
--------------------------------------------------------------------------------
/MPC/MPC-Double/README.md:
--------------------------------------------------------------------------------
1 | # MPC - Double
2 | This folder contains the implementation of the MPC algorithm and its evaluation on the Double environment
3 |
4 | The implementation mainly follows the paper [here](https://ieeexplore.ieee.org/abstract/document/8463189)
5 |
6 | To optimize the MPC controller, we use the [Artificial Bee Colony](https://en.wikipedia.org/wiki/Artificial_bee_colony_algorithm) (ABC) optimization algorithm,
7 | instead of the original random shooting method in the paper. The implementation of the ABC algorithm is based on this repo: [https://github.com/rwuilbercq/Hive](https://github.com/rwuilbercq/Hive)
8 |
9 | All the hyper-parameters and experiment settings are stored in the ```config.yml``` file
10 |
11 | All the results (figures and models) will be stored in the ```./storage``` folder by default
12 |
13 | If you are not familiar with this environment, you can use the `analyze_env()` function in `utils.py` to quickly inspect the environment's state space, action space, reward range, etc.
14 |
15 | ### How to run
16 |
17 | To start training (or to try our pre-trained model by setting `load_model` to `True`), simply run
18 |
19 | ```bash
20 | python run.py --path config.yml
21 | ```
22 | The script will load the configuration from the ```config.yml``` file and begin training
23 |
24 | Note that because every action requires an ABC optimization, boosting the dataset with the MPC controller takes a long time
25 |
26 | If you want to load a saved dataset and a pre-trained dynamic model, note that you should normalize the dataset first, because the dynamic model needs the data distribution statistics.
27 | You can use the `norm_train_data()` method of the `DynamicModel` class.
28 | ### Configuration explanation
29 |
30 | In the ```config.yml``` file, there are four sets of configuration parameters.
31 |
32 | The `model_config` part contains the parameters that determine the neural network architecture and the basic environment properties (state and action dimensions).
33 |
34 | The `training_config` part is the configuration of the training process parameters.
35 |
36 | The `dataset_config` part is the configuration of the dataset parameters.
37 |
38 | The `mpc_config` part is the configuration of the MPC algorithm parameters.
39 |
40 | The `exp_number` parameter in `training_config` is your experiment number; it determines the file names of the figures saved in the `./storage` folder.
41 |
42 | If you want to train your model from scratch, then set the `load_model` parameter to `False`. If set to `True`, the trainer will load the model from `model_path`, as in the sketch below.
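
For example, to reuse the bundled dynamics model instead of training one from scratch, you could flip the flag when loading the configuration (a sketch only; it assumes `DynamicModel` restores the weights from `model_path` when `load_model` is `True`, as the config comments describe):

```python
from dynamics import DynamicModel
from utils import load_config

config = load_config("config.yml")
# model_path in the default config already points at "storage/exp_5.ckpt".
config["model_config"]["load_model"] = True
model = DynamicModel(config)  # assumed to restore the saved weights instead of training anew
```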
43 |
--------------------------------------------------------------------------------
/MPC/MPC-Double/config.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_5.ckpt" # the path to load the model
4 | n_states: 6 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 1 # hidden layer number
7 | size_hidden: 512 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 |   n_epochs: 60 # how many epochs to train the dynamic model
12 | learning_rate: 0.001
13 | batch_size: 256
14 | save_model_flag: True
15 | save_model_path: "storage/exp_5.ckpt" # the path to save the model
16 | save_loss_fig: True
17 |   save_loss_fig_frequency: 10 # save the loss figure every this many epochs
18 | exp_number: 5 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_5.pkl"
23 | n_max_steps: 1000 # maximum steps per episode
24 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 4 # how many episodes data sampled with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # the number to perform reinforce iteration
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_5.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 25 # how long of the horizon to predict
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 20 # max iterations for the ABC optimization
38 | gamma: 0.99 # reward discount coefficient
39 | action_low: -12 # lower bound of the solution space
40 | action_high: 12 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-Double/controller.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from Hive import Hive
3 | from Hive import Utilities
4 |
5 |
6 | class MPC(object):
7 | def __init__(self, env, config):
8 | self.env = env
9 | mpc_config = config["mpc_config"]
10 | self.horizon = mpc_config["horizon"]
11 | self.numb_bees = mpc_config["numb_bees"]
12 | self.max_itrs = mpc_config["max_itrs"]
13 | self.gamma = mpc_config["gamma"]
14 | self.action_low = mpc_config["action_low"]
15 | self.action_high = mpc_config["action_high"]
16 | self.evaluator = Evaluator(self.gamma)
17 |
18 | def act(self, state, dynamic_model):
19 | '''
20 | Optimize the action by Artificial Bee Colony algorithm
21 | :param state: (numpy array) current state
22 | :param dynamic_model: system dynamic model
23 | :return: (float) optimal action
24 | '''
25 | self.evaluator.update(state, dynamic_model)
26 | optimizer = Hive.BeeHive( lower = [float(self.action_low)] * self.horizon,
27 | upper = [float(self.action_high)] * self.horizon,
28 | fun = self.evaluator.evaluate,
29 | numb_bees = self.numb_bees,
30 | max_itrs = self.max_itrs,
31 | verbose=False)
32 | cost = optimizer.run()
33 | #print("Solution: ",optimizer.solution[0])
34 | #print("Fitness Value ABC: {0}".format(optimizer.best))
35 | # Uncomment this if you want to see the performance of the optimizer
36 | #Utilities.ConvergencePlot(cost)
37 | return optimizer.solution[0]
38 |
39 | class Evaluator(object):
40 | def __init__(self, gamma=0.8):
41 | self.gamma = gamma
42 |
43 | def update(self, state, dynamic_model):
44 | self.state = state
45 | self.dynamic_model = dynamic_model
46 |
47 | def evaluate(self, actions):
48 | actions = np.array(actions)
49 | horizon = actions.shape[0]
50 | rewards = 0
51 | state_tmp = self.state.copy()
52 | for j in range(horizon):
53 | input_data = np.concatenate( (state_tmp,[actions[j]]) )
54 | state_dt = self.dynamic_model.predict(input_data)
55 | state_tmp = state_tmp + state_dt[0]
56 | rewards -= (self.gamma ** j) * self.get_reward(state_tmp, actions[j])
57 | return rewards
58 |
59 | # need to change this function according to different environment
60 | def get_reward(self,obs, action_n):
61 | x_c, th1, th2, _, _, _ = obs
62 | rwd = -(th1 ** 2 + th2 ** 2)
63 | return np.float32(rwd) + 2 * 0.25 ** 2
64 |
65 |
66 |
--------------------------------------------------------------------------------
/MPC/MPC-Double/run.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import gym
3 | import argparse
4 | from dynamics import *
5 | from controller import *
6 | from utils import *
7 | from quanser_robots.common import GentlyTerminating
8 | import time
9 |
10 | parser = argparse.ArgumentParser(description='Specify the configuration file path')
11 | parser.add_argument('--path', required=False, type=str, default='config.yml',
12 |                     help='Specify the configuration file path')
13 |
14 |
15 | args = parser.parse_args()
16 |
17 | config_path = args.path # "config.yml"
18 | config = load_config(config_path)
19 | print_config(config_path)
20 |
21 | env_id = "DoublePendulum-v0"
22 | env = GentlyTerminating(gym.make(env_id))
23 |
24 | model = DynamicModel(config)
25 |
26 | data_fac = DatasetFactory(env,config)
27 | data_fac.collect_random_dataset()
28 |
29 | loss = model.train(data_fac.random_trainset,data_fac.random_testset)
30 |
31 | mpc = MPC(env,config)
32 |
33 | rewards_list = []
34 | for itr in range(config["dataset_config"]["n_mpc_itrs"]):
35 | t = time.time()
36 | print("**********************************************")
37 | print("The reinforce process [%s], collecting data ..." % itr)
38 | rewards = data_fac.collect_mpc_dataset(mpc, model)
39 | trainset, testset = data_fac.make_dataset()
40 | rewards_list += rewards
41 |
42 | plt.close("all")
43 | plt.figure(figsize=(12, 5))
44 | plt.title('Reward Trend with %s iteration' % itr)
45 | plt.plot(rewards_list)
46 | plt.savefig("storage/reward-" + str(model.exp_number) + ".png")
47 | print("Consume %s s in this iteration" % (time.time() - t))
48 | loss = model.train(trainset, testset)
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/config-1.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_1.ckpt" # the path to load the model
4 | n_states: 6 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 1 # hidden layer number
7 | size_hidden: 512 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 60 # how many epoches to train the dynamic model
12 | learning_rate: 0.001
13 | batch_size: 256
14 | save_model_flag: True
15 | save_model_path: "storage/exp_1.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
18 | exp_number: 1 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_1.pkl"
23 | n_max_steps: 20000 # maximum steps per episode
24 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 4 # how many episodes data sampled with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # the number to perform reinforce iteration
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_1.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 12 # how long of the horizon to predict
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 15 # max iterations for the ABC optimization
38 | gamma: 0.99 # reward discount coefficient
39 | action_low: -12 # lower bound of the solution space
40 | action_high: 12 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/config-2.yml:
--------------------------------------------------------------------------------
1 | # change the mpc horizon w.r.t. config 1 to compare
2 |
3 | model_config:
4 | load_model: False # If set true, you must specify the model path, otherwise train a new model
5 | model_path: "storage/exp_2.ckpt" # the path to load the model
6 | n_states: 6 # environment states
7 | n_actions: 1 # how many controls we need
8 | n_hidden: 1 # hidden layer number
9 | size_hidden: 512 # hidden layer size
10 | use_cuda: True
11 |
12 | training_config:
13 | n_epochs: 60 # how many epoches to train the dynamic model
14 | learning_rate: 0.001
15 | batch_size: 256
16 | save_model_flag: True
17 | save_model_path: "storage/exp_2.ckpt" # the path to save the model
18 | save_loss_fig: True
19 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
20 | exp_number: 2 # experiment number
21 |
22 | dataset_config:
23 | load_flag: False
24 | load_path: "storage/data_exp_2.pkl"
25 | n_max_steps: 1000 # maximum steps per episode
26 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
27 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
28 | n_mpc_episodes: 4 # how many episodes data sampled with the MPC controller
29 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
30 | min_train_samples: 6000
31 | n_mpc_itrs: 100 # the number to perform reinforce iteration
32 | save_flag: True # set True if you want to save all the dataset
33 | save_path: "storage/data_exp_2.pkl"
34 |
35 | # MPC controller configuration
36 | mpc_config:
37 | horizon: 5 # how long of the horizon to predict
38 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
39 | max_itrs: 20 # max iterations for the ABC optimization
40 | gamma: 0.99 # reward discount coefficient
41 | action_low: -12 # lower bound of the solution space
42 | action_high: 12 # upper bound of the solution space
43 |
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/config-3.yml:
--------------------------------------------------------------------------------
1 | # change the n_mpc_episodes w.r.t. config 1 to compare
2 |
3 | model_config:
4 | load_model: False # If set true, you must specify the model path, otherwise train a new model
5 | model_path: "storage/exp_3.ckpt" # the path to load the model
6 | n_states: 6 # environment states
7 | n_actions: 1 # how many controls we need
8 | n_hidden: 1 # hidden layer number
9 | size_hidden: 512 # hidden layer size
10 | use_cuda: True
11 |
12 | training_config:
13 | n_epochs: 60 # how many epoches to train the dynamic model
14 | learning_rate: 0.001
15 | batch_size: 256
16 | save_model_flag: True
17 | save_model_path: "storage/exp_3.ckpt" # the path to save the model
18 | save_loss_fig: True
19 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
20 | exp_number: 3 # experiment number
21 |
22 | dataset_config:
23 | load_flag: False
24 | load_path: "storage/data_exp_3.pkl"
25 | n_max_steps: 1000 # maximum steps per episode
26 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
27 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
28 | n_mpc_episodes: 8 # how many episodes data sampled with the MPC controller
29 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
30 | min_train_samples: 6000
31 | n_mpc_itrs: 100 # the number to perform reinforce iteration
32 | save_flag: True # set True if you want to save all the dataset
33 | save_path: "storage/data_exp_3.pkl"
34 |
35 | # MPC controller configuration
36 | mpc_config:
37 | horizon: 12 # how long of the horizon to predict
38 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
39 | max_itrs: 20 # max iterations for the ABC optimization
40 | gamma: 0.99 # reward discount coefficient
41 | action_low: -12 # lower bound of the solution space
42 | action_high: 12 # upper bound of the solution space
43 |
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/config-4.yml:
--------------------------------------------------------------------------------
1 | # change the mpc_dataset_split w.r.t. config 3 to compare
2 |
3 | model_config:
4 | load_model: False # If set true, you must specify the model path, otherwise train a new model
5 | model_path: "storage/exp_4.ckpt" # the path to load the model
6 | n_states: 6 # environment states
7 | n_actions: 1 # how many controls we need
8 | n_hidden: 1 # hidden layer number
9 | size_hidden: 512 # hidden layer size
10 | use_cuda: True
11 |
12 | training_config:
13 | n_epochs: 60 # how many epoches to train the dynamic model
14 | learning_rate: 0.001
15 | batch_size: 256
16 | save_model_flag: True
17 | save_model_path: "storage/exp_4.ckpt" # the path to save the model
18 | save_loss_fig: True
19 | save_loss_fig_frequency: 10 # how many every epochs to save the loss figure
20 | exp_number: 4 # experiment number
21 |
22 | dataset_config:
23 | load_flag: False
24 | load_path: "storage/data_exp_3.pkl"
25 | n_max_steps: 1000 # maximum steps per episode
26 | n_random_episodes: 800 # how many random episodes' data to fit the initial dynamic model
27 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
28 | n_mpc_episodes: 8 # how many episodes data sampled with the MPC controller
29 | mpc_dataset_split: 0.8 # mpc dataset's portion in the training set
30 | min_train_samples: 6000
31 | n_mpc_itrs: 100 # the number to perform reinforce iteration
32 | save_flag: True # set True if you want to save all the dataset
33 | save_path: "storage/data_exp_4.pkl"
34 |
35 | # MPC controller configuration
36 | mpc_config:
37 | horizon: 12 # how long of the horizon to predict
38 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
39 | max_itrs: 20 # max iterations for the ABC optimization
40 | gamma: 0.99 # reward discount coefficient
41 | action_low: -12 # lower bound of the solution space
42 | action_high: 12 # upper bound of the solution space
43 |
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/loss-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Double/storage/loss-1.png
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/loss-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Double/storage/loss-2.png
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/loss-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Double/storage/loss-3.png
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/loss-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Double/storage/loss-4.png
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/model_error_exp_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Double/storage/model_error_exp_1.png
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/reward-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Double/storage/reward-1.png
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/reward-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Double/storage/reward-2.png
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/reward-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Double/storage/reward-3.png
--------------------------------------------------------------------------------
/MPC/MPC-Double/storage/reward-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Double/storage/reward-4.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/Hive/SelectionMethods.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- SELECTION METHODS
4 |
5 | __all__ = ["tournament", "disruptive"]
6 |
7 | # ---- MODULE DOCSTRING
8 |
9 | __doc__ = """
10 |
11 | (C) Hive, Romain Wuilbercq, 2017
12 | _
13 | /_/_ .'''.
14 | =O(_)))) ...' `.
15 | \_\ `. .'''X
16 | `..'
17 | .---. .---..-./`) ,---. ,---. .-''-.
18 | | | |_ _|\ .-.')| / | | .'_ _ \
19 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
20 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
21 | | (_,_) .---. | _( )_ || (_,_)___|
22 | | _ _--. | | | \ (_ o._) /' \ .---.
23 | |( ' ) | | | | \ (_,_) / \ `-' /
24 | (_{;}_)| | | | \ / \ /
25 | '(_,_) '---' '---' `---` `'-..-'
26 |
27 | Description:
28 | -----------
29 |
30 | SelectionMethods.py
31 |
32 | Defines a collection of selection methods to be used with Hive.
33 |
34 | """
35 |
36 | # ---- IMPORT MODULES
37 |
38 | import random
39 |
40 | import numpy as np
41 |
42 | # ---- SELECTION METHOD(S)
43 |
44 | def tournament(values, crowd_size=None):
45 | """
46 |
47 | Defines a selection process whereby a number of individuals
48 | from a colony/generation are selected to compete.
49 |
50 | Individuals with greater fitness values compared to the rest
51 |     have a higher chance of being kept for the next cycle/generation
52 |     - i.e. survival of the fittest. This method promotes elitism.
53 | 
54 |     Each solution competes with a fixed number of randomly chosen individuals
55 | (i.e. "crowd_size") from the population.
56 |
57 | This function uses the "random.sample" function from the python base
58 | "random" module and the "np.where" function from the "numpy" module.
59 |
60 | Parameters:
61 | ----------
62 |
63 | :param int crowd_size: number of individuals competing
64 |
65 | """
66 |
67 | # computes battle score metrics
68 | scores = []
69 | for i in range(len(values)):
70 |
71 | # selects a pool of opponents randomly
72 | if (crowd_size != None) and (type(crowd_size) is int):
73 | opponents = random.sample(values, crowd_size)
74 | else:
75 | opponents = values
76 |
77 | # battles against opponents
78 | scores.append( sum(np.where(values[i]>opponents, 1, 0)) )
79 |
80 | # returns an array of normalized scores
81 | return scores / sum(scores)
82 |
83 | def disruptive(values):
84 | """
85 |
86 | Defines a selection process whereby a better chance is given to
87 | individuals with the highest and lowest fitness values - i.e. those
88 | further away from a "norm".
89 |
90 | This method represents a good mechanism by which diversity can
91 | be passed onto the next generation/cycle and avoid too-early
92 | convergence - i.e. improves the exploration of the search domain.
93 |
94 | This function uses the "np.mean" function from the "numpy" module.
95 |
96 | """
97 |
98 | # computes mean fitness of population
99 | mean_ = np.mean(values)
100 |
101 | # computes score metrics
102 | scores = []
103 | for i in range(len(values)):
104 | scores.append(abs(values[i] - mean_))
105 |
106 | # returns an array of normalized scores
107 | return scores / sum(scores)
108 |
109 | # ---- END
110 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/Hive/Utilities.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- MODULE DOCSTRING
4 |
5 | __doc__ = """
6 |
7 | (C) Hive, Romain Wuilbercq, 2017
8 | _
9 | /_/_ .'''.
10 | =O(_)))) ...' `.
11 | \_\ `. .'''X
12 | `..'
13 | .---. .---..-./`) ,---. ,---. .-''-.
14 | | | |_ _|\ .-.')| / | | .'_ _ \
15 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
16 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
17 | | (_,_) .---. | _( )_ || (_,_)___|
18 | | _ _--. | | | \ (_ o._) /' \ .---.
19 | |( ' ) | | | | \ (_,_) / \ `-' /
20 | (_{;}_)| | | | \ / \ /
21 | '(_,_) '---' '---' `---` `'-..-'
22 |
23 | Description:
24 | -----------
25 |
26 | A series of utility functions (such as plotting function etc...).
27 |
28 | """
29 |
30 | # ---- IMPORT MODULES
31 |
32 | try:
33 | import matplotlib.pyplot as plt
34 | from matplotlib.font_manager import FontProperties
35 | except:
36 | raise ImportError("Install 'matplotlib' to plot convergence results.")
37 |
38 | # ---- CONVERGENCE PLOT
39 |
40 | def ConvergencePlot(cost):
41 | """
42 |
43 | Monitors convergence.
44 |
45 | Parameters:
46 | ----------
47 |
48 | :param dict cost: mean and best cost over cycles/generations as returned
49 | by an optimiser.
50 |
51 | """
52 | plt.rc('font',family= 'Tibetan Machine Uni')
53 | fs=15
54 | font = FontProperties();
55 | font.set_size( fs) #'larger');
56 | labels = ["Best Bee's Cost", "Mean Bees' Cost"]
57 | plt.figure(figsize=(7, 4.5));
58 | plt.plot(range(len(cost["best"])), cost["best"], label=labels[0]);
59 | plt.scatter(range(len(cost["mean"])), cost["mean"], color='red', label=labels[1]);
60 | plt.xlabel("Iteration",fontsize=fs);
61 | plt.ylabel("Cost",fontsize=fs);
62 | plt.xticks(fontsize=10)
63 | plt.yticks(fontsize=10)
64 | plt.legend(loc="best", prop = font);
65 | plt.xlim([0,len(cost["mean"])]);
66 | plt.grid();
67 | plt.savefig("mpc.png")
68 | plt.show();
69 |
70 | # ---- END
71 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/Hive/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ---- MODULE DOCSTRING
4 |
5 | __doc__ = """
6 |
7 | (C) Hive, Romain Wuilbercq, 2017
8 | _
9 | /_/_ .'''.
10 | =O(_)))) ...' `.
11 | \_\ `. .'''X
12 | `..'
13 | .---. .---..-./`) ,---. ,---. .-''-.
14 | | | |_ _|\ .-.')| / | | .'_ _ \
15 | | | ( ' )/ `-' \| | | .'/ ( ` ) '
16 | | '-(_{;}_)`-'`"`| | _ | |. (_ o _) |
17 | | (_,_) .---. | _( )_ || (_,_)___|
18 | | _ _--. | | | \ (_ o._) /' \ .---.
19 | |( ' ) | | | | \ (_,_) / \ `-' /
20 | (_{;}_)| | | | \ / \ /
21 | '(_,_) '---' '---' `---` `'-..-'
22 |
23 | **Hive** is a simple implementation of a swarm-based optimisation
24 | algorithm called the Artificial Bee Colony (ABC) algorithm.
25 |
26 | The Artificial Bee Colony (ABC) algorithm is based on the intelligent foraging
27 | behaviour of honey bee swarm, proposed by Karaboga in 2005.
28 |
29 | """
30 |
31 | __author__ = "Romain Wuilbercq"
32 |
33 | # ---- END
34 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/README.md:
--------------------------------------------------------------------------------
1 | # MPC - Qube
2 | This folder contains the implementation of the MPC algorithm and its evaluation on the Qube environment
3 |
4 | The implementation mainly follows the approach described in [this paper](https://ieeexplore.ieee.org/abstract/document/8463189)
5 |
6 | To optimize the MPC controller, we use the [Artificial Bee Colony](https://en.wikipedia.org/wiki/Artificial_bee_colony_algorithm) (ABC) optimization algorithm,
7 | instead of the original random shooting method used in the paper. The implementation of the ABC algorithm is based on this repo: [https://github.com/rwuilbercq/Hive](https://github.com/rwuilbercq/Hive)
8 |
9 | All the hyper-parameters and experiment settings are stored in the ```config.yml``` file
10 |
11 | All the results (figures and models) will be stored in the ```./storage``` folder by default
12 |
13 | If you are not familiar with this environment, you can use the `analyze_env()` function in `utils.py` to quickly inspect the environment's state space, action space, reward range, etc.
14 |
15 | ### How to run
16 |
17 | To launch training (set `load_model: True` in ```config.yml``` to start from a pre-trained model), simply run
18 |
19 | ```
20 | python run.py --path config.yml
21 | ```
22 | The script will load the configuration from the ```config.yml``` file and begin training
23 |
24 | Note that, because the ABC optimization is slow, collecting additional data with the MPC controller can take a long time (see the rollout sketch below)
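
Each MPC rollout re-optimizes the action sequence at every step and applies only the first action, roughly as in the `test()` function of `test.py` (here `env`, `mpc` and `model` are assumed to be constructed as in `run.py`):

```python
state = env.reset()
for step in range(config["dataset_config"]["n_max_steps"]):
    action = mpc.act(state, model)   # ABC search over a horizon-long action sequence
    state, reward, done, info = env.step(np.array([action]))
    if done:
        break
```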
25 |
26 | If you want to load a saved dataset and a pre-trained dynamic model, note that you should normalize the dataset first, because the dynamic model needs the data distribution (mean and standard deviation) statistics.
27 | You can use the `norm_train_data()` method of the `DynamicModel` class (see `test.py` in this folder for a complete example).
28 |
29 | ### Configuration explanation
30 |
31 | In the ```config.yml``` file, there are four sets of configurations.
32 |
33 | The `model_config` part contains the parameters that determine the neural network architecture and the basic environment dimensions (number of states and actions).
34 |
35 | The `training_config` part is the configuration of the training process parameters.
36 |
37 | The `dataset_config` part is the configuration of the dataset parameters.
38 |
39 | The `mpc_config` part is the configuration of the MPC algorithm parameters.
40 |
41 | The `exp_number` parameter in `training_config` is the index of your experiment. The names of the figures saved in the `./storage` folder are determined by this parameter.
42 |
43 | If you want to train your model from scratch, then set the `load_model` parameter to `False`. If set to `True`, the trainer will load the model from `model_path`.
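
These flags can also be overridden programmatically before constructing the model, as `test.py` in this folder does:

```python
config = load_config("config.yml")
config["model_config"]["load_model"] = True    # restore the model saved at model_path
config["dataset_config"]["load_flag"] = True   # reuse the dataset saved at load_path
model = DynamicModel(config)
```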
44 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/config.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_7.ckpt" # the path to load the model
4 | n_states: 6 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 1 # hidden layer number
7 | size_hidden: 500 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 |   n_epochs: 100 # how many epochs to train the dynamic model
12 | learning_rate: 0.001
13 | batch_size: 512
14 | save_model_flag: True
15 | save_model_path: "storage/exp_7.ckpt" # the path to save the model
16 | save_loss_fig: True
19 |   save_loss_fig_frequency: 100 # save the loss figure every this many epochs
20 | exp_number: 7 # experiment number
21 |
22 | dataset_config:
23 | load_flag: False
24 | load_path: "storage/data_exp_7.pkl"
25 | n_max_steps: 500 # maximum steps per episode
26 | n_random_episodes: 700 # how many random episodes' data to fit the initial dynamic model
27 | testset_split: 0.1 # testset's portion in the random dataset, the rest portion is the training set
28 | n_mpc_episodes: 4 # how many episodes data sampled with the MPC controller
29 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
30 | min_train_samples: 8000
31 | n_mpc_itrs: 100 # the number to perform reinforce iteration
32 | save_flag: True # set True if you want to save all the dataset
33 | save_path: "storage/data_exp_7.pkl"
34 |
35 | # MPC controller configuration
36 | mpc_config:
37 | horizon: 5 # how long of the horizon to predict
38 | numb_bees: 4 # ABC optimization algorithm param: number of the bees
40 | gamma: 0.999 # reward discount coefficient
41 | max_itrs: 40 # max iterations for the ABC optimization
42 | action_low: -5 # lower bound of the solution space
43 | action_high: 5 # upper bound of the solution space
44 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/controller.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from Hive import Hive
3 | from Hive import Utilities
4 |
5 |
6 | class MPC(object):
7 | def __init__(self, env, config):
8 | self.env = env
9 | mpc_config = config["mpc_config"]
10 | self.horizon = mpc_config["horizon"]
11 | self.numb_bees = mpc_config["numb_bees"]
12 | self.max_itrs = mpc_config["max_itrs"]
13 | self.gamma = mpc_config["gamma"]
14 | self.action_low = mpc_config["action_low"]
15 | self.action_high = mpc_config["action_high"]
16 | self.evaluator = Evaluator(self.gamma)
17 |
18 | def act(self, state, dynamic_model):
19 | '''
20 | Optimize the action by Artificial Bee Colony algorithm
21 | :param state: (numpy array) current state
22 | :param dynamic_model: system dynamic model
23 | :return: (float) optimal action
24 | '''
25 | self.evaluator.update(state, dynamic_model)
26 | optimizer = Hive.BeeHive( lower = [float(self.action_low)] * self.horizon,
27 | upper = [float(self.action_high)] * self.horizon,
28 | fun = self.evaluator.evaluate,
29 | numb_bees = self.numb_bees,
30 | max_itrs = self.max_itrs,
31 | verbose=False)
32 | cost = optimizer.run()
33 | #print("Solution: ",optimizer.solution[0])
34 | #print("Fitness Value ABC: {0}".format(optimizer.best))
35 | # Uncomment this if you want to see the performance of the optimizer
36 | #Utilities.ConvergencePlot(cost)
37 | return optimizer.solution[0]
38 |
39 | class Evaluator(object):
40 | def __init__(self, gamma=0.8):
41 | self.gamma = gamma
42 |
43 | def update(self, state, dynamic_model):
44 | self.state = state
45 | self.dynamic_model = dynamic_model
46 |
47 | def evaluate(self, actions):
48 | actions = np.array(actions)
49 | horizon = actions.shape[0]
50 | rewards = 0
51 | state_tmp = self.state.copy()
52 | for j in range(horizon):
53 | input_data = np.concatenate( (state_tmp,[actions[j]]) )
54 | state_dt = self.dynamic_model.predict(input_data)
55 | state_tmp = state_tmp + state_dt[0]
56 | rewards -= (self.gamma ** j) * self.get_reward(state_tmp, actions[j])
57 | return rewards
58 |
59 | def get_reward(self,obs, action_n):
60 | cos_th, sin_th, cos_al, sin_al, th_d, al_d = obs
61 | cos_th = min(max(cos_th, -1), 1)
62 | cos_al = min(max(cos_al, -1), 1)
63 | al=np.arccos(cos_al)
64 | th=np.arccos(cos_th)
65 | al_mod = al % (2 * np.pi) - np.pi
66 | action = action_n * 5
67 | cost = al_mod**2 + 5e-3*al_d**2 + 1e-1*th**2 + 2e-2*th_d**2 + 3e-3*action**2
68 | reward = np.exp(-cost)*0.02
69 | return reward
70 |
71 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/run.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import gym
3 | import torch.utils.data as data
4 | from dynamics import *
5 | from controller import *
6 | from utils import *
7 | from quanser_robots.common import GentlyTerminating
8 | import time
9 |
10 | # datasets: numpy array, size:[sample number, input dimension]
11 | # labels: numpy array, size:[sample number, output dimension]
12 |
13 | env_id ="Qube-v0" # "CartPole-v0"
14 | env = GentlyTerminating(gym.make(env_id))
15 | config_path = "config.yml"
16 | config = load_config(config_path)
17 | print_config(config_path)
18 |
19 | model = DynamicModel(config)
20 |
21 | data_fac = DatasetFactory(env,config)
22 | data_fac.collect_random_dataset()
23 |
24 | loss = model.train(data_fac.random_trainset,data_fac.random_testset)
25 |
26 | mpc = MPC(env,config)
27 |
28 | rewards_list = []
29 | for itr in range(config["dataset_config"]["n_mpc_itrs"]):
30 | t = time.time()
31 | print("**********************************************")
32 | print("The reinforce process [%s], collecting data ..." % itr)
33 | rewards = data_fac.collect_mpc_dataset(mpc, model)
34 | trainset, testset = data_fac.make_dataset()
35 | rewards_list += rewards
36 |
37 | plt.close("all")
38 | plt.figure(figsize=(12, 5))
39 | plt.title('Reward Trend with %s iteration' % itr)
40 | plt.plot(rewards_list)
41 | plt.savefig("storage/reward-" + str(model.exp_number) + ".png")
42 | print("Consume %s s in this iteration" % (time.time() - t))
43 | loss = model.train(trainset, testset)
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/Angle Error h_0 100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/Angle Error h_0 100.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/State Error h_0 100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/State Error h_0 100.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/config-1.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_1.ckpt" # the path to load the model
4 | n_states: 6 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 1 # hidden layer number
7 | size_hidden: 256 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 500 # how many epoches to train the dynamic model
12 | learning_rate: 0.001
13 | batch_size: 64
14 | save_model_flag: True
15 | save_model_path: "storage/exp_1.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 50 # how many every epochs to save the loss figure
18 | exp_number: 1 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_1.pkl"
23 | n_max_steps: 500 # maximum steps per episode
24 | n_random_episodes: 30 # how many random episodes' data to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 6 # how many episodes data sampled with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # the number to perform reinforce iteration
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_1.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 12 # how long of the horizon to predict
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 20 # max iterations for the ABC optimization
38 | gamma: 0.98 # reward discount coefficient
39 | action_low: -5 # lower bound of the solution space
40 | action_high: 5 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/config-2.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_2.ckpt" # the path to load the model
4 | n_states: 6 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 1 # hidden layer number
7 | size_hidden: 256 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 1000 # how many epoches to train the dynamic model
12 | learning_rate: 0.001
13 | batch_size: 32
14 | save_model_flag: True
15 | save_model_path: "storage/exp_2.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 50 # how many every epochs to save the loss figure
18 | exp_number: 2 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_2.pkl"
23 | n_max_steps: 500 # maximum steps per episode
24 | n_random_episodes: 30 # how many random episodes' data to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 6 # how many episodes data sampled with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # the number to perform reinforce iteration
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_2.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 15 # how long of the horizon to predict
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 15 # max iterations for the ABC optimization
38 | gamma: 0.99 # reward discount coefficient
39 | action_low: -5 # lower bound of the solution space
40 | action_high: 5 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/config-4.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_4.ckpt" # the path to load the model
4 | n_states: 6 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 2 # hidden layer number
7 | size_hidden: 128 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 1000 # how many epoches to train the dynamic model
12 | learning_rate: 0.0006
13 | batch_size: 64
14 | save_model_flag: True
15 | save_model_path: "storage/exp_4.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 100 # how many every epochs to save the loss figure
18 | exp_number: 4 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_4.pkl"
23 | n_max_steps: 500 # maximum steps per episode
24 | n_random_episodes: 30 # how many random episodes' data to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 8 # how many episodes data sampled with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # the number to perform reinforce iteration
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_4.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 20 # how long of the horizon to predict
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 20 # max iterations for the ABC optimization
38 | gamma: 0.98 # reward discount coefficient
39 | action_low: -5 # lower bound of the solution space
40 | action_high: 5 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/config-5.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_5.ckpt" # the path to load the model
4 | n_states: 6 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 2 # hidden layer number
7 | size_hidden: 128 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 1000 # how many epoches to train the dynamic model
12 | learning_rate: 0.0006
13 | batch_size: 64
14 | save_model_flag: True
15 | save_model_path: "storage/exp_5.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 100 # how many every epochs to save the loss figure
18 | exp_number: 5 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_5.pkl"
23 | n_max_steps: 500 # maximum steps per episode
24 | n_random_episodes: 30 # how many random episodes' data to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 8 # how many episodes data sampled with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # the number to perform reinforce iteration
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_5.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 30 # how long of the horizon to predict
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 30 # max iterations for the ABC optimization
38 | gamma: 0.98 # reward discount coefficient
39 | action_low: -5 # lower bound of the solution space
40 | action_high: 5 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/config-6.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_6.ckpt" # the path to load the model
4 | n_states: 6 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 2 # hidden layer number
7 | size_hidden: 128 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 1000 # how many epoches to train the dynamic model
12 | learning_rate: 0.0006
13 | batch_size: 64
14 | save_model_flag: True
15 | save_model_path: "storage/exp_6.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 100 # how many every epochs to save the loss figure
18 | exp_number: 6 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_6.pkl"
23 | n_max_steps: 500 # maximum steps per episode
24 | n_random_episodes: 30 # how many random episodes' data to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 8 # how many episodes data sampled with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # the number to perform reinforce iteration
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_6.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 20 # how long of the horizon to predict
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 40 # max iterations for the ABC optimization
38 | gamma: 0.999 # reward discount coefficient
39 | action_low: -5 # lower bound of the solution space
40 | action_high: 5 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/config-7.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_7.ckpt" # the path to load the model
4 | n_states: 6 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 1 # hidden layer number
7 | size_hidden: 500 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 100 # how many epoches to train the dynamic model
12 | learning_rate: 0.001
13 | batch_size: 512
14 | save_model_flag: True
15 | save_model_path: "storage/exp_7.ckpt" # the path to save the model
16 | save_loss_fig: True
19 | save_loss_fig_frequency: 100 # how many every epochs to save the loss figure
20 | exp_number: 7 # experiment number
21 |
22 | dataset_config:
23 | load_flag: False
24 | load_path: "storage/data_exp_7.pkl"
25 | n_max_steps: 500 # maximum steps per episode
26 | n_random_episodes: 700 # how many random episodes' data to fit the initial dynamic model
27 | testset_split: 0.1 # testset's portion in the random dataset, the rest portion is the training set
28 | n_mpc_episodes: 4 # how many episodes data sampled with the MPC controller
29 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
30 | min_train_samples: 8000
31 | n_mpc_itrs: 100 # the number to perform reinforce iteration
32 | save_flag: True # set True if you want to save all the dataset
33 | save_path: "storage/data_exp_7.pkl"
34 |
35 | # MPC controller configuration
36 | mpc_config:
37 | horizon: 15 # how long of the horizon to predict
38 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
41 | max_itrs: 40 # max iterations for the ABC optimization
42 | gamma: 0.8 # reward discount coefficient
43 | action_low: -5 # lower bound of the solution space
44 | action_high: 5 # upper bound of the solution space
45 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/config_3.yml:
--------------------------------------------------------------------------------
1 | model_config:
2 | load_model: False # If set true, you must specify the model path, otherwise train a new model
3 | model_path: "storage/exp_5.ckpt" # the path to load the model
4 | n_states: 6 # environment states
5 | n_actions: 1 # how many controls we need
6 | n_hidden: 2 # hidden layer number
7 | size_hidden: 128 # hidden layer size
8 | use_cuda: True
9 |
10 | training_config:
11 | n_epochs: 1000 # how many epoches to train the dynamic model
12 | learning_rate: 0.0006
13 | batch_size: 64
14 | save_model_flag: True
15 | save_model_path: "storage/exp_5.ckpt" # the path to save the model
16 | save_loss_fig: True
17 | save_loss_fig_frequency: 100 # how many every epochs to save the loss figure
18 | exp_number: 5 # experiment number
19 |
20 | dataset_config:
21 | load_flag: False
22 | load_path: "storage/data_exp_5.pkl"
23 | n_max_steps: 500 # maximum steps per episode
24 | n_random_episodes: 30 # how many random episodes' data to fit the initial dynamic model
25 | testset_split: 0.2 # testset's portion in the random dataset, the rest portion is the training set
26 | n_mpc_episodes: 8 # how many episodes data sampled with the MPC controller
27 | mpc_dataset_split: 0.5 # mpc dataset's portion in the training set
28 | min_train_samples: 6000
29 | n_mpc_itrs: 100 # the number to perform reinforce iteration
30 | save_flag: True # set True if you want to save all the dataset
31 | save_path: "storage/data_exp_5.pkl"
32 |
33 | # MPC controller configuration
34 | mpc_config:
35 | horizon: 20 # how long of the horizon to predict
36 | numb_bees: 8 # ABC optimization algorithm param: number of the bees
37 | max_itrs: 40 # max iterations for the ABC optimization
38 | gamma: 0.999 # reward discount coefficient
39 | action_low: -5 # lower bound of the solution space
40 | action_high: 5 # upper bound of the solution space
41 |
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/loss-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/loss-1.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/loss-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/loss-2.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/loss-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/loss-3.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/loss-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/loss-4.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/loss-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/loss-5.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/loss-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/loss-6.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/loss-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/loss-7.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/mpc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/mpc.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/reward-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/reward-1.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/reward-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/reward-2.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/reward-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/reward-3.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/reward-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/reward-4.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/reward-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/reward-5.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/reward-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/reward-6.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/storage/reward-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/MPC/MPC-Qube/storage/reward-7.png
--------------------------------------------------------------------------------
/MPC/MPC-Qube/test.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import gym
3 | import torch.utils.data as data
4 | from dynamics import *
5 | from controller import *
6 | from utils import *
7 | from quanser_robots.common import GentlyTerminating
8 | import time
9 |
10 | def test(mpc, model):
11 | reward_episodes = []
12 | for i in range(data_fac.n_mpc_episodes):
13 | data_tmp = []
14 | label_tmp = []
15 | reward_episode = 0
16 | state_old = data_fac.env.reset()
17 | for j in range(data_fac.n_max_steps):
18 | env.render()
19 | action = mpc.act(state_old, model)
20 | action = np.array([action])
21 | data_tmp.append(np.concatenate((state_old, action)))
22 | state_new, reward, done, info = data_fac.env.step(action)
23 | reward_episode += reward
24 | label_tmp.append(state_new - state_old)
25 | if done:
26 | break
27 | state_old = state_new
28 | reward_episodes.append(reward_episode)
29 | print(f"Episode [{i}/{data_fac.n_mpc_episodes}], Reward: {reward_episode:.8f}")
30 | return reward_episodes
31 |
32 | env_id ="Qube-v0" # "CartPole-v0"
33 | env = GentlyTerminating(gym.make(env_id))
34 | config_path = "config.yml"
35 | config = load_config(config_path)
36 | print_config(config_path)
37 |
38 | config["model_config"]["load_model"] = True
39 | config["dataset_config"]["load_flag"] = True
40 |
41 | model = DynamicModel(config)
42 |
43 | data_fac = DatasetFactory(env,config)
44 | model.norm_train_data(data_fac.all_dataset["data"],data_fac.all_dataset["label"])
45 |
46 | mpc = MPC(env,config)
47 |
48 | rewards_list = []
49 | for itr in range(config["dataset_config"]["n_mpc_itrs"]):
50 | t = time.time()
51 | print("**********************************************")
52 | print("The reinforce process [%s], collecting data ..." % itr)
53 | rewards = test(mpc, model)
54 | rewards_list += rewards
55 | plt.close("all")
56 | plt.figure(figsize=(12, 5))
57 | plt.title('Reward Trend with %s iteration' % itr)
58 | plt.plot(rewards_list)
59 | plt.savefig("storage/reward-" + str(model.exp_number) + "_test.png")
60 | print("Consume %s s in this iteration" % (time.time() - t))
62 |
--------------------------------------------------------------------------------
/MPC/README.md:
--------------------------------------------------------------------------------
1 | # MPC - Model Predictive Control
2 |
3 | This folder contains the implementation of the MPC algorithm and its evaluation.
4 |
5 | The implementation mainly follows the approach described in [this paper](https://ieeexplore.ieee.org/abstract/document/8463189)
6 |
7 | To optimize the MPC controller, we use the [Artificial Bee Colony](https://en.wikipedia.org/wiki/Artificial_bee_colony_algorithm) (ABC) optimization algorithm,
8 | instead of the original random shooting method used in the paper. The implementation of the ABC algorithm is based on this repo: [https://github.com/rwuilbercq/Hive](https://github.com/rwuilbercq/Hive)
9 |
10 | Choose an environment folder and follow the instructions there to run everything.
11 |
12 | A Jupyter notebook example is in the ```./MPC-CartPoleStab``` folder.
13 |
14 | ## Overview of the experiment results
15 |
16 |
17 | The best results in different environments:
18 |
19 | | Environment | Horizon |Numb\_bees | Max\_itrs | Gamma | Episode reward |
20 | | -------- | -----: | :----: | :----: | :----: | :----: |
21 | | Qube | 30 | 8 | 30 | 0.98 | 4.0 |
22 | | CartPole Swingup | 20 | 8 | 20 | 0.99 | 2000 |
23 | | CartPole Stab | 12 | 8 | 20 | 0.99 | 19999 |
24 | | Double CartPole | 5 | 8 | 20 | 0.99 | 91 |
25 |
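26 | For reference, below is a minimal, self-contained sketch of the random-shooting baseline mentioned above (not the actual ```controller.py``` implementation); the ABC optimizer replaces the uniform candidate sampling with a bee-colony search over the same action-sequence space. The names `dynamics` and `reward_fn` are hypothetical placeholders for the learned model and the per-step reward, and `horizon`/`gamma` correspond to the columns in the table above.
27 |
28 | ```python
29 | import numpy as np
30 |
31 | def random_shooting_act(state, dynamics, reward_fn, action_low, action_high,
32 |                         horizon=30, n_candidates=1000, gamma=0.98):
33 |     # Sample candidate action sequences uniformly within the action bounds.
34 |     dim = len(action_low)
35 |     candidates = np.random.uniform(action_low, action_high,
36 |                                    size=(n_candidates, horizon, dim))
37 |     returns = np.zeros(n_candidates)
38 |     for k, seq in enumerate(candidates):
39 |         s = state
40 |         for t, a in enumerate(seq):
41 |             returns[k] += (gamma ** t) * reward_fn(s, a)
42 |             s = dynamics(s, a)  # roll the learned dynamics model forward
43 |     # Receding horizon: execute only the first action of the best sequence.
44 |     return candidates[np.argmax(returns), 0]
45 | ```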
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Reinforcement Learning Course Project
2 | ## Note: This repo is deprecated. For a newer and faster implementation of MPC, please see the following repo: [https://github.com/liuzuxin/MPC_template-model_predictive_control_for_reinforcement_learning](https://github.com/liuzuxin/MPC_template-model_predictive_control_for_reinforcement_learning)
3 |
4 | Technische Universität Darmstadt, winter semester 2018/2019
5 |
6 | Supervisors: Jan Peters, Riad Akrour
7 |
8 | This repository contains PyTorch implementations of Deep Q-Network (DQN) and Model Predictive Control (MPC),
9 | and their evaluation on the [Quanser robot platforms](https://git.ias.informatik.tu-darmstadt.de/quanser/clients).
10 |
11 |
12 |
13 |
14 |
15 |
16 | ## Authors
17 | + Zuxin Liu (implemented the algorithms, cleaned the code, ran experiments and wrote the report)
18 | + Yunhao Li (ran experiments and wrote the report)
19 | + Junfei Xiao (ran experiments and wrote the report)
20 |
21 | ## Algorithms
22 | + [DQN](https://arxiv.org/abs/1312.5602)
23 | + [MPC](https://ieeexplore.ieee.org/abstract/document/8463189)
24 |
25 | ## Platforms
26 | + [Qube](https://git.ias.informatik.tu-darmstadt.de/quanser/clients/tree/master/quanser_robots/qube)
27 | + [Double Pendulum](https://git.ias.informatik.tu-darmstadt.de/quanser/clients/tree/master/quanser_robots/double_pendulum)
28 | + [Cartpole Swing-up](https://git.ias.informatik.tu-darmstadt.de/quanser/clients/tree/master/quanser_robots/cartpole)
29 | + [Cartpole Stab](https://git.ias.informatik.tu-darmstadt.de/quanser/clients/tree/master/quanser_robots/cartpole)
30 |
31 | ## Installation
32 | For the installation of the Quanser robot simulation environment, please see [this page](https://git.ias.informatik.tu-darmstadt.de/quanser/clients)
33 |
34 | For the implementation of the algorithms, the following packages are required:
35 |
36 | + python = 3.6.2
37 | + pytorch = 1.0.1
38 | + numpy = 1.12.1
39 | + matplotlib = 2.1.1
40 | + gym
41 |
42 | You can create the same environment as ours using [Anaconda](https://www.anaconda.com/).
43 | All the required packages are listed in the ```environment.yaml``` file. Create the environment with the following command:
44 |
45 | ```
46 | conda env create -f environment.yaml
47 | ```
48 | Then activate the environment by running:
49 |
50 | ```
51 | source activate pytorch
52 | ```
53 |
54 | ## How to run
55 |
56 | 1. Choose the algorithm you want to use and change to the corresponding folder (DQN or MPC)
57 | 2. Choose the environment you want to evaluate and change to the corresponding folder (CartPoleStab, Double, Qube or Swing)
58 | 3. Edit the configuration file ```config.yml``` with the parameters you want (a sketch of its typical structure is shown below), and follow the instructions in that folder
59 |
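60 | For illustration, the sketch below shows the kind of entries the MPC scripts read from ```config.yml```. The key names are taken from the MPC test script; the values are placeholders only, so always refer to the ```config.yml``` shipped with each folder.
61 |
62 | ```yaml
63 | # Illustrative excerpt only -- see the config.yml in each environment folder.
64 | model_config:
65 |   load_model: true      # load a stored model checkpoint instead of training from scratch
66 | dataset_config:
67 |   load_flag: true       # reuse previously collected data
68 |   n_mpc_itrs: 10        # number of data-collection / evaluation iterations (placeholder value)
69 | ```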
--------------------------------------------------------------------------------
/Resources/DQN/Playing Atari with Deep Reinforcement Learning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/DQN/Playing Atari with Deep Reinforcement Learning.pdf
--------------------------------------------------------------------------------
/Resources/DQN/Q-Learning in Continuous State Action Spaces.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/DQN/Q-Learning in Continuous State Action Spaces.pdf
--------------------------------------------------------------------------------
/Resources/DQN/README.md:
--------------------------------------------------------------------------------
1 | # Resources - DQN
2 |
3 | This folder contains some of the resources we used for the DQN implementation.
4 |
--------------------------------------------------------------------------------
/Resources/MPC/Approximate Dynamic Programming with Gaussian Processes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/MPC/Approximate Dynamic Programming with Gaussian Processes.pdf
--------------------------------------------------------------------------------
/Resources/MPC/Constrained model predictive control: Stability and optimality.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/MPC/Constrained model predictive control: Stability and optimality.pdf
--------------------------------------------------------------------------------
/Resources/MPC/Neural Network Dynamics for Model based Deep Rl with Model free fine tuning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/MPC/Neural Network Dynamics for Model based Deep Rl with Model free fine tuning.pdf
--------------------------------------------------------------------------------
/Resources/MPC/README.md:
--------------------------------------------------------------------------------
1 | # Resources - MPC
2 |
3 | This folder contains some of the resources we used for the MPC implementation.
4 |
--------------------------------------------------------------------------------
/Resources/README.md:
--------------------------------------------------------------------------------
1 | # Resources
2 |
3 | This folder contains some resources we used in this project.
4 |
5 | The `./figures` folder contains GIF results from both simulation and the real environment.
--------------------------------------------------------------------------------
/Resources/figures/README.md:
--------------------------------------------------------------------------------
1 | # Resources - figures
2 |
3 | This folder contains GIF results from both simulation and the real environment.
--------------------------------------------------------------------------------
/Resources/figures/qube-after-fine-tuning.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/figures/qube-after-fine-tuning.gif
--------------------------------------------------------------------------------
/Resources/figures/qube-before-fine-tuning.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/figures/qube-before-fine-tuning.gif
--------------------------------------------------------------------------------
/Resources/figures/qube.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/figures/qube.gif
--------------------------------------------------------------------------------
/Resources/figures/stabe.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/figures/stabe.gif
--------------------------------------------------------------------------------
/Resources/figures/swing.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/figures/swing.gif
--------------------------------------------------------------------------------
/Resources/figures/swing_interesting.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuzuxin/Deep-Q-Network-and-Model-Predictive-Control-Project/3032a127c445b4821aad7ca38daade5caa8210a7/Resources/figures/swing_interesting.gif
--------------------------------------------------------------------------------