          '<p class="highlight-link">' +
            '<a href="javascript:SphinxHighlight.hideSearchWords()">' +
            _("Hide Search Matches") +
            "</a></p>"
        )
    );
  },

  /**
   * helper function to hide the search marks again
   */
  hideSearchWords: () => {
    document
      .querySelectorAll("#searchbox .highlight-link")
      .forEach((el) => el.remove());
    document
      .querySelectorAll("span.highlighted")
      .forEach((el) => el.classList.remove("highlighted"));
    localStorage.removeItem("sphinx_highlight_terms")
  },

  initEscapeListener: () => {
    // only install a listener if it is really needed
    if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;

    document.addEventListener("keydown", (event) => {
      // bail for input elements
      if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
      // bail with special keys
      if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return;
      if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) {
        SphinxHighlight.hideSearchWords();
        event.preventDefault();
      }
    });
  },
};

_ready(SphinxHighlight.highlightSearchWords);
_ready(SphinxHighlight.initEscapeListener);

--------------------------------------------------------------------------------
/docs/_build/html/_static/language_data.js:
--------------------------------------------------------------------------------

/*
 * language_data.js
 * ~~~~~~~~~~~~~~~~
 *
 * This script contains the language-specific data used by searchtools.js,
 * namely the list of stopwords, stemmer, scorer and splitter.
 *
 * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS.
 * :license: BSD, see LICENSE for details.
 *
 */

var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];


/* Non-minified version is copied as a separate JS file, is available */

/**
 * Porter Stemmer
 */
var Stemmer = function() {

  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  var mgr0 = "^(" + C + ")?" + V + C;                      // [C]VC... is m>0
  var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$";    // [C]VC[V] is m=1
  var mgr1 = "^(" + C + ")?" + V + C + V + C;              // [C]VCVC... is m>1
  var s_v = "^(" + C + ")?" + v;                           // vowel in stem

  this.stemWord = function (w) {
    var stem;
    var suffix;
    var firstch;
    var origword = w;

    if (w.length < 3)
      return w;

    var re;
    var re2;
    var re3;
    var re4;

    firstch = w.substr(0,1);
    if (firstch == "y")
      w = firstch.toUpperCase() + w.substr(1);

    // Step 1a
    re = /^(.+?)(ss|i)es$/;
    re2 = /^(.+?)([^s])s$/;

    if (re.test(w))
      w = w.replace(re,"$1$2");
    else if (re2.test(w))
      w = w.replace(re2,"$1$2");

    // Step 1b
    re = /^(.+?)eed$/;
    re2 = /^(.+?)(ed|ing)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      re = new RegExp(mgr0);
      if (re.test(fp[1])) {
        re = /.$/;
        w = w.replace(re,"");
      }
    }
    else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1];
      re2 = new RegExp(s_v);
      if (re2.test(stem)) {
        w = stem;
        re2 = /(at|bl|iz)$/;
        re3 = new RegExp("([^aeiouylsz])\\1$");
        re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
        if (re2.test(w))
          w = w + "e";
        else if (re3.test(w)) {
          re = /.$/;
          w = w.replace(re,"");
        }
        else if (re4.test(w))
          w = w + "e";
      }
    }

    // Step 1c
    re = /^(.+?)y$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(s_v);
      if (re.test(stem))
        w = stem + "i";
    }

    // Step 2
    re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = new RegExp(mgr0);
      if (re.test(stem))
        w = stem + step2list[suffix];
    }

    // Step 3
    re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = new RegExp(mgr0);
      if (re.test(stem))
        w = stem + step3list[suffix];
    }

    // Step 4
    re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
    re2 = /^(.+?)(s|t)(ion)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(mgr1);
      if (re.test(stem))
        w = stem;
    }
    else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1] + fp[2];
      re2 = new RegExp(mgr1);
      if (re2.test(stem))
        w = stem;
    }

    // Step 5
    re = /^(.+?)e$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(mgr1);
      re2 = new RegExp(meq1);
      re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
        w = stem;
    }
    re = /ll$/;
    re2 = new RegExp(mgr1);
    if (re.test(w) && re2.test(w)) {
      re = /.$/;
      w = w.replace(re,"");
    }

    // and turn initial Y back to y
    if (firstch == "y")
      w = firstch.toLowerCase() + w.substr(1);
    return w;
  }
}

--------------------------------------------------------------------------------
/docs/_build/html/test_trained_agent.html:
--------------------------------------------------------------------------------

import random
import numpy as np


def sample_actions():
    # Sample a random action [F, Q] within fixed bounds:
    # F in [5, 100], Q in [-8500, 0].
    F = 5 + random.random() * 95
    Q = -1 * random.random() * 8500

    return np.array([F, Q]).reshape(2, )


def sample_states():
    # Sample a random state [c_a, c_b, t_r, t_k] within fixed bounds:
    # c_a, c_b in [0.1, 2.0], t_r in [50, 150], t_k in [50, 140].
    c_a = 0.1 + random.random() * 1.9
    c_b = 0.1 + random.random() * 1.9
    t_r = 50 + random.random() * 100
    t_k = 50 + random.random() * 90

    return np.array([c_a, c_b, t_r, t_k]).reshape(4, )
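The two samplers above draw a random action and a random initial state within fixed bounds. A minimal usage sketch (illustrative only, not taken from the project):

# Illustrative sketch: the samplers are defined above, the surrounding checks are not project code.
action = sample_actions()   # shape (2,): [F, Q]
state = sample_states()     # shape (4,): [C_A, C_B, T_R, T_K]
assert action.shape == (2,)
assert state.shape == (4,)
print(action, state)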
Stable Baselines3 (SB3) is a set of reliable implementations of reinforcement learning algorithms in PyTorch. It is the next major version of Stable Baselines.

GitHub repository: https://github.com/DLR-RM/stable-baselines3

Paper: https://jmlr.org/papers/volume22/20-1364/20-1364.pdf

RL Baselines3 Zoo (training framework for SB3): https://github.com/DLR-RM/rl-baselines3-zoo

RL Baselines3 Zoo provides a collection of pre-trained agents, scripts for training, evaluating agents, tuning hyperparameters, plotting results and recording videos.

SB3 Contrib (experimental RL code, latest algorithms): https://github.com/Stable-Baselines-Team/stable-baselines3-contrib

Main Features
82 |Unified structure for all algorithms
PEP8 compliant (unified code style)
Documented functions and classes
Tests, high code coverage and type hints
Clean code
Tensorboard support
The performance of each algorithm was tested (see Results section in their respective page)
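For orientation, a minimal SB3 training and evaluation loop looks like the sketch below. It is generic SB3 usage; the choice of DDPG, the Pendulum-v1 environment id, and the timestep budget are placeholders, not code from this project.

# Generic Stable Baselines3 sketch -- not project code.
from stable_baselines3 import DDPG

# SB3 accepts a Gymnasium environment id directly; "Pendulum-v1" is a placeholder.
model = DDPG("MlpPolicy", "Pendulum-v1", verbose=1)
model.learn(total_timesteps=10_000)

# Evaluate the trained policy deterministically.
vec_env = model.get_env()
obs = vec_env.reset()
action, _ = model.predict(obs, deterministic=True)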
User Guide
Python Module Index

a
    argparse_actions
b
    buffer
d
    DDPG
e
    env
m
    main
    matrics
p
    ploting
r
    random_sa
s
    simulator
t
    test_trained_agent
u
    utils
Function plot_xu is used to plot the four states \([C_A, C_B, T_R, T_K]\) and the two actions \([F, \dot Q]\) for both the RL agent and the MPC controller.

Parameters:
    rl_xk (np.ndarray) : array containing the four states \([C_A, C_B, T_R, T_K]\) from the closed-loop simulation with the reinforcement learning agent.
    rl_uk (np.ndarray) : array containing the two actions \([F, \dot Q]\) from the closed-loop simulation with the reinforcement learning agent.
    mpc_xk (np.ndarray) : array containing the four states \([C_A, C_B, T_R, T_K]\) from the closed-loop simulation with the MPC controller.
    mpc_uk (np.ndarray) : array containing the two actions \([F, \dot Q]\) from the closed-loop simulation with the MPC controller.
    itr (int) : unique key given to the figure.
    file (str or path) : path where the figure should be saved.

Returns:
    None

Function plot_xu is used to plot the four states \([C_A, C_B, T_R, T_K]\) and the two actions \([F, \dot Q]\) for the RL agent alone.

Parameters:
    rl_xk (np.ndarray) : array containing the four states \([C_A, C_B, T_R, T_K]\).
    rl_uk (np.ndarray) : array containing the two actions \([F, \dot Q]\).
    itr (int) : unique key given to the figure.
    file (str or path) : path where the figure should be saved.

Returns:
    None

Function plot_xu is used to plot the four states \([C_A, C_B, T_R, T_K]\) and the two actions \([F, \dot Q]\).

Parameters:
    S (np.ndarray) : array containing the four states \([C_A, C_B, T_R, T_K]\).
    I (np.ndarray) : array containing the two actions \([F, \dot Q]\).
    file (str or path) : path where the figure should be saved.

Returns:
    None
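For reference, a hypothetical call to the last variant documented above. The module name ploting and the exact signature plot_xu(S, I, file) are assumed from the docstring and have not been verified against the source.

# Hypothetical usage sketch -- module name and signature assumed from the docstring above.
import numpy as np
import ploting  # assumed to be the project module that defines plot_xu

S = np.random.rand(100, 4)   # states  [C_A, C_B, T_R, T_K] over 100 steps
I = np.random.rand(100, 2)   # actions [F, Q_dot] over 100 steps
ploting.plot_xu(S, I, file="xu_trajectory.png")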
137 |
import utils
import numpy as np
import matplotlib.pyplot as plt


def plot_series(series, title, xlabel, ylabel, file):
    """Plot `series` against its index and save the figure to `file`."""
    x = list(range(len(series)))
    plt.figure(figsize=(10, 6))
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    plt.title(title, fontsize=24)
    plt.xlabel(xlabel, fontsize=24)
    plt.ylabel(ylabel, fontsize=24)
    plt.plot(x, series)
    plt.savefig(file)
    plt.close()


def plot_sliding_mean(data, window, title, xlabel, ylabel, file):
    """Plot the sliding-window mean of `data` using `plot_series`."""
    m_cl = []
    for i in range(0, len(data) - window):
        me = sum(data[i:i + window]) / window
        m_cl.append(me)

    plot_series(m_cl, title, xlabel, ylabel, file)


if __name__ == "__main__":

    file1 = utils.global_dir + '/data/mat/reward_list.csv'
    file2 = utils.global_dir + '/data/mat/actor_loss 17.csv'
    file3 = utils.global_dir + '/data/mat/critic_loss17.csv'
    # file4 = utils.global_dir + '/data/mat/ep_len.csv'

    reward = np.loadtxt(file1, delimiter=',')
    actor_loss = np.loadtxt(file2, delimiter=',')
    critic_loss = np.loadtxt(file3, delimiter=',')
    # ep_len = np.loadtxt(file4, delimiter=',')

    # critic_loss = critic_loss[100:]
    file1 = utils.global_dir + '/data/mat/reward_list.png'
    file2 = utils.global_dir + '/data/mat/actor_loss.png'
    file3 = utils.global_dir + '/data/mat/critic_loss.png'
    # file4 = utils.global_dir + '/data/mat/ep_len.png'

    plot_series(reward, 'Reward', 'episode ', 'reward', file1)
    plot_series(actor_loss, 'Actor loss', 'number of updates ', 'actor loss', file2)
    plot_series(critic_loss, 'critic loss', 'number of updates ', 'critic loss', file3)
    # plot_series(ep_len, 'ep length', 'episode ', 'ep length', file4)

    # window = 10
    # plot_sliding_mean(reward, window, 'Reward', 'episode ', 'reward', file1)
    # plot_sliding_mean(actor_loss, window, 'Actor loss', 'episode ', 'actor loss', file2)

    # window = 500
    # plot_sliding_mean(critic_loss, window, 'critic loss', 'episode ', 'critic loss', file3)
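As a side note, the windowed mean computed by the loop in plot_sliding_mean can also be obtained with a vectorized convolution. The sketch below is equivalent except that the loop stops one window early, so the convolution returns one extra point.

# Vectorized alternative to the loop in plot_sliding_mean (illustrative sketch).
import numpy as np

def sliding_mean(data, window):
    # Moving average over `window` samples; yields len(data) - window + 1 values,
    # one more than the loop above, which iterates up to len(data) - window.
    return np.convolve(data, np.ones(window) / window, mode="valid")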