├── .gitignore ├── Agent.py ├── README.md ├── config.yaml ├── figures ├── TestReward.svg ├── TrainReward_per_Episode.svg └── relative_TrainReward.svg ├── main.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Agent.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from utils import * 3 | import copy 4 | from torch.distributions import Categorical 5 | import torch.nn as nn 6 | import numpy as np 7 | import torch.nn.functional as F 8 | 9 | 10 | #Xavier weight initialization 11 | def init_weights(m): 12 | if isinstance(m,nn.Linear): 13 | torch.nn.init.xavier_uniform_(m.weight,gain=torch.nn.init.calculate_gain('tanh')) 14 | torch.nn.init.zeros_(m.bias) 15 | 16 | 17 | class Agent(object): 18 | 19 | def __init__(self, env, opt): 20 | self.opt=opt 21 | self.env=env 22 | if opt.fromFile is not None: 23 | self.load(opt.fromFile) 24 | self.action_space = env.action_space 25 | self.test=False 26 | self.nbEvents=0 27 | 28 | self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu') 29 | 30 | # Define actor network 31 | self.actor=nn.Sequential( 32 | nn.Linear(env.observation_space.shape[0],128), 33 | nn.Tanh(), 34 | nn.Linear(128,64), 35 | nn.Tanh(), 36 | nn.Linear(64,self.action_space.n), 37 | nn.Softmax(dim=-1) 38 | ) 39 | 40 | # Define critic network 41 | self.critic=nn.Sequential( 42 | nn.Linear(env.observation_space.shape[0],128), 43 | nn.Tanh(), 44 | nn.Linear(128,64), 45 | nn.Tanh(), 46 | nn.Linear(64,1) 47 | ) 48 | 49 | self.actor.apply(init_weights) 50 | self.critic.apply(init_weights) 51 | self.actor.to(self.device) 52 | self.critic.to(self.device) 53 | 54 | #Define learning rates and optimizers 55 | self.lr_a=opt.lr_a 56 | self.lr_c=opt.lr_c 57 | self.optimizer_actor = torch.optim.Adam(self.actor.parameters(),self.lr_a) 58 | self.optimizer_critic= torch.optim.Adam(self.critic.parameters(),self.lr_c) 59 | 60 | # Define algorithm variables 61 | self.clip=opt.clip 62 | self.ppo=opt.PPO 63 | self.kl=opt.KL 64 | 65 | #Define hyperparameters 66 | self.K=opt.K_epochs 67 | self.discount=opt.discount # Discount factor 68 | self.gae_lambda=opt.gae_lambda # Lambda of TD(lambda) advantage estimation 69 | 70 | #Hyperparameters of clipped PPO 71 | self.eps_clip=0.2 72 | # Hyperparameters of KL-Div Algo 73 | self.beta=1. 
74 | self.delta=0.01 75 | 76 | #Initialize memory 77 | self.states=[] 78 | self.actions=[] 79 | self.log_probs=[] 80 | self.rewards=[] 81 | self.dones=[] 82 | self.new_states=[] 83 | self.values=[] 84 | 85 | #counters 86 | self.actor_count=0 87 | self.critic_count=0 88 | 89 | 90 | 91 | def act(self, obs): 92 | 93 | #Calculate distribution of policy 94 | prob=self.actor(torch.FloatTensor(obs).to(self.device)) 95 | dist=Categorical(prob) 96 | #sample action w.r.t policy 97 | action=dist.sample() 98 | 99 | #store values 100 | if not self.test: 101 | self.log_probs.append(dist.log_prob(action)) 102 | self.actions.append(action.detach()) 103 | self.states.append(torch.FloatTensor(obs).to(self.device)) 104 | self.values.append(self.critic(torch.FloatTensor(obs).to(self.device)).detach()) 105 | 106 | 107 | return action.item() 108 | 109 | 110 | #learning algorithm of PPO with Adaptive Kullback-Leibler divergence 111 | def learn_kl(self): 112 | 113 | #Compute the TD(lambda) advantage estimation 114 | last_val=self.critic(torch.FloatTensor(self.new_states[-1]).to(self.device)).item() 115 | rewards = np.zeros_like(self.rewards) 116 | advantage = np.zeros_like(self.rewards) 117 | adv=0. 118 | for t in reversed(range(len(self.rewards))): 119 | if t==len(self.rewards)-1: 120 | rewards[t]=self.rewards[t]+self.discount*(1-self.dones[t])*last_val 121 | delta = self.rewards[t]+self.discount*(1-self.dones[t])*last_val - self.values[t].item() 122 | else: 123 | rewards[t]=self.rewards[t]+self.discount*(1-self.dones[t])*rewards[t+1] 124 | delta=self.rewards[t]+self.discount*(1-self.dones[t])*self.values[t+1].item()-self.values[t].item() 125 | 126 | adv=adv*self.discount*self.gae_lambda*(1-self.dones[t])+delta 127 | advantage[t]=adv 128 | 129 | rewards = torch.FloatTensor(rewards).to(self.device) 130 | advantage = torch.FloatTensor(advantage).to(self.device) 131 | #Normalize the advantage 132 | advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10) 133 | 134 | old_states = torch.squeeze(torch.stack(self.states, dim=0)).detach().to(self.device) 135 | old_actions = torch.squeeze(torch.stack(self.actions, dim=0)).detach().to(self.device) 136 | old_logprobs = torch.squeeze(torch.stack(self.log_probs, dim=0)).detach().to(self.device) 137 | 138 | pi_old=self.actor(old_states).view((-1,self.action_space.n)) 139 | state_value=self.critic(old_states).view(-1) 140 | 141 | for _ in range(self.K): 142 | 143 | probs = self.actor(old_states) 144 | dist=Categorical(probs) 145 | log_probs=dist.log_prob(old_actions) 146 | ratios=torch.exp(log_probs-old_logprobs.detach()) 147 | 148 | #PPO Loss 149 | loss1=torch.mean(ratios*advantage.detach()) 150 | #KL-Divergence Loss 151 | loss2=F.kl_div(input=probs,target=pi_old.detach(),reduction='batchmean') 152 | 153 | #Actor update 154 | actor_loss=- (loss1-self.beta*loss2) 155 | self.actor_count+=1 156 | self.actor_loss=actor_loss 157 | self.optimizer_actor.zero_grad() 158 | actor_loss.backward() 159 | self.optimizer_actor.step() 160 | 161 | #KL-Divergence update 162 | DL=F.kl_div(input=probs.view((-1,self.action_space.n)),target=pi_old.view((-1,self.action_space.n)),reduction='batchmean') 163 | if DL>=1.5*self.delta: 164 | self.beta*=2 165 | if DL<=self.delta/1.5: 166 | self.beta*=0.5 167 | 168 | #Critic update 169 | loss=F.smooth_l1_loss(rewards,state_value.view(-1)) 170 | self.critic_loss=loss 171 | self.critic_count+=1 172 | self.optimizer_critic.zero_grad() 173 | loss.backward() 174 | self.optimizer_critic.step() 175 | 176 | #Clear memory 177 | self.states=[] 178 | 
self.actions=[] 179 | self.log_probs=[] 180 | self.rewards=[] 181 | self.dones=[] 182 | self.new_states=[] 183 | self.values=[] 184 | 185 | #learning algorithm of PPO 186 | def learn_ppo(self): 187 | 188 | #Compute the TD(lambda) advantage estimation 189 | last_val=self.critic(torch.FloatTensor(self.new_states[-1]).to(self.device)).item() 190 | rewards = np.zeros_like(self.rewards) 191 | advantage = np.zeros_like(self.rewards) 192 | for t in reversed(range(len(self.rewards))): 193 | if t==len(self.rewards)-1: 194 | rewards[t]=self.rewards[t]+self.discount*(1-self.dones[t])*last_val 195 | #td_error = self.rewards[t]+self.discount*(1-self.dones[t])*last_val - self.values[t].item() 196 | else: 197 | rewards[t]=self.rewards[t]+self.discount*(1-self.dones[t])*rewards[t+1] 198 | #td_error=self.rewards[t]+self.discount*(1-self.dones[t])*self.values[t+1]-self.values[t] 199 | 200 | advantage[t]=rewards[t]-self.values[t] 201 | 202 | rewards = torch.FloatTensor(rewards).to(self.device) 203 | advantage = torch.FloatTensor(advantage).to(self.device) 204 | #Normalize the advantage 205 | advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10) 206 | 207 | old_states = torch.squeeze(torch.stack(self.states, dim=0)).detach().to(self.device) 208 | old_actions = torch.squeeze(torch.stack(self.actions, dim=0)).detach().to(self.device) 209 | old_logprobs = torch.squeeze(torch.stack(self.log_probs, dim=0)).detach().to(self.device) 210 | 211 | pi_old=self.actor(old_states).view((-1,self.action_space.n)) 212 | state_value=self.critic(old_states).view(-1) 213 | 214 | for _ in range(self.K): 215 | 216 | probs = self.actor(old_states) 217 | dist=Categorical(probs) 218 | log_probs=dist.log_prob(old_actions) 219 | ratios=torch.exp(log_probs-old_logprobs.detach()) 220 | 221 | #Use only the PPO Loss here 222 | loss1=torch.mean(ratios*advantage.detach()) 223 | actor_loss=-loss1 224 | 225 | #Actor update 226 | self.actor_count+=1 227 | self.actor_loss=actor_loss 228 | self.optimizer_actor.zero_grad() 229 | actor_loss.backward() 230 | self.optimizer_actor.step() 231 | 232 | #Critic update 233 | loss=F.smooth_l1_loss(rewards,state_value.view(-1)) 234 | self.critic_loss=loss 235 | self.critic_count+=1 236 | self.optimizer_critic.zero_grad() 237 | loss.backward() 238 | self.optimizer_critic.step() 239 | 240 | #Clear memory 241 | self.states=[] 242 | self.actions=[] 243 | self.log_probs=[] 244 | self.rewards=[] 245 | self.dones=[] 246 | self.new_states=[] 247 | self.values=[] 248 | 249 | 250 | #learning algorithm of PPO with clipped objective 251 | def learn_clip(self): 252 | 253 | #Compute TD(lambda) advantage estimation 254 | last_val=self.critic(torch.FloatTensor(self.new_states[-1]).to(self.device)).item() 255 | rewards = np.zeros_like(self.rewards) 256 | advantage = np.zeros_like(self.rewards) 257 | adv=0. 
258 | for t in reversed(range(len(self.rewards))): 259 | if t==len(self.rewards)-1: 260 | rewards[t]=self.rewards[t]+self.discount*(1-self.dones[t])*last_val 261 | delta = self.rewards[t]+self.discount*(1-self.dones[t])*last_val - self.values[t].item() 262 | else: 263 | rewards[t]=self.rewards[t]+self.discount*(1-self.dones[t])*rewards[t+1] 264 | delta=self.rewards[t]+self.discount*(1-self.dones[t])*self.values[t+1].item()-self.values[t].item() 265 | 266 | adv=adv*self.discount*self.gae_lambda*(1-self.dones[t])+delta 267 | advantage[t]=adv 268 | 269 | 270 | rewards = torch.FloatTensor(rewards).to(self.device) 271 | advantage = torch.FloatTensor(advantage).to(self.device) 272 | #Normalize the advantage 273 | advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10) 274 | 275 | old_states = torch.squeeze(torch.stack(self.states, dim=0)).detach().to(self.device) 276 | old_actions = torch.squeeze(torch.stack(self.actions, dim=0)).detach().to(self.device) 277 | old_logprobs = torch.squeeze(torch.stack(self.log_probs, dim=0)).detach().to(self.device) 278 | 279 | state_values=self.critic(old_states).view(-1) 280 | 281 | for _ in range(self.K): 282 | 283 | probs = self.actor(old_states) 284 | dist=Categorical(probs) 285 | log_probs=dist.log_prob(old_actions) 286 | ratios=torch.exp(log_probs-old_logprobs.detach()) 287 | 288 | #PPO-Loss 289 | loss1=ratios*advantage.detach() 290 | #Clipped Loss 291 | loss2=torch.clamp(ratios,min=1-self.eps_clip,max=1+self.eps_clip)*advantage.detach() 292 | 293 | #Actor update 294 | actor_loss= -torch.mean(torch.min(loss1,loss2)) 295 | self.actor_count+=1 296 | self.actor_loss=actor_loss 297 | self.optimizer_actor.zero_grad() 298 | actor_loss.backward() 299 | self.optimizer_actor.step() 300 | 301 | #Critic update 302 | loss=F.smooth_l1_loss(rewards,state_values) 303 | self.critic_loss=loss 304 | self.critic_count+=1 305 | self.optimizer_critic.zero_grad() 306 | loss.backward() 307 | self.optimizer_critic.step() 308 | 309 | #Clear memory 310 | self.states=[] 311 | self.actions=[] 312 | self.log_probs=[] 313 | self.rewards=[] 314 | self.dones=[] 315 | self.new_states=[] 316 | self.values=[] 317 | 318 | def learn(self): 319 | 320 | if self.clip: 321 | self.learn_clip() 322 | elif self.kl: 323 | self.learn_kl() 324 | elif self.ppo: 325 | self.learn_ppo() 326 | 327 | 328 | 329 | def store(self,ob, action, new_obs, reward, done, it): 330 | 331 | if not self.test: 332 | 333 | if it == self.opt.maxLengthTrain: 334 | print("undone") 335 | done=False 336 | 337 | self.rewards.append(reward) 338 | self.dones.append(float(done)) 339 | self.new_states.append(new_obs) 340 | 341 | 342 | #defines the timesteps when the agent learns 343 | def timeToLearn(self,done): 344 | if self.test: 345 | return False 346 | 347 | self.nbEvents+=1 348 | return self.nbEvents%self.opt.freqOptim == 0 349 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PPO-Algorithm 2 | 3 |
I implemented three versions of the PPO algorithm as proposed by John Schulman et al. in 'Proximal Policy Optimization Algorithms' (https://arxiv.org/abs/1707.06347): the plain (unclipped) surrogate objective, the clipped surrogate objective, and the surrogate objective with an adaptive KL penalty.
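For orientation, here is a minimal sketch of the three actor objectives as they appear in `Agent.py`. The helper name `surrogate_objectives` and its argument names are illustrative and not part of the repository; the sketch assumes the probability ratios, the normalized advantage estimates, and the old/new action probabilities have already been computed, as they are inside the `learn_*` methods:

```python
import torch

def surrogate_objectives(ratios, advantage, probs, pi_old, beta, eps_clip=0.2):
    """Illustrative only. ratios = pi_new(a|s) / pi_old(a|s) for the sampled
    actions, advantage = detached (normalized) advantage estimates,
    probs / pi_old = current and pre-update action probabilities."""
    # 1) Plain surrogate objective (learn_ppo)
    loss_plain = -torch.mean(ratios * advantage)

    # 2) Clipped surrogate objective (learn_clip), eps_clip = 0.2 as in Agent.py
    clipped = torch.clamp(ratios, 1 - eps_clip, 1 + eps_clip) * advantage
    loss_clip = -torch.mean(torch.min(ratios * advantage, clipped))

    # 3) Surrogate objective with KL penalty (learn_kl);
    #    KL(pi_old || pi_new) computed explicitly over the discrete actions
    kl = (pi_old * (pi_old.log() - probs.log())).sum(dim=-1).mean()
    loss_kl = -(torch.mean(ratios * advantage) - beta * kl)

    return loss_plain, loss_clip, loss_kl
```

If `torch.nn.functional.kl_div` is used for the penalty term instead, note that its first argument is expected in log-space (i.e. `probs.log()`), not as raw probabilities.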
4 | 5 | We see that PPO with the adaptive KL penalty outperforms the other two algorithms in this example. The second plot shows, however, that this algorithm also takes the longest to train; even so, it still comes out ahead when the reward is plotted against training time.
PPO with the adaptive KL penalty also performs best during testing.
Note that the first two plots are smoothed.
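For reference, the penalty coefficient β of the adaptive-KL variant is adjusted after each update round with the heuristic from the paper, as implemented in `learn_kl` (initial `beta = 1.0` and target KL `delta = 0.01` in `Agent.py`). The wrapper function below is only a sketch of that rule:

```python
def update_beta(beta: float, kl: float, delta: float = 0.01) -> float:
    """Adaptive KL-penalty coefficient update, as in Agent.py (learn_kl)."""
    if kl >= 1.5 * delta:        # policy moved too far from pi_old
        return beta * 2.0        #   -> strengthen the penalty
    if kl <= delta / 1.5:        # policy barely moved
        return beta * 0.5        #   -> relax the penalty
    return beta                  # otherwise keep the current coefficient
```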
14 | 15 | 16 | ### Reward per episode: 17 |  18 | ### Relative reward to the time: 19 |  20 | 21 | ### Reward per test episode: 22 |  23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | env: CartPole-v1 # or 'LunarLander-v2' # environnement 2 | seed: 5 3 | freqTest: 100 # testing frequency 4 | nbTest: 10 # number of test episodes 5 | freqVerbose: 50000 # verbose frequency 6 | freqOptim: 500 # learning frequency 7 | nbEpisodes: 511 # maximal number of episodes 8 | maxLengthTest: 500 # maximal length of a test episode 9 | maxLengthTrain: 500 # maximal length of a train episode 10 | K_epochs: 10 # length of for-loop inside the learning function 11 | discount: 0.99 # Discount factor 12 | gae_lambda: 0.99 # Lambda of TD(lambda) advantage estimation 13 | lr_a: 0.0001 # Learning rate of actor 14 | lr_c: 0.0001 # Learning rate of critic 15 | clip: True # choice of algorithm 16 | KL: False # choice of algorithm 17 | PPO: False # choice of algorithm 18 | -------------------------------------------------------------------------------- /figures/TestReward.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /figures/TrainReward_per_Episode.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | from Agent import Agent 3 | import numpy as np 4 | 5 | 6 | if __name__ == '__main__': 7 | env, config, outdir, logger = init('config.yaml') 8 | 9 | freqTest = config["freqTest"] 10 | nbTest = config["nbTest"] 11 | env.seed(config["seed"]) 12 | np.random.seed(config["seed"]) 13 | episode_count = config["nbEpisodes"] 14 | 15 | agent = Agent(env,config) 16 | 17 | rsum = 0 18 | mean = 0 19 | verbose = True 20 | itest = 0 21 | reward = 0 22 | done = False 23 | for i in range(episode_count): 24 | checkConfUpdate(outdir, config) 25 | 26 | rsum = 0 27 | 28 | ob = env.reset() 29 | 30 | # Check if verbose 31 | if i % int(config["freqVerbose"]) == 0: 32 | verbose = True 33 | else: 34 | verbose = False 35 | 36 | # Check if it is a testing episode 37 | if i % freqTest == 0 and i >= freqTest: ##### Same as train for now 38 | print("Test time! 
") 39 | mean = 0 40 | agent.test = True 41 | 42 | # End of testing, evaluate testing results and go back to train modus 43 | if i % freqTest == nbTest and i > freqTest: 44 | print("End of test, mean reward=", mean / nbTest) 45 | itest += 1 46 | logger.direct_write("rewardTest", mean / nbTest, itest) 47 | agent.test = False 48 | 49 | j = 0 50 | if verbose: 51 | env.render() 52 | 53 | new_obs=ob 54 | 55 | while True: 56 | if verbose: 57 | env.render() 58 | 59 | ob = new_obs 60 | 61 | action= agent.act(ob) 62 | new_obs, reward, done, _ = env.step(action) 63 | agent.store(ob, action, new_obs, reward, done,j) 64 | 65 | j+=1 66 | 67 | # If we reached the maximal length per episode 68 | if ((config["maxLengthTrain"] > 0) and (not agent.test) and (j == config["maxLengthTrain"])) or ( (agent.test) and (config["maxLengthTest"] > 0) and (j == config["maxLengthTest"])): 69 | done = True 70 | print("forced done!") 71 | 72 | 73 | rsum += reward 74 | 75 | # If it is time to learn, let the agent learn 76 | if agent.timeToLearn(done): 77 | agent.learn() 78 | logger.direct_write("actor loss", agent.actor_loss, agent.actor_count) 79 | logger.direct_write("critic loss", agent.critic_loss, agent.critic_count) 80 | 81 | # If episode is done, evaluate the results of this episode and start a new episode 82 | if done: 83 | print(str(i) + " rsum=" + str(rsum) + ", " + str(j) + " actions ") 84 | logger.direct_write("reward", rsum, i) 85 | mean += rsum 86 | rsum = 0 87 | 88 | break 89 | 90 | 91 | env.close() 92 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | import subprocess 3 | from collections import namedtuple,defaultdict 4 | import logging 5 | import json 6 | import os 7 | import yaml 8 | import gym 9 | import sys 10 | import threading 11 | from datetime import datetime 12 | from torch.utils.tensorboard import SummaryWriter 13 | 14 | def loadTensorBoard(outdir): 15 | t = threading.Thread(target=launchTensorBoard, args=([outdir])) 16 | t.start() 17 | 18 | def launchTensorBoard(tensorBoardPath): 19 | print('tensorboard --logdir=' + tensorBoardPath) 20 | ret=os.system('tensorboard --logdir=' + tensorBoardPath) 21 | if ret!=0: 22 | syspath = os.path.dirname(sys.executable) 23 | print(os.path.dirname(sys.executable)) 24 | ret = os.system(syspath+"/"+'tensorboard --logdir=' + tensorBoardPath) 25 | return 26 | 27 | 28 | 29 | class LogMe(dict): 30 | def __init__(self,writer,term=True): 31 | self.writer = writer 32 | self.dic = defaultdict(list) 33 | self.term = term 34 | def write(self,i): 35 | if len(self.dic)==0: return 36 | s=f"Epoch {i} : " 37 | for k,v in self.dic.items(): 38 | self.writer.add_scalar(k,sum(v)*1./len(v),i) 39 | s+=f"{k}:{sum(v)*1./len(v)} -- " 40 | self.dic.clear() 41 | if self.term: logging.info(s) 42 | def update(self,l): 43 | for k,v in l: 44 | self.add(k,v) 45 | def direct_write(self,k,v,i): 46 | self.writer.add_scalar(k,v,i) 47 | def add(self,k,v): 48 | self.dic[k].append(v) 49 | 50 | def save_src(path): 51 | current_dir = os.getcwd() 52 | package_dir = current_dir.split('RL', 1)[0] 53 | #path = os.path.abspath(path) 54 | os.chdir(package_dir) 55 | #print(package_dir) 56 | src_files = subprocess.Popen(('find', 'RL', '-name', '*.py', '-o', '-name', '*.yaml'), 57 | stdout=subprocess.PIPE) 58 | #print(package_dir,path) 59 | #path=os.path.abspath(path) 60 | 61 | 62 | #print(str(src_files)) 63 | 64 | subprocess.check_output(('tar', '-zcf', 
path+"/arch.tar", '-T', '-'), stdin=src_files.stdout, stderr=subprocess.STDOUT) 65 | src_files.wait() 66 | os.chdir(current_dir) 67 | 68 | 69 | 70 | def prs(*args): 71 | st = "" 72 | for s in args: 73 | st += str(s) 74 | print(st) 75 | 76 | 77 | class DotDict(dict): 78 | """dot.notation access to dictionary attributes (Thomas Robert)""" 79 | __getattr__ = dict.get 80 | __setattr__ = dict.__setitem__ 81 | __delattr__ = dict.__delitem__ 82 | 83 | 84 | def load_yaml(path): 85 | with open(path, 'r') as stream: 86 | opt = yaml.load(stream,Loader=yaml.Loader) 87 | return DotDict(opt) 88 | 89 | def write_yaml(file,dotdict): 90 | d=dict(dotdict) 91 | with open(file, 'w', encoding='utf8') as outfile: 92 | yaml.dump(d, outfile, default_flow_style=False, allow_unicode=True) 93 | 94 | global verbose 95 | verbose=2 96 | 97 | def printv(*o,p=0): 98 | if p