├── LICENSE ├── README.md ├── core ├── __init__.py ├── buffer.py ├── env_wrapper.py ├── genealogy.py ├── learner.py ├── mod_utils.py ├── models.py ├── neuroevolution.py ├── off_policy_algo.py ├── portfolio.py ├── runner.py └── ucb.py └── main.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DISCONTINUATION OF PROJECT # 2 | This project will no longer be maintained by Intel. 3 | Intel has ceased development and contributions including, but not limited to, maintenance, bug fixes, new releases, or updates, to this project. 4 | Intel no longer accepts patches to this project. 
5 | ![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg) 6 | 7 | Codebase for [Collaborative Evolutionary Reinforcement Learning](https://arxiv.org/pdf/1905.00976.pdf), published in the Proceedings of the 36th International Conference on Machine Learning, Long Beach, California, PMLR 97, 2019. Copyright 2019 by the author(s). 8 | 9 | ## Guide to set up and run CERL Experiments 10 | 11 | 12 | 1. Setup Conda 13 | - Install Anaconda3 14 | - conda create -n $ENV_NAME$ python=3.6.1 15 | - source activate $ENV_NAME$ 16 | 17 | 2. Install PyTorch version 1.0 18 | - Refer to https://pytorch.org/ for instructions 19 | - conda install pytorch torchvision -c pytorch [GPU-version] 20 | 21 | 3. Install Numpy, Cython and Scipy 22 | - pip install numpy==1.15.4 23 | - pip install cython==0.29.2 24 | - pip install scipy==1.1.0 25 | 26 | 4. Install Mujoco and OpenAI_Gym 27 | - Download mjpro150 from https://www.roboti.us/index.html 28 | - Unzip mjpro150 and place it + mjkey.txt (license file) in ~/.mujoco/ (create the .mujoco dir in your home folder) 29 | - pip install -U 'mujoco-py<1.50.2,>=1.50.1' 30 | - pip install 'gym[all]' 31 | 32 | ## Code labels 33 | 34 | main.py: Main script that runs everything 35 | 36 | core/runner.py: Rollout worker 37 | 38 | core/ucb.py: Upper Confidence Bound implemented for learner selection by the resource-manager 39 | 40 | core/portfolio.py: Portfolio of learners which can vary in their hyperparameters 41 | 42 | core/learner.py: Learner agent encapsulating the algo and summary statistics 43 | 44 | core/buffer.py: Cyclic Replay buffer 45 | 46 | core/env_wrapper.py: Wrapper around the Mujoco env 47 | 48 | core/models.py: Actor/Critic model 49 | 50 | core/neuroevolution.py: Implements Neuroevolution 51 | 52 | core/off_policy_algo.py: Implements the off_policy_gradient learner TD3 53 | 54 | core/mod_utils.py: Helper functions 55 | 56 | ## Reproduce Results 57 | 58 | python main.py -env HalfCheetah-v2 -portfolio {10,14} -total_steps 2 -seed {2018,2022} 59 | 60 | python main.py -env Hopper-v2 -portfolio {10,14} -total_steps 1.5 -seed {2018,2022} 61 | 62 | python main.py -env Humanoid-v2 -portfolio {10,14} -total_steps 1 -seed {2018,2022} 63 | 64 | python main.py -env Walker2d-v2 -portfolio {10,14} -total_steps 2 -seed {2018,2022} 65 | 66 | python main.py -env Swimmer-v2 -portfolio {10,14} -total_steps 2 -seed {2018,2022} 67 | 68 | python main.py -env Hopper-v2 -portfolio {100,102} -total_steps 5 -seed {2018,2022} 69 | 70 | where {} represents an inclusive discrete range: {10, 14} --> {10, 11, 12, 13, 14} 71 | 72 | 73 | ## Note 74 | All roll-outs (evaluation of actors in the evolutionary population and the explorative roll-outs 75 | conducted by the learners) run in parallel. They are farmed out to different CPU cores, 76 | and write asynchronously to the collective replay buffer. Thus, slight variations in results 77 | are observed even with the same seed. 78 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ****************************************************************************** 16 | -------------------------------------------------------------------------------- /core/buffer.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ****************************************************************************** 16 | 17 | import numpy as np 18 | import random 19 | import torch 20 | from torch.multiprocessing import Manager 21 | 22 | 23 | class Buffer(): 24 | """Cyclic Buffer stores experience tuples from the rollouts 25 | Parameters: 26 | capacity (int): Maximum number of experiences to hold in cyclic buffer 27 | """ 28 | 29 | def __init__(self, capacity, buffer_gpu): 30 | self.capacity = capacity; self.buffer_gpu = buffer_gpu; self.counter = 0 31 | self.manager = Manager() 32 | self.tuples = self.manager.list() #Temporary shared buffer to get experiences from processes 33 | self.s = []; self.ns = []; self.a = []; self.r = []; self.done = [] 34 | 35 | # Temporary tensors that can be loaded onto the GPU for fast sampling during gradient updates (updated each gen) --> Faster sampling - no need to cycle experiences in and out of gpu 1000 times 36 | self.sT = None; self.nsT = None; self.aT = None; self.rT = None; self.doneT = None 37 | 38 | 39 | def referesh(self): 40 | """Housekeeping: move experiences from the shared list into the buffer and trim it to capacity 41 | Parameters: 42 | None 43 | Returns: 44 | None 45 | """ 46 | 47 | # Add ALL EXPERIENCE COLLECTED TO MEMORY concurrently 48 | for _ in range(len(self.tuples)): 49 | exp = self.tuples.pop() 50 | self.s.append(exp[0]) 51 | self.ns.append(exp[1]) 52 | self.a.append(exp[2]) 53 | self.r.append(exp[3]) 54 | self.done.append(exp[4]) 55 | 56 | 57 | #Trim to make the buffer size < capacity 58 | while self.__len__() > self.capacity: 59 | self.s.pop(0); self.ns.pop(0); self.a.pop(0); self.r.pop(0); self.done.pop(0) 60 | 61 | 62 | def __len__(self): 63 | return len(self.s) 64 | 65 | def sample(self, batch_size): 66 | """Sample a batch of experiences from memory with uniform probability 67 | Parameters: 68 | batch_size (int): Size of the batch to sample 69 | Returns: 70 | Experience (tuple): A tuple of (state, next_state, action, shaped_reward, done) each as a tensor with shape (batch_size, :) 71 | """ 72 | ind = random.sample(range(len(self.s)), batch_size) 73 | 74 | return self.sT[ind], self.nsT[ind], self.aT[ind], self.rT[ind],
self.doneT[ind] 75 | #return np.vstack([self.s[i] for i in ind]), np.vstack([self.ns[i] for i in ind]), np.vstack([self.a[i] for i in ind]), np.vstack([self.r[i] for i in ind]), np.vstack([self.done[i] for i in ind]) 76 | 77 | 78 | def tensorify(self): 79 | """Method to convert the stored experiences into tensors (and move them to the GPU if buffer_gpu is set) 80 | Parameters: 81 | None 82 | Returns: 83 | None 84 | """ 85 | self.referesh() #Refresh first 86 | 87 | self.sT = torch.tensor(np.vstack(self.s)) 88 | self.nsT = torch.tensor(np.vstack(self.ns)) 89 | self.aT = torch.tensor(np.vstack(self.a)) 90 | self.rT = torch.tensor(np.vstack(self.r)) 91 | self.doneT = torch.tensor(np.vstack(self.done)) 92 | if self.buffer_gpu: 93 | self.sT = self.sT.cuda() 94 | self.nsT = self.nsT.cuda() 95 | self.aT = self.aT.cuda() 96 | self.rT = self.rT.cuda() 97 | self.doneT = self.doneT.cuda() 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /core/env_wrapper.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ****************************************************************************** 16 | 17 | import gym 18 | 19 | 20 | class EnvironmentWrapper: 21 | """Wrapper around the Environment to expose a cleaner interface for RL 22 | 23 | Parameters: 24 | env_name (str): Env name 25 | 26 | 27 | """ 28 | def __init__(self, env_name, ALGO): 29 | """ 30 | A base template for all environment wrappers. 31 | """ 32 | self.env = gym.make(env_name) 33 | self.action_low = float(self.env.action_space.low[0]) 34 | self.action_high = float(self.env.action_space.high[0]) 35 | self.ALGO = ALGO 36 | 37 | 38 | 39 | 40 | def reset(self): 41 | """Method overloads reset 42 | Parameters: 43 | None 44 | 45 | Returns: 46 | next_obs (list): Next state 47 | """ 48 | return self.env.reset() 49 | 50 | 51 | def step(self, action: object): #Expects a numpy action 52 | """Take an action to forward the simulation 53 | 54 | Parameters: 55 | action (ndarray): action to take in the env 56 | 57 | Returns: 58 | next_obs (list): Next state 59 | reward (float): Reward for this step 60 | done (bool): Simulation done?
61 | info (None): Template from OpenAI gym (doesn't have anything) 62 | """ 63 | 64 | action = self.action_low + action * (self.action_high - self.action_low) 65 | return self.env.step(action) 66 | 67 | def render(self): 68 | self.env.render() 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /core/genealogy.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ****************************************************************************** 16 | 17 | from copy import deepcopy 18 | 19 | 20 | class Info(): 21 | def __init__(self, origin): 22 | self.origin = origin 23 | self.history = [origin] 24 | self.crossover = [] 25 | self.num_mut = 0.0 26 | 27 | def reset(self): 28 | self.history = [] 29 | self.crossover = [] 30 | self.num_mut = 0.0 31 | 32 | 33 | 34 | 35 | 36 | class Genealogy(): 37 | def __init__(self): 38 | self.wwid_counter = 0 39 | self.tree = {} 40 | 41 | def new_id(self, origin): 42 | wwid = self.wwid_counter + 1 43 | self.wwid_counter += 1 44 | self.tree[wwid] = Info(origin) 45 | return wwid 46 | 47 | 48 | def mutation(self, wwid, gen): 49 | self.tree[wwid].history.append('mut_'+str(gen)) 50 | 51 | def elite(self, wwid, gen): 52 | self.tree[wwid].history.append('elite_' + str(gen)) 53 | 54 | ######### INHERITANCE OPS ########### 55 | def crossover(self, parent1, parent2, gen): 56 | origin = 'crossover_' + str(gen) 57 | wwid = self.wwid_counter + 1 58 | self.wwid_counter += 1 59 | self.tree[wwid] = Info(origin) 60 | return wwid 61 | 62 | def asexual(self, parent): 63 | wwid = self.wwid_counter + 1 64 | self.wwid_counter += 1 65 | self.tree[wwid] = deepcopy(self.tree[parent]) 66 | return wwid 67 | 68 | -------------------------------------------------------------------------------- /core/learner.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # ****************************************************************************** 16 | 17 | from core.off_policy_algo import Off_Policy_Algo 18 | 19 | 20 | 21 | 22 | class Learner: 23 | """Learner object encapsulating a local learner 24 | 25 | Parameters: 26 | algo_name (str): Algorithm Identifier 27 | state_dim (int): State size 28 | action_dim (int): Action size 29 | actor_lr (float): Actor learning rate 30 | critic_lr (float): Critic learning rate 31 | gamma (float): Discount rate 32 | tau (float): Target network sync rate 33 | init_w (bool): Use kaiming initialization for the weights? 34 | **td3args (**kwargs): arguments for TD3 algo 35 | 36 | 37 | """ 38 | 39 | def __init__(self, wwid, algo_name, state_dim, action_dim, actor_lr, critic_lr, gamma, tau, init_w = True, **td3args): 40 | self.td3args = td3args; self.id = wwid 41 | self.algo = Off_Policy_Algo(wwid, algo_name, state_dim, action_dim, actor_lr, critic_lr, gamma, tau, init_w) 42 | 43 | 44 | #LEARNER STATISTICS 45 | self.fitnesses = [] 46 | self.ep_lens = [] 47 | self.value = None 48 | self.visit_count = 0 49 | 50 | 51 | def update_parameters(self, replay_buffer, buffer_gpu, batch_size, iterations): 52 | for _ in range(iterations): 53 | s, ns, a, r, done = replay_buffer.sample(batch_size) 54 | if not buffer_gpu: 55 | s = s.cuda(); ns = ns.cuda(); a = a.cuda(); r = r.cuda(); done = done.cuda() 56 | self.algo.update_parameters(s, ns, a, r, done, 1, **self.td3args) 57 | 58 | 59 | def update_stats(self, fitness, ep_len, gamma=0.2): 60 | self.visit_count += 1 61 | self.fitnesses.append(fitness) 62 | self.ep_lens.append(ep_len) 63 | 64 | if self.value == None: self.value = fitness 65 | else: self.value = gamma * fitness + (1-gamma) * self.value 66 | -------------------------------------------------------------------------------- /core/mod_utils.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # ****************************************************************************** 16 | 17 | from torch import nn 18 | from torch.autograd import Variable 19 | import random, pickle, copy, argparse 20 | import numpy as np, torch, os 21 | 22 | class Tracker(): #Tracker 23 | """Tracker class to log progress and save metrics periodically 24 | 25 | Parameters: 26 | save_folder (str): Folder name for saving progress 27 | vars_string (list): List of metric names to log 28 | project_string (str): String decorator for metric filenames 29 | 30 | Returns: 31 | None 32 | """ 33 | 34 | def __init__(self, save_folder, vars_string, project_string): 35 | self.vars_string = vars_string; self.project_string = project_string 36 | self.foldername = save_folder 37 | self.all_tracker = [[[],0.0,[]] for _ in vars_string] #[Id of var tracked][fitnesses, avg_fitness, csv_fitnesses] 38 | self.counter = 0 39 | self.conv_size = 1 40 | if not os.path.exists(self.foldername): 41 | os.makedirs(self.foldername) 42 | 43 | 44 | def update(self, updates, generation): 45 | """Add a metric observed 46 | 47 | Parameters: 48 | updates (list): List of new scores for each tracked metric 49 | generation (int): Current gen 50 | 51 | Returns: 52 | None 53 | """ 54 | 55 | self.counter += 1 56 | for update, var in zip(updates, self.all_tracker): 57 | if update == None: continue 58 | var[0].append(update) 59 | 60 | #Constrain size of convolution 61 | for var in self.all_tracker: 62 | if len(var[0]) > self.conv_size: var[0].pop(0) 63 | 64 | #Update new average 65 | for var in self.all_tracker: 66 | if len(var[0]) == 0: continue 67 | var[1] = sum(var[0])/float(len(var[0])) 68 | 69 | if self.counter % 1 == 0: # Save to csv file 70 | for i, var in enumerate(self.all_tracker): 71 | if len(var[0]) == 0: continue 72 | var[2].append(np.array([generation, var[1]])) 73 | filename = self.foldername + self.vars_string[i] + self.project_string 74 | np.savetxt(filename, np.array(var[2]), fmt='%.3f', delimiter=',') 75 | 76 | 77 | def str2bool(v): 78 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 79 | return True 80 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 81 | return False 82 | else: 83 | raise argparse.ArgumentTypeError('Boolean value expected.') 84 | 85 | 86 | def hard_update(target, source): 87 | """Hard update (clone) parameters from the source network to the target 88 | 89 | Parameters: 90 | target (object): A pytorch model 91 | source (object): A pytorch model 92 | 93 | Returns: 94 | None 95 | """ 96 | 97 | for target_param, param in zip(target.parameters(), source.parameters()): 98 | target_param.data.copy_(param.data) 99 | 100 | #Signature transfer if applicable 101 | try: 102 | target.wwid[0] = source.wwid[0] 103 | except: 104 | None 105 | 106 | 107 | def soft_update(target, source, tau): 108 | """Soft update (Polyak averaging) of parameters from the source network to the target 109 | 110 | Parameters: 111 | target (object): A pytorch model 112 | source (object): A pytorch model 113 | tau (float): Tau parameter 114 | 115 | Returns: 116 | None 117 | 118 | """ 119 | 120 | for target_param, param in zip(target.parameters(), source.parameters()): 121 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau) 122 | 123 | 124 | def to_numpy(var): 125 | """Tensor --> numpy 126 | 127 | Parameters: 128 | var (tensor): tensor 129 | 130 | Returns: 131 | var (ndarray): ndarray 132 | """ 133 | return var.data.numpy() 134 | 135 | def to_tensor(ndarray, volatile=False, requires_grad=False): 136 | """numpy --> Variable 137 | 138 | Parameters: 139 | ndarray (ndarray): ndarray 140
| volatile (bool): create a volatile tensor? 141 | requires_grad (bool): tensor requires gradients? 142 | 143 | Returns: 144 | var (variable): variable 145 | """ 146 | 147 | if isinstance(ndarray, list): ndarray = np.array(ndarray) 148 | return Variable(torch.from_numpy(ndarray).float(), volatile=volatile, requires_grad=requires_grad) 149 | 150 | def pickle_obj(filename, object): 151 | """Pickle object 152 | 153 | Parameters: 154 | filename (str): file to dump the pickled object to 155 | object (object): object to pickle 156 | 157 | Returns: 158 | None 159 | """ 160 | 161 | handle = open(filename, "wb") 162 | pickle.dump(object, handle) 163 | 164 | def unpickle_obj(filename): 165 | """Unpickle object from disk 166 | 167 | Parameters: 168 | filename (str): file from which to load and unpickle object 169 | 170 | Returns: 171 | obj (object): unpickled object 172 | """ 173 | with open(filename, 'rb') as f: 174 | return pickle.load(f) 175 | 176 | def init_weights(m): 177 | """Initialize weights using kaiming uniform initialization in place 178 | 179 | Parameters: 180 | m (nn.module): Linear module from torch.nn 181 | 182 | Returns: 183 | None 184 | """ 185 | if type(m) == nn.Linear: 186 | nn.init.kaiming_uniform_(m.weight) 187 | m.bias.data.fill_(0.01) 188 | 189 | 190 | def list_mean(l): 191 | """compute the average of a list 192 | 193 | Parameters: 194 | l (list): list 195 | 196 | Returns: 197 | mean (float): mean 198 | """ 199 | if len(l) == 0: return None 200 | else: return sum(l)/len(l) 201 | 202 | def pprint(l): 203 | """Pretty print 204 | 205 | Parameters: 206 | l (list/float/None): object to print 207 | 208 | Returns: 209 | pretty print str 210 | """ 211 | 212 | if isinstance(l, list): 213 | if len(l) == 0: return None 214 | else: 215 | if l == None: return None 216 | else: return '%.2f'%l 217 | 218 | 219 | 220 | 221 | def flatten(d): 222 | """Recursive method to flatten a dict -->list 223 | 224 | Parameters: 225 | d (dict): dict 226 | 227 | Returns: 228 | l (list) 229 | """ 230 | 231 | res = [] # Result list 232 | if isinstance(d, dict): 233 | for key, val in sorted(d.items()): 234 | res.extend(flatten(val)) 235 | elif isinstance(d, list): 236 | res = d 237 | else: 238 | res = [d] 239 | return res 240 | 241 | def reverse_flatten(d, l): 242 | """Recursive method to unflatten a list -->dict [Reverse of flatten] in place 243 | 244 | Parameters: 245 | d (dict): dict 246 | l (list): l 247 | 248 | Returns: 249 | None 250 | """ 251 | 252 | if isinstance(d, dict): 253 | for key, _ in sorted(d.items()): 254 | 255 | #Float is immutable, so 256 | if isinstance(d[key], float): 257 | d[key] = l[0] 258 | l[:] = l[1:] 259 | continue 260 | 261 | reverse_flatten(d[key], l) 262 | elif isinstance(d, list): 263 | d[:] = l[0:len(d)] 264 | l[:] = l[len(d):] 265 | 266 | 267 | def load_all_models_dir(dir, model_template): 268 | """Load all models from a given directory onto a template 269 | 270 | Parameters: 271 | dir (str): directory 272 | model_template (object): Class template to load the objects onto 273 | 274 | Returns: 275 | models (list): list of loaded objects 276 | """ 277 | 278 | list_files = os.listdir(dir) 279 | print(list_files) 280 | models = [] 281 | for i, fname in enumerate(list_files): 282 | try: 283 | model_template.load_state_dict(torch.load(dir + fname)) 284 | model_template.eval() 285 | models.append(copy.deepcopy(model_template)) 286 | except: 287 | print(fname, 'failed to load') 288 | return models 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 |
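A minimal usage sketch of the two update helpers above (not part of the repository; the `online` and `target` names are illustrative): hard_update clones the online weights into a target copy once at construction, and soft_update then Polyak-averages the online weights into the target after every gradient step, mirroring how core/off_policy_algo.py maintains its actor and critic targets.

import torch.nn as nn
from core.mod_utils import hard_update, soft_update

online = nn.Linear(4, 2)    # stand-in for the online (learned) network
target = nn.Linear(4, 2)    # its target copy
hard_update(target, online)            # clone the online weights into the target once at construction
# ... then after each gradient step on `online`:
soft_update(target, online, tau=5e-3)  # target = (1 - tau) * target + tau * online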
-------------------------------------------------------------------------------- /core/models.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ****************************************************************************** 16 | 17 | import torch 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | from torch.distributions import Normal 21 | 22 | 23 | class Actor(nn.Module): 24 | """Actor model 25 | 26 | Parameters: 27 | state_dim (int): State size; action_dim (int): Action size; wwid (int): Genealogy id tag for this actor 28 | """ 29 | 30 | def __init__(self, state_dim, action_dim, wwid): 31 | super(Actor, self).__init__() 32 | 33 | self.wwid = torch.Tensor([wwid]) 34 | l1 = 400; l2 = 300 35 | 36 | # Construct Hidden Layer 1 37 | self.f1 = nn.Linear(state_dim, l1) 38 | self.ln1 = nn.LayerNorm(l1) 39 | 40 | #Hidden Layer 2 41 | self.f2 = nn.Linear(l1, l2) 42 | self.ln2 = nn.LayerNorm(l2) 43 | 44 | #Out 45 | self.w_out = nn.Linear(l2, action_dim) 46 | 47 | def forward(self, input): 48 | """Method to forward propagate through the actor's graph 49 | 50 | Parameters: 51 | input (tensor): states 52 | 53 | Returns: 54 | action (tensor): actions 55 | 56 | 57 | """ 58 | #Hidden Layer 1 59 | out = F.elu(self.f1(input)) 60 | out = self.ln1(out) 61 | 62 | #Hidden Layer 2 63 | out = F.elu(self.f2(out)) 64 | out = self.ln2(out) 65 | 66 | #Out 67 | return torch.sigmoid(self.w_out(out)) 68 | 69 | 70 | class Critic(nn.Module): 71 | 72 | """Critic model 73 | 74 | Parameters: 75 | state_dim (int): State size; action_dim (int): Action size 76 | 77 | """ 78 | 79 | def __init__(self, state_dim, action_dim): 80 | super(Critic, self).__init__() 81 | l1 = 400; l2 = 300 82 | 83 | ######################## Q1 Head ################## 84 | # Construct Hidden Layer 1 with state 85 | self.q1f1 = nn.Linear(state_dim + action_dim, l1) 86 | self.q1ln1 = nn.LayerNorm(l1) 87 | 88 | #Hidden Layer 2 89 | self.q1f2 = nn.Linear(l1, l2) 90 | self.q1ln2 = nn.LayerNorm(l2) 91 | 92 | #Out 93 | self.q1out = nn.Linear(l2, 1) 94 | 95 | 96 | ######################## Q2 Head ################## 97 | # Construct Hidden Layer 1 with state 98 | self.q2f1 = nn.Linear(state_dim + action_dim, l1) 99 | self.q2ln1 = nn.LayerNorm(l1) 100 | 101 | #Hidden Layer 2 102 | self.q2f2 = nn.Linear(l1, l2) 103 | self.q2ln2 = nn.LayerNorm(l2) 104 | 105 | #Out 106 | self.q2out = nn.Linear(l2, 1) 107 | 108 | ######################## Value Head ################## [NOT USED IN CERL] 109 | # Construct Hidden Layer 1 with state 110 | self.vf1 = nn.Linear(state_dim, l1) 111 | self.vln1 = nn.LayerNorm(l1) 112 | 113 | # Hidden Layer 2 114 | self.vf2 = nn.Linear(l1, l2) 115 | self.vln2 = nn.LayerNorm(l2) 116 | 117 | # Out 118 | self.vout = nn.Linear(l2, 1) 119 | 120 | 121 | 122 | 123 | 124 | def forward(self, obs, action): 125 | """Method to forward propagate through the critic's graph 126 | 127 | Parameters:
128 | obs (tensor): states 129 | action (tensor): actions 130 | 131 | Returns: 132 | Q1 (tensor): Qval 1 133 | Q2 (tensor): Qval 2 134 | V (tensor): Value 135 | 136 | 137 | 138 | """ 139 | 140 | #Concatenate observation+action as critic state 141 | state = torch.cat([obs, action], 1) 142 | 143 | ###### Q1 HEAD #### 144 | q1 = F.elu(self.q1f1(state)) 145 | q1 = self.q1ln1(q1) 146 | q1 = F.elu(self.q1f2(q1)) 147 | q1 = self.q1ln2(q1) 148 | q1 = self.q1out(q1) 149 | 150 | ###### Q2 HEAD #### 151 | q2 = F.elu(self.q2f1(state)) 152 | q2 = self.q2ln1(q2) 153 | q2 = F.elu(self.q2f2(q2)) 154 | q2 = self.q2ln2(q2) 155 | q2 = self.q2out(q2) 156 | 157 | ###### Value HEAD #### 158 | v = F.elu(self.vf1(obs)) 159 | v = self.vln1(v) 160 | v = F.elu(self.vf2(v)) 161 | v = self.vln2(v) 162 | v = self.vout(v) 163 | 164 | 165 | return q1, q2, v 166 | 167 | 168 | 169 | # Initialize weights 170 | def weights_init(m): 171 | classname = m.__class__.__name__ 172 | if classname.find('Linear') != -1: 173 | torch.nn.init.xavier_uniform_(m.weight) 174 | 175 | -------------------------------------------------------------------------------- /core/neuroevolution.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # ****************************************************************************** 16 | 17 | import random 18 | import numpy as np 19 | import math 20 | import core.mod_utils as utils 21 | 22 | 23 | 24 | class SSNE: 25 | """Neuroevolution object that contains all the methods to run Sub-structure based Neuroevolution (SSNE) 26 | 27 | Parameters: 28 | args (object): parameter class 29 | 30 | 31 | """ 32 | 33 | def __init__(self, args): 34 | self.gen = 0 35 | self.args = args; 36 | self.population_size = self.args.pop_size; 37 | #RL TRACKERS 38 | self.rl_sync_pool = []; self.all_offs = []; self.rl_res = {"elites":0.0, 'selects': 0.0, 'discarded':0.0}; self.num_rl_syncs = 0.0001 39 | self.lineage = [0.0 for _ in range(self.population_size)]; self.lineage_depth = 10 40 | 41 | def selection_tournament(self, index_rank, num_offsprings, tournament_size): 42 | """Conduct tournament selection 43 | 44 | Parameters: 45 | index_rank (list): Ranking encoded as net_indexes 46 | num_offsprings (int): Number of offsprings to generate 47 | tournament_size (int): Size of tournament 48 | 49 | Returns: 50 | offsprings (list): List of offsprings returned as a list of net indices 51 | 52 | """ 53 | 54 | 55 | total_choices = len(index_rank) 56 | offsprings = [] 57 | for i in range(num_offsprings): 58 | winner = np.min(np.random.randint(total_choices, size=tournament_size)) 59 | offsprings.append(index_rank[winner]) 60 | 61 | offsprings = list(set(offsprings)) # Find unique offsprings 62 | if len(offsprings) % 2 != 0: # Number of offsprings should be even 63 | offsprings.append(index_rank[winner]) 64 | return offsprings 65 | 66 | def list_argsort(self, seq): 67 | """Sort the list 68 | 69 | Parameters: 70 | seq (list): list 71 | 72 | Returns: 73 | sorted list 74 | 75 | """ 76 | return sorted(range(len(seq)), key=seq.__getitem__) 77 | 78 | def regularize_weight(self, weight, mag): 79 | """Clamps the weight magnitude (regularizer) 80 | 81 | Parameters: 82 | weight (float): weight 83 | mag (float): max/min value for weight 84 | 85 | Returns: 86 | weight (float): clamped weight 87 | 88 | """ 89 | if weight > mag: weight = mag 90 | if weight < -mag: weight = -mag 91 | return weight 92 | 93 | def crossover_inplace(self, gene1, gene2): 94 | """Conduct one point crossover in place 95 | 96 | Parameters: 97 | gene1 (object): A pytorch model 98 | gene2 (object): A pytorch model 99 | 100 | Returns: 101 | None 102 | 103 | """ 104 | 105 | 106 | keys1 = list(gene1.state_dict()) 107 | keys2 = list(gene2.state_dict()) 108 | 109 | for key in keys1: 110 | if key not in keys2: continue 111 | 112 | # References to the variable tensors 113 | W1 = gene1.state_dict()[key] 114 | W2 = gene2.state_dict()[key] 115 | 116 | if len(W1.shape) == 2: #Weights no bias 117 | num_variables = W1.shape[0] 118 | # Crossover operation [Indexed by row] 119 | try: num_cross_overs = random.randint(0, int(num_variables * 0.3)) # Number of Cross overs 120 | except: num_cross_overs = 1 121 | for i in range(num_cross_overs): 122 | receiver_choice = random.random() # Choose which gene to receive the perturbation 123 | if receiver_choice < 0.5: 124 | ind_cr = random.randint(0, W1.shape[0]-1) # 125 | W1[ind_cr, :] = W2[ind_cr, :] 126 | else: 127 | ind_cr = random.randint(0, W1.shape[0]-1) # 128 | W2[ind_cr, :] = W1[ind_cr, :] 129 | 130 | elif len(W1.shape) == 1: #Bias or LayerNorm 131 | if random.random() <0.8: continue #Crossover here with low frequency 132 | num_variables = W1.shape[0] 133 | # Crossover operation [Indexed by row] 134 | #num_cross_overs =
random.randint(0, int(num_variables * 0.05)) # Crossover number 135 | for i in range(1): 136 | receiver_choice = random.random() # Choose which gene to receive the perturbation 137 | if receiver_choice < 0.5: 138 | ind_cr = random.randint(0, W1.shape[0]-1) # 139 | W1[ind_cr] = W2[ind_cr] 140 | else: 141 | ind_cr = random.randint(0, W1.shape[0]-1) # 142 | W2[ind_cr] = W1[ind_cr] 143 | 144 | def mutate_inplace(self, gene): 145 | """Conduct mutation in place 146 | 147 | Parameters: 148 | gene (object): A pytorch model 149 | 150 | Returns: 151 | None 152 | 153 | """ 154 | mut_strength = 0.02 155 | num_mutation_frac = 0.03 156 | super_mut_strength = 1.0 157 | super_mut_prob = 0.1 158 | reset_prob = super_mut_prob + 0.1 159 | 160 | num_params = len(list(gene.parameters())) 161 | ssne_probabilities = np.random.uniform(0, 1, num_params) * 2 162 | 163 | for i, param in enumerate(gene.parameters()): # Mutate each param 164 | 165 | # References to the variable keys 166 | W = param.data 167 | if len(W.shape) == 2: # Weights, no bias 168 | 169 | num_weights = W.shape[0] * W.shape[1] 170 | ssne_prob = ssne_probabilities[i] 171 | 172 | if random.random() < ssne_prob: 173 | num_mutations = random.randint(0, 174 | int(math.ceil(num_mutation_frac * num_weights))) # Number of mutation instances 175 | for _ in range(num_mutations): 176 | ind_dim1 = random.randint(0, W.shape[0]-1) 177 | ind_dim2 = random.randint(0, W.shape[-1]-1) 178 | random_num = random.random() 179 | 180 | if random_num < super_mut_prob: # Super Mutation probability 181 | W[ind_dim1, ind_dim2] += random.gauss(0, super_mut_strength * W[ind_dim1, ind_dim2]) 182 | elif random_num < reset_prob: # Reset probability 183 | W[ind_dim1, ind_dim2] = random.gauss(0, 0.1) 184 | else: # normal mutation 185 | W[ind_dim1, ind_dim2] += random.gauss(0, mut_strength * W[ind_dim1, ind_dim2]) 186 | 187 | # Regularization hard limit 188 | W[ind_dim1, ind_dim2] = self.regularize_weight(W[ind_dim1, ind_dim2], 189 | self.args.weight_magnitude_limit) 190 | 191 | elif len(W.shape) == 1: # Bias or layernorm 192 | num_weights = W.shape[0] 193 | ssne_prob = ssne_probabilities[i]*0.04 #Low probability of mutation here 194 | 195 | if random.random() < ssne_prob: 196 | num_mutations = random.randint(0, 197 | int(math.ceil(num_mutation_frac * num_weights))) # Number of mutation instances 198 | for _ in range(num_mutations): 199 | ind_dim = random.randint(0, W.shape[0]-1) 200 | random_num = random.random() 201 | 202 | if random_num < super_mut_prob: # Super Mutation probability 203 | W[ind_dim] += random.gauss(0, super_mut_strength * W[ind_dim]) 204 | elif random_num < reset_prob: # Reset probability 205 | W[ind_dim] = random.gauss(0, 1) 206 | else: # normal mutation 207 | W[ind_dim] += random.gauss(0, mut_strength * W[ind_dim]) 208 | 209 | # Regularization hard limit 210 | W[ind_dim] = self.regularize_weight(W[ind_dim], self.args.weight_magnitude_limit) 211 | 212 | 213 | 214 | def reset_genome(self, gene): 215 | """Reset a model's weights in place 216 | 217 | Parameters: 218 | gene (object): A pytorch model 219 | 220 | Returns: 221 | None 222 | 223 | """ 224 | for param in (gene.parameters()): 225 | param.data.copy_(param.data) 226 | 227 | def epoch(self, gen, genealogy, pop, net_inds, fitness_evals, migration): 228 | """Method to implement a round of selection and mutation operation 229 | 230 | Parameters: 231 | pop (shared_list): Population of models 232 | net_inds (list): Indices of individuals evaluated this generation 233 | fitness_evals (list): Fitness
values for evaluated individuals 234 | migration (list): Policies from learners to be synced into the population 235 | 236 | Returns: 237 | None 238 | 239 | """ 240 | 241 | self.gen += 1; num_elitists = int(self.args.elite_fraction * len(fitness_evals)) 242 | if num_elitists < 2: num_elitists = 2 243 | 244 | 245 | # Entire epoch is handled with indices; Index rank nets by fitness evaluation (0 is the best after reversing) 246 | index_rank = self.list_argsort(fitness_evals); index_rank.reverse() 247 | elitist_index = index_rank[:num_elitists] # Elitist indexes safeguard 248 | 249 | # Selection step 250 | offsprings = self.selection_tournament(index_rank, num_offsprings=len(index_rank) - len(elitist_index) - len(migration), tournament_size=3) 251 | 252 | #Transcribe ranked indexes from now on to refer to net indexes 253 | elitist_index = [net_inds[i] for i in elitist_index] 254 | offsprings = [net_inds[i] for i in offsprings] 255 | 256 | #Figure out unselected candidates 257 | unselects = []; new_elitists = [] 258 | for net_i in net_inds: 259 | if net_i in offsprings or net_i in elitist_index: 260 | continue 261 | else: 262 | unselects.append(net_i) 263 | random.shuffle(unselects) 264 | 265 | #Inheritance step (sync learners to population) 266 | for policy in migration: 267 | replacee = unselects.pop(0) 268 | utils.hard_update(target=pop[replacee], source=policy) 269 | wwid = genealogy.asexual(int(policy.wwid.item())) 270 | pop[replacee].wwid[0] = wwid 271 | 272 | # Elitism step, assigning elite candidates to some unselects 273 | for i in elitist_index: 274 | try: replacee = unselects.pop(0) 275 | except: replacee = offsprings.pop(0) 276 | new_elitists.append(replacee) 277 | utils.hard_update(target=pop[replacee], source=pop[i]) 278 | wwid = genealogy.asexual(int(pop[i].wwid.item())) 279 | pop[replacee].wwid[0] = wwid 280 | genealogy.elite(wwid, gen) 281 | 282 | #self.lineage[replacee] = self.lineage[i] 283 | 284 | # Crossover for unselected genes with 100 percent probability 285 | if len(unselects) % 2 != 0: # Number of unselects left should be even 286 | unselects.append(unselects[random.randint(0, len(unselects)-1)]) 287 | for i, j in zip(unselects[0::2], unselects[1::2]): 288 | off_i = random.choice(new_elitists); 289 | off_j = random.choice(offsprings) 290 | utils.hard_update(target=pop[i], source=pop[off_i]) 291 | utils.hard_update(target=pop[j], source=pop[off_j]) 292 | self.crossover_inplace(pop[i], pop[j]) 293 | wwid1 = genealogy.crossover(int(pop[off_i].wwid.item()), int(pop[off_j].wwid.item()), gen) 294 | wwid2 = genealogy.crossover(int(pop[off_i].wwid.item()), int(pop[off_j].wwid.item()), gen) 295 | pop[i].wwid[0] = wwid1; pop[j].wwid[0] = wwid2 296 | 297 | #self.lineage[i] = (self.lineage[off_i]+self.lineage[off_j])/2 298 | #self.lineage[j] = (self.lineage[off_i] + self.lineage[off_j]) / 2 299 | 300 | # Crossover for selected offsprings 301 | for i, j in zip(offsprings[0::2], offsprings[1::2]): 302 | if random.random() < self.args.crossover_prob: 303 | self.crossover_inplace(pop[i], pop[j]) 304 | wwid1 = genealogy.crossover(int(pop[i].wwid.item()), int(pop[j].wwid.item()), gen) 305 | wwid2 = genealogy.crossover(int(pop[i].wwid.item()), int(pop[j].wwid.item()), gen) 306 | pop[i].wwid[0] = wwid1; pop[j].wwid[0] = wwid2 307 | 308 | 309 | # Mutate all genes in the population except the new elitists 310 | for net_i in net_inds: 311 | if net_i not in new_elitists: # Spare the new elitists 312 | if random.random() < self.args.mutation_prob: 313 | self.mutate_inplace(pop[net_i]) 314 |
genealogy.mutation(int(pop[net_i].wwid.item()), gen) 315 | 316 | 317 | self.all_offs[:] = offsprings[:] 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | -------------------------------------------------------------------------------- /core/off_policy_algo.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ****************************************************************************** 16 | 17 | import torch 18 | import torch.nn as nn 19 | from torch.optim import Adam 20 | import torch.nn.functional as F 21 | import numpy as np 22 | from core import mod_utils as utils 23 | from core.models import Actor, Critic 24 | 25 | 26 | class Off_Policy_Algo(object): 27 | """Class implementing TD3 and DDPG off-policy learners 28 | 29 | Parameters: 30 | args (object): Parameter class 31 | 32 | 33 | """ 34 | def __init__(self, wwid, algo_name, state_dim, action_dim, actor_lr, critic_lr, gamma, tau, init_w = True): 35 | 36 | self.algo_name = algo_name; self.gamma = gamma; self.tau = tau 37 | 38 | #Initialize actors 39 | self.actor = Actor(state_dim, action_dim, wwid) 40 | if init_w: self.actor.apply(utils.init_weights) 41 | self.actor_target = Actor(state_dim, action_dim, wwid) 42 | utils.hard_update(self.actor_target, self.actor) 43 | self.actor_optim = Adam(self.actor.parameters(), actor_lr) 44 | 45 | 46 | self.critic = Critic(state_dim, action_dim) 47 | if init_w: self.critic.apply(utils.init_weights) 48 | self.critic_target = Critic(state_dim, action_dim) 49 | utils.hard_update(self.critic_target, self.critic) 50 | self.critic_optim = Adam(self.critic.parameters(), critic_lr) 51 | 52 | self.loss = nn.MSELoss() 53 | 54 | self.actor_target.cuda(); self.critic_target.cuda(); self.actor.cuda(); self.critic.cuda() 55 | self.num_critic_updates = 0 56 | 57 | #Statistics Tracker 58 | self.action_loss = {'min':[], 'max': [], 'mean':[], 'std':[]} 59 | self.policy_loss = {'min':[], 'max': [], 'mean':[], 'std':[]} 60 | self.critic_loss = {'mean':[]} 61 | self.q = {'min':[], 'max': [], 'mean':[], 'std':[]} 62 | self.val = {'min':[], 'max': [], 'mean':[], 'std':[]} 63 | 64 | def compute_stats(self, tensor, tracker): 65 | """Computes stats from intermediate tensors 66 | 67 | Parameters: 68 | tensor (tensor): tensor 69 | tracker (object): logger 70 | 71 | Returns: 72 | None 73 | 74 | 75 | """ 76 | tracker['min'].append(torch.min(tensor).item()) 77 | tracker['max'].append(torch.max(tensor).item()) 78 | tracker['mean'].append(torch.mean(tensor).item()) 79 | tracker['std'].append(torch.std(tensor).item()) 80 | 81 | def update_parameters(self, state_batch, next_state_batch, action_batch, reward_batch, done_batch, num_epoch=1, **kwargs): 82 | """Runs a step of Bellman update and policy gradient using a batch of experiences 83 | 84 | Parameters: 85 | state_batch
(tensor): Current States 86 | next_state_batch (tensor): Next States 87 | action_batch (tensor): Actions 88 | reward_batch (tensor): Rewards 89 | done_batch (tensor): Done batch 90 | num_epoch (int): Number of learning iterations to run with the same data 91 | 92 | Returns: 93 | None 94 | 95 | """ 96 | 97 | if isinstance(state_batch, list): state_batch = torch.cat(state_batch); next_state_batch = torch.cat(next_state_batch); action_batch = torch.cat(action_batch); reward_batch = torch.cat(reward_batch); done_batch = torch.cat(done_batch) 98 | 99 | for _ in range(num_epoch): 100 | ########### CRITIC UPDATE #################### 101 | 102 | #Compute next q-val, next_v and target 103 | with torch.no_grad(): 104 | #Policy Noise 105 | policy_noise = np.random.normal(0, kwargs['policy_noise'], (action_batch.size()[0], action_batch.size()[1])) 106 | policy_noise = torch.clamp(torch.Tensor(policy_noise), -kwargs['policy_noise_clip'], kwargs['policy_noise_clip']) 107 | 108 | #Compute next action_batch 109 | next_action_batch = self.actor_target.forward(next_state_batch) + policy_noise.cuda() 110 | next_action_batch = torch.clamp(next_action_batch, 0,1) 111 | 112 | #Compute Q-val and value of next state masking by done 113 | q1, q2, _ = self.critic_target.forward(next_state_batch, next_action_batch) 114 | q1 = (1 - done_batch) * q1 115 | q2 = (1 - done_batch) * q2 116 | 117 | #Select which q to use as next-q (depends on algo) 118 | if self.algo_name == 'TD3' or self.algo_name == 'TD3_actor_min': next_q = torch.min(q1, q2) 119 | elif self.algo_name == 'DDPG': next_q = q1 120 | elif self.algo_name == 'TD3_max': next_q = torch.max(q1, q2) 121 | 122 | #Compute target q and target val 123 | target_q = reward_batch + (self.gamma * next_q) 124 | 125 | 126 | self.critic_optim.zero_grad() 127 | current_q1, current_q2, current_val = self.critic.forward((state_batch), (action_batch)) 128 | self.compute_stats(current_q1, self.q) 129 | 130 | dt = self.loss(current_q1, target_q) 131 | 132 | if self.algo_name == 'TD3' or self.algo_name == 'TD3_max': dt = dt + self.loss(current_q2, target_q) 133 | self.critic_loss['mean'].append(dt.item()) 134 | 135 | dt.backward() 136 | 137 | self.critic_optim.step() 138 | self.num_critic_updates += 1 139 | 140 | 141 | #Delayed Actor Update 142 | if self.num_critic_updates % kwargs['policy_ups_freq'] == 0: 143 | 144 | actor_actions = self.actor.forward(state_batch) 145 | Q1, Q2, val = self.critic.forward(state_batch, actor_actions) 146 | 147 | # if self.args.use_advantage: policy_loss = -(Q1 - val) 148 | policy_loss = -Q1 149 | 150 | self.compute_stats(policy_loss,self.policy_loss) 151 | policy_loss = policy_loss.mean() 152 | 153 | 154 | self.actor_optim.zero_grad() 155 | 156 | 157 | 158 | policy_loss.backward(retain_graph=True) 159 | self.actor_optim.step() 160 | 161 | 162 | if self.num_critic_updates % kwargs['policy_ups_freq'] == 0: utils.soft_update(self.actor_target, self.actor, self.tau) 163 | utils.soft_update(self.critic_target, self.critic, self.tau) 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | -------------------------------------------------------------------------------- /core/portfolio.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ****************************************************************************** 16 | 17 | from core.learner import Learner 18 | 19 | 20 | def initialize_portfolio(portfolio, args, genealogy, portfolio_id): 21 | """Portfolio of learners 22 | 23 | Parameters: 24 | portfolio (list): Incoming list 25 | args (object): param class 26 | 27 | Returns: 28 | portfolio (list): Portfolio of learners 29 | """ 30 | 31 | 32 | if portfolio_id == 10: 33 | td3args = {'policy_noise': 0.2, 'policy_noise_clip': 0.5, 'policy_ups_freq': 2, 'action_low': args.action_low, 'action_high': args.action_high} 34 | 35 | # Learner 1 36 | wwid = genealogy.new_id('learner_1') 37 | portfolio.append( 38 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=0.9, tau=5e-3, 39 | init_w=True, **td3args)) 40 | 41 | # Learner 3 42 | wwid = genealogy.new_id('learner_3') 43 | portfolio.append( 44 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=0.99, tau=5e-3, 45 | init_w=True, **td3args)) 46 | 47 | # Learner 4 48 | wwid = genealogy.new_id('learner_4') 49 | portfolio.append( 50 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=0.997, tau=5e-3, 51 | init_w=True, **td3args)) 52 | 53 | # Learner 4 54 | wwid = genealogy.new_id('learner_4') 55 | portfolio.append( 56 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=0.9995, tau=5e-3, 57 | init_w=True, **td3args)) 58 | 59 | if portfolio_id == 11: 60 | td3args = {'policy_noise': 0.2, 'policy_noise_clip': 0.5, 'policy_ups_freq': 2, 'action_low': args.action_low, 'action_high': args.action_high} 61 | 62 | # Learner 1 63 | wwid = genealogy.new_id('learner_1') 64 | portfolio.append( 65 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=0.9, tau=5e-3, 66 | init_w=True, **td3args)) 67 | 68 | if portfolio_id == 12: 69 | td3args = {'policy_noise': 0.2, 'policy_noise_clip': 0.5, 'policy_ups_freq': 2, 'action_low': args.action_low, 'action_high': args.action_high} 70 | 71 | # Learner 1 72 | wwid = genealogy.new_id('learner_1') 73 | portfolio.append( 74 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=0.99, tau=5e-3, 75 | init_w=True, **td3args)) 76 | 77 | if portfolio_id == 13: 78 | td3args = {'policy_noise': 0.2, 'policy_noise_clip': 0.5, 'policy_ups_freq': 2, 'action_low': args.action_low, 'action_high': args.action_high} 79 | 80 | # Learner 1 81 | wwid = genealogy.new_id('learner_1') 82 | portfolio.append( 83 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=0.997, tau=5e-3, 84 | init_w=True, **td3args)) 85 | 86 | if portfolio_id == 14: 87 | td3args = {'policy_noise': 0.2, 'policy_noise_clip': 0.5, 'policy_ups_freq': 2, 'action_low': args.action_low, 'action_high': args.action_high} 88 | 89 | # Learner 1 90 | wwid = genealogy.new_id('learner_1') 91 | portfolio.append( 92 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, 
gamma=0.9995, tau=5e-3, 93 | init_w=True, **td3args)) 94 | 95 | 96 | 97 | ##############MOTIVATING EXAMPLE ####### 98 | if portfolio_id == 100: 99 | 100 | td3args = {'policy_noise': 0.2, 'policy_noise_clip': 0.5, 'policy_ups_freq': 2, 'action_low': args.action_low, 'action_high': args.action_high} 101 | 102 | 103 | 104 | # Learner 1 105 | wwid = genealogy.new_id('learner_1') 106 | portfolio.append( 107 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=0.0, tau=5e-3, 108 | init_w=True, **td3args)) 109 | 110 | # Learner 2 111 | wwid = genealogy.new_id('learner_2') 112 | portfolio.append( 113 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=1.0, tau=5e-3, init_w=True, 114 | **td3args)) 115 | 116 | if portfolio_id == 101: 117 | td3args = {'policy_noise': 0.2, 'policy_noise_clip': 0.5, 'policy_ups_freq': 2, 'action_low': args.action_low, 'action_high': args.action_high} 118 | 119 | 120 | # Learner 3 121 | wwid = genealogy.new_id('learner_3') 122 | portfolio.append( 123 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=0.0, tau=5e-3, 124 | init_w=True, **td3args)) 125 | 126 | if portfolio_id == 102: 127 | td3args = {'policy_noise': 0.2, 'policy_noise_clip': 0.5, 'policy_ups_freq': 2, 'action_low': args.action_low, 'action_high': args.action_high} 128 | 129 | 130 | # Learner 1 131 | wwid = genealogy.new_id('learner_1') 132 | portfolio.append( 133 | Learner(wwid, 'TD3', args.state_dim, args.action_dim, actor_lr=1e-3, critic_lr=1e-3, gamma=1.0, tau=5e-3, 134 | init_w=True, **td3args)) 135 | 136 | 137 | 138 | return portfolio 139 | -------------------------------------------------------------------------------- /core/runner.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ****************************************************************************** 16 | 17 | from core.env_wrapper import EnvironmentWrapper 18 | from core import mod_utils as utils 19 | import numpy as np 20 | import torch 21 | 22 | 23 | # Rollout evaluate an agent in a complete game 24 | def rollout_worker(id, task_pipe, result_pipe, is_noise, data_bucket, model_bucket, env_name, noise_std, ALGO): 25 | """Rollout Worker runs a simulation in the environment to generate experiences and fitness values 26 | 27 | Parameters: 28 | task_pipe (pipe): Receiver end of the task pipe used to receive signal to start on a task 29 | result_pipe (pipe): Sender end of the pipe used to report back results 30 | is_noise (bool): Use noise? 
31 | data_bucket (list of shared object): A list of shared object reference to s,ns,a,r,done (replay buffer) managed by a manager that is used to store experience tuples 32 | model_bucket (shared list object): A shared list object managed by a manager used to store all the models (actors) 33 | env_name (str): Environment name? 34 | noise_std (float): Standard deviation of Gaussian for sampling noise 35 | 36 | Returns: 37 | None 38 | """ 39 | env = EnvironmentWrapper(env_name, ALGO) 40 | np.random.seed(id) ###make sure the random seeds across learners are different 41 | 42 | ###LOOP### 43 | while True: 44 | identifier = task_pipe.recv() # Wait until a signal is received to start rollout 45 | if identifier == 'TERMINATE': exit(0) #Kill yourself 46 | 47 | # Get the requisite network 48 | net = model_bucket[identifier] 49 | 50 | 51 | fitness = 0.0; 52 | total_frame = 0 53 | state = env.reset(); 54 | rollout_trajectory = [] 55 | state = utils.to_tensor(np.array(state)).unsqueeze(0) 56 | while True: # unless done 57 | 58 | action = net.forward(state) 59 | action = utils.to_numpy(action) 60 | if is_noise: 61 | action = (action + np.random.normal(0, noise_std, size=env.env.action_space.shape[0])).clip(env.env.action_space.low, env.env.action_space.high) 62 | 63 | next_state, reward, done, info = env.step(action.flatten()) # Simulate one step in environment 64 | 65 | 66 | next_state = utils.to_tensor(np.array(next_state)).unsqueeze(0) 67 | fitness += reward 68 | 69 | # If storing transitions 70 | if data_bucket != None: #Skip for test set 71 | rollout_trajectory.append([utils.to_numpy(state), utils.to_numpy(next_state), 72 | np.float32(action), np.reshape(np.float32(np.array([reward])), (1, 1)), 73 | np.reshape(np.float32(np.array([float(done)])), (1, 1))]) 74 | state = next_state 75 | total_frame += 1 76 | 77 | # DONE FLAG IS Received 78 | if done: 79 | 80 | # Push experiences to main 81 | for entry in rollout_trajectory: 82 | data_bucket.append(entry) 83 | 84 | 85 | break 86 | 87 | # Send back id, fitness, total length and shaped fitness using the result pipe 88 | result_pipe.send([identifier, fitness, total_frame]) 89 | -------------------------------------------------------------------------------- /core/ucb.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ****************************************************************************** 16 | 17 | import math, random 18 | 19 | 20 | def ucb(allocation_size, portfolio, c): 21 | """Upper Confidence Bound implementation to pick learners 22 | 23 | Parameters: 24 | allocation_size (int): Size of allocation (num of resources) 25 | portfolio (list): List of learners 26 | c (float): Exploration coefficient in UCB 27 | 28 | Returns: 29 | allocation (list): List of learner ids formulating the resource allocation 30 | """ 31 | 32 | 33 | values = [learner.value for learner in portfolio] 34 | #Normalize values 35 | values = [val - min(values) for val in values] 36 | values = [val/(sum(values)+0.1) for val in values] 37 | 38 | visit_counts = [learner.visit_count for learner in portfolio] 39 | total_visit = sum(visit_counts) 40 | 41 | ######## Implement UCB ######## 42 | ucb_scores = [(values[i]) + c * math.sqrt( math.log(total_visit)/visit_counts[i]) for i in range(len(portfolio))] 43 | 44 | 45 | ########## Use UCB scores to perform probabilistic resource allocation (different from making one choice) ########## 46 | allocation = roulette_wheel(ucb_scores, allocation_size) 47 | 48 | 49 | 50 | 51 | return allocation 52 | 53 | 54 | 55 | def roulette_wheel(probs, num_samples): 56 | """Roulette_wheel selection from a prob. distribution 57 | 58 | Parameters: 59 | probs (list): Probability distribution 60 | num_samples (int): Num of iterations to sample from distribution 61 | 62 | Returns: 63 | out (list): List of samples based on incoming distribution 64 | """ 65 | 66 | #Normalize 67 | probs = [prob - min(probs) + abs(min(probs)) for prob in probs] #Biased translation (to positive axis) to ensure the lowest does not end up with a probability of zero 68 | 69 | ####### HACK FOR ROLLOUT_SIZE = 1 ##### 70 | if sum(probs) != 0: 71 | probs = [prob / sum(probs) for prob in probs] 72 | else: 73 | probs = [1.0 for _ in probs] 74 | ####### END HACK ##### 75 | 76 | 77 | #Selection 78 | out = [] 79 | for _ in range(num_samples): 80 | rand = random.random() 81 | 82 | for i in range(len(probs)): 83 | if rand < sum(probs[0:i+1]): 84 | out.append(i) 85 | break 86 | 87 | print('UCB_prob_mass', ["%.2f" %i for i in probs]) 88 | print('Allocation', out) 89 | print() 90 | 91 | return out 92 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright 2019 Intel Corporation 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ****************************************************************************** 16 | 17 | import numpy as np, os, time, random, torch, sys 18 | from core.neuroevolution import SSNE 19 | from core.models import Actor 20 | from core import mod_utils as utils 21 | from core.mod_utils import str2bool 22 | from core.ucb import ucb 23 | from core.runner import rollout_worker 24 | from core.portfolio import initialize_portfolio 25 | from torch.multiprocessing import Process, Pipe, Manager 26 | import threading 27 | from core.buffer import Buffer 28 | from core.genealogy import Genealogy 29 | import gym 30 | import argparse 31 | 32 | 33 | 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument('-pop_size', type=int, help='#Policies in the population', default=10) 36 | parser.add_argument('-seed', type=int, help='Seed', default=2018) 37 | parser.add_argument('-rollout_size', type=int, help='#Rollout workers for the learners', default=10) 38 | parser.add_argument('-env', type=str, help='#Environment name', default='Humanoid-v2') 39 | parser.add_argument('-gradperstep', type=float, help='#Gradient steps per env step', default=1.0) 40 | parser.add_argument('-savetag', type=str, help='#Tag to append to savefile', default='') 41 | parser.add_argument('-gpu_id', type=int, help='#GPU ID ', default=0) 42 | parser.add_argument('-buffer_gpu', type=str2bool, help='#Store buffer in GPU?', default=0) 43 | parser.add_argument('-portfolio', type=int, help='Portfolio ID', default=10) 44 | parser.add_argument('-total_steps', type=float, help='#Total steps in the env in millions ', default=2) 45 | parser.add_argument('-batchsize', type=int, help='Batch size', default=256) 46 | parser.add_argument('-noise', type=float, help='Noise STD', default=0.1) 47 | 48 | 49 | POP_SIZE = vars(parser.parse_args())['pop_size'] 50 | BATCHSIZE = vars(parser.parse_args())['batchsize'] 51 | ROLLOUT_SIZE = vars(parser.parse_args())['rollout_size'] 52 | ENV_NAME = vars(parser.parse_args())['env'] 53 | GRADPERSTEP = vars(parser.parse_args())['gradperstep'] 54 | SAVETAG = vars(parser.parse_args())['savetag'] 55 | BUFFER_GPU = vars(parser.parse_args())['buffer_gpu'] 56 | SEED = vars(parser.parse_args())['seed'] 57 | GPU_DEVICE = vars(parser.parse_args())['gpu_id'] 58 | PORTFOLIO_ID = vars(parser.parse_args())['portfolio'] 59 | TOTAL_STEPS = int(vars(parser.parse_args())['total_steps'] * 1000000) 60 | NOISE_STD = vars(parser.parse_args())['noise'] 61 | os.environ["CUDA_VISIBLE_DEVICES"]=str(GPU_DEVICE) 62 | 63 | #ICML EXPERIMENT 64 | if PORTFOLIO_ID == 11 or PORTFOLIO_ID == 12 or PORTFOLIO_ID == 13 or PORTFOLIO_ID == 14 or PORTFOLIO_ID == 101 or PORTFOLIO_ID == 102: ISOLATE_PG = True 65 | else: 66 | ISOLATE_PG = False 67 | ALGO = "TD3" 68 | SAVE = True 69 | TEST_SIZE=10 70 | 71 | 72 | class Parameters: 73 | def __init__(self): 74 | """Parameter class stores all parameters for policy gradient 75 | 76 | Parameters: 77 | None 78 | 79 | Returns: 80 | None 81 | """ 82 | self.seed = SEED 83 | self.asynch_frac = 1.0 #Asynchronicity of NeuroEvolution 84 | self.algo = ALGO 85 | 86 | self.batch_size = BATCHSIZE #Batch size 87 | self.noise_std = NOISE_STD #Gaussian noise exploration std 88 | self.ucb_coefficient = 0.9 #Exploration coefficient in UCB 89 | self.gradperstep = GRADPERSTEP 90 | self.buffer_gpu = BUFFER_GPU 91 | self.rollout_size = ROLLOUT_SIZE #Size of learner rollouts 92 | 93 | #NeuroEvolution stuff 94 | self.pop_size = POP_SIZE 95 | self.elite_fraction = 0.2 96 | self.crossover_prob = 0.01 97 | self.mutation_prob = 0.2 98 | 99 |
#######unused######## 100 | self.extinction_prob = 0.005 # Probability of extinction event 101 | self.extinction_magnituide = 0.5 # Probability of extinction for each genome, given an extinction event 102 | self.weight_magnitude_limit = 10000000 103 | self.mut_distribution = 1 # 1-Gaussian, 2-Laplace, 3-Uniform 104 | 105 | 106 | #Env dimensions and save folders 107 | dummy_env = gym.make(ENV_NAME) 108 | self.state_dim = dummy_env.observation_space.shape[0]; self.action_dim = dummy_env.action_space.shape[0] 109 | self.action_low = float(dummy_env.action_space.low[0]); self.action_high = float(dummy_env.action_space.high[0]) 110 | self.savefolder = 'Results/' 111 | if not os.path.exists('Results/'): os.makedirs('Results/') 112 | self.aux_folder = self.savefolder + 'Auxiliary/' 113 | if not os.path.exists(self.aux_folder): os.makedirs(self.aux_folder) 114 | 115 | 116 | class CERL_Agent: 117 | """Main CERL class containing all methods for CERL 118 | 119 | Parameters: 120 | args (object): Parameter class with all the parameters 121 | 122 | """ 123 | 124 | def __init__(self, args): 125 | self.args = args 126 | self.evolver = SSNE(self.args) 127 | 128 | #MP TOOLS 129 | self.manager = Manager() 130 | 131 | #Genealogy tool 132 | self.genealogy = Genealogy() 133 | 134 | #Initialize population 135 | self.pop = self.manager.list() 136 | for _ in range(args.pop_size): 137 | wwid = self.genealogy.new_id('evo') 138 | if ALGO == 'SAC': self.pop.append(GaussianPolicy(args.state_dim, args.action_dim, args.hidden_size, wwid)) 139 | else: self.pop.append(Actor(args.state_dim, args.action_dim, wwid)) 140 | 141 | if ALGO == "SAC": self.best_policy = GaussianPolicy(args.state_dim, args.action_dim, args.hidden_size, -1) 142 | else: 143 | self.best_policy = Actor(args.state_dim, args.action_dim, -1) 144 | 145 | 146 | #Turn off gradients and put in eval mode 147 | for actor in self.pop: 148 | actor = actor.cpu() 149 | actor.eval() 150 | 151 | #Init BUFFER 152 | self.replay_buffer = Buffer(1000000, self.args.buffer_gpu) 153 | 154 | #Initialize portfolio of learners 155 | self.portfolio = [] 156 | self.portfolio = initialize_portfolio(self.portfolio, self.args, self.genealogy, PORTFOLIO_ID) 157 | self.rollout_bucket = self.manager.list() 158 | for _ in range(len(self.portfolio)): 159 | if ALGO == 'SAC': self.rollout_bucket.append(GaussianPolicy(args.state_dim, args.action_dim, args.hidden_size, -1)) 160 | else: self.rollout_bucket.append(Actor(args.state_dim, args.action_dim, -1)) 161 | 162 | 163 | 164 | # Initialize shared data bucket 165 | self.data_bucket = self.replay_buffer.tuples 166 | 167 | ############## MULTIPROCESSING TOOLS ################### 168 | 169 | 170 | #Evolutionary population Rollout workers 171 | self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)] 172 | self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)] 173 | self.evo_workers = [Process(target=rollout_worker, args=(id, self.evo_task_pipes[id][1], self.evo_result_pipes[id][0], False, self.data_bucket, self.pop, ENV_NAME, None, ALGO)) for id in range(args.pop_size)] 174 | for worker in self.evo_workers: worker.start() 175 | self.evo_flag = [True for _ in range(args.pop_size)] 176 | 177 | #Learner rollout workers 178 | self.task_pipes = [Pipe() for _ in range(args.rollout_size)] 179 | self.result_pipes = [Pipe() for _ in range(args.rollout_size)] 180 | self.workers = [Process(target=rollout_worker, args=(id, self.task_pipes[id][1], self.result_pipes[id][0], True, self.data_bucket, self.rollout_bucket, ENV_NAME, args.noise_std, ALGO)) for id in
range(args.rollout_size)] 181 | for worker in self.workers: worker.start() 182 | self.roll_flag = [True for _ in range(args.rollout_size)] 183 | 184 | #Test bucket 185 | self.test_bucket = self.manager.list() 186 | if ALGO == 'SAC': 187 | self.test_bucket.append(GaussianPolicy(args.state_dim, args.action_dim, args.hidden_size, -1)) 188 | else: 189 | self.test_bucket.append(Actor(args.state_dim, args.action_dim, -1)) 190 | 191 | #5 Test workers 192 | self.test_task_pipes = [Pipe() for _ in range(TEST_SIZE)] 193 | self.test_result_pipes = [Pipe() for _ in range(TEST_SIZE)] 194 | self.test_workers = [Process(target=rollout_worker, args=(id, self.test_task_pipes[id][1], self.test_result_pipes[id][0], False, None, self.test_bucket, ENV_NAME, None, ALGO)) for id in range(TEST_SIZE)] 195 | for worker in self.test_workers: worker.start() 196 | self.test_flag = False 197 | 198 | #Meta-learning controller (Resource Distribution) 199 | self.allocation = [] #Allocation controls the resource allocation across learners 200 | for i in range(args.rollout_size): self.allocation.append(i % len(self.portfolio)) #Start uniformly (equal resources) 201 | #self.learner_stats = [{'fitnesses': [], 'ep_lens': [], 'value': 0.0, 'visit_count':0} for _ in range(len(self.portfolio))] #Track node statistsitic (each node is a learner), to compute UCB scores 202 | 203 | #Trackers 204 | self.best_score = 0.0; self.gen_frames = 0; self.total_frames = 0; self.best_shaped_score = None; self.test_score = None; self.test_std = None 205 | 206 | 207 | 208 | def train(self, gen, frame_tracker): 209 | """Main training loop to do rollouts, neureoevolution, and policy gradients 210 | 211 | Parameters: 212 | gen (int): Current epoch of training 213 | 214 | Returns: 215 | None 216 | """ 217 | ################ START ROLLOUTS ############## 218 | 219 | #Start Evolution rollouts 220 | if not ISOLATE_PG: 221 | for id, actor in enumerate(self.pop): 222 | if self.evo_flag[id]: 223 | self.evo_task_pipes[id][0].send(id) 224 | self.evo_flag[id] = False 225 | 226 | #Sync all learners actor to cpu (rollout) actor 227 | for i, learner in enumerate(self.portfolio): 228 | learner.algo.actor.cpu() 229 | utils.hard_update(self.rollout_bucket[i], learner.algo.actor) 230 | learner.algo.actor.cuda() 231 | 232 | # Start Learner rollouts 233 | for rollout_id, learner_id in enumerate(self.allocation): 234 | if self.roll_flag[rollout_id]: 235 | self.task_pipes[rollout_id][0].send(learner_id) 236 | self.roll_flag[rollout_id] = False 237 | 238 | #Start Test rollouts 239 | if gen % 5 == 0: 240 | self.test_flag = True 241 | for pipe in self.test_task_pipes: pipe[0].send(0) 242 | 243 | 244 | ############# UPDATE PARAMS USING GRADIENT DESCENT ########## 245 | if self.replay_buffer.__len__() > self.args.batch_size * 10: ###BURN IN PERIOD 246 | self.replay_buffer.tensorify() # Tensorify the buffer for fast sampling 247 | 248 | #Spin up threads for each learner 249 | threads = [threading.Thread(target=learner.update_parameters, args=(self.replay_buffer, self.args.buffer_gpu, self.args.batch_size, int(self.gen_frames * self.args.gradperstep))) for learner in 250 | self.portfolio] 251 | 252 | # Start threads 253 | for thread in threads: thread.start() 254 | 255 | #Join threads 256 | for thread in threads: thread.join() 257 | self.gen_frames = 0 258 | 259 | 260 | ########## SOFT -JOIN ROLLOUTS FOR EVO POPULATION ############ 261 | if not ISOLATE_PG: 262 | all_fitness = []; all_net_ids = []; all_eplens = [] 263 | while True: 264 | for i in range(self.args.pop_size): 265 
| if self.evo_result_pipes[i][1].poll(): 266 | entry = self.evo_result_pipes[i][1].recv() 267 | all_fitness.append(entry[1]); all_net_ids.append(entry[0]); all_eplens.append(entry[2]); self.gen_frames+= entry[2]; self.total_frames += entry[2] 268 | self.evo_flag[i] = True 269 | 270 | # Soft-join (wait for asynch_frac of the population to report) 271 | if len(all_fitness) / self.args.pop_size >= self.args.asynch_frac: break 272 | 273 | ########## HARD-JOIN ROLLOUTS FOR LEARNER ROLLOUTS ############ 274 | for i in range(self.args.rollout_size): 275 | entry = self.result_pipes[i][1].recv() 276 | learner_id = entry[0]; fitness = entry[1]; num_frames = entry[2] 277 | self.portfolio[learner_id].update_stats(fitness, num_frames) 278 | 279 | self.gen_frames += num_frames; self.total_frames += num_frames 280 | if fitness > self.best_score: self.best_score = fitness 281 | 282 | self.roll_flag[i] = True 283 | 284 | #Refresh buffer (housekeeping tasks - pruning to keep under capacity) 285 | self.replay_buffer.referesh() 286 | ######################### END OF PARALLEL ROLLOUTS ################ 287 | 288 | ############ PROCESS MAX FITNESS ############# 289 | if not ISOLATE_PG: 290 | champ_index = all_net_ids[all_fitness.index(max(all_fitness))] 291 | utils.hard_update(self.test_bucket[0], self.pop[champ_index]) 292 | if max(all_fitness) > self.best_score: 293 | self.best_score = max(all_fitness) 294 | utils.hard_update(self.best_policy, self.pop[champ_index]) 295 | if SAVE: 296 | torch.save(self.pop[champ_index].state_dict(), self.args.aux_folder + ENV_NAME+'_best'+SAVETAG) 297 | print("Best policy saved with score", '%.2f'%max(all_fitness)) 298 | 299 | else: #Run PG in isolation 300 | utils.hard_update(self.test_bucket[0], self.rollout_bucket[0]) 301 | 302 | ###### TEST SCORE ###### 303 | if self.test_flag: 304 | self.test_flag = False 305 | test_scores = [] 306 | for pipe in self.test_result_pipes: #Collect all results 307 | entry = pipe[1].recv() 308 | test_scores.append(entry[1]) 309 | test_scores = np.array(test_scores) 310 | test_mean = np.mean(test_scores); test_std = (np.std(test_scores)) 311 | 312 | # Update score to trackers 313 | frame_tracker.update([test_mean], self.total_frames) 314 | else: 315 | test_mean, test_std = None, None 316 | 317 | 318 | #NeuroEvolution's probabilistic selection and recombination step 319 | if not ISOLATE_PG: 320 | if gen % 5 == 0: 321 | self.evolver.epoch(gen, self.genealogy, self.pop, all_net_ids, all_fitness, self.rollout_bucket) 322 | else: 323 | self.evolver.epoch(gen, self.genealogy, self.pop, all_net_ids, all_fitness, []) 324 | 325 | #META LEARNING - RESET ALLOCATION USING UCB 326 | if gen % 1 == 0: 327 | self.allocation = ucb(len(self.allocation), self.portfolio, self.args.ucb_coefficient) 328 | 329 | 330 | #Metrics 331 | if not ISOLATE_PG: 332 | champ_len = all_eplens[all_fitness.index(max(all_fitness))] 333 | champ_wwid = int(self.pop[champ_index].wwid.item()) 334 | max_fit = max(all_fitness) 335 | else: 336 | champ_len = num_frames; champ_wwid = int(self.rollout_bucket[0].wwid.item()) 337 | all_fitness = [fitness]; max_fit = fitness; all_eplens = [num_frames] 338 | 339 | return max_fit, champ_len, all_fitness, all_eplens, test_mean, test_std, champ_wwid 340 | 341 | if __name__ == "__main__": 342 | args = Parameters() # Create the Parameters class 343 | SAVETAG = SAVETAG + '_p' + str(PORTFOLIO_ID) 344 | SAVETAG = SAVETAG + '_s' + str(SEED) 345 | SAVETAG = SAVETAG + 'noise' + str(NOISE_STD) 346 | 347 | frame_tracker = utils.Tracker(args.savefolder, ['score_'+ENV_NAME+SAVETAG], '.csv') #Tracker
class to log progress 348 | max_tracker = utils.Tracker(args.aux_folder, ['pop_max_score_'+ENV_NAME+SAVETAG], '.csv') #Tracker class to log progress FOR MAX (NOT REPORTED) 349 | 350 | #Set seeds 351 | torch.manual_seed(args.seed); np.random.seed(args.seed); random.seed(args.seed) 352 | 353 | #INITIALIZE THE MAIN AGENT CLASS 354 | agent = CERL_Agent(args) #Initialize the agent 355 | print('Running CERL for', ENV_NAME, 'State_dim:', args.state_dim, ' Action_dim:', args.action_dim) 356 | 357 | time_start = time.time() 358 | for gen in range(1, 1000000000): #Infinite generations 359 | 360 | #Train one iteration 361 | best_score, test_len, all_fitness, all_eplen, test_mean, test_std, champ_wwid = agent.train(gen, frame_tracker) 362 | 363 | #PRINT PROGRESS 364 | print('Env', ENV_NAME, 'Gen', gen, 'Frames', agent.total_frames, ' Pop_max/max_ever:','%.2f'%best_score, '/','%.2f'%agent.best_score, ' Avg:','%.2f'%frame_tracker.all_tracker[0][1], 365 | ' Frames/sec:','%.2f'%(agent.total_frames/(time.time()-time_start)), 366 | ' Champ_len', '%.2f'%test_len, ' Test_score u/std', utils.pprint(test_mean), utils.pprint(test_std), 'savetag', SAVETAG, 'noise', args.noise_std) 367 | 368 | # # PRINT MORE DETAILED STATS PERIODICALLY 369 | if gen % 5 == 0: 370 | print('Learner Fitness', [utils.pprint(learner.value) for learner in agent.portfolio], 'Sum_stats_resource_allocation', [learner.visit_count for learner in agent.portfolio]) 371 | print('Pop/rollout size', args.pop_size,'/',args.rollout_size, 'gradperstep', args.gradperstep, 'Seed', SEED, 'Portfolio_id', PORTFOLIO_ID) 372 | try: 373 | print('Best Policy ever genealogy:', agent.genealogy.tree[int(agent.best_policy.wwid.item())].history) 374 | print('Champ genealogy:', agent.genealogy.tree[champ_wwid].history) 375 | except: None 376 | print() 377 | 378 | max_tracker.update([best_score], agent.total_frames) 379 | if agent.total_frames > TOTAL_STEPS: 380 | break 381 | 382 | #Save sum stats 383 | if PORTFOLIO_ID == 10 or PORTFOLIO_ID == 100: 384 | visit_counts = np.array([learner.visit_count for learner in agent.portfolio]) 385 | np.savetxt(args.aux_folder + 'allocation_' + ENV_NAME + SAVETAG, visit_counts, fmt='%.3f', delimiter=',') 386 | 387 | ###Kill all processes 388 | try: 389 | for p in agent.task_pipes: p[0].send('TERMINATE') 390 | for p in agent.test_task_pipes: p[0].send('TERMINATE') 391 | for p in agent.evo_task_pipes: p[0].send('TERMINATE') 392 | 393 | except: None 394 | 395 | 396 | --------------------------------------------------------------------------------
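
A minimal usage sketch of the UCB-based resource allocation driven from the main loop above: core/ucb.py only reads each learner's value and visit_count attributes, so a hypothetical stand-in object is enough to exercise ucb() the way CERL_Agent.train does every generation. The MockLearner class and the specific numbers below are illustrative assumptions, not part of the repository.

# Sketch (not part of the repo): calling core.ucb.ucb with a hypothetical
# stand-in for core.learner.Learner. Only the .value and .visit_count
# attributes read by ucb() are modeled here.
from core.ucb import ucb

class MockLearner:
    """Hypothetical stand-in for Learner; ucb() only reads .value and .visit_count."""
    def __init__(self, value, visit_count):
        self.value = value              # running estimate of the learner's quality
        self.visit_count = visit_count  # number of rollouts allocated so far

portfolio = [MockLearner(100.0, 5), MockLearner(250.0, 5), MockLearner(50.0, 5)]

# Ten rollout workers to hand out, exploration coefficient 0.9 (args.ucb_coefficient in main.py)
allocation = ucb(10, portfolio, 0.9)
print(allocation)  # e.g. [1, 0, 1, 1, 2, 1, 1, 0, 1, 2] -- one learner id per rollout worker (stochastic)

Because roulette_wheel samples from the normalized UCB scores instead of taking an argmax, higher-value learners tend to receive more rollout workers each generation without ever fully starving the others.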