├── .gitignore
├── LICENSE
├── README.md
├── bayes
│   ├── Bayes_by_Backprop.py
│   ├── ConcreteDropout.py
│   ├── MNF.py
│   └── __init__.py
├── data
│   ├── __init__.py
│   ├── toy_regression.py
│   ├── train_data_ian_regression.npz
│   └── train_data_regression.npz
├── dqn
│   ├── Bayes_by_Backprop_DQN.py
│   ├── Concrete_Dropout_DQN.py
│   ├── DQN.py
│   ├── MC_Dropout_DQN.py
│   ├── MNF_DQN.py
│   ├── __init__.py
│   └── train.py
├── envs
│   ├── __init__.py
│   ├── env_utils.py
│   └── nchain.py
├── normalizingflows
│   ├── __init__.py
│   ├── flow_catalog.py
│   ├── nf_utils.py
│   └── normalizing_flow.py
├── plots
│   ├── BayesByBackprop.png
│   ├── ConcreteDropout.png
│   ├── ConcreteDropout_heterostatic.png
│   ├── MCDropout.png
│   ├── MCDropout_heteroscedastic.png
│   ├── MNF_all_layers.png
│   ├── MNF_last_layers.png
│   ├── avg_acc_reward_cartpole.png
│   └── avg_acc_reward_mountaincar.png
├── requirements.txt
├── toy_regression_bayes.py
├── toy_regression_concrete_dropout.py
├── toy_regression_mc_dropout.py
├── train_bbb_dqn.py
├── train_dqn.py
├── train_dqn_dropout.py
├── train_dqn_dropout_concrete.py
└── train_mnf_dqn.py
/.gitignore:
--------------------------------------------------------------------------------
1 | ### JupyterNotebooks ###
2 | # gitignore template for Jupyter Notebooks
3 | # website: http://jupyter.org/
4 |
5 | .ipynb_checkpoints
6 | */.ipynb_checkpoints/*
7 |
8 | # IPython
9 | profile_default/
10 | ipython_config.py
11 |
12 | # Remove previous ipynb_checkpoints
13 | # git rm -r .ipynb_checkpoints/
14 |
15 | ### macOS ###
16 | # General
17 | .DS_Store
18 | .AppleDouble
19 | .LSOverride
20 |
21 | # Icon must end with two \r
22 | Icon
23 |
24 | # Thumbnails
25 | ._*
26 |
27 | # Files that might appear in the root of a volume
28 | .DocumentRevisions-V100
29 | .fseventsd
30 | .Spotlight-V100
31 | .TemporaryItems
32 | .Trashes
33 | .VolumeIcon.icns
34 | .com.apple.timemachine.donotpresent
35 |
36 | # Directories potentially created on remote AFP share
37 | .AppleDB
38 | .AppleDesktop
39 | Network Trash Folder
40 | Temporary Items
41 | .apdisk
42 |
43 | ### Python ###
44 | # Byte-compiled / optimized / DLL files
45 | __pycache__/
46 | *.py[cod]
47 | *$py.class
48 |
49 | # C extensions
50 | *.so
51 |
52 | # Distribution / packaging
53 | .Python
54 | build/
55 | develop-eggs/
56 | dist/
57 | downloads/
58 | eggs/
59 | .eggs/
60 | lib/
61 | lib64/
62 | parts/
63 | sdist/
64 | var/
65 | wheels/
66 | pip-wheel-metadata/
67 | share/python-wheels/
68 | *.egg-info/
69 | .installed.cfg
70 | *.egg
71 | MANIFEST
72 |
73 | # PyInstaller
74 | # Usually these files are written by a python script from a template
75 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
76 | *.manifest
77 | *.spec
78 |
79 | # Installer logs
80 | pip-log.txt
81 | pip-delete-this-directory.txt
82 |
83 | # Unit test / coverage reports
84 | htmlcov/
85 | .tox/
86 | .nox/
87 | .coverage
88 | .coverage.*
89 | .cache
90 | nosetests.xml
91 | coverage.xml
92 | *.cover
93 | .hypothesis/
94 | .pytest_cache/
95 |
96 | # Translations
97 | *.mo
98 | *.pot
99 |
100 | # Scrapy stuff:
101 | .scrapy
102 |
103 | # Sphinx documentation
104 | docs/_build/
105 |
106 | # PyBuilder
107 | target/
108 |
109 | # pyenv
110 | .python-version
111 |
112 | # pipenv
113 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
114 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
115 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
116 | # install all needed dependencies.
117 | #Pipfile.lock
118 |
119 | # celery beat schedule file
120 | celerybeat-schedule
121 |
122 | # SageMath parsed files
123 | *.sage.py
124 |
125 | # Spyder project settings
126 | .spyderproject
127 | .spyproject
128 |
129 | # Rope project settings
130 | .ropeproject
131 |
132 | # Mr Developer
133 | .mr.developer.cfg
134 | .project
135 | .pydevproject
136 |
137 | # mkdocs documentation
138 | /site
139 |
140 | # mypy
141 | .mypy_cache/
142 | .dmypy.json
143 | dmypy.json
144 |
145 | # Pyre type checker
146 | .pyre/
147 |
148 | ### VisualStudioCode ###
149 | .vscode
150 | .vscode/*
151 | !.vscode/settings.json
152 | !.vscode/tasks.json
153 | !.vscode/launch.json
154 | !.vscode/extensions.json
155 |
156 | ### VisualStudioCode Patch ###
157 | # Ignore all local history of files
158 | .history
159 |
160 | ### Virtual Environment
161 | venv
162 | experiments
163 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year>  <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program>  Copyright (C) <year>  <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Uncertainty Estimation in the Context of Efficient Exploration
2 |
3 | This repository investigates recent variational Bayesian inference approaches for uncertainty estimation. The approaches
4 | are evaluated and visualized on regression tasks. Furthermore, the uncertainty estimates from the variational
5 | Bayesian neural networks are used to perform approximate Thompson sampling within a deep Q-network (DQN) for efficient
6 | exploration. The approaches are compared against each other and against the well-known epsilon-greedy strategy.
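
To make the exploration mechanism concrete, the sketch below contrasts the two action-selection rules in plain NumPy. It is illustrative only and does not mirror the API of the code in [dqn](/dqn): a single stochastic forward pass through a Bayesian Q-network yields one plausible Q-function, and acting greedily on that sample is an approximate Thompson-sampling step, whereas epsilon-greedy explores by occasionally taking a uniformly random action.

```python
import numpy as np

def thompson_action(sampled_q_values):
    """Act greedily on Q-values from ONE stochastic forward pass of a Bayesian
    Q-network; repeating this every step approximates Thompson sampling."""
    return int(np.argmax(sampled_q_values))

def epsilon_greedy_action(mean_q_values, epsilon, rng=np.random.default_rng()):
    """Baseline: take a uniformly random action with probability epsilon,
    otherwise act greedily on the deterministic mean Q-values."""
    if rng.random() < epsilon:
        return int(rng.integers(len(mean_q_values)))
    return int(np.argmax(mean_q_values))

# Toy illustration with made-up Q-values for a two-action problem.
print(thompson_action(np.array([0.1, 0.4])))                  # -> 1
print(epsilon_greedy_action(np.array([0.1, 0.4]), epsilon=0.1))
```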
7 |
8 | Currently, the following variational Bayesian neural networks are implemented:
9 |
10 | - Bayes by Backprop [1]
11 | - Multiplicative Normalizing Flows (MNF) [2]
12 | - Dropout as a Bayesian Approximation [3]
13 | - Concrete Dropout [4]
14 |
15 | Touati et al. [5] describe how to augment DQNs with multiplicative normalizing flows for an efficient
16 | exploration-exploitation strategy.
17 |
18 | The repository is structured in the following way:
19 | - [bayes](/bayes) contains implementations of Bayes by Backprop, MNF, and Concrete Dropout layers. Monte Carlo
20 | dropout utilizes the standard TensorFlow dropout layer.
21 | - [data](/data) contains the two regression data sets from [6] and [7] that are used to visualize the uncertainty estimates.
22 | - [dqn](/dqn) includes the DQN implementations utilizing the respective variational Bayesian neural networks.
23 | - [envs](/envs) includes an implementation of an N-chain gym environment and environment utility functions.
24 | - [normalizingflows](/normalizingflows) contains normalizing flows for use in Multiplicative Normalizing Flows.
25 | - [plots](/plots) contains example visualizations.
26 |
27 | Training functions are located at the root of the repository.
28 |
29 | Below we show example uncertainty estimates on the regression task mentioned in [6]. Additionally, we show the
30 | average accumulated reward over 5 runs on the OpenAI Gym environments CartPole and MountainCar.
31 |
32 | - Aleatoric (data) uncertainty and epistemic (knowledge) uncertainty predicted by MC Dropout with two network heads:
33 |
34 |
35 |
36 |
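
One standard way to obtain this decomposition (not necessarily the exact code behind the plot) is to keep dropout active at prediction time, draw several stochastic forward passes, and split the predictive variance as sketched below; `predict_mean_and_log_var` is a placeholder for the two-head network:

```python
import numpy as np

def mc_dropout_uncertainty(predict_mean_and_log_var, x, n_samples=50):
    """Split predictive uncertainty into aleatoric and epistemic parts.

    `predict_mean_and_log_var(x)` is assumed to be a stochastic forward pass
    (dropout active) returning the predicted mean and log-variance for `x`.
    """
    means, variances = [], []
    for _ in range(n_samples):
        mu, log_var = predict_mean_and_log_var(x)
        means.append(mu)
        variances.append(np.exp(log_var))
    means = np.stack(means)
    aleatoric = np.mean(np.stack(variances), axis=0)  # average predicted data noise
    epistemic = np.var(means, axis=0)                 # spread of the sampled means
    return np.mean(means, axis=0), aleatoric, epistemic
```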
37 | - Network utilizing 3 MNF dense layers:
38 |
39 |
40 |
41 |
42 | - Network utilizing 2 regular dense layers and 1 MNF dense layer:
43 |
44 |
45 |
46 |
47 | - Average accumulated reward over 5 runs on the OpenAI Gym CartPole task:
48 |
49 |
50 |
51 |
52 | - Average accumulated reward over 5 runs on the OpenAI Gym MountainCar task:
53 |
54 |
55 |
56 |
57 | This work was done during the Advanced Deep Learning for Robotics course at TUM in cooperation with the German Aerospace
58 | Center (DLR).
59 | In case of any questions, feel free to reach out to us.
60 |
61 | Jan Rüttinger, jan.ruettinger@tum.de
62 |
63 | Lukas Rinder, lukas.rinder@tum.de
64 |
65 |
66 | ### References
67 |
68 | [1] C. Blundell, J. Cornebise, K. Kavukcuoglu, and D. Wierstra, “Weight uncertainty in neural networks,” 32nd Int. Conf. Mach. Learn. ICML 2015, vol. 2, pp. 1613–1622, 2015.
69 |
70 | [2] C. Louizos and M. Welling, “Multiplicative normalizing flows for variational Bayesian neural networks,” 34th Int. Conf. Mach. Learn. ICML 2017, vol. 5, pp. 3480–3489, 2017.
71 |
72 | [3] Y. Gal and Z. Ghahramani, “Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning,” 33rd Int. Conf. Mach. Learn. ICML 2016, vol. 3, pp. 1651–1660, 2016.
73 |
74 | [4] Y. Gal, J. Hron, and A. Kendall, “Concrete dropout,” in Advances in Neural Information Processing Systems, 2017, vol. 2017-Decem, pp. 3582–3591.
75 |
76 | [5] A. Touati, H. Satija, J. Romoff, J. Pineau, and P. Vincent, “Randomized value functions via multiplicative normalizing flows,” 35th Conf. Uncertain. Artif. Intell. UAI 2019, 2019.
77 |
78 | [6] I. Osband, “Risk versus uncertainty in deep learning: Bayes, bootstrap and the dangers of dropout,” NIPS Work. Bayesian Deep Learn., vol. 192, 2016.
79 |
80 | [7] J. M. Hernández-Lobato and R. P. Adams, “Probabilistic backpropagation for scalable learning of Bayesian neural networks,” 32nd Int. Conf. Mach. Learn. ICML 2015, vol. 3, pp. 1861–1869, 2015.
81 |
--------------------------------------------------------------------------------
/bayes/Bayes_by_Backprop.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow_probability as tfp
3 |
4 | tfd = tfp.distributions
5 | tfb = tfp.bijectors
6 |
7 |
8 | class BayesByBackprop(tf.keras.layers.Layer):
9 | """Bayesian fully-connected layer. The weight posterior distribution is modelled by a fully-factorized
10 | Gaussian.
11 |
12 | "Weight Uncertainty in Neural Networks" - Blundell et al. (2015)
13 | https://arxiv.org/abs/1505.05424
14 | """
15 |
16 | def __init__(
17 | self,
18 | n_out, # output dimensions
19 | prior_var_w=1, # variance of weight prior
20 | prior_var_b=1, # variance of bias prior
21 | max_std=1.0, # limit the standard deviation in the forward pass to avoid local minima (e.g. see Louizos et al.)
22 | log_var_mean_init=-3.0,
23 | log_var_init=1e-3,
24 | **kwargs,
25 | ):
26 | self.n_out = n_out
27 | self.prior_var_w = prior_var_w
28 | self.prior_var_b = prior_var_b
29 | self.max_std = max_std
30 | self.log_var_mean_init = log_var_mean_init
31 | self.log_var_init = log_var_init
32 | super().__init__(**kwargs)
33 |
34 | def build(self, input_shape):
35 | n_in = self.n_in = input_shape[-1]
36 | # weight means initialized with a Glorot (Xavier) normal initializer
37 | # log variances initialized around log_var_mean_init (default -3.0, i.e. variance e^-3 ~ 5e-2)
38 | glorot = tf.keras.initializers.GlorotNormal() # Xavier normal initializer
39 | mean_init, var_init = self.log_var_mean_init, self.log_var_init
40 |
41 | self.mean_W = tf.Variable(glorot([n_in, self.n_out]))
42 | self.log_var_W = tf.Variable(glorot([n_in, self.n_out]) * var_init + mean_init)
43 |
44 | self.mean_b = tf.Variable(tf.zeros(self.n_out))
45 | self.log_var_b = tf.Variable(glorot([self.n_out]) * var_init + mean_init)
46 |
47 | self.epsilon_w = tf.Variable(tf.random.normal([self.n_out]), trainable=False)
48 | self.reset_noise()
49 |
50 | def reset_noise(self):
51 | # sample new epsilon values
52 | self.epsilon_w.assign(tf.random.normal([self.n_out])) # sample epsilon_w
53 |
54 | @tf.function
55 | def kl_div(self, same_noise=True):
56 | kldiv_weight = 0.5 * tf.reduce_sum((- self.log_var_W + tf.math.exp(self.log_var_W)
57 | + tf.square(self.mean_W) - 1))
58 | kldiv_bias = 0.5 * tf.reduce_sum((- self.log_var_b + tf.math.exp(self.log_var_b)
59 | + tf.square(self.mean_b) - 1))
60 |
61 | kldiv = kldiv_weight + kldiv_bias
62 |
63 | return kldiv
64 |
65 | @tf.function
66 | def call(self, x, same_noise=False, training=True):
67 | batch_size = tf.shape(x)[0]
68 | if training:
69 | mu_out = tf.matmul(x, self.mean_W) + self.mean_b
70 |
71 | var_W = tf.clip_by_value(tf.exp(self.log_var_W), 0, self.max_std ** 2)
72 | var_b = tf.clip_by_value(tf.exp(self.log_var_b), 0, self.max_std ** 2)
73 |
74 | V_h = tf.matmul(tf.square(x), var_W) + var_b
75 |
76 | if same_noise: # use the same epsilon per batch
77 | epsilon_w = tf.expand_dims(self.epsilon_w, axis=0) # expand batch dimension
78 | epsilon_w = tf.repeat(epsilon_w, batch_size, axis=0) # repeat batch dimension
79 | else:
80 | epsilon_w = tf.random.normal(tf.shape(mu_out))
81 |
82 | sigma_out = tf.sqrt(V_h) * epsilon_w
83 |
84 | out = mu_out + sigma_out
85 | else: # evaluation without noise
86 | mu_out = tf.matmul(x, self.mean_W) + self.mean_b
87 | out = mu_out
88 |
89 | return out
90 |
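if __name__ == "__main__":
    # Usage sketch (editorial addition, not part of the original module): stack two
    # BayesByBackprop layers on a toy regression batch and form an ELBO-style loss,
    # i.e. a squared-error data term plus the layers' closed-form KL terms.
    x = tf.random.normal([16, 1])
    y = tf.sin(3.0 * x)

    hidden = BayesByBackprop(32)
    head = BayesByBackprop(1)

    h = tf.nn.relu(hidden(x))          # stochastic forward pass (training behaviour)
    pred = head(h)

    data_term = tf.reduce_mean(tf.square(pred - y))
    kl_term = hidden.kl_div() + head.kl_div()
    loss = data_term + kl_term / 16.0  # KL scaled by the batch size (one common choice)
    print("toy ELBO-style loss:", float(loss))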
--------------------------------------------------------------------------------
/bayes/ConcreteDropout.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from tensorflow.keras.layers import InputSpec, Wrapper
4 |
5 |
6 | class ConcreteDropout(Wrapper):
7 | """This wrapper allows to learn the dropout probability for any given input Dense layer.
8 | ```python
9 | # as the first layer in a model
10 | model = Sequential()
11 | model.add(ConcreteDropout(Dense(8), input_shape=(16,)))
12 | # now model.output_shape == (None, 8)
13 | # subsequent layers: no need for input_shape
14 | model.add(ConcreteDropout(Dense(32)))
15 | # now model.output_shape == (None, 32)
16 | ```
17 | `ConcreteDropout` can be used with arbitrary layers which have 2D
18 | kernels, not just `Dense`. However, Conv2D layers require different
19 | weighting of the regulariser (use SpatialConcreteDropout instead).
20 | # Arguments
21 | layer: a layer instance.
22 | weight_regularizer:
23 | A positive number which satisfies
24 | $weight_regularizer = l**2 / (\tau * N)$
25 | with prior lengthscale l, model precision $\tau$ (inverse observation noise),
26 | and N the number of instances in the dataset.
27 | Note that kernel_regularizer is not needed.
28 | dropout_regularizer:
29 | A positive number which satisfies
30 | $dropout_regularizer = 2 / (\tau * N)$
31 | with model precision $\tau$ (inverse observation noise) and N the number of
32 | instances in the dataset.
33 | Note the relation between dropout_regularizer and weight_regularizer:
34 | $weight_regularizer / dropout_regularizer = l**2 / 2$
35 | with prior lengthscale l. Note also that the factor of two should be
36 | ignored for cross-entropy loss, and used only for the Euclidean loss.
37 | """
38 |
39 | def __init__(self, layer, weight_regularizer=0, dropout_regularizer=1e-5,
40 | init_min=0.1, init_max=0.1, is_mc_dropout=True, **kwargs):
41 | assert 'kernel_regularizer' not in kwargs
42 | super(ConcreteDropout, self).__init__(layer, **kwargs)
43 | self.weight_regularizer = weight_regularizer
44 | self.dropout_regularizer = dropout_regularizer
45 | self.is_mc_dropout = is_mc_dropout
46 | self.supports_masking = True
47 | self.p_logit = None
48 | self.init_min = np.log(init_min) - np.log(1. - init_min)
49 | self.init_max = np.log(init_max) - np.log(1. - init_max)
50 |
51 | def build(self, input_shape=None):
52 | self.input_spec = InputSpec(shape=input_shape)
53 | if not self.layer.built:
54 | self.layer.build(input_shape)
55 | self.layer.built = True
56 | super(ConcreteDropout, self).build()
57 |
58 | # initialise p
59 | self.p_logit = self.add_weight(name='p_logit',
60 | shape=(1,),
61 | initializer=tf.random_uniform_initializer(self.init_min, self.init_max),
62 | dtype=tf.dtypes.float32,
63 | trainable=True)
64 |
65 | def compute_output_shape(self, input_shape):
66 | return self.layer.compute_output_shape(input_shape)
67 |
68 | def concrete_dropout(self, x, p):
69 | """
70 | Concrete dropout - used at training time (gradients can be propagated)
71 | :param x: input
72 | :return: approx. dropped out input
73 | """
74 | eps = 1e-07
75 | temp = 0.1
76 |
77 | unif_noise = tf.random.uniform(shape=tf.shape(x))
78 | drop_prob = (
79 | tf.math.log(p + eps)
80 | - tf.math.log(1. - p + eps)
81 | + tf.math.log(unif_noise + eps)
82 | - tf.math.log(1. - unif_noise + eps)
83 | )
84 | drop_prob = tf.math.sigmoid(drop_prob / temp)
85 | random_tensor = 1. - drop_prob
86 |
87 | retain_prob = 1. - p
88 | x *= random_tensor
89 | x /= retain_prob
90 | return x
91 |
92 | def call(self, inputs, training=True):
93 | p = tf.math.sigmoid(self.p_logit)
94 |
95 | # initialise regulariser / prior KL term
96 | input_dim = inputs.shape[-1] # last dim
97 | weight = self.layer.kernel
98 | kernel_regularizer = self.weight_regularizer * tf.reduce_sum(tf.square(weight)) / (1. - p)
99 | dropout_regularizer = p * tf.math.log(p) + (1. - p) * tf.math.log(1. - p)
100 | dropout_regularizer *= self.dropout_regularizer * input_dim
101 | regularizer = tf.reduce_sum(kernel_regularizer + dropout_regularizer)
102 | if self.is_mc_dropout:
103 | return self.layer.call(self.concrete_dropout(inputs, p)), regularizer
104 | else:
105 | def relaxed_dropped_inputs():
106 | return self.layer.call(self.concrete_dropout(inputs, p)), regularizer
107 |
108 | return tf.keras.backend.in_train_phase(relaxed_dropped_inputs,
109 | self.layer.call(inputs),
110 | training=training), regularizer
111 |
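if __name__ == "__main__":
    # Usage sketch (editorial addition, not part of the original module): wrap a Dense
    # layer in ConcreteDropout, run one stochastic forward pass on random toy data, and
    # add the returned regularisation term to a squared-error data loss.
    from tensorflow.keras.layers import Dense

    x = tf.random.normal([8, 16])
    y = tf.random.normal([8, 4])

    cd = ConcreteDropout(Dense(4), weight_regularizer=1e-6, dropout_regularizer=1e-5)
    out, reg = cd(x)                   # call() returns (layer output, regulariser)

    loss = tf.reduce_mean(tf.square(out - y)) + reg
    print("toy loss:", float(loss),
          "learned dropout rate:", float(tf.math.sigmoid(cd.p_logit)))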
--------------------------------------------------------------------------------
/bayes/MNF.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | import tensorflow_probability as tfp
4 |
5 | from normalizingflows.flow_catalog import Made
6 | from normalizingflows.nf_utils import NormalReparamMNF
7 | from normalizingflows.normalizing_flow import NormalizingFlowModel, NormalizingFlow
8 |
9 |
10 | tfd = tfp.distributions
11 | tfb = tfp.bijectors
12 |
13 |
14 | class DenseMNF(tf.keras.layers.Layer):
15 | """Bayesian fully-connected layer. The weight posterior distribution is modelled by a fully-factorized
16 | Gaussian. The Gaussian means depend on an auxiliary random variable z, which is modelled by a normalizing flow.
17 | This allows for multimodality and nonlinear dependencies between the elements of the weight matrix and improves
18 | significantly upon the classical mean-field approximation. The flow's base distribution is a normal distribution with
19 | zero mean and unit variance.
20 |
21 | "Multiplicative Normalizing Flows for Variational Bayesian Neural Networks",
22 | Christos Louizos, Max Welling (Jun 2017)
23 | https://arxiv.org/abs/1703.01961
24 | """
25 |
26 | def __init__(
27 | self,
28 | n_out, # output dimensions
29 | n_flows_q=2, # length flow q(z)
30 | n_flows_r=2, # length flow r(z|w)
31 | use_z=True, # use auxiliary random variable z
32 | prior_var_w=1, # variance of weight prior
33 | prior_var_b=1, # variance of bias prior
34 | flow_h_sizes=[32], # hidden size of flow
35 | max_std=1.0, # limit the standard deviation in the forward pass to avoid local minima (e.g. see Louizos et al.)
36 | **kwargs,
37 | ):
38 | self.n_out = n_out
39 | self.prior_var_w = prior_var_w
40 | self.prior_var_b = prior_var_b
41 | self.max_std = max_std
42 | self.n_flows_q = n_flows_q
43 | self.n_flows_r = n_flows_r
44 | self.use_z = use_z
45 | self.flow_h_sizes = flow_h_sizes
46 | super().__init__(**kwargs)
47 |
48 | def build(self, input_shape):
49 | n_in = self.n_in = input_shape[-1]
50 | # weight means initialized with a Glorot (Xavier) normal initializer
51 | # log variances initialized around -3.0, i.e. variance e^-3 ~ 5e-2
52 | glorot = tf.keras.initializers.GlorotNormal() # Xavier normal initializer
53 | mean_init, var_init = -3.0, 1e-3 # -9.0, 1e-3
54 |
55 | # q(w|z): weights and bias separately
56 | self.mean_W = tf.Variable(glorot([n_in, self.n_out]))
57 | self.log_var_W = tf.Variable(glorot([n_in, self.n_out]) * var_init + mean_init)
58 |
59 | self.mean_b = tf.Variable(tf.zeros(self.n_out))
60 | self.log_var_b = tf.Variable(glorot([self.n_out]) * var_init + mean_init)
61 |
62 | if self.use_z:
63 |             # q(z_0): the base distribution's mean plays a similar role to a dropout rate, as it determines
64 |             # the mean of the multiplicative noise z_i in eq. (4)
65 | self.qz_base = NormalReparamMNF([n_in], var_init=var_init, mean_init=mean_init)
66 |
67 | if n_in > 1:
68 | permutation = tf.cast(np.concatenate((np.arange(n_in / 2, n_in), np.arange(0, n_in / 2))), tf.int32)
69 |
70 | bijectors_q = []
71 | for _ in range(self.n_flows_q):
72 | bijectors_q.append(tfb.Invert(tfb.MaskedAutoregressiveFlow(
73 | shift_and_log_scale_fn=Made(params=2, hidden_units=self.flow_h_sizes, activation="relu"))))
74 | if n_in > 1:
75 | bijectors_q.append(tfp.bijectors.Permute(permutation))
76 |
77 | self.qz = NormalizingFlowModel(base=self.qz_base, flows=bijectors_q, chain=True, name="qz")
78 |
79 | # r(z|w): c, b1, b2 to compute the mean and std
80 | self.r0_c = tf.Variable(glorot([n_in]))
81 | self.r0_b1 = tf.Variable(glorot([n_in]))
82 | self.r0_b2 = tf.Variable(glorot([n_in]))
83 |
84 | bijectors_r = []
85 | for _ in range(self.n_flows_r):
86 | bijectors_r.append(tfb.MaskedAutoregressiveFlow(
87 | shift_and_log_scale_fn=Made(params=2, hidden_units=self.flow_h_sizes, activation="relu")))
88 | if n_in > 1:
89 | bijectors_r.append(tfp.bijectors.Permute(permutation))
90 |
91 | self.flow_r = NormalizingFlow(flows=bijectors_r, chain=True)
92 |
93 | self.epsilon_w = tf.Variable(tf.random.normal([self.n_out]), trainable=False)
94 | self.reset_noise()
95 |
96 | def reset_noise(self):
97 | # sample new epsilon values
98 | self.epsilon_w.assign(tf.random.normal([self.n_out])) # sample epsilon_w
99 | if self.use_z:
100 | self.qz.base.reset_noise() # sample epsilon_z
101 |
102 | def sample_z(self, batch_size, same_noise=False, training=True):
103 | if self.use_z:
104 | if training:
105 | z_samples, log_prob = self.qz.sample(batch_size, same_noise=same_noise)
106 | else: # evaluation without noise
107 | z_samples, log_prob = self.qz.sample_no_noise(batch_size)
108 |
109 | else:
110 | z_samples = tf.ones([batch_size, self.n_in])
111 | log_prob = tf.zeros(batch_size)
112 |
113 | return z_samples, log_prob
114 |
115 | @tf.function
116 | def kl_div(self, same_noise=False):
117 | z, log_q = self.sample_z(1, same_noise=same_noise)
118 | log_q = tf.reduce_sum(log_q)
119 |
120 | weight_mu = tf.reshape(z, shape=(self.n_in, 1)) * self.mean_W
121 |
122 | kldiv_weight = 0.5 * tf.reduce_sum((- self.log_var_W + tf.math.exp(self.log_var_W)
123 | + tf.square(weight_mu) - 1))
124 | kldiv_bias = 0.5 * tf.reduce_sum((- self.log_var_b + tf.math.exp(self.log_var_b)
125 | + tf.square(self.mean_b) - 1))
126 |
127 | log_r = 0
128 | if self.use_z:
129 | cw_mu = tf.linalg.matvec(tf.transpose(weight_mu), self.r0_c)
130 | if same_noise:
131 | epsilon_w = self.epsilon_w
132 | else:
133 | epsilon_w = tf.random.normal([self.n_out])
134 |
135 | cw_var = tf.linalg.matvec(tf.transpose(tf.math.exp(self.log_var_W)), tf.square(self.r0_c))
136 | cw = tf.math.tanh(cw_mu + tf.math.sqrt(cw_var) * epsilon_w) # sample W
137 |
138 | mu_tilde = tf.reduce_mean(tf.tensordot(cw, self.r0_b1, axes=0), axis=0)
139 | neg_log_var_tilde = tf.reduce_mean(tf.tensordot(cw, self.r0_b2, axes=0), axis=0)
140 |
141 | z0, log_r = self.flow_r.inverse(z)
142 | log_r = tf.reduce_sum(log_r)
143 |
144 | dims = float(z0.shape[-1])
145 | exponent = tf.squeeze(tf.reduce_sum(tf.square(z0 - mu_tilde) * tf.math.exp(neg_log_var_tilde), axis=1))
146 | neg_log_det_var = tf.reduce_sum(neg_log_var_tilde)
147 | log_r += 0.5 * (-dims * tf.math.log(2 * np.pi) + neg_log_det_var - exponent)
148 |
149 | kldiv = kldiv_weight + kldiv_bias + log_q - log_r
150 |
151 | return kldiv
152 |
153 | @tf.function
154 | def call(self, x, same_noise=False, training=True):
155 | batch_size = tf.shape(x)[0]
156 | if training:
157 | z, _ = self.sample_z(batch_size, same_noise=same_noise)
158 | mu_out = tf.matmul(x * z, self.mean_W) + self.mean_b
159 |
160 | var_W = tf.clip_by_value(tf.exp(self.log_var_W), 0, self.max_std ** 2)
161 | var_b = tf.clip_by_value(tf.exp(self.log_var_b), 0, self.max_std ** 2)
162 | # var_W = tf.square(std_W)
163 | V_h = tf.matmul(tf.square(x), var_W) + var_b
164 |
165 | if same_noise: # use the same epsilon per batch
166 | epsilon_w = tf.expand_dims(self.epsilon_w, axis=0) # expand batch dimension
167 | epsilon_w = tf.repeat(epsilon_w, batch_size, axis=0) # repeat batch dimension
168 | else:
169 | epsilon_w = tf.random.normal(tf.shape(mu_out)) # TODO: test implementation
170 |
171 | sigma_out = tf.sqrt(V_h) * epsilon_w
172 |
173 | out = mu_out + sigma_out
174 | else: # evaluation without noise
175 | z, _ = self.sample_z(batch_size, training=training)
176 | mu_out = tf.matmul(x * z, self.mean_W) + self.mean_b
177 | out = mu_out
178 |
179 | return out
180 |
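
A minimal usage sketch of the DenseMNF layer above, assuming this repository is on the PYTHONPATH; the layer size, input dimension and batch size are illustrative only. The stochastic forward pass draws the multiplicative noise z from the flow q(z) and samples the pre-activations via the local reparameterization trick, while kl_div returns a single-sample estimate of the KL term for the ELBO:

    import tensorflow as tf
    from bayes.MNF import DenseMNF

    layer = DenseMNF(n_out=2, n_flows_q=2, n_flows_r=2, use_z=True)
    x = tf.random.normal([16, 4])        # batch of 16 inputs with 4 features
    out = layer(x)                       # stochastic forward pass, shape (16, 2)
    out_det = layer(x, training=False)   # deterministic forward pass for evaluation
    kl = layer.kl_div()                  # single-sample KL estimate, added to the loss during training
    print(out.shape, out_det.shape, float(kl))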
--------------------------------------------------------------------------------
/bayes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/bayes/__init__.py
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/data/__init__.py
--------------------------------------------------------------------------------
/data/toy_regression.py:
--------------------------------------------------------------------------------
1 | """
2 | Toy regression problem.
3 |
4 | Based on the toy regression task introduced in:
5 | Hernández-Lobato et al. 2015 -
6 | Probabilistic backpropagation for scalable learning of bayesian neural networks.
7 | """
8 |
9 | import numpy as np
10 |
11 |
12 | class ToyRegressionData():
13 | """
14 | Generates toy data for a regression task.
15 | """
16 | def __init__(self):
17 | self.x_lim = [-4, 4]
18 | self.sigma = 3
19 | self.eps_loc = 0.0
20 | self.eps_scale = 1.0
21 |
22 | def gen_data(self, n_samples):
23 | x = np.random.uniform(self.x_lim[0], self.x_lim[1], size=(n_samples, 1)).astype('float32')
24 | epsilon = np.random.normal(self.eps_loc, self.eps_scale, size=x.shape).astype('float32')
25 | y = np.power(x, 3) + self.sigma * epsilon
26 |
27 | return x, y
28 |
29 | def eval_data(self, x):
30 | return np.power(x, 3)
31 |
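
A short usage sketch of ToyRegressionData, assuming this repository is on the PYTHONPATH; the sample counts are illustrative. Targets are generated as y = x^3 + 3*eps with eps ~ N(0, 1), and eval_data returns the noise-free function for plotting the ground truth:

    import numpy as np
    from data.toy_regression import ToyRegressionData

    data = ToyRegressionData()
    x_train, y_train = data.gen_data(n_samples=20)       # noisy training samples in [-4, 4]
    x_eval = np.linspace(-6, 6, 100).reshape(-1, 1).astype('float32')
    y_true = data.eval_data(x_eval)                       # noise-free y = x^3
    print(x_train.shape, y_train.shape, y_true.shape)     # (20, 1) (20, 1) (100, 1)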
--------------------------------------------------------------------------------
/data/train_data_ian_regression.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/data/train_data_ian_regression.npz
--------------------------------------------------------------------------------
/data/train_data_regression.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/data/train_data_regression.npz
--------------------------------------------------------------------------------
/dqn/Bayes_by_Backprop_DQN.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from bayes.Bayes_by_Backprop import BayesByBackprop
5 |
6 | tfkl = tf.keras.layers
7 |
8 |
9 | class BBB_Backbone(tf.keras.Model):
10 | """
11 | Backbone of the Deep Q-Network (DQN) with Bayes by Backprop - Blundell et al. (2015).
12 |
13 | Takes 'num_states' inputs and outputs one Q-value for each action.
14 | """
15 | def __init__(self, num_states, hidden_units, num_actions, max_std=1.0, log_var_mean_init=-3.0, log_var_init=1e-3):
16 | super(BBB_Backbone, self).__init__()
17 | self.input_layer = tfkl.InputLayer(input_shape=(num_states,))
18 |
19 | self.hidden_layers = []
20 | for i in hidden_units:
21 | self.hidden_layers.append(tfkl.Dense(i, activation='relu', kernel_initializer='RandomNormal'))
22 | self.dense_bbb_out = BayesByBackprop(n_out=num_actions, max_std=max_std, log_var_mean_init=log_var_mean_init,
23 | log_var_init=log_var_init)
24 |
25 | @tf.function
26 | def call(self, inputs, same_noise=False, training=True):
27 | out = self.input_layer(inputs)
28 | for layer in self.hidden_layers:
29 | out = layer(out)
30 | out = self.dense_bbb_out(out, same_noise=same_noise, training=training)
31 | return out
32 |
33 | def kl_div(self, same_noise=True):
34 | """
35 | Compute current KL divergence of the Bayes by Backprop layers.
36 | Used as a regularization term during training.
37 | """
38 | kldiv = self.dense_bbb_out.kl_div(same_noise)
39 | return kldiv
40 |
41 | def reset_noise(self):
42 | """
43 | Re-sample noise/epsilon parameters of the Bayes by Backprop layers. Required for the case of having the same
44 | epsilon parameters across one batch.
45 | """
46 | self.dense_bbb_out.reset_noise()
47 |
48 | def print_variance(self):
49 |         print(f"Log-variance of output layer: {self.dense_bbb_out.log_var_W}")  # hidden layers are plain Dense and have no log_var_W
50 |
51 |
52 | class BBBDQN(tf.Module):
53 | """
54 | Deep Q-Network utilizing Bayes by Backprop for efficient sampling.
55 | """
56 | def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr,
57 | alpha):
58 | super(BBBDQN, self).__init__()
59 | self.num_actions = num_actions
60 | self.batch_size = batch_size
61 | self.optimizer = tf.keras.optimizers.Adam(lr)
62 | self.gamma = gamma
63 | self.kl_coeff = alpha*batch_size / max_experiences
64 | self.model = BBB_Backbone(num_states, hidden_units, num_actions, max_std=0.5, log_var_mean_init=-3.0,
65 | log_var_init=1e-3)
66 | self.experience = {'s': [], 'a': [], 'r': [], 's_next': [], 'end': []}
67 | self.max_experiences = max_experiences
68 | self.min_experiences = min_experiences
69 |
70 | def predict(self, inputs, same_noise=False, training=True):
71 | """
72 | Get Q-values from backbone network.
73 | :param inputs: inputs for the backbone network, e.g. states.
74 | :param same_noise: uses the same epsilon parameter for one mini-batch, if set to `True`.
75 | :param training: forward pass without stochasticity, if set to `False`.
76 | :return: outputs of the backbone network, e.g. num_action Q-values.
77 | """
78 | return self.model(tf.convert_to_tensor(inputs, tf.float32), same_noise=same_noise, training=training)
79 |
80 | def train(self, target_net):
81 | """
82 |         Train with experience replay, i.e. sample stored transitions in randomized order to remove correlations in the
83 |         observation sequence and reduce sampling bias.
84 | :param target_net: target network.
85 | """
86 | if len(self.experience['s']) < self.min_experiences:
87 | return 0, 0
88 |
89 | experience_replay_enabled = True # set False to disable experience replay
90 | if experience_replay_enabled:
91 | # sample random minibatch of transitions
92 | ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
93 | else:
94 | n = len(self.experience['s'])
95 | if n < self.batch_size:
96 | ids = np.full(self.batch_size, n-1)
97 | else:
98 | ids = np.arange(max(0, n - self.batch_size), (n - 1), 1)
99 |
100 | states = tf.convert_to_tensor([self.experience['s'][i] for i in ids], tf.float32)
101 | actions = tf.convert_to_tensor([self.experience['a'][i] for i in ids], tf.float32)
102 | rewards = tf.convert_to_tensor([self.experience['r'][i] for i in ids], tf.float32)
103 | states_next = tf.convert_to_tensor([self.experience['s_next'][i] for i in ids], tf.float32)
104 | ends = tf.convert_to_tensor([self.experience['end'][i] for i in ids], tf.bool)
105 |
106 | # compute loss and perform gradient descent
107 | loss, kl_loss = self.gradient_update(target_net, states, actions, rewards, states_next, ends)
108 |
109 | return loss, kl_loss
110 |
111 | @tf.function
112 | def gradient_update(self, target_net, states, actions, rewards, states_next, ends):
113 | """
114 | Gradient update with @tf.function decorator for faster performance.
115 | """
116 | # make predictions with target network without stochasticity and get sample q for Q-function update
117 | # sample is different if epoch ends
118 | double_dqn = True
119 | if double_dqn:
120 | next_action = tf.math.argmax(self.predict(states_next, training=False), axis=1)
121 | q_values = target_net.predict(states_next, training=False)
122 | q_max = tf.math.reduce_sum(q_values * tf.one_hot(next_action, self.num_actions), axis=1)
123 | else:
124 | q_max = tf.math.reduce_max(target_net.predict(states_next, training=False), axis=1)
125 |
126 | y = tf.where(ends, rewards, rewards + self.gamma * q_max)
127 |
128 | self.model.reset_noise() # sample new epsilon_w and epsilon_z
129 |
130 | # perform gradient descent
131 | with tf.GradientTape() as tape:
132 | tape.watch(self.model.trainable_variables)
133 |
134 | kl_loss = self.kl_coeff * self.model.kl_div(same_noise=True)
135 | # Q-values from training network for selected actions
136 | q_values = self.predict(states, same_noise=True)
137 | selected_q_values = tf.math.reduce_sum(q_values * tf.one_hot(tf.cast(actions, tf.int32), self.num_actions),
138 | axis=1)
139 |
140 | td_error = tf.math.reduce_sum(tf.square(y - selected_q_values))
141 | loss = td_error + kl_loss
142 |
143 | gradients = tape.gradient(loss, self.model.trainable_variables)
144 | self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
145 |
146 | self.model.reset_noise() # sample new epsilon_w and epsilon_z
147 |
148 | return loss, kl_loss
149 |
150 | def get_action(self, states, same_noise=False, training=True):
151 | """
152 | Predict action with the Bayes By Backprop network. In each forward pass the weights are sampled from the weight
153 | posterior distribution. Hence, approximated Thompson sampling is performed. For uncertain weight posterior
154 | distributions the variance in the sampled values will be higher, leading inherently to more exploration.
155 |
156 | :param states: observed states, e.g. [x, dx, th, dth].
157 | :return: action
158 | """
159 | q_values = self.predict(np.atleast_2d(states), same_noise=same_noise, training=training)
160 | action = np.argmax(q_values)
161 |
162 | return action
163 |
164 | def add_experience(self, exp):
165 | """
166 | Add experience to experience history. If 'max_experiences' exceeded, remove first item and append current
167 | experience.
168 | :param exp: experience {'s': prev_observations, 'a': action, 'r': reward, 's_next': observations, 'end': end}.
169 | """
170 | if len(self.experience['s']) >= self.max_experiences:
171 | for key in self.experience.keys():
172 | self.experience[key].pop(0)
173 |
174 | for key, value in exp.items():
175 | self.experience[key].append(value)
176 |
177 | def copy_weights(self, train_net):
178 | """
179 | Copy weights from train network to target network.
180 | :param train_net: model of train network.
181 | """
182 | variables_target = self.model.trainable_variables
183 | variables_train = train_net.model.trainable_variables
184 |
185 | for v_target, v_train in zip(variables_target, variables_train):
186 | v_target.assign(v_train.numpy())
187 |
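
A usage sketch of BBBDQN with a train and a target network, assuming this repository is on the PYTHONPATH and a Gym version with the classic (obs, reward, done, info) API, as used elsewhere in this repository; the hyperparameters are illustrative only. The KL term is weighted by alpha * batch_size / max_experiences relative to the TD error, and get_action performs approximate Thompson sampling by drawing one set of weights per forward pass:

    import gym
    from dqn.Bayes_by_Backprop_DQN import BBBDQN

    env = gym.make("CartPole-v0")
    kwargs = dict(num_states=env.observation_space.shape[0], num_actions=env.action_space.n,
                  hidden_units=[24, 24], gamma=0.99, max_experiences=10000, min_experiences=100,
                  batch_size=32, lr=1e-3, alpha=1.0)
    train_net, target_net = BBBDQN(**kwargs), BBBDQN(**kwargs)

    state = env.reset()
    action = train_net.get_action(state, same_noise=True)  # one posterior weight draw per action selection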
--------------------------------------------------------------------------------
/dqn/Concrete_Dropout_DQN.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from tensorflow.keras.layers import Dense, Input
5 | from tensorflow.keras import Model
6 | from bayes.ConcreteDropout import ConcreteDropout
7 |
8 |
9 | def make_backbone(num_states, hidden_units, num_actions, dropout_reg=1e-5, wd=1e-3):
10 | """
11 | Build a tensorflow keras backbone model utilizing concrete dropout layers.
12 | """
13 | losses: list = []
14 | inp = Input(shape=(num_states,))
15 | x = inp
16 |
17 | for i in hidden_units:
18 | x, loss = ConcreteDropout(Dense(i, activation='relu'),
19 | weight_regularizer=wd, dropout_regularizer=dropout_reg)(x)
20 | losses.append(loss)
21 |
22 | x = Dense(100, activation='relu')(x)
23 | out = Dense(num_actions, activation='linear')(x)
24 | model = Model(inp, out)
25 | model.add_loss(losses)
26 |
27 | return model
28 |
29 |
30 | class DQN(tf.Module):
31 | """
32 | Deep Q-Network.
33 | """
34 | def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr):
35 | super(DQN, self).__init__()
36 | self.num_actions = num_actions
37 | self.batch_size = batch_size
38 | self.optimizer = tf.optimizers.SGD(lr)
39 | self.gamma = gamma
40 | self.model = make_backbone(num_states, hidden_units, num_actions)
41 | self.experience = {'s': [], 'a': [], 'r': [], 's_next': [], 'end': []}
42 | self.max_experiences = max_experiences
43 | self.min_experiences = min_experiences
44 | self.states_uncertainty = {}
45 |
46 | def predict(self, inputs, training=True):
47 | """
48 | Get Q-values from backbone network.
49 | :param inputs: inputs for the backbone network, e.g. states.
50 | :param training: forward pass without stochasticity, if set to `False`.
51 | :return: outputs of the backbone network, e.g. num_action Q-values.
52 | """
53 | return self.model(tf.convert_to_tensor(inputs, tf.float32), training=training)
54 |
55 | def train(self, target_net):
56 | """
57 |         Train with experience replay, i.e. sample stored transitions in randomized order to remove correlations in the
58 |         observation sequence and reduce sampling bias.
59 | :param target_net: target network.
60 | """
61 | if len(self.experience['s']) < self.min_experiences:
62 | return 0, 0
63 |
64 | experience_replay_enabled = True # set False to disable experience replay
65 | if experience_replay_enabled:
66 | # sample random minibatch of transitions
67 | ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
68 | else:
69 | n = len(self.experience['s'])
70 | if n < self.batch_size:
71 | ids = np.full(self.batch_size, n-1)
72 | else:
73 | ids = np.arange(max(0, n - self.batch_size), (n - 1), 1)
74 |
75 | states = tf.convert_to_tensor([self.experience['s'][i] for i in ids], tf.float32)
76 | actions = tf.convert_to_tensor([self.experience['a'][i] for i in ids], tf.float32)
77 | rewards = tf.convert_to_tensor([self.experience['r'][i] for i in ids], tf.float32)
78 | states_next = tf.convert_to_tensor([self.experience['s_next'][i] for i in ids], tf.float32)
79 | ends = tf.convert_to_tensor([self.experience['end'][i] for i in ids], tf.bool)
80 |
81 | # compute loss and perform gradient descent
82 | loss, reg_loss = self.gradient_update(target_net, states, actions, rewards, states_next, ends)
83 |
84 | return loss, reg_loss
85 |
86 | @tf.function
87 | def gradient_update(self, target_net, states, actions, rewards, states_next, ends):
88 | """
89 | Gradient update with @tf.function decorator for faster performance.
90 | """
91 |         # predictions from the target network provide the sample q for the Q-function update; the target differs if the episode ends
92 | double_dqn = True
93 | if double_dqn:
94 | next_action = tf.math.argmax(self.predict(states_next), axis=1)
95 | q_values = target_net.predict(states_next)
96 | q_max = tf.math.reduce_sum(q_values * tf.one_hot(next_action, self.num_actions), axis=1)
97 | else:
98 | q_max = tf.math.reduce_max(target_net.predict(states_next), axis=1)
99 |
100 | y = tf.where(ends, rewards, rewards + self.gamma * q_max)
101 |
102 | # perform gradient descent
103 | with tf.GradientTape() as tape:
104 | tape.watch(self.model.trainable_variables)
105 |
106 | # Q-values from training network for selected actions
107 | q_values = self.predict(states)
108 | selected_q_values = tf.math.reduce_sum(q_values * tf.one_hot(tf.cast(actions, tf.int32), self.num_actions), axis=1)
109 |
110 | regularization_loss = tf.reduce_sum(self.model.losses)
111 | loss_pred = tf.math.reduce_sum(tf.square(y - selected_q_values)) # compute loss
112 | loss = loss_pred + regularization_loss
113 |
114 | gradients = tape.gradient(loss, self.model.trainable_variables)
115 | self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
116 |
117 | return loss, regularization_loss
118 |
119 | def get_action(self, states, training=True):
120 | """
121 | Predict action with the Concrete Dropout network. Keeping Concrete Dropout enabled in the forward pass forms a
122 | Bayesian approximation. Hence, approximated Thompson sampling is performed.
123 |
124 | :param states: observed states, e.g. [x, dx, th, dth].
125 | :param training: forward pass without stochasticity, if set to `False`.
126 | :return: action
127 | """
128 | q_values = self.predict(np.atleast_2d(states), training)
129 | action = np.argmax(q_values)
130 |
131 | return action
132 |
133 | def add_experience(self, exp):
134 | """
135 | Add experience to experience history. If 'max_experiences' exceeded, remove first item and append current
136 | experience.
137 | :param exp: experience {'s': prev_observations, 'a': action, 'r': reward, 's_next': observations, 'end': end}.
138 | """
139 | if len(self.experience['s']) >= self.max_experiences:
140 | for key in self.experience.keys():
141 | self.experience[key].pop(0)
142 |
143 | for key, value in exp.items():
144 | self.experience[key].append(value)
145 |
146 | def copy_weights(self, train_net):
147 | """
148 | Copy weights from train network to target network.
149 | :param train_net: model of train network.
150 | """
151 | variables_target = self.model.trainable_variables
152 | variables_train = train_net.model.trainable_variables
153 |
154 | for v_target, v_train in zip(variables_target, variables_train):
155 | v_target.assign(v_train.numpy())
156 |
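
A usage sketch of the Concrete Dropout backbone above, assuming this repository is on the PYTHONPATH; the input and action dimensions are illustrative. Because the dropout probabilities are trainable, they can be read back after training from the p_logit variables used in ConcreteDropout.call:

    import tensorflow as tf
    from bayes.ConcreteDropout import ConcreteDropout
    from dqn.Concrete_Dropout_DQN import make_backbone

    model = make_backbone(num_states=4, hidden_units=[24, 24], num_actions=2)
    q_values = model(tf.random.normal([1, 4]))        # stochastic Q-values, shape (1, 2)

    # learned dropout probabilities p = sigmoid(p_logit) of the ConcreteDropout wrappers
    probs = [float(tf.math.sigmoid(layer.p_logit)) for layer in model.layers
             if isinstance(layer, ConcreteDropout)]
    print(q_values.shape, probs)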
--------------------------------------------------------------------------------
/dqn/DQN.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | tfkl = tf.keras.layers
5 |
6 |
7 | class Backbone(tf.keras.Model):
8 | """
9 | Backbone of the Deep Q-Network (DQN) that approximates the Q-function.
10 | Takes 'num_states' inputs and outputs one Q-value for each action.
11 | """
12 | def __init__(self, num_states, hidden_units, num_actions):
13 | super(Backbone, self).__init__()
14 | self.input_layer = tf.keras.layers.InputLayer(input_shape=(num_states,))
15 |
16 | self.hidden_layers = []
17 | for i in hidden_units:
18 | self.hidden_layers.append(tf.keras.layers.Dense(
19 | i, activation='relu', kernel_initializer='RandomNormal'))
20 |
21 | self.output_layer = tf.keras.layers.Dense(
22 | num_actions, activation='linear', kernel_initializer='RandomNormal')
23 |
24 | @tf.function
25 | def call(self, inputs):
26 | z = self.input_layer(inputs)
27 | for layer in self.hidden_layers:
28 | z = layer(z)
29 | output = self.output_layer(z)
30 | return output
31 |
32 |
33 | class DQN(tf.Module):
34 | """
35 | Deep Q-Network.
36 | """
37 | def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr):
38 | super(DQN, self).__init__()
39 | self.num_actions = num_actions
40 | self.batch_size = batch_size
41 | self.optimizer = tf.keras.optimizers.Adam(lr)
42 | self.gamma = gamma
43 | self.model = Backbone(num_states, hidden_units, num_actions)
44 | self.experience = {'s': [], 'a': [], 'r': [], 's_next': [], 'end': []}
45 | self.max_experiences = max_experiences
46 | self.min_experiences = min_experiences
47 |
48 | def predict(self, inputs):
49 | """
50 | Get Q-values from backbone network.
51 | :param inputs: inputs for the backbone network, e.g. states.
52 | :return: outputs of the backbone network, e.g. num_action Q-values.
53 | """
54 | return self.model(tf.convert_to_tensor(inputs, tf.float32))
55 |
56 | def train(self, target_net):
57 | """
58 |         Train with experience replay, i.e. sample stored transitions in randomized order to remove correlations in the
59 |         observation sequence and reduce sampling bias.
60 | :param target_net: target network.
61 | """
62 | if len(self.experience['s']) < self.min_experiences:
63 | return 0
64 |
65 | experience_replay_enabled = True # set False to disable experience replay
66 | if experience_replay_enabled:
67 | # sample random minibatch of transitions
68 | ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
69 | else:
70 | n = len(self.experience['s'])
71 | if n < self.batch_size:
72 | ids = np.full(self.batch_size, n-1)
73 | else:
74 | ids = np.arange(max(0, n - self.batch_size), (n - 1), 1)
75 |
76 | states = tf.convert_to_tensor([self.experience['s'][i] for i in ids], tf.float32)
77 | actions = tf.convert_to_tensor([self.experience['a'][i] for i in ids], tf.float32)
78 | rewards = tf.convert_to_tensor([self.experience['r'][i] for i in ids], tf.float32)
79 | states_next = tf.convert_to_tensor([self.experience['s_next'][i] for i in ids], tf.float32)
80 | ends = tf.convert_to_tensor([self.experience['end'][i] for i in ids], tf.bool)
81 |
82 | # compute loss and perform gradient descent
83 | loss = self.gradient_update(target_net, states, actions, rewards, states_next, ends)
84 |
85 | return loss
86 |
87 | @tf.function
88 | def gradient_update(self, target_net, states, actions, rewards, states_next, ends):
89 | """
90 | Gradient update with @tf.function decorator for faster performance.
91 | """
92 | # make predictions with target network and get sample q for Q-function update, sample is different if epoch ends
93 | target_network_enabled = True # set False to disable target network
94 | double_dqn = True
95 | if target_network_enabled:
96 | if double_dqn:
97 | next_action = tf.math.argmax(self.predict(states_next), axis=1)
98 | q_values = target_net.predict(states_next)
99 | q_max = tf.math.reduce_sum(q_values * tf.one_hot(next_action, self.num_actions), axis=1)
100 | else:
101 | q_max = tf.math.reduce_max(target_net.predict(states_next), axis=1)
102 | else:
103 | q_max = tf.math.reduce_max(self.predict(states_next), axis=1)
104 | y = tf.where(ends, rewards, rewards + self.gamma * q_max)
105 |
106 | # perform gradient descent
107 | with tf.GradientTape() as tape:
108 | tape.watch(self.model.trainable_variables)
109 |
110 | # Q-values from training network for selected actions
111 | q_values = self.predict(states)
112 | selected_q_values = tf.math.reduce_sum(q_values * tf.one_hot(tf.cast(actions, tf.int32), self.num_actions), axis=1)
113 |
114 | loss = tf.math.reduce_sum(tf.square(y - selected_q_values)) # compute loss
115 |
116 | gradients = tape.gradient(loss, self.model.trainable_variables)
117 | self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
118 |
119 | return loss
120 |
121 | def get_action(self, states, epsilon=0):
122 | """
123 | Choose random action with probability 'epsilon', otherwise choose action with greedy policy, e.g. action that
124 | maximizes the Q-value function.
125 | :param states: observed states, e.g. [x, dx, th, dth].
126 | :param epsilon: probability of random action.
127 | :return: action
128 | """
129 | # take random action with probability 'epsilon'
130 | if np.random.random() < epsilon:
131 | action = np.random.choice(self.num_actions)
132 | return action
133 |
134 | # else take action that maximizes the Q-function
135 | else:
136 | q_values = self.predict(np.atleast_2d(states))
137 | action = np.argmax(q_values)
138 | return action
139 |
140 | def add_experience(self, exp):
141 | """
142 | Add experience to experience history. If 'max_experiences' exceeded, remove first item and append current
143 | experience.
144 | :param exp: experience {'s': prev_observations, 'a': action, 'r': reward, 's_next': observations, 'end': end}.
145 | """
146 | if len(self.experience['s']) >= self.max_experiences:
147 | for key in self.experience.keys():
148 | self.experience[key].pop(0)
149 |
150 | for key, value in exp.items():
151 | self.experience[key].append(value)
152 |
153 | def copy_weights(self, train_net):
154 | """
155 | Copy weights from train network to target network.
156 | :param train_net: model of train network.
157 | """
158 | variables_target = self.model.trainable_variables
159 | variables_train = train_net.model.trainable_variables
160 |
161 | for v_target, v_train in zip(variables_target, variables_train):
162 | v_target.assign(v_train.numpy())
163 |
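
The gradient_update above uses the double-DQN target: the next action is selected with the train network and evaluated with the target network, and the bootstrap term is dropped when the episode ends. A small numeric sketch of this target computation (all values are illustrative only):

    import tensorflow as tf

    gamma = 0.99
    rewards = tf.constant([1.0, 1.0])
    ends = tf.constant([False, True])
    q_train_next = tf.constant([[0.2, 0.8], [0.5, 0.1]])   # Q(s', a) from the train network
    q_target_next = tf.constant([[0.3, 0.6], [0.4, 0.2]])  # Q(s', a) from the target network

    next_action = tf.argmax(q_train_next, axis=1)                               # select with the train net
    q_max = tf.reduce_sum(q_target_next * tf.one_hot(next_action, 2), axis=1)   # evaluate with the target net
    y = tf.where(ends, rewards, rewards + gamma * q_max)
    print(y.numpy())  # [1.594 1.   ]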
--------------------------------------------------------------------------------
/dqn/MC_Dropout_DQN.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from tensorflow.keras.layers import InputLayer
4 |
5 | tfkl = tf.keras.layers
6 |
7 |
8 | class Backbone(tf.keras.Model):
9 | """
10 | Backbone of the Deep Q-Network (DQN) with Bayesian fully-connected layers that approximates the Q-function.
11 | The Bayesian fully-connected layers utilize Dropout as Bayesian approximation according to
12 | "Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning"
13 | - Gal and Ghahramani (2015): https://arxiv.org/abs/1506.02142.
14 |
15 | Takes 'num_states' inputs and outputs one Q-value for each action.
16 | """
17 | def __init__(self, num_states, hidden_units, dropout_rate, num_actions, N):
18 | super(Backbone, self).__init__()
19 |
20 | self.N = N # data points
21 | lengthscale = 1e-2
22 | tau = 1.0
23 | reg = lengthscale**2 * (1 - dropout_rate) / (2.0 * self.N * tau)
24 |
25 | self.hidden_layers = []
26 | self.input_layer = InputLayer(input_shape=(num_states,))
27 | for i in hidden_units:
28 | self.hidden_layers.append(tfkl.Dense(i, activation='relu', kernel_initializer='RandomNormal',
29 | kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg)))
30 |
31 | self.hidden_layers.append(tfkl.Dropout(dropout_rate)) # only one dropout layer before the output
32 |
33 | self.output_layer = tfkl.Dense(num_actions, activation='linear', kernel_initializer='RandomNormal')
34 |
35 | @tf.function
36 | def call(self, inputs):
37 | out = self.input_layer(inputs)
38 |
39 | for layer in self.hidden_layers:
40 | if isinstance(layer, tfkl.Dropout):
41 | out = layer(out, training=True)
42 | else:
43 | out = layer(out)
44 | out = self.output_layer(out)
45 | return out
46 |
47 |
48 | class DQN(tf.Module):
49 | """
50 | Deep Q-Network utilizing Dropout as Bayesian approximation for efficient sampling.
51 | """
52 | def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr, dropout_rate):
53 | super(DQN, self).__init__()
54 | self.num_actions = num_actions
55 | self.batch_size = batch_size
56 | self.optimizer = tf.keras.optimizers.Adam(lr)
57 | self.gamma = gamma
58 | self.model = Backbone(num_states, hidden_units, dropout_rate, num_actions, max_experiences)
59 | self.experience = {'s': [], 'a': [], 'r': [], 's_next': [], 'end': []}
60 | self.max_experiences = max_experiences
61 | self.min_experiences = min_experiences
62 | self.states_uncertainty = {}
63 |
64 | def predict(self, inputs, training=True):
65 | """
66 | Get Q-values from backbone network.
67 | :param inputs: inputs for the backbone network, e.g. states.
68 | :param training: forward pass without stochasticity, if set to `False`.
69 | :return: outputs of the backbone network, e.g. num_action Q-values.
70 | """
71 | return self.model(tf.convert_to_tensor(inputs, tf.float32), training=training)
72 |
73 | def train(self, target_net):
74 | """
75 |         Train with experience replay, i.e. sample stored transitions in randomized order to remove correlations in the
76 |         observation sequence and reduce sampling bias.
77 | :param target_net: target network.
78 | """
79 | if len(self.experience['s']) < self.min_experiences:
80 | return 0, 0
81 |
82 | experience_replay_enabled = True # set False to disable experience replay
83 | if experience_replay_enabled:
84 | # sample random minibatch of transitions
85 | ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
86 | else:
87 | n = len(self.experience['s'])
88 | if n < self.batch_size:
89 | ids = np.full(self.batch_size, n-1)
90 | else:
91 | ids = np.arange(max(0, n - self.batch_size), (n - 1), 1)
92 |
93 | states = tf.convert_to_tensor([self.experience['s'][i] for i in ids], tf.float32)
94 | actions = tf.convert_to_tensor([self.experience['a'][i] for i in ids], tf.float32)
95 | rewards = tf.convert_to_tensor([self.experience['r'][i] for i in ids], tf.float32)
96 | states_next = tf.convert_to_tensor([self.experience['s_next'][i] for i in ids], tf.float32)
97 | ends = tf.convert_to_tensor([self.experience['end'][i] for i in ids], tf.bool)
98 |
99 | # compute loss and perform gradient descent
100 | loss, reg_loss = self.gradient_update(target_net, states, actions, rewards, states_next, ends)
101 |
102 | return loss, reg_loss
103 |
104 | @tf.function
105 | def gradient_update(self, target_net, states, actions, rewards, states_next, ends):
106 | """
107 | Gradient update with @tf.function decorator for faster performance.
108 | """
109 |         # predictions from the target network provide the sample q for the Q-function update; the target differs if the episode ends
110 | double_dqn = True
111 | if double_dqn:
112 | next_action = tf.math.argmax(self.predict(states_next), axis=1)
113 | q_values = target_net.predict(states_next)
114 | q_max = tf.math.reduce_sum(q_values * tf.one_hot(next_action, self.num_actions), axis=1)
115 | else:
116 | q_max = tf.math.reduce_max(target_net.predict(states_next), axis=1)
117 |
118 | y = tf.where(ends, rewards, rewards + self.gamma * q_max)
119 |
120 | # perform gradient descent
121 | with tf.GradientTape() as tape:
122 | tape.watch(self.model.trainable_variables)
123 |
124 | # Q-values from training network for selected actions
125 | q_values = self.predict(states)
126 | selected_q_values = tf.math.reduce_sum(q_values * tf.one_hot(tf.cast(actions, tf.int32), self.num_actions), axis=1)
127 |
128 | regularization_loss = tf.reduce_sum(self.model.losses)
129 | loss_pred = tf.math.reduce_sum(tf.square(y - selected_q_values)) # compute loss
130 | loss = loss_pred + regularization_loss
131 |
132 | gradients = tape.gradient(loss, self.model.trainable_variables)
133 | self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
134 |
135 | return loss, regularization_loss
136 |
137 | def get_action(self, states, training=True):
138 | """
139 | Predict action with the MC Dropout network. Keeping MC Dropout enabled in the forward pass forms a Bayesian
140 | approximation. Hence, approximated Thompson sampling is performed.
141 |
142 | :param states: observed states, e.g. [x, dx, th, dth].
143 | :param training: forward pass without stochasticity, if set to `False`.
144 | :return: action
145 | """
146 | q_values = self.predict(np.atleast_2d(states), training)
147 | action = np.argmax(q_values)
148 | return action
149 |
150 | def add_experience(self, exp):
151 | """
152 | Add experience to experience history. If 'max_experiences' exceeded, remove first item and append current
153 | experience.
154 | :param exp: experience {'s': prev_observations, 'a': action, 'r': reward, 's_next': observations, 'end': end}.
155 | """
156 | if len(self.experience['s']) >= self.max_experiences:
157 | for key in self.experience.keys():
158 | self.experience[key].pop(0)
159 |
160 | for key, value in exp.items():
161 | self.experience[key].append(value)
162 |
163 | def copy_weights(self, train_net):
164 | """
165 | Copy weights from train network to target network.
166 | :param train_net: model of train network.
167 | """
168 | variables_target = self.model.trainable_variables
169 | variables_train = train_net.model.trainable_variables
170 |
171 | for v_target, v_train in zip(variables_target, variables_train):
172 | v_target.assign(v_train.numpy())
173 |
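
The kernel regularizer in the Backbone above follows Gal and Ghahramani's dropout-as-Bayesian-approximation: the L2 weight-decay factor is l^2 * (1 - p) / (2 * N * tau) for prior length-scale l, dropout rate p, N data points and model precision tau. A quick numeric check with illustrative values:

    lengthscale, tau = 1e-2, 1.0
    dropout_rate, N = 0.1, 10000                                   # illustrative values
    reg = lengthscale ** 2 * (1 - dropout_rate) / (2.0 * N * tau)
    print(reg)  # 4.5e-09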
--------------------------------------------------------------------------------
/dqn/MNF_DQN.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from bayes.MNF import DenseMNF
5 |
6 | tfkl = tf.keras.layers
7 |
8 |
9 | class MNFBackbone(tf.Module):
10 | """
11 | Backbone of the Deep Q-Network (DQN) with Bayesian fully-connected layers that approximates the Q-function.
12 | The Bayesian fully-connected layers utilize multiplicative normalizing flows by Christos Louizos, Max Welling
13 | (Jun 2017).
14 |
15 | Takes 'num_states' inputs and outputs one Q-value for each action.
16 | """
17 | def __init__(self, num_states, hidden_units, num_actions, use_z=True, max_std=1.0):
18 | super(MNFBackbone, self).__init__()
19 | self.input_layer = tfkl.InputLayer(input_shape=(num_states,))
20 |
21 | self.hidden_layers = []
22 | for i in hidden_units:
23 | self.hidden_layers.append(tfkl.Dense(i, activation='relu', kernel_initializer='RandomNormal'))
24 | self.dense_mnf_out = DenseMNF(n_out=num_actions, use_z=use_z, max_std=max_std, n_flows_q=2, n_flows_r=2,
25 | flow_h_sizes=[32])
26 |
27 | @tf.function
28 | def __call__(self, inputs, same_noise=False, training=True):
29 | out = self.input_layer(inputs)
30 | for layer in self.hidden_layers:
31 | out = layer(out)
32 | out = self.dense_mnf_out(out, same_noise=same_noise, training=training)
33 | return out
34 |
35 | def kl_div(self, same_noise=True):
36 | """
37 | Compute current KL-divergence of all Bayesian layers.
38 | Can be used as a regularization term during training.
39 | """
40 | kldiv = self.dense_mnf_out.kl_div(same_noise)
41 | return kldiv
42 |
43 | def reset_noise(self):
44 | """
45 | Re-sample noise/epsilon parameters of the MNF layers. Required for the case of having the same epsilon
46 | parameters across one batch.
47 | """
48 | self.dense_mnf_out.reset_noise()
49 |
50 | def print_variance(self):
51 |         print(f"Log-variance of output layer: {self.dense_mnf_out.log_var_W}")  # hidden layers are plain Dense and have no log_var_W
52 |
53 |
54 | class MNFDQN(tf.Module):
55 | """
56 | Deep Q-Network utilizing Multiplicative Normalizing Flows for efficient sampling.
57 | """
58 | def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr,
59 | alpha):
60 | super(MNFDQN, self).__init__()
61 | self.num_actions = num_actions
62 | self.batch_size = batch_size
63 | self.optimizer = tf.keras.optimizers.Adam(lr)
64 | self.gamma = gamma
65 | self.kl_coeff = alpha*batch_size / max_experiences
66 | self.model = MNFBackbone(num_states, hidden_units, num_actions, use_z=True, max_std=0.5)
67 | self.experience = {'s': [], 'a': [], 'r': [], 's_next': [], 'end': []}
68 | self.max_experiences = max_experiences
69 | self.min_experiences = min_experiences
70 |
71 | def predict(self, inputs, same_noise=False, training=True):
72 | """
73 | Get Q-values from backbone network.
74 | :param inputs: inputs for the backbone network, e.g. states.
75 | :param same_noise: uses the same epsilon parameter, if set to `True`.
76 | :param training: forward pass without stochasticity, if set to `False`.
77 | :return: outputs of the backbone network, e.g. num_action Q-values.
78 | """
79 | return self.model(tf.convert_to_tensor(inputs, tf.float32), same_noise=same_noise, training=training)
80 |
81 | def train(self, target_net):
82 | """
83 |         Train with experience replay, i.e. sample stored transitions in randomized order to remove correlations in the
84 |         observation sequence and reduce sampling bias.
85 | :param target_net: target network.
86 | """
87 | if len(self.experience['s']) < self.min_experiences:
88 | return 0, 0
89 |
90 | experience_replay_enabled = True # set False to disable experience replay
91 | if experience_replay_enabled:
92 | # sample random minibatch of transitions
93 | ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
94 | else:
95 | n = len(self.experience['s'])
96 | if n < self.batch_size:
97 | ids = np.full(self.batch_size, n-1)
98 | else:
99 | ids = np.arange(max(0, n - self.batch_size), (n - 1), 1)
100 |
101 | states = tf.convert_to_tensor([self.experience['s'][i] for i in ids], tf.float32)
102 | actions = tf.convert_to_tensor([self.experience['a'][i] for i in ids], tf.float32)
103 | rewards = tf.convert_to_tensor([self.experience['r'][i] for i in ids], tf.float32)
104 | states_next = tf.convert_to_tensor([self.experience['s_next'][i] for i in ids], tf.float32)
105 | ends = tf.convert_to_tensor([self.experience['end'][i] for i in ids], tf.bool)
106 |
107 | # compute loss and perform gradient descent
108 | loss, kl_loss = self.gradient_update(target_net, states, actions, rewards, states_next, ends)
109 |
110 | return loss, kl_loss
111 |
112 | @tf.function
113 | def gradient_update(self, target_net, states, actions, rewards, states_next, ends):
114 | """
115 | Gradient update with @tf.function decorator for faster performance.
116 | """
117 | # make predictions with target network without stochasticity and get sample q for Q-function update
118 | # sample is different if epoch ends
119 | double_dqn = True
120 | if double_dqn:
121 | next_action = tf.math.argmax(self.predict(states_next, training=False), axis=1)
122 | q_values = target_net.predict(states_next, training=False)
123 | q_max = tf.math.reduce_sum(q_values * tf.one_hot(next_action, self.num_actions), axis=1)
124 | else:
125 | q_max = tf.math.reduce_max(target_net.predict(states_next, training=False), axis=1)
126 |
127 | y = tf.where(ends, rewards, rewards + self.gamma * q_max)
128 |
129 | self.model.reset_noise() # sample new epsilon_w and epsilon_z
130 |
131 | # perform gradient descent
132 | with tf.GradientTape() as tape:
133 | tape.watch(self.model.trainable_variables)
134 |
135 | kl_loss = self.kl_coeff * self.model.kl_div(same_noise=True)
136 | # Q-values from training network for selected actions
137 | q_values = self.predict(states, same_noise=True)
138 | selected_q_values = tf.math.reduce_sum(q_values * tf.one_hot(tf.cast(actions, tf.int32), self.num_actions),
139 | axis=1)
140 |
141 | td_error = tf.math.reduce_sum(tf.square(y - selected_q_values))
142 | loss = td_error + kl_loss
143 |
144 | gradients = tape.gradient(loss, self.model.trainable_variables)
145 | self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
146 |
147 | self.model.reset_noise() # sample new epsilon_w and epsilon_z
148 |
149 | return loss, kl_loss
150 |
151 | def get_action(self, states, same_noise=False, training=True):
152 | """
153 | Predict action with the MNF network. In each forward pass the weights are sampled from the weight posterior
154 | distribution. Hence, approximated Thompson sampling is performed. For uncertain weight posterior distributions
155 |         the variance in the sampled values will be higher, leading inherently to more exploration.
156 |
157 | :param states: observed states, e.g. [x, dx, th, dth].
158 | :param same_noise: uses the same epsilon parameter, if set to `True`.
159 | :param training: forward pass without stochasticity, if set to `False`.
160 | :return: action
161 | """
162 | q_values = self.predict(np.atleast_2d(states), same_noise=same_noise, training=training)
163 | action = np.argmax(q_values)
164 |
165 | return action
166 |
167 | def add_experience(self, exp):
168 | """
169 | Add experience to experience history. If 'max_experiences' exceeded, remove first item and append current
170 | experience.
171 | :param exp: experience {'s': prev_observations, 'a': action, 'r': reward, 's_next': observations, 'end': end}.
172 | """
173 | if len(self.experience['s']) >= self.max_experiences:
174 | for key in self.experience.keys():
175 | self.experience[key].pop(0)
176 |
177 | for key, value in exp.items():
178 | self.experience[key].append(value)
179 |
180 | def copy_weights(self, train_net):
181 | """
182 | Copy weights from train network to target network.
183 | :param train_net: model of train network.
184 | """
185 | variables_target = self.model.trainable_variables
186 | variables_train = train_net.model.trainable_variables
187 |
188 | for v_target, v_train in zip(variables_target, variables_train):
189 | v_target.assign(v_train.numpy())
190 |
--------------------------------------------------------------------------------
/dqn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/dqn/__init__.py
--------------------------------------------------------------------------------
/dqn/train.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import os
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 |
6 | from gym import wrappers
7 |
8 | DQN = "dqn"
9 | MC_DROPOUT = "mc_dropout"
10 | CONCRETE_DROPOUT = "concrete_dropout"
11 | BAYES_BY_BACKPROP = "bayes_by_backprop"
12 | MNF = "mnf"
13 | ALLOWED_NETWORK_CONFIGS = {DQN, MC_DROPOUT, CONCRETE_DROPOUT, BAYES_BY_BACKPROP, MNF}
14 | BAYES_NETWORK_CONFIGS = {MC_DROPOUT, CONCRETE_DROPOUT, BAYES_BY_BACKPROP, MNF}
15 |
16 |
17 | def train_episode(env, train_net, target_net, config):
18 | rewards = 0
19 | reward_list = []
20 | losses = []
21 | kl_losses = []
22 | state = env.reset()
23 | algorithm = config["algorithm"]
24 |
25 | for step in range(1, config["step_limit"]+1):
26 |         if config["env_render"]:
27 | env.render()
28 |
29 |         # choose next action based on the network
30 | if algorithm == DQN:
31 | action = train_net.get_action(state, epsilon=config["epsilon"])
32 | elif algorithm == BAYES_BY_BACKPROP:
33 | action = train_net.get_action(state, same_noise=True)
34 | elif algorithm == MNF:
35 | action = train_net.get_action(state, same_noise=True)
36 | elif algorithm == MC_DROPOUT:
37 | action = train_net.get_action(state, training=True)
38 | elif algorithm == CONCRETE_DROPOUT:
39 | action = train_net.get_action(state, training=True)
40 |
41 | prev_state = state # store old observations
42 | state, reward, done, _ = env.step(action) # execute action, observe reward and next state
43 | rewards = rewards + reward
44 |
45 | if step == (config["step_limit"]):
46 | done = True
47 |
48 | # store transitions
49 | exp = {'s': prev_state, 'a': action, 'r': reward, 's_next': state, 'end': done}
50 | train_net.add_experience(exp)
51 |
52 | if step % config["gradient_steps"] == 0:
53 | if algorithm in BAYES_NETWORK_CONFIGS:
54 | loss, kl_loss = train_net.train(target_net)
55 | kl_losses.append(kl_loss)
56 | losses.append(loss)
57 | else:
58 | loss = train_net.train(target_net)
59 | losses.append(loss)
60 |
61 | # copy weights every 'copy_steps' to target network
62 | if step % config["copy_steps"] == 0:
63 | target_net.copy_weights(train_net)
64 |
65 | if done:
66 | state = env.reset()
67 | reward_list.append(rewards)
68 | rewards = 0
69 |
70 | mean_loss = np.mean(losses)
71 |
72 | if algorithm in BAYES_NETWORK_CONFIGS:
73 | mean_kl = np.mean(kl_losses)
74 | return reward_list[0], step, mean_loss, mean_kl
75 |
76 | else:
77 | return reward_list[0], step, mean_loss
78 |
79 |
80 | def test_policy(env, train_net, config, video=False):
81 | if video:
82 | env = wrappers.Monitor(env, os.path.join(os.getcwd(), "videos"), force=True)
83 |
84 | rewards = 0
85 | state = env.reset()
86 | algorithm = config["algorithm"]
87 |
88 | for step in range(config["step_limit"]):
89 |         if config["env_render"]:
90 | env.render()
91 |
92 |         # choose next action based on the network
93 | if algorithm == DQN:
94 | action = train_net.get_action(state, epsilon=0)
95 | elif algorithm == BAYES_BY_BACKPROP:
96 | action = train_net.get_action(state, training=False)
97 | elif algorithm == MNF:
98 | action = train_net.get_action(state, training=False)
99 | elif algorithm == MC_DROPOUT:
100 | action = train_net.get_action(state, training=False)
101 | elif algorithm == CONCRETE_DROPOUT:
102 | action = train_net.get_action(state, training=False)
103 |
104 | state, reward, done, _ = env.step(action)
105 | rewards = rewards + reward
106 |
107 | if step == (config["step_limit"] - 1):
108 | done = True
109 |
110 | if done:
111 | break
112 |
113 | return rewards, step
114 |
115 |
116 | def train_dqn(config, env, train_net, target_net, run_id):
117 | algorithm = config["algorithm"]
118 | if algorithm not in ALLOWED_NETWORK_CONFIGS:
119 | raise AssertionError(f"'algorithm' has to be one of {ALLOWED_NETWORK_CONFIGS} but is set to {algorithm}.")
120 |
121 | epsilon = config["epsilon"]
122 | n_epochs = config["epochs_num"]
123 | train_losses = np.empty(n_epochs)
124 | train_kl = np.empty(n_epochs)
125 | train_rewards = np.empty(n_epochs)
126 |
127 | test_rewards = [0]
128 | test_iterations = [0]
129 | mean_kl = 0
130 | total_steps = 0
131 |
132 | # initialize train and target net
133 | state = env.reset()
134 | _ = train_net.get_action(state)
135 | _ = target_net.get_action(state)
136 | if algorithm in {BAYES_BY_BACKPROP, MNF}:
137 | train_net.model.kl_div(same_noise=True)
138 | target_net.model.kl_div(same_noise=True)
139 | target_net.copy_weights(train_net) # initialize with same weights
140 |
141 | for n in range(n_epochs):
142 | env.reset() # initialize sequence
143 |
144 | if algorithm == DQN:
145 | epsilon = max(config["epsilon_min"], epsilon * config["epsilon_decay"])
146 | train_reward, steps, mean_loss = train_episode(env, train_net, target_net, config)
147 |
148 | elif algorithm == BAYES_BY_BACKPROP:
149 | if n > 0:
150 | train_net.model.reset_noise()
151 | train_reward, steps, mean_loss, mean_kl = train_episode(env, train_net, target_net, config)
152 | train_kl[n] = mean_kl
153 |
154 | elif algorithm == MNF:
155 | if n > 0:
156 | train_net.model.reset_noise()
157 | train_reward, steps, mean_loss, mean_kl = train_episode(env, train_net, target_net, config)
158 | train_kl[n] = mean_kl
159 |
160 | elif algorithm == MC_DROPOUT:
161 | train_reward, steps, mean_loss, mean_kl = train_episode(env, train_net, target_net, config)
162 |
163 | elif algorithm == CONCRETE_DROPOUT:
164 | train_reward, steps, mean_loss, mean_kl = train_episode(env, train_net, target_net, config)
165 |
166 | total_steps = total_steps + steps
167 | train_losses[n] = mean_loss
168 | train_rewards[n] = train_reward
169 | avg_train_rewards = train_rewards[max(0, n - 100):(n + 1)].mean() # average reward of the last 100 episodes
170 |
171 | if n % config["test_episodes"] == 0:
172 | if n == 0: # first episode is burn in phase
173 | total_reward = 0
174 | iterations = 0
175 | else:
176 | total_reward, iterations = test_policy(env, train_net, config)
177 |
178 | test_rewards.append(total_reward)
179 | test_iterations.append(total_steps)
180 |
181 |             print(f"Epoch: {n}, reward: {total_reward}, loss: {mean_loss}, kl-loss: {mean_kl}, iterations: {iterations}"
182 | f", epsilon: {epsilon}, avg reward (last 100): {avg_train_rewards}")
183 |
184 | if config["plot_avg_reward"]:
185 | directory = f"results/plots/{algorithm}/"
186 | if not os.path.exists(directory):
187 | os.makedirs(directory)
188 |
189 | plt.figure()
190 | filename = f"AccumulatedReward_{algorithm}_{str(run_id)}.pdf"
191 | plt.plot(test_iterations, test_rewards, linewidth=0.75)
192 | plt.xlabel("Iterations")
193 | plt.legend(["Accumulated reward"])
194 | plt.tight_layout()
195 | plt.savefig(os.path.join(directory, filename))
196 | plt.close()
197 |
198 | plt.figure()
199 | filename = f"Loss_{algorithm}_{str(run_id)}.pdf"
200 | plt.plot(range(config["epochs_num"]), train_losses, linewidth=0.75)
201 | plt.plot(range(config["epochs_num"]), train_kl, linewidth=0.75)
202 | plt.xlabel("Iterations")
203 | plt.legend(["Mean loss", "Mean kl-loss"])
204 | plt.tight_layout()
205 | plt.savefig(os.path.join(directory, filename))
206 | plt.close()
207 |
208 | if config["save"]:
209 | current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
210 | save_dir = f"results/{config['env_name']}/{algorithm}/" + str(run_id) + '_' + current_time
211 | if not os.path.exists(save_dir):
212 | os.makedirs(save_dir)
213 | np.savez(save_dir, test_rewards=test_rewards, test_iterations=test_iterations)
214 |
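
train_dqn() and train_episode() above read all hyperparameters from a plain config dictionary. The sketch below lists the keys that this file accesses; the values are illustrative examples, not the settings used for the plots in this repository:

    config = {
        "algorithm": "mnf",        # one of: "dqn", "mc_dropout", "concrete_dropout", "bayes_by_backprop", "mnf"
        "env_name": "CartPole-v0",
        "env_render": False,
        "epochs_num": 500,
        "step_limit": 200,
        "gradient_steps": 1,       # gradient update every step
        "copy_steps": 25,          # sync the target network every 25 steps
        "test_episodes": 10,       # evaluate the policy every 10 epochs
        "epsilon": 0.99,           # epsilon-greedy parameters, only used by the plain DQN
        "epsilon_min": 0.05,
        "epsilon_decay": 0.99,
        "plot_avg_reward": True,
        "save": False,
    }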
--------------------------------------------------------------------------------
/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/envs/__init__.py
--------------------------------------------------------------------------------
/envs/env_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility file for OpenAI Gym environments.
3 | """
4 |
5 |
6 | class WrapFrameSkip():
7 | """
8 |     Wraps OpenAI Gym environments to skip frames. This is also known as action repeat.
9 | """
10 | def __init__(self, env, frameskip):
11 | assert frameskip >= 1
12 | self._env = env
13 | self._frameskip = frameskip
14 | self.observation_space = env.observation_space
15 | self.action_space = env.action_space
16 |
17 | def reset(self):
18 | return self._env.reset()
19 |
20 | def step(self, action):
21 | sum_rew = 0
22 | for _ in range(self._frameskip):
23 | obs, rew, done, info = self._env.step(action)
24 | sum_rew += rew
25 | if done:
26 | break
27 | return obs, sum_rew, done, info
28 |
29 | def render(self, mode='human'):
30 | return self._env.render(mode=mode)
31 |
32 | def close(self):
33 | self._env.close()
34 |
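
A usage sketch of WrapFrameSkip, assuming a Gym version with the classic (obs, reward, done, info) API used throughout this repository; the environment name and frameskip are illustrative. The returned reward is summed over the skipped frames:

    import gym
    from envs.env_utils import WrapFrameSkip

    env = WrapFrameSkip(gym.make("MountainCar-v0"), frameskip=4)
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())  # action repeated for 4 frames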
--------------------------------------------------------------------------------
/envs/nchain.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | import numpy as np
4 |
5 |
6 | class NChainEnv(gym.Env):
7 | """
8 | n-Chain environment.
9 | The environment consists of a chain of N states and the agent always starts in state s2, from where it can either
10 | move left or right. In state s1, the agent receives a small reward of r = 0.001 and a larger reward r = 1 in state
11 | sN. This environment is described in Deep Exploration via Bootstrapped DQN
12 | (https://papers.nips.cc/paper/6501-deep-exploration-via-bootstrapped-dqn.pdf).
13 |
14 | Code from:
15 | Randomized Value Functions via Multiplicative Normalizing Flows
16 | (https://github.com/facebookresearch/RandomizedValueFunctions)
17 | """
18 | def __init__(self, n):
19 | self.n = n
20 | self.state = 1 # Start at state s2
21 | self.action_space = spaces.Discrete(2)
22 | self.observation_space = spaces.Discrete(self.n)
23 | self.max_nsteps = n + 8
24 |
25 | def step(self, action):
26 | assert self.action_space.contains(action)
27 | v = np.arange(self.n)
28 | reward = lambda s, a: 1.0 if (s == (self.n - 1) and a == 1) else (0.001 if (s == 0 and a == 0) else 0)
29 | is_done = lambda nsteps: nsteps >= self.max_nsteps
30 |
31 | r = reward(self.state, action)
32 | if action: # forward
33 | if self.state != self.n - 1:
34 | self.state += 1
35 | else: # backward
36 | if self.state != 0:
37 | self.state -= 1
38 | self.nsteps += 1
39 | return (v <= self.state).astype('float32'), r, is_done(self.nsteps), None
40 |
41 | def reset(self):
42 | v = np.arange(self.n)
43 | self.state = 1
44 | self.nsteps = 0
45 | return (v <= self.state).astype('float32')
46 |
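
A short usage sketch of NChainEnv, assuming this repository is on the PYTHONPATH; the chain length is illustrative. Observations are cumulative indicator vectors (v <= state), so the starting state s2 is encoded as [1, 1, 0, ...]:

    from envs.nchain import NChainEnv

    env = NChainEnv(n=8)
    obs = env.reset()              # [1. 1. 0. 0. 0. 0. 0. 0.]
    obs, r, done, _ = env.step(1)  # move right; reward stays 0 until the last state is reached
    print(obs, r, done)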
--------------------------------------------------------------------------------
/normalizingflows/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/normalizingflows/__init__.py
--------------------------------------------------------------------------------
/normalizingflows/flow_catalog.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of various Normalizing Flows.
3 | TensorFlow Probability bijectors are used as the base class. To perform density estimation and sampling, four functions
4 | have to be defined for each Normalizing Flow.
5 |
6 |
7 | 1. _forward:
8 | Turns one random outcome into another random outcome from a different distribution.
9 |
10 | 2. _inverse:
11 | Useful for 'reversing' a transformation to compute one probability in terms of another.
12 |
13 | 3. _forward_log_det_jacobian:
14 | The log of the absolute value of the determinant of the matrix of all first-order partial derivatives of the function.
15 |
16 | 4. _inverse_log_det_jacobian:
17 | The log of the absolute value of the determinant of the matrix of all first-order partial derivatives of the inverse function.
18 |
19 |
20 | "forward" and "forward_log_det_jacobian" have to be defined to perform sampling.
21 | "inverse" and "inverse_log_det_jacobian" have to be defined to perform density estimation.
22 | """
23 |
24 | import numpy as np
25 | import tensorflow as tf
26 | import tensorflow_probability as tfp
27 |
28 |
29 | tfd = tfp.distributions
30 | tfb = tfp.bijectors
31 | tfk = tf.keras
32 |
33 | tf.keras.backend.set_floatx('float32')
34 |
35 | print('tensorflow: ', tf.__version__)
36 | print('tensorflow-probability: ', tfp.__version__)
37 |
38 |
39 | '''--------------------------------------- Masked Autoregressive Flow -----------------------------------------------'''
40 |
41 |
42 | class Made(tfk.layers.Layer):
43 | """
44 | Implementation of a Masked Autoencoder for Distribution Estimation (MADE) [Germain et al. (2015)].
45 | The existing TensorFlow bijector "AutoregressiveNetwork" is used. The output is reshaped to output one shift vector
46 | and one log_scale vector.
47 |
48 | :param params: Python integer specifying the number of parameters to output per input.
49 | :param event_shape: Python list-like of positive integers (or a single int), specifying the shape of the input to this layer, which is also the event_shape of the distribution parameterized by this layer. Currently only rank-1 shapes are supported. That is, event_shape must be a single integer. If not specified, the event shape is inferred when this layer is first called or built.
50 | :param hidden_units: Python list-like of non-negative integers, specifying the number of units in each hidden layer.
51 | :param activation: An activation function. See tf.keras.layers.Dense. Default: None.
52 | :param use_bias: Whether or not the dense layers constructed in this layer should have a bias term. See tf.keras.layers.Dense. Default: True.
53 | :param kernel_regularizer: Regularizer function applied to the Dense kernel weight matrices. Default: None.
54 | :param bias_regularizer: Regularizer function applied to the Dense bias weight vectors. Default: None.
55 | """
56 |
57 | def __init__(self, params, event_shape=None, hidden_units=None, activation=None, use_bias=True,
58 | kernel_regularizer=None, bias_regularizer=None, name="made"):
59 |
60 | super(Made, self).__init__(name=name)
61 |
62 | self.params = params
63 | self.event_shape = event_shape
64 | self.hidden_units = hidden_units
65 | self.activation = activation
66 | self.use_bias = use_bias
67 | self.kernel_regularizer = kernel_regularizer
68 | self.bias_regularizer = bias_regularizer
69 |
70 | self.network = tfb.AutoregressiveNetwork(params=params, event_shape=event_shape, hidden_units=hidden_units,
71 | activation=activation, use_bias=use_bias, kernel_regularizer=kernel_regularizer,
72 | bias_regularizer=bias_regularizer)
73 |
74 | def call(self, x):
75 | shift, log_scale = tf.unstack(self.network(x), num=2, axis=-1)
76 |
77 | return shift, tf.math.tanh(log_scale)
78 |
79 |
80 | '''------------------------------------- Batch Normalization Bijector -----------------------------------------------'''
81 |
82 |
83 | class BatchNorm(tfb.Bijector):
84 | """
85 | Implementation of a Batch Normalization layer for use in normalizing flows according to [Papamakarios et al. (2017)].
86 | The moving average of the layer statistics is adapted from [Dinh et al. (2016)].
87 |
88 | :param eps: Hyperparameter that ensures numerical stability, if any of the elements of v is near zero.
89 | :param decay: Weight for the update of the moving average, e.g. avg = (1-decay)*avg + decay*new_value.
90 | """
91 |
92 | def __init__(self, eps=1e-5, decay=0.95, validate_args=False, name="batch_norm"):
93 | super(BatchNorm, self).__init__(
94 | forward_min_event_ndims=1,
95 | inverse_min_event_ndims=1,
96 | validate_args=validate_args,
97 | name=name)
98 |
99 | self._vars_created = False
100 | self.eps = eps
101 | self.decay = decay
102 |
103 | def _create_vars(self, x):
104 | # account for 1xd and dx1 vectors
105 | if len(x.get_shape()) == 1:
106 | n = x.get_shape().as_list()[0]
107 | if len(x.get_shape()) == 2:
108 | n = x.get_shape().as_list()[1]
109 |
110 | self.beta = tf.compat.v1.get_variable('beta', [1, n], dtype=tf.float32)
111 | self.gamma = tf.compat.v1.get_variable('gamma', [1, n], dtype=tf.float32)
112 | self.train_m = tf.compat.v1.get_variable(
113 | 'mean', [1, n], dtype=tf.float32, trainable=False)
114 | self.train_v = tf.compat.v1.get_variable(
115 | 'var', [1, n], dtype=tf.float32, trainable=False)
116 |
117 | self._vars_created = True
118 |
119 | def _forward(self, u):
120 | if not self._vars_created:
121 | self._create_vars(u)
122 | return (u - self.beta) * tf.exp(-self.gamma) * tf.sqrt(self.train_v + self.eps) + self.train_m
123 |
124 | def _inverse(self, x):
125 | # Eq. 22 of [Papamakarios et al. (2017)]. Called during training of a normalizing flow.
126 | if not self._vars_created:
127 | self._create_vars(x)
128 |
129 | # statistics of current minibatch
130 | m, v = tf.nn.moments(x, axes=[0], keepdims=True)
131 |
132 | # update train statistics via exponential moving average
133 | self.train_v.assign_sub(self.decay * (self.train_v - v))
134 | self.train_m.assign_sub(self.decay * (self.train_m - m))
135 |
136 | # normalize using current minibatch statistics, followed by BN scale and shift
137 | return (x - m) * 1. / tf.sqrt(v + self.eps) * tf.exp(self.gamma) + self.beta
138 |
139 | def _inverse_log_det_jacobian(self, x):
140 | # at training time, the log_det_jacobian is computed from statistics of the
141 | # current minibatch.
142 | if not self._vars_created:
143 | self._create_vars(x)
144 |
145 | _, v = tf.nn.moments(x, axes=[0], keepdims=True)
146 | abs_log_det_J_inv = tf.reduce_sum(
147 | self.gamma - .5 * tf.math.log(v + self.eps))
148 | return abs_log_det_J_inv
149 |
--------------------------------------------------------------------------------
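A minimal sketch (not part of the repository) of how the Made layer above is typically plugged into tfb.MaskedAutoregressiveFlow for density estimation, assuming a recent tensorflow-probability; the 2-D data and hidden sizes are illustrative. The BatchNorm bijector above can additionally be interleaved between flow layers as in [Papamakarios et al. (2017)].

    import tensorflow as tf
    import tensorflow_probability as tfp
    from normalizingflows.flow_catalog import Made

    tfd, tfb = tfp.distributions, tfp.bijectors

    made = Made(params=2, hidden_units=[32, 32], activation="relu")
    maf = tfb.MaskedAutoregressiveFlow(shift_and_log_scale_fn=made)
    flow = tfd.TransformedDistribution(
        distribution=tfd.MultivariateNormalDiag(loc=tf.zeros(2)), bijector=maf)

    x = tf.random.normal([64, 2])
    nll = -tf.reduce_mean(flow.log_prob(x))  # density estimation: inverse + inverse_log_det_jacobian
    samples = flow.sample(8)                 # sampling: forward (+ forward_log_det_jacobian for densities)
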
/normalizingflows/nf_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of functions that are important for training normalizing flows.
3 | """
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | import tensorflow_probability as tfp
8 | tfd = tfp.distributions
9 | tfb = tfp.bijectors
10 |
11 |
12 | '''----------------------------------- Normal distribution with reparametrization -----------------------------------'''
13 |
14 |
15 | class NormalReparamMNF(tf.Module):
16 | """
17 | Normal distribution with reparameterization to be able to learn the mean and variance.
18 |
19 | :param shape: Shape of the tensor
20 |     :param var_init (float): scaling factor for the Glorot initialization of the log-variance, optional
21 |     :param mean_init (float): constant offset added to the log-variance initialization, optional
22 | """
23 | def __init__(self, shape, var_init=1.0, mean_init=0.0):
24 | super(NormalReparamMNF, self).__init__()
25 |
26 | glorot = tf.keras.initializers.GlorotNormal() # Xavier normal initializer
27 |
28 | self.shape = shape
29 | self.mean = tf.Variable(glorot(shape), trainable=True)
30 | self.log_var = tf.Variable(glorot(shape) * var_init + mean_init, trainable=True)
31 | self.epsilon = tf.Variable(tf.random.normal(self.shape), trainable=False)
32 |
33 | @tf.function
34 | def sample(self, batch_size, same_noise=False):
35 |         mean = tf.tile(self.mean[None, :], [batch_size, 1])  # replicate the mean across the batch dimension
36 | if same_noise:
37 | epsilon = tf.expand_dims(self.epsilon, axis=0) # expand batch size dimension
38 | epsilon = tf.repeat(epsilon, batch_size, axis=0) # use the same noise/epsilon for the whole batch
39 | else:
40 | epsilon = tf.random.normal([batch_size, self.shape[0]])
41 | var = tf.exp(self.log_var)
42 | samples = mean + tf.sqrt(var) * epsilon
43 |
44 | return samples
45 |
46 | @tf.function
47 | def log_prob(self, samples):
48 | dims = float(samples.shape[-1])
49 | var = tf.exp(self.log_var)
50 | exponent = tf.reduce_sum(tf.square(samples - self.mean)/var, axis=1)
51 | log_det_var = tf.reduce_sum(self.log_var)
52 | log_prob = -0.5 * (dims * tf.math.log(2 * np.pi) + log_det_var + exponent)
53 |
54 | return log_prob
55 |
56 | def prob(self, samples):
57 | log_prob = self.log_prob(samples)
58 |
59 | return tf.exp(log_prob)
60 |
61 | def log_std(self):
62 | return 0.5 * self.log_var
63 |
64 | def reset_noise(self):
65 | self.epsilon.assign(tf.random.normal(self.shape))
66 |
--------------------------------------------------------------------------------
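A minimal sketch (not part of the repository) of the reparameterized sampling and log-density interface of NormalReparamMNF; the shape is illustrative.

    from normalizingflows.nf_utils import NormalReparamMNF

    q = NormalReparamMNF(shape=[3])
    samples = q.sample(batch_size=5)                  # shape (5, 3), fresh Gaussian noise per row
    log_q = q.log_prob(samples)                       # shape (5,), diagonal-Gaussian log-density

    q.reset_noise()                                   # draw a new fixed epsilon
    tied = q.sample(batch_size=5, same_noise=True)    # the same noise vector repeated across the batch
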
/normalizingflows/normalizing_flow.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow_probability as tfp
3 | tfb = tfp.bijectors
4 |
5 |
6 | class NormalizingFlow(tf.Module):
7 | """
8 | Stacking of several normalizing flows. Constitutes a normalizing flow itself.
9 | """
10 |
11 | def __init__(self, flows, chain=True, name=None, **kwargs):
12 |         super(NormalizingFlow, self).__init__(**kwargs)
13 | if not isinstance(name, str):
14 | name = "flow"
15 |
16 | self.flows = flows
17 | self.chain = chain # use tfb.Chain
18 | if chain:
19 | self.flow = tfb.Chain(bijectors=list(reversed(flows)), name=name)
20 |
21 | @tf.function
22 | def forward(self, z): # z -> x
23 | if self.chain:
24 | x = self.flow.forward(z)
25 | log_dets = self.flow.forward_log_det_jacobian(z, event_ndims=1)
26 | else:
27 | log_dets = tf.zeros(tf.shape(z)[0])
28 | zk = z
29 | for flow in self.flows:
30 |                 log_dets = log_dets + flow._forward_log_det_jacobian(zk)  # the "-" sign is already included in the flows' forward_log_det_jacobian
31 | zk = flow.forward(zk)
32 |
33 | x = zk
34 |
35 | return x, log_dets
36 |
37 | @tf.function
38 | def inverse(self, x): # x -> z
39 | if self.chain:
40 | z = self.flow.inverse(x)
41 | log_dets = self.flow.inverse_log_det_jacobian(x, event_ndims=1)
42 | else:
43 | log_dets = tf.zeros(tf.shape(x)[0])
44 | zk = x
45 | for flow in reversed(self.flows):
46 | log_dets = log_dets + flow._inverse_log_det_jacobian(zk)
47 | zk = flow.inverse(zk)
48 |
49 | z = zk
50 |
51 | return z, log_dets
52 |
53 |
54 | class NormalizingFlowModel(NormalizingFlow):
55 | """A normalizing flow model as a combination of base distribution and flow."""
56 |
57 | def __init__(self, base, flows, name="transformed_dist", **kwargs):
58 | super().__init__(flows, name=name, **kwargs)
59 |
60 | self.base = base # distribution class that exposes a log_prob() and sample() method
61 | self.flows = flows
62 |
63 | def log_prob(self, x):
64 | z, log_dets = self.inverse(x)
65 | base_prob = self.base.log_prob(z)
66 |
67 | return base_prob + log_dets
68 |
69 | def prob(self, x):
70 | return tf.exp(self.log_prob(x))
71 |
72 | def sample(self, batch_size, same_noise=False):
73 | z = self.base.sample(batch_size, same_noise=same_noise)
74 | base_prob = self.base.log_prob(z)
75 | x, log_dets = self.forward(z)
76 |
77 | return x, base_prob + log_dets
78 |
79 | def sample_no_noise(self, batch_size):
80 | z = tf.expand_dims(self.base.mean, axis=0) # expand batch dimension
81 | z = tf.repeat(z, batch_size, axis=0)
82 | base_prob = self.base.log_prob(z)
83 | x, log_dets = self.forward(z)
84 |
85 | return x, base_prob + log_dets
86 |
--------------------------------------------------------------------------------
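A minimal sketch (not part of the repository): combine the learnable Gaussian base from nf_utils.py with two simple affine TFP bijectors (stand-ins for the flows in flow_catalog.py) in a NormalizingFlowModel, then query densities and samples. Assumes a recent tensorflow-probability that provides tfb.Shift and tfb.Scale.

    import tensorflow as tf
    import tensorflow_probability as tfp
    from normalizingflows.nf_utils import NormalReparamMNF
    from normalizingflows.normalizing_flow import NormalizingFlowModel

    tfb = tfp.bijectors

    base = NormalReparamMNF(shape=[2])
    flows = [tfb.Shift([0.5, -0.5]), tfb.Scale([2.0, 0.5])]
    model = NormalizingFlowModel(base, flows)

    x = tf.random.normal([4, 2])
    log_px = model.log_prob(x)         # base.log_prob(z) plus the inverse log-det terms
    samples, log_q = model.sample(4)   # push base samples through the chained bijectors
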
/plots/BayesByBackprop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/BayesByBackprop.png
--------------------------------------------------------------------------------
/plots/ConcreteDropout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/ConcreteDropout.png
--------------------------------------------------------------------------------
/plots/ConcreteDropout_heterostatic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/ConcreteDropout_heterostatic.png
--------------------------------------------------------------------------------
/plots/MCDropout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/MCDropout.png
--------------------------------------------------------------------------------
/plots/MCDropout_heteroscedastic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/MCDropout_heteroscedastic.png
--------------------------------------------------------------------------------
/plots/MNF_all_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/MNF_all_layers.png
--------------------------------------------------------------------------------
/plots/MNF_last_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/MNF_last_layers.png
--------------------------------------------------------------------------------
/plots/avg_acc_reward_cartpole.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/avg_acc_reward_cartpole.png
--------------------------------------------------------------------------------
/plots/avg_acc_reward_mountaincar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/avg_acc_reward_mountaincar.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow-gpu>=2.0
2 | tensorflow-probability>=0.8.0
3 | tensorflow-datasets>=1.2.0
4 | numpy<1.19.0,>=1.16.0
5 | matplotlib>=3.1.1
6 | jupyterlab>=1.1.4
7 | gym
--------------------------------------------------------------------------------
/toy_regression_bayes.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | import matplotlib.pyplot as plt
4 |
5 | from data.toy_regression import ToyRegressionData
6 | from bayes.MNF import DenseMNF
7 | from bayes.Bayes_by_Backprop import BayesByBackprop
8 |
9 | tfkl = tf.keras.layers
10 |
11 | TOY_DATA = "toy"
12 | IAN_DATA = "ian"
13 | SAMPLE_DATA = "sample"
14 | ALLOWED_DATA_CONFIGS = {TOY_DATA, IAN_DATA, SAMPLE_DATA}
15 |
16 | MNF = "mnf"
17 | BAYES_BY_BACKPROP = "bayesbybackprop"
18 | DENSE = "dense"
19 | ALLOWED_NETWORK_CONFIGS = {MNF, BAYES_BY_BACKPROP, DENSE}
20 |
21 |
22 | class MLP(tf.Module):
23 | """
24 | Simple Multi-layer Perceptron Model.
25 | """
26 | def __init__(self):
27 | super(MLP, self).__init__()
28 | self.input_layer = tfkl.InputLayer(input_shape=(1,))
29 | self.hidden_layer_1 = tfkl.Dense(100, activation='relu')
30 | self.hidden_layer_2 = tfkl.Dense(100, activation='relu')
31 | self.output_layer = tfkl.Dense(1, activation='linear')
32 |
33 | @tf.function
34 | def __call__(self, x, *args, **kwargs):
35 | y = self.input_layer(x)
36 | y = self.hidden_layer_1(y)
37 | y = self.hidden_layer_2(y)
38 | y = self.output_layer(y)
39 | return y
40 |
41 |
42 | class BNN_MNF(tf.Module):
43 | """
44 |     Bayesian Neural Network with fully-connected layers utilizing Multiplicative Normalizing Flows
45 |     [Louizos and Welling, 2017].
46 | """
47 | def __init__(self, input_dim=1, hidden_units=[100, 100], output_dim=1, hidden_bayes=False, use_z=True, max_std=1.0):
48 | super(BNN_MNF, self).__init__()
49 | self.input_layer = tfkl.InputLayer(input_shape=(input_dim,))
50 |
51 | self.hidden_layers = []
52 | self.hidden_bayes = hidden_bayes
53 | for i in hidden_units:
54 | if self.hidden_bayes:
55 | self.hidden_layers.append(DenseMNF(n_out=i, use_z=use_z, max_std=max_std))
56 | else:
57 | self.hidden_layers.append(tfkl.Dense(i, activation='relu', kernel_initializer='RandomNormal'))
58 |
59 | self.dense_mnf_out = DenseMNF(n_out=output_dim, use_z=use_z, max_std=max_std)
60 |
61 | @tf.function
62 | def __call__(self, inputs, same_noise=False, training=True, *args, **kwargs):
63 | out = self.input_layer(inputs)
64 | for layer in self.hidden_layers:
65 | if self.hidden_bayes:
66 | out = layer(out, same_noise=same_noise, training=training)
67 | out = tf.nn.relu(out)
68 | else:
69 | out = layer(out) # relu already in keras layer
70 | out = self.dense_mnf_out(out, same_noise=same_noise, training=training)
71 |
72 | return out
73 |
74 | def kl_div(self, same_noise=True):
75 | """
76 | Compute current KL divergence of all layers.
77 | Can be used as a regularization term during training.
78 | """
79 | kldiv = 0
80 | if self.hidden_bayes:
81 | for dense_mnf in self.hidden_layers:
82 | kldiv = kldiv + dense_mnf.kl_div(same_noise)
83 | kldiv = kldiv + self.dense_mnf_out.kl_div(same_noise)
84 | return kldiv
85 |
86 | def reset_noise(self):
87 | if self.hidden_bayes:
88 | for dense_mnf in self.hidden_layers:
89 | dense_mnf.reset_noise()
90 | self.dense_mnf_out.reset_noise()
91 |
92 |
93 | class BNN_BBB(tf.Module):
94 | """
95 | Bayesian Neural Network with fully-connected layers utilizing Bayes by Backprop by Blundell et al. (2015).
96 | """
97 | def __init__(self, input_dim=1, hidden_units=[100, 100], output_dim=1, hidden_bayes=False, max_std=1.0):
98 | super(BNN_BBB, self).__init__()
99 | self.input_layer = tfkl.InputLayer(input_shape=(input_dim,))
100 |
101 | self.hidden_layers = []
102 | self.hidden_bayes = hidden_bayes
103 | for i in hidden_units:
104 | if hidden_bayes:
105 | self.hidden_layers.append(BayesByBackprop(n_out=i, max_std=max_std))
106 | else:
107 | self.hidden_layers.append(tfkl.Dense(i, activation='relu', kernel_initializer='RandomNormal'))
108 | self.dense_bbb_out = BayesByBackprop(n_out=output_dim, max_std=max_std)
109 |
110 | @tf.function
111 | def __call__(self, inputs, same_noise=False, training=True, *args, **kwargs):
112 | out = self.input_layer(inputs)
113 | for layer in self.hidden_layers:
114 | if self.hidden_bayes:
115 | out = layer(out, same_noise=same_noise, training=training)
116 | out = tf.nn.relu(out)
117 | else:
118 | out = layer(out) # relu already in keras layer
119 | out = self.dense_bbb_out(out, same_noise=same_noise, training=training)
120 | return out
121 |
122 | def kl_div(self, same_noise=True):
123 | """
124 | Compute current KL divergence of the Bayes by Backprop layers.
125 | Used as a regularization term during training.
126 | """
127 | kldiv = 0
128 | if self.hidden_bayes:
129 | for dense_bbb in self.hidden_layers:
130 | kldiv = kldiv + dense_bbb.kl_div(same_noise)
131 | kldiv = kldiv + self.dense_bbb_out.kl_div(same_noise)
132 | return kldiv
133 |
134 | def reset_noise(self):
135 | """
136 | Re-sample noise/epsilon parameters of the Bayes by Backprop layers. Required for the case of having the same
137 | epsilon parameters across one batch.
138 | """
139 | if self.hidden_bayes:
140 | for dense_bbb in self.hidden_layers:
141 | dense_bbb.reset_noise()
142 | self.dense_bbb_out.reset_noise()
143 |
144 |
145 | @tf.function
146 | def loss_fn(y_train, x_train, model, bayes, reg=1.0, same_noise=False):
147 | if bayes:
148 |         # divide by the total number of samples in an epoch (batch_size * steps_per_epoch)
149 | # here: steps_per_epoch = 1
150 | mse = tf.reduce_mean(tf.losses.mse(y_train, model(x_train, same_noise=same_noise)))
151 | kl_loss = model.kl_div() / tf.cast(x_train.shape[0]*reg, tf.float32)
152 | else:
153 | mse = tf.reduce_mean(tf.losses.mse(y_train, model(x_train)))
154 | kl_loss = 0
155 |
156 | return mse + kl_loss, kl_loss
157 |
158 |
159 | def fit_regression(network, hidden_bayes=False, same_noise=False, max_std=0.5, data="ian", save=False):
160 |
161 | # load data
162 | if data not in ALLOWED_DATA_CONFIGS:
163 | raise AssertionError(f"'data' has to be in {ALLOWED_DATA_CONFIGS} but was set to {data}.")
164 | elif data == TOY_DATA:
165 | data = np.load("data/train_data_regression.npz")
166 | x_train = data["x_train"]
167 | y_train = data["y_train"]
168 | x_lim, y_lim = 4.5, 70.0
169 | reg = 10.0 # regularization parameter lambda
170 | elif data == IAN_DATA:
171 | data = np.load("data/train_data_ian_regression.npz", allow_pickle=True)
172 | x_train = data["x_train"]
173 | y_train = data["y_train"]
174 | x_lim, y_lim = 12.0, 8.0
175 | reg = 30 # regularization parameter lambda
176 | elif data == SAMPLE_DATA:
177 | n_samples = 20
178 | toy_regression = ToyRegressionData()
179 | x_train, y_train = toy_regression.gen_data(n_samples)
180 | x_lim, y_lim = 4.5, 70.0
181 | reg = 10.0 # regularization parameter lambda
182 |
183 | # choose network
184 | if network not in ALLOWED_NETWORK_CONFIGS:
185 | raise AssertionError(f"'network' has to be in {ALLOWED_NETWORK_CONFIGS} but was set to {network}.")
186 | elif network == MNF:
187 | model = BNN_MNF(hidden_bayes=hidden_bayes, max_std=max_std)
188 | bayes = True
189 | elif network == BAYES_BY_BACKPROP:
190 | model = BNN_BBB(hidden_bayes=hidden_bayes, max_std=max_std)
191 | bayes = True
192 | elif network == DENSE:
193 | model = MLP()
194 | bayes = False
195 |
196 | epochs = 500
197 | learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(1e-2, epochs, 1e-6, power=0.5)
198 | opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn)
199 |
200 | # initialize
201 | _, _ = loss_fn(y_train, x_train, model, bayes, reg, same_noise)
202 |
203 | train_losses = []
204 | kl_losses = []
205 | for i in range(epochs):
206 | with tf.GradientTape() as tape:
207 | tape.watch(model.trainable_variables)
208 | loss, kl_loss = loss_fn(y_train, x_train, model, bayes, reg, same_noise)
209 | gradients = tape.gradient(loss, model.trainable_variables)
210 | opt.apply_gradients(zip(gradients, model.trainable_variables))
211 |
212 | if same_noise:
213 | model.reset_noise() # sample new epsilons
214 |
215 | train_losses.append(loss)
216 | kl_losses.append(kl_loss)
217 |
218 |         if i % 10 == 0:
219 | print(f"Epoch: {i}, MSE: {loss}, KL-loss: {kl_loss}")
220 |
221 | plt.plot(range(epochs), train_losses)
222 | plt.plot(range(epochs), kl_losses)
223 | plt.legend(["Train loss", "KL loss"])
224 |
225 | n_test = 500
226 | x_test = np.linspace(-x_lim, x_lim, n_test).reshape(n_test, 1).astype('float32')
227 |
228 | if bayes:
229 | y_preds = []
230 | for _ in range(20):
231 | y_pred = model(x_test)
232 | y_preds.append(y_pred)
233 | plt.figure(figsize=(10, 4))
234 | y_preds = np.array(y_preds).reshape(20, n_test)
235 | y_preds_mean = np.mean(y_preds, axis=0)
236 | y_preds_std = np.std(y_preds, axis=0)
237 |
238 | plt.scatter(x_train, y_train, c="orangered")
239 | color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
240 | plt.plot(x_test, y_preds_mean, color=color_pred)
241 | plt.fill_between(x_test.reshape(n_test,), y_preds_mean - y_preds_std, y_preds_mean + y_preds_std,
242 | alpha=0.25, color=color_pred)
243 | plt.fill_between(x_test.reshape(n_test,), y_preds_mean - 2.0 * y_preds_std, y_preds_mean + 2.0 * y_preds_std,
244 | alpha=0.35, color=color_pred)
245 |
246 | plt.xlim(-x_lim, x_lim)
247 | plt.ylim(-y_lim, y_lim)
248 | plt.legend(["Mean function", "Observations"])
249 |
250 | else:
251 | plt.figure(figsize=(10, 4))
252 | y_pred = model(x_test)
253 | plt.scatter(x_train, y_train, c="orangered")
254 | color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
255 | plt.plot(x_test, y_pred, color=color_pred)
256 | plt.xlim(-x_lim, x_lim)
257 | plt.ylim(-y_lim, y_lim)
258 | plt.legend(["Mean function", "Observations"])
259 |
260 | plt.tight_layout()
261 | if save:
262 | plt.savefig(f"plots/{network}.pdf")
263 | else:
264 | plt.show()
265 |
266 |
267 | if __name__ == '__main__':
268 | # test gpu availability
269 | print(f"GPU available: {tf.test.is_gpu_available()}")
270 |
271 | # set configuration
272 | network = MNF # choose from ALLOWED_NETWORK_CONFIGS
273 | hidden_bayes = False # False: last layer bayes, True: all layers bayes
274 | same_noise = True # set if same noise/epsilon should be used within a batch
275 | max_std = 0.5
276 | data = IAN_DATA # choose from ALLOWED_DATA_CONFIGS
277 | save = False # save images
278 |
279 | fit_regression(network=network, hidden_bayes=hidden_bayes, same_noise=same_noise, max_std=max_std, data=data,
280 | save=save)
281 |
--------------------------------------------------------------------------------
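A minimal usage sketch (not part of the repository): besides editing the __main__ block, fit_regression can be called directly, e.g. to compare the deterministic baseline against the last-layer MNF model on the same data.

    fit_regression(network=DENSE, data=IAN_DATA)
    fit_regression(network=MNF, hidden_bayes=False, same_noise=True, max_std=0.5, data=IAN_DATA)
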
/toy_regression_concrete_dropout.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import tensorflow as tf
4 |
5 | from data.toy_regression import ToyRegressionData
6 | from bayes.ConcreteDropout import ConcreteDropout
7 | from tensorflow.keras import optimizers
8 | from tensorflow.keras.layers import Dense, Input, concatenate
9 | from tensorflow.keras.models import Model
10 |
11 |
12 | TOY_DATA = "toy"
13 | IAN_DATA = "ian"
14 | SAMPLE_DATA = "sample"
15 | ALLOWED_DATA_CONFIGS = {TOY_DATA, IAN_DATA, SAMPLE_DATA}
16 |
17 | MSE = "mse"
18 | HETEROSCEDASTIC = "heteroscedastic"
19 | ALLOWED_LOSS_TYPES = {MSE, HETEROSCEDASTIC}
20 |
21 |
22 | def mse_loss(true, pred):
23 | return tf.reduce_mean((true - pred) ** 2, -1)
24 |
25 |
26 | def heteroscedastic_loss(y_train, pred):
27 | n_outputs = pred.shape[1] // 2
28 | mean = pred[:, :n_outputs]
29 | log_var = pred[:, n_outputs:]
30 | return tf.reduce_sum(0.5 * tf.exp(-1 * log_var) * tf.square(y_train - mean) + 0.5 * log_var)
31 |
32 |
33 | def make_model(loss_type, n_features, n_outputs, n_nodes=400, dropout_reg=1e-5, wd=1e-3):
34 | losses = []
35 | inp = Input(shape=(n_features,))
36 | x = inp
37 |
38 | x, loss = ConcreteDropout(Dense(n_nodes, activation='relu'),
39 | weight_regularizer=wd, dropout_regularizer=dropout_reg)(x)
40 | losses.append(loss)
41 | x, loss = ConcreteDropout(Dense(n_nodes, activation='relu'),
42 | weight_regularizer=wd, dropout_regularizer=dropout_reg)(x)
43 | losses.append(loss)
44 | x, loss = ConcreteDropout(Dense(n_nodes, activation='relu'),
45 | weight_regularizer=wd, dropout_regularizer=dropout_reg)(x)
46 | losses.append(loss)
47 |
48 | if loss_type == MSE:
49 | mean = Dense(100, activation='relu')(x)
50 | final_mean = Dense(n_outputs, activation='linear')(mean)
51 | model = Model(inp, final_mean)
52 | learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(1e-3, 500, 1e-5, power=0.5)
53 | model.compile(optimizer=optimizers.Adam(learning_rate=learning_rate_fn), loss=mse_loss)
54 |
55 | if loss_type == HETEROSCEDASTIC:
56 | mean = Dense(100, activation='relu')(x)
57 | final_mean = Dense(n_outputs, activation='linear')(mean)
58 |
59 | log_var = Dense(100, activation='relu')(x)
60 | final_log_var = Dense(n_outputs, activation='linear')(log_var)
61 |
62 | out = concatenate([final_mean, final_log_var])
63 | model = Model(inp, out)
64 | for loss in losses:
65 | model.add_loss(loss)
66 | learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(1e-3, 500, 1e-5, power=0.5)
67 | model.compile(optimizer=optimizers.Adam(learning_rate=learning_rate_fn), loss=heteroscedastic_loss,
68 | metrics=[mse_loss])
69 |
70 | return model
71 |
72 |
73 | def plot_heteroscedastic(model, save, x_train, y_train, x_lim, y_lim):
74 | n_test = 500
75 | x_test = np.linspace(-x_lim, x_lim, n_test).reshape(n_test, 1).astype('float32')
76 |
77 | preds_mean = []
78 | preds_var = []
79 | n_repeats = 20
80 | for _ in range(n_repeats):
81 | pred = model(x_test, training=True)
82 | n_outputs = pred.shape[1] // 2
83 | pred_mean = pred[:, :n_outputs]
84 | pred_var = pred[:, n_outputs:]
85 | preds_mean.append(pred_mean)
86 | preds_var.append(pred_var)
87 |
88 | plt.figure(figsize=(10, 4))
89 |     preds_mean = np.array(preds_mean).reshape(n_repeats, n_test)
90 |     preds_var = np.array(preds_var).reshape(n_repeats, n_test)
91 | preds_mean_mean = np.mean(preds_mean, axis=0)
92 | preds_mean_std = np.std(preds_mean, axis=0)
93 | preds_var_mean = np.mean(preds_var, axis=0)
94 |
95 |     plt.scatter(x_train, y_train, c="orangered", label='Training data')
96 | color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
97 | plt.plot(x_test, preds_mean_mean, color=color_pred, label='Mean function/Epistemic uncertainty')
98 | plt.plot(x_test, np.sqrt(np.exp(preds_var_mean)), color="green", label="Aleatoric uncertainty")
99 | plt.fill_between(x_test.reshape(n_test,), preds_mean_mean - preds_mean_std, preds_mean_mean + preds_mean_std,
100 | alpha=0.25, color=color_pred)
101 | plt.fill_between(x_test.reshape(n_test,), preds_mean_mean - 2.0 * preds_mean_std, preds_mean_mean + 2.0 * preds_mean_std,
102 | alpha=0.35, color=color_pred)
103 |
104 | plt.xlim(-x_lim, x_lim)
105 | plt.ylim(-y_lim, y_lim)
106 | plt.legend()
107 |
108 | plt.tight_layout()
109 | if save:
110 | plt.savefig("plots/Concrete_Dropout_heteroscedastic.png")
111 | else:
112 | plt.show()
113 |
114 |
115 | def plot_mse(model, save, x_train, y_train, x_lim, y_lim):
116 | n_test = 500
117 | x_test = np.linspace(-x_lim, x_lim, n_test).reshape(n_test, 1).astype('float32')
118 |
119 | preds = []
120 | n_repeats = 20
121 | for _ in range(n_repeats):
122 | pred = model(x_test, training=True)
123 | preds.append(pred)
124 |
125 | plt.figure(figsize=(10, 4))
126 | preds = np.array(preds).reshape(n_repeats, n_test)
127 | preds_mean = np.mean(preds, axis=0)
128 | preds_std = np.std(preds, axis=0)
129 |
130 | plt.scatter(x_train, y_train, c="orangered", label='Training data')
131 | color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
132 | plt.plot(x_test, preds_mean, color=color_pred, label='Mean function/Epistemic uncertainty')
133 | plt.fill_between(x_test.reshape(n_test,), preds_mean - preds_std, preds_mean + preds_std,
134 | alpha=0.25, color=color_pred)
135 | plt.fill_between(x_test.reshape(n_test,), preds_mean - 2.0 * preds_std, preds_mean + 2.0 * preds_std,
136 | alpha=0.35, color=color_pred)
137 |
138 | plt.xlim(-x_lim, x_lim)
139 | plt.ylim(-y_lim, y_lim)
140 | plt.legend()
141 |
142 | plt.tight_layout()
143 | if save:
144 | plt.savefig("plots/Concrete_Dropout_mse.pdf")
145 | else:
146 | plt.show()
147 |
148 |
149 | def fit_regression(loss_type="heteroscedastic", data="ian", save=False):
150 | # load data
151 | if data not in ALLOWED_DATA_CONFIGS:
152 | raise AssertionError(f"'data' has to be in {ALLOWED_DATA_CONFIGS} but was set to {data}.")
153 | elif data == TOY_DATA:
154 | data = np.load("data/train_data_regression.npz")
155 | x_train = data["x_train"]
156 | y_train = data["y_train"]
157 | x_lim, y_lim = 4.5, 70.0
158 | elif data == IAN_DATA:
159 | data = np.load("data/train_data_ian_regression.npz", allow_pickle=True)
160 | x_train = data["x_train"]
161 | y_train = data["y_train"]
162 | x_lim, y_lim = 12.0, 8.0
163 | elif data == SAMPLE_DATA:
164 | n_samples = 20
165 | toy_regression = ToyRegressionData()
166 | x_train, y_train = toy_regression.gen_data(n_samples)
167 | x_lim, y_lim = 4.5, 70.0
168 |
169 | if loss_type not in ALLOWED_LOSS_TYPES:
170 | raise AssertionError(f"'loss_type' has to be in {ALLOWED_LOSS_TYPES} but was set to {loss_type}.")
171 | elif loss_type == HETEROSCEDASTIC:
172 | y_lim = 20 # adapt y limit
173 |
174 | n_epochs = 500
175 | l = 1e-3 # length-scale
176 | weight_reg = l**2.0 / len(x_train)
177 | dropout_reg = 2.0 / len(x_train)
178 |
179 | model = make_model(loss_type, 1, 1, n_nodes=200, dropout_reg=dropout_reg, wd=weight_reg)
180 |
181 | print("Starting training...")
182 | model.fit(x_train, y_train, epochs=n_epochs)
183 |
184 | print("Starting plotting...")
185 |     if loss_type == MSE:
186 |         plot_mse(model, save, x_train, y_train, x_lim, y_lim)
187 |     if loss_type == HETEROSCEDASTIC:
188 |         plot_heteroscedastic(model, save, x_train, y_train, x_lim, y_lim)
189 |
190 | print("Dropout rates:")
191 | for i in model.layers:
192 | if isinstance(i, ConcreteDropout):
193 | print(tf.math.sigmoid(i.p_logit))
194 |
195 |
196 | if __name__ == '__main__':
197 | # test gpu availability
198 | print(f"GPU available: {tf.test.is_gpu_available()}")
199 |
200 | # set configuration
201 | loss_type = MSE
202 | data = IAN_DATA # choose from ALLOWED_DATA_CONFIGS
203 | save = False # save images
204 |
205 | fit_regression(loss_type=loss_type, data=data, save=save)
206 |
--------------------------------------------------------------------------------
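A minimal numeric sketch (not part of the repository) of the heteroscedastic loss defined above: the network output is split into a mean half and a log-variance half, and the Gaussian negative log-likelihood 0.5 * exp(-log_var) * (y - mean)^2 + 0.5 * log_var is summed over the batch. All numbers are illustrative.

    import tensorflow as tf

    y = tf.constant([[1.0], [2.0]])
    pred = tf.constant([[1.5, 0.0],     # mean = 1.5, log_var = 0.0  -> sigma^2 = 1
                        [2.0, -2.0]])   # mean = 2.0, log_var = -2.0 -> sigma^2 = exp(-2)
    print(heteroscedastic_loss(y, pred))  # 0.125 + 0.0 + 0.0 + (-1.0) = -0.875
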
/toy_regression_mc_dropout.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | import matplotlib.pyplot as plt
4 |
5 | from data.toy_regression import ToyRegressionData
6 |
7 | tfkl = tf.keras.layers
8 |
9 | TOY_DATA = "toy"
10 | IAN_DATA = "ian"
11 | SAMPLE_DATA = "sample"
12 | ALLOWED_DATA_CONFIGS = {TOY_DATA, IAN_DATA, SAMPLE_DATA}
13 |
14 | MSE = "mse"
15 | HETEROSCEDASTIC = "heteroscedastic"
16 | ALLOWED_LOSS_TYPES = {MSE, HETEROSCEDASTIC}
17 |
18 |
19 | class MC_Dropout(tf.keras.Model):
20 | """
21 | Neural network with MC dropout according to
22 | "Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning"
23 | - Gal and Ghahramani (2015): https://arxiv.org/abs/1506.02142.
24 |
25 | Two different models are possible depending on the specified 'loss_type':
26 | - 'mse': bayesian model that only predicts the output mean
27 | - 'heteroscedastic': bayesian model that predicts the output mean and variance; can be used to model the
28 | epistemic (knowledge) and aleatoric (data) uncertainty separately
29 | """
30 | def __init__(self, input_dim=1, hidden_units=[100, 100], dropout_per_layer=[0.2, 0.2], output_dim=1,
31 | loss_type="mse"):
32 | super(MC_Dropout, self).__init__()
33 |
34 | N = 100 # data points, constant for simplicity
35 | lengthscale = 1e-1
36 | tau = 1
37 | reg_no_dropout = lengthscale**2.0 / (2.0 * N * tau)
38 |
39 | self.loss_type = loss_type
40 |
41 | self.input_layer = tfkl.InputLayer(input_shape=(input_dim,))
42 | self.hidden_layers = []
43 | for n_neurons, dropout_rate in zip(hidden_units, dropout_per_layer):
44 | reg = ((1 - dropout_rate) * lengthscale**2.0) / (2.0 * N * tau)
45 | self.hidden_layers.append(tfkl.Dense(n_neurons, activation='relu',
46 | kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg)))
47 | self.hidden_layers.append(tfkl.Dropout(dropout_rate, trainable=True))
48 |
49 | self.hidden_layer_mean = tfkl.Dense(100, activation='relu',
50 | kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg_no_dropout))
51 | self.hidden_layer_var = tfkl.Dense(100, activation='relu',
52 | kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg_no_dropout))
53 |
54 | self.output_layer_mean = tfkl.Dense(output_dim, activation='linear',
55 | kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg_no_dropout))
56 | self.output_layer_var = tfkl.Dense(output_dim, activation='linear',
57 | kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg_no_dropout))
58 |
59 | @tf.function
60 | def call(self, inputs):
61 | out = self.input_layer(inputs)
62 | for layer in self.hidden_layers:
63 | out = layer(out)
64 |
65 | if self.loss_type == MSE:
66 | # one head for the mean
67 | final_mean = self.output_layer_mean(out)
68 | return final_mean
69 |
70 | if self.loss_type == HETEROSCEDASTIC:
71 | # two heads for mean and variance
72 | y_mean = self.hidden_layer_mean(out)
73 | final_mean = self.output_layer_mean(y_mean)
74 |
75 | y_var = self.hidden_layer_var(out)
76 | final_log_var = self.output_layer_var(y_var)
77 |
78 | return final_mean, final_log_var
79 |
80 |
81 | def plot_heteroscedastic(model, save, x_train, y_train, x_lim, y_lim):
82 | n_test = 500
83 | x_test = np.linspace(-x_lim, x_lim, n_test).reshape(n_test, 1).astype('float32')
84 |
85 | preds_mean = []
86 | preds_var = []
87 | n_repeats = 20
88 | for _ in range(n_repeats):
89 | pred_mean, pred_var = model(x_test, training=True)
90 | preds_mean.append(pred_mean)
91 | preds_var.append(pred_var)
92 |
93 | plt.figure(figsize=(10, 4))
94 |     preds_mean = np.array(preds_mean).reshape(n_repeats, n_test)
95 |     preds_var = np.array(preds_var).reshape(n_repeats, n_test)
96 | preds_mean_mean = np.mean(preds_mean, axis=0)
97 | preds_mean_std = np.std(preds_mean, axis=0)
98 | preds_var_mean = np.mean(preds_var, axis=0)
99 |
100 |     plt.scatter(x_train, y_train, c="orangered", label='Training data')
101 | color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
102 | plt.plot(x_test, preds_mean_mean, color=color_pred, label='Mean function/Epistemic uncertainty')
103 | plt.plot(x_test, np.sqrt(np.exp(preds_var_mean)), color="green", label="Aleatoric uncertainty")
104 | plt.fill_between(x_test.reshape(n_test,), preds_mean_mean - preds_mean_std, preds_mean_mean + preds_mean_std,
105 | alpha=0.25, color=color_pred)
106 | plt.fill_between(x_test.reshape(n_test,), preds_mean_mean - 2.0 * preds_mean_std, preds_mean_mean + 2.0 * preds_mean_std,
107 | alpha=0.35, color=color_pred)
108 |
109 | plt.xlim(-x_lim, x_lim)
110 | plt.ylim(-y_lim, y_lim)
111 | plt.legend()
112 |
113 | plt.tight_layout()
114 | if save:
115 | plt.savefig("plots/MC_Dropout_heteroscedastic.png")
116 | else:
117 | plt.show()
118 |
119 |
120 | def plot_mse(model, save, x_train, y_train, x_lim, y_lim):
121 | n_test = 500
122 | x_test = np.linspace(-x_lim, x_lim, n_test).reshape(n_test, 1).astype('float32')
123 |
124 | preds = []
125 | n_repeats = 20
126 | for _ in range(n_repeats):
127 | pred = model(x_test, training=True)
128 | preds.append(pred)
129 |
130 | plt.figure(figsize=(10, 4))
131 | preds = np.array(preds).reshape(n_repeats, n_test)
132 | preds_mean = np.mean(preds, axis=0)
133 | preds_std = np.std(preds, axis=0)
134 |
135 | plt.scatter(x_train, y_train, c="orangered", label='Training data')
136 | color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
137 | plt.plot(x_test, preds_mean, color=color_pred, label='Mean function/Epistemic uncertainty')
138 | plt.fill_between(x_test.reshape(n_test,), preds_mean - preds_std, preds_mean + preds_std,
139 | alpha=0.25, color=color_pred)
140 | plt.fill_between(x_test.reshape(n_test,), preds_mean - 2.0 * preds_std, preds_mean + 2.0 * preds_std,
141 | alpha=0.35, color=color_pred)
142 |
143 | plt.xlim(-x_lim, x_lim)
144 | plt.ylim(-y_lim, y_lim)
145 | plt.legend()
146 |
147 | plt.tight_layout()
148 | if save:
149 | plt.savefig("plots/MC_Dropout_mse.pdf")
150 | else:
151 | plt.show()
152 |
153 |
154 | @tf.function
155 | def mse_loss(y_train, x_train, model):
156 | mse = tf.reduce_mean(tf.losses.mse(y_train, model(x_train)))
157 | reg = tf.reduce_sum(model.losses) # regularization loss
158 | return mse + reg, reg
159 |
160 |
161 | @tf.function
162 | def heteroscedastic_loss(y_train, x_train, model):
163 | mean, log_var = model(x_train)
164 | mse = tf.reduce_sum(0.5 * tf.exp(-1.0 * log_var) * tf.square(y_train - mean) + 0.5 * log_var)
165 | reg = tf.reduce_sum(model.losses) # regularization loss
166 | return mse + reg, reg
167 |
168 |
169 | def fit_regression(loss_type="heteroscedastic", data="ian", additional_data=False, save=False):
170 | # load data
171 | if data not in ALLOWED_DATA_CONFIGS:
172 | raise AssertionError(f"'data' has to be in {ALLOWED_DATA_CONFIGS} but was set to {data}.")
173 | elif data == TOY_DATA:
174 | data = np.load("data/train_data_regression.npz")
175 | x_train = data["x_train"]
176 | y_train = data["y_train"]
177 | x_lim, y_lim = 4.5, 70.0
178 | elif data == IAN_DATA:
179 | data = np.load("data/train_data_ian_regression.npz", allow_pickle=True)
180 | x_train = data["x_train"]
181 | y_train = data["y_train"]
182 | x_lim, y_lim = 12.0, 8.0
183 | elif data == SAMPLE_DATA:
184 | n_samples = 20
185 | toy_regression = ToyRegressionData()
186 | x_train, y_train = toy_regression.gen_data(n_samples)
187 | x_lim, y_lim = 4.5, 70.0
188 |
189 | if loss_type not in ALLOWED_LOSS_TYPES:
190 | raise AssertionError(f"'loss_type' has to be in {ALLOWED_LOSS_TYPES} but was set to {loss_type}.")
191 | elif loss_type == HETEROSCEDASTIC:
192 | y_lim = 20 # adapt y limit
193 |
194 | hidden_units = [100, 100]
195 | dropout_per_layer = [0.09, 0.119]
196 |
197 | model = MC_Dropout(hidden_units=hidden_units, dropout_per_layer=dropout_per_layer, loss_type=loss_type)
198 |
199 | # Add special points
200 | if additional_data:
201 | x_extension = np.array([[-10.2], [-10.1]])
202 | y_extension = np.array([[-6.1], [-6.2]])
203 | x_train = np.insert(x_train, 0, x_extension, axis=0)
204 | y_train = np.insert(y_train, 0, y_extension, axis=0)
205 |
206 | epochs = 500
207 | learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(1e-3, epochs, 1e-5, power=0.5)
208 | opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn)
209 |
210 | for i in range(epochs):
211 | with tf.GradientTape() as tape:
212 | tape.watch(model.trainable_variables)
213 | if loss_type == MSE:
214 | loss, reg = mse_loss(y_train, x_train, model)
215 | if loss_type == HETEROSCEDASTIC:
216 | loss, reg = heteroscedastic_loss(y_train, x_train, model)
217 | gradients = tape.gradient(loss, model.trainable_variables)
218 | opt.apply_gradients(zip(gradients, model.trainable_variables))
219 |
220 |         if i % 10 == 0:
221 |             print(f"Epoch: {i}, Loss: {loss}, Regularization: {reg}")
225 |
226 | if loss_type == MSE:
227 | plot_mse(model, save, x_train, y_train, x_lim, y_lim)
228 | if loss_type == HETEROSCEDASTIC:
229 | plot_heteroscedastic(model, save, x_train, y_train, x_lim, y_lim)
230 |
231 |
232 | if __name__ == '__main__':
233 | # test gpu availability
234 | print(f"GPU available: {tf.test.is_gpu_available()}")
235 |
236 | # set configuration
237 | loss_type = HETEROSCEDASTIC
238 | data = IAN_DATA # choose from ALLOWED_DATA_CONFIGS
239 | additional_data = False
240 | save = False # save images
241 |
242 | fit_regression(loss_type=loss_type, data=data, additional_data=additional_data, save=save)
243 |
--------------------------------------------------------------------------------
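A minimal sketch (not part of the repository) of how the T stochastic forward passes of the heteroscedastic MC-dropout model are usually combined into one predictive mean and a total variance: epistemic uncertainty is taken as the variance of the predicted means and aleatoric uncertainty as the mean of the predicted variances. The untrained model only illustrates the mechanics and mirrors what plot_heteroscedastic does above.

    import numpy as np

    model = MC_Dropout(hidden_units=[100, 100], dropout_per_layer=[0.1, 0.1], loss_type=HETEROSCEDASTIC)
    x_test = np.linspace(-12.0, 12.0, 500).reshape(500, 1).astype("float32")

    T = 20
    means, log_vars = [], []
    for _ in range(T):
        m, lv = model(x_test, training=True)   # keep dropout active at prediction time
        means.append(m.numpy())
        log_vars.append(lv.numpy())

    means, log_vars = np.stack(means), np.stack(log_vars)   # (T, 500, 1) each
    pred_mean = means.mean(axis=0)
    epistemic_var = means.var(axis=0)                       # spread of the T mean predictions
    aleatoric_var = np.exp(log_vars).mean(axis=0)           # average predicted data noise
    total_std = np.sqrt(epistemic_var + aleatoric_var)
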
/train_bbb_dqn.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import tensorflow as tf
3 |
4 | from envs.env_utils import WrapFrameSkip
5 | from dqn.Bayes_by_Backprop_DQN import BBBDQN
6 | from dqn.train import train_dqn
7 |
8 | # config cart pole
9 | CONFIG_CARTPOLE = {
10 | "env_name": "CartPole-v1",
11 | "algorithm": "bayes_by_backprop",
12 | "seed": [210, 142, 531, 461, 314],
13 | "runs": 1, # perform e.g. 5 runs
14 | "env_render": True,
15 | "alpha": 1,
16 | "skip_frame_num": 0,
17 | "epochs_num": 50,
18 | "hidden_units": "100,100",
19 | "gradient_update_gamma": 0.9,
20 | "batch_size": 64,
21 | "learning_rate_init": 1e-3,
22 | "experiences_max": 5000,
23 | "experiences_min": 200,
24 | "epsilon_min": None,
25 | "epsilon": None,
26 | "epsilon_decay": None,
27 | "copy_steps": 25,
28 | "gradient_steps": 1,
29 | "step_limit": 200,
30 | "test_episodes": 5, # perform a test episode after 'test episode' many train epochs
31 | "plot_avg_reward": True,
32 | "save": False, # saves a npz-file with the data of the runs
33 | }
34 |
35 | # config mountain car
36 | CONFIG_MOUNTAINCAR = {
37 | "env_name": "MountainCar-v0",
38 | "algorithm": "bayes_by_backprop",
39 | "seed": [210, 142, 531, 461, 314],
40 | "runs": 1, # perform e.g. 5 runs
41 | "env_render": True,
42 | "alpha": 1,
43 | "skip_frame_num": 4,
44 | "epochs_num": 100,
45 | "hidden_units": "200,200,200,200",
46 | "gradient_update_gamma": 0.9,
47 | "batch_size": 64,
48 | "learning_rate_init": 1e-3,
49 | "experiences_max": 5000,
50 | "experiences_min": 200,
51 | "epsilon_min": None,
52 | "epsilon": None,
53 | "epsilon_decay": None,
54 | "copy_steps": 25,
55 | "gradient_steps": 1,
56 | "step_limit": 500,
57 | "test_episodes": 10, # perform a test episode after 'test episode' many train epochs
58 | "plot_avg_reward": True,
59 | "save": False, # saves a npz-file with the data of the runs
60 | }
61 |
62 | config = CONFIG_CARTPOLE # switch between cart pole and mountain car
63 |
64 | config_static = {
65 | "learning_rate": tf.keras.optimizers.schedules.PolynomialDecay(config["learning_rate_init"],
66 | config["epochs_num"]*config["step_limit"], 1e-5,
67 | power=0.5)
68 | }
69 |
70 | # Setup environment
71 | env = gym.make(config["env_name"]).env # remove 200 step limit
72 |
73 | if config["skip_frame_num"] > 0: # optional: skip frames to ease training in MountainCar
74 | env = WrapFrameSkip(env, frameskip=config["skip_frame_num"])
75 |
76 | num_states = len(env.observation_space.sample())
77 | num_actions = env.action_space.n
78 | print(f"Number of available actions: {num_actions}")
79 | print(f"Action space: {env.action_space}")
80 |
81 | hidden_units = []
82 | for i in config["hidden_units"].split(","):
83 | hidden_units.append(int(i))
84 |
85 | print(f"GPU available: {tf.test.is_gpu_available()}")
86 |
87 | for run_id in (range(config["runs"])):
88 | tf.random.set_seed(config["seed"][run_id])
89 |
90 | # initialize train (action-value function) and target network (target action-value function)
91 | train_net = BBBDQN(num_states, num_actions, hidden_units, config["gradient_update_gamma"], config["experiences_max"],
92 | config["experiences_min"], config["batch_size"], config_static["learning_rate"], config["alpha"])
93 | target_net = BBBDQN(num_states, num_actions, hidden_units, config["gradient_update_gamma"], config["experiences_max"],
94 | config["experiences_min"], config["batch_size"], config_static["learning_rate"], config["alpha"])
95 |
96 | train_dqn(config, env, train_net, target_net, run_id)
97 |
--------------------------------------------------------------------------------
/train_dqn.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import tensorflow as tf
3 |
4 | from envs.env_utils import WrapFrameSkip
5 | from dqn.DQN import DQN
6 | from dqn.train import train_dqn
7 |
8 | # config cart pole
9 | CONFIG_CARTPOLE = {
10 | "env_name": "CartPole-v1",
11 | "algorithm": "dqn",
12 | "seed": [210, 142, 531, 461, 314],
13 | "runs": 1, # perform e.g. 5 runs
14 | "env_render": True,
15 | "alpha": None,
16 | "skip_frame_num": 0,
17 | "epochs_num": 50,
18 | "hidden_units": "100,100",
19 | "gradient_update_gamma": 0.9,
20 | "batch_size": 64,
21 | "learning_rate_init": 1e-3,
22 | "experiences_max": 5000,
23 | "experiences_min": 200,
24 | "epsilon_min": 0.2,
25 | "epsilon": 1.0,
26 | "epsilon_decay": 0.95,
27 | "copy_steps": 25,
28 | "gradient_steps": 1,
29 | "step_limit": 200,
30 | "test_episodes": 5, # perform a test episode after 'test episode' many train epochs
31 | "plot_avg_reward": True,
32 | "save": False, # saves a npz-file with the data of the runs
33 | }
34 |
35 | # config mountain car
36 | CONFIG_MOUNTAINCAR = {
37 | "env_name": "MountainCar-v0",
38 | "algorithm": "dqn",
39 | "seed": [210, 142, 531, 461, 314],
40 | "runs": 1, # perform e.g. 5 runs
41 | "env_render": True,
42 | "alpha": None,
43 | "skip_frame_num": 4,
44 | "epochs_num": 100,
45 | "hidden_units": "200,200,200,200",
46 | "gradient_update_gamma": 0.9,
47 | "batch_size": 64,
48 | "learning_rate_init": 1e-3,
49 | "experiences_max": 5000,
50 | "experiences_min": 200,
51 | "epsilon_min": 0.2,
52 | "epsilon": 1.0,
53 | "epsilon_decay": 0.99,
54 | "copy_steps": 25,
55 | "gradient_steps": 1,
56 | "step_limit": 500,
57 | "test_episodes": 10, # perform a test episode after 'test episode' many train epochs
58 | "plot_avg_reward": True,
59 | "save": False, # saves a npz-file with the data of the runs
60 | }
61 |
62 | config = CONFIG_MOUNTAINCAR # switch between cart pole and mountain car
63 |
64 | config_static = {
65 | "learning_rate": tf.keras.optimizers.schedules.PolynomialDecay(config["learning_rate_init"],
66 | config["epochs_num"]*config["step_limit"], 1e-5,
67 | power=0.5)
68 | }
69 |
70 | # Setup environment
71 | env = gym.make(config["env_name"]).env # remove 200 step limit
72 |
73 | if config["skip_frame_num"] > 0: # optional: skip frames to ease training in MountainCar
74 | env = WrapFrameSkip(env, frameskip=config["skip_frame_num"])
75 |
76 | num_states = len(env.observation_space.sample())
77 | num_actions = env.action_space.n
78 | print(f"Number of available actions: {num_actions}")
79 | print(f"Action space: {env.action_space}")
80 |
81 | hidden_units = []
82 | for i in config["hidden_units"].split(","):
83 | hidden_units.append(int(i))
84 |
85 | print(f"GPU available: {tf.test.is_gpu_available()}")
86 |
87 | for run_id in (range(config["runs"])):
88 | tf.random.set_seed(config["seed"][run_id])
89 |
90 | # initialize train (action-value function) and target network (target action-value function)
91 | train_net = DQN(num_states, num_actions, hidden_units, config["gradient_update_gamma"], config["experiences_max"],
92 | config["experiences_min"], config["batch_size"], config_static["learning_rate"])
93 | target_net = DQN(num_states, num_actions, hidden_units, config["gradient_update_gamma"], config["experiences_max"],
94 | config["experiences_min"], config["batch_size"], config_static["learning_rate"])
95 |
96 | train_dqn(config, env, train_net, target_net, run_id)
97 |
--------------------------------------------------------------------------------
/train_dqn_dropout.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import tensorflow as tf
3 |
4 | from envs.env_utils import WrapFrameSkip
5 | from dqn.MC_Dropout_DQN import DQN
6 | from dqn.train import train_dqn
7 |
8 | # config cart pole
9 | CONFIG_CARTPOLE = {
10 | "env_name": "CartPole-v1",
11 | "algorithm": "mc_dropout",
12 | "seed": [210, 142, 531, 461, 314],
13 | "runs": 1, # perform e.g. 5 runs
14 | "env_render": True,
15 | "alpha": 1,
16 | "skip_frame_num": 0,
17 | "epochs_num": 50,
18 | "hidden_units": "100,100",
19 | "gradient_update_gamma": 0.9,
20 | "batch_size": 64,
21 | "learning_rate_init": 1e-3,
22 | "experiences_max": 5000,
23 | "experiences_min": 200,
24 | "epsilon_min": None,
25 | "epsilon": None,
26 | "epsilon_decay": None,
27 | "copy_steps": 25,
28 | "gradient_steps": 1,
29 | "step_limit": 200,
30 | "test_episodes": 5, # perform a test episode after 'test episode' many train epochs
31 | "plot_avg_reward": True,
32 | "save": False, # saves a npz-file with the data of the runs
33 | "dropout_rate": 0.2,
34 | }
35 |
36 | # config mountain car
37 | CONFIG_MOUNTAINCAR = {
38 | "env_name": "MountainCar-v0",
39 | "algorithm": "mc_dropout",
40 | "seed": [210, 142, 531, 461, 314],
41 | "runs": 1, # perform e.g. 5 runs
42 | "env_render": True,
43 | "alpha": 1,
44 | "skip_frame_num": 4,
45 | "epochs_num": 100,
46 | "hidden_units": "200,200,200,200",
47 | "gradient_update_gamma": 0.9,
48 | "batch_size": 64,
49 | "learning_rate_init": 1e-3,
50 | "experiences_max": 5000,
51 | "experiences_min": 200,
52 | "epsilon_min": None,
53 | "epsilon": None,
54 | "epsilon_decay": None,
55 | "copy_steps": 25,
56 | "gradient_steps": 1,
57 | "step_limit": 500,
58 | "test_episodes": 10, # perform a test episode after 'test episode' many train epochs
59 | "plot_avg_reward": True,
60 | "save": False, # saves a npz-file with the data of the runs
61 | "dropout_rate": 0.2,
62 | }
63 |
64 | config = CONFIG_CARTPOLE # switch between cart pole and mountain car
65 |
66 | config_static = {
67 | "learning_rate": tf.keras.optimizers.schedules.PolynomialDecay(config["learning_rate_init"],
68 | config["epochs_num"]*config["step_limit"], 1e-5,
69 | power=0.5)
70 | }
71 |
72 | # Setup environment
73 | env = gym.make(config["env_name"]).env # remove 200 step limit
74 |
75 | if config["skip_frame_num"] > 0: # optional: skip frames to ease training in MountainCar
76 | env = WrapFrameSkip(env, frameskip=config["skip_frame_num"])
77 |
78 | num_states = len(env.observation_space.sample())
79 | num_actions = env.action_space.n
80 | print(f"Number of available actions: {num_actions}")
81 | print(f"Action space: {env.action_space}")
82 |
83 | hidden_units = []
84 | for i in config["hidden_units"].split(","):
85 | hidden_units.append(int(i))
86 |
87 | print(f"GPU available: {tf.test.is_gpu_available()}")
88 |
89 | for run_id in (range(config["runs"])):
90 | tf.random.set_seed(config["seed"][run_id])
91 |
92 | # initialize train (action-value function) and target network (target action-value function)
93 | train_net = DQN(num_states=num_states, num_actions=num_actions, hidden_units=hidden_units,
94 | gamma=config["gradient_update_gamma"], max_experiences=config["experiences_max"],
95 | min_experiences=config["experiences_min"], batch_size=config["batch_size"],
96 | lr=config_static["learning_rate"], dropout_rate=config["dropout_rate"])
97 | target_net = DQN(num_states=num_states, num_actions=num_actions, hidden_units=hidden_units,
98 | gamma=config["gradient_update_gamma"], max_experiences=config["experiences_max"],
99 | min_experiences=config["experiences_min"], batch_size=config["batch_size"],
100 | lr=config_static["learning_rate"], dropout_rate=config["dropout_rate"])
101 |
102 | train_dqn(config, env, train_net, target_net, run_id)
103 |
--------------------------------------------------------------------------------
/train_dqn_dropout_concrete.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import tensorflow as tf
3 |
4 | from envs.env_utils import WrapFrameSkip
5 | from dqn.Concrete_Dropout_DQN import DQN
6 | from dqn.train import train_dqn
7 |
8 | # config cart pole
9 | CONFIG_CARTPOLE = {
10 | "env_name": "CartPole-v1",
11 | "algorithm": "concrete_dropout",
12 | "seed": [210, 142, 531, 461, 314],
13 | "runs": 1, # perform e.g. 5 runs
14 | "env_render": True,
15 | "alpha": 1,
16 | "skip_frame_num": 0,
17 | "epochs_num": 50,
18 | "hidden_units": "100,100", # 400, 400
19 | "gradient_update_gamma": 0.9,
20 | "batch_size": 64,
21 | "learning_rate_init": 1e-3,
22 | "experiences_max": 5000,
23 | "experiences_min": 200,
24 | "epsilon_min": None,
25 | "epsilon": None,
26 | "epsilon_decay": None,
27 | "copy_steps": 25,
28 | "gradient_steps": 1,
29 | "step_limit": 200,
30 | "test_episodes": 5, # perform a test episode after 'test episode' many train epochs
31 | "plot_avg_reward": True,
32 | "save": False, # saves a npz-file with the data of the runs
33 | }
34 |
35 | # config mountain car
36 | CONFIG_MOUNTAINCAR = {
37 | "env_name": "MountainCar-v0",
38 | "algorithm": "concrete_dropout",
39 | "seed": [210, 142, 531, 461, 314],
40 | "runs": 1, # perform e.g. 5 runs
41 | "env_render": True,
42 | "alpha": 1,
43 | "skip_frame_num": 4,
44 | "epochs_num": 100,
45 | "hidden_units": "200,200,200,200",
46 | "gradient_update_gamma": 0.9,
47 | "batch_size": 64,
48 | "learning_rate_init": 1e-3,
49 | "experiences_max": 5000,
50 | "experiences_min": 200,
51 | "epsilon_min": None,
52 | "epsilon": None,
53 | "epsilon_decay": None,
54 | "copy_steps": 25,
55 | "gradient_steps": 1,
56 | "step_limit": 500,
57 | "test_episodes": 10, # perform a test episode after 'test episode' many train epochs
58 | "plot_avg_reward": True,
59 | "save": False, # saves a npz-file with the data of the runs
60 | }
61 |
62 | config = CONFIG_CARTPOLE # switch between cart pole and mountain car
63 |
64 | config_static = {
65 | "learning_rate": tf.keras.optimizers.schedules.PolynomialDecay(config["learning_rate_init"],
66 | config["epochs_num"]*config["step_limit"], 1e-5,
67 | power=0.5)
68 | }
69 |
70 | # Setup environment
71 | env = gym.make(config["env_name"]).env # remove 200 step limit
72 |
73 | if config["skip_frame_num"] > 0: # optional: skip frames to ease training in MountainCar
74 | env = WrapFrameSkip(env, frameskip=config["skip_frame_num"])
75 |
76 | num_states = len(env.observation_space.sample())
77 | num_actions = env.action_space.n
78 | print(f"Number of available actions: {num_actions}")
79 | print(f"Action space: {env.action_space}")
80 |
81 | hidden_units = []
82 | for i in config["hidden_units"].split(","):
83 | hidden_units.append(int(i))
84 |
85 | print(f"GPU available: {tf.test.is_gpu_available()}")
86 |
87 | for run_id in (range(config["runs"])):
88 | tf.random.set_seed(config["seed"][run_id])
89 |
90 | # initialize train (action-value function) and target network (target action-value function)
91 | train_net = DQN(num_states=num_states, num_actions=num_actions, hidden_units=hidden_units,
92 | gamma=config["gradient_update_gamma"], max_experiences=config["experiences_max"],
93 | min_experiences=config["experiences_min"], batch_size=config["batch_size"],
94 | lr=config_static["learning_rate"])
95 | target_net = DQN(num_states=num_states, num_actions=num_actions, hidden_units=hidden_units,
96 | gamma=config["gradient_update_gamma"], max_experiences=config["experiences_max"],
97 | min_experiences=config["experiences_min"], batch_size=config["batch_size"],
98 | lr=config_static["learning_rate"])
99 |
100 | train_dqn(config, env, train_net, target_net, run_id)
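    # Note: train_dqn (dqn/train.py) presumably runs the epochs_num training
    # epochs, syncs target_net with train_net every copy_steps steps, and handles
    # the plotting/saving controlled by the config flags above.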
101 |
--------------------------------------------------------------------------------
/train_mnf_dqn.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import tensorflow as tf
3 |
4 | from envs.env_utils import WrapFrameSkip
5 | from dqn.MNF_DQN import MNFDQN
6 | from dqn.train import train_dqn
7 |
8 | # config cart pole
9 | CONFIG_CARTPOLE = {
10 | "env_name": "CartPole-v1",
11 | "algorithm": "mnf",
12 | "seed": [210, 142, 531, 461, 314],
13 |     "runs": 1, # number of independent runs (e.g. 5); at most len(seed)
14 | "env_render": True,
15 | "alpha": 1,
16 | "skip_frame_num": 0,
17 | "epochs_num": 50,
18 | "hidden_units": "100,100",
19 | "gradient_update_gamma": 0.9,
20 | "batch_size": 64,
21 | "learning_rate_init": 1e-3,
22 | "experiences_max": 5000,
23 | "experiences_min": 200,
24 | "epsilon_min": None,
25 | "epsilon": None,
26 | "epsilon_decay": None,
27 | "copy_steps": 25,
28 | "gradient_steps": 1,
29 | "step_limit": 200,
30 |     "test_episodes": 5, # run a test episode after every 'test_episodes' training epochs
31 | "plot_avg_reward": True,
32 |     "save": False, # save an .npz file with the data from the runs
33 | }
34 |
35 | # config mountain car
36 | CONFIG_MOUNTAINCAR = {
37 | "env_name": "MountainCar-v0",
38 | "algorithm": "mnf",
39 | "seed": [210, 142, 531, 461, 314],
40 |     "runs": 1, # number of independent runs (e.g. 5); at most len(seed)
41 | "env_render": True,
42 | "alpha": 1,
43 | "skip_frame_num": 4,
44 | "epochs_num": 100,
45 | "hidden_units": "200,200,200,200",
46 | "gradient_update_gamma": 0.9,
47 | "batch_size": 64,
48 | "learning_rate_init": 1e-3,
49 | "experiences_max": 5000,
50 | "experiences_min": 200,
51 | "epsilon_min": None,
52 | "epsilon": None,
53 | "epsilon_decay": None,
54 | "copy_steps": 25,
55 | "gradient_steps": 1,
56 | "step_limit": 500,
57 |     "test_episodes": 10, # run a test episode after every 'test_episodes' training epochs
58 | "plot_avg_reward": True,
59 |     "save": False, # save an .npz file with the data from the runs
60 | }
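# Note: relative to the CartPole config, MountainCar uses frame skipping
# (skip_frame_num=4), a deeper network ("200,200,200,200"), more epochs and a
# longer step_limit; its sparse reward presumably makes it the harder
# exploration benchmark for the uncertainty-aware agents.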
61 |
62 | config = CONFIG_CARTPOLE # set to CONFIG_MOUNTAINCAR to train on MountainCar instead
63 |
64 | config_static = {
65 | "learning_rate": tf.keras.optimizers.schedules.PolynomialDecay(config["learning_rate_init"],
66 | config["epochs_num"]*config["step_limit"], 1e-5,
67 | power=0.5)
68 | }
69 |
70 | # Setup environment
71 | env = gym.make(config["env_name"]).env # unwrap TimeLimit to remove the default episode step limit
72 |
73 | if config["skip_frame_num"] > 0: # optional: skip frames to ease training in MountainCar
74 | env = WrapFrameSkip(env, frameskip=config["skip_frame_num"])
75 |
76 | num_states = len(env.observation_space.sample())
77 | num_actions = env.action_space.n
78 | print(f"Number of available actions: {num_actions}")
79 | print(f"Action space: {env.action_space}")
80 |
81 | hidden_units = []
82 | for i in config["hidden_units"].split(","):
83 | hidden_units.append(int(i))
84 |
85 | print(f"GPU available: {tf.test.is_gpu_available()}")
86 |
87 | for run_id in range(config["runs"]):
88 | tf.random.set_seed(config["seed"][run_id])
89 |
90 | # initialize train (action-value function) and target network (target action-value function)
91 | train_net = MNFDQN(num_states, num_actions, hidden_units, config["gradient_update_gamma"],
92 | config["experiences_max"], config["experiences_min"], config["batch_size"],
93 | config_static["learning_rate"], config["alpha"])
94 | target_net = MNFDQN(num_states, num_actions, hidden_units, config["gradient_update_gamma"],
95 | config["experiences_max"], config["experiences_min"], config["batch_size"],
96 | config_static["learning_rate"], config["alpha"])
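    # Note: unlike the concrete-dropout script, MNFDQN takes its arguments
    # positionally and receives an extra config["alpha"] value, which presumably
    # weights the variational (KL) term of the MNF objective.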
97 |
98 | train_dqn(config, env, train_net, target_net, run_id)
99 |
--------------------------------------------------------------------------------