├── .gitignore
├── LICENSE
├── README.md
├── bayes
    ├── Bayes_by_Backprop.py
    ├── ConcreteDropout.py
    ├── MNF.py
    └── __init__.py
├── data
    ├── __init__.py
    ├── toy_regression.py
    ├── train_data_ian_regression.npz
    └── train_data_regression.npz
├── dqn
    ├── Bayes_by_Backprop_DQN.py
    ├── Concrete_Dropout_DQN.py
    ├── DQN.py
    ├── MC_Dropout_DQN.py
    ├── MNF_DQN.py
    ├── __init__.py
    └── train.py
├── envs
    ├── __init__.py
    ├── env_utils.py
    └── nchain.py
├── normalizingflows
    ├── __init__.py
    ├── flow_catalog.py
    ├── nf_utils.py
    └── normalizing_flow.py
├── plots
    ├── BayesByBackprop.png
    ├── ConcreteDropout.png
    ├── ConcreteDropout_heterostatic.png
    ├── MCDropout.png
    ├── MCDropout_heteroscedastic.png
    ├── MNF_all_layers.png
    ├── MNF_last_layers.png
    ├── avg_acc_reward_cartpole.png
    └── avg_acc_reward_mountaincar.png
├── requirements.txt
├── toy_regression_bayes.py
├── toy_regression_concrete_dropout.py
├── toy_regression_mc_dropout.py
├── train_bbb_dqn.py
├── train_dqn.py
├── train_dqn_dropout.py
├── train_dqn_dropout_concrete.py
└── train_mnf_dqn.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | ### JupyterNotebooks ###
  2 | # gitignore template for Jupyter Notebooks
  3 | # website: http://jupyter.org/
  4 | 
  5 | .ipynb_checkpoints
  6 | */.ipynb_checkpoints/*
  7 | 
  8 | # IPython
  9 | profile_default/
 10 | ipython_config.py
 11 | 
 12 | # Remove previous ipynb_checkpoints
 13 | #   git rm -r .ipynb_checkpoints/
 14 | 
 15 | ### macOS ###
 16 | # General
 17 | .DS_Store
 18 | .AppleDouble
 19 | .LSOverride
 20 | 
 21 | # Icon must end with two \r
 22 | Icon
 23 | 
 24 | # Thumbnails
 25 | ._*
 26 | 
 27 | # Files that might appear in the root of a volume
 28 | .DocumentRevisions-V100
 29 | .fseventsd
 30 | .Spotlight-V100
 31 | .TemporaryItems
 32 | .Trashes
 33 | .VolumeIcon.icns
 34 | .com.apple.timemachine.donotpresent
 35 | 
 36 | # Directories potentially created on remote AFP share
 37 | .AppleDB
 38 | .AppleDesktop
 39 | Network Trash Folder
 40 | Temporary Items
 41 | .apdisk
 42 | 
 43 | ### Python ###
 44 | # Byte-compiled / optimized / DLL files
 45 | __pycache__/
 46 | *.py[cod]
 47 | *$py.class
 48 | 
 49 | # C extensions
 50 | *.so
 51 | 
 52 | # Distribution / packaging
 53 | .Python
 54 | build/
 55 | develop-eggs/
 56 | dist/
 57 | downloads/
 58 | eggs/
 59 | .eggs/
 60 | lib/
 61 | lib64/
 62 | parts/
 63 | sdist/
 64 | var/
 65 | wheels/
 66 | pip-wheel-metadata/
 67 | share/python-wheels/
 68 | *.egg-info/
 69 | .installed.cfg
 70 | *.egg
 71 | MANIFEST
 72 | 
 73 | # PyInstaller
 74 | #  Usually these files are written by a python script from a template
 75 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 76 | *.manifest
 77 | *.spec
 78 | 
 79 | # Installer logs
 80 | pip-log.txt
 81 | pip-delete-this-directory.txt
 82 | 
 83 | # Unit test / coverage reports
 84 | htmlcov/
 85 | .tox/
 86 | .nox/
 87 | .coverage
 88 | .coverage.*
 89 | .cache
 90 | nosetests.xml
 91 | coverage.xml
 92 | *.cover
 93 | .hypothesis/
 94 | .pytest_cache/
 95 | 
 96 | # Translations
 97 | *.mo
 98 | *.pot
 99 | 
100 | # Scrapy stuff:
101 | .scrapy
102 | 
103 | # Sphinx documentation
104 | docs/_build/
105 | 
106 | # PyBuilder
107 | target/
108 | 
109 | # pyenv
110 | .python-version
111 | 
112 | # pipenv
113 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
114 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
115 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
116 | #   install all needed dependencies.
117 | #Pipfile.lock
118 | 
119 | # celery beat schedule file
120 | celerybeat-schedule
121 | 
122 | # SageMath parsed files
123 | *.sage.py
124 | 
125 | # Spyder project settings
126 | .spyderproject
127 | .spyproject
128 | 
129 | # Rope project settings
130 | .ropeproject
131 | 
132 | # Mr Developer
133 | .mr.developer.cfg
134 | .project
135 | .pydevproject
136 | 
137 | # mkdocs documentation
138 | /site
139 | 
140 | # mypy
141 | .mypy_cache/
142 | .dmypy.json
143 | dmypy.json
144 | 
145 | # Pyre type checker
146 | .pyre/
147 | 
148 | ### VisualStudioCode ###
149 | .vscode
150 | .vscode/*
151 | !.vscode/settings.json
152 | !.vscode/tasks.json
153 | !.vscode/launch.json
154 | !.vscode/extensions.json
155 | 
156 | ### VisualStudioCode Patch ###
157 | # Ignore all local history of files
158 | .history
159 | 
160 | ### Virtual Environment and Experiments ###
161 | venv
162 | experiments
163 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     <program>  Copyright (C) <year>  <name of author>
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Uncertainty Estimation in the Context of Efficient Exploration
 2 | 
 3 | This repository investigates recent variational Bayesian inference approaches for uncertainty estimation. The approaches
 4 | are evaluated and visualized on regression tasks. Furthermore, the uncertainty estimates from the variational 
 5 | Bayesian neural networks are used to perform approximate Thompson sampling within a deep Q-network (DQN) for efficient 
 6 | exploration. The approaches are compared against each other and against the well known epsilon-greedy strategy.
 7 | 
 8 | Currently, the following variational Bayesian neural networks are implemented:
 9 |  
10 | - Bayes by Backprop [1]
11 | - Multiplicative Normalizing Flows (MNF) [2]
12 | - Dropout as a Bayesian Approximation [3]
13 | - Concrete Dropout [4]
14 | 
15 | Touati et al. [5] describe how to augment DQNs with multiplicative normalizing flows for an efficient 
16 | exploration-exploitation strategy.
17 | 
18 | The repository is structured in the following way:
19 | - [bayes](/bayes) contains implementations of Bayes By Backprop, MNF, and Concrete Dropout layers. Monte Carlo 
20 | dropout utilizes the standard Tensorflow dropout layer.
21 | - [data](/data) contains two regression data sets mentioned in [6] and [7] used to visualize the uncertainty estimates.
22 | - [dqn](/dqn) includes the DQN implementations utilizing the respective variational Bayesian neural networks.
23 | - [envs](/envs) includes an implementation of an N-chain gym environment and environment utility functions.
24 | - [normalizingflows](/normalizingflows) contains normalizing flows for the use in Multiplicative Normalizing Flows.
25 | - [plots](/plots) contains example visualizations.
26 | 
27 | Training functions are located at the root of the repository.
28 | 
29 | Below we show example uncertainty estimates on the regression task mentioned in [6]. Additionally, we show the
30 | average accumulated reward over 5 runs on the OpenAI Gym environments CartPole and MountainCar.
31 | 
32 | - Aleatoric (data) uncertainty and epistemic (knowledge) uncertainty predicted by MC Dropout with two network heads:
33 | 
34 | <img src="plots/MCDropout_heteroscedastic.png" width="500" height="200" />
35 | 
36 | 
37 | - Network utilizing 3 MNF dense layers:
38 | 
39 | <img src="plots/MNF_all_layers.png" width="500" height="200" />
40 | 
41 | 
42 | - Network utilizing 2 regular dense layers and 1 MNF dense layer:
43 | 
44 | <img src="plots/MNF_last_layers.png" width="500" height="200" />
45 | 
46 | 
47 | - Average accumulated reward over 5 runs on the OpenAI Gym CartPole task:
48 | 
49 | <img src="plots/avg_acc_reward_cartpole.png" width="500" height="200" />
50 | 
51 | 
52 | - Average accumulated reward over 5 runs on the OpenAI Gym MountainCar task:
53 | 
54 | <img src="plots/avg_acc_reward_mountaincar.png" width="500" height="200" />
55 | 
56 | 
57 | This work was done during the Advanced Deep Learning for Robotics course at TUM in cooperation with the German Aerospace 
58 | Center (DLR).
59 | In case of any questions, feel free to reach out to us.
60 | 
61 | Jan Rüttinger, jan.ruettinger@tum.de
62 | 
63 | Lukas Rinder, lukas.rinder@tum.de
64 | 
65 | 
66 | ### References
67 | 
68 | [1] C. Blundell, J. Cornebise, K. Kavukcuoglu, and D. Wierstra, “Weight uncertainty in neural networks,” 32nd Int. Conf. Mach. Learn. ICML 2015, vol. 2, pp. 1613–1622, 2015.
69 | 
70 | [2] C. Louizos and M. Welling, “Multiplicative normalizing flows for variational Bayesian neural networks,” 34th Int. Conf. Mach. Learn. ICML 2017, vol. 5, pp. 3480–3489, 2017.
71 | 
72 | [3] Y. Gal and Z. Ghahramani, “Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning,” 33rd Int. Conf. Mach. Learn. ICML 2016, vol. 3, pp. 1651–1660, Jun. 2015.
73 | 
74 | [4] Y. Gal, J. Hron, and A. Kendall, “Concrete dropout,” in Advances in Neural Information Processing Systems, 2017, vol. 2017-Decem, pp. 3582–3591.
75 | 
76 | [5] A. Touati, H. Satija, J. Romoff, J. Pineau, and P. Vincent, “Randomized value functions via multiplicative normalizing flows,” 35th Conf. Uncertain. Artif. Intell. UAI 2019, 2019.
77 | 
78 | [6] I. Osband, “Risk versus uncertainty in deep learning: Bayes, bootstrap and the dangers of dropout.,” NIPS Work. Bayesian Deep Learn., vol. 192, 2016.
79 | 
80 | [7] J. M. Hernández-Lobato and R. P. Adams, “Probabilistic backpropagation for scalable learning of Bayesian neural networks,” 32nd Int. Conf. Mach. Learn. ICML 2015, vol. 3, pp. 1861–1869, 2015.
81 | 


--------------------------------------------------------------------------------
/bayes/Bayes_by_Backprop.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import tensorflow_probability as tfp
 3 | 
 4 | tfd = tfp.distributions
 5 | tfb = tfp.bijectors
 6 | 
 7 | 
 8 | class BayesByBackprop(tf.keras.layers.Layer):
 9 |     """Bayesian fully-connected layer. The weight posterior distribution is modelled by a fully-factorized
10 |     Gaussian.
11 | 
12 |     "Weight Uncertainty in Neural Networks" - Blundell et al. (2015)
13 |     https://arxiv.org/abs/1505.05424
14 |     """
15 | 
16 |     def __init__(
17 |         self,
18 |         n_out,  # output dimensions
19 |         prior_var_w=1,  # variance of weight prior
20 |         prior_var_b=1,  # variance of bias prior
21 |         max_std=1.0,  # limit the standard deviation in the forward pass to avoid local minima (e.g. see Louizos et al.)
22 |         log_var_mean_init=-3.0,
23 |         log_var_init=1e-3,
24 |         **kwargs,
25 |     ):
26 |         self.n_out = n_out
27 |         self.prior_var_w = prior_var_w
28 |         self.prior_var_b = prior_var_b
29 |         self.max_std = max_std
30 |         self.log_var_mean_init = log_var_mean_init
31 |         self.log_var_init = log_var_init
32 |         super().__init__(**kwargs)
33 | 
34 |     def build(self, input_shape):
35 |         n_in = self.n_in = input_shape[-1]
36 |         # initialization according to He et al. (2015)
37 |         # log variance initialized with N(-9, 0.001) -> e^-9 = 1e-4
38 |         glorot = tf.keras.initializers.GlorotNormal()  # Xavier normal initializer
39 |         mean_init, var_init = self.log_var_mean_init, self.log_var_init  # -9.0, 1e-3
40 | 
41 |         self.mean_W = tf.Variable(glorot([n_in, self.n_out]))
42 |         self.log_var_W = tf.Variable(glorot([n_in, self.n_out]) * var_init + mean_init)
43 | 
44 |         self.mean_b = tf.Variable(tf.zeros(self.n_out))
45 |         self.log_var_b = tf.Variable(glorot([self.n_out]) * var_init + mean_init)
46 | 
47 |         self.epsilon_w = tf.Variable(tf.random.normal([self.n_out]), trainable=False)
48 |         self.reset_noise()
49 | 
50 |     def reset_noise(self):
51 |         # sample new epsilon values
52 |         self.epsilon_w.assign(tf.random.normal([self.n_out]))  # sample epsilon_w
53 | 
54 |     @tf.function
55 |     def kl_div(self, same_noise=True):
56 |         kldiv_weight = 0.5 * tf.reduce_sum((- self.log_var_W + tf.math.exp(self.log_var_W)
57 |                                             + tf.square(self.mean_W) - 1))
58 |         kldiv_bias = 0.5 * tf.reduce_sum((- self.log_var_b + tf.math.exp(self.log_var_b)
59 |                                           + tf.square(self.mean_b) - 1))
60 | 
61 |         kldiv = kldiv_weight + kldiv_bias
62 | 
63 |         return kldiv
64 | 
65 |     @tf.function
66 |     def call(self, x, same_noise=False, training=True):
67 |         batch_size = tf.shape(x)[0]
68 |         if training:
69 |             mu_out = tf.matmul(x, self.mean_W) + self.mean_b
70 | 
71 |             var_W = tf.clip_by_value(tf.exp(self.log_var_W), 0, self.max_std ** 2)
72 |             var_b = tf.clip_by_value(tf.exp(self.log_var_b), 0, self.max_std ** 2)
73 | 
74 |             V_h = tf.matmul(tf.square(x), var_W) + var_b
75 | 
76 |             if same_noise:  # use the same epsilon per batch
77 |                 epsilon_w = tf.expand_dims(self.epsilon_w, axis=0)  # expand batch dimension
78 |                 epsilon_w = tf.repeat(epsilon_w, batch_size, axis=0)  # repeat batch dimension
79 |             else:
80 |                 epsilon_w = tf.random.normal(tf.shape(mu_out))
81 | 
82 |             sigma_out = tf.sqrt(V_h) * epsilon_w
83 | 
84 |             out = mu_out + sigma_out
85 |         else:  # evaluation without noise
86 |             mu_out = tf.matmul(x, self.mean_W) + self.mean_b
87 |             out = mu_out
88 | 
89 |         return out
90 | 


--------------------------------------------------------------------------------
/bayes/ConcreteDropout.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | from tensorflow.keras.layers import InputSpec, Wrapper
  4 | 
  5 | 
  6 | class ConcreteDropout(Wrapper):
  7 |     """This wrapper allows to learn the dropout probability for any given input Dense layer.
  8 |     ```python
  9 |         # as the first layer in a model
 10 |         model = Sequential()
 11 |         model.add(ConcreteDropout(Dense(8), input_shape=(16)))
 12 |         # now model.output_shape == (None, 8)
 13 |         # subsequent layers: no need for input_shape
 14 |         model.add(ConcreteDropout(Dense(32)))
 15 |         # now model.output_shape == (None, 32)
 16 |     ```
 17 |     `ConcreteDropout` can be used with arbitrary layers which have 2D
 18 |     kernels, not just `Dense`. However, Conv2D layers require different
 19 |     weighing of the regulariser (use SpatialConcreteDropout instead).
 20 |     # Arguments
 21 |         layer: a layer instance.
 22 |         weight_regularizer:
 23 |             A positive number which satisfies
 24 |                 $weight_regularizer = l**2 / (\tau * N)$
 25 |             with prior lengthscale l, model precision $\tau$ (inverse observation noise),
 26 |             and N the number of instances in the dataset.
 27 |             Note that kernel_regularizer is not needed.
 28 |         dropout_regularizer:
 29 |             A positive number which satisfies
 30 |                 $dropout_regularizer = 2 / (\tau * N)$
 31 |             with model precision $\tau$ (inverse observation noise) and N the number of
 32 |             instances in the dataset.
 33 |             Note the relation between dropout_regularizer and weight_regularizer:
 34 |                 $weight_regularizer / dropout_regularizer = l**2 / 2$
 35 |             with prior lengthscale l. Note also that the factor of two should be
 36 |             ignored for cross-entropy loss, and used only for the eculedian loss.
 37 |     """
 38 | 
 39 |     def __init__(self, layer, weight_regularizer=0, dropout_regularizer=1e-5,
 40 |                  init_min=0.1, init_max=0.1, is_mc_dropout=True, **kwargs):
 41 |         assert 'kernel_regularizer' not in kwargs
 42 |         super(ConcreteDropout, self).__init__(layer, **kwargs)
 43 |         self.weight_regularizer = weight_regularizer
 44 |         self.dropout_regularizer = dropout_regularizer
 45 |         self.is_mc_dropout = is_mc_dropout
 46 |         self.supports_masking = True
 47 |         self.p_logit = None
 48 |         self.init_min = np.log(init_min) - np.log(1. - init_min)
 49 |         self.init_max = np.log(init_max) - np.log(1. - init_max)
 50 | 
 51 |     def build(self, input_shape=None):
 52 |         self.input_spec = InputSpec(shape=input_shape)
 53 |         if not self.layer.built:
 54 |             self.layer.build(input_shape)
 55 |             self.layer.built = True
 56 |         super(ConcreteDropout, self).build()
 57 | 
 58 |         # initialise p
 59 |         self.p_logit = self.add_weight(name='p_logit',
 60 |                                        shape=(1,),
 61 |                                        initializer=tf.random_uniform_initializer(self.init_min, self.init_max),
 62 |                                        dtype=tf.dtypes.float32,
 63 |                                        trainable=True)
 64 | 
 65 |     def compute_output_shape(self, input_shape):
 66 |         return self.layer.compute_output_shape(input_shape)
 67 | 
 68 |     def concrete_dropout(self, x, p):
 69 |         """
 70 |         Concrete dropout - used at training time (gradients can be propagated)
 71 |         :param x: input
 72 |         :return:  approx. dropped out input
 73 |         """
 74 |         eps = 1e-07
 75 |         temp = 0.1
 76 | 
 77 |         unif_noise = tf.random.uniform(shape=tf.shape(x))
 78 |         drop_prob = (
 79 |             tf.math.log(p + eps)
 80 |             - tf.math.log(1. - p + eps)
 81 |             + tf.math.log(unif_noise + eps)
 82 |             - tf.math.log(1. - unif_noise + eps)
 83 |         )
 84 |         drop_prob = tf.math.sigmoid(drop_prob / temp)
 85 |         random_tensor = 1. - drop_prob
 86 | 
 87 |         retain_prob = 1. - p
 88 |         x *= random_tensor
 89 |         x /= retain_prob
 90 |         return x
 91 | 
 92 |     def call(self, inputs, training=True):
 93 |         p = tf.math.sigmoid(self.p_logit)
 94 | 
 95 |         # initialise regulariser / prior KL term
 96 |         input_dim = inputs.shape[-1]  # last dim
 97 |         weight = self.layer.kernel
 98 |         kernel_regularizer = self.weight_regularizer * tf.reduce_sum(tf.square(weight)) / (1. - p)
 99 |         dropout_regularizer = p * tf.math.log(p) + (1. - p) * tf.math.log(1. - p)
100 |         dropout_regularizer *= self.dropout_regularizer * input_dim
101 |         regularizer = tf.reduce_sum(kernel_regularizer + dropout_regularizer)
102 |         if self.is_mc_dropout:
103 |             return self.layer.call(self.concrete_dropout(inputs, p)), regularizer
104 |         else:
105 |             def relaxed_dropped_inputs():
106 |                 return self.layer.call(self.concrete_dropout(inputs, p)), regularizer
107 | 
108 |             return tf.keras.backend.in_train_phase(relaxed_dropped_inputs,
109 |                                                    self.layer.call(inputs),
110 |                                                    training=training), regularizer
111 | 


--------------------------------------------------------------------------------
/bayes/MNF.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | import tensorflow_probability as tfp
  4 | 
  5 | from normalizingflows.flow_catalog import Made
  6 | from normalizingflows.nf_utils import NormalReparamMNF
  7 | from normalizingflows.normalizing_flow import NormalizingFlowModel, NormalizingFlow
  8 | 
  9 | 
 10 | tfd = tfp.distributions
 11 | tfb = tfp.bijectors
 12 | 
 13 | 
 14 | class DenseMNF(tf.keras.layers.Layer):
 15 |     """Bayesian fully-connected layer. The weight posterior distribution is modelled by a fully-factorized
 16 |     Gaussian. The Gaussian means depend on an auxiliary random variable z, which is modelled by a normalizing flow.
 17 |     This allows for multimodality and nonlinear dependencies between the elements of the weight matrix and improves
 18 |     significantly upon classical mean field approximation. The flow's base distribution is a normal distribution with
 19 |     zero mean and unit variance.
 20 | 
 21 |     "Multiplicative Normalizing Flows for Variational Bayesian Neural Networks",
 22 |     Christos Louizos, Max Welling (Jun 2017)
 23 |     https://arxiv.org/abs/1703.01961
 24 |     """
 25 | 
 26 |     def __init__(
 27 |         self,
 28 |         n_out,  # output dimensions
 29 |         n_flows_q=2,  # length flow q(z)
 30 |         n_flows_r=2,  # length flow r(z|w)
 31 |         use_z=True,  # use auxiliary random variable z
 32 |         prior_var_w=1,  # variance of weight prior
 33 |         prior_var_b=1,  # variance of bias prior
 34 |         flow_h_sizes=[32],  # hidden size of flow
 35 |         max_std=1.0,  # limit the standard deviation in the forward pass to avoid local minima (e.g. see Louizos et al.)
 36 |         **kwargs,
 37 |     ):
 38 |         self.n_out = n_out
 39 |         self.prior_var_w = prior_var_w
 40 |         self.prior_var_b = prior_var_b
 41 |         self.max_std = max_std
 42 |         self.n_flows_q = n_flows_q
 43 |         self.n_flows_r = n_flows_r
 44 |         self.use_z = use_z
 45 |         self.flow_h_sizes = flow_h_sizes
 46 |         super().__init__(**kwargs)
 47 | 
 48 |     def build(self, input_shape):
 49 |         n_in = self.n_in = input_shape[-1]
 50 |         # initialization according to He et al. (2015)
 51 |         # log variance initialized with N(-9, 0.001) -> e^-9 = 1e-4
 52 |         glorot = tf.keras.initializers.GlorotNormal()  # Xavier normal initializer
 53 |         mean_init, var_init = -3.0, 1e-3  # -9.0, 1e-3
 54 | 
 55 |         # q(w|z): weights and bias separately
 56 |         self.mean_W = tf.Variable(glorot([n_in, self.n_out]))
 57 |         self.log_var_W = tf.Variable(glorot([n_in, self.n_out]) * var_init + mean_init)
 58 | 
 59 |         self.mean_b = tf.Variable(tf.zeros(self.n_out))
 60 |         self.log_var_b = tf.Variable(glorot([self.n_out]) * var_init + mean_init)
 61 | 
 62 |         if self.use_z:
 63 |             # q(z_o): q0_mean has similar function to a dropout rate as it determines the
 64 |             # mean of the multiplicative noise z_i in eq. (4)
 65 |             self.qz_base = NormalReparamMNF([n_in], var_init=var_init, mean_init=mean_init)
 66 | 
 67 |             if n_in > 1:
 68 |                 permutation = tf.cast(np.concatenate((np.arange(n_in / 2, n_in), np.arange(0, n_in / 2))), tf.int32)
 69 | 
 70 |             bijectors_q = []
 71 |             for _ in range(self.n_flows_q):
 72 |                 bijectors_q.append(tfb.Invert(tfb.MaskedAutoregressiveFlow(
 73 |                     shift_and_log_scale_fn=Made(params=2, hidden_units=self.flow_h_sizes, activation="relu"))))
 74 |                 if n_in > 1:
 75 |                     bijectors_q.append(tfp.bijectors.Permute(permutation))
 76 | 
 77 |             self.qz = NormalizingFlowModel(base=self.qz_base, flows=bijectors_q, chain=True, name="qz")
 78 | 
 79 |             # r(z|w): c, b1, b2 to compute the mean and std
 80 |             self.r0_c = tf.Variable(glorot([n_in]))
 81 |             self.r0_b1 = tf.Variable(glorot([n_in]))
 82 |             self.r0_b2 = tf.Variable(glorot([n_in]))
 83 | 
 84 |             bijectors_r = []
 85 |             for _ in range(self.n_flows_r):
 86 |                 bijectors_r.append(tfb.MaskedAutoregressiveFlow(
 87 |                     shift_and_log_scale_fn=Made(params=2, hidden_units=self.flow_h_sizes, activation="relu")))
 88 |                 if n_in > 1:
 89 |                     bijectors_r.append(tfp.bijectors.Permute(permutation))
 90 | 
 91 |             self.flow_r = NormalizingFlow(flows=bijectors_r, chain=True)
 92 | 
 93 |         self.epsilon_w = tf.Variable(tf.random.normal([self.n_out]), trainable=False)
 94 |         self.reset_noise()
 95 | 
 96 |     def reset_noise(self):
 97 |         # sample new epsilon values
 98 |         self.epsilon_w.assign(tf.random.normal([self.n_out]))  # sample epsilon_w
 99 |         if self.use_z:
100 |             self.qz.base.reset_noise()  # sample epsilon_z
101 | 
102 |     def sample_z(self, batch_size, same_noise=False, training=True):
103 |         if self.use_z:
104 |             if training:
105 |                 z_samples, log_prob = self.qz.sample(batch_size, same_noise=same_noise)
106 |             else:  # evaluation without noise
107 |                 z_samples, log_prob = self.qz.sample_no_noise(batch_size)
108 | 
109 |         else:
110 |             z_samples = tf.ones([batch_size, self.n_in])
111 |             log_prob = tf.zeros(batch_size)
112 | 
113 |         return z_samples, log_prob
114 | 
    @tf.function
    def kl_div(self, same_noise=False):
        """
        Compute the KL-divergence regularization term of this layer.

        The result is `kldiv_weight + kldiv_bias + log_q - log_r`, where the first two terms are closed-form
        KL divergences of diagonal Gaussians against a standard normal, `log_q` is the log-probability of one
        z sample and `log_r` is the log-density of the auxiliary distribution r(z|w) (zero if `use_z` is false).

        :param same_noise: if true, the stored `epsilon_w` is used (and `same_noise` is forwarded to `sample_z`);
                           otherwise a fresh standard-normal `epsilon_w` is drawn.
        :return: the KL term (presumably a scalar tensor - TODO confirm against the shapes returned by the flows).
        """
        # one z sample (batch of one) in training mode and its summed log-probability
        z, log_q = self.sample_z(1, same_noise=same_noise)
        log_q = tf.reduce_sum(log_q)

        # weight means scaled row-wise by z (assumes mean_W has n_in rows - TODO confirm)
        weight_mu = tf.reshape(z, shape=(self.n_in, 1)) * self.mean_W

        # KL(N(mu, var) || N(0, 1)) = 0.5 * (-log(var) + var + mu^2 - 1), summed over all entries
        kldiv_weight = 0.5 * tf.reduce_sum((- self.log_var_W + tf.math.exp(self.log_var_W)
                                            + tf.square(weight_mu) - 1))
        kldiv_bias = 0.5 * tf.reduce_sum((- self.log_var_b + tf.math.exp(self.log_var_b)
                                          + tf.square(self.mean_b) - 1))

        log_r = 0
        if self.use_z:
            # mean of c^T W under the weight posterior
            cw_mu = tf.linalg.matvec(tf.transpose(weight_mu), self.r0_c)
            if same_noise:
                epsilon_w = self.epsilon_w
            else:
                epsilon_w = tf.random.normal([self.n_out])

            # variance of c^T W, then a reparameterized sample squashed by tanh
            cw_var = tf.linalg.matvec(tf.transpose(tf.math.exp(self.log_var_W)), tf.square(self.r0_c))
            cw = tf.math.tanh(cw_mu + tf.math.sqrt(cw_var) * epsilon_w)  # sample W

            # outer products of cw with b1 / b2, averaged over the first axis, give the mean and the negative
            # log-variance used for the Gaussian density below
            mu_tilde = tf.reduce_mean(tf.tensordot(cw, self.r0_b1, axes=0), axis=0)
            neg_log_var_tilde = tf.reduce_mean(tf.tensordot(cw, self.r0_b2, axes=0), axis=0)

            # map z through the inverse of flow_r; the second return value is accumulated into log_r
            # (presumably the log-determinant contribution of the flow - TODO confirm)
            z0, log_r = self.flow_r.inverse(z)
            log_r = tf.reduce_sum(log_r)

            # add the log-density of z0 under a diagonal Gaussian with mean mu_tilde and
            # log-variance -neg_log_var_tilde
            dims = float(z0.shape[-1])
            exponent = tf.squeeze(tf.reduce_sum(tf.square(z0 - mu_tilde) * tf.math.exp(neg_log_var_tilde), axis=1))
            neg_log_det_var = tf.reduce_sum(neg_log_var_tilde)
            log_r += 0.5 * (-dims * tf.math.log(2 * np.pi) + neg_log_det_var - exponent)

        kldiv = kldiv_weight + kldiv_bias + log_q - log_r

        return kldiv
152 | 
153 |     @tf.function
154 |     def call(self, x, same_noise=False, training=True):
155 |         batch_size = tf.shape(x)[0]
156 |         if training:
157 |             z, _ = self.sample_z(batch_size, same_noise=same_noise)
158 |             mu_out = tf.matmul(x * z, self.mean_W) + self.mean_b
159 | 
160 |             var_W = tf.clip_by_value(tf.exp(self.log_var_W), 0, self.max_std ** 2)
161 |             var_b = tf.clip_by_value(tf.exp(self.log_var_b), 0, self.max_std ** 2)
162 |             # var_W = tf.square(std_W)
163 |             V_h = tf.matmul(tf.square(x), var_W) + var_b
164 | 
165 |             if same_noise:  # use the same epsilon per batch
166 |                 epsilon_w = tf.expand_dims(self.epsilon_w, axis=0)  # expand batch dimension
167 |                 epsilon_w = tf.repeat(epsilon_w, batch_size, axis=0)  # repeat batch dimension
168 |             else:
169 |                 epsilon_w = tf.random.normal(tf.shape(mu_out))  # TODO: test implementation
170 | 
171 |             sigma_out = tf.sqrt(V_h) * epsilon_w
172 | 
173 |             out = mu_out + sigma_out
174 |         else:  # evaluation without noise
175 |             z, _ = self.sample_z(batch_size, training=training)
176 |             mu_out = tf.matmul(x * z, self.mean_W) + self.mean_b
177 |             out = mu_out
178 | 
179 |         return out
180 | 


--------------------------------------------------------------------------------
/bayes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/bayes/__init__.py


--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/data/__init__.py


--------------------------------------------------------------------------------
/data/toy_regression.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Toy regression problem.
 3 | 
 4 | Based on the toy regression task introduced in:
 5 | Hernández-Lobato et al. 2015 -
 6 | Probabilistic backpropagation for scalable learning of bayesian neural networks.
 7 | """
 8 | 
 9 | import numpy as np
10 | 
11 | 
class ToyRegressionData():
    """
    Generates toy data for a regression task.

    Targets follow y = x^3 + sigma * epsilon with x drawn uniformly from `x_lim` and epsilon drawn from a normal
    distribution with mean `eps_loc` and standard deviation `eps_scale`.

    :param x_lim: lower and upper bound of the uniform input distribution.
    :param sigma: factor the noise is multiplied with.
    :param eps_loc: mean of the normal noise.
    :param eps_scale: standard deviation of the normal noise.
    """
    def __init__(self, x_lim=(-4, 4), sigma=3, eps_loc=0.0, eps_scale=1.0):
        lower, upper = x_lim  # raises ValueError unless exactly two bounds are given
        self.x_lim = [lower, upper]  # kept as a list, as before
        self.sigma = sigma
        self.eps_loc = eps_loc
        self.eps_scale = eps_scale

    def gen_data(self, n_samples, rng=None):
        """
        Draw noisy training data.

        :param n_samples: number of data points.
        :param rng: optional random source offering `uniform` and `normal` (e.g. `np.random.default_rng(seed)`).
                    Defaults to the global `np.random` state.
        :return: tuple `(x, y)` of float32 arrays with shape `(n_samples, 1)`.
        """
        random_source = np.random if rng is None else rng

        x = random_source.uniform(self.x_lim[0], self.x_lim[1], size=(n_samples, 1)).astype('float32')
        epsilon = random_source.normal(self.eps_loc, self.eps_scale, size=x.shape).astype('float32')
        y = np.power(x, 3) + self.sigma * epsilon

        return x, y

    def eval_data(self, x):
        """
        Evaluate the noise-free target function.

        :param x: inputs.
        :return: `x` raised to the third power.
        """
        return np.power(x, 3)
31 | 


--------------------------------------------------------------------------------
/data/train_data_ian_regression.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/data/train_data_ian_regression.npz


--------------------------------------------------------------------------------
/data/train_data_regression.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/data/train_data_regression.npz


--------------------------------------------------------------------------------
/dqn/Bayes_by_Backprop_DQN.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | 
  4 | from bayes.Bayes_by_Backprop import BayesByBackprop
  5 | 
  6 | tfkl = tf.keras.layers
  7 | 
  8 | 
  9 | class BBB_Backbone(tf.keras.Model):
 10 |     """
 11 |     Backbone of the Deep Q-Network (DQN) with Bayes by Backprop - Blundell et al. (2015).
 12 | 
 13 |     Takes 'num_states' inputs and outputs one Q-value for each action.
 14 |     """
 15 |     def __init__(self, num_states, hidden_units, num_actions, max_std=1.0, log_var_mean_init=-3.0, log_var_init=1e-3):
 16 |         super(BBB_Backbone, self).__init__()
 17 |         self.input_layer = tfkl.InputLayer(input_shape=(num_states,))
 18 | 
 19 |         self.hidden_layers = []
 20 |         for i in hidden_units:
 21 |             self.hidden_layers.append(tfkl.Dense(i, activation='relu', kernel_initializer='RandomNormal'))
 22 |         self.dense_bbb_out = BayesByBackprop(n_out=num_actions, max_std=max_std, log_var_mean_init=log_var_mean_init,
 23 |                                              log_var_init=log_var_init)
 24 | 
 25 |     @tf.function
 26 |     def call(self, inputs, same_noise=False, training=True):
 27 |         out = self.input_layer(inputs)
 28 |         for layer in self.hidden_layers:
 29 |             out = layer(out)
 30 |         out = self.dense_bbb_out(out, same_noise=same_noise, training=training)
 31 |         return out
 32 | 
 33 |     def kl_div(self, same_noise=True):
 34 |         """
 35 |         Compute current KL divergence of the Bayes by Backprop layers.
 36 |         Used as a regularization term during training.
 37 |         """
 38 |         kldiv = self.dense_bbb_out.kl_div(same_noise)
 39 |         return kldiv
 40 | 
 41 |     def reset_noise(self):
 42 |         """
 43 |         Re-sample noise/epsilon parameters of the Bayes by Backprop layers. Required for the case of having the same
 44 |         epsilon parameters across one batch.
 45 |         """
 46 |         self.dense_bbb_out.reset_noise()
 47 | 
 48 |     def print_variance(self):
 49 |         print(f"Variance layer 1: {self.hidden_layers[0].log_var_W}")
 50 | 
 51 | 
 52 | class BBBDQN(tf.Module):
 53 |     """
 54 |     Deep Q-Network utilizing Bayes by Backprop for efficient sampling.
 55 |     """
 56 |     def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr,
 57 |                  alpha):
 58 |         super(BBBDQN, self).__init__()
 59 |         self.num_actions = num_actions
 60 |         self.batch_size = batch_size
 61 |         self.optimizer = tf.keras.optimizers.Adam(lr)
 62 |         self.gamma = gamma
 63 |         self.kl_coeff = alpha*batch_size / max_experiences
 64 |         self.model = BBB_Backbone(num_states, hidden_units, num_actions, max_std=0.5, log_var_mean_init=-3.0,
 65 |                                   log_var_init=1e-3)
 66 |         self.experience = {'s': [], 'a': [], 'r': [], 's_next': [], 'end': []}
 67 |         self.max_experiences = max_experiences
 68 |         self.min_experiences = min_experiences
 69 | 
 70 |     def predict(self, inputs, same_noise=False, training=True):
 71 |         """
 72 |         Get Q-values from backbone network.
 73 |         :param inputs: inputs for the backbone network, e.g. states.
 74 |         :param same_noise: uses the same epsilon parameter for one mini-batch, if set to `True`.
 75 |         :param training: forward pass without stochasticity, if set to `False`.
 76 |         :return: outputs of the backbone network, e.g. num_action Q-values.
 77 |         """
 78 |         return self.model(tf.convert_to_tensor(inputs, tf.float32), same_noise=same_noise, training=training)
 79 | 
 80 |     def train(self, target_net):
 81 |         """
 82 |         Train with experience replay, e.g. replay using a randomized order removing correlation in observation sequence
 83 |         to deal with biased sampling
 84 |         :param target_net: target network.
 85 |         """
 86 |         if len(self.experience['s']) < self.min_experiences:
 87 |             return 0, 0
 88 | 
 89 |         experience_replay_enabled = True  # set False to disable experience replay
 90 |         if experience_replay_enabled:
 91 |             # sample random minibatch of transitions
 92 |             ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
 93 |         else:
 94 |             n = len(self.experience['s'])
 95 |             if n < self.batch_size:
 96 |                 ids = np.full(self.batch_size, n-1)
 97 |             else:
 98 |                 ids = np.arange(max(0, n - self.batch_size), (n - 1), 1)
 99 | 
100 |         states = tf.convert_to_tensor([self.experience['s'][i] for i in ids], tf.float32)
101 |         actions = tf.convert_to_tensor([self.experience['a'][i] for i in ids], tf.float32)
102 |         rewards = tf.convert_to_tensor([self.experience['r'][i] for i in ids], tf.float32)
103 |         states_next = tf.convert_to_tensor([self.experience['s_next'][i] for i in ids], tf.float32)
104 |         ends = tf.convert_to_tensor([self.experience['end'][i] for i in ids], tf.bool)
105 | 
106 |         # compute loss and perform gradient descent
107 |         loss, kl_loss = self.gradient_update(target_net, states, actions, rewards, states_next, ends)
108 | 
109 |         return loss, kl_loss
110 | 
111 |     @tf.function
112 |     def gradient_update(self, target_net, states, actions, rewards, states_next, ends):
113 |         """
114 |         Gradient update with @tf.function decorator for faster performance.
115 |         """
116 |         # make predictions with target network without stochasticity and get sample q for Q-function update
117 |         # sample is different if epoch ends
118 |         double_dqn = True
119 |         if double_dqn:
120 |             next_action = tf.math.argmax(self.predict(states_next, training=False), axis=1)
121 |             q_values = target_net.predict(states_next, training=False)
122 |             q_max = tf.math.reduce_sum(q_values * tf.one_hot(next_action, self.num_actions), axis=1)
123 |         else:
124 |             q_max = tf.math.reduce_max(target_net.predict(states_next, training=False), axis=1)
125 | 
126 |         y = tf.where(ends, rewards, rewards + self.gamma * q_max)
127 | 
128 |         self.model.reset_noise()  # sample new epsilon_w and epsilon_z
129 | 
130 |         # perform gradient descent
131 |         with tf.GradientTape() as tape:
132 |             tape.watch(self.model.trainable_variables)
133 | 
134 |             kl_loss = self.kl_coeff * self.model.kl_div(same_noise=True)
135 |             # Q-values from training network for selected actions
136 |             q_values = self.predict(states, same_noise=True)
137 |             selected_q_values = tf.math.reduce_sum(q_values * tf.one_hot(tf.cast(actions, tf.int32), self.num_actions),
138 |                                                    axis=1)
139 | 
140 |             td_error = tf.math.reduce_sum(tf.square(y - selected_q_values))
141 |             loss = td_error + kl_loss
142 | 
143 |         gradients = tape.gradient(loss, self.model.trainable_variables)
144 |         self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
145 | 
146 |         self.model.reset_noise()  # sample new epsilon_w and epsilon_z
147 | 
148 |         return loss, kl_loss
149 | 
150 |     def get_action(self, states, same_noise=False, training=True):
151 |         """
152 |         Predict action with the Bayes By Backprop network. In each forward pass the weights are sampled from the weight
153 |         posterior distribution. Hence, approximated Thompson sampling is performed. For uncertain weight posterior
154 |         distributions the variance in the sampled values will be higher, leading inherently to more exploration.
155 | 
156 |         :param states: observed states, e.g. [x, dx, th, dth].
157 |         :return: action
158 |         """
159 |         q_values = self.predict(np.atleast_2d(states), same_noise=same_noise, training=training)
160 |         action = np.argmax(q_values)
161 | 
162 |         return action
163 | 
164 |     def add_experience(self, exp):
165 |         """
166 |         Add experience to experience history. If 'max_experiences' exceeded, remove first item and append current
167 |         experience.
168 |         :param exp: experience {'s': prev_observations, 'a': action, 'r': reward, 's_next': observations, 'end': end}.
169 |         """
170 |         if len(self.experience['s']) >= self.max_experiences:
171 |             for key in self.experience.keys():
172 |                 self.experience[key].pop(0)
173 | 
174 |         for key, value in exp.items():
175 |             self.experience[key].append(value)
176 | 
177 |     def copy_weights(self, train_net):
178 |         """
179 |         Copy weights from train network to target network.
180 |         :param train_net: model of train network.
181 |         """
182 |         variables_target = self.model.trainable_variables
183 |         variables_train = train_net.model.trainable_variables
184 | 
185 |         for v_target, v_train in zip(variables_target, variables_train):
186 |             v_target.assign(v_train.numpy())
187 | 


--------------------------------------------------------------------------------
/dqn/Concrete_Dropout_DQN.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | 
  4 | from tensorflow.keras.layers import Dense, Input
  5 | from tensorflow.keras import Model
  6 | from bayes.ConcreteDropout import ConcreteDropout
  7 | 
  8 | 
  9 | def make_backbone(num_states, hidden_units, num_actions, dropout_reg=1e-5, wd=1e-3):
 10 |     """
 11 |     Build a tensorflow keras backbone model utilizing concrete dropout layers.
 12 |     """
 13 |     losses: list = []
 14 |     inp = Input(shape=(num_states,))
 15 |     x = inp
 16 | 
 17 |     for i in hidden_units:
 18 |         x, loss = ConcreteDropout(Dense(i, activation='relu'),
 19 |                                   weight_regularizer=wd, dropout_regularizer=dropout_reg)(x)
 20 |         losses.append(loss)
 21 | 
 22 |     x = Dense(100, activation='relu')(x)
 23 |     out = Dense(num_actions, activation='linear')(x)
 24 |     model = Model(inp, out)
 25 |     model.add_loss(losses)
 26 | 
 27 |     return model
 28 |     
 29 | 
 30 | class DQN(tf.Module):
 31 |     """
 32 |     Deep Q-Network.
 33 |     """
 34 |     def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr):
 35 |         super(DQN, self).__init__()
 36 |         self.num_actions = num_actions
 37 |         self.batch_size = batch_size
 38 |         self.optimizer = tf.optimizers.SGD(lr)
 39 |         self.gamma = gamma
 40 |         self.model = make_backbone(num_states, hidden_units, num_actions)
 41 |         self.experience = {'s': [], 'a': [], 'r': [], 's_next': [], 'end': []}
 42 |         self.max_experiences = max_experiences
 43 |         self.min_experiences = min_experiences
 44 |         self.states_uncertainty = {}
 45 | 
 46 |     def predict(self, inputs, training=True):
 47 |         """
 48 |         Get Q-values from backbone network.
 49 |         :param inputs: inputs for the backbone network, e.g. states.
 50 |                 :param training: forward pass without stochasticity, if set to `False`.
 51 |         :return: outputs of the backbone network, e.g. num_action Q-values.
 52 |         """
 53 |         return self.model(tf.convert_to_tensor(inputs, tf.float32), training=training)
 54 | 
 55 |     def train(self, target_net):
 56 |         """
 57 |         Train with experience replay, e.g. replay using a randomized order removing correlation in observation sequence
 58 |         to deal with biased sampling
 59 |         :param target_net: target network.
 60 |         """
 61 |         if len(self.experience['s']) < self.min_experiences:
 62 |             return 0, 0
 63 | 
 64 |         experience_replay_enabled = True  # set False to disable experience replay
 65 |         if experience_replay_enabled:
 66 |             # sample random minibatch of transitions
 67 |             ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
 68 |         else:
 69 |             n = len(self.experience['s'])
 70 |             if n < self.batch_size:
 71 |                 ids = np.full(self.batch_size, n-1)
 72 |             else:
 73 |                 ids = np.arange(max(0, n - self.batch_size), (n - 1), 1)
 74 | 
 75 |         states = tf.convert_to_tensor([self.experience['s'][i] for i in ids], tf.float32)
 76 |         actions = tf.convert_to_tensor([self.experience['a'][i] for i in ids], tf.float32)
 77 |         rewards = tf.convert_to_tensor([self.experience['r'][i] for i in ids], tf.float32)
 78 |         states_next = tf.convert_to_tensor([self.experience['s_next'][i] for i in ids], tf.float32)
 79 |         ends = tf.convert_to_tensor([self.experience['end'][i] for i in ids], tf.bool)
 80 | 
 81 |         # compute loss and perform gradient descent
 82 |         loss, reg_loss = self.gradient_update(target_net, states, actions, rewards, states_next, ends)
 83 | 
 84 |         return loss, reg_loss
 85 | 
 86 |     @tf.function
 87 |     def gradient_update(self, target_net, states, actions, rewards, states_next, ends):
 88 |         """
 89 |         Gradient update with @tf.function decorator for faster performance.
 90 |         """
 91 |         # make predictions with target network and get sample q for Q-function update, sample is different if epoch end
 92 |         double_dqn = True
 93 |         if double_dqn:
 94 |             next_action = tf.math.argmax(self.predict(states_next), axis=1)
 95 |             q_values = target_net.predict(states_next)
 96 |             q_max = tf.math.reduce_sum(q_values * tf.one_hot(next_action, self.num_actions), axis=1)
 97 |         else:
 98 |             q_max = tf.math.reduce_max(target_net.predict(states_next), axis=1)
 99 | 
100 |         y = tf.where(ends, rewards, rewards + self.gamma * q_max)
101 | 
102 |         # perform gradient descent
103 |         with tf.GradientTape() as tape:
104 |             tape.watch(self.model.trainable_variables)
105 | 
106 |             # Q-values from training network for selected actions
107 |             q_values = self.predict(states)
108 |             selected_q_values = tf.math.reduce_sum(q_values * tf.one_hot(tf.cast(actions, tf.int32), self.num_actions), axis=1)
109 | 
110 |             regularization_loss = tf.reduce_sum(self.model.losses)
111 |             loss_pred = tf.math.reduce_sum(tf.square(y - selected_q_values))  # compute loss
112 |             loss = loss_pred + regularization_loss
113 | 
114 |         gradients = tape.gradient(loss, self.model.trainable_variables)
115 |         self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
116 | 
117 |         return loss, regularization_loss
118 | 
119 |     def get_action(self, states, training=True):
120 |         """
121 |         Predict action with the Concrete Dropout network. Keeping Concrete Dropout enabled in the forward pass forms a
122 |         Bayesian approximation. Hence, approximated Thompson sampling is performed.
123 | 
124 |         :param states: observed states, e.g. [x, dx, th, dth].
125 |         :param training: forward pass without stochasticity, if set to `False`.
126 |         :return: action
127 |         """
128 |         q_values = self.predict(np.atleast_2d(states), training)
129 |         action = np.argmax(q_values)
130 | 
131 |         return action
132 | 
133 |     def add_experience(self, exp):
134 |         """
135 |         Add experience to experience history. If 'max_experiences' exceeded, remove first item and append current
136 |         experience.
137 |         :param exp: experience {'s': prev_observations, 'a': action, 'r': reward, 's_next': observations, 'end': end}.
138 |         """
139 |         if len(self.experience['s']) >= self.max_experiences:
140 |             for key in self.experience.keys():
141 |                 self.experience[key].pop(0)
142 | 
143 |         for key, value in exp.items():
144 |             self.experience[key].append(value)
145 | 
146 |     def copy_weights(self, train_net):
147 |         """
148 |         Copy weights from train network to target network.
149 |         :param train_net: model of train network.
150 |         """
151 |         variables_target = self.model.trainable_variables
152 |         variables_train = train_net.model.trainable_variables
153 | 
154 |         for v_target, v_train in zip(variables_target, variables_train):
155 |             v_target.assign(v_train.numpy())
156 | 


--------------------------------------------------------------------------------
/dqn/DQN.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | 
  4 | tfkl = tf.keras.layers
  5 | 
  6 | 
  7 | class Backbone(tf.keras.Model):
  8 |     """
  9 |     Backbone of the Deep Q-Network (DQN) that approximates the Q-function.
 10 |     Takes 'num_states' inputs and outputs one Q-value for each action.
 11 |     """
 12 |     def __init__(self, num_states, hidden_units, num_actions):
 13 |         super(Backbone, self).__init__()
 14 |         self.input_layer = tf.keras.layers.InputLayer(input_shape=(num_states,))
 15 | 
 16 |         self.hidden_layers = []
 17 |         for i in hidden_units:
 18 |             self.hidden_layers.append(tf.keras.layers.Dense(
 19 |                 i, activation='relu', kernel_initializer='RandomNormal'))
 20 | 
 21 |         self.output_layer = tf.keras.layers.Dense(
 22 |             num_actions, activation='linear', kernel_initializer='RandomNormal')
 23 | 
 24 |     @tf.function
 25 |     def call(self, inputs):
 26 |         z = self.input_layer(inputs)
 27 |         for layer in self.hidden_layers:
 28 |             z = layer(z)
 29 |         output = self.output_layer(z)
 30 |         return output
 31 | 
 32 | 
 33 | class DQN(tf.Module):
 34 |     """
 35 |     Deep Q-Network.
 36 |     """
 37 |     def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr):
 38 |         super(DQN, self).__init__()
 39 |         self.num_actions = num_actions
 40 |         self.batch_size = batch_size
 41 |         self.optimizer = tf.keras.optimizers.Adam(lr)
 42 |         self.gamma = gamma
 43 |         self.model = Backbone(num_states, hidden_units, num_actions)
 44 |         self.experience = {'s': [], 'a': [], 'r': [], 's_next': [], 'end': []}
 45 |         self.max_experiences = max_experiences
 46 |         self.min_experiences = min_experiences
 47 | 
 48 |     def predict(self, inputs):
 49 |         """
 50 |         Get Q-values from backbone network.
 51 |         :param inputs: inputs for the backbone network, e.g. states.
 52 |         :return: outputs of the backbone network, e.g. num_action Q-values.
 53 |         """
 54 |         return self.model(tf.convert_to_tensor(inputs, tf.float32))
 55 | 
 56 |     def train(self, target_net):
 57 |         """
 58 |         Train with experience replay, e.g. replay using a randomized order removing correlation in observation sequence
 59 |         to deal with biased sampling
 60 |         :param target_net: target network.
 61 |         """
 62 |         if len(self.experience['s']) < self.min_experiences:
 63 |             return 0
 64 | 
 65 |         experience_replay_enabled = True  # set False to disable experience replay
 66 |         if experience_replay_enabled:
 67 |             # sample random minibatch of transitions
 68 |             ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
 69 |         else:
 70 |             n = len(self.experience['s'])
 71 |             if n < self.batch_size:
 72 |                 ids = np.full(self.batch_size, n-1)
 73 |             else:
 74 |                 ids = np.arange(max(0, n - self.batch_size), (n - 1), 1)
 75 | 
 76 |         states = tf.convert_to_tensor([self.experience['s'][i] for i in ids], tf.float32)
 77 |         actions = tf.convert_to_tensor([self.experience['a'][i] for i in ids], tf.float32)
 78 |         rewards = tf.convert_to_tensor([self.experience['r'][i] for i in ids], tf.float32)
 79 |         states_next = tf.convert_to_tensor([self.experience['s_next'][i] for i in ids], tf.float32)
 80 |         ends = tf.convert_to_tensor([self.experience['end'][i] for i in ids], tf.bool)
 81 | 
 82 |         # compute loss and perform gradient descent
 83 |         loss = self.gradient_update(target_net, states, actions, rewards, states_next, ends)
 84 | 
 85 |         return loss
 86 | 
 87 |     @tf.function
 88 |     def gradient_update(self, target_net, states, actions, rewards, states_next, ends):
 89 |         """
 90 |         Gradient update with @tf.function decorator for faster performance.
 91 |         """
 92 |         # make predictions with target network and get sample q for Q-function update, sample is different if epoch ends
 93 |         target_network_enabled = True  # set False to disable target network
 94 |         double_dqn = True
 95 |         if target_network_enabled:
 96 |             if double_dqn:
 97 |                 next_action = tf.math.argmax(self.predict(states_next), axis=1)
 98 |                 q_values = target_net.predict(states_next)
 99 |                 q_max = tf.math.reduce_sum(q_values * tf.one_hot(next_action, self.num_actions), axis=1)
100 |             else:
101 |                 q_max = tf.math.reduce_max(target_net.predict(states_next), axis=1)
102 |         else:
103 |             q_max = tf.math.reduce_max(self.predict(states_next), axis=1)
104 |         y = tf.where(ends, rewards, rewards + self.gamma * q_max)
105 | 
106 |         # perform gradient descent
107 |         with tf.GradientTape() as tape:
108 |             tape.watch(self.model.trainable_variables)
109 | 
110 |             # Q-values from training network for selected actions
111 |             q_values = self.predict(states)
112 |             selected_q_values = tf.math.reduce_sum(q_values * tf.one_hot(tf.cast(actions, tf.int32), self.num_actions), axis=1)
113 | 
114 |             loss = tf.math.reduce_sum(tf.square(y - selected_q_values))  # compute loss
115 | 
116 |         gradients = tape.gradient(loss, self.model.trainable_variables)
117 |         self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
118 | 
119 |         return loss
120 | 
121 |     def get_action(self, states, epsilon=0):
122 |         """
123 |         Choose random action with probability 'epsilon', otherwise choose action with greedy policy, e.g. action that
124 |         maximizes the Q-value function.
125 |         :param states: observed states, e.g. [x, dx, th, dth].
126 |         :param epsilon: probability of random action.
127 |         :return: action
128 |         """
129 |         # take random action with probability 'epsilon'
130 |         if np.random.random() < epsilon:
131 |             action = np.random.choice(self.num_actions)
132 |             return action
133 | 
134 |         # else take action that maximizes the Q-function
135 |         else:
136 |             q_values = self.predict(np.atleast_2d(states))
137 |             action = np.argmax(q_values)
138 |             return action
139 | 
140 |     def add_experience(self, exp):
141 |         """
142 |         Add experience to experience history. If 'max_experiences' exceeded, remove first item and append current
143 |         experience.
144 |         :param exp: experience {'s': prev_observations, 'a': action, 'r': reward, 's_next': observations, 'end': end}.
145 |         """
146 |         if len(self.experience['s']) >= self.max_experiences:
147 |             for key in self.experience.keys():
148 |                 self.experience[key].pop(0)
149 | 
150 |         for key, value in exp.items():
151 |             self.experience[key].append(value)
152 | 
153 |     def copy_weights(self, train_net):
154 |         """
155 |         Copy weights from train network to target network.
156 |         :param train_net: model of train network.
157 |         """
158 |         variables_target = self.model.trainable_variables
159 |         variables_train = train_net.model.trainable_variables
160 | 
161 |         for v_target, v_train in zip(variables_target, variables_train):
162 |             v_target.assign(v_train.numpy())
163 | 


--------------------------------------------------------------------------------
/dqn/MC_Dropout_DQN.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | from tensorflow.keras.layers import InputLayer
  4 | 
  5 | tfkl = tf.keras.layers
  6 | 
  7 | 
  8 | class Backbone(tf.keras.Model):
  9 |     """
 10 |     Backbone of the Deep Q-Network (DQN) with Bayesian fully-connected layers that approximates the Q-function.
 11 |     The Bayesian fully-connected layers utilize Dropout as Bayesian approximation according to
 12 |     "Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning"
 13 |     - Gal and Ghahramani (2015): https://arxiv.org/abs/1506.02142.
 14 | 
 15 |     Takes 'num_states' inputs and outputs one Q-value for each action.
 16 |     """
 17 |     def __init__(self, num_states, hidden_units, dropout_rate, num_actions, N):
 18 |         super(Backbone, self).__init__()
 19 | 
 20 |         self.N = N  # data points
 21 |         lengthscale = 1e-2
 22 |         tau = 1.0
 23 |         reg = lengthscale**2 * (1 - dropout_rate) / (2.0 * self.N * tau)
 24 | 
 25 |         self.hidden_layers = []
 26 |         self.input_layer = InputLayer(input_shape=(num_states,))
 27 |         for i in hidden_units:
 28 |             self.hidden_layers.append(tfkl.Dense(i, activation='relu', kernel_initializer='RandomNormal',
 29 |                                                             kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg)))
 30 | 
 31 |         self.hidden_layers.append(tfkl.Dropout(dropout_rate))  # only one dropout layer before the output
 32 | 
 33 |         self.output_layer = tfkl.Dense(num_actions, activation='linear', kernel_initializer='RandomNormal')
 34 | 
 35 |     @tf.function
 36 |     def call(self, inputs):
 37 |         out = self.input_layer(inputs)
 38 | 
 39 |         for layer in self.hidden_layers:
 40 |             if isinstance(layer, tfkl.Dropout):
 41 |                 out = layer(out, training=True)
 42 |             else:
 43 |                 out = layer(out)
 44 |         out = self.output_layer(out)
 45 |         return out
 46 | 
 47 | 
 48 | class DQN(tf.Module):
 49 |     """
 50 |     Deep Q-Network utilizing Dropout as Bayesian approximation for efficient sampling.
 51 |     """
 52 |     def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr, dropout_rate):
 53 |         super(DQN, self).__init__()
 54 |         self.num_actions = num_actions
 55 |         self.batch_size = batch_size
 56 |         self.optimizer = tf.keras.optimizers.Adam(lr)
 57 |         self.gamma = gamma
 58 |         self.model = Backbone(num_states, hidden_units, dropout_rate, num_actions, max_experiences)
 59 |         self.experience = {'s': [], 'a': [], 'r': [], 's_next': [], 'end': []}
 60 |         self.max_experiences = max_experiences
 61 |         self.min_experiences = min_experiences
 62 |         self.states_uncertainty = {}
 63 | 
 64 |     def predict(self, inputs, training=True):
 65 |         """
 66 |         Get Q-values from backbone network.
 67 |         :param inputs: inputs for the backbone network, e.g. states.
 68 |         :param training: forward pass without stochasticity, if set to `False`.
 69 |         :return: outputs of the backbone network, e.g. num_action Q-values.
 70 |         """
 71 |         return self.model(tf.convert_to_tensor(inputs, tf.float32), training=training)
 72 | 
 73 |     def train(self, target_net):
 74 |         """
 75 |         Train with experience replay, e.g. replay using a randomized order removing correlation in observation sequence
 76 |         to deal with biased sampling
 77 |         :param target_net: target network.
 78 |         """
 79 |         if len(self.experience['s']) < self.min_experiences:
 80 |             return 0, 0
 81 | 
 82 |         experience_replay_enabled = True  # set False to disable experience replay
 83 |         if experience_replay_enabled:
 84 |             # sample random minibatch of transitions
 85 |             ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
 86 |         else:
 87 |             n = len(self.experience['s'])
 88 |             if n < self.batch_size:
 89 |                 ids = np.full(self.batch_size, n-1)
 90 |             else:
 91 |                 ids = np.arange(max(0, n - self.batch_size), (n - 1), 1)
 92 | 
 93 |         states = tf.convert_to_tensor([self.experience['s'][i] for i in ids], tf.float32)
 94 |         actions = tf.convert_to_tensor([self.experience['a'][i] for i in ids], tf.float32)
 95 |         rewards = tf.convert_to_tensor([self.experience['r'][i] for i in ids], tf.float32)
 96 |         states_next = tf.convert_to_tensor([self.experience['s_next'][i] for i in ids], tf.float32)
 97 |         ends = tf.convert_to_tensor([self.experience['end'][i] for i in ids], tf.bool)
 98 | 
 99 |         # compute loss and perform gradient descent
100 |         loss, reg_loss = self.gradient_update(target_net, states, actions, rewards, states_next, ends)
101 | 
102 |         return loss, reg_loss
103 | 
104 |     @tf.function
105 |     def gradient_update(self, target_net, states, actions, rewards, states_next, ends):
106 |         """
107 |         Gradient update with @tf.function decorator for faster performance.
108 |         """
109 |         # make predictions with target network and get sample q for Q-function update, sample is different if epoch end
110 |         double_dqn = True
111 |         if double_dqn:
112 |             next_action = tf.math.argmax(self.predict(states_next), axis=1)
113 |             q_values = target_net.predict(states_next)
114 |             q_max = tf.math.reduce_sum(q_values * tf.one_hot(next_action, self.num_actions), axis=1)
115 |         else:
116 |             q_max = tf.math.reduce_max(target_net.predict(states_next), axis=1)
117 | 
118 |         y = tf.where(ends, rewards, rewards + self.gamma * q_max)
119 | 
120 |         # perform gradient descent
121 |         with tf.GradientTape() as tape:
122 |             tape.watch(self.model.trainable_variables)
123 | 
124 |             # Q-values from training network for selected actions
125 |             q_values = self.predict(states)
126 |             selected_q_values = tf.math.reduce_sum(q_values * tf.one_hot(tf.cast(actions, tf.int32), self.num_actions), axis=1)
127 | 
128 |             regularization_loss = tf.reduce_sum(self.model.losses)
129 |             loss_pred = tf.math.reduce_sum(tf.square(y - selected_q_values))  # compute loss
130 |             loss = loss_pred + regularization_loss
131 | 
132 |         gradients = tape.gradient(loss, self.model.trainable_variables)
133 |         self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
134 | 
135 |         return loss, regularization_loss
136 | 
137 |     def get_action(self, states, training=True):
138 |         """
139 |         Predict action with the MC Dropout network. Keeping MC Dropout enabled in the forward pass forms a Bayesian
140 |         approximation. Hence, approximated Thompson sampling is performed.
141 | 
142 |         :param states: observed states, e.g. [x, dx, th, dth].
143 |         :param training: forward pass without stochasticity, if set to `False`.
144 |         :return: action
145 |         """
146 |         q_values = self.predict(np.atleast_2d(states), training)
147 |         action = np.argmax(q_values)
148 |         return action
149 | 
150 |     def add_experience(self, exp):
151 |         """
152 |         Add experience to experience history. If 'max_experiences' exceeded, remove first item and append current
153 |         experience.
154 |         :param exp: experience {'s': prev_observations, 'a': action, 'r': reward, 's_next': observations, 'end': end}.
155 |         """
156 |         if len(self.experience['s']) >= self.max_experiences:
157 |             for key in self.experience.keys():
158 |                 self.experience[key].pop(0)
159 | 
160 |         for key, value in exp.items():
161 |             self.experience[key].append(value)
162 | 
163 |     def copy_weights(self, train_net):
164 |         """
165 |         Copy weights from train network to target network.
166 |         :param train_net: model of train network.
167 |         """
168 |         variables_target = self.model.trainable_variables
169 |         variables_train = train_net.model.trainable_variables
170 | 
171 |         for v_target, v_train in zip(variables_target, variables_train):
172 |             v_target.assign(v_train.numpy())
173 | 


--------------------------------------------------------------------------------
/dqn/MNF_DQN.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | 
  4 | from bayes.MNF import DenseMNF
  5 | 
  6 | tfkl = tf.keras.layers
  7 | 
  8 | 
  9 | class MNFBackbone(tf.Module):
 10 |     """
 11 |     Backbone of the Deep Q-Network (DQN) with Bayesian fully-connected layers that approximates the Q-function.
 12 |     The Bayesian fully-connected layers utilize multiplicative normalizing flows by Christos Louizos, Max Welling
 13 |     (Jun 2017).
 14 | 
 15 |     Takes 'num_states' inputs and outputs one Q-value for each action.
 16 |     """
 17 |     def __init__(self, num_states, hidden_units, num_actions, use_z=True, max_std=1.0):
 18 |         super(MNFBackbone, self).__init__()
 19 |         self.input_layer = tfkl.InputLayer(input_shape=(num_states,))
 20 | 
 21 |         self.hidden_layers = []
 22 |         for i in hidden_units:
 23 |             self.hidden_layers.append(tfkl.Dense(i, activation='relu', kernel_initializer='RandomNormal'))
 24 |         self.dense_mnf_out = DenseMNF(n_out=num_actions, use_z=use_z, max_std=max_std, n_flows_q=2, n_flows_r=2,
 25 |                                       flow_h_sizes=[32])
 26 | 
 27 |     @tf.function
 28 |     def __call__(self, inputs, same_noise=False, training=True):
 29 |         out = self.input_layer(inputs)
 30 |         for layer in self.hidden_layers:
 31 |             out = layer(out)
 32 |         out = self.dense_mnf_out(out, same_noise=same_noise, training=training)
 33 |         return out
 34 | 
 35 |     def kl_div(self, same_noise=True):
 36 |         """
 37 |         Compute current KL-divergence of all Bayesian layers.
 38 |         Can be used as a regularization term during training.
 39 |         """
 40 |         kldiv = self.dense_mnf_out.kl_div(same_noise)
 41 |         return kldiv
 42 | 
 43 |     def reset_noise(self):
 44 |         """
 45 |         Re-sample noise/epsilon parameters of the MNF layers. Required for the case of having the same epsilon
 46 |         parameters across one batch.
 47 |         """
 48 |         self.dense_mnf_out.reset_noise()
 49 | 
 50 |     def print_variance(self):
 51 |         print(f"Variance layer 1: {self.hidden_layers[0].log_var_W}")
 52 | 
 53 | 
 54 | class MNFDQN(tf.Module):
 55 |     """
 56 |     Deep Q-Network utilizing Multiplicative Normalizing Flows for efficient sampling.
 57 |     """
 58 |     def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr,
 59 |                  alpha):
 60 |         super(MNFDQN, self).__init__()
 61 |         self.num_actions = num_actions
 62 |         self.batch_size = batch_size
 63 |         self.optimizer = tf.keras.optimizers.Adam(lr)
 64 |         self.gamma = gamma
 65 |         self.kl_coeff = alpha*batch_size / max_experiences
 66 |         self.model = MNFBackbone(num_states, hidden_units, num_actions, use_z=True, max_std=0.5)
 67 |         self.experience = {'s': [], 'a': [], 'r': [], 's_next': [], 'end': []}
 68 |         self.max_experiences = max_experiences
 69 |         self.min_experiences = min_experiences
 70 | 
 71 |     def predict(self, inputs, same_noise=False, training=True):
 72 |         """
 73 |         Get Q-values from backbone network.
 74 |         :param inputs: inputs for the backbone network, e.g. states.
 75 |         :param same_noise: uses the same epsilon parameter, if set to `True`.
 76 |         :param training: forward pass without stochasticity, if set to `False`.
 77 |         :return: outputs of the backbone network, e.g. num_action Q-values.
 78 |         """
 79 |         return self.model(tf.convert_to_tensor(inputs, tf.float32), same_noise=same_noise, training=training)
 80 | 
 81 |     def train(self, target_net):
 82 |         """
 83 |         Train with experience replay, e.g. replay using a randomized order removing correlation in observation sequence
 84 |         to deal with biased sampling
 85 |         :param target_net: target network.
 86 |         """
 87 |         if len(self.experience['s']) < self.min_experiences:
 88 |             return 0, 0
 89 | 
 90 |         experience_replay_enabled = True  # set False to disable experience replay
 91 |         if experience_replay_enabled:
 92 |             # sample random minibatch of transitions
 93 |             ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
 94 |         else:
 95 |             n = len(self.experience['s'])
 96 |             if n < self.batch_size:
 97 |                 ids = np.full(self.batch_size, n-1)
 98 |             else:
 99 |                 ids = np.arange(max(0, n - self.batch_size), (n - 1), 1)
100 | 
101 |         states = tf.convert_to_tensor([self.experience['s'][i] for i in ids], tf.float32)
102 |         actions = tf.convert_to_tensor([self.experience['a'][i] for i in ids], tf.float32)
103 |         rewards = tf.convert_to_tensor([self.experience['r'][i] for i in ids], tf.float32)
104 |         states_next = tf.convert_to_tensor([self.experience['s_next'][i] for i in ids], tf.float32)
105 |         ends = tf.convert_to_tensor([self.experience['end'][i] for i in ids], tf.bool)
106 | 
107 |         # compute loss and perform gradient descent
108 |         loss, kl_loss = self.gradient_update(target_net, states, actions, rewards, states_next, ends)
109 | 
110 |         return loss, kl_loss
111 | 
112 |     @tf.function
113 |     def gradient_update(self, target_net, states, actions, rewards, states_next, ends):
114 |         """
115 |         Gradient update with @tf.function decorator for faster performance.
116 |         """
117 |         # make predictions with target network without stochasticity and get sample q for Q-function update
118 |         # sample is different if epoch ends
119 |         double_dqn = True
120 |         if double_dqn:
121 |             next_action = tf.math.argmax(self.predict(states_next, training=False), axis=1)
122 |             q_values = target_net.predict(states_next, training=False)
123 |             q_max = tf.math.reduce_sum(q_values * tf.one_hot(next_action, self.num_actions), axis=1)
124 |         else:
125 |             q_max = tf.math.reduce_max(target_net.predict(states_next, training=False), axis=1)
126 | 
127 |         y = tf.where(ends, rewards, rewards + self.gamma * q_max)
128 | 
129 |         self.model.reset_noise()  # sample new epsilon_w and epsilon_z
130 | 
131 |         # perform gradient descent
132 |         with tf.GradientTape() as tape:
133 |             tape.watch(self.model.trainable_variables)
134 | 
135 |             kl_loss = self.kl_coeff * self.model.kl_div(same_noise=True)
136 |             # Q-values from training network for selected actions
137 |             q_values = self.predict(states, same_noise=True)
138 |             selected_q_values = tf.math.reduce_sum(q_values * tf.one_hot(tf.cast(actions, tf.int32), self.num_actions),
139 |                                                    axis=1)
140 | 
141 |             td_error = tf.math.reduce_sum(tf.square(y - selected_q_values))
142 |             loss = td_error + kl_loss
143 | 
144 |         gradients = tape.gradient(loss, self.model.trainable_variables)
145 |         self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
146 | 
147 |         self.model.reset_noise()  # sample new epsilon_w and epsilon_z
148 | 
149 |         return loss, kl_loss
150 | 
151 |     def get_action(self, states, same_noise=False, training=True):
152 |         """
153 |         Predict action with the MNF network. In each forward pass the weights are sampled from the weight posterior
154 |         distribution. Hence, approximated Thompson sampling is performed. For uncertain weight posterior distributions
155 |         the variance in the sampled values will be higher, leading inherently to more exploration
156 | 
157 |         :param states: observed states, e.g. [x, dx, th, dth].
158 |         :param same_noise: uses the same epsilon parameter, if set to `True`.
159 |         :param training: forward pass without stochasticity, if set to `False`.
160 |         :return: action
161 |         """
162 |         q_values = self.predict(np.atleast_2d(states), same_noise=same_noise, training=training)
163 |         action = np.argmax(q_values)
164 | 
165 |         return action
166 | 
167 |     def add_experience(self, exp):
168 |         """
169 |         Add experience to experience history. If 'max_experiences' exceeded, remove first item and append current
170 |         experience.
171 |         :param exp: experience {'s': prev_observations, 'a': action, 'r': reward, 's_next': observations, 'end': end}.
172 |         """
173 |         if len(self.experience['s']) >= self.max_experiences:
174 |             for key in self.experience.keys():
175 |                 self.experience[key].pop(0)
176 | 
177 |         for key, value in exp.items():
178 |             self.experience[key].append(value)
179 | 
180 |     def copy_weights(self, train_net):
181 |         """
182 |         Copy weights from train network to target network.
183 |         :param train_net: model of train network.
184 |         """
185 |         variables_target = self.model.trainable_variables
186 |         variables_train = train_net.model.trainable_variables
187 | 
188 |         for v_target, v_train in zip(variables_target, variables_train):
189 |             v_target.assign(v_train.numpy())
190 | 


--------------------------------------------------------------------------------
/dqn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/dqn/__init__.py


--------------------------------------------------------------------------------
/dqn/train.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import os
  3 | import matplotlib.pyplot as plt
  4 | import numpy as np
  5 | 
  6 | from gym import wrappers
  7 | 
# identifiers of the supported algorithms; compared against config["algorithm"]
DQN = "dqn"
MC_DROPOUT = "mc_dropout"
CONCRETE_DROPOUT = "concrete_dropout"
BAYES_BY_BACKPROP = "bayes_by_backprop"
MNF = "mnf"
# every value `train_dqn` accepts for config["algorithm"]
ALLOWED_NETWORK_CONFIGS = {DQN, MC_DROPOUT, CONCRETE_DROPOUT, BAYES_BY_BACKPROP, MNF}
# algorithms whose `train()` result is unpacked as a (loss, kl/regularization loss) pair
BAYES_NETWORK_CONFIGS = {MC_DROPOUT, CONCRETE_DROPOUT, BAYES_BY_BACKPROP, MNF}
 15 | 
 16 | 
 17 | def train_episode(env, train_net, target_net, config):
 18 |     rewards = 0
 19 |     reward_list = []
 20 |     losses = []
 21 |     kl_losses = []
 22 |     state = env.reset()
 23 |     algorithm = config["algorithm"]
 24 | 
 25 |     for step in range(1, config["step_limit"]+1):
 26 |         if config["env_render"] == True:
 27 |             env.render()
 28 | 
 29 |         # choose next action base on network
 30 |         if algorithm == DQN:
 31 |             action = train_net.get_action(state, epsilon=config["epsilon"])
 32 |         elif algorithm == BAYES_BY_BACKPROP:
 33 |             action = train_net.get_action(state, same_noise=True)
 34 |         elif algorithm == MNF:
 35 |             action = train_net.get_action(state, same_noise=True)
 36 |         elif algorithm == MC_DROPOUT:
 37 |             action = train_net.get_action(state, training=True)
 38 |         elif algorithm == CONCRETE_DROPOUT:
 39 |             action = train_net.get_action(state, training=True)
 40 | 
 41 |         prev_state = state  # store old observations
 42 |         state, reward, done, _ = env.step(action)  # execute action, observe reward and next state
 43 |         rewards = rewards + reward
 44 | 
 45 |         if step == (config["step_limit"]):
 46 |             done = True
 47 | 
 48 |         # store transitions
 49 |         exp = {'s': prev_state, 'a': action, 'r': reward, 's_next': state, 'end': done}
 50 |         train_net.add_experience(exp)
 51 | 
 52 |         if step % config["gradient_steps"] == 0:
 53 |             if algorithm in BAYES_NETWORK_CONFIGS:
 54 |                 loss, kl_loss = train_net.train(target_net)
 55 |                 kl_losses.append(kl_loss)
 56 |                 losses.append(loss)
 57 |             else:
 58 |                 loss = train_net.train(target_net)
 59 |                 losses.append(loss)
 60 | 
 61 |         # copy weights every 'copy_steps' to target network
 62 |         if step % config["copy_steps"] == 0:
 63 |             target_net.copy_weights(train_net)
 64 | 
 65 |         if done:
 66 |             state = env.reset()
 67 |             reward_list.append(rewards)
 68 |             rewards = 0
 69 | 
 70 |     mean_loss = np.mean(losses)
 71 | 
 72 |     if algorithm in BAYES_NETWORK_CONFIGS:
 73 |         mean_kl = np.mean(kl_losses)
 74 |         return reward_list[0], step, mean_loss, mean_kl
 75 | 
 76 |     else:
 77 |         return reward_list[0], step, mean_loss
 78 | 
 79 | 
 80 | def test_policy(env, train_net, config, video=False):
 81 |     if video:
 82 |         env = wrappers.Monitor(env, os.path.join(os.getcwd(), "videos"), force=True)
 83 | 
 84 |     rewards = 0
 85 |     state = env.reset()
 86 |     algorithm = config["algorithm"]
 87 | 
 88 |     for step in range(config["step_limit"]):
 89 |         if config["env_render"] == True:
 90 |             env.render()
 91 | 
 92 |         # choose next action base on network
 93 |         if algorithm == DQN:
 94 |             action = train_net.get_action(state, epsilon=0)
 95 |         elif algorithm == BAYES_BY_BACKPROP:
 96 |             action = train_net.get_action(state, training=False)
 97 |         elif algorithm == MNF:
 98 |             action = train_net.get_action(state, training=False)
 99 |         elif algorithm == MC_DROPOUT:
100 |             action = train_net.get_action(state, training=False)
101 |         elif algorithm == CONCRETE_DROPOUT:
102 |             action = train_net.get_action(state, training=False)
103 | 
104 |         state, reward, done, _ = env.step(action)
105 |         rewards = rewards + reward
106 | 
107 |         if step == (config["step_limit"] - 1):
108 |             done = True
109 | 
110 |         if done:
111 |             break
112 | 
113 |     return rewards, step
114 | 
115 | 
def train_dqn(config, env, train_net, target_net, run_id):
    """
    Train a (Bayesian) DQN for `config["epochs_num"]` epochs, periodically evaluate it and optionally
    plot and save the results.

    :param config: dict read for "algorithm", "epsilon", "epochs_num", "test_episodes", "plot_avg_reward",
        "save", for the plain DQN additionally "epsilon_min" and "epsilon_decay", and "env_name" when saving.
        The dict itself is not modified.
    :param env: environment offering `reset()`, `step(action)` and `render()`.
    :param train_net: network that is trained.
    :param target_net: target network, initialized with the weights of `train_net`.
    :param run_id: identifier used in the names of the written plot and result files.
    :raises AssertionError: if config["algorithm"] is not one of ALLOWED_NETWORK_CONFIGS.
    """
    algorithm = config["algorithm"]
    if algorithm not in ALLOWED_NETWORK_CONFIGS:
        raise AssertionError(f"'algorithm' has to be one of {ALLOWED_NETWORK_CONFIGS} but is set to {algorithm}.")

    epsilon = config["epsilon"]
    n_epochs = config["epochs_num"]
    train_losses = np.empty(n_epochs)
    # zero-initialized so the plotted curve is well defined for algorithms without a kl term
    train_kl = np.zeros(n_epochs)
    train_rewards = np.empty(n_epochs)

    test_rewards = [0]
    test_iterations = [0]
    mean_kl = 0
    total_steps = 0

    # initialize train and target net (one forward pass so the variables exist before copying)
    state = env.reset()
    _ = train_net.get_action(state)
    _ = target_net.get_action(state)
    if algorithm in {BAYES_BY_BACKPROP, MNF}:
        train_net.model.kl_div(same_noise=True)
        target_net.model.kl_div(same_noise=True)
    target_net.copy_weights(train_net)  # initialize with same weights

    for n in range(n_epochs):
        env.reset()  # initialize sequence

        if algorithm == DQN:
            epsilon = max(config["epsilon_min"], epsilon * config["epsilon_decay"])
            # train_episode reads the exploration rate from its config, so the decayed value has to be
            # handed over explicitly; a copy keeps the caller's config untouched
            episode_config = dict(config, epsilon=epsilon)
            train_reward, steps, mean_loss = train_episode(env, train_net, target_net, episode_config)
        else:
            if algorithm in {BAYES_BY_BACKPROP, MNF} and n > 0:
                train_net.model.reset_noise()
            train_reward, steps, mean_loss, mean_kl = train_episode(env, train_net, target_net, config)

        total_steps = total_steps + steps
        train_losses[n] = mean_loss
        train_kl[n] = mean_kl  # stays 0 for the plain DQN
        train_rewards[n] = train_reward
        avg_train_rewards = train_rewards[max(0, n - 99):(n + 1)].mean()  # average reward of the last 100 episodes

        if n % config["test_episodes"] == 0:
            if n == 0:  # first episode is burn in phase
                total_reward = 0
                iterations = 0
            else:
                total_reward, iterations = test_policy(env, train_net, config)

            test_rewards.append(total_reward)
            test_iterations.append(total_steps)

            print(f"Epoch: {n}, reward: {total_reward}, loss: {mean_loss}, kl-loss: {mean_kl} iterations: {iterations}"
                  f", epsilon: {epsilon}, avg reward (last 100): {avg_train_rewards}")

    if config["plot_avg_reward"]:
        directory = f"results/plots/{algorithm}/"
        os.makedirs(directory, exist_ok=True)

        plt.figure()
        filename = f"AccumulatedReward_{algorithm}_{str(run_id)}.pdf"
        plt.plot(test_iterations, test_rewards, linewidth=0.75)
        plt.xlabel("Iterations")
        plt.legend(["Accumulated reward"])
        plt.tight_layout()
        plt.savefig(os.path.join(directory, filename))
        plt.close()

        plt.figure()
        filename = f"Loss_{algorithm}_{str(run_id)}.pdf"
        plt.plot(range(n_epochs), train_losses, linewidth=0.75)
        plt.plot(range(n_epochs), train_kl, linewidth=0.75)
        plt.xlabel("Iterations")
        plt.legend(["Mean loss", "Mean kl-loss"])
        plt.tight_layout()
        plt.savefig(os.path.join(directory, filename))
        plt.close()

    if config["save"]:
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        save_dir = f"results/{config['env_name']}/{algorithm}/" + str(run_id) + '_' + current_time
        os.makedirs(save_dir, exist_ok=True)
        # np.savez appends ".npz" to the given name, i.e. the archive is written next to `save_dir`
        np.savez(save_dir, test_rewards=test_rewards, test_iterations=test_iterations)
214 | 


--------------------------------------------------------------------------------
/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/envs/__init__.py


--------------------------------------------------------------------------------
/envs/env_utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utils file for OpenAI Gym environments.
 3 | """
 4 | 
 5 | 
 6 | class WrapFrameSkip():
 7 |     """
 8 |     Wraps OpenAi gym environments to skip frames. This is also know as action repeat.
 9 |     """
10 |     def __init__(self, env, frameskip):
11 |         assert frameskip >= 1
12 |         self._env = env
13 |         self._frameskip = frameskip
14 |         self.observation_space = env.observation_space
15 |         self.action_space = env.action_space
16 | 
17 |     def reset(self):
18 |         return self._env.reset()
19 | 
20 |     def step(self, action):
21 |         sum_rew = 0
22 |         for _ in range(self._frameskip):
23 |             obs, rew, done, info = self._env.step(action)
24 |             sum_rew += rew
25 |             if done:
26 |                 break
27 |         return obs, sum_rew, done, info
28 | 
29 |     def render(self, mode='human'):
30 |         return self._env.render(mode=mode)
31 | 
32 |     def close(self):
33 |         self._env.close()
34 | 


--------------------------------------------------------------------------------
/envs/nchain.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | from gym import spaces
 3 | import numpy as np
 4 | 
 5 | 
 6 | class NChainEnv(gym.Env):
 7 |     """
 8 |     n-Chain environment.
 9 |     The environment consists of a chain of N states and the agent always starts in state s2, from where it can either
10 |     move left or right. In state s1, the agent receives a small reward of r = 0.001 and a larger reward r = 1 in state
11 |     sN. This environment is described in Deep Exploration via Bootstrapped DQN
12 |     (https://papers.nips.cc/paper/6501-deep-exploration-via-bootstrapped-dqn.pdf).
13 | 
14 |     Code from:
15 |     Randomized Value Functions via Multiplicative Normalizing Flows
16 |     (https://github.com/facebookresearch/RandomizedValueFunctions)
17 |     """
18 |     def __init__(self, n):
19 |         self.n = n
20 |         self.state = 1  # Start at state s2
21 |         self.action_space = spaces.Discrete(2)
22 |         self.observation_space = spaces.Discrete(self.n)
23 |         self.max_nsteps = n + 8
24 | 
25 |     def step(self, action):
26 |         assert self.action_space.contains(action)
27 |         v = np.arange(self.n)
28 |         reward = lambda s, a: 1.0 if (s == (self.n - 1) and a == 1) else (0.001 if (s == 0 and a == 0) else 0)
29 |         is_done = lambda nsteps: nsteps >= self.max_nsteps
30 | 
31 |         r = reward(self.state, action)
32 |         if action:  # forward
33 |             if self.state != self.n - 1:
34 |                 self.state += 1
35 |         else:  # backward
36 |             if self.state != 0:
37 |                 self.state -= 1
38 |         self.nsteps += 1
39 |         return (v <= self.state).astype('float32'), r, is_done(self.nsteps), None
40 | 
41 |     def reset(self):
42 |         v = np.arange(self.n)
43 |         self.state = 1
44 |         self.nsteps = 0
45 |         return (v <= self.state).astype('float32')
46 | 


--------------------------------------------------------------------------------
/normalizingflows/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/normalizingflows/__init__.py


--------------------------------------------------------------------------------
/normalizingflows/flow_catalog.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Implementation of various Normalizing Flows.
  3 | Tensorflow Bijectors are used as base class. To perform density estimation and sampling, four functions have to be defined
  4 | for each Normalizing Flow.
  5 | 
  6 | 
  7 | 1. _forward:
  8 | Turns one random outcome into another random outcome from a different distribution.
  9 | 
 10 | 2. _inverse:
 11 | Useful for 'reversing' a transformation to compute one probability in terms of another.
 12 | 
 13 | 3. _forward_log_det_jacobian:
 14 | The log of the absolute value of the determinant of the matrix of all first-order partial derivatives of the function.
 15 | 
 16 | 4. _inverse_log_det_jacobian:
 17 | The log of the absolute value of the determinant of the matrix of all first-order partial derivatives of the inverse function.
 18 | 
 19 | 
 20 | "forward" and "forward_log_det_jacobian" have to be defined to perform sampling.
 21 | "inverse" and "inverse_log_det_jacobian" have to be defined to perform density estimation.
 22 | """
 23 | 
 24 | import numpy as np
 25 | import tensorflow as tf
 26 | import tensorflow_probability as tfp
 27 | 
 28 | 
# short aliases for the TensorFlow Probability / Keras namespaces used in this module
tfd = tfp.distributions
tfb = tfp.bijectors
tfk = tf.keras

# set the Keras default float type to float32 (global setting, applied on import)
tf.keras.backend.set_floatx('float32')

# print the library versions whenever this module is imported
print('tensorflow: ', tf.__version__)
print('tensorflow-probability: ', tfp.__version__)
 37 | 
 38 | 
 39 | '''--------------------------------------- Masked Autoregressive Flow -----------------------------------------------'''
 40 | 
 41 | 
 42 | class Made(tfk.layers.Layer):
 43 |     """
 44 |     Implementation of a Masked Autoencoder for Distribution Estimation (MADE) [Germain et al. (2015)].
 45 |     The existing TensorFlow bijector "AutoregressiveNetwork" is used. The output is reshaped to output one shift vector
 46 |     and one log_scale vector.
 47 | 
 48 |     :param params: Python integer specifying the number of parameters to output per input.
 49 |     :param event_shape: Python list-like of positive integers (or a single int), specifying the shape of the input to this layer, which is also the event_shape of the distribution parameterized by this layer. Currently only rank-1 shapes are supported. That is, event_shape must be a single integer. If not specified, the event shape is inferred when this layer is first called or built.
 50 |     :param hidden_units: Python list-like of non-negative integers, specifying the number of units in each hidden layer.
 51 |     :param activation: An activation function. See tf.keras.layers.Dense. Default: None.
 52 |     :param use_bias: Whether or not the dense layers constructed in this layer should have a bias term. See tf.keras.layers.Dense. Default: True.
 53 |     :param kernel_regularizer: Regularizer function applied to the Dense kernel weight matrices. Default: None.
 54 |     :param bias_regularizer: Regularizer function applied to the Dense bias weight vectors. Default: None.
 55 |     """
 56 | 
 57 |     def __init__(self, params, event_shape=None, hidden_units=None, activation=None, use_bias=True,
 58 |                  kernel_regularizer=None, bias_regularizer=None, name="made"):
 59 | 
 60 |         super(Made, self).__init__(name=name)
 61 | 
 62 |         self.params = params
 63 |         self.event_shape = event_shape
 64 |         self.hidden_units = hidden_units
 65 |         self.activation = activation
 66 |         self.use_bias = use_bias
 67 |         self.kernel_regularizer = kernel_regularizer
 68 |         self.bias_regularizer = bias_regularizer
 69 | 
 70 |         self.network = tfb.AutoregressiveNetwork(params=params, event_shape=event_shape, hidden_units=hidden_units,
 71 |                                                  activation=activation, use_bias=use_bias, kernel_regularizer=kernel_regularizer, 
 72 |                                                  bias_regularizer=bias_regularizer)
 73 | 
 74 |     def call(self, x):
 75 |         shift, log_scale = tf.unstack(self.network(x), num=2, axis=-1)
 76 | 
 77 |         return shift, tf.math.tanh(log_scale)
 78 | 
 79 | 
 80 | '''------------------------------------- Batch Normalization Bijector -----------------------------------------------'''
 81 | 
 82 | 
 83 | class BatchNorm(tfb.Bijector):
 84 |     """
 85 |     Implementation of a Batch Normalization layer for use in normalizing flows according to [Papamakarios et al. (2017)].
 86 |     The moving average of the layer statistics is adapted from [Dinh et al. (2016)].
 87 | 
 88 |     :param eps: Hyperparameter that ensures numerical stability, if any of the elements of v is near zero.
 89 |     :param decay: Weight for the update of the moving average, e.g. avg = (1-decay)*avg + decay*new_value.
 90 |     """
 91 | 
 92 |     def __init__(self, eps=1e-5, decay=0.95, validate_args=False, name="batch_norm"):
 93 |         super(BatchNorm, self).__init__(
 94 |             forward_min_event_ndims=1,
 95 |             inverse_min_event_ndims=1,
 96 |             validate_args=validate_args,
 97 |             name=name)
 98 | 
 99 |         self._vars_created = False
100 |         self.eps = eps
101 |         self.decay = decay
102 | 
103 |     def _create_vars(self, x):
104 |         # account for 1xd and dx1 vectors
105 |         if len(x.get_shape()) == 1:
106 |             n = x.get_shape().as_list()[0]
107 |         if len(x.get_shape()) == 2: 
108 |             n = x.get_shape().as_list()[1]
109 | 
110 |         self.beta = tf.compat.v1.get_variable('beta', [1, n], dtype=tf.float32)
111 |         self.gamma = tf.compat.v1.get_variable('gamma', [1, n], dtype=tf.float32)
112 |         self.train_m = tf.compat.v1.get_variable(
113 |             'mean', [1, n], dtype=tf.float32, trainable=False)
114 |         self.train_v = tf.compat.v1.get_variable(
115 |             'var', [1, n], dtype=tf.float32, trainable=False)
116 | 
117 |         self._vars_created = True
118 | 
119 |     def _forward(self, u):
120 |         if not self._vars_created:
121 |             self._create_vars(u)
122 |         return (u - self.beta) * tf.exp(-self.gamma) * tf.sqrt(self.train_v + self.eps) + self.train_m
123 | 
124 |     def _inverse(self, x):
125 |         # Eq. 22 of [Papamakarios et al. (2017)]. Called during training of a normalizing flow.
126 |         if not self._vars_created:
127 |             self._create_vars(x)
128 | 
129 |         # statistics of current minibatch
130 |         m, v = tf.nn.moments(x, axes=[0], keepdims=True)
131 |         
132 |         # update train statistics via exponential moving average
133 |         self.train_v.assign_sub(self.decay * (self.train_v - v))
134 |         self.train_m.assign_sub(self.decay * (self.train_m - m))
135 | 
136 |         # normalize using current minibatch statistics, followed by BN scale and shift
137 |         return (x - m) * 1. / tf.sqrt(v + self.eps) * tf.exp(self.gamma) + self.beta
138 | 
139 |     def _inverse_log_det_jacobian(self, x):
140 |         # at training time, the log_det_jacobian is computed from statistics of the
141 |         # current minibatch.
142 |         if not self._vars_created:
143 |             self._create_vars(x)
144 |             
145 |         _, v = tf.nn.moments(x, axes=[0], keepdims=True)
146 |         abs_log_det_J_inv = tf.reduce_sum(
147 |             self.gamma - .5 * tf.math.log(v + self.eps))
148 |         return abs_log_det_J_inv
149 | 


--------------------------------------------------------------------------------
/normalizingflows/nf_utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Implementation of functions that are important for training normalizing flows.
 3 | """
 4 | 
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | import tensorflow_probability as tfp
 8 | tfd = tfp.distributions
 9 | tfb = tfp.bijectors
10 | 
11 | 
12 | '''----------------------------------- Normal distribution with reparametrization -----------------------------------'''
13 | 
14 | 
class NormalReparamMNF(tf.Module):
    """
    Normal distribution with reparameterization to be able to learn the mean and variance.

    The distribution is diagonal: it is parameterized by a mean tensor and a log-variance tensor of the given shape.

    :param shape: Shape of the mean / log-variance tensors. ``sample`` indexes ``shape[0]`` and slices the mean as
        ``mean[None, :]`` (assumes a rank-1 shape - TODO confirm against callers).
    :param var_init (float): factor multiplied onto the Glorot-normal draw that initializes ``log_var``, optional
    :param mean_init (float): offset added to the initial ``log_var``, optional.
        NOTE(review): despite its name this value is not applied to ``self.mean`` - confirm whether that is intended.
    """
    def __init__(self, shape, var_init=1.0, mean_init=0.0):
        super(NormalReparamMNF, self).__init__()

        glorot = tf.keras.initializers.GlorotNormal()  # Xavier normal initializer

        self.shape = shape
        # trainable parameters of the distribution
        self.mean = tf.Variable(glorot(shape), trainable=True)
        self.log_var = tf.Variable(glorot(shape) * var_init + mean_init, trainable=True)
        # stored noise, used by sample(..., same_noise=True) and refreshed by reset_noise()
        self.epsilon = tf.Variable(tf.random.normal(self.shape), trainable=False)

    @tf.function
    def sample(self, batch_size, same_noise=False):
        """
        Draw samples via the reparameterization mean + sqrt(exp(log_var)) * epsilon.

        :param batch_size: Number of samples (size of the leading dimension of the result).
        :param same_noise: If True, the stored ``self.epsilon`` is repeated for every sample of the batch;
            otherwise fresh standard-normal noise of shape [batch_size, shape[0]] is drawn.
        :return: Tensor of samples with leading dimension ``batch_size``.
        """
        mean = tf.tile(self.mean[None, :], [batch_size, 1])  # split tensor into batches
        if same_noise:
            epsilon = tf.expand_dims(self.epsilon, axis=0)  # expand batch size dimension
            epsilon = tf.repeat(epsilon, batch_size, axis=0)  # use the same noise/epsilon for the whole batch
        else:
            epsilon = tf.random.normal([batch_size, self.shape[0]])
        var = tf.exp(self.log_var)
        samples = mean + tf.sqrt(var) * epsilon

        return samples

    @tf.function
    def log_prob(self, samples):
        """
        Log-density of a diagonal Gaussian evaluated at ``samples``.

        :param samples: Tensor whose last dimension is the event dimension; the squared error is summed over axis 1.
        :return: Tensor of log-probabilities, one value per entry along axis 0.
        """
        dims = float(samples.shape[-1])
        var = tf.exp(self.log_var)
        exponent = tf.reduce_sum(tf.square(samples - self.mean)/var, axis=1)
        # log-determinant of a diagonal covariance is the sum of the log-variances
        log_det_var = tf.reduce_sum(self.log_var)
        log_prob = -0.5 * (dims * tf.math.log(2 * np.pi) + log_det_var + exponent)

        return log_prob

    def prob(self, samples):
        """Return the density at ``samples``, i.e. exp(log_prob(samples))."""
        log_prob = self.log_prob(samples)

        return tf.exp(log_prob)

    def log_std(self):
        """Return the log standard deviation, computed as half of the log-variance."""
        return 0.5 * self.log_var

    def reset_noise(self):
        """Overwrite the stored noise ``self.epsilon`` with a fresh standard-normal draw."""
        self.epsilon.assign(tf.random.normal(self.shape))
66 | 


--------------------------------------------------------------------------------
/normalizingflows/normalizing_flow.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import tensorflow_probability as tfp
 3 | tfb = tfp.bijectors
 4 | 
 5 | 
 6 | class NormalizingFlow(tf.Module):
 7 |     """
 8 |     Stacking of several normalizing flows. Constitutes a normalizing flow itself.
 9 |     """
10 | 
11 |     def __init__(self, flows, chain=True, name=None, **kwargs):
12 |         super(NormalizingFlow).__init__(**kwargs)
13 |         if not isinstance(name, str):
14 |             name = "flow"
15 | 
16 |         self.flows = flows
17 |         self.chain = chain  # use tfb.Chain
18 |         if chain:
19 |             self.flow = tfb.Chain(bijectors=list(reversed(flows)), name=name)
20 | 
21 |     @tf.function
22 |     def forward(self, z):  # z -> x
23 |         if self.chain:
24 |             x = self.flow.forward(z)
25 |             log_dets = self.flow.forward_log_det_jacobian(z, event_ndims=1)
26 |         else:
27 |             log_dets = tf.zeros(tf.shape(z)[0])
28 |             zk = z
29 |             for flow in self.flows:
30 |                 log_dets = log_dets + flow._forward_log_det_jacobian(zk)  # "-" already in forward_log_det_jacobian
31 |                 zk = flow.forward(zk)
32 | 
33 |             x = zk
34 | 
35 |         return x, log_dets
36 | 
37 |     @tf.function
38 |     def inverse(self, x):  # x -> z
39 |         if self.chain:
40 |             z = self.flow.inverse(x)
41 |             log_dets = self.flow.inverse_log_det_jacobian(x, event_ndims=1)
42 |         else:
43 |             log_dets = tf.zeros(tf.shape(x)[0])
44 |             zk = x
45 |             for flow in reversed(self.flows):
46 |                 log_dets = log_dets + flow._inverse_log_det_jacobian(zk)
47 |                 zk = flow.inverse(zk)
48 | 
49 |             z = zk
50 | 
51 |         return z, log_dets
52 | 
53 | 
class NormalizingFlowModel(NormalizingFlow):
    """
    Normalizing flow model: a base distribution combined with a stack of flows.

    :param base: Distribution object exposing log_prob() and sample(); sample_no_noise() also reads its ``mean``.
    :param flows: List of flows handed to the NormalizingFlow base class.
    :param name: Name handed to the NormalizingFlow base class.
    """

    def __init__(self, base, flows, name="transformed_dist", **kwargs):
        super().__init__(flows, name=name, **kwargs)

        self.base = base  # distribution class that exposes a log_prob() and sample() method
        self.flows = flows

    def _transform_latent(self, z):
        """Evaluate the base log-probability of z, push z through the flows and combine both results."""
        base_prob = self.base.log_prob(z)
        x, log_dets = self.forward(z)

        return x, base_prob + log_dets

    def log_prob(self, x):
        """Return the base log-probability of the inverted x plus the inverse log-det-Jacobian."""
        latent, log_dets = self.inverse(x)

        return self.base.log_prob(latent) + log_dets

    def prob(self, x):
        """Return exp(log_prob(x))."""
        log_density = self.log_prob(x)

        return tf.exp(log_density)

    def sample(self, batch_size, same_noise=False):
        """
        Sample from the base distribution and transform the samples with the flows.

        :param batch_size: Number of samples requested from the base distribution.
        :param same_noise: Forwarded to the sample() method of the base distribution.
        :return: Tuple (x, base log-probability of the base samples + forward log-det-Jacobian).
        """
        latent = self.base.sample(batch_size, same_noise=same_noise)

        return self._transform_latent(latent)

    def sample_no_noise(self, batch_size):
        """
        Transform the mean of the base distribution, repeated batch_size times, with the flows.

        :param batch_size: Number of repetitions of the base mean along a new leading axis.
        :return: Tuple (x, base log-probability of the repeated mean + forward log-det-Jacobian).
        """
        batched_mean = tf.repeat(tf.expand_dims(self.base.mean, axis=0), batch_size, axis=0)

        return self._transform_latent(batched_mean)
86 | 


--------------------------------------------------------------------------------
/plots/BayesByBackprop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/BayesByBackprop.png


--------------------------------------------------------------------------------
/plots/ConcreteDropout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/ConcreteDropout.png


--------------------------------------------------------------------------------
/plots/ConcreteDropout_heterostatic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/ConcreteDropout_heterostatic.png


--------------------------------------------------------------------------------
/plots/MCDropout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/MCDropout.png


--------------------------------------------------------------------------------
/plots/MCDropout_heteroscedastic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/MCDropout_heteroscedastic.png


--------------------------------------------------------------------------------
/plots/MNF_all_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/MNF_all_layers.png


--------------------------------------------------------------------------------
/plots/MNF_last_layers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/MNF_last_layers.png


--------------------------------------------------------------------------------
/plots/avg_acc_reward_cartpole.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/avg_acc_reward_cartpole.png


--------------------------------------------------------------------------------
/plots/avg_acc_reward_mountaincar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LukasRinder/bayesian-neural-networks/e21e058ffbbe39ff4359b072248c6ecddec73877/plots/avg_acc_reward_mountaincar.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow-gpu>=2.0
2 | tensorflow-probability>=0.8.0
3 | tensorflow-datasets>=1.2.0
4 | numpy<1.19.0,>=1.16.0
5 | matplotlib>=3.1.1
6 | jupyterlab>=1.1.4
7 | gym


--------------------------------------------------------------------------------
/toy_regression_bayes.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | import matplotlib.pyplot as plt
  4 | 
  5 | from data.toy_regression import ToyRegressionData
  6 | from bayes.MNF import DenseMNF
  7 | from bayes.Bayes_by_Backprop import BayesByBackprop
  8 | 
# shorthand for the Keras layers namespace
tfkl = tf.keras.layers

# identifiers accepted by the `data` argument of fit_regression
TOY_DATA = "toy"
IAN_DATA = "ian"
SAMPLE_DATA = "sample"
ALLOWED_DATA_CONFIGS = {TOY_DATA, IAN_DATA, SAMPLE_DATA}

# identifiers accepted by the `network` argument of fit_regression
MNF = "mnf"
BAYES_BY_BACKPROP = "bayesbybackprop"
DENSE = "dense"
ALLOWED_NETWORK_CONFIGS = {MNF, BAYES_BY_BACKPROP, DENSE}
 20 | 
 21 | 
 22 | class MLP(tf.Module):
 23 |     """
 24 |     Simple Multi-layer Perceptron Model.
 25 |     """
 26 |     def __init__(self):
 27 |         super(MLP, self).__init__()
 28 |         self.input_layer = tfkl.InputLayer(input_shape=(1,))
 29 |         self.hidden_layer_1 = tfkl.Dense(100, activation='relu')
 30 |         self.hidden_layer_2 = tfkl.Dense(100, activation='relu')
 31 |         self.output_layer = tfkl.Dense(1, activation='linear')
 32 | 
 33 |     @tf.function
 34 |     def __call__(self, x, *args, **kwargs):
 35 |         y = self.input_layer(x)
 36 |         y = self.hidden_layer_1(y)
 37 |         y = self.hidden_layer_2(y)
 38 |         y = self.output_layer(y)
 39 |         return y
 40 | 
 41 | 
 42 | class BNN_MNF(tf.Module):
 43 |     """
 44 |     Bayesian Neural Network with fully-connected layers utilizing Multiplicative Normalizing Flows by Christos Louizos, Max Welling
 45 |     (Jun 2017).
 46 |     """
 47 |     def __init__(self, input_dim=1, hidden_units=[100, 100], output_dim=1, hidden_bayes=False, use_z=True, max_std=1.0):
 48 |         super(BNN_MNF, self).__init__()
 49 |         self.input_layer = tfkl.InputLayer(input_shape=(input_dim,))
 50 | 
 51 |         self.hidden_layers = []
 52 |         self.hidden_bayes = hidden_bayes
 53 |         for i in hidden_units:
 54 |             if self.hidden_bayes:
 55 |                 self.hidden_layers.append(DenseMNF(n_out=i, use_z=use_z, max_std=max_std))
 56 |             else:
 57 |                 self.hidden_layers.append(tfkl.Dense(i, activation='relu', kernel_initializer='RandomNormal'))
 58 | 
 59 |         self.dense_mnf_out = DenseMNF(n_out=output_dim, use_z=use_z, max_std=max_std)
 60 | 
 61 |     @tf.function
 62 |     def __call__(self, inputs, same_noise=False, training=True, *args, **kwargs):
 63 |         out = self.input_layer(inputs)
 64 |         for layer in self.hidden_layers:
 65 |             if self.hidden_bayes:
 66 |                 out = layer(out, same_noise=same_noise, training=training)
 67 |                 out = tf.nn.relu(out)
 68 |             else:
 69 |                 out = layer(out)  # relu already in keras layer
 70 |         out = self.dense_mnf_out(out, same_noise=same_noise, training=training)
 71 | 
 72 |         return out
 73 | 
 74 |     def kl_div(self, same_noise=True):
 75 |         """
 76 |         Compute current KL divergence of all layers.
 77 |         Can be used as a regularization term during training.
 78 |         """
 79 |         kldiv = 0
 80 |         if self.hidden_bayes:
 81 |             for dense_mnf in self.hidden_layers:
 82 |                 kldiv = kldiv + dense_mnf.kl_div(same_noise)
 83 |         kldiv = kldiv + self.dense_mnf_out.kl_div(same_noise)
 84 |         return kldiv
 85 | 
 86 |     def reset_noise(self):
 87 |         if self.hidden_bayes:
 88 |             for dense_mnf in self.hidden_layers:
 89 |                 dense_mnf.reset_noise()
 90 |         self.dense_mnf_out.reset_noise()
 91 | 
 92 | 
 93 | class BNN_BBB(tf.Module):
 94 |     """
 95 |     Bayesian Neural Network with fully-connected layers utilizing Bayes by Backprop by Blundell et al. (2015).
 96 |     """
 97 |     def __init__(self, input_dim=1, hidden_units=[100, 100], output_dim=1, hidden_bayes=False, max_std=1.0):
 98 |         super(BNN_BBB, self).__init__()
 99 |         self.input_layer = tfkl.InputLayer(input_shape=(input_dim,))
100 | 
101 |         self.hidden_layers = []
102 |         self.hidden_bayes = hidden_bayes
103 |         for i in hidden_units:
104 |             if hidden_bayes:
105 |                 self.hidden_layers.append(BayesByBackprop(n_out=i, max_std=max_std))
106 |             else:
107 |                 self.hidden_layers.append(tfkl.Dense(i, activation='relu', kernel_initializer='RandomNormal'))
108 |         self.dense_bbb_out = BayesByBackprop(n_out=output_dim, max_std=max_std)
109 | 
110 |     @tf.function
111 |     def __call__(self, inputs, same_noise=False, training=True, *args, **kwargs):
112 |         out = self.input_layer(inputs)
113 |         for layer in self.hidden_layers:
114 |             if self.hidden_bayes:
115 |                 out = layer(out, same_noise=same_noise, training=training)
116 |                 out = tf.nn.relu(out)
117 |             else:
118 |                 out = layer(out)  # relu already in keras layer
119 |         out = self.dense_bbb_out(out, same_noise=same_noise, training=training)
120 |         return out
121 | 
122 |     def kl_div(self, same_noise=True):
123 |         """
124 |         Compute current KL divergence of the Bayes by Backprop layers.
125 |         Used as a regularization term during training.
126 |         """
127 |         kldiv = 0
128 |         if self.hidden_bayes:
129 |             for dense_bbb in self.hidden_layers:
130 |                 kldiv = kldiv + dense_bbb.kl_div(same_noise)
131 |         kldiv = kldiv + self.dense_bbb_out.kl_div(same_noise)
132 |         return kldiv
133 | 
134 |     def reset_noise(self):
135 |         """
136 |         Re-sample noise/epsilon parameters of the Bayes by Backprop layers. Required for the case of having the same
137 |         epsilon parameters across one batch.
138 |         """
139 |         if self.hidden_bayes:
140 |             for dense_bbb in self.hidden_layers:
141 |                 dense_bbb.reset_noise()
142 |         self.dense_bbb_out.reset_noise()
143 | 
144 | 
@tf.function
def loss_fn(y_train, x_train, model, bayes, reg=1.0, same_noise=False):
    """
    Compute the training loss: mean squared error plus, for Bayesian models, a scaled KL term.

    :param y_train: Regression targets.
    :param x_train: Model inputs; the size of the first dimension scales the KL term.
    :param model: Callable model; must provide kl_div() if bayes is True.
    :param bayes: Whether the model is Bayesian, i.e. whether a KL term is added.
    :param reg: Additional divisor of the KL term.
    :param same_noise: Forwarded to the forward pass of a Bayesian model. kl_div() is called without arguments and
        therefore uses its own default.
    :return: Tuple (mse + kl_loss, kl_loss); kl_loss is 0 for non-Bayesian models.
    """
    if not bayes:
        predictions = model(x_train)
        mse = tf.reduce_mean(tf.losses.mse(y_train, predictions))
        kl_loss = 0
        return mse + kl_loss, kl_loss

    predictions = model(x_train, same_noise=same_noise)
    mse = tf.reduce_mean(tf.losses.mse(y_train, predictions))
    # The KL divergence is divided by the total number of samples in an epoch (batch_size * steps_per_epoch),
    # here with steps_per_epoch = 1, and additionally by the regularization parameter.
    kl_total = model.kl_div()
    kl_scale = tf.cast(x_train.shape[0]*reg, tf.float32)
    kl_loss = kl_total / kl_scale

    return mse + kl_loss, kl_loss
157 | 
158 | 
def fit_regression(network, hidden_bayes=False, same_noise=False, max_std=0.5, data="ian", save=False):
    """
    Fit a regression network to one of the toy data sets and plot the loss curves and the predictions.

    :param network: One of ALLOWED_NETWORK_CONFIGS; selects BNN_MNF, BNN_BBB or the plain MLP.
    :param hidden_bayes: False: only the last layer is Bayesian, True: all layers are Bayesian.
    :param same_noise: Whether the same noise/epsilon is used within a batch; if set, the noise is re-sampled
        after every optimization step.
    :param max_std: Forwarded to the Bayesian network constructors.
    :param data: One of ALLOWED_DATA_CONFIGS; selects the training data.
    :param save: If True the prediction plot is written to plots/<network>.pdf, otherwise the plots are shown.
    :raises AssertionError: If `data` or `network` is not one of the allowed configurations.
    """

    # load data
    # The .npz archives are opened in a `with` block so that the underlying file handle is closed again; indexing
    # the archive returns the arrays, so they remain usable after the archive is closed.
    if data not in ALLOWED_DATA_CONFIGS:
        raise AssertionError(f"'data' has to be in {ALLOWED_DATA_CONFIGS} but was set to {data}.")
    elif data == TOY_DATA:
        with np.load("data/train_data_regression.npz") as archive:
            x_train = archive["x_train"]
            y_train = archive["y_train"]
        x_lim, y_lim = 4.5, 70.0
        reg = 10.0  # regularization parameter lambda
    elif data == IAN_DATA:
        # NOTE: allow_pickle=True unpickles objects stored in the archive; only use it with trusted files.
        with np.load("data/train_data_ian_regression.npz", allow_pickle=True) as archive:
            x_train = archive["x_train"]
            y_train = archive["y_train"]
        x_lim, y_lim = 12.0, 8.0
        reg = 30  # regularization parameter lambda
    elif data == SAMPLE_DATA:
        n_samples = 20
        toy_regression = ToyRegressionData()
        x_train, y_train = toy_regression.gen_data(n_samples)
        x_lim, y_lim = 4.5, 70.0
        reg = 10.0  # regularization parameter lambda

    # choose network
    if network not in ALLOWED_NETWORK_CONFIGS:
        raise AssertionError(f"'network' has to be in {ALLOWED_NETWORK_CONFIGS} but was set to {network}.")
    elif network == MNF:
        model = BNN_MNF(hidden_bayes=hidden_bayes, max_std=max_std)
        bayes = True
    elif network == BAYES_BY_BACKPROP:
        model = BNN_BBB(hidden_bayes=hidden_bayes, max_std=max_std)
        bayes = True
    elif network == DENSE:
        model = MLP()
        bayes = False

    epochs = 500
    learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(1e-2, epochs, 1e-6, power=0.5)
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn)

    # initialize: one loss evaluation before the training loop so that model.trainable_variables is populated
    _, _ = loss_fn(y_train, x_train, model, bayes, reg, same_noise)

    train_losses = []
    kl_losses = []
    # full-batch training: every epoch is a single gradient step on the whole training set
    for i in range(epochs):
        with tf.GradientTape() as tape:
            tape.watch(model.trainable_variables)
            loss, kl_loss = loss_fn(y_train, x_train, model, bayes, reg, same_noise)
        gradients = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(gradients, model.trainable_variables))

        if same_noise:
            model.reset_noise()  # sample new epsilons

        train_losses.append(loss)
        kl_losses.append(kl_loss)

        if i % int(10) == 0:
            # the value printed as "MSE" is the total loss returned by loss_fn (mse + kl_loss)
            print(f"Epoch: {i}, MSE: {loss}, KL-loss: {kl_loss}")

    # loss curves (drawn on their own figure, before the prediction figure is created)
    plt.plot(range(epochs), train_losses)
    plt.plot(range(epochs), kl_losses)
    plt.legend(["Train loss", "KL loss"])

    n_test = 500
    x_test = np.linspace(-x_lim, x_lim, n_test).reshape(n_test, 1).astype('float32')

    if bayes:
        # repeated forward passes of the Bayesian model; mean and std over the passes are plotted
        n_pred_samples = 20
        y_preds = []
        for _ in range(n_pred_samples):
            y_pred = model(x_test)
            y_preds.append(y_pred)
        plt.figure(figsize=(10, 4))
        y_preds = np.array(y_preds).reshape(n_pred_samples, n_test)
        y_preds_mean = np.mean(y_preds, axis=0)
        y_preds_std = np.std(y_preds, axis=0)

        plt.scatter(x_train, y_train, c="orangered")
        color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
        plt.plot(x_test, y_preds_mean, color=color_pred)
        # bands of one and two standard deviations around the mean prediction
        plt.fill_between(x_test.reshape(n_test,), y_preds_mean - y_preds_std, y_preds_mean + y_preds_std,
                         alpha=0.25, color=color_pred)
        plt.fill_between(x_test.reshape(n_test,), y_preds_mean - 2.0 * y_preds_std, y_preds_mean + 2.0 * y_preds_std,
                         alpha=0.35, color=color_pred)

        plt.xlim(-x_lim, x_lim)
        plt.ylim(-y_lim, y_lim)
        plt.legend(["Mean function", "Observations"])

    else:
        plt.figure(figsize=(10, 4))
        y_pred = model(x_test)
        plt.scatter(x_train, y_train, c="orangered")
        color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
        plt.plot(x_test, y_pred, color=color_pred)
        plt.xlim(-x_lim, x_lim)
        plt.ylim(-y_lim, y_lim)
        plt.legend(["Mean function", "Observations"])

    plt.tight_layout()
    if save:
        plt.savefig(f"plots/{network}.pdf")
    else:
        plt.show()
265 | 
266 | 
if __name__ == '__main__':
    # report whether TensorFlow can see a GPU
    print(f"GPU available: {tf.test.is_gpu_available()}")

    # experiment configuration, passed on to fit_regression as keyword arguments
    config = dict(
        network=MNF,  # choose from ALLOWED_NETWORK_CONFIGS
        hidden_bayes=False,  # False: last layer bayes, True: all layers bayes
        same_noise=True,  # set if same noise/epsilon should be used within a batch
        max_std=0.5,
        data=IAN_DATA,  # choose from ALLOWED_DATA_CONFIGS
        save=False,  # save images
    )

    fit_regression(**config)
281 | 


--------------------------------------------------------------------------------
/toy_regression_concrete_dropout.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import numpy as np
  3 | import tensorflow as tf
  4 | 
  5 | from data.toy_regression import ToyRegressionData
  6 | from bayes.ConcreteDropout import ConcreteDropout
  7 | from tensorflow.keras import optimizers
  8 | from tensorflow.keras.layers import InputSpec, Dense, Wrapper, Input, concatenate
  9 | from tensorflow.keras.models import Model
 10 | 
 11 | 
# identifiers of the available data sets
TOY_DATA = "toy"
IAN_DATA = "ian"
SAMPLE_DATA = "sample"
ALLOWED_DATA_CONFIGS = {TOY_DATA, IAN_DATA, SAMPLE_DATA}

# identifiers accepted by the `loss_type` argument of make_model
MSE = "mse"
HETEROSCEDASTIC = "heteroscedastic"
ALLOWED_LOSS_TYPES = {MSE, HETEROSCEDASTIC}
 20 | 
 21 | 
 22 | def mse_loss(true, pred):
 23 |     return tf.reduce_mean((true - pred) ** 2, -1)
 24 | 
 25 | 
 26 | def heteroscedastic_loss(y_train, pred):
 27 |     n_outputs = pred.shape[1] // 2
 28 |     mean = pred[:, :n_outputs]
 29 |     log_var = pred[:, n_outputs:]
 30 |     return tf.reduce_sum(0.5 * tf.exp(-1 * log_var) * tf.square(y_train - mean) + 0.5 * log_var)
 31 | 
 32 | 
 33 | def make_model(loss_type, n_features, n_outputs, n_nodes=400, dropout_reg=1e-5, wd=1e-3):
 34 |     losses = []
 35 |     inp = Input(shape=(n_features,))
 36 |     x = inp
 37 |         
 38 |     x, loss = ConcreteDropout(Dense(n_nodes, activation='relu'),
 39 |                               weight_regularizer=wd, dropout_regularizer=dropout_reg)(x)
 40 |     losses.append(loss)
 41 |     x, loss = ConcreteDropout(Dense(n_nodes, activation='relu'),
 42 |                               weight_regularizer=wd, dropout_regularizer=dropout_reg)(x)
 43 |     losses.append(loss)
 44 |     x, loss = ConcreteDropout(Dense(n_nodes, activation='relu'),
 45 |                               weight_regularizer=wd, dropout_regularizer=dropout_reg)(x)
 46 |     losses.append(loss)
 47 |     
 48 |     if loss_type == MSE:
 49 |         mean = Dense(100, activation='relu')(x)
 50 |         final_mean = Dense(n_outputs, activation='linear')(mean)
 51 |         model = Model(inp, final_mean)
 52 |         learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(1e-3, 500, 1e-5, power=0.5)
 53 |         model.compile(optimizer=optimizers.Adam(learning_rate=learning_rate_fn), loss=mse_loss)
 54 |         
 55 |     if loss_type == HETEROSCEDASTIC:
 56 |         mean = Dense(100, activation='relu')(x)
 57 |         final_mean = Dense(n_outputs, activation='linear')(mean)
 58 |     
 59 |         log_var = Dense(100, activation='relu')(x)
 60 |         final_log_var = Dense(n_outputs, activation='linear')(log_var)
 61 |     
 62 |         out = concatenate([final_mean, final_log_var])
 63 |         model = Model(inp, out)
 64 |         for loss in losses:
 65 |             model.add_loss(loss)
 66 |         learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(1e-3, 500, 1e-5, power=0.5)
 67 |         model.compile(optimizer=optimizers.Adam(learning_rate=learning_rate_fn), loss=heteroscedastic_loss,
 68 |                       metrics=[mse_loss])
 69 | 
 70 |     return model
 71 | 
 72 | 
 73 | def plot_heteroscedastic(model, save, x_train, y_train, x_lim, y_lim):
 74 |     n_test = 500
 75 |     x_test = np.linspace(-x_lim, x_lim, n_test).reshape(n_test, 1).astype('float32')
 76 | 
 77 |     preds_mean = []
 78 |     preds_var = []
 79 |     n_repeats = 20
 80 |     for _ in range(n_repeats):
 81 |         pred = model(x_test, training=True)
 82 |         n_outputs = pred.shape[1] // 2
 83 |         pred_mean = pred[:, :n_outputs]
 84 |         pred_var = pred[:, n_outputs:]
 85 |         preds_mean.append(pred_mean)
 86 |         preds_var.append(pred_var)
 87 | 
 88 |     plt.figure(figsize=(10, 4))
 89 |     preds_mean = np.array(preds_mean).reshape(20, n_test)
 90 |     preds_var = np.array(preds_var).reshape(20, n_test)
 91 |     preds_mean_mean = np.mean(preds_mean, axis=0)
 92 |     preds_mean_std = np.std(preds_mean, axis=0)
 93 |     preds_var_mean = np.mean(preds_var, axis=0)
 94 | 
 95 |     plt.scatter(x_train, y_train, c="orangered",label='Training data')
 96 |     color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
 97 |     plt.plot(x_test, preds_mean_mean, color=color_pred, label='Mean function/Epistemic uncertainty')
 98 |     plt.plot(x_test, np.sqrt(np.exp(preds_var_mean)), color="green", label="Aleatoric uncertainty")
 99 |     plt.fill_between(x_test.reshape(n_test,), preds_mean_mean - preds_mean_std, preds_mean_mean + preds_mean_std,
100 |                      alpha=0.25, color=color_pred)
101 |     plt.fill_between(x_test.reshape(n_test,), preds_mean_mean - 2.0 * preds_mean_std, preds_mean_mean + 2.0 * preds_mean_std,
102 |                      alpha=0.35, color=color_pred)
103 | 
104 |     plt.xlim(-x_lim, x_lim)
105 |     plt.ylim(-y_lim, y_lim)
106 |     plt.legend()
107 | 
108 |     plt.tight_layout()
109 |     if save:
110 |         plt.savefig("plots/Concrete_Dropout_heteroscedastic.png")
111 |     else:
112 |         plt.show()
113 | 
114 | 
def plot_mse(model, save, x_train, y_train, x_lim, y_lim):
    """
    Plot the mean prediction and its spread over repeated stochastic forward passes.

    The model is called 20 times with 'training=True' on 500 evenly spaced points in [-x_lim, x_lim]; the mean
    of the samples is drawn as a line, one and two standard deviations as shaded bands. The figure is saved to
    'plots/Concrete_Dropout_mse.pdf' if 'save' is truthy and shown otherwise.
    """
    n_test = 500
    n_repeats = 20
    grid = np.linspace(-x_lim, x_lim, n_test).reshape(n_test, 1).astype('float32')

    # one stochastic forward pass per repeat
    samples = [model(grid, training=True) for _ in range(n_repeats)]

    plt.figure(figsize=(10, 4))
    samples = np.array(samples).reshape(n_repeats, n_test)
    center = np.mean(samples, axis=0)
    spread = np.std(samples, axis=0)

    plt.scatter(x_train, y_train, c="orangered", label='Training data')
    blue = (0.0, 101.0 / 255.0, 189.0 / 255.0)
    plt.plot(grid, center, color=blue, label='Mean function/Epistemic uncertainty')

    # bands of one and two standard deviations around the mean
    grid_flat = grid.reshape(n_test,)
    for n_std, opacity in ((1.0, 0.25), (2.0, 0.35)):
        plt.fill_between(grid_flat, center - n_std * spread, center + n_std * spread,
                         alpha=opacity, color=blue)

    plt.xlim(-x_lim, x_lim)
    plt.ylim(-y_lim, y_lim)
    plt.legend()

    plt.tight_layout()
    if not save:
        plt.show()
    else:
        plt.savefig("plots/Concrete_Dropout_mse.pdf")
147 | 
148 | 
def fit_regression(loss_type="heteroscedastic", data="ian", save=False):
    """
    Train a concrete dropout regression model on one of the toy data sets and plot its predictions.

    :param loss_type: one of ALLOWED_LOSS_TYPES; selects the model built by make_model and the plot routine
    :param data: one of ALLOWED_DATA_CONFIGS; selects the training data
    :param save: forwarded to the plot routine (save the figure instead of showing it)
    :raises AssertionError: if 'data' or 'loss_type' is not an allowed value
    """
    # validate both options up front so that nothing is loaded for an invalid configuration
    if data not in ALLOWED_DATA_CONFIGS:
        raise AssertionError(f"'data' has to be in {ALLOWED_DATA_CONFIGS} but was set to {data}.")
    if loss_type not in ALLOWED_LOSS_TYPES:
        raise AssertionError(f"'loss_type' has to be in {ALLOWED_LOSS_TYPES} but was set to {loss_type}.")

    # load data; the npz archives are opened in a 'with' block so that the file handles are released again
    if data == TOY_DATA:
        with np.load("data/train_data_regression.npz") as archive:
            x_train = archive["x_train"]
            y_train = archive["y_train"]
        x_lim, y_lim = 4.5, 70.0
    elif data == IAN_DATA:
        # NOTE: allow_pickle=True can execute arbitrary code while loading; only use it with trusted files.
        with np.load("data/train_data_ian_regression.npz", allow_pickle=True) as archive:
            x_train = archive["x_train"]
            y_train = archive["y_train"]
        x_lim, y_lim = 12.0, 8.0
    elif data == SAMPLE_DATA:
        n_samples = 20
        toy_regression = ToyRegressionData()
        x_train, y_train = toy_regression.gen_data(n_samples)
        x_lim, y_lim = 4.5, 70.0

    if loss_type == HETEROSCEDASTIC:
        y_lim = 20  # adapt y limit

    n_epochs = 500
    length_scale = 1e-3
    n_train = len(x_train)
    # both regularization strengths are scaled by the number of training points
    weight_reg = length_scale**2.0 / n_train
    dropout_reg = 2.0 / n_train

    model = make_model(loss_type, 1, 1, n_nodes=200, dropout_reg=dropout_reg, wd=weight_reg)

    print("Starting training...")
    model.fit(x_train, y_train, epochs=n_epochs)

    print("Starting plotting...")
    if loss_type == MSE:
        plot_mse(model, save, x_train, y_train, x_lim, y_lim)
    elif loss_type == HETEROSCEDASTIC:
        plot_heteroscedastic(model, save, x_train, y_train, x_lim, y_lim)

    # the dropout probability of each concrete dropout layer is the sigmoid of its 'p_logit'
    print("Dropout rates:")
    for layer in model.layers:
        if isinstance(layer, ConcreteDropout):
            print(tf.math.sigmoid(layer.p_logit))
194 | 
195 | 
if __name__ == '__main__':
    # test gpu availability
    print(f"GPU available: {tf.test.is_gpu_available()}")

    # set configuration
    loss_type = MSE  # choose from ALLOWED_LOSS_TYPES
    data = IAN_DATA  # choose from ALLOWED_DATA_CONFIGS
    save = False  # save images (True: write the figure to 'plots/', False: show it)

    # train the model and plot its predictions
    fit_regression(loss_type=loss_type, data=data, save=save)
206 | 


--------------------------------------------------------------------------------
/toy_regression_mc_dropout.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import tensorflow as tf
  3 | import matplotlib.pyplot as plt
  4 | 
  5 | from data.toy_regression import ToyRegressionData
  6 | 
# shorthand for the Keras layers namespace
tfkl = tf.keras.layers

# identifiers of the selectable training data sets (see fit_regression)
TOY_DATA = "toy"
IAN_DATA = "ian"
SAMPLE_DATA = "sample"
ALLOWED_DATA_CONFIGS = {TOY_DATA, IAN_DATA, SAMPLE_DATA}

# identifiers of the supported loss types; they also select the output heads of MC_Dropout
MSE = "mse"
HETEROSCEDASTIC = "heteroscedastic"
ALLOWED_LOSS_TYPES = {MSE, HETEROSCEDASTIC}
 17 | 
 18 | 
 19 | class MC_Dropout(tf.keras.Model):
 20 |     """
 21 |     Neural network with MC dropout according to
 22 |     "Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning"
 23 |     - Gal and Ghahramani (2015): https://arxiv.org/abs/1506.02142.
 24 | 
 25 |     Two different models are possible depending on the specified 'loss_type':
 26 |     - 'mse': bayesian model that only predicts the output mean
 27 |     - 'heteroscedastic': bayesian model that predicts the output mean and variance; can be used to model the
 28 |     epistemic (knowledge) and aleatoric (data) uncertainty separately
 29 |     """
 30 |     def __init__(self, input_dim=1, hidden_units=[100, 100], dropout_per_layer=[0.2, 0.2], output_dim=1,
 31 |                  loss_type="mse"):
 32 |         super(MC_Dropout, self).__init__()
 33 |         
 34 |         N = 100  # data points, constant for simplicity
 35 |         lengthscale = 1e-1
 36 |         tau = 1
 37 |         reg_no_dropout = lengthscale**2.0 / (2.0 * N * tau)
 38 | 
 39 |         self.loss_type = loss_type
 40 | 
 41 |         self.input_layer = tfkl.InputLayer(input_shape=(input_dim,))
 42 |         self.hidden_layers = []
 43 |         for n_neurons, dropout_rate in zip(hidden_units, dropout_per_layer):
 44 |             reg = ((1 - dropout_rate) * lengthscale**2.0) / (2.0 * N * tau)
 45 |             self.hidden_layers.append(tfkl.Dense(n_neurons, activation='relu',
 46 |                                                  kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg)))
 47 |             self.hidden_layers.append(tfkl.Dropout(dropout_rate, trainable=True))
 48 |         
 49 |         self.hidden_layer_mean = tfkl.Dense(100, activation='relu',
 50 |                                             kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg_no_dropout))
 51 |         self.hidden_layer_var = tfkl.Dense(100, activation='relu',
 52 |                                            kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg_no_dropout))
 53 | 
 54 |         self.output_layer_mean = tfkl.Dense(output_dim, activation='linear',
 55 |                                             kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg_no_dropout))
 56 |         self.output_layer_var = tfkl.Dense(output_dim, activation='linear',
 57 |                                            kernel_regularizer=tf.keras.regularizers.L1L2(l2=reg_no_dropout))
 58 | 
 59 |     @tf.function
 60 |     def call(self, inputs):
 61 |         out = self.input_layer(inputs)
 62 |         for layer in self.hidden_layers:
 63 |             out = layer(out)
 64 |     
 65 |         if self.loss_type == MSE:
 66 |             # one head for the mean
 67 |             final_mean = self.output_layer_mean(out)
 68 |             return final_mean
 69 |         
 70 |         if self.loss_type == HETEROSCEDASTIC:
 71 |             # two heads for mean and variance
 72 |             y_mean = self.hidden_layer_mean(out)
 73 |             final_mean = self.output_layer_mean(y_mean)
 74 |         
 75 |             y_var = self.hidden_layer_var(out)
 76 |             final_log_var = self.output_layer_var(y_var)
 77 |         
 78 |             return final_mean, final_log_var
 79 | 
 80 | 
 81 | def plot_heteroscedastic(model, save, x_train, y_train, x_lim, y_lim):
 82 |     n_test = 500
 83 |     x_test = np.linspace(-x_lim, x_lim, n_test).reshape(n_test, 1).astype('float32')
 84 | 
 85 |     preds_mean = []
 86 |     preds_var = []
 87 |     n_repeats = 20
 88 |     for _ in range(n_repeats):
 89 |         pred_mean, pred_var = model(x_test, training=True)
 90 |         preds_mean.append(pred_mean)
 91 |         preds_var.append(pred_var)
 92 | 
 93 |     plt.figure(figsize=(10, 4))
 94 |     preds_mean = np.array(preds_mean).reshape(20, n_test)
 95 |     preds_var = np.array(preds_var).reshape(20, n_test)
 96 |     preds_mean_mean = np.mean(preds_mean, axis=0)
 97 |     preds_mean_std = np.std(preds_mean, axis=0)
 98 |     preds_var_mean = np.mean(preds_var, axis=0)
 99 | 
100 |     plt.scatter(x_train, y_train, c="orangered",label='Training data')
101 |     color_pred = (0.0, 101.0 / 255.0, 189.0 / 255.0)
102 |     plt.plot(x_test, preds_mean_mean, color=color_pred, label='Mean function/Epistemic uncertainty')
103 |     plt.plot(x_test, np.sqrt(np.exp(preds_var_mean)), color="green", label="Aleatoric uncertainty")
104 |     plt.fill_between(x_test.reshape(n_test,), preds_mean_mean - preds_mean_std, preds_mean_mean + preds_mean_std,
105 |                      alpha=0.25, color=color_pred)
106 |     plt.fill_between(x_test.reshape(n_test,), preds_mean_mean - 2.0 * preds_mean_std, preds_mean_mean + 2.0 * preds_mean_std,
107 |                      alpha=0.35, color=color_pred)
108 | 
109 |     plt.xlim(-x_lim, x_lim)
110 |     plt.ylim(-y_lim, y_lim)
111 |     plt.legend()
112 | 
113 |     plt.tight_layout()
114 |     if save:
115 |         plt.savefig("plots/MC_Dropout_heteroscedastic.png")
116 |     else:
117 |         plt.show()
118 | 
119 | 
def plot_mse(model, save, x_train, y_train, x_lim, y_lim):
    """
    Plot the mean prediction and its spread over repeated stochastic forward passes.

    The model is called 20 times with 'training=True' on 500 evenly spaced points in [-x_lim, x_lim]; the mean
    of the samples is drawn as a line, one and two standard deviations as shaded bands. The figure is saved to
    'plots/MC_Dropout_mse.pdf' if 'save' is truthy and shown otherwise.
    """
    n_test = 500
    n_repeats = 20
    grid = np.linspace(-x_lim, x_lim, n_test).reshape(n_test, 1).astype('float32')

    # one stochastic forward pass per repeat
    samples = [model(grid, training=True) for _ in range(n_repeats)]

    plt.figure(figsize=(10, 4))
    samples = np.array(samples).reshape(n_repeats, n_test)
    center = np.mean(samples, axis=0)
    spread = np.std(samples, axis=0)

    plt.scatter(x_train, y_train, c="orangered", label='Training data')
    blue = (0.0, 101.0 / 255.0, 189.0 / 255.0)
    plt.plot(grid, center, color=blue, label='Mean function/Epistemic uncertainty')

    # bands of one and two standard deviations around the mean
    grid_flat = grid.reshape(n_test,)
    for n_std, opacity in ((1.0, 0.25), (2.0, 0.35)):
        plt.fill_between(grid_flat, center - n_std * spread, center + n_std * spread,
                         alpha=opacity, color=blue)

    plt.xlim(-x_lim, x_lim)
    plt.ylim(-y_lim, y_lim)
    plt.legend()

    plt.tight_layout()
    if not save:
        plt.show()
    else:
        plt.savefig("plots/MC_Dropout_mse.pdf")
152 | 
153 | 
@tf.function
def mse_loss(y_train, x_train, model):
    """
    Mean squared error of the model on the given data plus the model's regularization losses.

    :param y_train: regression targets
    :param x_train: model inputs
    :param model: Keras model that returns the predicted mean
    :return: tuple (total loss, regularization loss)
    """
    # training=True keeps the dropout layers active while fitting; without an explicit flag Keras falls back
    # to its default mode, which is inference unless a learning phase has been set.
    prediction = model(x_train, training=True)
    mse = tf.reduce_mean(tf.losses.mse(y_train, prediction))
    reg = tf.reduce_sum(model.losses)  # regularization loss
    return mse + reg, reg
159 | 
160 | 
@tf.function
def heteroscedastic_loss(y_train, x_train, model):
    """
    Heteroscedastic regression loss plus the model's regularization losses.

    The data term is sum(0.5 * exp(-log_var) * (y - mean)^2 + 0.5 * log_var) over all elements.

    :param y_train: regression targets
    :param x_train: model inputs
    :param model: Keras model that returns the tuple (mean, log-variance)
    :return: tuple (total loss, regularization loss)
    """
    # training=True keeps the dropout layers active while fitting; without an explicit flag Keras falls back
    # to its default mode, which is inference unless a learning phase has been set.
    mean, log_var = model(x_train, training=True)
    mse = tf.reduce_sum(0.5 * tf.exp(-1.0 * log_var) * tf.square(y_train - mean) + 0.5 * log_var)
    reg = tf.reduce_sum(model.losses)  # regularization loss
    return mse + reg, reg
167 | 
168 | 
def fit_regression(loss_type="heteroscedastic", data="ian", additional_data=False, save=False):
    """
    Train an MC dropout regression model with a custom training loop and plot its predictions.

    :param loss_type: one of ALLOWED_LOSS_TYPES; selects the model heads, the loss and the plot routine
    :param data: one of ALLOWED_DATA_CONFIGS; selects the training data
    :param additional_data: if truthy, two extra points are inserted at the front of the training data
    :param save: forwarded to the plot routine (save the figure instead of showing it)
    :raises AssertionError: if 'data' or 'loss_type' is not an allowed value
    """
    # validate both options up front so that nothing is loaded for an invalid configuration
    if data not in ALLOWED_DATA_CONFIGS:
        raise AssertionError(f"'data' has to be in {ALLOWED_DATA_CONFIGS} but was set to {data}.")
    if loss_type not in ALLOWED_LOSS_TYPES:
        raise AssertionError(f"'loss_type' has to be in {ALLOWED_LOSS_TYPES} but was set to {loss_type}.")

    # load data; the npz archives are opened in a 'with' block so that the file handles are released again
    if data == TOY_DATA:
        with np.load("data/train_data_regression.npz") as archive:
            x_train = archive["x_train"]
            y_train = archive["y_train"]
        x_lim, y_lim = 4.5, 70.0
    elif data == IAN_DATA:
        # NOTE: allow_pickle=True can execute arbitrary code while loading; only use it with trusted files.
        with np.load("data/train_data_ian_regression.npz", allow_pickle=True) as archive:
            x_train = archive["x_train"]
            y_train = archive["y_train"]
        x_lim, y_lim = 12.0, 8.0
    elif data == SAMPLE_DATA:
        n_samples = 20
        toy_regression = ToyRegressionData()
        x_train, y_train = toy_regression.gen_data(n_samples)
        x_lim, y_lim = 4.5, 70.0

    if loss_type == HETEROSCEDASTIC:
        y_lim = 20  # adapt y limit

    hidden_units = [100, 100]
    dropout_per_layer = [0.09, 0.119]

    model = MC_Dropout(hidden_units=hidden_units, dropout_per_layer=dropout_per_layer, loss_type=loss_type)

    # Add special points
    if additional_data:
        x_extension = np.array([[-10.2], [-10.1]])
        y_extension = np.array([[-6.1], [-6.2]])
        x_train = np.insert(x_train, 0, x_extension, axis=0)
        y_train = np.insert(y_train, 0, y_extension, axis=0)

    epochs = 500
    # one optimizer step per epoch (full batch), so the schedule decays over exactly 'epochs' steps
    learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(1e-3, epochs, 1e-5, power=0.5)
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn)

    loss_fn = mse_loss if loss_type == MSE else heteroscedastic_loss
    log_interval = 10

    for i in range(epochs):
        # trainable variables are watched by the tape automatically
        with tf.GradientTape() as tape:
            loss, reg = loss_fn(y_train, x_train, model)
        gradients = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(gradients, model.trainable_variables))

        if i % log_interval == 0:
            print(f"Epoch: {i}, Loss: {loss} Regularization: {reg}")

    if loss_type == MSE:
        plot_mse(model, save, x_train, y_train, x_lim, y_lim)
    elif loss_type == HETEROSCEDASTIC:
        plot_heteroscedastic(model, save, x_train, y_train, x_lim, y_lim)
230 | 
231 | 
if __name__ == '__main__':
    # test gpu availability
    print(f"GPU available: {tf.test.is_gpu_available()}")

    # set configuration
    loss_type = HETEROSCEDASTIC  # choose from ALLOWED_LOSS_TYPES
    data = IAN_DATA  # choose from ALLOWED_DATA_CONFIGS
    additional_data = False  # True: insert two extra training points (see fit_regression)
    save = False  # save images (True: write the figure to 'plots/', False: show it)

    # train the model and plot its predictions
    fit_regression(loss_type=loss_type, data=data, additional_data=additional_data, save=save)
243 | 


--------------------------------------------------------------------------------
/train_bbb_dqn.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | import tensorflow as tf
 3 | 
 4 | from envs.env_utils import WrapFrameSkip
 5 | from dqn.Bayes_by_Backprop_DQN import BBBDQN
 6 | from dqn.train import train_dqn
 7 | 
# Hyperparameter sets for the two supported environments. The set selected via 'config' below is passed as a
# whole to train_dqn(); several entries are additionally read directly in this script.

# config cart pole
CONFIG_CARTPOLE = {
    "env_name": "CartPole-v1",  # environment id passed to gym.make()
    "algorithm": "bayes_by_backprop",
    "seed": [210, 142, 531, 461, 314],  # TensorFlow seed per run, indexed by the run id
    "runs": 1,  # perform e.g. 5 runs (must not exceed the number of seeds)
    "env_render": True,
    "alpha": 1,  # passed as the last constructor argument of BBBDQN
    "skip_frame_num": 0,  # > 0: wrap the environment in WrapFrameSkip with this frameskip
    "epochs_num": 50,
    "hidden_units": "100,100",  # comma-separated integers, parsed into a list below
    "gradient_update_gamma":  0.9,
    "batch_size": 64,
    "learning_rate_init": 1e-3,  # initial value of the polynomial learning rate decay
    "experiences_max": 5000,
    "experiences_min": 200,
    "epsilon_min": None,
    "epsilon": None,
    "epsilon_decay": None,
    "copy_steps": 25,
    "gradient_steps": 1,
    "step_limit": 200,  # epochs_num * step_limit is the number of decay steps of the learning rate
    "test_episodes": 5,  # perform a test episode after 'test episode' many train epochs
    "plot_avg_reward": True,
    "save": False,  # saves a npz-file with the data of the runs
}

# config mountain car
CONFIG_MOUNTAINCAR = {
    "env_name": "MountainCar-v0",  # environment id passed to gym.make()
    "algorithm": "bayes_by_backprop",
    "seed": [210, 142, 531, 461, 314],  # TensorFlow seed per run, indexed by the run id
    "runs": 1,  # perform e.g. 5 runs (must not exceed the number of seeds)
    "env_render": True,
    "alpha": 1,  # passed as the last constructor argument of BBBDQN
    "skip_frame_num": 4,  # > 0: wrap the environment in WrapFrameSkip with this frameskip
    "epochs_num": 100,
    "hidden_units": "200,200,200,200",  # comma-separated integers, parsed into a list below
    "gradient_update_gamma":  0.9,
    "batch_size": 64,
    "learning_rate_init": 1e-3,  # initial value of the polynomial learning rate decay
    "experiences_max": 5000,
    "experiences_min": 200,
    "epsilon_min": None,
    "epsilon": None,
    "epsilon_decay": None,
    "copy_steps": 25,
    "gradient_steps": 1,
    "step_limit": 500,  # epochs_num * step_limit is the number of decay steps of the learning rate
    "test_episodes": 10,  # perform a test episode after 'test episode' many train epochs
    "plot_avg_reward": True,
    "save": False,  # saves a npz-file with the data of the runs
}
61 | 
# Active hyperparameter set.
config = CONFIG_CARTPOLE  # switch between cart pole and mountain car

# Learning rate schedule: polynomial decay from the initial rate down to 1e-5.
decay_steps = config["epochs_num"]*config["step_limit"]
config_static = {
    "learning_rate": tf.keras.optimizers.schedules.PolynomialDecay(config["learning_rate_init"], decay_steps,
                                                                   1e-5, power=0.5),
}

# Setup environment
env = gym.make(config["env_name"]).env  # remove 200 step limit

frameskip = config["skip_frame_num"]
if frameskip > 0:  # optional: skip frames to ease training in MountainCar
    env = WrapFrameSkip(env, frameskip=frameskip)

num_states = len(env.observation_space.sample())
num_actions = env.action_space.n
print(f"Number of available actions: {num_actions}")
print(f"Available action values (force on the cart in N): {env.action_space}")

# "100,100" -> [100, 100]
hidden_units = [int(width) for width in config["hidden_units"].split(",")]

print(f"GPU available: {tf.test.is_gpu_available()}")

for run_id in range(config["runs"]):
    tf.random.set_seed(config["seed"][run_id])

    # train (action-value function) and target network (target action-value function) are built from the
    # same constructor arguments
    net_args = (num_states, num_actions, hidden_units, config["gradient_update_gamma"],
                config["experiences_max"], config["experiences_min"], config["batch_size"],
                config_static["learning_rate"], config["alpha"])
    train_net = BBBDQN(*net_args)
    target_net = BBBDQN(*net_args)

    train_dqn(config, env, train_net, target_net, run_id)
97 | 


--------------------------------------------------------------------------------
/train_dqn.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | import tensorflow as tf
 3 | 
 4 | from envs.env_utils import WrapFrameSkip
 5 | from dqn.DQN import DQN
 6 | from dqn.train import train_dqn
 7 | 
# Hyperparameter sets for the two supported environments. The set selected via 'config' below is passed as a
# whole to train_dqn(); several entries are additionally read directly in this script.

# config cart pole
CONFIG_CARTPOLE = {
    "env_name": "CartPole-v1",  # environment id passed to gym.make()
    "algorithm": "dqn",
    "seed": [210, 142, 531, 461, 314],  # TensorFlow seed per run, indexed by the run id
    "runs": 1,  # perform e.g. 5 runs (must not exceed the number of seeds)
    "env_render": True,
    "alpha": None,
    "skip_frame_num": 0,  # > 0: wrap the environment in WrapFrameSkip with this frameskip
    "epochs_num": 50,
    "hidden_units": "100,100",  # comma-separated integers, parsed into a list below
    "gradient_update_gamma":  0.9,
    "batch_size": 64,
    "learning_rate_init": 1e-3,  # initial value of the polynomial learning rate decay
    "experiences_max": 5000,
    "experiences_min": 200,
    "epsilon_min": 0.2,
    "epsilon": 1.0,
    "epsilon_decay": 0.95,
    "copy_steps": 25,
    "gradient_steps": 1,
    "step_limit": 200,  # epochs_num * step_limit is the number of decay steps of the learning rate
    "test_episodes": 5,  # perform a test episode after 'test episode' many train epochs
    "plot_avg_reward": True,
    "save": False,  # saves a npz-file with the data of the runs
}

# config mountain car
CONFIG_MOUNTAINCAR = {
    "env_name": "MountainCar-v0",  # environment id passed to gym.make()
    "algorithm": "dqn",
    "seed": [210, 142, 531, 461, 314],  # TensorFlow seed per run, indexed by the run id
    "runs": 1,  # perform e.g. 5 runs (must not exceed the number of seeds)
    "env_render": True,
    "alpha": None,
    "skip_frame_num": 4,  # > 0: wrap the environment in WrapFrameSkip with this frameskip
    "epochs_num": 100,
    "hidden_units": "200,200,200,200",  # comma-separated integers, parsed into a list below
    "gradient_update_gamma":  0.9,
    "batch_size": 64,
    "learning_rate_init": 1e-3,  # initial value of the polynomial learning rate decay
    "experiences_max": 5000,
    "experiences_min": 200,
    "epsilon_min": 0.2,
    "epsilon": 1.0,
    "epsilon_decay": 0.99,
    "copy_steps": 25,
    "gradient_steps": 1,
    "step_limit": 500,  # epochs_num * step_limit is the number of decay steps of the learning rate
    "test_episodes": 10,  # perform a test episode after 'test episode' many train epochs
    "plot_avg_reward": True,
    "save": False,  # saves a npz-file with the data of the runs
}
61 | 
# Active hyperparameter set.
config = CONFIG_MOUNTAINCAR  # switch between cart pole and mountain car

# Learning rate schedule: polynomial decay from the initial rate down to 1e-5.
decay_steps = config["epochs_num"]*config["step_limit"]
config_static = {
    "learning_rate": tf.keras.optimizers.schedules.PolynomialDecay(config["learning_rate_init"], decay_steps,
                                                                   1e-5, power=0.5),
}

# Setup environment
env = gym.make(config["env_name"]).env  # remove 200 step limit

frameskip = config["skip_frame_num"]
if frameskip > 0:  # optional: skip frames to ease training in MountainCar
    env = WrapFrameSkip(env, frameskip=frameskip)

num_states = len(env.observation_space.sample())
num_actions = env.action_space.n
print(f"Number of available actions: {num_actions}")
print(f"Available action values (force on the cart in N): {env.action_space}")

# "100,100" -> [100, 100]
hidden_units = [int(width) for width in config["hidden_units"].split(",")]

print(f"GPU available: {tf.test.is_gpu_available()}")

for run_id in range(config["runs"]):
    tf.random.set_seed(config["seed"][run_id])

    # train (action-value function) and target network (target action-value function) are built from the
    # same constructor arguments
    net_args = (num_states, num_actions, hidden_units, config["gradient_update_gamma"],
                config["experiences_max"], config["experiences_min"], config["batch_size"],
                config_static["learning_rate"])
    train_net = DQN(*net_args)
    target_net = DQN(*net_args)

    train_dqn(config, env, train_net, target_net, run_id)
97 | 


--------------------------------------------------------------------------------
/train_dqn_dropout.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | import tensorflow as tf
  3 | 
  4 | from envs.env_utils import WrapFrameSkip
  5 | from dqn.MC_Dropout_DQN import DQN
  6 | from dqn.train import train_dqn
  7 | 
# Hyperparameter sets for the two supported environments. The set selected via 'config' below is passed as a
# whole to train_dqn(); several entries are additionally read directly in this script.

# config cart pole
CONFIG_CARTPOLE = {
    "env_name": "CartPole-v1",  # environment id passed to gym.make()
    "algorithm": "mc_dropout",
    "seed": [210, 142, 531, 461, 314],  # TensorFlow seed per run, indexed by the run id
    "runs": 1,  # perform e.g. 5 runs (must not exceed the number of seeds)
    "env_render": True,
    "alpha": 1,
    "skip_frame_num": 0,  # > 0: wrap the environment in WrapFrameSkip with this frameskip
    "epochs_num": 50,
    "hidden_units": "100,100",  # comma-separated integers, parsed into a list below
    "gradient_update_gamma":  0.9,  # passed to the networks as 'gamma'
    "batch_size": 64,
    "learning_rate_init": 1e-3,  # initial value of the polynomial learning rate decay
    "experiences_max": 5000,
    "experiences_min": 200,
    "epsilon_min": None,
    "epsilon": None,
    "epsilon_decay": None,
    "copy_steps": 25,
    "gradient_steps": 1,
    "step_limit": 200,  # epochs_num * step_limit is the number of decay steps of the learning rate
    "test_episodes": 5,  # perform a test episode after 'test episode' many train epochs
    "plot_avg_reward": True,
    "save": False,  # saves a npz-file with the data of the runs
    "dropout_rate": 0.2,  # passed to the networks as 'dropout_rate'
}

# config mountain car
CONFIG_MOUNTAINCAR = {
    "env_name": "MountainCar-v0",  # environment id passed to gym.make()
    "algorithm": "mc_dropout",
    "seed": [210, 142, 531, 461, 314],  # TensorFlow seed per run, indexed by the run id
    "runs": 1,  # perform e.g. 5 runs (must not exceed the number of seeds)
    "env_render": True,
    "alpha": 1,
    "skip_frame_num": 4,  # > 0: wrap the environment in WrapFrameSkip with this frameskip
    "epochs_num": 100,
    "hidden_units": "200,200,200,200",  # comma-separated integers, parsed into a list below
    "gradient_update_gamma":  0.9,  # passed to the networks as 'gamma'
    "batch_size": 64,
    "learning_rate_init": 1e-3,  # initial value of the polynomial learning rate decay
    "experiences_max": 5000,
    "experiences_min": 200,
    "epsilon_min": None,
    "epsilon": None,
    "epsilon_decay": None,
    "copy_steps": 25,
    "gradient_steps": 1,
    "step_limit": 500,  # epochs_num * step_limit is the number of decay steps of the learning rate
    "test_episodes": 10,  # perform a test episode after 'test episode' many train epochs
    "plot_avg_reward": True,
    "save": False,  # saves a npz-file with the data of the runs
    "dropout_rate": 0.2,  # passed to the networks as 'dropout_rate'
}
 63 | 
 64 | config = CONFIG_CARTPOLE  # switch between cart pole and mountain car
 65 | 
 66 | config_static = {
 67 |     "learning_rate": tf.keras.optimizers.schedules.PolynomialDecay(config["learning_rate_init"],
 68 |                                                                    config["epochs_num"]*config["step_limit"], 1e-5,
 69 |                                                                    power=0.5)
 70 | }
 71 | 
 72 | # Setup environment
 73 | env = gym.make(config["env_name"]).env  # remove 200 step limit
 74 | 
 75 | if config["skip_frame_num"] > 0:    # optional: skip frames to ease training in MountainCar
 76 |     env = WrapFrameSkip(env, frameskip=config["skip_frame_num"])
 77 | 
 78 | num_states = len(env.observation_space.sample())
 79 | num_actions = env.action_space.n
 80 | print(f"Number of available actions: {num_actions}")
 81 | print(f"Available action values (force on the cart in N): {env.action_space}")
 82 | 
 83 | hidden_units = []
 84 | for i in config["hidden_units"].split(","):
 85 |     hidden_units.append(int(i))
 86 | 
 87 | print(f"GPU available: {tf.test.is_gpu_available()}")
 88 | 
 89 | for run_id in (range(config["runs"])):
 90 |     tf.random.set_seed(config["seed"][run_id])
 91 | 
 92 |     # initialize train (action-value function) and target network (target action-value function)
 93 |     train_net = DQN(num_states=num_states, num_actions=num_actions, hidden_units=hidden_units,
 94 |                     gamma=config["gradient_update_gamma"], max_experiences=config["experiences_max"],
 95 |                     min_experiences=config["experiences_min"], batch_size=config["batch_size"],
 96 |                     lr=config_static["learning_rate"], dropout_rate=config["dropout_rate"])
 97 |     target_net = DQN(num_states=num_states, num_actions=num_actions, hidden_units=hidden_units,
 98 |                     gamma=config["gradient_update_gamma"], max_experiences=config["experiences_max"],
 99 |                     min_experiences=config["experiences_min"], batch_size=config["batch_size"],
100 |                     lr=config_static["learning_rate"], dropout_rate=config["dropout_rate"])
101 | 
102 |     train_dqn(config, env, train_net, target_net, run_id)
103 | 


--------------------------------------------------------------------------------
/train_dqn_dropout_concrete.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | import tensorflow as tf
  3 | 
  4 | from envs.env_utils import WrapFrameSkip
  5 | from dqn.Concrete_Dropout_DQN import DQN
  6 | from dqn.train import train_dqn
  7 | 
# Hyperparameter sets for the two supported environments. The set selected via 'config' below is passed as a
# whole to train_dqn(); several entries are additionally read directly in this script.

# config cart pole
CONFIG_CARTPOLE = {
    "env_name": "CartPole-v1",  # environment id passed to gym.make()
    "algorithm": "concrete_dropout",
    "seed": [210, 142, 531, 461, 314],  # TensorFlow seed per run, indexed by the run id
    "runs": 1,  # perform e.g. 5 runs (must not exceed the number of seeds)
    "env_render": True,
    "alpha": 1,
    "skip_frame_num": 0,  # > 0: wrap the environment in WrapFrameSkip with this frameskip
    "epochs_num": 50,
    "hidden_units": "100,100",  # 400, 400
    "gradient_update_gamma":  0.9,  # passed to the networks as 'gamma'
    "batch_size": 64,
    "learning_rate_init": 1e-3,  # initial value of the polynomial learning rate decay
    "experiences_max": 5000,
    "experiences_min": 200,
    "epsilon_min": None,
    "epsilon": None,
    "epsilon_decay": None,
    "copy_steps": 25,
    "gradient_steps": 1,
    "step_limit": 200,  # epochs_num * step_limit is the number of decay steps of the learning rate
    "test_episodes": 5,  # perform a test episode after 'test episode' many train epochs
    "plot_avg_reward": True,
    "save": False,  # saves a npz-file with the data of the runs
}

# config mountain car
CONFIG_MOUNTAINCAR = {
    "env_name": "MountainCar-v0",  # environment id passed to gym.make()
    "algorithm": "concrete_dropout",
    "seed": [210, 142, 531, 461, 314],  # TensorFlow seed per run, indexed by the run id
    "runs": 1,  # perform e.g. 5 runs (must not exceed the number of seeds)
    "env_render": True,
    "alpha": 1,
    "skip_frame_num": 4,  # > 0: wrap the environment in WrapFrameSkip with this frameskip
    "epochs_num": 100,
    "hidden_units": "200,200,200,200",  # comma-separated integers, parsed into a list below
    "gradient_update_gamma":  0.9,  # passed to the networks as 'gamma'
    "batch_size": 64,
    "learning_rate_init": 1e-3,  # initial value of the polynomial learning rate decay
    "experiences_max": 5000,
    "experiences_min": 200,
    "epsilon_min": None,
    "epsilon": None,
    "epsilon_decay": None,
    "copy_steps": 25,
    "gradient_steps": 1,
    "step_limit": 500,  # epochs_num * step_limit is the number of decay steps of the learning rate
    "test_episodes": 10,  # perform a test episode after 'test episode' many train epochs
    "plot_avg_reward": True,
    "save": False,  # saves a npz-file with the data of the runs
}
 61 | 
 62 | config = CONFIG_CARTPOLE  # switch between cart pole and mountain car
 63 | 
 64 | config_static = {
 65 |     "learning_rate": tf.keras.optimizers.schedules.PolynomialDecay(config["learning_rate_init"],
 66 |                                                                    config["epochs_num"]*config["step_limit"], 1e-5,
 67 |                                                                    power=0.5)
 68 | }
 69 | 
 70 | # Setup environment
 71 | env = gym.make(config["env_name"]).env  # remove 200 step limit
 72 | 
 73 | if config["skip_frame_num"] > 0:    # optional: skip frames to ease training in MountainCar
 74 |     env = WrapFrameSkip(env, frameskip=config["skip_frame_num"])
 75 | 
 76 | num_states = len(env.observation_space.sample())
 77 | num_actions = env.action_space.n
 78 | print(f"Number of available actions: {num_actions}")
 79 | print(f"Available action values (force on the cart in N): {env.action_space}")
 80 | 
 81 | hidden_units = []
 82 | for i in config["hidden_units"].split(","):
 83 |     hidden_units.append(int(i))
 84 | 
 85 | print(f"GPU available: {tf.test.is_gpu_available()}")
 86 | 
 87 | for run_id in (range(config["runs"])):
 88 |     tf.random.set_seed(config["seed"][run_id])
 89 | 
 90 |     # initialize train (action-value function) and target network (target action-value function)
 91 |     train_net = DQN(num_states=num_states, num_actions=num_actions, hidden_units=hidden_units,
 92 |                     gamma=config["gradient_update_gamma"], max_experiences=config["experiences_max"],
 93 |                     min_experiences=config["experiences_min"], batch_size=config["batch_size"],
 94 |                     lr=config_static["learning_rate"])
 95 |     target_net = DQN(num_states=num_states, num_actions=num_actions, hidden_units=hidden_units,
 96 |                      gamma=config["gradient_update_gamma"], max_experiences=config["experiences_max"],
 97 |                      min_experiences=config["experiences_min"], batch_size=config["batch_size"],
 98 |                      lr=config_static["learning_rate"])
 99 | 
100 |     train_dqn(config, env, train_net, target_net, run_id)
101 | 


--------------------------------------------------------------------------------
/train_mnf_dqn.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | import tensorflow as tf
 3 | 
 4 | from envs.env_utils import WrapFrameSkip
 5 | from dqn.MNF_DQN import MNFDQN
 6 | from dqn.train import train_dqn
 7 | 
 8 | # Configuration for the CartPole-v1 environment (selected as 'config' below)
 9 | CONFIG_CARTPOLE = {
10 |     "env_name": "CartPole-v1",  # environment id handed to gym.make
11 |     "algorithm": "mnf",
12 |     "seed": [210, 142, 531, 461, 314],  # TensorFlow seed of each run, indexed by run id
13 |     "runs": 1,  # number of runs to perform, e.g. 5 (every run needs its own entry in "seed")
14 |     "env_render": True,
15 |     "alpha": 1,  # passed to MNFDQN as its last constructor argument
16 |     "skip_frame_num": 0,  # frameskip given to WrapFrameSkip; 0 leaves the environment unwrapped
17 |     "epochs_num": 50,  # multiplied by "step_limit" to get the decay steps of the learning rate schedule
18 |     "hidden_units": "100,100",  # comma-separated integers, parsed into the hidden_units list
19 |     "gradient_update_gamma":  0.9,
20 |     "batch_size": 64,
21 |     "learning_rate_init": 1e-3,  # initial value of the PolynomialDecay learning rate schedule
22 |     "experiences_max": 5000,
23 |     "experiences_min": 200,
24 |     "epsilon_min": None,  # NOTE(review): epsilon settings are None; presumably unused here - confirm in dqn/train.py
25 |     "epsilon": None,
26 |     "epsilon_decay": None,
27 |     "copy_steps": 25,
28 |     "gradient_steps": 1,
29 |     "step_limit": 200,
30 |     "test_episodes": 5,  # run a test episode every 'test_episodes' training epochs
31 |     "plot_avg_reward": True,
32 |     "save": False,  # save an npz file with the data of the runs
33 | }
34 | 
35 | # Configuration for the MountainCar-v0 environment (only used when 'config' below is switched to it)
36 | CONFIG_MOUNTAINCAR = {
37 |     "env_name": "MountainCar-v0",  # environment id handed to gym.make
38 |     "algorithm": "mnf",
39 |     "seed": [210, 142, 531, 461, 314],  # TensorFlow seed of each run, indexed by run id
40 |     "runs": 1,  # number of runs to perform, e.g. 5 (every run needs its own entry in "seed")
41 |     "env_render": True,
42 |     "alpha": 1,  # passed to MNFDQN as its last constructor argument
43 |     "skip_frame_num": 4,  # frameskip given to WrapFrameSkip; 0 leaves the environment unwrapped
44 |     "epochs_num": 100,  # multiplied by "step_limit" to get the decay steps of the learning rate schedule
45 |     "hidden_units": "200,200,200,200",  # comma-separated integers, parsed into the hidden_units list
46 |     "gradient_update_gamma":  0.9,
47 |     "batch_size": 64,
48 |     "learning_rate_init": 1e-3,  # initial value of the PolynomialDecay learning rate schedule
49 |     "experiences_max": 5000,
50 |     "experiences_min": 200,
51 |     "epsilon_min": None,  # NOTE(review): epsilon settings are None; presumably unused here - confirm in dqn/train.py
52 |     "epsilon": None,
53 |     "epsilon_decay": None,
54 |     "copy_steps": 25,
55 |     "gradient_steps": 1,
56 |     "step_limit": 500,
57 |     "test_episodes": 10,  # run a test episode every 'test_episodes' training epochs
58 |     "plot_avg_reward": True,
59 |     "save": False,  # save an npz file with the data of the runs
60 | }
61 | 
62 | config = CONFIG_CARTPOLE  # switch between cart pole and mountain car
63 | 
64 | config_static = {
65 |     "learning_rate": tf.keras.optimizers.schedules.PolynomialDecay(config["learning_rate_init"],
66 |                                                                    config["epochs_num"]*config["step_limit"], 1e-5,
67 |                                                                    power=0.5)
68 | }
69 | 
70 | # Setup environment 
71 | env = gym.make(config["env_name"]).env  # remove 200 step limit
72 | 
73 | if config["skip_frame_num"] > 0:    # optional: skip frames to ease training in MountainCar
74 |     env = WrapFrameSkip(env, frameskip=config["skip_frame_num"])
75 |     
76 | num_states = len(env.observation_space.sample())
77 | num_actions = env.action_space.n
78 | print(f"Number of available actions: {num_actions}")
79 | print(f"Available action values (force on the cart in N): {env.action_space}")
80 | 
81 | hidden_units = []
82 | for i in config["hidden_units"].split(","):
83 |     hidden_units.append(int(i))
84 | 
85 | print(f"GPU available: {tf.test.is_gpu_available()}")
86 | 
87 | for run_id in (range(config["runs"])):
88 |     tf.random.set_seed(config["seed"][run_id])
89 | 
90 |     # initialize train (action-value function) and target network (target action-value function)
91 |     train_net = MNFDQN(num_states, num_actions, hidden_units, config["gradient_update_gamma"],
92 |                        config["experiences_max"], config["experiences_min"], config["batch_size"],
93 |                        config_static["learning_rate"], config["alpha"])
94 |     target_net = MNFDQN(num_states, num_actions, hidden_units, config["gradient_update_gamma"],
95 |                         config["experiences_max"], config["experiences_min"], config["batch_size"],
96 |                         config_static["learning_rate"], config["alpha"])
97 | 
98 |     train_dqn(config, env, train_net, target_net, run_id)
99 | 


--------------------------------------------------------------------------------