├── LICENSE ├── README.md ├── data └── readme ├── examples ├── __init__.py └── rec_es │ ├── __pycache__ │ ├── gul_env.cpython-36.pyc │ ├── gul_input_fn.cpython-36.pyc │ ├── gul_input_fn_local.cpython-36.pyc │ ├── rec_env.cpython-36.pyc │ └── rec_input_fn_local.cpython-36.pyc │ ├── rec_config_local.json │ ├── rec_env.py │ ├── rec_input_fn_local.py │ ├── rec_rl_data_small │ └── rec_run_es_local.py ├── setup.cfg ├── setup.py └── tensorforce ├── __init__.py ├── __pycache__ ├── __init__.cpython-36.pyc ├── exception.cpython-36.pyc ├── meta_parameter_recorder.cpython-36.pyc └── util.cpython-36.pyc ├── agents ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── a3c_agent.cpython-36.pyc │ ├── agent.cpython-36.pyc │ ├── batch_agent.cpython-36.pyc │ ├── constant_agent.cpython-36.pyc │ ├── ddpg_agent.cpython-36.pyc │ ├── deterministic_es_agent.cpython-36.pyc │ ├── dqfd_agent.cpython-36.pyc │ ├── dqn_agent.cpython-36.pyc │ ├── dqn_nstep_agent.cpython-36.pyc │ ├── es_agent.cpython-36.pyc │ ├── memory_agent.cpython-36.pyc │ ├── naf_agent.cpython-36.pyc │ ├── ppo_agent.cpython-36.pyc │ ├── random_agent.cpython-36.pyc │ ├── trpo_agent.cpython-36.pyc │ └── vpg_agent.cpython-36.pyc ├── agent.py └── deterministic_es_agent.py ├── core ├── __init__.py ├── __pycache__ │ └── __init__.cpython-36.pyc ├── explorations │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── constant.cpython-36.pyc │ │ ├── epsilon_anneal.cpython-36.pyc │ │ ├── epsilon_decay.cpython-36.pyc │ │ ├── exploration.cpython-36.pyc │ │ ├── linear_decay.cpython-36.pyc │ │ └── ornstein_uhlenbeck_process.cpython-36.pyc │ ├── constant.py │ └── exploration.py ├── lib │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── atari_wrapper.cpython-36.pyc │ │ ├── env_seeding.cpython-36.pyc │ │ ├── optimizers.cpython-36.pyc │ │ ├── schedules.cpython-36.pyc │ │ └── segment_tree.cpython-36.pyc │ ├── env_seeding.py │ ├── optimizers.py │ └── schedules.py ├── memories │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── memory.cpython-36.pyc │ │ ├── modified_replay.cpython-36.pyc │ │ ├── naive_prioritized_replay.cpython-36.pyc │ │ ├── prioritized_replay.cpython-36.pyc │ │ └── replay.cpython-36.pyc │ ├── memory.py │ └── replay.py ├── networks │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── layer.cpython-36.pyc │ │ └── network.cpython-36.pyc │ ├── layer.py │ └── network.py ├── optimizers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── clipped_step.cpython-36.pyc │ │ ├── evolutionary.cpython-36.pyc │ │ ├── global_optimizer.cpython-36.pyc │ │ ├── meta_optimizer.cpython-36.pyc │ │ ├── multi_step.cpython-36.pyc │ │ ├── natural_gradient.cpython-36.pyc │ │ ├── optimized_step.cpython-36.pyc │ │ ├── optimizer.cpython-36.pyc │ │ ├── synchronization.cpython-36.pyc │ │ └── tf_optimizer.cpython-36.pyc │ ├── global_optimizer.py │ ├── lr_decay │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── tf_schedules.cpython-36.pyc │ │ └── tf_schedules.py │ ├── meta_optimizer.py │ ├── optimizer.py │ └── tf_optimizer.py └── preprocessing │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── clip.cpython-36.pyc │ ├── divide.cpython-36.pyc │ ├── grayscale.cpython-36.pyc │ ├── image_resize.cpython-36.pyc │ ├── normalize.cpython-36.pyc │ ├── preprocessor.cpython-36.pyc │ ├── preprocessor_stack.cpython-36.pyc │ ├── running_standardize.cpython-36.pyc │ └── standardize.cpython-36.pyc │ ├── preprocessor.py │ ├── 
preprocessor_stack.py │ └── standardize.py ├── environments ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── environment.cpython-36.pyc │ ├── gym_environment.cpython-36.pyc │ ├── meta_environment.cpython-36.pyc │ ├── minimal_test.cpython-36.pyc │ ├── oss_environment.cpython-36.pyc │ └── table_environment.cpython-36.pyc ├── classic_control │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── cart_pole.cpython-36.pyc │ │ └── pendulum.cpython-36.pyc │ ├── cart_pole.py │ └── pendulum.py ├── environment.py └── meta_environment.py ├── exception.py ├── meta_parameter_recorder.py ├── models ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── constant_model.cpython-36.pyc │ ├── ddpg_model.cpython-36.pyc │ ├── deterministic_es_model.cpython-36.pyc │ ├── distribution_model.cpython-36.pyc │ ├── es_model.cpython-36.pyc │ ├── model.cpython-36.pyc │ ├── pg_log_prob_model.cpython-36.pyc │ ├── pg_model.cpython-36.pyc │ ├── pg_prob_ratio_model.cpython-36.pyc │ ├── q_demo_model.cpython-36.pyc │ ├── q_model.cpython-36.pyc │ ├── q_naf_model.cpython-36.pyc │ ├── q_nstep_model.cpython-36.pyc │ └── random_model.cpython-36.pyc ├── deterministic_es_model.py └── model.py └── util.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Value-aware Recommendation based on Reinforced Profit Maximization in E-commerce System 2 | 3 | ## Code for the paper. This version runs on a single machine; a distributed version will be published later.
4 | 5 | ```bash 6 | # execute in the root folder 7 | python examples/rec_es/rec_run_es_local.py -i examples/rec_es/rec_config_local.json 8 | ``` 9 | 10 | ## Dataset for the paper can be downloaded at: 11 | 12 | https://drive.google.com/file/d/14OtIC8eiDkzoWCTtaUZHcb7eB-bUmtTT/view?usp=sharing 13 | -------------------------------------------------------------------------------- /data/readme: -------------------------------------------------------------------------------- 1 | ## source file 2 | The file can be downloaded at: https://drive.google.com/file/d/14OtIC8eiDkzoWCTtaUZHcb7eB-bUmtTT/view?usp=sharing 3 | 4 | ## background (Please refer to the paper for more details. https://arxiv.org/pdf/1902.00851.pdf) 5 | The following short paragraph from the paper helps to understand the format of the data file. 6 | ----- 7 | In our recommendation platform, items are shown in cascade 8 | on a mobile App one by one. Each time the user initiates a request, 9 | 50 items are recommended to him/her. As the user scrolls down the 10 | list and has seen all 50 items, a new request is triggered. This 11 | process is repeated until the user leaves the App or returns to the top 12 | of the cascade, labelled as "exit" in Figure 2. We use a metric called 13 | "pageid" to distinguish different requests in this interaction, similar 14 | to the concept of "page" in a search engine. As the user and the 15 | system interact with each other, the system learns how to respond 16 | to the state to obtain an optimized accumulative reward. 17 | ----- 18 | 19 | ## data format 20 | Each line contains 15 columns. The meaning of each column is as follows: 21 | 22 | column 1: The id of the returned page for the current request, which ranges from 0 to 11. Note that for each page, we return 50 items to the user. 23 | column 2: The hour when the request is launched by the user. 24 | column 3-5: The features used to profile the user, which include age level, gender and the level of purchase power. 25 | column 6-14: The item-specific features/labels. We concatenate the values of the 50 returned items belonging to a request into a list and separate them by commas. 26 | More specifically, 27 | column 6: The concatenated list of **position** for each item in the returned list, which ranges from 0 to 600 (12 pages * 50 items/page). 28 | column 7-9: The concatenated list of **predicted ctr/cvr/price** for each item in the returned list. 29 | column 10-12: The concatenated list of **isclick/iscart/isfav** for each item in the returned list to indicate whether the item is clicked/added to cart/added to wishlist by the user. 30 | column 13: The concatenated list of **purchase amount** for each item in the returned list. For example, 0.0 means that the user does not purchase this item. 12.0 means that the user spends 12 Yuan on this item. 31 | column 14: The concatenated list of **an optional powerful feature** of the item which can be used as one dimension of the "state" vector in RL. 32 | 33 | column 15: Indicates whether the current page is the last page browsed by the user. 34 | 35 | 36 | So columns 1-9 and 14 can be used to generate the **state** in the paper. Columns 10-13 can be used to calculate the **reward** in the paper. Column 15 represents the **terminal** indicator of RL in the paper.
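
For illustration, the sketch below parses one raw line into Python values following the column description above. It is a minimal example under stated assumptions, not the project's loader: the delimiters (";" between columns, "," inside the per-item lists) are taken from examples/rec_es/rec_input_fn_local.py, the 1-based column offsets follow this readme, and the function name parse_record is illustrative only. Note that rec_input_fn_local.py itself expects 14 ";"-separated fields and reads pageid from field index 4, so the released file may contain extra leading columns; adjust the indices if needed. The actual training pipeline reads the table through rec_input_fn_local.input_fn.

```python
def parse_record(line, field_delim=';', list_delim=','):
    """Parse one raw dataset line into scalar user features and 50-item lists."""
    def floats(field):
        # Split a comma-separated per-item list into a list of floats.
        return [float(x) for x in field.split(list_delim)]

    cols = line.rstrip('\n').split(field_delim)
    return {
        'pageid':   int(cols[0]),          # column 1: id of the returned page (0-11)
        'hour':     int(cols[1]),          # column 2: hour of the request
        'age':      int(cols[2]),          # columns 3-5: user profile features
        'gender':   int(cols[3]),
        'power':    int(cols[4]),
        'position': floats(cols[5]),       # column 6: per-item positions
        'ctr':      floats(cols[6]),       # columns 7-9: predicted ctr / cvr / price
        'cvr':      floats(cols[7]),
        'price':    floats(cols[8]),
        'isclick':  floats(cols[9]),       # columns 10-12: click / cart / wishlist labels
        'iscart':   floats(cols[10]),
        'isfav':    floats(cols[11]),
        'pay':      floats(cols[12]),      # column 13: purchase amount per item
        'extra':    floats(cols[13]),      # column 14: optional state feature
        'terminal': int(float(cols[14])),  # column 15: last-page indicator
    }

# Example usage against the bundled sample table (offsets may need adjusting):
# with open('examples/rec_es/rec_rl_data_small') as f:
#     first_record = parse_record(next(f))
```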
37 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/__init__.py -------------------------------------------------------------------------------- /examples/rec_es/__pycache__/gul_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/rec_es/__pycache__/gul_env.cpython-36.pyc -------------------------------------------------------------------------------- /examples/rec_es/__pycache__/gul_input_fn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/rec_es/__pycache__/gul_input_fn.cpython-36.pyc -------------------------------------------------------------------------------- /examples/rec_es/__pycache__/gul_input_fn_local.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/rec_es/__pycache__/gul_input_fn_local.cpython-36.pyc -------------------------------------------------------------------------------- /examples/rec_es/__pycache__/rec_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/rec_es/__pycache__/rec_env.cpython-36.pyc -------------------------------------------------------------------------------- /examples/rec_es/__pycache__/rec_input_fn_local.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/rec_es/__pycache__/rec_input_fn_local.cpython-36.pyc -------------------------------------------------------------------------------- /examples/rec_es/rec_config_local.json: -------------------------------------------------------------------------------- 1 | { 2 | "agent": { 3 | "type": "deterministic_es_agent", 4 | "batch_size": 100, 5 | "optimizer": { 6 | "type": "adam", 7 | "learning_rate": 0.01, 8 | "lr_schedule": 9 | { 10 | "type": "linear_decay", 11 | "max_decay_steps": 10000, 12 | "final_value": 0.01 13 | } 14 | }, 15 | "max_episode_timesteps": 20, 16 | "l2_coeff": 0.005, 17 | "eval_prob": 0.5, 18 | "noise_stddev": 0.02, 19 | "train_iters": 20 20 | }, 21 | "network_spec": [ 22 | { 23 | "type": "linear", 24 | "size": 3, 25 | "bias": false, 26 | "weights": [[1,1,1], [1,1,1], [1,0.83,0.83], [1,0.67,0.67], [1,0.5,0.5], [1,0.33,0.33], [1,0.17,0.17], [1,0.0,0.0]] 27 | } 28 | ], 29 | "env": { 30 | "shuffle": false, 31 | "capacity": 10000, 32 | "interactive": true, 33 | "discount_base": 0.8, 34 | "max_pageid": 7, 35 | "pv_item_num": 50, 36 | "local_mode": true, 37 | "batch_size": 100, 38 | "tables": ["examples/rec_es/rec_rl_data_small"] 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /examples/rec_es/rec_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from 
__future__ import division 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | from tensorforce.environments.meta_environment import MetaEnvironment 8 | import tensorforce.util as utl 9 | from tensorforce.exception import TensorforceError 10 | from rec_input_fn_local import input_fn as input_fn_local 11 | 12 | def _invert_permutation(tensor): 13 | '''wrapper for matrix''' 14 | return tf.cast(tf.map_fn(tf.invert_permutation, tensor), tf.float32) 15 | 16 | def _gather(param, indices): 17 | '''wrapper for matrix''' 18 | return tf.map_fn(lambda x : tf.gather(x[0], x[1]), (param, indices), dtype=param.dtype) 19 | 20 | class RecTableEnv(MetaEnvironment): 21 | ''' 22 | ODPS Table env for gul ranking scenario. 23 | ''' 24 | def __init__(self, config): 25 | config['env_type'] = 'odps_table' 26 | super(RecTableEnv, self).__init__(config) 27 | 28 | # parse more config 29 | self.parse_env_config() 30 | 31 | self._version = '0.1' 32 | 33 | self.sess = None 34 | def __str__(self): 35 | return 'RecTableEnv({})'.format(self._version) 36 | 37 | def parse_env_config(self): 38 | """ 39 | Obtain table name,schema and partition 40 | """ 41 | print('env config:', self.env_conf) 42 | 43 | # get worker_num and worker_id 44 | self.worker_num = self.env_conf.get('worker_num', 1) 45 | self.worker_id = self.env_conf.get('worker_id', 0) 46 | 47 | # get table name 48 | if 'tables' not in self.env_conf: 49 | raise TensorforceError("Can't find tables in configuration") 50 | self.tables = self.env_conf['tables'] 51 | 52 | self.epoch = self.env_conf.get('epoch', None) 53 | self.batch_size = self.env_conf.get('batch_size', 100) 54 | self.capacity = self.env_conf.get('capacity', 4 * self.batch_size) 55 | self.max_pageid = self.env_conf.get('max_pageid', 7) 56 | self.discount_base = self.env_conf.get('discount_base', 0.8) 57 | self.local_mode = self.env_conf.get('local_mode', False) 58 | self.alipay_coef = self.env_conf.get('alipay_coef', 1.0) 59 | self.reward_shaping_method = self.env_conf.get('reward_shaping_method', None) 60 | self.alipay_threshold = self.env_conf.get('alipay_threshold', 0.0) 61 | self.alipay_penalty = self.env_conf.get('alipay_penalty', 0.0) 62 | ''' 63 | ranking_formula_type 0: ctr * cvr^a * price^b 64 | ranking_formula_type 1: (ctr * cvr^a * price^b) * matchtype_weight 65 | ranking_formula_type 2: (a * ctr + ctr * cvr^b * price^c) * matchtype_weight 66 | ranking_formula_type 3: (a * ctr + b * cvr + ctr * cvr^c * price^d) * matchtype_weight 67 | ranking_formula_type 4: (a * ctr + b * ctr * cvr + ctr * cvr^c * price^d) * matchtype_weight 68 | ''' 69 | self.ranking_formula_type = self.env_conf.get('ranking_formula_type', 0) 70 | self.feature_include_hour_power = self.env_conf.get('feature_include_hour_power', False) 71 | self.feature_include_age_gender = self.env_conf.get('feature_include_age_gender', False) 72 | 73 | self.states_spec = {} 74 | feature_dim = self.max_pageid + 1 75 | if self.feature_include_hour_power: 76 | feature_dim += 32 77 | if self.feature_include_age_gender: 78 | feature_dim += 12 79 | 80 | self.states_spec['state'] = { 81 | 'type': 'float', 82 | 'shape': (feature_dim,) 83 | } 84 | self.actions_spec = {} 85 | if self.ranking_formula_type == 0: 86 | action_shape = 2 87 | elif self.ranking_formula_type == 1: 88 | action_shape = 6 89 | elif self.ranking_formula_type == 2: 90 | action_shape = 7 91 | elif self.ranking_formula_type in (3, 4): 92 | action_shape = 8 93 | else: 94 | raise TensorforceError("Invalid ranking formula type " + str(self.ranking_formula_type)) 95 | 96 | 
self.actions_spec['action'] = { 97 | 'type': 'float', 98 | 'shape': (action_shape,), 99 | 'min_value': -1.0, 100 | 'max_value': 2.0 101 | } 102 | 103 | print('states:', self.states) 104 | print('actions:', self.actions) 105 | 106 | def set_up(self): 107 | if self.local_mode: 108 | print('load data in local mode') 109 | self.batch_data = input_fn_local( 110 | name='table_env', 111 | tables=self.tables, 112 | num_epochs=self.epoch, 113 | num_workers=self.worker_num, 114 | worker_id=self.worker_id, 115 | batch_size=self.batch_size 116 | ) 117 | self.device = ("/job:localhost/replica:0/task:%d" % self.worker_id) if self.worker_id != -1 else 0 118 | else: 119 | self.batch_data = input_fn_local( 120 | name='table_env', 121 | tables=self.tables, 122 | num_epochs=self.epoch, 123 | num_workers=self.worker_num, 124 | worker_id=self.worker_id, 125 | batch_size=self.batch_size, 126 | capacity=self.capacity 127 | ) 128 | self.device = ("/job:worker/task:%d" % self.worker_id) if self.worker_id != -1 else 0 129 | with tf.variable_scope(name_or_scope='table_env') as scope: 130 | with tf.device(device_name_or_function = self.device): 131 | self.build_graph() 132 | 133 | def get_input_tensor(self): 134 | """ 135 | Get the input tensor for agent 136 | """ 137 | data = {} 138 | data['states'] = {} 139 | data['states']['states'] = self.states_tensor 140 | 141 | return data 142 | 143 | def set_session(self, session): 144 | self.sess = session 145 | 146 | def update(self): 147 | if self.sess is None: 148 | raise TensorforceError("self.session is None") 149 | 150 | self.sess.run([self.batch_data, self.assign_cache_ops]) 151 | 152 | def reset(self): 153 | self.update() 154 | 155 | return self.states_tensor 156 | 157 | def build_graph(self): 158 | self.cache_data = {} 159 | self.cache_data['pageid'] = tf.Variable(tf.zeros(self.batch_size, dtype=tf.int32), 160 | trainable=False, 161 | name='pageid_var') 162 | if self.feature_include_hour_power: 163 | self.cache_data['hour'] = tf.Variable(tf.zeros(self.batch_size, dtype=tf.int32), 164 | trainable=False, 165 | name='hour_var') 166 | self.cache_data['power'] = tf.Variable(tf.zeros(self.batch_size, dtype=tf.int32), 167 | trainable=False, 168 | name='power_var') 169 | hour = self.cache_data['hour'] 170 | power = self.cache_data['power'] 171 | if self.feature_include_age_gender: 172 | self.cache_data['age'] = tf.Variable(tf.zeros(self.batch_size, dtype=tf.int32), 173 | trainable=False, 174 | name='age_var') 175 | self.cache_data['gender'] = tf.Variable(tf.zeros(self.batch_size, dtype=tf.int32), 176 | trainable=False, 177 | name='gender_var') 178 | age = self.cache_data['age'] 179 | gender = self.cache_data['gender'] 180 | 181 | self.cache_data['ctr'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.float32), 182 | trainable=False, 183 | name='ctr_var') 184 | self.cache_data['cvr'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.float32), 185 | trainable=False, 186 | name='cvr_var') 187 | self.cache_data['price'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.float32), 188 | trainable=False, 189 | name='price_var') 190 | self.cache_data['click'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.float32), 191 | trainable=False, 192 | name='click_var') 193 | self.cache_data['pay'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.float32), 194 | trainable=False, 195 | name='pay_var') 196 | if self.ranking_formula_type in (1, 2, 3, 4): 197 | self.cache_data['matchtype'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.int32), 198 | 
trainable=False, 199 | name='matchtype_var') 200 | matchtype = self.cache_data['matchtype'] 201 | 202 | self.assign_cache_ops = {} 203 | for tensor_name in self.cache_data.keys(): 204 | self.assign_cache_ops[tensor_name] = tf.assign(self.cache_data[tensor_name], self.batch_data[tensor_name], name=tensor_name + 'assign_cache') 205 | 206 | ctr = self.cache_data['ctr'] 207 | cvr = self.cache_data['cvr'] 208 | price = self.cache_data['price'] 209 | click = self.cache_data['click'] 210 | pay = self.cache_data['pay'] 211 | 212 | self.actions_input = tf.placeholder(tf.float32, shape=None, name='env_action') 213 | 214 | offset = 0 215 | if self.ranking_formula_type in (2, 3, 4): 216 | ctr_weight = tf.reshape(self.actions_input[:,0], (-1,1)) 217 | offset += 1 218 | if self.ranking_formula_type in (3, 4): 219 | cvr_weight = tf.reshape(self.actions_input[:,offset], (-1,1)) 220 | offset += 1 221 | cvr_power = tf.reshape(self.actions_input[:,offset], (-1,1)) 222 | price_power = tf.reshape(self.actions_input[:,1 + offset], (-1,1)) 223 | 224 | rank_score = ctr * tf.pow(cvr, cvr_power) * tf.pow(price, price_power) 225 | if self.ranking_formula_type == 2: 226 | rank_score = rank_score + ctr * ctr_weight 227 | elif self.ranking_formula_type == 3: 228 | rank_score = rank_score + ctr * ctr_weight + cvr * cvr_weight 229 | elif self.ranking_formula_type == 4: 230 | rank_score = rank_score + ctr * ctr_weight + ctr * cvr * cvr_weight 231 | 232 | if self.ranking_formula_type in (1, 2, 3, 4): 233 | matchtype_params = self.actions_input[:, 2 + offset : 6 + offset] 234 | i2i_param = tf.ones([self.batch_size, 1], tf.float32) 235 | full_matchtype_params = tf.concat([i2i_param, matchtype_params], axis=1) 236 | matchtype_weights = _gather(full_matchtype_params, matchtype) 237 | rank_score = rank_score * matchtype_weights 238 | 239 | sorted_rank_score, sorted_index = tf.nn.top_k(rank_score, k=50, sorted=True) 240 | # tf.invert_permutation only support 1-D vector, wrap it for matrix 241 | perm_index = _invert_permutation(sorted_index) 242 | pos_discount = tf.pow(self.discount_base, perm_index) 243 | 244 | discounted_click = click * pos_discount 245 | discounted_pay = pay * pos_discount 246 | 247 | self.pv_discount_click = tf.reduce_sum(discounted_click, 1) 248 | self.pv_discount_click_mean = tf.reduce_mean(self.pv_discount_click, 0) 249 | self.pv_discount_pay = tf.reduce_sum(discounted_pay, 1) 250 | self.pv_discount_pay_mean = tf.reduce_mean(self.pv_discount_pay, 0) 251 | 252 | pageid = tf.clip_by_value(self.cache_data['pageid'], 0, self.max_pageid) 253 | self.pageid_onehot = tf.one_hot(pageid, depth=self.max_pageid + 1, dtype=tf.float32) 254 | feature_list = [self.pageid_onehot] 255 | if self.feature_include_hour_power: 256 | self.hour_onehot = tf.one_hot(hour, depth=24, dtype=tf.float32) 257 | self.power_onehot = tf.one_hot(power, depth=8, dtype=tf.float32) 258 | feature_list.append(self.hour_onehot) 259 | feature_list.append(self.power_onehot) 260 | if self.feature_include_age_gender: 261 | self.age_onehot = tf.one_hot(age, depth=9, dtype=tf.float32) 262 | self.gender_onehot = tf.one_hot(gender, depth=3, dtype=tf.float32) 263 | feature_list.append(self.age_onehot) 264 | feature_list.append(self.gender_onehot) 265 | 266 | if len(feature_list) == 1: 267 | self.states_tensor = self.pageid_onehot 268 | else: 269 | self.states_tensor = tf.concat(feature_list, 1) 270 | 271 | print('build graph done') 272 | 273 | def execute(self, actions): 274 | """ 275 | Interact with the environment 276 | if set interactive to True, 
env.execute will apply an action to the environment and 277 | get an observation after the action 278 | 279 | actions are batch_size * 3 tensor 280 | 281 | return (next_state, step_reward, terminal) 282 | """ 283 | step_click, step_pay = self.sess.run([self.pv_discount_click_mean, self.pv_discount_pay_mean], feed_dict={self.actions_input: actions}) 284 | 285 | return (None, True, self.get_reward(step_click, step_pay)) 286 | 287 | def get_reward(self, click, pay): 288 | if self.reward_shaping_method is None: 289 | return click + pay 290 | elif self.reward_shaping_method == 'weighting': 291 | return click + self.alipay_coef * pay 292 | elif self.reward_shaping_method == 'penalty': 293 | if pay >= self.alipay_threshold: 294 | return click + pay 295 | else: 296 | return click + pay - self.alipay_penalty * (self.alipay_threshold - pay) 297 | 298 | def close(self): 299 | pass 300 | 301 | @property 302 | def states(self): 303 | return self.states_spec 304 | 305 | @property 306 | def actions(self): 307 | return self.actions_spec 308 | 309 | if __name__ == '__main__': 310 | import json 311 | with open('rec_config_local.json', 'r') as fp: 312 | config = json.load(fp=fp) 313 | print('config:', config) 314 | action_val = tf.constant(np.array([[1,1,1], [1,1,1], [1,0.83,0.83], [1,0.67,0.67], [1,0.5,0.5], [1,0.33,0.33], [1,0.17,0.17], [1,0.0,0.0]], dtype=np.float32)) 315 | env = RecTableEnv(config) 316 | sess_config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=True) 317 | sess_config.gpu_options.allow_growth = True 318 | sess = tf.Session(config=sess_config) 319 | env.set_session(sess) 320 | env.set_up() 321 | cur_action = tf.matmul(env.pageid_onehot, action_val) 322 | sess.run(tf.global_variables_initializer()) 323 | sess.run(tf.local_variables_initializer()) 324 | coord = tf.train.Coordinator() 325 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 326 | try: 327 | for i in range(4): 328 | print('pageid_onehot:', sess.run([env.reset()])) 329 | print('pageid cached:', sess.run(env.cache_data['pageid'])) 330 | print('pageid cached again:', sess.run(env.cache_data['pageid'])) 331 | cur_action_val = sess.run(cur_action) 332 | print('cur_action:', cur_action_val) 333 | #print('cur_action again:', sess.run(cur_action)) 334 | next_state, terminal, reward = env.execute(cur_action_val) 335 | print('reward:', reward) 336 | 337 | except tf.errors.OutOfRangeError: 338 | print('data is out of range') 339 | finally: 340 | coord.request_stop() 341 | coord.join(threads) 342 | sess.close() 343 | -------------------------------------------------------------------------------- /examples/rec_es/rec_input_fn_local.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import datetime 4 | import time 5 | 6 | def _parse_dense_features(s, dshape, dtype=tf.float32, delimiter=','): 7 | record_defaults = [[0.0]] * dshape[1] 8 | value = tf.decode_csv(s, record_defaults=record_defaults, field_delim=delimiter) 9 | value = tf.stack(value, axis=1) 10 | value = tf.cast(value, dtype) 11 | return tf.reshape(value, dshape) 12 | 13 | def _invert_permutation(input, row_count): 14 | '''wrapper for matrix''' 15 | rows = [] 16 | for i in range(row_count): 17 | row = input[i,:] 18 | rows.append(tf.invert_permutation(row)) 19 | return tf.cast(tf.stack(rows, axis=0), tf.float32) 20 | 21 | def input_fn(name="input", tables="", num_epochs=None, num_workers=1, worker_id=0, capacity=0, batch_size=64): 22 | with 
tf.variable_scope(name_or_scope=name, reuse=False) as scope: 23 | with tf.device(device_name_or_function = ("/job:localhost/replica:0/task:%d"%worker_id) if worker_id != -1 else None): 24 | filename_queue = tf.train.string_input_producer(tables, num_epochs=num_epochs) 25 | reader = tf.TextLineReader() 26 | keys, values = reader.read_up_to(filename_queue, batch_size) 27 | batch_keys, batch_values = tf.train.batch( 28 | [keys, values], 29 | batch_size=batch_size, 30 | capacity=10 * batch_size, 31 | enqueue_many=True, 32 | num_threads=1) 33 | record_defaults = [['']] * 4 + [[-1]] + [['']] * 9 34 | data = tf.decode_csv(batch_values, record_defaults=record_defaults, field_delim=';') 35 | 36 | pageid = data[4] 37 | ctr = data[7] 38 | cvr = data[8] 39 | price = data[9] 40 | isclick = data[10] 41 | pay = data[11] 42 | 43 | ctr = _parse_dense_features(ctr, (-1, 50)) 44 | cvr = _parse_dense_features(cvr, (-1, 50)) 45 | price = _parse_dense_features(price, (-1, 50)) 46 | isclick = _parse_dense_features(isclick, (-1, 50)) 47 | pay = _parse_dense_features(pay, (-1, 50)) 48 | 49 | batch_data = {'keys': batch_keys, 50 | 'pageid': pageid, 51 | 'ctr': ctr, 52 | 'cvr': cvr, 53 | 'price': price, 54 | 'click': isclick, 55 | 'pay': pay} 56 | return batch_data -------------------------------------------------------------------------------- /examples/rec_es/rec_run_es_local.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import inspect 7 | import json 8 | import logging 9 | import os 10 | import sys 11 | import time 12 | 13 | import tensorflow as tf 14 | from six.moves import xrange, shlex_quote 15 | 16 | path = os.path.abspath('.') 17 | sys.path.append(path) 18 | 19 | from tensorforce import TensorforceError 20 | from tensorforce.agents import Agent 21 | from rec_env import RecTableEnv 22 | 23 | """ 24 | # example command 25 | python examples/rec_es/rec_run_es_local.py -i examples/rec_es/rec_config_local.json 26 | """ 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser() 30 | 31 | parser.add_argument('-i', '--config', help="Configuration file") 32 | 33 | args = parser.parse_args() 34 | print(args) 35 | sys.stdout.flush() 36 | 37 | if args.config is not None: 38 | with open(args.config, 'r') as fp: 39 | config = json.load(fp=fp) 40 | else: 41 | raise TensorforceError("No configuration provided.") 42 | 43 | if 'agent' not in config: 44 | raise TensorforceError("No agent configuration provided.") 45 | else: 46 | agent_config = config['agent'] 47 | 48 | if 'network_spec' not in config: 49 | network_spec = None 50 | print("No network configuration provided.") 51 | else: 52 | network_spec = config['network_spec'] 53 | 54 | if 'env' not in config: 55 | raise TensorforceError("No environment configuration provided.") 56 | else: 57 | env_config = config['env'] 58 | 59 | environment = RecTableEnv(config) 60 | environment.set_up() 61 | 62 | agent_config['env'] = environment 63 | 64 | agent = Agent.from_spec( 65 | spec=agent_config, 66 | kwargs=dict( 67 | states_spec=environment.states, 68 | actions_spec=environment.actions, 69 | network_spec=network_spec, 70 | batch_data=environment.get_input_tensor() 71 | ) 72 | ) 73 | 74 | environment.set_session(agent.model.get_session()) 75 | 76 | print("********** Configuration ************") 77 | for key, value in agent_config.items(): 78 | print(str(key) + ": {}".format(value)) 79 | 80 | 
agent.run_worker() 81 | agent.close() 82 | 83 | 84 | if __name__ == '__main__': 85 | logging.info("start...") 86 | print('start') 87 | main() 88 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | import os 21 | 22 | from setuptools import setup, find_packages 23 | 24 | install_requires=[ 25 | 'numpy', 26 | 'six', 27 | 'scipy', 28 | 'pillow', 29 | 'pytest' 30 | ] 31 | 32 | setup_requires=[ 33 | 'numpy', 34 | 'recommonmark' 35 | ] 36 | 37 | extras_require = { 38 | 'tf': ['tensorflow>=1.3.0'], 39 | 'tf_gpu': ['tensorflow-gpu>=1.3.0'], 40 | 'gym': ['gym>=0.7.4'], 41 | 'universe': ['universe>=0.21.3'], 42 | 'mazeexp': ['mazeexp>=0.0.1'] 43 | } 44 | 45 | # Readthedocs requires Sphinx extensions to be specified as part of 46 | # install_requires in order to build properly. 47 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 48 | if on_rtd: 49 | install_requires.extend(setup_requires) 50 | 51 | 52 | setup(name='tensorforce', 53 | version='0.3.2', 54 | description='Reinforcement learning for TensorFlow', 55 | url='', 56 | author='', 57 | author_email='', 58 | license='Apache 2.0', 59 | packages=[package for package in find_packages() if package.startswith('tensorforce')], 60 | install_requires=install_requires, 61 | setup_requires=setup_requires, 62 | extras_require=extras_require, 63 | zip_safe=False) 64 | -------------------------------------------------------------------------------- /tensorforce/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | 17 | from tensorforce.exception import TensorforceError 18 | 19 | 20 | __version__ = '0.3.2' 21 | 22 | 23 | # Libraries should add NullHandler() by default, as its the application code's 24 | # responsibility to configure log handlers. 25 | # https://docs.python.org/3/howto/logging.html#configuring-logging-for-a-library 26 | 27 | import logging 28 | 29 | try: 30 | from logging import NullHandler 31 | except ImportError: 32 | class NullHandler(logging.Handler): 33 | def emit(self, record): 34 | pass 35 | 36 | logging.getLogger(__name__).addHandler(NullHandler()) 37 | 38 | __all__ = ['TensorforceError'] 39 | -------------------------------------------------------------------------------- /tensorforce/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/__pycache__/exception.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/__pycache__/exception.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/__pycache__/meta_parameter_recorder.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/__pycache__/meta_parameter_recorder.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/__pycache__/util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/__pycache__/util.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | from tensorforce.agents.agent import Agent 17 | from tensorforce.agents.deterministic_es_agent import DeterministicESAgent 18 | 19 | agents = dict( 20 | deterministic_es_agent=DeterministicESAgent 21 | ) 22 | 23 | __all__ = [ 24 | 'Agent', 25 | 'DeterministicESAgent', 26 | 'agents' 27 | ] 28 | -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/a3c_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/a3c_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/batch_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/batch_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/constant_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/constant_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/ddpg_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/ddpg_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/deterministic_es_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/deterministic_es_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/dqfd_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/dqfd_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/dqn_agent.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/dqn_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/dqn_nstep_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/dqn_nstep_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/es_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/es_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/memory_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/memory_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/naf_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/naf_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/ppo_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/ppo_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/random_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/random_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/trpo_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/trpo_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/vpg_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/vpg_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/agent.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from copy import deepcopy 21 | 22 | import numpy as np 23 | import inspect 24 | 25 | from tensorforce import util, TensorforceError 26 | import tensorforce.agents 27 | from tensorforce.meta_parameter_recorder import MetaParameterRecorder 28 | 29 | 30 | class Agent(object): 31 | """ 32 | Basic Reinforcement learning agent. An agent encapsulates execution logic 33 | of a particular reinforcement learning algorithm and defines the external interface 34 | to the environment. 35 | 36 | The agent hence acts as intermediate layer between environment 37 | and backend execution (value function or policy updates). 38 | 39 | """ 40 | 41 | def __init__( 42 | self, 43 | states_spec, 44 | actions_spec, 45 | batched_observe 46 | ): 47 | """ 48 | Initializes the reinforcement learning agent. 49 | 50 | Args: 51 | states_spec: Dict containing at least one state definition. In the case of a single state, 52 | keys `shape` and `type` are necessary. For multiple states, pass a dict of dicts where each state 53 | is a dict itself with a unique name as its key. 54 | actions_spec: Dict containing at least one action definition. Actions have types and either `num_actions` 55 | for discrete actions or a `shape` for continuous actions. Consult documentation and tests for more. 56 | batched_observe: Optional int specifying how many observe calls are batched into one session run. 57 | Without batching, throughput will be lower because every `observe` triggers a session invocation to 58 | update rewards in the graph. 
59 | """ 60 | 61 | self.unique_state = ('shape' in states_spec) 62 | if self.unique_state: 63 | states_spec = dict(state=states_spec) 64 | 65 | self.states_spec = deepcopy(states_spec) 66 | for name, state in self.states_spec.items(): 67 | # Convert int to unary tuple 68 | if isinstance(state['shape'], int): 69 | state['shape'] = (state['shape'],) 70 | 71 | # Set default type to float 72 | if 'type' not in state: 73 | state['type'] = 'float' 74 | 75 | # Actions config and exploration 76 | self.exploration = dict() 77 | self.unique_action = ('type' in actions_spec) 78 | if self.unique_action: 79 | actions_spec = dict(action=actions_spec) 80 | self.actions_spec = deepcopy(actions_spec) 81 | 82 | for name, action in self.actions_spec.items(): 83 | # Check required values 84 | if action['type'] == 'int': 85 | if 'num_actions' not in action: 86 | raise TensorforceError("Action requires value 'num_actions' set!") 87 | elif action['type'] == 'float': 88 | if ('min_value' in action) != ('max_value' in action): 89 | raise TensorforceError("Action requires both values 'min_value' and 'max_value' set!") 90 | 91 | # Set default shape to empty tuple 92 | if 'shape' not in action: 93 | action['shape'] = () 94 | 95 | # Convert int to unary tuple 96 | if isinstance(action['shape'], int): 97 | action['shape'] = (action['shape'],) 98 | 99 | # TensorFlow summaries & Configuration Meta Parameter Recorder options 100 | if self.summary_spec is None: 101 | self.summary_labels = set() 102 | else: 103 | self.summary_labels = set(self.summary_spec.get('labels', ())) 104 | 105 | self.meta_param_recorder = None 106 | 107 | #if 'configuration' in self.summary_labels or 'print_configuration' in self.summary_labels: 108 | if any(k in self.summary_labels for k in ['configuration','print_configuration']): 109 | self.meta_param_recorder = MetaParameterRecorder(inspect.currentframe()) 110 | if 'meta_dict' in self.summary_spec: 111 | # Custom Meta Dictionary passed 112 | self.meta_param_recorder.merge_custom(self.summary_spec['meta_dict']) 113 | if 'configuration' in self.summary_labels: 114 | # Setup for TensorBoard population 115 | self.summary_spec['meta_param_recorder_class'] = self.meta_param_recorder 116 | if 'print_configuration' in self.summary_labels: 117 | # Print to STDOUT (TADO: optimize output) 118 | self.meta_param_recorder.text_output(format_type=1) 119 | 120 | # Init Model, this must follow the Summary Configuration section above to cary meta_param_recorder 121 | self.model = self.initialize_model() 122 | 123 | # Batched observe for better performance with Python. 124 | self.batched_observe = batched_observe 125 | if self.batched_observe is not None: 126 | self.observe_terminal = list() 127 | self.observe_reward = list() 128 | 129 | self.reset() 130 | 131 | def __str__(self): 132 | return str(self.__class__.__name__) 133 | 134 | def sync(self, sync_value): 135 | self.model.sync(sync_value) 136 | 137 | def close(self): 138 | self.model.close() 139 | 140 | def initialize_model(self): 141 | """ 142 | Creates the model for the respective agent based on specifications given by user. This is a separate 143 | call after constructing the agent because the agent constructor has to perform a number of checks 144 | on the specs first, sometimes adjusting them e.g. by converting to a dict. 145 | """ 146 | raise NotImplementedError 147 | 148 | def reset(self): 149 | """ 150 | Reset the agent to its initial state on episode start. 
Updates internal episode and 151 | timestep counter, internal states, and resets preprocessors. 152 | """ 153 | self.episode, self.timestep, self.next_internals = self.model.reset() 154 | self.current_internals = self.next_internals 155 | 156 | #TODO have to call preprocessing reset in model 157 | # for preprocessing in self.preprocessing.values(): 158 | # preprocessing.reset() 159 | 160 | def act(self, states, deterministic=False): 161 | """ 162 | Return action(s) for given state(s). State preprocessing and exploration are applied if 163 | configured accordingly. 164 | 165 | Args: 166 | states: One state (usually a value tuple) or dict of states if multiple states are expected. 167 | deterministic: If true, no exploration or sampling is applied. 168 | Returns: 169 | Scalar value of the action or dict of multiple actions the agent wants to execute. 170 | 171 | """ 172 | self.current_internals = self.next_internals 173 | 174 | if self.unique_state: 175 | self.current_states = dict(state=np.asarray(states)) 176 | else: 177 | self.current_states = {name: np.asarray(state) for name, state in states.items()} 178 | 179 | # Retrieve action 180 | self.current_actions, self.next_internals, self.timestep = self.model.act( 181 | states=self.current_states, 182 | internals=self.current_internals, 183 | deterministic=deterministic 184 | ) 185 | 186 | if self.unique_action: 187 | return self.current_actions['action'] 188 | else: 189 | return self.current_actions 190 | 191 | def observe_batch(self, current_states, current_internals, current_actions, current_terminal, current_reward, next_states, next_internals): 192 | """ 193 | Observe one batch of data at a time from the environment. 194 | Usually used in non-interactive mode, where the data is prepared beforehand. 195 | """ 196 | raise NotImplementedError 197 | 198 | 199 | def observe(self, next_states, terminal, reward): 200 | """ 201 | Observe experience from the environment to learn from. Optionally preprocesses rewards. 202 | Child classes should call super to get the processed reward, 203 | e.g. terminal, reward = super()... 204 | 205 | Args: 206 | next_states: One state (usually a value tuple) or dict of states if multiple states are expected. 207 | terminal: boolean indicating if the episode terminated after the observation. 208 | reward: scalar reward that resulted from executing the action. 209 | """ 210 | self.current_terminal = terminal 211 | self.current_reward = reward 212 | 213 | if self.batched_observe is not None and self.batched_observe > 0: 214 | # Batched observe for better performance with Python.
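# Note: terminal flags and rewards are appended to Python-side buffers here and only
# pushed to the TensorFlow graph in a single session call once the episode terminates
# or the buffer reaches `batched_observe` entries (see below).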
215 | self.observe_terminal.append(self.current_terminal) 216 | self.observe_reward.append(self.current_reward) 217 | 218 | if self.current_terminal or len(self.observe_terminal) >= self.batched_observe: 219 | self.episode = self.model.observe( 220 | terminal=self.observe_terminal, 221 | reward=self.observe_reward 222 | ) 223 | self.observe_terminal = list() 224 | self.observe_reward = list() 225 | 226 | else: 227 | self.episode = self.model.observe( 228 | terminal=self.current_terminal, 229 | reward=self.current_reward 230 | ) 231 | 232 | 233 | def should_stop(self): 234 | return self.model.monitored_session.should_stop() 235 | 236 | def last_observation(self): 237 | return dict( 238 | states=self.current_states, 239 | internals=self.current_internals, 240 | actions=self.current_actions, 241 | terminal=self.current_terminal, 242 | reward=self.current_reward 243 | ) 244 | 245 | def save_model(self, directory=None, append_timestep=True): 246 | """ 247 | Save TensorFlow model. If no checkpoint directory is given, the model's default saver 248 | directory is used. Optionally appends the current timestep to prevent overwriting previous 249 | checkpoint files. Turn this off to be able to load the model from the same path argument as 250 | given here. 251 | 252 | Args: 253 | directory: Optional checkpoint directory. 254 | append_timestep: Appends the current timestep to the checkpoint file if true. 255 | If this is set to True, the load path must include the checkpoint timestep suffix. 256 | For example, if stored to models/ and set to true, the exported file will be of the 257 | form models/model.ckpt-X where X is the last timestep saved. The load path must 258 | precisely match this file name. If this option is turned off, the checkpoint will 259 | always overwrite the file specified in path and the model can always be loaded under 260 | this path. 261 | 262 | Returns: 263 | Checkpoint path where the model was saved. 264 | """ 265 | return self.model.save(directory=directory, append_timestep=append_timestep) 266 | 267 | def restore_model(self, directory=None, file=None): 268 | """ 269 | Restore TensorFlow model. If no checkpoint file is given, the latest checkpoint is 270 | restored. If no checkpoint directory is given, the model's default saver directory is 271 | used (unless file specifies the entire path). 272 | 273 | Args: 274 | directory: Optional checkpoint directory. 275 | file: Optional checkpoint file, or path if directory not given. 276 | """ 277 | self.model.restore(directory=directory, file=file) 278 | 279 | @staticmethod 280 | def from_spec(spec, kwargs): 281 | """ 282 | Creates an agent from a specification dict. 283 | """ 284 | agent = util.get_object( 285 | obj=spec, 286 | predefined_objects=tensorforce.agents.agents, 287 | kwargs=kwargs 288 | ) 289 | assert isinstance(agent, Agent) 290 | return agent 291 | -------------------------------------------------------------------------------- /tensorforce/agents/deterministic_es_agent.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | desc: Evolution strategy agent.
5 | created: 2017.01.23 6 | @author: cuiqing.cq 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | 14 | import numpy as np 15 | 16 | from tensorforce.agents import Agent 17 | from tensorforce import util, TensorforceError 18 | from tensorforce.models import DeterministicESModel 19 | 20 | 21 | class DeterministicESAgent(Agent): 22 | """ 23 | Evolution Strategy as a Scalable Alternative to Reinforcement Learning 24 | [Tim Salimans, Jonathan Ho, et al., 2017] 25 | (https://arxiv.org/abs/1703.03864). 26 | 27 | Use DeterministicESModel which does not have the distribution layer. 28 | """ 29 | 30 | def __init__( 31 | self, 32 | env, 33 | states_spec, 34 | actions_spec, 35 | network_spec, 36 | device=None, 37 | session_config=None, 38 | scope='deterministic_es', 39 | saver_spec=None, 40 | summary_spec=None, 41 | distributed_spec=None, 42 | optimizer=None, 43 | states_preprocessing_spec=None, 44 | explorations_spec=None, 45 | reward_preprocessing_spec=None, 46 | distributions_spec=None, 47 | entropy_regularization=None, 48 | max_episode_timesteps=None, 49 | batch_size=1000, 50 | noise_stddev=0.02, 51 | eval_prob=0.01, 52 | l2_coeff=0.01, 53 | train_iters=1000, 54 | seed_range=1000000, 55 | repeat_actions=1, 56 | batch_data=None 57 | ): 58 | 59 | """ 60 | Args: 61 | states_spec: Dict containing at least one state definition. In the case of a single state, 62 | keys `shape` and `type` are necessary. For multiple states, pass a dict of dicts where each state 63 | is a dict itself with a unique name as its key. 64 | actions_spec: Dict containing at least one action definition. Actions have types and either `num_actions` 65 | for discrete actions or a `shape` for continuous actions. Consult documentation and tests for more. 66 | network_spec: List of layers specifying a neural network via layer types, sizes and optional arguments 67 | such as activation or regularization. Full examples are in the examples/configs folder. 68 | device: Device string specifying model device. 69 | session_config: optional tf.ConfigProto with additional desired session configurations 70 | scope: TensorFlow scope, defaults to agent name (e.g. `dqn`). 71 | saver_spec: Dict specifying automated saving. Use `directory` to specify where checkpoints are saved. Use 72 | either `seconds` or `steps` to specify how often the model should be saved. The `load` flag specifies 73 | if a model is initially loaded (set to True) from a file `file`. 74 | summary_spec: Dict specifying summaries for TensorBoard. Requires a 'directory' to store summaries, `steps` 75 | or `seconds` to specify how often to save summaries, and a list of `labels` to indicate which values 76 | to export, e.g. `losses`, `variables`. Consult neural network class and model for all available labels. 77 | distributed_spec: Dict specifying distributed functionality. Use `parameter_server` and `replica_model` 78 | Boolean flags to indicate workers and parameter servers. Use a `cluster_spec` key to pass a TensorFlow 79 | cluster spec. 80 | states_preprocessing_spec: Optional list of states preprocessors to apply to state 81 | (e.g. `image_resize`, `grayscale`). 82 | explorations_spec: Optional dict specifying action exploration type (epsilon greedy 83 | or Gaussian noise). 84 | reward_preprocessing_spec: Optional dict specifying reward preprocessing. 85 | distributions_spec: Optional dict specifying action distributions to override default distribution choices. 
86 | Must match action names. 87 | entropy_regularization: Optional positive float specifying an entropy regularization value. 88 | batch_size: Int specifying number of samples collected via `observe` before an update is executed. 89 | batch_data: Input data tensor, used for table environments. 90 | repeat_actions: Int specifying how many times actions are repeated to better estimate the reward. 91 | """ 92 | 93 | if network_spec is None: 94 | raise TensorforceError("No network_spec provided.") 95 | 96 | self.env = env 97 | self.network_spec = network_spec 98 | self.device = device 99 | self.session_config = session_config 100 | self.scope = scope 101 | self.saver_spec = saver_spec 102 | self.summary_spec = summary_spec 103 | self.distributed_spec = distributed_spec 104 | self.states_preprocessing_spec = states_preprocessing_spec 105 | self.explorations_spec = explorations_spec 106 | self.reward_preprocessing_spec = reward_preprocessing_spec 107 | self.distributions_spec = distributions_spec 108 | self.entropy_regularization = entropy_regularization 109 | self.batch_size = batch_size 110 | self.max_episode_timesteps = max_episode_timesteps 111 | self.noise_stddev = noise_stddev 112 | self.eval_prob = eval_prob 113 | self.l2_coeff = l2_coeff 114 | self.train_iters = train_iters 115 | self.seed_range = seed_range 116 | self.repeat_actions = repeat_actions 117 | self.batch_data = batch_data 118 | 119 | if optimizer is None: 120 | self.optimizer = dict( 121 | type='adam', 122 | learning_rate=0.01 123 | ) 124 | else: 125 | self.optimizer = optimizer 126 | 127 | super(DeterministicESAgent, self).__init__( 128 | states_spec=states_spec, 129 | actions_spec=actions_spec, 130 | batched_observe=None 131 | ) 132 | 133 | def run_worker(self): 134 | # Start running on all workers. 135 | self.model.update() 136 | 137 | def initialize_model(self): 138 | return DeterministicESModel( 139 | env=self.env, 140 | states_spec=self.states_spec, 141 | actions_spec=self.actions_spec, 142 | network_spec=self.network_spec, 143 | device=self.device, 144 | session_config=self.session_config, 145 | scope=self.scope, 146 | saver_spec=self.saver_spec, 147 | summary_spec=self.summary_spec, 148 | distributed_spec=self.distributed_spec, 149 | optimizer=self.optimizer, 150 | states_preprocessing_spec=self.states_preprocessing_spec, 151 | explorations_spec=self.explorations_spec, 152 | reward_preprocessing_spec=self.reward_preprocessing_spec, 153 | distributions_spec=self.distributions_spec, 154 | entropy_regularization=self.entropy_regularization, 155 | batch_size=self.batch_size, 156 | max_episode_timesteps=self.max_episode_timesteps, 157 | noise_stddev=self.noise_stddev, 158 | eval_prob=self.eval_prob, 159 | l2_coeff=self.l2_coeff, 160 | train_iters=self.train_iters, 161 | seed_range=self.seed_range, 162 | repeat_actions=self.repeat_actions, 163 | batch_data=self.batch_data 164 | ) 165 | -------------------------------------------------------------------------------- /tensorforce/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tensorforce/core/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | 17 | from tensorforce.core.explorations.exploration import Exploration 18 | from tensorforce.core.explorations.constant import Constant 19 | 20 | 21 | explorations = dict( 22 | constant=Constant 23 | ) 24 | 25 | 26 | __all__ = ['Exploration', 'Constant', 'explorations'] 27 | -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/constant.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/constant.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/epsilon_anneal.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/epsilon_anneal.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/epsilon_decay.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/epsilon_decay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/exploration.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/exploration.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/linear_decay.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/linear_decay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/ornstein_uhlenbeck_process.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/ornstein_uhlenbeck_process.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/constant.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from tensorforce.core.explorations import Exploration 17 | 18 | 19 | class Constant(Exploration): 20 | """ 21 | Explore via adding a constant term. 22 | """ 23 | 24 | def __init__(self, constant=0.0, scope='constant', summary_labels=()): 25 | self.constant = float(constant) 26 | super(Constant, self).__init__(scope=scope, summary_labels=summary_labels) 27 | 28 | def tf_explore(self, episode, timestep, action_shape): 29 | return self.constant 30 | -------------------------------------------------------------------------------- /tensorforce/core/explorations/exploration.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import tensorflow as tf 17 | from tensorforce import util 18 | import tensorforce.core.explorations 19 | 20 | 21 | class Exploration(object): 22 | """ 23 | Abstract exploration object. 24 | """ 25 | 26 | def __init__(self, scope='exploration', summary_labels=None): 27 | self.summary_labels = set(summary_labels or ()) 28 | 29 | self.variables = dict() 30 | self.summaries = list() 31 | 32 | def custom_getter(getter, name, registered=False, **kwargs): 33 | variable = getter(name=name, registered=True, **kwargs) 34 | if not registered: 35 | self.variables[name] = variable 36 | return variable 37 | 38 | self.explore = tf.make_template( 39 | name_=(scope + '/explore'), 40 | func_=self.tf_explore, 41 | custom_getter_=custom_getter 42 | ) 43 | 44 | def tf_explore(self, episode, timestep, action_shape): 45 | """ 46 | Creates exploration value, e.g. compute an epsilon for epsilon-greedy or sample normal 47 | noise. 48 | """ 49 | raise NotImplementedError 50 | 51 | def get_variables(self): 52 | """ 53 | Returns exploration variables. 54 | 55 | Returns: 56 | List of variables. 57 | """ 58 | return [self.variables[key] for key in sorted(self.variables)] 59 | 60 | @staticmethod 61 | def from_spec(spec): 62 | """ 63 | Creates an exploration object from a specification dict. 64 | """ 65 | exploration = util.get_object( 66 | obj=spec, 67 | predefined_objects=tensorforce.core.explorations.explorations 68 | ) 69 | assert isinstance(exploration, Exploration) 70 | return exploration 71 | -------------------------------------------------------------------------------- /tensorforce/core/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__init__.py -------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/atari_wrapper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/atari_wrapper.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/env_seeding.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/env_seeding.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/optimizers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/optimizers.cpython-36.pyc 
-------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/schedules.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/schedules.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/segment_tree.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/segment_tree.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/lib/env_seeding.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | """ 4 | desc: Create a strong random seed. Otherwise, Python 2 would seed using 5 | the system time, which might be non-robust especially in the presence of 6 | concurrency. 7 | permalink: https://svn.python.org/projects/python/tags/r32/Lib/random.py 8 | create: 2017.12.11 9 | modified by @sam.dm 10 | 11 | """ 12 | 13 | import hashlib 14 | import numpy as np 15 | import os 16 | import random as _random 17 | import struct 18 | import sys 19 | 20 | 21 | if sys.version_info < (3,): 22 | integer_types = (int, long) 23 | else: 24 | integer_types = (int,) 25 | 26 | def np_random(seed=None): 27 | if seed is not None and not (isinstance(seed, integer_types) and 0 <= seed): 28 | raise Exception('Seed must be a non-negative integer or omitted, not {}'.format(seed)) 29 | 30 | seed = _seed(seed) 31 | 32 | rng = np.random.RandomState() 33 | rng.seed(_int_list_from_bigint(hash_seed(seed))) 34 | return rng, seed 35 | 36 | def hash_seed(seed=None, max_bytes=8): 37 | """ 38 | Args: 39 | seed (Optional[int]): None seeds from an operating system specific randomness source. 40 | max_bytes: Maximum number of bytes to use in the hashed seed. 41 | """ 42 | if seed is None: 43 | seed = _seed(max_bytes=max_bytes) 44 | hash = hashlib.sha512(str(seed).encode('utf8')).digest() 45 | return _bigint_from_bytes(hash[:max_bytes]) 46 | 47 | def _seed(a=None, max_bytes=8): 48 | """ 49 | Args: 50 | a (Optional[int, str]): None seeds from an operating system specific randomness source. 51 | max_bytes: Maximum number of bytes to use in the seed. 
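Example (illustrative):
    _seed(2016)      # -> 2016 (integers are reduced modulo 2**(8 * max_bytes))
    _seed('rec-rl')  # -> integer derived from the utf8 bytes of the string and its sha512 hash
    _seed(None)      # -> integer drawn from os.urandom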
52 | """ 53 | if a is None: 54 | a = _bigint_from_bytes(os.urandom(max_bytes)) 55 | elif isinstance(a, str): 56 | a = a.encode('utf8') 57 | a += hashlib.sha512(a).digest() 58 | a = _bigint_from_bytes(a[:max_bytes]) 59 | elif isinstance(a, integer_types): 60 | a = a % 2**(8 * max_bytes) 61 | else: 62 | raise Exception('Invalid type for seed: {} ({})'.format(type(a), a)) 63 | 64 | return a 65 | 66 | def _bigint_from_bytes(bytes): 67 | sizeof_int = 4 68 | padding = sizeof_int - len(bytes) % sizeof_int 69 | bytes += b'\0' * padding 70 | int_count = int(len(bytes) / sizeof_int) 71 | unpacked = struct.unpack("{}I".format(int_count), bytes) 72 | accum = 0 73 | for i, val in enumerate(unpacked): 74 | accum += 2 ** (sizeof_int * 8 * i) * val 75 | return accum 76 | 77 | def _int_list_from_bigint(bigint): 78 | # Special case 0 79 | if bigint < 0: 80 | raise Exception('Seed must be non-negative, not {}'.format(bigint)) 81 | elif bigint == 0: 82 | return [0] 83 | 84 | ints = [] 85 | while bigint > 0: 86 | bigint, mod = divmod(bigint, 2 ** 32) 87 | ints.append(mod) 88 | return ints 89 | -------------------------------------------------------------------------------- /tensorforce/core/lib/optimizers.py: -------------------------------------------------------------------------------- 1 | """ 2 | desc: SGD and Adam optimizer numpy implement. 3 | create: 2018.01.18 4 | @author: sam.dm 5 | """ 6 | 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | from tensorforce import util, TensorforceError 14 | from tensorforce.core.lib import schedules 15 | 16 | 17 | def from_spec(spec, kwargs=None): 18 | """ 19 | Creates an optimizer from a specification dict. 20 | """ 21 | optimizer = util.get_object( 22 | obj=spec, 23 | predefined_objects=optimizers, 24 | kwargs=kwargs 25 | ) 26 | assert isinstance(optimizer, Optimizer) 27 | return optimizer 28 | 29 | 30 | class Optimizer(object): 31 | def __init__(self, dim): 32 | self.dim = dim 33 | self.t = 0 34 | 35 | def update(self, grad): 36 | self.t += 1 37 | step = self._compute_step(grad) 38 | return step 39 | 40 | def _compute_step(self, grad): 41 | raise NotImplementedError 42 | 43 | 44 | class Momentum(Optimizer): 45 | def __init__(self, dim, learning_rate, momentum=0.9, lr_schedule=None): 46 | Optimizer.__init__(self, dim) 47 | if lr_schedule is not None: 48 | lr_schedule['value'] = learning_rate 49 | self.decay_obj = schedules.from_spec(lr_schedule) 50 | self.lr_schedule = lr_schedule 51 | self.v = np.zeros(self.dim, dtype=np.float32) 52 | self.learning_rate, self.momentum = learning_rate, momentum 53 | 54 | def _compute_step(self, globgrad): 55 | self.v = self.momentum * self.v + (1. 
- self.momentum) * grad 56 | if self.lr_schedule is not None: 57 | self.learning_rate = self.decay_obj(self.t) 58 | step = -self.learning_rate * self.v 59 | return step 60 | 61 | 62 | class Adam(Optimizer): 63 | def __init__(self, dim, learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, lr_schedule=None): 64 | Optimizer.__init__(self, dim) 65 | if lr_schedule is not None: 66 | lr_schedule['value'] = learning_rate 67 | self.decay_obj = schedules.from_spec(lr_schedule) 68 | self.lr_schedule = lr_schedule 69 | self.learning_rate = learning_rate 70 | if isinstance(self.learning_rate, list): 71 | self.learning_rate = np.asarray(self.learning_rate, dtype=np.float32).flatten() 72 | assert self.learning_rate.size == self.dim 73 | self.beta1 = beta1 74 | self.beta2 = beta2 75 | self.epsilon = epsilon 76 | self.m = np.zeros(self.dim, dtype=np.float32) 77 | self.v = np.zeros(self.dim, dtype=np.float32) 78 | 79 | def _compute_step(self, grad): 80 | if self.lr_schedule is not None: 81 | self.learning_rate = self.decay_obj(self.t) 82 | a = self.learning_rate * (np.sqrt(1 - self.beta2 ** self.t) / 83 | (1 - self.beta1 ** self.t)) 84 | self.m = self.beta1 * self.m + (1 - self.beta1) * grad 85 | self.v = self.beta2 * self.v + (1 - self.beta2) * (grad * grad) 86 | step = -a * self.m / (np.sqrt(self.v) + self.epsilon) 87 | return step 88 | 89 | optimizers = {"adam": Adam, "momentum": Momentum} 90 | -------------------------------------------------------------------------------- /tensorforce/core/lib/schedules.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | desc: schedule types, e.g. learning rate, priority beta, epsilon, etc. 5 | created: 2017.12.11 6 | @author: sam.dm 7 | """ 8 | import math 9 | from tensorforce import util, TensorforceError 10 | 11 | 12 | def from_spec(spec, kwargs=None): 13 | lr_schedule = util.get_object( 14 | obj=spec, 15 | predefined_objects=lr_schedulers, 16 | kwargs=kwargs 17 | ) 18 | assert isinstance(lr_schedule, Schedule) 19 | 20 | return lr_schedule 21 | 22 | 23 | class Schedule(object): 24 | def __call__(self, global_step): 25 | """ 26 | Value of the schedule at the given global_step. 27 | """ 28 | 29 | raise NotImplementedError() 30 | 31 | 32 | class Constant(Schedule): 33 | def __init__(self, value): 34 | """ 35 | Value remains constant over time. 36 | Args: 37 | value: float, Constant value of the schedule 38 | """ 39 | 40 | self._value = value 41 | 42 | def __call__(self, global_step): 43 | 44 | return self._value 45 | 46 | 47 | class PiecewiseDecay(Schedule): 48 | def __init__(self, endpoints, outside_value=None): 49 | """ 50 | Piecewise decay schedule. 51 | Args: 52 | endpoints: [(int, float)], list of pairs (time, value) meaning that the schedule should output 53 | `value` when global_step == time. All the values for time must be sorted in an increasing order. 54 | outside_value: float, if the value is requested outside of all the intervals specified in 55 | endpoints this value is returned.
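Example (an illustrative sketch; the endpoint values are hypothetical):
    schedule = PiecewiseDecay(endpoints=[(0, 1.0), (1000, 0.1)], outside_value=0.1)
    schedule(0)      # -> 1.0
    schedule(500)    # -> 0.55 (linear interpolation between the two endpoints)
    schedule(2000)   # -> 0.1  (outside all intervals, so outside_value is returned)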
56 | """ 57 | 58 | idxes = [e[0] for e in endpoints] 59 | assert idxes == sorted(idxes) 60 | self._interpolation = interpolation 61 | self._outside_value = outside_value 62 | self._endpoints = endpoints 63 | 64 | def _linear_interpolation(self, l, r, alpha): 65 | 66 | return l + alpha * (r - l) 67 | 68 | def __call__(self, global_step): 69 | for (l_t, l), (r_t, r) in zip(self._endpoints[:-1], self._endpoints[1:]): 70 | if l_t <= global_step and global_step < r_t: 71 | alpha = float(t - l_t) / (r_t - l_t) 72 | return self._interpolation(l, r, alpha) 73 | 74 | # t does not belong to any of the pieces, so doom. 75 | assert self._outside_value is not None 76 | 77 | return self._outside_value 78 | 79 | 80 | class LinearDecay(Schedule): 81 | def __init__(self, value, max_decay_steps, final_value): 82 | """ 83 | Linear interpolation between initial_value and final_value over schedule_timesteps. 84 | Args: 85 | max_timesteps: int, Number of max schedule timesteps. 86 | value: float, initial output value 87 | final_value: float, final output value 88 | """ 89 | 90 | self._max_decay_steps = max_decay_steps 91 | self._initial_value = value 92 | self._final_value = final_value 93 | 94 | def __call__(self, global_step): 95 | fraction = min(float(global_step) / self._max_decay_steps, 1.0) 96 | 97 | return self._initial_value + fraction * (self._final_value - self._initial_value) 98 | 99 | 100 | class ExponentialDecay(Schedule): 101 | def __init__(self, value, decay_steps, decay_rate, staircase=False): 102 | """ 103 | decayed_value = value * decay_rate ^ (global_step / decay_steps) 104 | """ 105 | 106 | self._value = value 107 | self._decay_steps = decay_steps 108 | self._decay_rate = decay_rate 109 | self._staircase = staircase 110 | 111 | def __call__(self, global_step): 112 | p = float(global_step) / self._decay_steps 113 | if self._staircase: 114 | p = math.floor(p) 115 | 116 | return self._value * math.pow(self._decay_rate, p) 117 | 118 | class PolynomialDecay(Schedule): 119 | def __init__(self, value, decay_steps, final_value=0.0001, power=1.0, cycle=False): 120 | """ 121 | global_step = min(global_step, decay_steps) 122 | decayed_value = (value - final_value) * 123 | (1 - global_step / decay_steps) ^ (power) + 124 | final_value 125 | If cycle is True then a multiple of decay_steps is used, the first one 126 | that is bigger than global_steps. 
127 | 128 | decay_steps = decay_steps * ceil(global_step / decay_steps) 129 | decayed_value = (value - final_value) * (1 - global_step / decay_steps) ^ (power) + 130 | final_value 131 | """ 132 | 133 | self._value = value 134 | self._decay_steps = decay_steps 135 | self._final_value = final_value 136 | self._power = power 137 | self._cycle = cycle 138 | 139 | def __call__(self, global_step): 140 | if self._cycle: 141 | if global_step == 0: 142 | multiplier = 1.0 143 | else: 144 | multiplier = math.ceil(global_step / self._decay_steps) 145 | decay_steps = self._decay_steps * multiplier 146 | else: 147 | decay_steps = self._decay_steps 148 | global_step = min(global_step, self._decay_steps) 149 | 150 | p = float(global_step) / decay_steps 151 | 152 | return (self._value - self._final_value) * math.pow( 153 | 1 - p, self._power) + self._final_value 154 | 155 | class NaturalExpDecay(Schedule): 156 | def __init__(self, value, decay_steps, decay_rate, staircase=False): 157 | """ 158 | decayed_value = value * exp(-decay_rate * global_step) 159 | """ 160 | 161 | self._value = value 162 | self._decay_steps = decay_steps 163 | self._decay_rate = decay_rate 164 | self._staircase = staircase 165 | 166 | def __call__(self, global_step): 167 | p = float(global_step) / self._decay_steps 168 | if self._staircase: 169 | p = math.ceil(p) 170 | exponent = math.exp(-self._decay_rate * p) 171 | 172 | return self._value * exponent 173 | 174 | class InverseTimeDecay(Schedule): 175 | def __init__(self, value, decay_steps, decay_rate, staircase=False): 176 | """ 177 | decayed_value = value / (1 + decay_rate * global_step / decay_step) 178 | 179 | if staircase is True, as: 180 | decayed_value = value / (1 + decay_rate * floor(global_step / decay_step)) 181 | """ 182 | 183 | self._value = value 184 | self._decay_steps = decay_steps 185 | self._decay_rate = decay_rate 186 | self._staircase = staircase 187 | 188 | def __call__(self, global_step): 189 | p = float(global_step) / self._decay_steps 190 | if self._staircase: 191 | p = math.ceil(p) 192 | denom = 1.0 + p * self._decay_rate 193 | 194 | return self._value / denom 195 | 196 | lr_schedulers = { 197 | "constant": Constant, 198 | "exp_decay": ExponentialDecay, 199 | "natural_exp_decay": NaturalExpDecay, 200 | "inverse_time_decay": InverseTimeDecay, 201 | "polynomial_decay": PolynomialDecay, 202 | "linear_decay": LinearDecay 203 | } 204 | -------------------------------------------------------------------------------- /tensorforce/core/memories/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | 17 | from tensorforce.core.memories.memory import Memory 18 | from tensorforce.core.memories.replay import Replay 19 | from tensorforce.core.memories.prioritized_replay import PrioritizedReplay 20 | from tensorforce.core.memories.modified_replay import PrioritizedReplayBuffer 21 | from tensorforce.core.memories.modified_replay import ReplayBuffer 22 | from tensorforce.core.memories.naive_prioritized_replay import NaivePrioritizedReplay 23 | 24 | 25 | memories = dict( 26 | replay=Replay, 27 | ) 28 | 29 | __all__ = ['memories', 'Memory', 'ReplayBuffer'] 30 | -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/memory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/memory.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/modified_replay.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/modified_replay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/naive_prioritized_replay.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/naive_prioritized_replay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/prioritized_replay.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/prioritized_replay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/replay.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/replay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from tensorforce import util 21 | import tensorforce.core.memories 22 | 23 | 24 | # TODO: implement in TensorFlow 25 | 26 | class Memory(object): 27 | """ 28 | Abstract memory class. 29 | """ 30 | 31 | def __init__(self, states_spec, actions_spec): 32 | """ 33 | Generic memory without sampling strategy implemented. 34 | 35 | Args: 36 | states_spec: State specifiction 37 | actions_spec: Action specification 38 | """ 39 | self.states_spec = states_spec 40 | self.actions_spec = actions_spec 41 | 42 | def add_observation(self, states, internals, actions, terminal, reward, next_states, next_internals): 43 | """ 44 | Inserts a single experience to the memory. 45 | 46 | Args: 47 | states: 48 | internals: 49 | actions: 50 | terminal: 51 | reward: 52 | 53 | Returns: 54 | 55 | """ 56 | raise NotImplementedError 57 | 58 | def get_batch(self, batch_size): 59 | """ 60 | Samples a batch from the memory. 61 | 62 | Args: 63 | batch_size: The batch size 64 | 65 | Returns: A dict containing states, internal states, actions, terminals, rewards (and next states) 66 | 67 | """ 68 | raise NotImplementedError 69 | 70 | def update_batch(self, idxes, priorities): 71 | """ 72 | Updates loss values for sampling strategies based on loss functions. 73 | 74 | Args: 75 | idxes: 76 | priorities: 77 | 78 | """ 79 | raise NotImplementedError 80 | 81 | def set_memory(self, states, internals, actions, terminals, rewards, next_states, next_internals): 82 | """ 83 | Deletes memory content and sets content to provided observations. 84 | 85 | Args: 86 | states: 87 | internals: 88 | actions: 89 | terminals: 90 | rewards: 91 | next_states: 92 | next_internals: 93 | 94 | """ 95 | raise NotImplementedError 96 | 97 | def update_batch(self, idxes, priorities): 98 | """ 99 | Update SumTree Priorities after training. 100 | 101 | Args: 102 | idxes: 103 | rewards: 104 | 105 | """ 106 | raise NotImplementedError 107 | 108 | @staticmethod 109 | def from_spec(spec, kwargs=None): 110 | """ 111 | Creates a memory from a specification dict. 112 | """ 113 | memory = util.get_object( 114 | obj=spec, 115 | predefined_objects=tensorforce.core.memories.memories, 116 | kwargs=kwargs 117 | ) 118 | assert isinstance(memory, Memory) 119 | return memory 120 | -------------------------------------------------------------------------------- /tensorforce/core/memories/replay.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from random import randrange 21 | import numpy as np 22 | 23 | from tensorforce import util 24 | from tensorforce.core.memories import Memory 25 | 26 | 27 | class Replay(Memory): 28 | """ 29 | Replay memory to store observations and sample mini batches for training from. 30 | """ 31 | 32 | def __init__(self, states_spec, actions_spec, capacity, random_sampling=True): 33 | super(Replay, self).__init__(states_spec=states_spec, actions_spec=actions_spec) 34 | self.capacity = capacity 35 | self.states = {name: np.zeros((capacity,) + tuple(state['shape']), dtype=util.np_dtype(state['type'])) 36 | for name, state in states_spec.items()} 37 | self.next_states = {name: np.zeros((capacity,) + tuple(state['shape']), dtype=util.np_dtype(state['type'])) 38 | for name, state in states_spec.items()} 39 | self.internals, self.next_internals = None, None 40 | self.actions = {name: np.zeros((capacity,) + tuple(action['shape']), dtype=util.np_dtype(action['type'])) 41 | for name, action in actions_spec.items()} 42 | self.terminal = np.zeros((capacity,), dtype=util.np_dtype('bool')) 43 | self.reward = np.zeros((capacity,), dtype=util.np_dtype('float')) 44 | 45 | self.size = 0 46 | self.index = 0 47 | self.random_sampling = random_sampling 48 | 49 | def add_observation(self, states, internals, actions, terminal, reward, next_states, next_internals): 50 | if self.internals is None: 51 | self.internals = [np.zeros((self.capacity,) + internal.shape, internal.dtype) for internal in internals] 52 | if self.next_internals is None: 53 | self.next_internals = [np.zeros((self.capacity,) + internal.shape, internal.dtype) for internal in next_internals] 54 | 55 | for name, state in states.items(): 56 | self.states[name][self.index] = state 57 | for name, next_state in next_states.items(): 58 | self.next_states[name][self.index] = next_state 59 | for n, internal in enumerate(internals): 60 | self.internals[n][self.index] = internal 61 | for n, next_internal in enumerate(next_internals): 62 | self.next_internals[n][self.index] = next_internal 63 | for name, action in actions.items(): 64 | self.actions[name][self.index] = action 65 | self.reward[self.index] = reward 66 | self.terminal[self.index] = terminal 67 | 68 | if self.size < self.capacity: 69 | self.size += 1 70 | self.index = (self.index + 1) % self.capacity 71 | 72 | def get_batch(self, batch_size): 73 | """ 74 | Samples a batch of the specified size by selecting a random start/end point and returning 75 | the contained sequence or random indices depending on the field 'random_sampling'. 
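Example (an illustrative sketch; `memory` is a hypothetical, already filled Replay instance):
    batch = memory.get_batch(batch_size=32)
    states, actions = batch['states'], batch['actions']
    rewards, terminals = batch['reward'], batch['terminal']
    next_states = batch['next_states']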
76 | 77 | Args: 78 | batch_size: The batch size 79 | 80 | 81 | Returns: A dict containing states, actions, rewards, terminals, internal states (and next states) 82 | 83 | """ 84 | indices = np.random.randint(self.size - 1, size=batch_size) 85 | terminal = self.terminal.take(indices) 86 | 87 | states = {name: state.take(indices, axis=0) for name, state in self.states.items()} 88 | internals = [internal.take(indices, axis=0) for internal in self.internals] 89 | actions = {name: action.take(indices, axis=0) for name, action in self.actions.items()} 90 | terminal = self.terminal.take(indices) 91 | reward = self.reward.take(indices) 92 | next_states = {name: state.take(indices, axis=0) for name, state in self.next_states.items()} 93 | next_internals = [internal.take(indices, axis=0) for internal in self.next_internals] 94 | 95 | batch = dict(states=states, internals=internals, actions=actions, terminal=terminal, reward=reward, 96 | next_states=next_states, next_internals=next_internals) 97 | return batch 98 | 99 | def set_memory(self, states, internals, actions, terminal, reward, next_states, next_internals): 100 | """ 101 | Convenience function to set whole batches as memory content to bypass 102 | calling the insert function for every single experience. 103 | 104 | """ 105 | self.size = len(terminal) 106 | 107 | if len(terminal) == self.capacity: 108 | # Assign directly if capacity matches size. 109 | for name, state in states.items(): 110 | self.states[name] = np.asarray(state) 111 | for name, state in next_states.items(): 112 | self.next_states[name] = np.asarray(state) 113 | self.internals = [np.asarray(internal) for internal in internals] 114 | self.next_internals = [np.asarray(internal) for internal in next_internals] 115 | for name, action in actions.items(): 116 | self.actions[name] = np.asarray(action) 117 | self.terminal = np.asarray(terminal) 118 | self.reward = np.asarray(reward) 119 | # Filled capacity to point of index wrap 120 | self.index = 0 121 | 122 | else: 123 | # Otherwise partial assignment. 124 | if self.internals is None: 125 | self.internals = [np.zeros((self.capacity,) + internal.shape, internal.dtype) for internal 126 | in internals] 127 | if self.next_internals is None: 128 | self.next_internals = [np.zeros((self.capacity,) + internal.shape, internal.dtype) for internal 129 | in next_internals] 130 | 131 | for name, state in states.items(): 132 | self.states[name][:len(state)] = state 133 | for name, state in next_states.items(): 134 | self.next_states[name][:len(state)] = state 135 | for n, internal in enumerate(internals): 136 | self.internals[n][:len(internal)] = internal 137 | for n, next_internal in enumerate(next_internals): 138 | self.next_internals[n][:len(next_internal)] = next_internal 139 | for name, action in actions.items(): 140 | self.actions[name][:len(action)] = action 141 | self.terminal[:len(terminal)] = terminal 142 | self.reward[:len(reward)] = reward 143 | self.index = len(terminal) 144 | 145 | def update_batch(self, idxes, priorities): 146 | pass 147 | -------------------------------------------------------------------------------- /tensorforce/core/networks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from tensorforce.core.networks.layer import Layer, Nonlinearity, Dropout, Flatten, Identity, Layernorm, Pool2d, Embedding, Linear, Dense, \ 17 | Dueling, Conv1d, Conv2d, InternalLstm, Lstm 18 | from tensorforce.core.networks.network import Network, LayerBasedNetwork, LayeredNetwork 19 | 20 | 21 | layers = dict( 22 | nonlinearity=Nonlinearity, 23 | dropout=Dropout, 24 | flatten=Flatten, 25 | identity=Identity, 26 | layer_norm=Layernorm, 27 | pool2d=Pool2d, 28 | embedding=Embedding, 29 | linear=Linear, 30 | dense=Dense, 31 | dueling=Dueling, 32 | conv1d=Conv1d, 33 | conv2d=Conv2d, 34 | internal_lstm=InternalLstm, 35 | lstm=Lstm 36 | ) 37 | 38 | 39 | __all__ = [ 40 | 'layers', 41 | 'Layer', 42 | 'Nonlinearity', 43 | 'Identity', 44 | 'Layernorm', 45 | 'Dropout', 46 | 'Flatten', 47 | 'Pool2d', 48 | 'Embedding', 49 | 'Linear', 50 | 'Dense', 51 | 'Dueling', 52 | 'Conv1d', 53 | 'Conv2d', 54 | 'InternalLstm', 55 | 'Lstm', 56 | 'Network', 57 | 'LayerBasedNetwork', 58 | 'LayeredNetwork' 59 | ] 60 | -------------------------------------------------------------------------------- /tensorforce/core/networks/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/networks/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/networks/__pycache__/layer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/networks/__pycache__/layer.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/networks/__pycache__/network.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/networks/__pycache__/network.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/networks/network.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from collections import Counter 21 | import json 22 | import os 23 | 24 | import tensorflow as tf 25 | 26 | from tensorforce import util, TensorforceError 27 | from tensorforce.core.networks import Layer 28 | 29 | 30 | class Network(object): 31 | """ 32 | Base class for neural networks. 33 | """ 34 | 35 | def __init__(self, scope='network', summary_labels=None): 36 | self.summary_labels = set(summary_labels or ()) 37 | 38 | self.variables = dict() 39 | self.all_variables = dict() 40 | self.summaries = list() 41 | 42 | def custom_getter(getter, name, registered=False, **kwargs): 43 | variable = getter(name=name, registered=True, **kwargs) 44 | if not registered: 45 | self.all_variables[name] = variable 46 | if kwargs.get('trainable', True) and not name.startswith('optimization'): 47 | self.variables[name] = variable 48 | if 'variables' in self.summary_labels: 49 | summary = tf.summary.histogram(name=name, values=variable) 50 | self.summaries.append(summary) 51 | return variable 52 | 53 | self.apply = tf.make_template( 54 | name_=(scope + '/apply'), 55 | func_=self.tf_apply, 56 | custom_getter_=custom_getter 57 | ) 58 | self.regularization_loss = tf.make_template( 59 | name_=(scope + '/regularization-loss'), 60 | func_=self.tf_regularization_loss, 61 | custom_getter_=custom_getter 62 | ) 63 | 64 | def tf_apply(self, x, internals, update, return_internals=False): 65 | """ 66 | Creates the TensorFlow operations for applying the network to the given input. 67 | 68 | Args: 69 | x: Network input tensor or dict of input tensors. 70 | internals: List of prior internal state tensors 71 | update: Boolean tensor indicating whether this call happens during an update. 72 | return_internals: If true, also returns posterior internal state tensors 73 | 74 | Returns: 75 | Network output tensor, plus optionally list of posterior internal state tensors 76 | """ 77 | raise NotImplementedError 78 | 79 | def tf_regularization_loss(self): 80 | """ 81 | Creates the TensorFlow operations for the network regularization loss. 82 | 83 | Returns: 84 | Regularization loss tensor 85 | """ 86 | return None 87 | 88 | def internals_input(self): 89 | """ 90 | Returns the TensorFlow placeholders for internal state inputs. 91 | 92 | Returns: 93 | List of internal state input placeholders 94 | """ 95 | return list() 96 | 97 | def internals_init(self): 98 | """ 99 | Returns the TensorFlow tensors for internal state initializations. 100 | 101 | Returns: 102 | List of internal state initialization tensors 103 | """ 104 | return list() 105 | 106 | def get_variables(self, include_non_trainable=False): 107 | """ 108 | Returns the TensorFlow variables used by the network. 109 | 110 | Returns: 111 | List of variables 112 | """ 113 | if include_non_trainable: 114 | return [self.all_variables[key] for key in sorted(self.all_variables)] 115 | else: 116 | return [self.variables[key] for key in sorted(self.variables)] 117 | 118 | def get_summaries(self): 119 | """ 120 | Returns the TensorFlow summaries reported by the network. 121 | 122 | Returns: 123 | List of summaries 124 | """ 125 | return self.summaries 126 | 127 | @staticmethod 128 | def from_spec(spec, kwargs=None): 129 | """ 130 | Creates a network from a specification dict. 
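Example (an illustrative sketch; the layer types and sizes are hypothetical, and a plain
list spec is assumed to fall back to the default LayeredNetwork via util.get_object):
    network = Network.from_spec(
        spec=[
            dict(type='dense', size=64),
            dict(type='dense', size=32)
        ]
    )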
131 | """ 132 | network = util.get_object( 133 | obj=spec, 134 | default_object=LayeredNetwork, 135 | kwargs=kwargs 136 | ) 137 | assert isinstance(network, Network) 138 | return network 139 | 140 | 141 | class LayerBasedNetwork(Network): 142 | """ 143 | Base class for networks using tensorforce layers. 144 | """ 145 | 146 | def __init__(self, scope='layerbased-network', summary_labels=()): 147 | super(LayerBasedNetwork, self).__init__(scope=scope, summary_labels=summary_labels) 148 | self.layers = list() 149 | 150 | def add_layer(self, layer): 151 | self.layers.append(layer) 152 | 153 | def tf_regularization_loss(self): 154 | regularization_loss = super(LayerBasedNetwork, self).tf_regularization_loss() 155 | if regularization_loss is None: 156 | losses = list() 157 | else: 158 | losses = [regularization_loss] 159 | 160 | for layer in self.layers: 161 | regularization_loss = layer.regularization_loss() 162 | if regularization_loss is not None: 163 | losses.append(regularization_loss) 164 | 165 | if len(losses) > 0: 166 | return tf.add_n(inputs=losses) 167 | else: 168 | return None 169 | 170 | def internals_input(self): 171 | internals_input = super(LayerBasedNetwork, self).internals_input() 172 | for layer in self.layers: 173 | internals_input.extend(layer.internals_input()) 174 | return internals_input 175 | 176 | def internals_init(self): 177 | internals_init = super(LayerBasedNetwork, self).internals_init() 178 | for layer in self.layers: 179 | internals_init.extend(layer.internals_init()) 180 | return internals_init 181 | 182 | def get_variables(self, include_non_trainable=False): 183 | network_variables = super(LayerBasedNetwork, self).get_variables( 184 | include_non_trainable=include_non_trainable 185 | ) 186 | layer_variables = [ 187 | variable for layer in self.layers 188 | for variable in layer.get_variables(include_non_trainable=include_non_trainable) 189 | ] 190 | 191 | return network_variables + layer_variables 192 | 193 | def get_summaries(self): 194 | network_summaries = super(LayerBasedNetwork, self).get_summaries() 195 | layer_summaries = [summary for layer in self.layers for summary in layer.get_summaries()] 196 | 197 | return network_summaries + layer_summaries 198 | 199 | 200 | class LayeredNetwork(LayerBasedNetwork): 201 | """ 202 | Network consisting of a sequence of layers, which can be created from a specification dict. 203 | """ 204 | 205 | def __init__(self, layers_spec, scope='layered-network', summary_labels=()): 206 | """ 207 | Layered network. 
208 | 209 | Args: 210 | layers_spec: List of layer specification dicts 211 | """ 212 | super(LayeredNetwork, self).__init__(scope=scope, summary_labels=summary_labels) 213 | self.layers_spec = layers_spec 214 | layer_counter = Counter() 215 | 216 | for layer_spec in self.layers_spec: 217 | if isinstance(layer_spec['type'], str): 218 | name = layer_spec['type'] 219 | else: 220 | name = 'layer' 221 | scope = name + str(layer_counter[name]) 222 | layer_counter[name] += 1 223 | 224 | layer = Layer.from_spec( 225 | spec=layer_spec, 226 | kwargs=dict(scope=scope, summary_labels=summary_labels) 227 | ) 228 | self.add_layer(layer=layer) 229 | 230 | def tf_apply(self, x, internals, update, return_internals=False): 231 | if isinstance(x, dict): 232 | if len(x) != 1: 233 | raise TensorforceError('Layered network must have only one input, but {} given.'.format(len(x))) 234 | x = next(iter(x.values())) 235 | 236 | internal_outputs = list() 237 | index = 0 238 | for layer in self.layers: 239 | layer_internals = [internals[index + n] for n in range(layer.num_internals)] 240 | index += layer.num_internals 241 | x = layer.apply(x, update, *layer_internals) 242 | 243 | if not isinstance(x, tf.Tensor): 244 | internal_outputs.extend(x[1]) 245 | x = x[0] 246 | 247 | if return_internals: 248 | return x, internal_outputs 249 | else: 250 | return x 251 | 252 | @staticmethod 253 | def from_json(filename): 254 | """ 255 | Creates a layer_networkd_builder from a JSON. 256 | 257 | Args: 258 | filename: Path to configuration 259 | 260 | Returns: A layered_network_builder function with layers generated from the JSON 261 | """ 262 | path = os.path.join(os.getcwd(), filename) 263 | with open(path, 'r') as fp: 264 | config = json.load(fp=fp) 265 | return LayeredNetwork(layers_spec=config) 266 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | from tensorforce.core.optimizers.optimizer import Optimizer 17 | from tensorforce.core.optimizers.tf_optimizer import TFOptimizer 18 | from tensorforce.core.optimizers.meta_optimizer import MetaOptimizer 19 | from tensorforce.core.optimizers.global_optimizer import GlobalOptimizer 20 | 21 | 22 | # This can register any class inheriting from tf.train.Optimizer 23 | optimizers = dict( 24 | adadelta=TFOptimizer.get_wrapper(optimizer='adadelta'), 25 | adagrad=TFOptimizer.get_wrapper(optimizer='adagrad'), 26 | adam=TFOptimizer.get_wrapper(optimizer='adam'), 27 | nadam=TFOptimizer.get_wrapper(optimizer='nadam'), 28 | gradient_descent=TFOptimizer.get_wrapper(optimizer='gradient_descent'), 29 | momentum=TFOptimizer.get_wrapper(optimizer='momentum'), 30 | rmsprop=TFOptimizer.get_wrapper(optimizer='rmsprop'), 31 | # GlobalOptimizer not (yet) a valid choice 32 | ) 33 | 34 | 35 | __all__ = ['optimizers', 'Optimizer', 'TFOptimizer', 'MetaOptimizer', 'GlobalOptimizer'] 36 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/clipped_step.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/clipped_step.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/evolutionary.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/evolutionary.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/global_optimizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/global_optimizer.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/meta_optimizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/meta_optimizer.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/multi_step.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/multi_step.cpython-36.pyc -------------------------------------------------------------------------------- 
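A minimal usage sketch for the optimizer registry above, assuming the `Optimizer.from_spec` helper defined in optimizer.py and a TensorFlow 1.x runtime; the spec values are illustrative only:

    # Resolve an optimizer specification dict against the `optimizers` registry.
    # 'type' selects the registry entry; the remaining keys are forwarded to the
    # underlying tf.train optimizer constructor.
    from tensorforce.core.optimizers import Optimizer

    spec = dict(type='adam', learning_rate=1e-3)
    optimizer = Optimizer.from_spec(spec=spec)  # a TFOptimizer wrapping tf.train.AdamOptimizer

If a decaying learning rate is wanted, the `add_lr_decay` helper in lr_decay/tf_schedules.py rewrites such a spec beforehand, replacing the scalar `learning_rate` with a decayed tensor driven by `global_step`; that rewriting happens outside of `from_spec`.
--------------------------------------------------------------------------------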
/tensorforce/core/optimizers/__pycache__/natural_gradient.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/natural_gradient.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/optimized_step.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/optimized_step.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/optimizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/optimizer.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/synchronization.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/synchronization.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/tf_optimizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/tf_optimizer.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/global_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | import tensorflow as tf 21 | 22 | from tensorforce import util 23 | from tensorforce.core.optimizers import MetaOptimizer 24 | 25 | 26 | class GlobalOptimizer(MetaOptimizer): 27 | """ 28 | The global optimizer applies an optimizer to the local variables. In addition, it also 29 | applies the update a corresponding set of global variables and subsequently updates the local 30 | variables to the value of these global variables. 31 | Note: This is used for the current distributed mode, and will likely change with the next 32 | major version update. 
33 | """ 34 | 35 | def __init__(self, optimizer, summaries=None, summary_labels=None): 36 | """ 37 | Creates a new global optimizer instance. 38 | 39 | Args: 40 | optimizer: The optimizer which is modified by this meta optimizer. 41 | """ 42 | super(GlobalOptimizer, self).__init__( 43 | optimizer=optimizer, 44 | summaries=summaries, 45 | summary_labels=summary_labels 46 | ) 47 | 48 | def tf_step(self, time, variables, global_variables, **kwargs): 49 | """ 50 | Creates the TensorFlow operations for performing an optimization step. 51 | 52 | Args: 53 | time: Time tensor. 54 | variables: List of variables to optimize. 55 | global_variables: List of global variables to apply the proposed optimization step to. 56 | **kwargs: ??? coming soon 57 | 58 | Returns: 59 | List of delta tensors corresponding to the updates for each optimized variable. 60 | """ 61 | assert all(util.shape(global_var) == util.shape(local_var) for global_var, local_var 62 | in zip(global_variables, variables)) 63 | 64 | local_deltas = self.optimizer.step(time=time, variables=variables, **kwargs) 65 | 66 | with tf.control_dependencies(control_inputs=local_deltas): 67 | applied = self.optimizer.apply_step(variables=global_variables, deltas=local_deltas) 68 | 69 | with tf.control_dependencies(control_inputs=(applied,)): 70 | update_deltas = list() 71 | for global_var, local_var in zip(global_variables, variables): 72 | delta = global_var - local_var 73 | update_deltas.append(delta) 74 | 75 | applied = self.apply_step(variables=variables, deltas=update_deltas) 76 | 77 | # TODO: Update time, episode, etc (like in Synchronization)? 78 | 79 | with tf.control_dependencies(control_inputs=(applied,)): 80 | return [local_delta + update_delta for local_delta, update_delta in zip(local_deltas, update_deltas)] 81 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/lr_decay/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/lr_decay/__init__.py -------------------------------------------------------------------------------- /tensorforce/core/optimizers/lr_decay/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/lr_decay/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/lr_decay/__pycache__/tf_schedules.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/lr_decay/__pycache__/tf_schedules.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/lr_decay/tf_schedules.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | desc: learning rate decayer. 
5 | created: 2017.12.27 6 | @author: sam.dm 7 | """ 8 | 9 | import tensorflow as tf 10 | from tensorforce import util, TensorforceError 11 | 12 | 13 | def from_spec(spec, kwargs=None): 14 | lr_schedule = util.get_object( 15 | obj=spec, 16 | predefined_objects=lr_schedulers, 17 | kwargs=kwargs 18 | ) 19 | assert isinstance(lr_schedule, DecaySchedule) 20 | 21 | return lr_schedule 22 | 23 | 24 | def add_lr_decay(spec, global_step, kwargs=None): 25 | """ 26 | Creates an learning rate decayed instance from a optimizer specification dict. 27 | """ 28 | 29 | def parse_decay_conf(optimizer_spec, global_step): 30 | lr = optimizer_spec['learning_rate'] 31 | lr_schedule = optimizer_spec['lr_schedule'] 32 | if lr_schedule is None: 33 | del optimizer_spec['lr_schedule'] 34 | return optimizer_spec 35 | lr_schedule['global_step'] = global_step 36 | lr_decay_obj = from_spec(lr_schedule) 37 | optimizer_spec['learning_rate'] = lr_decay_obj(value=lr) 38 | pop_value = optimizer_spec.pop('lr_schedule', None) 39 | return optimizer_spec 40 | 41 | if 'optimizer' in spec: 42 | optimizer_spec = spec['optimizer'] 43 | if 'learning_rate' in optimizer_spec and 'lr_schedule' in optimizer_spec: 44 | spec['optimizer'] = parse_decay_conf(optimizer_spec, global_step) 45 | 46 | elif 'learning_rate' in spec and 'lr_schedule' in spec: 47 | spec = parse_decay_conf(spec, global_step) 48 | 49 | return spec 50 | 51 | class DecaySchedule(object): 52 | 53 | def __call(self, value): 54 | 55 | raise NotImplementedError() 56 | 57 | class Constant(DecaySchedule): 58 | def __init__(self, global_step=None): 59 | """ 60 | decayed_value = value 61 | """ 62 | 63 | self._global_step = global_step 64 | 65 | def __call__(self, value): 66 | 67 | return value 68 | 69 | 70 | class TFExponentialDecay(DecaySchedule): 71 | def __init__(self, global_step, decay_steps=20000, decay_rate=0.96, staircase=False): 72 | """ 73 | decayed_value = value * decay_rate ^ (global_step / decay_steps) 74 | """ 75 | 76 | self._global_step = global_step 77 | self._decay_steps = decay_steps 78 | self._decay_rate = decay_rate 79 | self._staircase = staircase 80 | 81 | def __call__(self, value): 82 | 83 | decayed_value = tf.train.exponential_decay(value, self._global_step, 84 | self._decay_steps, self._decay_rate, self._staircase) 85 | return decayed_value 86 | 87 | 88 | class TFInverseTimeDecay(DecaySchedule): 89 | def __init__(self, global_step, decay_steps=20000, decay_rate=0.96, staircase=False): 90 | """ 91 | decayed_value = value / (1 + decay_rate * t) 92 | """ 93 | 94 | self._global_step = global_step 95 | self._decay_steps = decay_steps 96 | self._decay_rate = decay_rate 97 | self._staircase = staircase 98 | 99 | def __call__(self, value): 100 | 101 | decayed_value = tf.train.inverse_time_decay(value, self._global_step, 102 | self._decay_steps, self._decay_rate, self._staircase) 103 | return decayed_value 104 | 105 | 106 | class TFNaturalExpDecay(DecaySchedule): 107 | def __init__(self, global_step, decay_steps=20000, decay_rate=0.96, staircase=False): 108 | """ 109 | decayed_value = value * exp(-decay_rate * (global_step / decay_steps)) 110 | """ 111 | 112 | self._global_step = global_step 113 | self._decay_steps = decay_steps 114 | self._decay_rate = decay_rate 115 | self._staircase = staircase 116 | 117 | def __call__(self, value): 118 | 119 | decayed_value = tf.train.natural_exp_decay(value, self._global_step, 120 | self._decay_steps, self._decay_rate, self._staircase) 121 | return decayed_value 122 | 123 | 124 | class TFPolynomialDecay(DecaySchedule): 125 | 
def __init__(self, global_step, decay_steps=20000, final_value=0.0001, 126 | power=1.0, cycle=False): 127 | """ 128 | global_step = min(global_step, decay_steps) 129 | decayed_final_value = (final_value - final_value) * 130 | (1 - global_step / decay_steps) ^ (power) + 131 | final_value 132 | """ 133 | 134 | self._global_step = global_step 135 | self._decay_steps = decay_steps 136 | self._final_value = final_value 137 | self._power = power 138 | self._cycle = cycle 139 | 140 | def __call__(self, value): 141 | decayed_value = tf.train.polynomial_decay(value, self._global_step, 142 | self._decay_steps, self._final_value, 143 | self._power, self._cycle) 144 | return decayed_value 145 | 146 | class LinearDecay(DecaySchedule): 147 | def __init__(self, global_step, max_decay_steps=20000, final_value=0.0001): 148 | """ 149 | decayed_value = init_value + (global_step / max_decay_steps) * ( 150 | init_value - final_value) 151 | """ 152 | 153 | self._global_step = global_step 154 | self._max_decay_steps = tf.constant(value=max_decay_steps, dtype=tf.int32) 155 | self._final_value = final_value 156 | self._first_pass = True 157 | 158 | def __call__(self, value): 159 | if self._first_pass: 160 | self._init_value = value 161 | self._first_pass = False 162 | 163 | self.fraction = tf.minimum(tf.divide(self._global_step, self._max_decay_steps), 1.0) 164 | 165 | return self._init_value + tf.multiply(self.fraction, self._final_value - self._init_value) 166 | 167 | 168 | lr_schedulers = { 169 | "constant": Constant, 170 | "exp_decay": TFExponentialDecay, 171 | "natural_exp_decay": TFNaturalExpDecay, 172 | "inverse_time_decay": TFInverseTimeDecay, 173 | "polynomial_decay": TFPolynomialDecay, 174 | "linear_decay": LinearDecay 175 | } 176 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/meta_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from tensorforce.core.optimizers import Optimizer 21 | 22 | 23 | class MetaOptimizer(Optimizer): 24 | """ 25 | A meta optimizer takes the optimization implemented by another optimizer and 26 | modifies/optimizes its proposed result. For example, line search might be applied to find a 27 | more optimal step size. 28 | """ 29 | 30 | def __init__(self, optimizer, **kwargs): 31 | """ 32 | Creates a new meta optimizer instance. 33 | 34 | Args: 35 | optimizer: The optimizer which is modified by this meta optimizer. 
36 | """ 37 | super(MetaOptimizer, self).__init__(**kwargs) 38 | 39 | self.optimizer = Optimizer.from_spec(spec=optimizer, kwargs=kwargs) 40 | 41 | def get_variables(self): 42 | return super(MetaOptimizer, self).get_variables() + self.optimizer.get_variables() 43 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | import tensorflow as tf 21 | 22 | from tensorforce import util, TensorforceError 23 | import tensorforce.core.optimizers 24 | 25 | 26 | class Optimizer(object): 27 | """ 28 | Generic TensorFlow optimizer which minimizes a not yet further specified expression, usually 29 | some kind of loss function. More generally, an optimizer can be considered as some method of 30 | updating a set of variables. 31 | """ 32 | 33 | def __init__(self, summaries=None, summary_labels=None): 34 | """ 35 | Creates a new optimizer instance. 36 | """ 37 | self.variables = dict() 38 | self.summaries = summaries 39 | if summary_labels is None: 40 | self.summary_labels = dict() 41 | else: 42 | self.summary_labels = summary_labels 43 | 44 | def custom_getter(getter, name, registered=False, **kwargs): 45 | variable = getter(name=name, registered=True, **kwargs) 46 | if not registered: 47 | assert kwargs.get('trainable', False) 48 | self.variables[name] = variable 49 | return variable 50 | 51 | # TensorFlow function 52 | self.step = tf.make_template( 53 | name_='step', 54 | func_=self.tf_step, 55 | custom_getter=custom_getter 56 | ) 57 | 58 | def tf_step(self, time, variables, **kwargs): 59 | """ 60 | Creates the TensorFlow operations for performing an optimization step. 61 | 62 | Args: 63 | time: Time tensor. 64 | variables: List of variables to optimize. 65 | **kwargs: Additional arguments depending on the specific optimizer implementation. 66 | For instance, often includes `fn_loss` if a loss function is optimized. 67 | 68 | Returns: 69 | List of delta tensors corresponding to the updates for each optimized variable. 70 | """ 71 | raise NotImplementedError 72 | 73 | def minimize(self, time, variables, **kwargs): 74 | """ 75 | Performs an optimization step. 76 | 77 | Args: 78 | time: Time tensor. 79 | variables: List of variables to optimize. 80 | **kwargs: Additional optimizer-specific arguments. The following arguments are used 81 | by some optimizers: 82 | - fn_loss: A callable returning the loss of the current model. 83 | - fn_kl_divergence: A callable returning the KL-divergence relative to the 84 | current model. 
85 | - return_estimated_improvement: Returns the estimated improvement resulting from 86 | the natural gradient calculation if true. 87 | - fn_reference: A callable returning the reference values necessary for comparison. 88 | - fn_compare: A callable comparing the current model to the reference model given 89 | by its values. 90 | - source_variables: List of source variables to synchronize with. 91 | - global_variables: List of global variables to apply the proposed optimization 92 | step to. 93 | 94 | 95 | Returns: 96 | The optimization operation. 97 | """ 98 | # Add training variable gradient histograms/scalars to summary output 99 | #if 'gradients' in self.summary_labels: 100 | if any(k in self.summary_labels for k in ['gradients', 'gradients_histogram', 'gradients_scalar']): 101 | valid = True 102 | if isinstance(self, tensorforce.core.optimizers.TFOptimizer): 103 | gradients = self.optimizer.compute_gradients(kwargs['fn_loss']()) 104 | elif isinstance(self.optimizer, tensorforce.core.optimizers.TFOptimizer): 105 | ## This section handles "Multi_step" and may handle others 106 | # if failure is found, add another elif to handle that case 107 | gradients = self.optimizer.optimizer.compute_gradients(kwargs['fn_loss']()) 108 | else: 109 | # Didn't find proper gradient information 110 | valid = False 111 | 112 | # Valid gradient data found, create summary data items 113 | if valid: 114 | for grad, var in gradients: 115 | if grad is not None: 116 | if any(k in self.summary_labels for k in ['gradients','gradients_scalar']): 117 | axes = list(range(len(grad.shape))) 118 | mean, var = tf.nn.moments(grad,axes) 119 | summary = tf.summary.scalar(name='gradients/' + var.name+ "/mean", tensor=mean) 120 | self.summaries.append(summary) 121 | summary = tf.summary.scalar(name='gradients/' + var.name+ "/variance", tensor=var) 122 | self.summaries.append(summary) 123 | if any(k in self.summary_labels for k in ['gradients', 'gradients_histogram']): 124 | summary = tf.summary.histogram(name='gradients/' + var.name, values=grad) 125 | self.summaries.append(summary) 126 | 127 | deltas = self.step(time=time, variables=variables, **kwargs) 128 | with tf.control_dependencies(control_inputs=deltas): 129 | return tf.no_op() 130 | 131 | def get_variables(self): 132 | """ 133 | Returns the TensorFlow variables used by the optimizer. 134 | 135 | Returns: 136 | List of variables. 137 | """ 138 | return [self.variables[key] for key in sorted(self.variables)] 139 | 140 | @staticmethod 141 | def from_spec(spec, kwargs=None): 142 | """ 143 | Creates an optimizer from a specification dict. 144 | """ 145 | optimizer = util.get_object( 146 | obj=spec, 147 | predefined_objects=tensorforce.core.optimizers.optimizers, 148 | kwargs=kwargs 149 | ) 150 | assert isinstance(optimizer, Optimizer) 151 | return optimizer 152 | 153 | def apply_step(self, variables, deltas): 154 | """ 155 | Applies step deltas to variable values. 156 | 157 | Args: 158 | variables: List of variables. 159 | deltas: List of deltas of same length. 160 | 161 | Returns: 162 | The step-applied operation. 
163 | """ 164 | if len(variables) != len(deltas): 165 | raise TensorforceError("Invalid variables and deltas lists.") 166 | return tf.group(*(variable.assign_add(delta=delta) for variable, delta in zip(variables, deltas))) 167 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/tf_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | import tensorflow as tf 21 | 22 | from tensorforce.core.optimizers import Optimizer 23 | 24 | 25 | class TFOptimizer(Optimizer): 26 | """ 27 | Wrapper class for TensorFlow optimizers. 28 | """ 29 | 30 | tf_optimizers = dict( 31 | adadelta=tf.train.AdadeltaOptimizer, 32 | adagrad=tf.train.AdagradOptimizer, 33 | adam=tf.train.AdamOptimizer, 34 | nadam=tf.contrib.opt.NadamOptimizer, 35 | gradient_descent=tf.train.GradientDescentOptimizer, 36 | momentum=tf.train.MomentumOptimizer, 37 | rmsprop=tf.train.RMSPropOptimizer 38 | ) 39 | 40 | @staticmethod 41 | def get_wrapper(optimizer): 42 | """ 43 | Returns a TFOptimizer constructor callable for the given optimizer name. 44 | 45 | Args: 46 | optimizer: The name of the optimizer, one of 'adadelta', 'adagrad', 'adam', 'nadam', 47 | 'gradient_descent', 'momentum', 'rmsprop'. 48 | 49 | Returns: 50 | The TFOptimizer constructor callable. 51 | """ 52 | def wrapper(**kwargs): 53 | return TFOptimizer(optimizer=optimizer, **kwargs) 54 | return wrapper 55 | 56 | def __init__(self, optimizer, summaries=None, summary_labels=None, **kwargs): 57 | """ 58 | Creates a new optimizer instance of a TensorFlow optimizer. 59 | 60 | Args: 61 | optimizer: The name of the optimizer, one of 'adadelta', 'adagrad', 'adam', 'nadam', 62 | 'gradient_descent', 'momentum', 'rmsprop'. 63 | **kwargs: Additional arguments passed on to the TensorFlow optimizer constructor. 64 | """ 65 | super(TFOptimizer, self).__init__(summaries=summaries, summary_labels=summary_labels) 66 | 67 | self.name = optimizer 68 | self.optimizer = TFOptimizer.tf_optimizers[optimizer](**kwargs) 69 | 70 | def tf_step(self, time, variables, fn_loss, **kwargs): 71 | """ 72 | Creates the TensorFlow operations for performing an optimization step. 73 | 74 | Args: 75 | time: Time tensor. 76 | variables: List of variables to optimize. 77 | fn_loss: A callable returning the loss of the current model. 78 | gradients: Gradients for update the variables when no fn_loss be given. 79 | **kwargs: Additional arguments, not used. 80 | 81 | Returns: 82 | List of delta tensors corresponding to the updates for each optimized variable. 
83 | """ 84 | if fn_loss is not None: 85 | loss = fn_loss() 86 | else: 87 | gradients = kwargs.get("gradients", None) 88 | assert gradients is not None and len(gradients) == len(variables) 89 | loss = tf.no_op() 90 | 91 | with tf.control_dependencies(control_inputs=(loss,)): 92 | # Trivial operation to enforce control dependency 93 | vars_before = [var + 0.0 for var in variables] 94 | 95 | with tf.control_dependencies(control_inputs=vars_before): 96 | if fn_loss is not None: 97 | applied = self.optimizer.minimize(loss=loss, var_list=variables) 98 | else: 99 | applied = self.optimizer.apply_gradients(zip(gradients, variables)) 100 | 101 | with tf.control_dependencies(control_inputs=(applied,)): 102 | return [var - var_before for var, var_before in zip(variables, vars_before)] 103 | 104 | def get_variables(self): 105 | optimizer_variables = super(TFOptimizer, self).get_variables() 106 | 107 | slots_variables = [ 108 | self.optimizer._slots[slot][key] 109 | for slot in sorted(self.optimizer._slots) 110 | for key in sorted(self.optimizer._slots[slot]) 111 | ] 112 | 113 | if self.name in ('adam', 'nadam'): 114 | additional_variables = [self.optimizer._beta1_power, self.optimizer._beta2_power] 115 | else: 116 | additional_variables = list() 117 | 118 | return optimizer_variables + slots_variables + additional_variables 119 | -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | from tensorforce.core.preprocessing.preprocessor import Preprocessor 17 | from tensorforce.core.preprocessing.standardize import Standardize 18 | from tensorforce.core.preprocessing.preprocessor_stack import PreprocessorStack 19 | 20 | 21 | preprocessors = dict( 22 | standardize=Standardize, 23 | ) 24 | 25 | 26 | __all__ = [ 27 | 'Preprocessor', 28 | 'Standardize', 29 | 'PreprocessorStack', 30 | 'preprocessors' 31 | ] 32 | -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/clip.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/clip.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/divide.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/divide.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/grayscale.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/grayscale.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/image_resize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/image_resize.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/normalize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/normalize.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/preprocessor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/preprocessor.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/preprocessor_stack.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/preprocessor_stack.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/running_standardize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/running_standardize.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/standardize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/standardize.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/preprocessor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | import tensorflow as tf 20 | 21 | 22 | class Preprocessor(object): 23 | 24 | def __init__(self, scope='preprocessor', summary_labels=None): 25 | self.summary_labels = set(summary_labels or ()) 26 | self.variables = dict() 27 | self.summaries = list() 28 | 29 | def custom_getter(getter, name, registered=False, **kwargs): 30 | variable = getter(name=name, registered=True, **kwargs) 31 | if not registered: 32 | self.variables[name] = variable 33 | return variable 34 | 35 | self.process = tf.make_template( 36 | name_=(scope + '/process'), 37 | func_=self.tf_process, 38 | custom_getter_=custom_getter 39 | ) 40 | 41 | def reset(self): 42 | pass 43 | 44 | def tf_process(self, tensor): 45 | """ 46 | Process state. 47 | 48 | Args: 49 | tensor: tensor to process. 50 | 51 | Returns: processed tensor. 52 | """ 53 | return tensor 54 | 55 | def processed_shape(self, shape): 56 | """ 57 | Shape of preprocessed state given original shape. 58 | 59 | Args: 60 | shape: original shape. 61 | 62 | Returns: processed tensor shape 63 | """ 64 | return shape 65 | 66 | def get_variables(self): 67 | """ 68 | Returns the TensorFlow variables used by the preprocessor. 69 | 70 | Returns: 71 | List of variables. 
72 | """ 73 | return [self.variables[key] for key in sorted(self.variables)] 74 | -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/preprocessor_stack.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from tensorforce import util 21 | from tensorforce.core.preprocessing import Preprocessor 22 | import tensorforce.core.preprocessing 23 | 24 | 25 | class PreprocessorStack(object): 26 | 27 | def __init__(self): 28 | self.preprocessors = list() 29 | 30 | def reset(self): 31 | for processor in self.preprocessors: 32 | processor.reset() 33 | 34 | def process(self, tensor): 35 | """ 36 | Process state. 37 | 38 | Args: 39 | tensor: tensor to process 40 | 41 | Returns: processed state 42 | 43 | """ 44 | for processor in self.preprocessors: 45 | tensor = processor.process(tensor=tensor) 46 | return tensor 47 | 48 | def processed_shape(self, shape): 49 | """ 50 | Shape of preprocessed state given original shape. 51 | 52 | Args: 53 | shape: original state shape 54 | 55 | Returns: processed state shape 56 | """ 57 | for processor in self.preprocessors: 58 | shape = processor.processed_shape(shape=shape) 59 | return shape 60 | 61 | def get_variables(self): 62 | return [variable for preprocessor in self.preprocessors for variable in preprocessor.get_variables()] 63 | 64 | @staticmethod 65 | def from_spec(spec): 66 | """ 67 | Creates a preprocessing stack from a specification dict. 68 | """ 69 | if isinstance(spec, dict): 70 | spec = [spec] 71 | 72 | stack = PreprocessorStack() 73 | for spec in spec: 74 | preprocessor = util.get_object( 75 | obj=spec, 76 | predefined_objects=tensorforce.core.preprocessing.preprocessors 77 | ) 78 | assert isinstance(preprocessor, Preprocessor) 79 | stack.preprocessors.append(preprocessor) 80 | 81 | return stack 82 | -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/standardize.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import numpy as np 21 | import tensorflow as tf 22 | 23 | from tensorforce import util 24 | from tensorforce.core.preprocessing import Preprocessor 25 | 26 | 27 | class Standardize(Preprocessor): 28 | """ 29 | Standardize state. Subtract mean and divide by standard deviation. 30 | """ 31 | 32 | def __init__(self, mean=None, var=None, across_batch=False, scope='standardize', summary_labels=()): 33 | self.across_batch = across_batch 34 | self.mean = mean 35 | self.var = var 36 | 37 | super(Standardize, self).__init__(scope=scope, summary_labels=summary_labels) 38 | 39 | def tf_process(self, tensor): 40 | if self.mean is not None and self.var is not None: 41 | return (tensor - self.mean) / (self.var + util.epsilon) 42 | 43 | if self.across_batch: 44 | axes = tuple(range(util.rank(tensor))) 45 | else: 46 | axes = tuple(range(1, util.rank(tensor))) 47 | 48 | mean, variance = tf.nn.moments(x=tensor, axes=axes, keep_dims=True) 49 | return (tensor - mean) / tf.maximum(x=tf.sqrt(variance), y=util.epsilon) 50 | -------------------------------------------------------------------------------- /tensorforce/environments/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | 17 | from tensorforce.environments.environment import Environment 18 | from tensorforce.environments.meta_environment import MetaEnvironment 19 | from tensorforce.environments.classic_control import CartPole 20 | from tensorforce.environments.classic_control import Pendulum 21 | import six 22 | 23 | environments = dict( 24 | cart_pole=CartPole, 25 | pendulum=Pendulum 26 | ) 27 | 28 | __all__ = ['Environment', 'MetaEnvironment', 'Pendulum', 'CartPole'] -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/environment.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/gym_environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/gym_environment.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/meta_environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/meta_environment.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/minimal_test.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/minimal_test.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/oss_environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/oss_environment.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/table_environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/table_environment.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | """ 4 | desc: classic 
control environments. 5 | create: 2017.12.19 6 | modified by @sam.dm 7 | """ 8 | 9 | 10 | from tensorforce.environments.classic_control.cart_pole import CartPole 11 | from tensorforce.environments.classic_control.pendulum import Pendulum 12 | 13 | environments = dict( 14 | cart_pole=CartPole, 15 | pendulum=Pendulum, 16 | ) 17 | 18 | __all__ = ['Pendulum', 'CartPole'] 19 | -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/classic_control/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/__pycache__/cart_pole.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/classic_control/__pycache__/cart_pole.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/__pycache__/pendulum.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/classic_control/__pycache__/pendulum.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/cart_pole.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | """ 4 | desc: classic cart-pole. 
5 | create: 2017.12.11 6 | author: @sam.dm 7 | """ 8 | 9 | 10 | import math 11 | import numpy as np 12 | import tensorforce.core.lib.env_seeding as seeding 13 | from tensorforce.environments import Environment 14 | 15 | 16 | class CartPole(Environment): 17 | 18 | def __init__(self): 19 | self.gravity = 9.8 20 | self.masscart = 1.0 21 | self.masspole = 0.1 22 | self.total_mass = (self.masspole + self.masscart) 23 | self.length = 0.5 24 | self.polemass_length = (self.masspole * self.length) 25 | self.force_mag = 10.0 26 | self.tau = 0.02 27 | 28 | # Angle at which to fail the episode 29 | self.theta_threshold_radians = 12 * 2 * math.pi / 360 30 | self.x_threshold = 2.4 31 | self.high = np.array([ 32 | self.x_threshold * 2, 33 | np.finfo(np.float32).max, 34 | self.theta_threshold_radians * 2, 35 | np.finfo(np.float32).max]) 36 | 37 | self.seed() 38 | self.state = None 39 | self.steps_beyond_done = None 40 | 41 | def __str__(self): 42 | return "CartPole" 43 | 44 | def seed(self, seed=None): 45 | self.np_random, seed = seeding.np_random(seed) 46 | return [seed] 47 | 48 | def execute(self, actions): 49 | assert self._action_contains(actions), "%r (%s) invalid"%(actions, type(actions)) 50 | state = self.state 51 | x, x_dot, theta, theta_dot = state 52 | force = self.force_mag if actions==1 else -self.force_mag 53 | costheta = math.cos(theta) 54 | sintheta = math.sin(theta) 55 | temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass 56 | thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass)) 57 | xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass 58 | x = x + self.tau * x_dot 59 | x_dot = x_dot + self.tau * xacc 60 | theta = theta + self.tau * theta_dot 61 | theta_dot = theta_dot + self.tau * thetaacc 62 | self.state = (x,x_dot,theta,theta_dot) 63 | done = x < -self.x_threshold \ 64 | or x > self.x_threshold \ 65 | or theta < -self.theta_threshold_radians \ 66 | or theta > self.theta_threshold_radians 67 | done = bool(done) 68 | 69 | if not done: 70 | reward = 1.0 71 | elif self.steps_beyond_done is None: 72 | # Pole just fell! 73 | self.steps_beyond_done = 0 74 | reward = 1.0 75 | else: 76 | if self.steps_beyond_done == 0: 77 | print("You are calling 'step()' even though this environment \ 78 | has already returned done = True. 
You should always call \ 79 | 'reset()' once you receive 'done = True' -- any further \ 80 | steps are undefined behavior.") 81 | self.steps_beyond_done += 1 82 | reward = 0.0 83 | 84 | return np.array(self.state), done, reward 85 | 86 | def reset(self): 87 | self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) 88 | self.steps_beyond_done = None 89 | return np.array(self.state) 90 | 91 | def close(self): 92 | self.state = None 93 | self.steps_beyond_done = None 94 | 95 | def _state_contains(self, state): 96 | cons = [np.abs(x)<=y for x,y,z in zip(state, self.high)] 97 | 98 | return all(cons) 99 | 100 | def _action_contains(self, action): 101 | cons = action>=0 and action < 2 102 | 103 | return cons 104 | 105 | @property 106 | def state_space(self): 107 | state = dict(shape=4, type='float') 108 | 109 | return state 110 | 111 | @property 112 | def action_space(self): 113 | action = dict(type='int', num_actions=2) 114 | 115 | return action 116 | -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/pendulum.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | desc : the pendulum emulator 5 | create: 2017.12.11 6 | @author: sam.dm 7 | """ 8 | 9 | import numpy as np 10 | import tensorforce.core.lib.env_seeding as seeding 11 | from tensorforce.environments import Environment 12 | 13 | class Pendulum(Environment): 14 | def __init__(self): 15 | self.max_speed = 8 16 | self.max_torque = 2.0 17 | self.dt = 0.05 18 | 19 | self.high = np.array([1., 1., self.max_speed]) 20 | 21 | self.seed() 22 | 23 | def __str__(self): 24 | return "Pendulum" 25 | 26 | def seed(self, seed=None): 27 | self.np_random, seed = seeding.np_random(seed) 28 | return [seed] 29 | 30 | def execute(self,actions): 31 | th, thdot = self.state # th := theta 32 | 33 | g, m, l = 10.0, 1.0, 1.0 34 | dt = self.dt 35 | 36 | action = np.clip(actions, -self.max_torque, self.max_torque)[0] 37 | costs = self._angle_normalize(th)**2 + .1*thdot**2 + .001*(action**2) 38 | 39 | newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*action) * dt 40 | newth = th + newthdot*dt 41 | newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) 42 | 43 | self.state = np.array([newth, newthdot]) 44 | return self._get_obs(), False, -costs 45 | 46 | def reset(self): 47 | high = np.array([np.pi, 1]) 48 | self.state = self.np_random.uniform(low=-high, high=high) 49 | return self._get_obs() 50 | 51 | def _get_obs(self): 52 | theta, thetadot = self.state 53 | return np.array([np.cos(theta), np.sin(theta), thetadot]) 54 | 55 | def _angle_normalize(self, x): 56 | return (((x+np.pi) % (2*np.pi)) - np.pi) 57 | 58 | @property 59 | def state_space(self): 60 | state = dict(shape=3, type='float') 61 | 62 | return state 63 | 64 | @property 65 | def action_space(self): 66 | action = dict(type='float', min_value=-self.max_torque, max_value=self.max_torque) 67 | 68 | return action 69 | 70 | def state_contains(self, state): 71 | cons = [np.abs(x)<=y for x,y,z in zip(state, self.high)] 72 | return all(cons) 73 | 74 | def action_contains(self, action): 75 | cons = np.abs(action[0]) <= self.max_torque 76 | 77 | return cons 78 | 79 | def close(self): 80 | self.state = None 81 | -------------------------------------------------------------------------------- /tensorforce/environments/environment.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 
reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | 17 | from __future__ import absolute_import 18 | from __future__ import print_function 19 | from __future__ import division 20 | 21 | 22 | class Environment(object): 23 | """ 24 | Base environment class. 25 | """ 26 | 27 | def __str__(self): 28 | raise NotImplementedError 29 | 30 | def close(self): 31 | """ 32 | Close environment. No other method calls possible afterwards. 33 | """ 34 | pass 35 | 36 | def seed(self, seed): 37 | """ 38 | Sets the random seed of the environment to the given value (current time, if seed=None). 39 | Naturally deterministic Environments (e.g. ALE or some gym Envs) don't have to implement this method. 40 | Args: 41 | seed (int): The seed to use for initializing the pseudo-random number generator (default=epoch time in sec). 42 | Returns: The actual seed (int) used OR None if Environment did not override this method (no seeding supported). 43 | """ 44 | return None 45 | 46 | def reset(self): 47 | """ 48 | Reset environment and setup for new episode. 49 | 50 | Returns: 51 | initial state of reset environment. 52 | """ 53 | raise NotImplementedError 54 | 55 | def execute(self, actions): 56 | """ 57 | Executes action, observes next state(s) and reward. 58 | 59 | Args: 60 | actions: Actions to execute. 61 | 62 | Returns: 63 | (Dict of) next state(s), boolean indicating terminal, and reward signal. 64 | """ 65 | raise NotImplementedError 66 | 67 | @property 68 | def states(self): 69 | """ 70 | Return the state space. Might include subdicts if multiple states are available simultaneously. 71 | 72 | Returns: dict of state properties (shape and type). 73 | 74 | """ 75 | raise NotImplementedError 76 | 77 | @property 78 | def actions(self): 79 | """ 80 | Return the action space. Might include subdicts if multiple actions are available simultaneously. 81 | 82 | Returns: dict of action properties (continuous, number of actions) 83 | 84 | """ 85 | raise NotImplementedError 86 | -------------------------------------------------------------------------------- /tensorforce/environments/meta_environment.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from tensorforce.environments.environment import Environment 21 | from tensorforce.exception import TensorforceError 22 | 23 | 24 | class MetaEnvironment(Environment): 25 | """ 26 | Base class for the unified IO interface 27 | """ 28 | 29 | def __init__(self, config): 30 | super(MetaEnvironment, self).__init__() 31 | self._parse(config) 32 | 33 | def _parse(self, config): 34 | """ 35 | Base configuration parsing shared by all IO types 36 | """ 37 | # Get the type of IO; 38 | # supported options are ('Table','DataHub','Gym','Universe','UserDef') 39 | if 'env_type' not in config: 40 | raise TensorforceError('cannot find env_type in configuration') 41 | self.env_type = config['env_type'] 42 | 43 | if 'env' in config: 44 | self.env_conf = config['env'] 45 | else: 46 | raise TensorforceError('cannot find env config') 47 | 48 | # Whether the task runs in interactive mode; 49 | # the default is non-interactive 50 | self.interactive = False 51 | if 'interactive' in self.env_conf: 52 | self.interactive = self.env_conf['interactive'] 53 | 54 | 55 | def parse_env_config(self): 56 | """ 57 | IO-specific parsing function 58 | """ 59 | raise NotImplementedError() 60 | 61 | def get_input_tensor(self): 62 | """ 63 | Initialize a dict with a single state input tensor, an action tensor and a reward tensor, 64 | or a dict of state input tensors if multiple states are provided. 65 | The return value is used to initialize the agent. 66 | """ 67 | raise NotImplementedError() 68 | 69 | def read(self): 70 | """ 71 | Read a batch of data for a model update. 72 | This method is only used in non-interactive mode; 73 | call execute() in interactive mode. 74 | """ 75 | raise NotImplementedError() 76 | 77 | def should_stop(self): 78 | """ 79 | In non-interactive mode, 80 | should_stop() is called in Runner.consumer() to determine whether to end the training loop. 81 | This method is only used in non-interactive mode. 82 | """ 83 | raise NotImplementedError() -------------------------------------------------------------------------------- /tensorforce/exception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | 17 | from __future__ import absolute_import 18 | from __future__ import print_function 19 | from __future__ import division 20 | 21 | 22 | class TensorforceError(Exception): 23 | """ 24 | Tensorforce error 25 | """ 26 | pass 27 | -------------------------------------------------------------------------------- /tensorforce/meta_parameter_recorder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io.
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import inspect 17 | import os 18 | import numpy as np 19 | import tensorflow as tf 20 | from tensorforce import TensorforceError 21 | 22 | 23 | class MetaParameterRecorder(object): 24 | """ 25 | Class to record MetaParameters as well as Summary/Description for TensorBoard (TEXT & FILE will come later) 26 | 27 | #### General: 28 | 29 | * format_type: used to configure data conversion for TensorBoard=0, TEXT & JSON (not Implemented), etc 30 | """ 31 | 32 | def __init__(self, current_frame): 33 | """ 34 | Init the MetaPrameterRecord with "Agent" parameters by passing inspect.currentframe() from Agent Class 35 | 36 | The Init will search back to find the parent class to capture all passed parameters and store 37 | them in "self.meta_params". 38 | 39 | NOTE: Currently only optimized for TensorBoard output 40 | 41 | TODO: Add JSON Export, TEXT EXPORT 42 | 43 | Args: 44 | current_frame: frame value from class to obtain metaparameters[= inspect.currentframe()] 45 | 46 | """ 47 | self.ignore_unknown_dtypes = False 48 | self.meta_params = dict() 49 | self.method_calling = inspect.getframeinfo(current_frame)[2] 50 | 51 | _, _, __, self.vals_current = inspect.getargvalues(current_frame) 52 | # self is the class name of the frame involved 53 | if 'self' in self.vals_current: 54 | self.recorded_class_type = self.vals_current['self'] 55 | # Add explicit AgentName item so class can be deleted 56 | self.meta_params['AgentName'] = str(self.vals_current['self']) 57 | 58 | frame_list = inspect.getouterframes(current_frame) 59 | 60 | for frame in frame_list: 61 | # Rather than frame.frame (named tuple), use [0] for python2 62 | args, varargs, keywords, vals =inspect.getargvalues(frame[0]) 63 | if 'self' in vals: 64 | if self.recorded_class_type == vals['self']: 65 | for i in args: 66 | self.meta_params[i] = vals[i] 67 | # Remove the "CLASS" from the dictionary, has no value "AgentName" contains STR of Class 68 | del self.meta_params['self'] 69 | 70 | def merge_custom(self, custom_dict): 71 | if type(custom_dict) is not dict: 72 | raise TensorforceError( 73 | "Error: MetaParameterRecorder 'meta_dict' must be passed a dictionary " 74 | "but was passed a type {} which is not supported.".format(str(type(custom_dict))) 75 | ) 76 | for key in custom_dict: 77 | if key in self.meta_params: 78 | raise TensorforceError( 79 | "Error: MetaParameterRecorder 'meta_dict' key {} conflicts with internal key," 80 | " please change passed key.".format(str(key)) 81 | ) 82 | self.meta_params[key] = custom_dict[key] 83 | # This line assumes the merge data came from summary_spec['meta_dict'], remove this from summary_spec 84 | del self.meta_params['summary_spec']['meta_dict'] 85 | 86 | def text_output(self, format_type=1): 87 | print('======================= ' + self.meta_params['AgentName'] + ' ====================================') 
88 | for key in self.meta_params: 89 | print( 90 | " ", 91 | key, 92 | type(self.meta_params[key]), 93 | "=", 94 | self.convert_data_to_string(self.meta_params[key], format_type=format_type) 95 | ) 96 | 97 | print('======================= ' + self.meta_params['AgentName'] + ' ====================================') 98 | 99 | def convert_dictionary_to_string(self, data, indent=0, format_type=0, separator=None, eol=None): 100 | data_string = "" 101 | add_separator = "" 102 | if eol is None: 103 | eol = os.linesep 104 | if separator is None: 105 | separator = ", " 106 | 107 | # This should not ever occur but here as a catch 108 | if type(data) is not dict: 109 | raise TensorforceError( 110 | "Error: MetaParameterRecorder Dictionary conversion was passed a type {}" 111 | " not supported.".format(str(type(data))) 112 | ) 113 | 114 | # TensorBoard 115 | if format_type == 0: 116 | label = "" 117 | div = "" 118 | 119 | if indent > 0: 120 | label = " | " 121 | div = "--- | " 122 | data_string += label + "Key | Value" + eol + div + "--- | ----" + eol 123 | 124 | for key in data: 125 | key_txt = key 126 | # TensorBoard 127 | if format_type == 0: 128 | key_txt = "**" + key + "**" 129 | key_value_sep = ' | ' 130 | if indent > 0: 131 | key_txt = " | " + key_txt 132 | 133 | converted_data = self.convert_data_to_string(data[key], separator=separator, indent=indent+1) 134 | data_string += add_separator + key_txt + key_value_sep + converted_data + eol 135 | 136 | return data_string 137 | 138 | def convert_list_to_string(self, data, indent=0, format_type=0, eol=None, count=True): 139 | data_string = "" 140 | if eol is None: 141 | eol = os.linesep 142 | 143 | # This should not ever occur but here as a catch 144 | if type(data) is not list: 145 | raise TensorforceError( 146 | "Error: MetaParameterRecorder List conversion was passed a type {}" 147 | " not supported.".format(str(type(data))) 148 | ) 149 | 150 | for index,line in enumerate(data): 151 | data_string_prefix = "" 152 | if count and indent == 0: 153 | data_string_prefix = str(index+1)+". 
" 154 | # TensorBoard 155 | if format_type == 0: 156 | # Only add indent for 2nd item and beyond as this is likely a dictionary entry 157 | if indent > 0 and index>0: 158 | data_string_prefix = " | "+data_string_prefix 159 | if index == (len(data)-1): 160 | append_eol = "" 161 | else: 162 | append_eol = eol 163 | data_string += data_string_prefix + self.convert_data_to_string(line, indent=indent+1) + append_eol 164 | 165 | return data_string 166 | 167 | def convert_ndarray_to_md(self, data, format_type=0, eol=None): 168 | data_string = "" 169 | data_string1 = "|Row|" 170 | data_string2 = "|:---:|" 171 | if eol is None: 172 | eol = os.linesep 173 | 174 | # This should not ever occur but here as a catch 175 | if type(data) is not np.ndarray: 176 | raise TensorforceError( 177 | "Error: MetaParameterRecorder ndarray conversion was passed" 178 | " a type {} not supported.".format(str(type(data))) 179 | ) 180 | 181 | shape = data.shape 182 | rank = data.ndim 183 | 184 | if rank == 2: 185 | for col in range(shape[1]): 186 | data_string1 += "Col-" + str(col) + "|" 187 | data_string2 += ":----:|" 188 | data_string += data_string1 + eol + data_string2 + eol 189 | 190 | for row in range(shape[0]): 191 | data_string += "|" + str(row) + "|" 192 | for col in range(shape[1]): 193 | data_string += str(data[row,col]) + "|" 194 | 195 | if row != (shape[0]-1): 196 | data_string += eol 197 | 198 | elif rank == 1: 199 | data_string += "|Row|Col-0|" + eol + "|:----:|:----:|" + eol 200 | 201 | for row in range(shape[0]): 202 | data_string += str(row) + "|" + str(data[row]) + "|" + eol 203 | 204 | return data_string 205 | 206 | def convert_data_to_string(self, data, indent=0, format_type=0, separator=None, eol=None): 207 | data_string = "" 208 | if type(data) is int: 209 | data_string = str(data) 210 | elif type(data) is float: 211 | data_string = str(data) 212 | elif type(data) is str: 213 | data_string = data 214 | elif type(data) is tuple: 215 | data_string = str(data) 216 | elif type(data) is list: 217 | data_string = self.convert_list_to_string(data, indent=indent, eol=eol) 218 | elif type(data) is bool: 219 | data_string = str(data) 220 | elif type(data) is dict: 221 | data_string = self.convert_dictionary_to_string(data, indent=indent, separator=separator) 222 | elif type(data) is np.ndarray: 223 | # TensorBoard 224 | if format_type == 0: 225 | data_string = self.convert_ndarray_to_md(data) 226 | else: 227 | data_string = str(data) 228 | elif data is None: 229 | data_string = "None" 230 | else: 231 | if not self.ignore_unknown_dtypes: 232 | data_string = "Error: MetaParameterRecorder Type conversion from type {} not supported.".\ 233 | format(str(type(data))) 234 | data_string += " ("+str(data)+") " 235 | else: 236 | # TensorBoard 237 | if format_type == 0: 238 | data_string = "**?**" 239 | 240 | return data_string 241 | 242 | def build_metagraph_list(self): 243 | """ 244 | Convert MetaParams into TF Summary Format and create summary_op 245 | 246 | Args: 247 | None 248 | 249 | Returns: 250 | Merged TF Op for TEXT summary elements, should only be executed once to reduce data duplication 251 | 252 | """ 253 | ops = [] 254 | 255 | self.ignore_unknown_dtypes = True 256 | for key in sorted(self.meta_params): 257 | value = self.convert_data_to_string(self.meta_params[key]) 258 | 259 | if len(value) == 0: 260 | continue 261 | if isinstance(value,str): 262 | ops.append(tf.summary.text(key, tf.convert_to_tensor(str(value)))) 263 | else: 264 | ops.append(tf.summary.text(key, 
tf.as_string(tf.convert_to_tensor(value)))) 265 | 266 | with tf.control_dependencies(tf.tuple(ops)): 267 | self.summary_merged = tf.summary.merge_all() 268 | 269 | return self.summary_merged 270 | -------------------------------------------------------------------------------- /tensorforce/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | from tensorforce.models.model import Model 16 | from tensorforce.models.deterministic_es_model import DeterministicESModel 17 | 18 | 19 | models = dict( 20 | deterministic_es_model=DeterministicESModel 21 | ) 22 | 23 | 24 | __all__ = [ 25 | 'Model', 26 | 'models', 27 | 'DeterministicESModel' 28 | ] 29 | -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/constant_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/constant_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/ddpg_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/ddpg_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/deterministic_es_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/deterministic_es_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/distribution_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/distribution_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/es_model.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/es_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/pg_log_prob_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/pg_log_prob_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/pg_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/pg_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/pg_prob_ratio_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/pg_prob_ratio_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/q_demo_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/q_demo_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/q_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/q_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/q_naf_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/q_naf_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/q_nstep_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/q_nstep_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/random_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/random_model.cpython-36.pyc 
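
A minimal usage sketch for the MetaParameterRecorder defined in tensorforce/meta_parameter_recorder.py above. The DemoAgent class, its constructor arguments, and the summary-writer note are hypothetical illustrations, not code from this repository; they only show the intended call pattern: an agent-style class passes inspect.currentframe() from its own __init__ so the recorder can walk the outer frames and capture every constructor argument, then text_output() pretty-prints them and build_metagraph_list() produces the merged TensorBoard TEXT summary op.

import inspect
from tensorforce.meta_parameter_recorder import MetaParameterRecorder

class DemoAgent(object):
    # Hypothetical agent used only to illustrate the call pattern.
    def __init__(self, batch_size=32, learning_rate=1e-3, summarizer=None):
        # Capture every argument of this constructor; the recorder walks the
        # parent frames, so subclass __init__ arguments would be captured too.
        self.meta_param_recorder = MetaParameterRecorder(inspect.currentframe())

agent = DemoAgent(batch_size=64)
agent.meta_param_recorder.text_output()                        # print the captured parameters
summary_op = agent.meta_param_recorder.build_metagraph_list()  # merged TF TEXT summary op
# summary_op would then be evaluated once in a session and written with a tf.summary.FileWriter.
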
-------------------------------------------------------------------------------- /tensorforce/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import importlib 17 | import logging 18 | import numpy as np 19 | import tensorflow as tf 20 | from tensorflow.core.util.event_pb2 import SessionLog 21 | 22 | from tensorforce import TensorforceError 23 | 24 | 25 | epsilon = 1e-6 26 | 27 | 28 | log_levels = dict( 29 | info=logging.INFO, 30 | debug=logging.DEBUG, 31 | critical=logging.CRITICAL, 32 | warning=logging.WARNING, 33 | fatal=logging.FATAL 34 | ) 35 | 36 | 37 | def prod(xs): 38 | """Computes the product along the elements in an iterable. Returns 1 for empty iterable. 39 | 40 | Args: 41 | xs: Iterable containing numbers. 42 | 43 | Returns: Product along iterable. 44 | 45 | """ 46 | p = 1 47 | for x in xs: 48 | p *= x 49 | return p 50 | 51 | 52 | def rank(x): 53 | return x.get_shape().ndims 54 | 55 | 56 | def shape(x, unknown=-1): 57 | return tuple(unknown if dims is None else dims for dims in x.get_shape().as_list()) 58 | 59 | 60 | def cumulative_discount(values, terminals, discount, cumulative_start=0.0): 61 | """ 62 | Compute cumulative discounts. 63 | Args: 64 | values: Values to discount 65 | terminals: Booleans indicating terminal states 66 | discount: Discount factor 67 | cumulative_start: Float or ndarray, estimated reward for state t + 1. Default 0.0 68 | 69 | Returns: 70 | dicounted_values: The cumulative discounted rewards. 71 | """ 72 | if discount == 0.0: 73 | return np.asarray(values) 74 | 75 | # cumulative start can either be a number or ndarray 76 | if type(cumulative_start) is np.ndarray: 77 | discounted_values = np.zeros((len(values),) + (cumulative_start.shape)) 78 | else: 79 | discounted_values = np.zeros(len(values)) 80 | 81 | cumulative = cumulative_start 82 | for n, (value, terminal) in reversed(list(enumerate(zip(values, terminals)))): 83 | if terminal: 84 | cumulative = np.zeros_like(cumulative_start, dtype=np.float32) 85 | cumulative = value + cumulative * discount 86 | discounted_values[n] = cumulative 87 | 88 | return discounted_values 89 | 90 | 91 | def np_dtype(dtype): 92 | """Translates dtype specifications in configurations to numpy data types. 93 | Args: 94 | dtype: String describing a numerical type (e.g. 'float') or numerical type primitive. 
95 | 96 | Returns: Numpy data type 97 | 98 | """ 99 | if dtype == 'float' or dtype == float or dtype == np.float32 or dtype == tf.float32: 100 | return np.float32 101 | elif dtype == 'int' or dtype == int or dtype == np.int32 or dtype == tf.int32: 102 | return np.int32 103 | elif dtype == 'bool' or dtype == bool or dtype == np.bool_ or dtype == tf.bool: 104 | return np.bool_ 105 | else: 106 | raise TensorforceError("Error: Type conversion from type {} not supported.".format(str(dtype))) 107 | 108 | 109 | def tf_dtype(dtype): 110 | """Translates dtype specifications in configurations to tensorflow data types. 111 | 112 | Args: 113 | dtype: String describing a numerical type (e.g. 'float'), numpy data type, 114 | or numerical type primitive. 115 | 116 | Returns: TensorFlow data type 117 | 118 | """ 119 | if dtype == 'float' or dtype == float or dtype == np.float32 or dtype == tf.float32: 120 | return tf.float32 121 | elif dtype == 'int' or dtype == int or dtype == np.int32 or dtype == tf.int32: 122 | return tf.int32 123 | elif dtype == 'bool' or dtype == bool or dtype == np.bool_ or dtype == tf.bool: 124 | return tf.bool 125 | else: 126 | raise TensorforceError("Error: Type conversion from type {} not supported.".format(str(dtype))) 127 | 128 | 129 | def unflatten(vector, shapes): 130 | i = 0 131 | arrays = [] 132 | for shape in shapes: 133 | size = np.prod(shape, dtype=np.int) 134 | array = vector[i:(i + size)].reshape(shape) 135 | arrays.append(array) 136 | i += size 137 | assert len(vector) == i, "Passed weight does not have the correct shape." 138 | return arrays 139 | 140 | 141 | def compute_ranks(x): 142 | """ 143 | Returns ranks in [0, len(x)) 144 | Note: This is different from scipy.stats.rankdata, which returns ranks in 145 | [1, len(x)]. 146 | """ 147 | assert x.ndim == 1 148 | ranks = np.empty(len(x), dtype=int) 149 | ranks[x.argsort()] = np.arange(len(x)) 150 | return ranks 151 | 152 | 153 | def compute_centered_ranks(x): 154 | y = compute_ranks(x.ravel()).reshape(x.shape).astype(np.float32) 155 | y /= (x.size - 1) 156 | y -= 0.5 157 | return y 158 | 159 | 160 | def itergroups(items, group_size): 161 | assert group_size >= 1 162 | group = [] 163 | for x in items: 164 | group.append(x) 165 | if len(group) == group_size: 166 | yield tuple(group) 167 | del group[:] 168 | if group: 169 | yield tuple(group) 170 | 171 | 172 | def batched_weighted_sum(weights, vecs, slice_size): 173 | total = 0 174 | num_items_summed = 0 175 | for batch_weights, batch_vecs in zip(itergroups(weights, slice_size), 176 | itergroups(vecs, slice_size)): 177 | assert len(batch_weights) == len(batch_vecs) <= slice_size 178 | total += np.dot(np.asarray(batch_weights, dtype=np.float32), 179 | np.asarray(batch_vecs, dtype=np.float32)) 180 | num_items_summed += len(batch_weights) 181 | return total, num_items_summed 182 | 183 | 184 | def run_with_location_trace(self, sess, op): 185 | run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 186 | run_metadata = tf.RunMetadata() 187 | sess.run(op, options=run_options, run_metadata=run_metadata) 188 | for device in run_metadata.step_stats.dev_stats: 189 | print(device.device) 190 | for node in device.node_stats: 191 | print(" ", node.node_name) 192 | 193 | 194 | 195 | def get_object(obj, predefined_objects=None, default_object=None, kwargs=None): 196 | """ 197 | Utility method to map some kind of object specification to its content, 198 | e.g. optimizer or baseline specifications to the respective classes. 
199 | 200 | Args: 201 | obj: A specification dict (value for key 'type' optionally specifies 202 | the object, options as follows), a module path (e.g., 203 | my_module.MyClass), a key in predefined_objects, or a callable 204 | (e.g., the class type object). 205 | predefined_objects: Dict containing predefined set of objects, 206 | accessible via their key 207 | default_object: Default object is no other is specified 208 | kwargs: Arguments for object creation 209 | 210 | Returns: The retrieved object 211 | 212 | """ 213 | args = () 214 | kwargs = dict() if kwargs is None else kwargs 215 | 216 | if isinstance(obj, dict): 217 | kwargs.update(obj) 218 | obj = kwargs.pop('type', None) 219 | 220 | if predefined_objects is not None and obj in predefined_objects: 221 | obj = predefined_objects[obj] 222 | elif isinstance(obj, str): 223 | if obj.find('.') != -1: 224 | module_name, function_name = obj.rsplit('.', 1) 225 | module = importlib.import_module(module_name) 226 | obj = getattr(module, function_name) 227 | else: 228 | predef_obj_keys = list(predefined_objects.keys()) 229 | raise TensorforceError("Error: object {} not found in predefined objects: {}".format(obj,predef_obj_keys)) 230 | elif callable(obj): 231 | pass 232 | elif default_object is not None: 233 | args = (obj,) 234 | obj = default_object 235 | else: 236 | # assumes the object is already instantiated 237 | return obj 238 | 239 | return obj(*args, **kwargs) 240 | 241 | 242 | class UpdateSummarySaverHook(tf.train.SummarySaverHook): 243 | 244 | def __init__(self, update_input, *args, **kwargs): 245 | super(UpdateSummarySaverHook, self).__init__(*args, **kwargs) 246 | self.update_input = update_input 247 | 248 | def before_run(self, run_context): 249 | self._request_summary = run_context.original_args[1] is not None and \ 250 | run_context.original_args[1].get(self.update_input, False) and \ 251 | (self._next_step is None or self._timer.should_trigger_for_step(self._next_step)) 252 | requests = {'global_step': self._global_step_tensor} 253 | if self._request_summary: 254 | if self._get_summary_op() is not None: 255 | requests['summary'] = self._get_summary_op() 256 | return tf.train.SessionRunArgs(requests) 257 | 258 | def after_run(self, run_context, run_values): 259 | if not self._summary_writer: 260 | return 261 | 262 | stale_global_step = run_values.results["global_step"] 263 | global_step = stale_global_step + 1 264 | if self._next_step is None or self._request_summary: 265 | global_step = run_context.session.run(self._global_step_tensor) 266 | 267 | if self._next_step is None: 268 | self._summary_writer.add_session_log(SessionLog(status=SessionLog.START), global_step) 269 | 270 | if "summary" in run_values.results: 271 | self._timer.update_last_triggered_step(global_step) 272 | for summary in run_values.results["summary"]: 273 | self._summary_writer.add_summary(summary, global_step) 274 | 275 | self._next_step = global_step + 1 276 | 277 | --------------------------------------------------------------------------------
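
To make the helper semantics in tensorforce/util.py concrete, here is a small self-contained usage sketch. The fake_registry and its 'adam' entry are illustrative stand-ins, not a registry defined in this repository; the printed results are approximate.

import numpy as np
import tensorforce.util as util

# cumulative_discount: discounted returns that reset at terminal states.
returns = util.cumulative_discount(
    values=[1.0, 1.0, 1.0, 1.0],
    terminals=[False, False, True, False],
    discount=0.9
)
# -> approximately [2.71, 1.9, 1.0, 1.0]

# np_dtype / tf_dtype: map configuration strings to concrete dtypes.
assert util.np_dtype('float') is np.float32

# get_object: resolve a spec dict against a dict of predefined objects.
fake_registry = dict(adam=lambda learning_rate: ('adam', learning_rate))  # illustrative only
optimizer = util.get_object(
    obj=dict(type='adam', learning_rate=1e-3),
    predefined_objects=fake_registry
)
# -> ('adam', 0.001)
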