├── LICENSE ├── README.md ├── data └── readme ├── examples ├── __init__.py └── rec_es │ ├── __pycache__ │ ├── gul_env.cpython-36.pyc │ ├── gul_input_fn.cpython-36.pyc │ ├── gul_input_fn_local.cpython-36.pyc │ ├── rec_env.cpython-36.pyc │ └── rec_input_fn_local.cpython-36.pyc │ ├── rec_config_local.json │ ├── rec_env.py │ ├── rec_input_fn_local.py │ ├── rec_rl_data_small │ └── rec_run_es_local.py ├── setup.cfg ├── setup.py └── tensorforce ├── __init__.py ├── __pycache__ ├── __init__.cpython-36.pyc ├── exception.cpython-36.pyc ├── meta_parameter_recorder.cpython-36.pyc └── util.cpython-36.pyc ├── agents ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── a3c_agent.cpython-36.pyc │ ├── agent.cpython-36.pyc │ ├── batch_agent.cpython-36.pyc │ ├── constant_agent.cpython-36.pyc │ ├── ddpg_agent.cpython-36.pyc │ ├── deterministic_es_agent.cpython-36.pyc │ ├── dqfd_agent.cpython-36.pyc │ ├── dqn_agent.cpython-36.pyc │ ├── dqn_nstep_agent.cpython-36.pyc │ ├── es_agent.cpython-36.pyc │ ├── memory_agent.cpython-36.pyc │ ├── naf_agent.cpython-36.pyc │ ├── ppo_agent.cpython-36.pyc │ ├── random_agent.cpython-36.pyc │ ├── trpo_agent.cpython-36.pyc │ └── vpg_agent.cpython-36.pyc ├── agent.py └── deterministic_es_agent.py ├── core ├── __init__.py ├── __pycache__ │ └── __init__.cpython-36.pyc ├── explorations │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── constant.cpython-36.pyc │ │ ├── epsilon_anneal.cpython-36.pyc │ │ ├── epsilon_decay.cpython-36.pyc │ │ ├── exploration.cpython-36.pyc │ │ ├── linear_decay.cpython-36.pyc │ │ └── ornstein_uhlenbeck_process.cpython-36.pyc │ ├── constant.py │ └── exploration.py ├── lib │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── atari_wrapper.cpython-36.pyc │ │ ├── env_seeding.cpython-36.pyc │ │ ├── optimizers.cpython-36.pyc │ │ ├── schedules.cpython-36.pyc │ │ └── segment_tree.cpython-36.pyc │ ├── env_seeding.py │ ├── optimizers.py │ └── schedules.py ├── memories │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── memory.cpython-36.pyc │ │ ├── modified_replay.cpython-36.pyc │ │ ├── naive_prioritized_replay.cpython-36.pyc │ │ ├── prioritized_replay.cpython-36.pyc │ │ └── replay.cpython-36.pyc │ ├── memory.py │ └── replay.py ├── networks │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── layer.cpython-36.pyc │ │ └── network.cpython-36.pyc │ ├── layer.py │ └── network.py ├── optimizers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── clipped_step.cpython-36.pyc │ │ ├── evolutionary.cpython-36.pyc │ │ ├── global_optimizer.cpython-36.pyc │ │ ├── meta_optimizer.cpython-36.pyc │ │ ├── multi_step.cpython-36.pyc │ │ ├── natural_gradient.cpython-36.pyc │ │ ├── optimized_step.cpython-36.pyc │ │ ├── optimizer.cpython-36.pyc │ │ ├── synchronization.cpython-36.pyc │ │ └── tf_optimizer.cpython-36.pyc │ ├── global_optimizer.py │ ├── lr_decay │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── tf_schedules.cpython-36.pyc │ │ └── tf_schedules.py │ ├── meta_optimizer.py │ ├── optimizer.py │ └── tf_optimizer.py └── preprocessing │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── clip.cpython-36.pyc │ ├── divide.cpython-36.pyc │ ├── grayscale.cpython-36.pyc │ ├── image_resize.cpython-36.pyc │ ├── normalize.cpython-36.pyc │ ├── preprocessor.cpython-36.pyc │ ├── preprocessor_stack.cpython-36.pyc │ ├── running_standardize.cpython-36.pyc │ └── standardize.cpython-36.pyc │ ├── preprocessor.py │ ├── 
preprocessor_stack.py │ └── standardize.py ├── environments ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── environment.cpython-36.pyc │ ├── gym_environment.cpython-36.pyc │ ├── meta_environment.cpython-36.pyc │ ├── minimal_test.cpython-36.pyc │ ├── oss_environment.cpython-36.pyc │ └── table_environment.cpython-36.pyc ├── classic_control │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── cart_pole.cpython-36.pyc │ │ └── pendulum.cpython-36.pyc │ ├── cart_pole.py │ └── pendulum.py ├── environment.py └── meta_environment.py ├── exception.py ├── meta_parameter_recorder.py ├── models ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── constant_model.cpython-36.pyc │ ├── ddpg_model.cpython-36.pyc │ ├── deterministic_es_model.cpython-36.pyc │ ├── distribution_model.cpython-36.pyc │ ├── es_model.cpython-36.pyc │ ├── model.cpython-36.pyc │ ├── pg_log_prob_model.cpython-36.pyc │ ├── pg_model.cpython-36.pyc │ ├── pg_prob_ratio_model.cpython-36.pyc │ ├── q_demo_model.cpython-36.pyc │ ├── q_model.cpython-36.pyc │ ├── q_naf_model.cpython-36.pyc │ ├── q_nstep_model.cpython-36.pyc │ └── random_model.cpython-36.pyc ├── deterministic_es_model.py └── model.py └── util.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Value-aware Recommendation based on Reinforced Profit Maximization in E-commerce System 2 | 3 | ## Code for the paper. This version runs on a single machine; a distributed version will be published later.
4 | 5 | ```bash 6 | # execute in the root folder 7 | python examples/rec_es/rec_run_es_local.py -i examples/rec_es/rec_config_local.json 8 | ``` 9 | 10 | ## Dataset for the paper can be downloaded at: 11 | 12 | https://drive.google.com/file/d/14OtIC8eiDkzoWCTtaUZHcb7eB-bUmtTT/view?usp=sharing 13 | -------------------------------------------------------------------------------- /data/readme: -------------------------------------------------------------------------------- 1 | ## source file 2 | The file can be downloaded at: https://drive.google.com/file/d/14OtIC8eiDkzoWCTtaUZHcb7eB-bUmtTT/view?usp=sharing 3 | 4 | ## background (Please refer to the paper for more details. https://arxiv.org/pdf/1902.00851.pdf) 5 | The following short paragraph from the paper helps to understand the format of the data file. 6 | ----- 7 | In our recommendation platform, items are shown in cascade 8 | on a mobile App one by one. Each time the user initiates a request, 9 | 50 items are recommended to him/her. As the user scrolls down the 10 | list and has seen all 50 items, a new request is triggered. This 11 | process is repeated until the user leaves the App or returns to the top 12 | of the cascade, labelled as "exit" in Figure 2. We use a metric called 13 | "pageid" to distinguish different requests in this interaction, similar 14 | to the concept of "page" in a search engine. As the user and the 15 | system interact with each other, the system learns how to respond 16 | to the state to obtain an optimized accumulative reward. 17 | ----- 18 | 19 | ## data format 20 | Each line contains 15 columns. The meaning of each column is as follows: 21 | 22 | column 1: The id of the returned page for the current request, which ranges from 0 to 11. Note that for each page, we return 50 items to the user. 23 | column 2: The hour when the request is launched by the user. 24 | column 3-5: The features used to profile the user, which include age level, gender and the level of purchase power. 25 | column 6-14: The item-specific features/labels. We concatenate the values of the 50 returned items belonging to a request into a list and separate them by commas. 26 | More specifically, 27 | column 6: The concatenated list of **position** for each item in the returned list, which ranges from 0 to 600 (12 pages * 50 items/page). 28 | column 7-9: The concatenated list of **predicted ctr/cvr/price** for each item in the returned list. 29 | column 10-12: The concatenated list of **isclick/iscart/isfav** for each item in the returned list to indicate whether the item is clicked/added to cart/added to wishlist by the user. 30 | column 13: The concatenated list of **purchase amount** for each item in the returned list. For example, 0.0 means that the user does not purchase this item. 12.0 means that the user spends 12 Yuan on this item. 31 | column 14: The concatenated list of **an optional powerful feature** of the item which can be used as one dimension of the "state" vector in RL. 32 | 33 | column 15: Indicates whether the current page is the last page browsed by the user. 34 | 35 | 36 | So columns 1-9 and 14 can be used to generate the **state** in the paper. Columns 10-13 can be used to calculate the **reward** in the paper. Column 15 represents the **terminal** indicator of RL in the paper.
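
For illustration, the sketch below parses one raw line into Python values following the column description above. It is a minimal example under stated assumptions, not the project's loader: the delimiters (";" between columns, "," inside the per-item lists) are taken from examples/rec_es/rec_input_fn_local.py, the 1-based column offsets follow this readme, and the function name parse_record is illustrative only. Note that rec_input_fn_local.py itself expects 14 ";"-separated fields and reads pageid from field index 4, so the released file may contain extra leading columns; adjust the indices if needed. The actual training pipeline reads the table through rec_input_fn_local.input_fn.

```python
def parse_record(line, field_delim=';', list_delim=','):
    """Parse one raw dataset line into scalar user features and 50-item lists."""
    def floats(field):
        # Split a comma-separated per-item list into a list of floats.
        return [float(x) for x in field.split(list_delim)]

    cols = line.rstrip('\n').split(field_delim)
    return {
        'pageid':   int(cols[0]),          # column 1: id of the returned page (0-11)
        'hour':     int(cols[1]),          # column 2: hour of the request
        'age':      int(cols[2]),          # columns 3-5: user profile features
        'gender':   int(cols[3]),
        'power':    int(cols[4]),
        'position': floats(cols[5]),       # column 6: per-item positions
        'ctr':      floats(cols[6]),       # columns 7-9: predicted ctr / cvr / price
        'cvr':      floats(cols[7]),
        'price':    floats(cols[8]),
        'isclick':  floats(cols[9]),       # columns 10-12: click / cart / wishlist labels
        'iscart':   floats(cols[10]),
        'isfav':    floats(cols[11]),
        'pay':      floats(cols[12]),      # column 13: purchase amount per item
        'extra':    floats(cols[13]),      # column 14: optional state feature
        'terminal': int(float(cols[14])),  # column 15: last-page indicator
    }

# Example usage against the bundled sample table (offsets may need adjusting):
# with open('examples/rec_es/rec_rl_data_small') as f:
#     first_record = parse_record(next(f))
```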
37 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/__init__.py -------------------------------------------------------------------------------- /examples/rec_es/__pycache__/gul_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/rec_es/__pycache__/gul_env.cpython-36.pyc -------------------------------------------------------------------------------- /examples/rec_es/__pycache__/gul_input_fn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/rec_es/__pycache__/gul_input_fn.cpython-36.pyc -------------------------------------------------------------------------------- /examples/rec_es/__pycache__/gul_input_fn_local.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/rec_es/__pycache__/gul_input_fn_local.cpython-36.pyc -------------------------------------------------------------------------------- /examples/rec_es/__pycache__/rec_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/rec_es/__pycache__/rec_env.cpython-36.pyc -------------------------------------------------------------------------------- /examples/rec_es/__pycache__/rec_input_fn_local.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/examples/rec_es/__pycache__/rec_input_fn_local.cpython-36.pyc -------------------------------------------------------------------------------- /examples/rec_es/rec_config_local.json: -------------------------------------------------------------------------------- 1 | { 2 | "agent": { 3 | "type": "deterministic_es_agent", 4 | "batch_size": 100, 5 | "optimizer": { 6 | "type": "adam", 7 | "learning_rate": 0.01, 8 | "lr_schedule": 9 | { 10 | "type": "linear_decay", 11 | "max_decay_steps": 10000, 12 | "final_value": 0.01 13 | } 14 | }, 15 | "max_episode_timesteps": 20, 16 | "l2_coeff": 0.005, 17 | "eval_prob": 0.5, 18 | "noise_stddev": 0.02, 19 | "train_iters": 20 20 | }, 21 | "network_spec": [ 22 | { 23 | "type": "linear", 24 | "size": 3, 25 | "bias": false, 26 | "weights": [[1,1,1], [1,1,1], [1,0.83,0.83], [1,0.67,0.67], [1,0.5,0.5], [1,0.33,0.33], [1,0.17,0.17], [1,0.0,0.0]] 27 | } 28 | ], 29 | "env": { 30 | "shuffle": false, 31 | "capacity": 10000, 32 | "interactive": true, 33 | "discount_base": 0.8, 34 | "max_pageid": 7, 35 | "pv_item_num": 50, 36 | "local_mode": true, 37 | "batch_size": 100, 38 | "tables": ["examples/rec_es/rec_rl_data_small"] 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /examples/rec_es/rec_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from 
__future__ import division 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | from tensorforce.environments.meta_environment import MetaEnvironment 8 | import tensorforce.util as utl 9 | from tensorforce.exception import TensorforceError 10 | from rec_input_fn_local import input_fn as input_fn_local 11 | 12 | def _invert_permutation(tensor): 13 | '''wrapper for matrix''' 14 | return tf.cast(tf.map_fn(tf.invert_permutation, tensor), tf.float32) 15 | 16 | def _gather(param, indices): 17 | '''wrapper for matrix''' 18 | return tf.map_fn(lambda x : tf.gather(x[0], x[1]), (param, indices), dtype=param.dtype) 19 | 20 | class RecTableEnv(MetaEnvironment): 21 | ''' 22 | ODPS Table env for gul ranking scenario. 23 | ''' 24 | def __init__(self, config): 25 | config['env_type'] = 'odps_table' 26 | super(RecTableEnv, self).__init__(config) 27 | 28 | # parse more config 29 | self.parse_env_config() 30 | 31 | self._version = '0.1' 32 | 33 | self.sess = None 34 | def __str__(self): 35 | return 'RecTableEnv({})'.format(self._version) 36 | 37 | def parse_env_config(self): 38 | """ 39 | Obtain table name,schema and partition 40 | """ 41 | print('env config:', self.env_conf) 42 | 43 | # get worker_num and worker_id 44 | self.worker_num = self.env_conf.get('worker_num', 1) 45 | self.worker_id = self.env_conf.get('worker_id', 0) 46 | 47 | # get table name 48 | if 'tables' not in self.env_conf: 49 | raise TensorforceError("Can't find tables in configuration") 50 | self.tables = self.env_conf['tables'] 51 | 52 | self.epoch = self.env_conf.get('epoch', None) 53 | self.batch_size = self.env_conf.get('batch_size', 100) 54 | self.capacity = self.env_conf.get('capacity', 4 * self.batch_size) 55 | self.max_pageid = self.env_conf.get('max_pageid', 7) 56 | self.discount_base = self.env_conf.get('discount_base', 0.8) 57 | self.local_mode = self.env_conf.get('local_mode', False) 58 | self.alipay_coef = self.env_conf.get('alipay_coef', 1.0) 59 | self.reward_shaping_method = self.env_conf.get('reward_shaping_method', None) 60 | self.alipay_threshold = self.env_conf.get('alipay_threshold', 0.0) 61 | self.alipay_penalty = self.env_conf.get('alipay_penalty', 0.0) 62 | ''' 63 | ranking_formula_type 0: ctr * cvr^a * price^b 64 | ranking_formula_type 1: (ctr * cvr^a * price^b) * matchtype_weight 65 | ranking_formula_type 2: (a * ctr + ctr * cvr^b * price^c) * matchtype_weight 66 | ranking_formula_type 3: (a * ctr + b * cvr + ctr * cvr^c * price^d) * matchtype_weight 67 | ranking_formula_type 4: (a * ctr + b * ctr * cvr + ctr * cvr^c * price^d) * matchtype_weight 68 | ''' 69 | self.ranking_formula_type = self.env_conf.get('ranking_formula_type', 0) 70 | self.feature_include_hour_power = self.env_conf.get('feature_include_hour_power', False) 71 | self.feature_include_age_gender = self.env_conf.get('feature_include_age_gender', False) 72 | 73 | self.states_spec = {} 74 | feature_dim = self.max_pageid + 1 75 | if self.feature_include_hour_power: 76 | feature_dim += 32 77 | if self.feature_include_age_gender: 78 | feature_dim += 12 79 | 80 | self.states_spec['state'] = { 81 | 'type': 'float', 82 | 'shape': (feature_dim,) 83 | } 84 | self.actions_spec = {} 85 | if self.ranking_formula_type == 0: 86 | action_shape = 2 87 | elif self.ranking_formula_type == 1: 88 | action_shape = 6 89 | elif self.ranking_formula_type == 2: 90 | action_shape = 7 91 | elif self.ranking_formula_type in (3, 4): 92 | action_shape = 8 93 | else: 94 | raise TensorforceError("Invalid ranking formula type " + str(self.ranking_formula_type)) 95 | 96 | 
self.actions_spec['action'] = { 97 | 'type': 'float', 98 | 'shape': (action_shape,), 99 | 'min_value': -1.0, 100 | 'max_value': 2.0 101 | } 102 | 103 | print('states:', self.states) 104 | print('actions:', self.actions) 105 | 106 | def set_up(self): 107 | if self.local_mode: 108 | print('load data in local mode') 109 | self.batch_data = input_fn_local( 110 | name='table_env', 111 | tables=self.tables, 112 | num_epochs=self.epoch, 113 | num_workers=self.worker_num, 114 | worker_id=self.worker_id, 115 | batch_size=self.batch_size 116 | ) 117 | self.device = ("/job:localhost/replica:0/task:%d" % self.worker_id) if self.worker_id != -1 else 0 118 | else: 119 | self.batch_data = input_fn_local( 120 | name='table_env', 121 | tables=self.tables, 122 | num_epochs=self.epoch, 123 | num_workers=self.worker_num, 124 | worker_id=self.worker_id, 125 | batch_size=self.batch_size, 126 | capacity=self.capacity 127 | ) 128 | self.device = ("/job:worker/task:%d" % self.worker_id) if self.worker_id != -1 else 0 129 | with tf.variable_scope(name_or_scope='table_env') as scope: 130 | with tf.device(device_name_or_function = self.device): 131 | self.build_graph() 132 | 133 | def get_input_tensor(self): 134 | """ 135 | Get the input tensor for agent 136 | """ 137 | data = {} 138 | data['states'] = {} 139 | data['states']['states'] = self.states_tensor 140 | 141 | return data 142 | 143 | def set_session(self, session): 144 | self.sess = session 145 | 146 | def update(self): 147 | if self.sess is None: 148 | raise TensorforceError("self.session is None") 149 | 150 | self.sess.run([self.batch_data, self.assign_cache_ops]) 151 | 152 | def reset(self): 153 | self.update() 154 | 155 | return self.states_tensor 156 | 157 | def build_graph(self): 158 | self.cache_data = {} 159 | self.cache_data['pageid'] = tf.Variable(tf.zeros(self.batch_size, dtype=tf.int32), 160 | trainable=False, 161 | name='pageid_var') 162 | if self.feature_include_hour_power: 163 | self.cache_data['hour'] = tf.Variable(tf.zeros(self.batch_size, dtype=tf.int32), 164 | trainable=False, 165 | name='hour_var') 166 | self.cache_data['power'] = tf.Variable(tf.zeros(self.batch_size, dtype=tf.int32), 167 | trainable=False, 168 | name='power_var') 169 | hour = self.cache_data['hour'] 170 | power = self.cache_data['power'] 171 | if self.feature_include_age_gender: 172 | self.cache_data['age'] = tf.Variable(tf.zeros(self.batch_size, dtype=tf.int32), 173 | trainable=False, 174 | name='age_var') 175 | self.cache_data['gender'] = tf.Variable(tf.zeros(self.batch_size, dtype=tf.int32), 176 | trainable=False, 177 | name='gender_var') 178 | age = self.cache_data['age'] 179 | gender = self.cache_data['gender'] 180 | 181 | self.cache_data['ctr'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.float32), 182 | trainable=False, 183 | name='ctr_var') 184 | self.cache_data['cvr'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.float32), 185 | trainable=False, 186 | name='cvr_var') 187 | self.cache_data['price'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.float32), 188 | trainable=False, 189 | name='price_var') 190 | self.cache_data['click'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.float32), 191 | trainable=False, 192 | name='click_var') 193 | self.cache_data['pay'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.float32), 194 | trainable=False, 195 | name='pay_var') 196 | if self.ranking_formula_type in (1, 2, 3, 4): 197 | self.cache_data['matchtype'] = tf.Variable(tf.zeros([self.batch_size, 50], dtype=tf.int32), 198 | 
trainable=False, 199 | name='matchtype_var') 200 | matchtype = self.cache_data['matchtype'] 201 | 202 | self.assign_cache_ops = {} 203 | for tensor_name in self.cache_data.keys(): 204 | self.assign_cache_ops[tensor_name] = tf.assign(self.cache_data[tensor_name], self.batch_data[tensor_name], name=tensor_name + 'assign_cache') 205 | 206 | ctr = self.cache_data['ctr'] 207 | cvr = self.cache_data['cvr'] 208 | price = self.cache_data['price'] 209 | click = self.cache_data['click'] 210 | pay = self.cache_data['pay'] 211 | 212 | self.actions_input = tf.placeholder(tf.float32, shape=None, name='env_action') 213 | 214 | offset = 0 215 | if self.ranking_formula_type in (2, 3, 4): 216 | ctr_weight = tf.reshape(self.actions_input[:,0], (-1,1)) 217 | offset += 1 218 | if self.ranking_formula_type in (3, 4): 219 | cvr_weight = tf.reshape(self.actions_input[:,offset], (-1,1)) 220 | offset += 1 221 | cvr_power = tf.reshape(self.actions_input[:,offset], (-1,1)) 222 | price_power = tf.reshape(self.actions_input[:,1 + offset], (-1,1)) 223 | 224 | rank_score = ctr * tf.pow(cvr, cvr_power) * tf.pow(price, price_power) 225 | if self.ranking_formula_type == 2: 226 | rank_score = rank_score + ctr * ctr_weight 227 | elif self.ranking_formula_type == 3: 228 | rank_score = rank_score + ctr * ctr_weight + cvr * cvr_weight 229 | elif self.ranking_formula_type == 4: 230 | rank_score = rank_score + ctr * ctr_weight + ctr * cvr * cvr_weight 231 | 232 | if self.ranking_formula_type in (1, 2, 3, 4): 233 | matchtype_params = self.actions_input[:, 2 + offset : 6 + offset] 234 | i2i_param = tf.ones([self.batch_size, 1], tf.float32) 235 | full_matchtype_params = tf.concat([i2i_param, matchtype_params], axis=1) 236 | matchtype_weights = _gather(full_matchtype_params, matchtype) 237 | rank_score = rank_score * matchtype_weights 238 | 239 | sorted_rank_score, sorted_index = tf.nn.top_k(rank_score, k=50, sorted=True) 240 | # tf.invert_permutation only support 1-D vector, wrap it for matrix 241 | perm_index = _invert_permutation(sorted_index) 242 | pos_discount = tf.pow(self.discount_base, perm_index) 243 | 244 | discounted_click = click * pos_discount 245 | discounted_pay = pay * pos_discount 246 | 247 | self.pv_discount_click = tf.reduce_sum(discounted_click, 1) 248 | self.pv_discount_click_mean = tf.reduce_mean(self.pv_discount_click, 0) 249 | self.pv_discount_pay = tf.reduce_sum(discounted_pay, 1) 250 | self.pv_discount_pay_mean = tf.reduce_mean(self.pv_discount_pay, 0) 251 | 252 | pageid = tf.clip_by_value(self.cache_data['pageid'], 0, self.max_pageid) 253 | self.pageid_onehot = tf.one_hot(pageid, depth=self.max_pageid + 1, dtype=tf.float32) 254 | feature_list = [self.pageid_onehot] 255 | if self.feature_include_hour_power: 256 | self.hour_onehot = tf.one_hot(hour, depth=24, dtype=tf.float32) 257 | self.power_onehot = tf.one_hot(power, depth=8, dtype=tf.float32) 258 | feature_list.append(self.hour_onehot) 259 | feature_list.append(self.power_onehot) 260 | if self.feature_include_age_gender: 261 | self.age_onehot = tf.one_hot(age, depth=9, dtype=tf.float32) 262 | self.gender_onehot = tf.one_hot(gender, depth=3, dtype=tf.float32) 263 | feature_list.append(self.age_onehot) 264 | feature_list.append(self.gender_onehot) 265 | 266 | if len(feature_list) == 1: 267 | self.states_tensor = self.pageid_onehot 268 | else: 269 | self.states_tensor = tf.concat(feature_list, 1) 270 | 271 | print('build graph done') 272 | 273 | def execute(self, actions): 274 | """ 275 | Interact with the environment 276 | if set interactive to True, 
env.execute will apply an action to the environment and 277 | get an observation after the action 278 | 279 | actions are batch_size * 3 tensor 280 | 281 | return (next_state, step_reward, terminal) 282 | """ 283 | step_click, step_pay = self.sess.run([self.pv_discount_click_mean, self.pv_discount_pay_mean], feed_dict={self.actions_input: actions}) 284 | 285 | return (None, True, self.get_reward(step_click, step_pay)) 286 | 287 | def get_reward(self, click, pay): 288 | if self.reward_shaping_method is None: 289 | return click + pay 290 | elif self.reward_shaping_method == 'weighting': 291 | return click + self.alipay_coef * pay 292 | elif self.reward_shaping_method == 'penalty': 293 | if pay >= self.alipay_threshold: 294 | return click + pay 295 | else: 296 | return click + pay - self.alipay_penalty * (self.alipay_threshold - pay) 297 | 298 | def close(self): 299 | pass 300 | 301 | @property 302 | def states(self): 303 | return self.states_spec 304 | 305 | @property 306 | def actions(self): 307 | return self.actions_spec 308 | 309 | if __name__ == '__main__': 310 | import json 311 | with open('rec_config_local.json', 'r') as fp: 312 | config = json.load(fp=fp) 313 | print('config:', config) 314 | action_val = tf.constant(np.array([[1,1,1], [1,1,1], [1,0.83,0.83], [1,0.67,0.67], [1,0.5,0.5], [1,0.33,0.33], [1,0.17,0.17], [1,0.0,0.0]], dtype=np.float32)) 315 | env = RecTableEnv(config) 316 | sess_config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=True) 317 | sess_config.gpu_options.allow_growth = True 318 | sess = tf.Session(config=sess_config) 319 | env.set_session(sess) 320 | env.set_up() 321 | cur_action = tf.matmul(env.pageid_onehot, action_val) 322 | sess.run(tf.global_variables_initializer()) 323 | sess.run(tf.local_variables_initializer()) 324 | coord = tf.train.Coordinator() 325 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 326 | try: 327 | for i in range(4): 328 | print('pageid_onehot:', sess.run([env.reset()])) 329 | print('pageid cached:', sess.run(env.cache_data['pageid'])) 330 | print('pageid cached again:', sess.run(env.cache_data['pageid'])) 331 | cur_action_val = sess.run(cur_action) 332 | print('cur_action:', cur_action_val) 333 | #print('cur_action again:', sess.run(cur_action)) 334 | next_state, terminal, reward = env.execute(cur_action_val) 335 | print('reward:', reward) 336 | 337 | except tf.errors.OutOfRangeError: 338 | print('data is out of range') 339 | finally: 340 | coord.request_stop() 341 | coord.join(threads) 342 | sess.close() 343 | -------------------------------------------------------------------------------- /examples/rec_es/rec_input_fn_local.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import datetime 4 | import time 5 | 6 | def _parse_dense_features(s, dshape, dtype=tf.float32, delimiter=','): 7 | record_defaults = [[0.0]] * dshape[1] 8 | value = tf.decode_csv(s, record_defaults=record_defaults, field_delim=delimiter) 9 | value = tf.stack(value, axis=1) 10 | value = tf.cast(value, dtype) 11 | return tf.reshape(value, dshape) 12 | 13 | def _invert_permutation(input, row_count): 14 | '''wrapper for matrix''' 15 | rows = [] 16 | for i in range(row_count): 17 | row = input[i,:] 18 | rows.append(tf.invert_permutation(row)) 19 | return tf.cast(tf.stack(rows, axis=0), tf.float32) 20 | 21 | def input_fn(name="input", tables="", num_epochs=None, num_workers=1, worker_id=0, capacity=0, batch_size=64): 22 | with 
tf.variable_scope(name_or_scope=name, reuse=False) as scope: 23 | with tf.device(device_name_or_function = ("/job:localhost/replica:0/task:%d"%worker_id) if worker_id != -1 else None): 24 | filename_queue = tf.train.string_input_producer(tables, num_epochs=num_epochs) 25 | reader = tf.TextLineReader() 26 | keys, values = reader.read_up_to(filename_queue, batch_size) 27 | batch_keys, batch_values = tf.train.batch( 28 | [keys, values], 29 | batch_size=batch_size, 30 | capacity=10 * batch_size, 31 | enqueue_many=True, 32 | num_threads=1) 33 | record_defaults = [['']] * 4 + [[-1]] + [['']] * 9 34 | data = tf.decode_csv(batch_values, record_defaults=record_defaults, field_delim=';') 35 | 36 | pageid = data[4] 37 | ctr = data[7] 38 | cvr = data[8] 39 | price = data[9] 40 | isclick = data[10] 41 | pay = data[11] 42 | 43 | ctr = _parse_dense_features(ctr, (-1, 50)) 44 | cvr = _parse_dense_features(cvr, (-1, 50)) 45 | price = _parse_dense_features(price, (-1, 50)) 46 | isclick = _parse_dense_features(isclick, (-1, 50)) 47 | pay = _parse_dense_features(pay, (-1, 50)) 48 | 49 | batch_data = {'keys': batch_keys, 50 | 'pageid': pageid, 51 | 'ctr': ctr, 52 | 'cvr': cvr, 53 | 'price': price, 54 | 'click': isclick, 55 | 'pay': pay} 56 | return batch_data -------------------------------------------------------------------------------- /examples/rec_es/rec_run_es_local.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import inspect 7 | import json 8 | import logging 9 | import os 10 | import sys 11 | import time 12 | 13 | import tensorflow as tf 14 | from six.moves import xrange, shlex_quote 15 | 16 | path = os.path.abspath('.') 17 | sys.path.append(path) 18 | 19 | from tensorforce import TensorforceError 20 | from tensorforce.agents import Agent 21 | from rec_env import RecTableEnv 22 | 23 | """ 24 | # example command 25 | python examples/rec_es/rec_run_es_local.py -i examples/rec_es/rec_config_local.json 26 | """ 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser() 30 | 31 | parser.add_argument('-i', '--config', help="Configuration file") 32 | 33 | args = parser.parse_args() 34 | print(args) 35 | sys.stdout.flush() 36 | 37 | if args.config is not None: 38 | with open(args.config, 'r') as fp: 39 | config = json.load(fp=fp) 40 | else: 41 | raise TensorforceError("No configuration provided.") 42 | 43 | if 'agent' not in config: 44 | raise TensorforceError("No agent configuration provided.") 45 | else: 46 | agent_config = config['agent'] 47 | 48 | if 'network_spec' not in config: 49 | network_spec = None 50 | print("No network configuration provided.") 51 | else: 52 | network_spec = config['network_spec'] 53 | 54 | if 'env' not in config: 55 | raise TensorforceError("No environment configuration provided.") 56 | else: 57 | env_config = config['env'] 58 | 59 | environment = RecTableEnv(config) 60 | environment.set_up() 61 | 62 | agent_config['env'] = environment 63 | 64 | agent = Agent.from_spec( 65 | spec=agent_config, 66 | kwargs=dict( 67 | states_spec=environment.states, 68 | actions_spec=environment.actions, 69 | network_spec=network_spec, 70 | batch_data=environment.get_input_tensor() 71 | ) 72 | ) 73 | 74 | environment.set_session(agent.model.get_session()) 75 | 76 | print("********** Configuration ************") 77 | for key, value in agent_config.items(): 78 | print(str(key) + ": {}".format(value)) 79 | 80 | 
agent.run_worker() 81 | agent.close() 82 | 83 | 84 | if __name__ == '__main__': 85 | logging.info("start...") 86 | print('start') 87 | main() 88 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | import os 21 | 22 | from setuptools import setup, find_packages 23 | 24 | install_requires=[ 25 | 'numpy', 26 | 'six', 27 | 'scipy', 28 | 'pillow', 29 | 'pytest' 30 | ] 31 | 32 | setup_requires=[ 33 | 'numpy', 34 | 'recommonmark' 35 | ] 36 | 37 | extras_require = { 38 | 'tf': ['tensorflow>=1.3.0'], 39 | 'tf_gpu': ['tensorflow-gpu>=1.3.0'], 40 | 'gym': ['gym>=0.7.4'], 41 | 'universe': ['universe>=0.21.3'], 42 | 'mazeexp': ['mazeexp>=0.0.1'] 43 | } 44 | 45 | # Readthedocs requires Sphinx extensions to be specified as part of 46 | # install_requires in order to build properly. 47 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 48 | if on_rtd: 49 | install_requires.extend(setup_requires) 50 | 51 | 52 | setup(name='tensorforce', 53 | version='0.3.2', 54 | description='Reinforcement learning for TensorFlow', 55 | url='', 56 | author='', 57 | author_email='', 58 | license='Apache 2.0', 59 | packages=[package for package in find_packages() if package.startswith('tensorforce')], 60 | install_requires=install_requires, 61 | setup_requires=setup_requires, 62 | extras_require=extras_require, 63 | zip_safe=False) 64 | -------------------------------------------------------------------------------- /tensorforce/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | 17 | from tensorforce.exception import TensorforceError 18 | 19 | 20 | __version__ = '0.3.2' 21 | 22 | 23 | # Libraries should add NullHandler() by default, as its the application code's 24 | # responsibility to configure log handlers. 25 | # https://docs.python.org/3/howto/logging.html#configuring-logging-for-a-library 26 | 27 | import logging 28 | 29 | try: 30 | from logging import NullHandler 31 | except ImportError: 32 | class NullHandler(logging.Handler): 33 | def emit(self, record): 34 | pass 35 | 36 | logging.getLogger(__name__).addHandler(NullHandler()) 37 | 38 | __all__ = ['TensorforceError'] 39 | -------------------------------------------------------------------------------- /tensorforce/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/__pycache__/exception.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/__pycache__/exception.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/__pycache__/meta_parameter_recorder.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/__pycache__/meta_parameter_recorder.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/__pycache__/util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/__pycache__/util.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | from tensorforce.agents.agent import Agent 17 | from tensorforce.agents.deterministic_es_agent import DeterministicESAgent 18 | 19 | agents = dict( 20 | deterministic_es_agent=DeterministicESAgent 21 | ) 22 | 23 | __all__ = [ 24 | 'Agent', 25 | 'DeterministicESAgent', 26 | 'agents' 27 | ] 28 | -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/a3c_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/a3c_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/batch_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/batch_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/constant_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/constant_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/ddpg_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/ddpg_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/deterministic_es_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/deterministic_es_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/dqfd_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/dqfd_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/dqn_agent.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/dqn_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/dqn_nstep_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/dqn_nstep_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/es_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/es_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/memory_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/memory_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/naf_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/naf_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/ppo_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/ppo_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/random_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/random_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/trpo_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/trpo_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/__pycache__/vpg_agent.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/agents/__pycache__/vpg_agent.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/agents/agent.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from copy import deepcopy 21 | 22 | import numpy as np 23 | import inspect 24 | 25 | from tensorforce import util, TensorforceError 26 | import tensorforce.agents 27 | from tensorforce.meta_parameter_recorder import MetaParameterRecorder 28 | 29 | 30 | class Agent(object): 31 | """ 32 | Basic Reinforcement learning agent. An agent encapsulates execution logic 33 | of a particular reinforcement learning algorithm and defines the external interface 34 | to the environment. 35 | 36 | The agent hence acts as intermediate layer between environment 37 | and backend execution (value function or policy updates). 38 | 39 | """ 40 | 41 | def __init__( 42 | self, 43 | states_spec, 44 | actions_spec, 45 | batched_observe 46 | ): 47 | """ 48 | Initializes the reinforcement learning agent. 49 | 50 | Args: 51 | states_spec: Dict containing at least one state definition. In the case of a single state, 52 | keys `shape` and `type` are necessary. For multiple states, pass a dict of dicts where each state 53 | is a dict itself with a unique name as its key. 54 | actions_spec: Dict containing at least one action definition. Actions have types and either `num_actions` 55 | for discrete actions or a `shape` for continuous actions. Consult documentation and tests for more. 56 | batched_observe: Optional int specifying how many observe calls are batched into one session run. 57 | Without batching, throughput will be lower because every `observe` triggers a session invocation to 58 | update rewards in the graph. 
59 | """ 60 | 61 | self.unique_state = ('shape' in states_spec) 62 | if self.unique_state: 63 | states_spec = dict(state=states_spec) 64 | 65 | self.states_spec = deepcopy(states_spec) 66 | for name, state in self.states_spec.items(): 67 | # Convert int to unary tuple 68 | if isinstance(state['shape'], int): 69 | state['shape'] = (state['shape'],) 70 | 71 | # Set default type to float 72 | if 'type' not in state: 73 | state['type'] = 'float' 74 | 75 | # Actions config and exploration 76 | self.exploration = dict() 77 | self.unique_action = ('type' in actions_spec) 78 | if self.unique_action: 79 | actions_spec = dict(action=actions_spec) 80 | self.actions_spec = deepcopy(actions_spec) 81 | 82 | for name, action in self.actions_spec.items(): 83 | # Check required values 84 | if action['type'] == 'int': 85 | if 'num_actions' not in action: 86 | raise TensorforceError("Action requires value 'num_actions' set!") 87 | elif action['type'] == 'float': 88 | if ('min_value' in action) != ('max_value' in action): 89 | raise TensorforceError("Action requires both values 'min_value' and 'max_value' set!") 90 | 91 | # Set default shape to empty tuple 92 | if 'shape' not in action: 93 | action['shape'] = () 94 | 95 | # Convert int to unary tuple 96 | if isinstance(action['shape'], int): 97 | action['shape'] = (action['shape'],) 98 | 99 | # TensorFlow summaries & Configuration Meta Parameter Recorder options 100 | if self.summary_spec is None: 101 | self.summary_labels = set() 102 | else: 103 | self.summary_labels = set(self.summary_spec.get('labels', ())) 104 | 105 | self.meta_param_recorder = None 106 | 107 | #if 'configuration' in self.summary_labels or 'print_configuration' in self.summary_labels: 108 | if any(k in self.summary_labels for k in ['configuration','print_configuration']): 109 | self.meta_param_recorder = MetaParameterRecorder(inspect.currentframe()) 110 | if 'meta_dict' in self.summary_spec: 111 | # Custom Meta Dictionary passed 112 | self.meta_param_recorder.merge_custom(self.summary_spec['meta_dict']) 113 | if 'configuration' in self.summary_labels: 114 | # Setup for TensorBoard population 115 | self.summary_spec['meta_param_recorder_class'] = self.meta_param_recorder 116 | if 'print_configuration' in self.summary_labels: 117 | # Print to STDOUT (TADO: optimize output) 118 | self.meta_param_recorder.text_output(format_type=1) 119 | 120 | # Init Model, this must follow the Summary Configuration section above to cary meta_param_recorder 121 | self.model = self.initialize_model() 122 | 123 | # Batched observe for better performance with Python. 124 | self.batched_observe = batched_observe 125 | if self.batched_observe is not None: 126 | self.observe_terminal = list() 127 | self.observe_reward = list() 128 | 129 | self.reset() 130 | 131 | def __str__(self): 132 | return str(self.__class__.__name__) 133 | 134 | def sync(self, sync_value): 135 | self.model.sync(sync_value) 136 | 137 | def close(self): 138 | self.model.close() 139 | 140 | def initialize_model(self): 141 | """ 142 | Creates the model for the respective agent based on specifications given by user. This is a separate 143 | call after constructing the agent because the agent constructor has to perform a number of checks 144 | on the specs first, sometimes adjusting them e.g. by converting to a dict. 145 | """ 146 | raise NotImplementedError 147 | 148 | def reset(self): 149 | """ 150 | Reset the agent to its initial state on episode start. 
Updates internal episode and 151 | timestep counter, internal states, and resets preprocessors. 152 | """ 153 | self.episode, self.timestep, self.next_internals = self.model.reset() 154 | self.current_internals = self.next_internals 155 | 156 | #TODO have to call preprocessing reset in model 157 | # for preprocessing in self.preprocessing.values(): 158 | # preprocessing.reset() 159 | 160 | def act(self, states, deterministic=False): 161 | """ 162 | Return action(s) for given state(s). State preprocessing and exploration are applied if 163 | configured accordingly. 164 | 165 | Args: 166 | states: One state (usually a value tuple) or dict of states if multiple states are expected. 167 | deterministic: If true, no exploration or sampling is applied. 168 | Returns: 169 | Scalar value of the action or dict of multiple actions the agent wants to execute. 170 | 171 | """ 172 | self.current_internals = self.next_internals 173 | 174 | if self.unique_state: 175 | self.current_states = dict(state=np.asarray(states)) 176 | else: 177 | self.current_states = {name: np.asarray(state) for name, state in states.items()} 178 | 179 | # Retrieve action 180 | self.current_actions, self.next_internals, self.timestep = self.model.act( 181 | states=self.current_states, 182 | internals=self.current_internals, 183 | deterministic=deterministic 184 | ) 185 | 186 | if self.unique_action: 187 | return self.current_actions['action'] 188 | else: 189 | return self.current_actions 190 | 191 | def observe_batch(self, current_states, current_internals, current_actions, current_terminal, current_reward, next_states, next_internals): 192 | """ 193 | Observe one batch of data at a time from the environment. 194 | Usually used in non-interactive mode, where the data is prepared beforehand. 195 | """ 196 | raise NotImplementedError 197 | 198 | 199 | def observe(self, next_states, terminal, reward): 200 | """ 201 | Observe experience from the environment to learn from. Optionally preprocesses rewards. 202 | Child classes should call super to get the processed reward, 203 | e.g. terminal, reward = super()... 204 | 205 | Args: 206 | next_states: One state (usually a value tuple) or dict of states if multiple states are expected. 207 | terminal: boolean indicating if the episode terminated after the observation. 208 | reward: scalar reward that resulted from executing the action. 209 | """ 210 | self.current_terminal = terminal 211 | self.current_reward = reward 212 | 213 | if self.batched_observe is not None and self.batched_observe > 0: 214 | # Batched observe for better performance with Python.
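# Note: terminal flags and rewards are appended to Python-side buffers here and only
# pushed to the TensorFlow graph in a single session call once the episode terminates
# or the buffer reaches `batched_observe` entries (see below).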
215 | self.observe_terminal.append(self.current_terminal) 216 | self.observe_reward.append(self.current_reward) 217 | 218 | if self.current_terminal or len(self.observe_terminal) >= self.batched_observe: 219 | self.episode = self.model.observe( 220 | terminal=self.observe_terminal, 221 | reward=self.observe_reward 222 | ) 223 | self.observe_terminal = list() 224 | self.observe_reward = list() 225 | 226 | else: 227 | self.episode = self.model.observe( 228 | terminal=self.current_terminal, 229 | reward=self.current_reward 230 | ) 231 | 232 | 233 | def should_stop(self): 234 | return self.model.monitored_session.should_stop() 235 | 236 | def last_observation(self): 237 | return dict( 238 | states=self.current_states, 239 | internals=self.current_internals, 240 | actions=self.current_actions, 241 | terminal=self.current_terminal, 242 | reward=self.current_reward 243 | ) 244 | 245 | def save_model(self, directory=None, append_timestep=True): 246 | """ 247 | Save TensorFlow model. If no checkpoint directory is given, the model's default saver 248 | directory is used. Optionally appends the current timestep to prevent overwriting previous 249 | checkpoint files. Turn this off to be able to load the model from the same path argument as 250 | given here. 251 | 252 | Args: 253 | directory: Optional checkpoint directory. 254 | append_timestep: Appends the current timestep to the checkpoint file if true. 255 | If this is set to True, the load path must include the checkpoint timestep suffix. 256 | For example, if stored to models/ and set to true, the exported file will be of the 257 | form models/model.ckpt-X where X is the last timestep saved. The load path must 258 | precisely match this file name. If this option is turned off, the checkpoint will 259 | always overwrite the file specified in path and the model can always be loaded under 260 | this path. 261 | 262 | Returns: 263 | Checkpoint path where the model was saved. 264 | """ 265 | return self.model.save(directory=directory, append_timestep=append_timestep) 266 | 267 | def restore_model(self, directory=None, file=None): 268 | """ 269 | Restore TensorFlow model. If no checkpoint file is given, the latest checkpoint is 270 | restored. If no checkpoint directory is given, the model's default saver directory is 271 | used (unless file specifies the entire path). 272 | 273 | Args: 274 | directory: Optional checkpoint directory. 275 | file: Optional checkpoint file, or path if directory not given. 276 | """ 277 | self.model.restore(directory=directory, file=file) 278 | 279 | @staticmethod 280 | def from_spec(spec, kwargs): 281 | """ 282 | Creates an agent from a specification dict. 283 | """ 284 | agent = util.get_object( 285 | obj=spec, 286 | predefined_objects=tensorforce.agents.agents, 287 | kwargs=kwargs 288 | ) 289 | assert isinstance(agent, Agent) 290 | return agent 291 | -------------------------------------------------------------------------------- /tensorforce/agents/deterministic_es_agent.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | desc: Evolution strategy agent.
5 | created: 2017.01.23 6 | @author: cuiqing.cq 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | 14 | import numpy as np 15 | 16 | from tensorforce.agents import Agent 17 | from tensorforce import util, TensorforceError 18 | from tensorforce.models import DeterministicESModel 19 | 20 | 21 | class DeterministicESAgent(Agent): 22 | """ 23 | Evolution Strategy as a Scalable Alternative to Reinforcement Learning 24 | [Tim Salimans, Jonathan Ho, et al., 2017] 25 | (https://arxiv.org/abs/1703.03864). 26 | 27 | Use DeterministicESModel which does not have the distribution layer. 28 | """ 29 | 30 | def __init__( 31 | self, 32 | env, 33 | states_spec, 34 | actions_spec, 35 | network_spec, 36 | device=None, 37 | session_config=None, 38 | scope='deterministic_es', 39 | saver_spec=None, 40 | summary_spec=None, 41 | distributed_spec=None, 42 | optimizer=None, 43 | states_preprocessing_spec=None, 44 | explorations_spec=None, 45 | reward_preprocessing_spec=None, 46 | distributions_spec=None, 47 | entropy_regularization=None, 48 | max_episode_timesteps=None, 49 | batch_size=1000, 50 | noise_stddev=0.02, 51 | eval_prob=0.01, 52 | l2_coeff=0.01, 53 | train_iters=1000, 54 | seed_range=1000000, 55 | repeat_actions=1, 56 | batch_data=None 57 | ): 58 | 59 | """ 60 | Args: 61 | states_spec: Dict containing at least one state definition. In the case of a single state, 62 | keys `shape` and `type` are necessary. For multiple states, pass a dict of dicts where each state 63 | is a dict itself with a unique name as its key. 64 | actions_spec: Dict containing at least one action definition. Actions have types and either `num_actions` 65 | for discrete actions or a `shape` for continuous actions. Consult documentation and tests for more. 66 | network_spec: List of layers specifying a neural network via layer types, sizes and optional arguments 67 | such as activation or regularization. Full examples are in the examples/configs folder. 68 | device: Device string specifying model device. 69 | session_config: optional tf.ConfigProto with additional desired session configurations 70 | scope: TensorFlow scope, defaults to agent name (e.g. `dqn`). 71 | saver_spec: Dict specifying automated saving. Use `directory` to specify where checkpoints are saved. Use 72 | either `seconds` or `steps` to specify how often the model should be saved. The `load` flag specifies 73 | if a model is initially loaded (set to True) from a file `file`. 74 | summary_spec: Dict specifying summaries for TensorBoard. Requires a 'directory' to store summaries, `steps` 75 | or `seconds` to specify how often to save summaries, and a list of `labels` to indicate which values 76 | to export, e.g. `losses`, `variables`. Consult neural network class and model for all available labels. 77 | distributed_spec: Dict specifying distributed functionality. Use `parameter_server` and `replica_model` 78 | Boolean flags to indicate workers and parameter servers. Use a `cluster_spec` key to pass a TensorFlow 79 | cluster spec. 80 | states_preprocessing_spec: Optional list of states preprocessors to apply to state 81 | (e.g. `image_resize`, `grayscale`). 82 | explorations_spec: Optional dict specifying action exploration type (epsilon greedy 83 | or Gaussian noise). 84 | reward_preprocessing_spec: Optional dict specifying reward preprocessing. 85 | distributions_spec: Optional dict specifying action distributions to override default distribution choices. 
86 | Must match action names. 87 | entropy_regularization: Optional positive float specifying an entropy regularization value. 88 | batch_size: Int specifying number of samples collected via `observe` before an update is executed. 89 | batch_data: Input data tensor, used for table environments. 90 | repeat_actions: Int specifying how many times actions are repeated to better estimate the reward. 91 | """ 92 | 93 | if network_spec is None: 94 | raise TensorforceError("No network_spec provided.") 95 | 96 | self.env = env 97 | self.network_spec = network_spec 98 | self.device = device 99 | self.session_config = session_config 100 | self.scope = scope 101 | self.saver_spec = saver_spec 102 | self.summary_spec = summary_spec 103 | self.distributed_spec = distributed_spec 104 | self.states_preprocessing_spec = states_preprocessing_spec 105 | self.explorations_spec = explorations_spec 106 | self.reward_preprocessing_spec = reward_preprocessing_spec 107 | self.distributions_spec = distributions_spec 108 | self.entropy_regularization = entropy_regularization 109 | self.batch_size = batch_size 110 | self.max_episode_timesteps = max_episode_timesteps 111 | self.noise_stddev = noise_stddev 112 | self.eval_prob = eval_prob 113 | self.l2_coeff = l2_coeff 114 | self.train_iters = train_iters 115 | self.seed_range = seed_range 116 | self.repeat_actions = repeat_actions 117 | self.batch_data = batch_data 118 | 119 | if optimizer is None: 120 | self.optimizer = dict( 121 | type='adam', 122 | learning_rate=0.01 123 | ) 124 | else: 125 | self.optimizer = optimizer 126 | 127 | super(DeterministicESAgent, self).__init__( 128 | states_spec=states_spec, 129 | actions_spec=actions_spec, 130 | batched_observe=None 131 | ) 132 | 133 | def run_worker(self): 134 | # Start running on all workers. 135 | self.model.update() 136 | 137 | def initialize_model(self): 138 | return DeterministicESModel( 139 | env=self.env, 140 | states_spec=self.states_spec, 141 | actions_spec=self.actions_spec, 142 | network_spec=self.network_spec, 143 | device=self.device, 144 | session_config=self.session_config, 145 | scope=self.scope, 146 | saver_spec=self.saver_spec, 147 | summary_spec=self.summary_spec, 148 | distributed_spec=self.distributed_spec, 149 | optimizer=self.optimizer, 150 | states_preprocessing_spec=self.states_preprocessing_spec, 151 | explorations_spec=self.explorations_spec, 152 | reward_preprocessing_spec=self.reward_preprocessing_spec, 153 | distributions_spec=self.distributions_spec, 154 | entropy_regularization=self.entropy_regularization, 155 | batch_size=self.batch_size, 156 | max_episode_timesteps=self.max_episode_timesteps, 157 | noise_stddev=self.noise_stddev, 158 | eval_prob=self.eval_prob, 159 | l2_coeff=self.l2_coeff, 160 | train_iters=self.train_iters, 161 | seed_range=self.seed_range, 162 | repeat_actions=self.repeat_actions, 163 | batch_data=self.batch_data 164 | ) 165 | -------------------------------------------------------------------------------- /tensorforce/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | -------------------------------------------------------------------------------- /tensorforce/core/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | 17 | from tensorforce.core.explorations.exploration import Exploration 18 | from tensorforce.core.explorations.constant import Constant 19 | 20 | 21 | explorations = dict( 22 | constant=Constant 23 | ) 24 | 25 | 26 | __all__ = ['Exploration', 'Constant', 'explorations'] 27 | -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/constant.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/constant.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/epsilon_anneal.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/epsilon_anneal.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/epsilon_decay.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/epsilon_decay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/exploration.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/exploration.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/linear_decay.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/linear_decay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/__pycache__/ornstein_uhlenbeck_process.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/explorations/__pycache__/ornstein_uhlenbeck_process.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/explorations/constant.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from tensorforce.core.explorations import Exploration 17 | 18 | 19 | class Constant(Exploration): 20 | """ 21 | Explore via adding a constant term. 22 | """ 23 | 24 | def __init__(self, constant=0.0, scope='constant', summary_labels=()): 25 | self.constant = float(constant) 26 | super(Constant, self).__init__(scope=scope, summary_labels=summary_labels) 27 | 28 | def tf_explore(self, episode, timestep, action_shape): 29 | return self.constant 30 | -------------------------------------------------------------------------------- /tensorforce/core/explorations/exploration.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import tensorflow as tf 17 | from tensorforce import util 18 | import tensorforce.core.explorations 19 | 20 | 21 | class Exploration(object): 22 | """ 23 | Abstract exploration object. 24 | """ 25 | 26 | def __init__(self, scope='exploration', summary_labels=None): 27 | self.summary_labels = set(summary_labels or ()) 28 | 29 | self.variables = dict() 30 | self.summaries = list() 31 | 32 | def custom_getter(getter, name, registered=False, **kwargs): 33 | variable = getter(name=name, registered=True, **kwargs) 34 | if not registered: 35 | self.variables[name] = variable 36 | return variable 37 | 38 | self.explore = tf.make_template( 39 | name_=(scope + '/explore'), 40 | func_=self.tf_explore, 41 | custom_getter_=custom_getter 42 | ) 43 | 44 | def tf_explore(self, episode, timestep, action_shape): 45 | """ 46 | Creates exploration value, e.g. compute an epsilon for epsilon-greedy or sample normal 47 | noise. 48 | """ 49 | raise NotImplementedError 50 | 51 | def get_variables(self): 52 | """ 53 | Returns exploration variables. 54 | 55 | Returns: 56 | List of variables. 57 | """ 58 | return [self.variables[key] for key in sorted(self.variables)] 59 | 60 | @staticmethod 61 | def from_spec(spec): 62 | """ 63 | Creates an exploration object from a specification dict. 64 | """ 65 | exploration = util.get_object( 66 | obj=spec, 67 | predefined_objects=tensorforce.core.explorations.explorations 68 | ) 69 | assert isinstance(exploration, Exploration) 70 | return exploration 71 | -------------------------------------------------------------------------------- /tensorforce/core/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__init__.py -------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/atari_wrapper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/atari_wrapper.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/env_seeding.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/env_seeding.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/optimizers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/optimizers.cpython-36.pyc 
-------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/schedules.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/schedules.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/lib/__pycache__/segment_tree.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/lib/__pycache__/segment_tree.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/lib/env_seeding.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | """ 4 | desc: Create a strong random seed. Otherwise, Python 2 would seed using 5 | the system time, which might be non-robust especially in the presence of 6 | concurrency. 7 | permalink: https://svn.python.org/projects/python/tags/r32/Lib/random.py 8 | create: 2017.12.11 9 | modified by @sam.dm 10 | 11 | """ 12 | 13 | import hashlib 14 | import numpy as np 15 | import os 16 | import random as _random 17 | import struct 18 | import sys 19 | 20 | 21 | if sys.version_info < (3,): 22 | integer_types = (int, long) 23 | else: 24 | integer_types = (int,) 25 | 26 | def np_random(seed=None): 27 | if seed is not None and not (isinstance(seed, integer_types) and 0 <= seed): 28 | raise Exception('Seed must be a non-negative integer or omitted, not {}'.format(seed)) 29 | 30 | seed = _seed(seed) 31 | 32 | rng = np.random.RandomState() 33 | rng.seed(_int_list_from_bigint(hash_seed(seed))) 34 | return rng, seed 35 | 36 | def hash_seed(seed=None, max_bytes=8): 37 | """ 38 | Args: 39 | seed (Optional[int]): None seeds from an operating system specific randomness source. 40 | max_bytes: Maximum number of bytes to use in the hashed seed. 41 | """ 42 | if seed is None: 43 | seed = _seed(max_bytes=max_bytes) 44 | hash = hashlib.sha512(str(seed).encode('utf8')).digest() 45 | return _bigint_from_bytes(hash[:max_bytes]) 46 | 47 | def _seed(a=None, max_bytes=8): 48 | """ 49 | Args: 50 | a (Optional[int, str]): None seeds from an operating system specific randomness source. 51 | max_bytes: Maximum number of bytes to use in the seed. 
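Example (illustrative):
    _seed(2016)      # -> 2016 (integers are reduced modulo 2**(8 * max_bytes))
    _seed('rec-rl')  # -> integer derived from the utf8 bytes of the string and its sha512 hash
    _seed(None)      # -> integer drawn from os.urandom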
52 | """ 53 | if a is None: 54 | a = _bigint_from_bytes(os.urandom(max_bytes)) 55 | elif isinstance(a, str): 56 | a = a.encode('utf8') 57 | a += hashlib.sha512(a).digest() 58 | a = _bigint_from_bytes(a[:max_bytes]) 59 | elif isinstance(a, integer_types): 60 | a = a % 2**(8 * max_bytes) 61 | else: 62 | raise Exception('Invalid type for seed: {} ({})'.format(type(a), a)) 63 | 64 | return a 65 | 66 | def _bigint_from_bytes(bytes): 67 | sizeof_int = 4 68 | padding = sizeof_int - len(bytes) % sizeof_int 69 | bytes += b'\0' * padding 70 | int_count = int(len(bytes) / sizeof_int) 71 | unpacked = struct.unpack("{}I".format(int_count), bytes) 72 | accum = 0 73 | for i, val in enumerate(unpacked): 74 | accum += 2 ** (sizeof_int * 8 * i) * val 75 | return accum 76 | 77 | def _int_list_from_bigint(bigint): 78 | # Special case 0 79 | if bigint < 0: 80 | raise Exception('Seed must be non-negative, not {}'.format(bigint)) 81 | elif bigint == 0: 82 | return [0] 83 | 84 | ints = [] 85 | while bigint > 0: 86 | bigint, mod = divmod(bigint, 2 ** 32) 87 | ints.append(mod) 88 | return ints 89 | -------------------------------------------------------------------------------- /tensorforce/core/lib/optimizers.py: -------------------------------------------------------------------------------- 1 | """ 2 | desc: SGD and Adam optimizer numpy implement. 3 | create: 2018.01.18 4 | @author: sam.dm 5 | """ 6 | 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | from tensorforce import util, TensorforceError 14 | from tensorforce.core.lib import schedules 15 | 16 | 17 | def from_spec(spec, kwargs=None): 18 | """ 19 | Creates an optimizer from a specification dict. 20 | """ 21 | optimizer = util.get_object( 22 | obj=spec, 23 | predefined_objects=optimizers, 24 | kwargs=kwargs 25 | ) 26 | assert isinstance(optimizer, Optimizer) 27 | return optimizer 28 | 29 | 30 | class Optimizer(object): 31 | def __init__(self, dim): 32 | self.dim = dim 33 | self.t = 0 34 | 35 | def update(self, grad): 36 | self.t += 1 37 | step = self._compute_step(grad) 38 | return step 39 | 40 | def _compute_step(self, grad): 41 | raise NotImplementedError 42 | 43 | 44 | class Momentum(Optimizer): 45 | def __init__(self, dim, learning_rate, momentum=0.9, lr_schedule=None): 46 | Optimizer.__init__(self, dim) 47 | if lr_schedule is not None: 48 | lr_schedule['value'] = learning_rate 49 | self.decay_obj = schedules.from_spec(lr_schedule) 50 | self.lr_schedule = lr_schedule 51 | self.v = np.zeros(self.dim, dtype=np.float32) 52 | self.learning_rate, self.momentum = learning_rate, momentum 53 | 54 | def _compute_step(self, globgrad): 55 | self.v = self.momentum * self.v + (1. 
- self.momentum) * grad 56 | if self.lr_schedule is not None: 57 | self.learning_rate = self.decay_obj(self.t) 58 | step = -self.learning_rate * self.v 59 | return step 60 | 61 | 62 | class Adam(Optimizer): 63 | def __init__(self, dim, learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, lr_schedule=None): 64 | Optimizer.__init__(self, dim) 65 | if lr_schedule is not None: 66 | lr_schedule['value'] = learning_rate 67 | self.decay_obj = schedules.from_spec(lr_schedule) 68 | self.lr_schedule = lr_schedule 69 | self.learning_rate = learning_rate 70 | if isinstance(self.learning_rate, list): 71 | self.learning_rate = np.asarray(self.learning_rate, dtype=np.float32).flatten() 72 | assert self.learning_rate.size == self.dim 73 | self.beta1 = beta1 74 | self.beta2 = beta2 75 | self.epsilon = epsilon 76 | self.m = np.zeros(self.dim, dtype=np.float32) 77 | self.v = np.zeros(self.dim, dtype=np.float32) 78 | 79 | def _compute_step(self, grad): 80 | if self.lr_schedule is not None: 81 | self.learning_rate = self.decay_obj(self.t) 82 | a = self.learning_rate * (np.sqrt(1 - self.beta2 ** self.t) / 83 | (1 - self.beta1 ** self.t)) 84 | self.m = self.beta1 * self.m + (1 - self.beta1) * grad 85 | self.v = self.beta2 * self.v + (1 - self.beta2) * (grad * grad) 86 | step = -a * self.m / (np.sqrt(self.v) + self.epsilon) 87 | return step 88 | 89 | optimizers = {"adam": Adam, "momentum": Momentum} 90 | -------------------------------------------------------------------------------- /tensorforce/core/lib/schedules.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | desc: schedule types, e.g. learning rate, priority beta, epsilon, etc. 5 | created: 2017.12.11 6 | @author: sam.dm 7 | """ 8 | import math 9 | from tensorforce import util, TensorforceError 10 | 11 | 12 | def from_spec(spec, kwargs=None): 13 | lr_schedule = util.get_object( 14 | obj=spec, 15 | predefined_objects=lr_schedulers, 16 | kwargs=kwargs 17 | ) 18 | assert isinstance(lr_schedule, Schedule) 19 | 20 | return lr_schedule 21 | 22 | 23 | class Schedule(object): 24 | def __call__(self, global_step): 25 | """ 26 | Value of the schedule at the given global_step. 27 | """ 28 | 29 | raise NotImplementedError() 30 | 31 | 32 | class Constant(Schedule): 33 | def __init__(self, value): 34 | """ 35 | Value remains constant over time. 36 | Args: 37 | value: float, Constant value of the schedule 38 | """ 39 | 40 | self._value = value 41 | 42 | def __call__(self, global_step): 43 | 44 | return self._value 45 | 46 | 47 | class PiecewiseDecay(Schedule): 48 | def __init__(self, endpoints, outside_value=None): 49 | """ 50 | Piecewise decay schedule. 51 | Args: 52 | endpoints: [(int, float)], list of pairs (time, value) meaning that the schedule should output 53 | `value` when global_step == time. All the values for time must be sorted in an increasing order. 54 | outside_value: float, if the value is requested outside of all the intervals specified in 55 | endpoints this value is returned.
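Example (an illustrative sketch; the endpoint values are hypothetical):
    schedule = PiecewiseDecay(endpoints=[(0, 1.0), (1000, 0.1)], outside_value=0.1)
    schedule(0)      # -> 1.0
    schedule(500)    # -> 0.55 (linear interpolation between the two endpoints)
    schedule(2000)   # -> 0.1  (outside all intervals, so outside_value is returned)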
56 | """ 57 | 58 | idxes = [e[0] for e in endpoints] 59 | assert idxes == sorted(idxes) 60 | self._interpolation = interpolation 61 | self._outside_value = outside_value 62 | self._endpoints = endpoints 63 | 64 | def _linear_interpolation(self, l, r, alpha): 65 | 66 | return l + alpha * (r - l) 67 | 68 | def __call__(self, global_step): 69 | for (l_t, l), (r_t, r) in zip(self._endpoints[:-1], self._endpoints[1:]): 70 | if l_t <= global_step and global_step < r_t: 71 | alpha = float(t - l_t) / (r_t - l_t) 72 | return self._interpolation(l, r, alpha) 73 | 74 | # t does not belong to any of the pieces, so doom. 75 | assert self._outside_value is not None 76 | 77 | return self._outside_value 78 | 79 | 80 | class LinearDecay(Schedule): 81 | def __init__(self, value, max_decay_steps, final_value): 82 | """ 83 | Linear interpolation between initial_value and final_value over schedule_timesteps. 84 | Args: 85 | max_timesteps: int, Number of max schedule timesteps. 86 | value: float, initial output value 87 | final_value: float, final output value 88 | """ 89 | 90 | self._max_decay_steps = max_decay_steps 91 | self._initial_value = value 92 | self._final_value = final_value 93 | 94 | def __call__(self, global_step): 95 | fraction = min(float(global_step) / self._max_decay_steps, 1.0) 96 | 97 | return self._initial_value + fraction * (self._final_value - self._initial_value) 98 | 99 | 100 | class ExponentialDecay(Schedule): 101 | def __init__(self, value, decay_steps, decay_rate, staircase=False): 102 | """ 103 | decayed_value = value * decay_rate ^ (global_step / decay_steps) 104 | """ 105 | 106 | self._value = value 107 | self._decay_steps = decay_steps 108 | self._decay_rate = decay_rate 109 | self._staircase = staircase 110 | 111 | def __call__(self, global_step): 112 | p = float(global_step) / self._decay_steps 113 | if self._staircase: 114 | p = math.floor(p) 115 | 116 | return self._value * math.pow(self._decay_rate, p) 117 | 118 | class PolynomialDecay(Schedule): 119 | def __init__(self, value, decay_steps, final_value=0.0001, power=1.0, cycle=False): 120 | """ 121 | global_step = min(global_step, decay_steps) 122 | decayed_value = (value - final_value) * 123 | (1 - global_step / decay_steps) ^ (power) + 124 | final_value 125 | If cycle is True then a multiple of decay_steps is used, the first one 126 | that is bigger than global_steps. 
127 | 128 | decay_steps = decay_steps * ceil(global_step / decay_steps) 129 | decayed_value = (value - final_value) * (1 - global_step / decay_steps) ^ (power) + 130 | final_value 131 | """ 132 | 133 | self._value = value 134 | self._decay_steps = decay_steps 135 | self._final_value = final_value 136 | self._power = power 137 | self._cycle = cycle 138 | 139 | def __call__(self, global_step): 140 | if self._cycle: 141 | if global_step == 0: 142 | multiplier = 1.0 143 | else: 144 | multiplier = math.ceil(global_step / self._decay_steps) 145 | decay_steps = self._decay_steps * multiplier 146 | else: 147 | decay_steps = self._decay_steps 148 | global_step = min(global_step, self._decay_steps) 149 | 150 | p = float(global_step) / decay_steps 151 | 152 | return (self._value - self._final_value) * math.pow( 153 | 1 - p, self._power) + self._final_value 154 | 155 | class NaturalExpDecay(Schedule): 156 | def __init__(self, value, decay_steps, decay_rate, staircase=False): 157 | """ 158 | decayed_value = value * exp(-decay_rate * global_step) 159 | """ 160 | 161 | self._value = value 162 | self._decay_steps = decay_steps 163 | self._decay_rate = decay_rate 164 | self._staircase = staircase 165 | 166 | def __call__(self, global_step): 167 | p = float(global_step) / self._decay_steps 168 | if self._staircase: 169 | p = math.ceil(p) 170 | exponent = math.exp(-self._decay_rate * p) 171 | 172 | return self._value * exponent 173 | 174 | class InverseTimeDecay(Schedule): 175 | def __init__(self, value, decay_steps, decay_rate, staircase=False): 176 | """ 177 | decayed_value = value / (1 + decay_rate * global_step / decay_step) 178 | 179 | if staircase is True, as: 180 | decayed_value = value / (1 + decay_rate * floor(global_step / decay_step)) 181 | """ 182 | 183 | self._value = value 184 | self._decay_steps = decay_steps 185 | self._decay_rate = decay_rate 186 | self._staircase = staircase 187 | 188 | def __call__(self, global_step): 189 | p = float(global_step) / self._decay_steps 190 | if self._staircase: 191 | p = math.ceil(p) 192 | denom = 1.0 + p * self._decay_rate 193 | 194 | return self._value / denom 195 | 196 | lr_schedulers = { 197 | "constant": Constant, 198 | "exp_decay": ExponentialDecay, 199 | "natural_exp_decay": NaturalExpDecay, 200 | "inverse_time_decay": InverseTimeDecay, 201 | "polynomial_decay": PolynomialDecay, 202 | "linear_decay": LinearDecay 203 | } 204 | -------------------------------------------------------------------------------- /tensorforce/core/memories/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | 17 | from tensorforce.core.memories.memory import Memory 18 | from tensorforce.core.memories.replay import Replay 19 | from tensorforce.core.memories.prioritized_replay import PrioritizedReplay 20 | from tensorforce.core.memories.modified_replay import PrioritizedReplayBuffer 21 | from tensorforce.core.memories.modified_replay import ReplayBuffer 22 | from tensorforce.core.memories.naive_prioritized_replay import NaivePrioritizedReplay 23 | 24 | 25 | memories = dict( 26 | replay=Replay, 27 | ) 28 | 29 | __all__ = ['memories', 'Memory', 'ReplayBuffer'] 30 | -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/memory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/memory.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/modified_replay.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/modified_replay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/naive_prioritized_replay.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/naive_prioritized_replay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/prioritized_replay.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/prioritized_replay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/__pycache__/replay.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/memories/__pycache__/replay.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/memories/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from tensorforce import util 21 | import tensorforce.core.memories 22 | 23 | 24 | # TODO: implement in TensorFlow 25 | 26 | class Memory(object): 27 | """ 28 | Abstract memory class. 29 | """ 30 | 31 | def __init__(self, states_spec, actions_spec): 32 | """ 33 | Generic memory without sampling strategy implemented. 34 | 35 | Args: 36 | states_spec: State specifiction 37 | actions_spec: Action specification 38 | """ 39 | self.states_spec = states_spec 40 | self.actions_spec = actions_spec 41 | 42 | def add_observation(self, states, internals, actions, terminal, reward, next_states, next_internals): 43 | """ 44 | Inserts a single experience to the memory. 45 | 46 | Args: 47 | states: 48 | internals: 49 | actions: 50 | terminal: 51 | reward: 52 | 53 | Returns: 54 | 55 | """ 56 | raise NotImplementedError 57 | 58 | def get_batch(self, batch_size): 59 | """ 60 | Samples a batch from the memory. 61 | 62 | Args: 63 | batch_size: The batch size 64 | 65 | Returns: A dict containing states, internal states, actions, terminals, rewards (and next states) 66 | 67 | """ 68 | raise NotImplementedError 69 | 70 | def update_batch(self, idxes, priorities): 71 | """ 72 | Updates loss values for sampling strategies based on loss functions. 73 | 74 | Args: 75 | idxes: 76 | priorities: 77 | 78 | """ 79 | raise NotImplementedError 80 | 81 | def set_memory(self, states, internals, actions, terminals, rewards, next_states, next_internals): 82 | """ 83 | Deletes memory content and sets content to provided observations. 84 | 85 | Args: 86 | states: 87 | internals: 88 | actions: 89 | terminals: 90 | rewards: 91 | next_states: 92 | next_internals: 93 | 94 | """ 95 | raise NotImplementedError 96 | 97 | def update_batch(self, idxes, priorities): 98 | """ 99 | Update SumTree Priorities after training. 100 | 101 | Args: 102 | idxes: 103 | rewards: 104 | 105 | """ 106 | raise NotImplementedError 107 | 108 | @staticmethod 109 | def from_spec(spec, kwargs=None): 110 | """ 111 | Creates a memory from a specification dict. 112 | """ 113 | memory = util.get_object( 114 | obj=spec, 115 | predefined_objects=tensorforce.core.memories.memories, 116 | kwargs=kwargs 117 | ) 118 | assert isinstance(memory, Memory) 119 | return memory 120 | -------------------------------------------------------------------------------- /tensorforce/core/memories/replay.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from random import randrange 21 | import numpy as np 22 | 23 | from tensorforce import util 24 | from tensorforce.core.memories import Memory 25 | 26 | 27 | class Replay(Memory): 28 | """ 29 | Replay memory to store observations and sample mini batches for training from. 30 | """ 31 | 32 | def __init__(self, states_spec, actions_spec, capacity, random_sampling=True): 33 | super(Replay, self).__init__(states_spec=states_spec, actions_spec=actions_spec) 34 | self.capacity = capacity 35 | self.states = {name: np.zeros((capacity,) + tuple(state['shape']), dtype=util.np_dtype(state['type'])) 36 | for name, state in states_spec.items()} 37 | self.next_states = {name: np.zeros((capacity,) + tuple(state['shape']), dtype=util.np_dtype(state['type'])) 38 | for name, state in states_spec.items()} 39 | self.internals, self.next_internals = None, None 40 | self.actions = {name: np.zeros((capacity,) + tuple(action['shape']), dtype=util.np_dtype(action['type'])) 41 | for name, action in actions_spec.items()} 42 | self.terminal = np.zeros((capacity,), dtype=util.np_dtype('bool')) 43 | self.reward = np.zeros((capacity,), dtype=util.np_dtype('float')) 44 | 45 | self.size = 0 46 | self.index = 0 47 | self.random_sampling = random_sampling 48 | 49 | def add_observation(self, states, internals, actions, terminal, reward, next_states, next_internals): 50 | if self.internals is None: 51 | self.internals = [np.zeros((self.capacity,) + internal.shape, internal.dtype) for internal in internals] 52 | if self.next_internals is None: 53 | self.next_internals = [np.zeros((self.capacity,) + internal.shape, internal.dtype) for internal in next_internals] 54 | 55 | for name, state in states.items(): 56 | self.states[name][self.index] = state 57 | for name, next_state in next_states.items(): 58 | self.next_states[name][self.index] = next_state 59 | for n, internal in enumerate(internals): 60 | self.internals[n][self.index] = internal 61 | for n, next_internal in enumerate(next_internals): 62 | self.next_internals[n][self.index] = next_internal 63 | for name, action in actions.items(): 64 | self.actions[name][self.index] = action 65 | self.reward[self.index] = reward 66 | self.terminal[self.index] = terminal 67 | 68 | if self.size < self.capacity: 69 | self.size += 1 70 | self.index = (self.index + 1) % self.capacity 71 | 72 | def get_batch(self, batch_size): 73 | """ 74 | Samples a batch of the specified size by selecting a random start/end point and returning 75 | the contained sequence or random indices depending on the field 'random_sampling'. 
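Example (an illustrative sketch; `memory` is a hypothetical, already filled Replay instance):
    batch = memory.get_batch(batch_size=32)
    states, actions = batch['states'], batch['actions']
    rewards, terminals = batch['reward'], batch['terminal']
    next_states = batch['next_states']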
76 | 77 | Args: 78 | batch_size: The batch size 79 | 80 | 81 | Returns: A dict containing states, actions, rewards, terminals, internal states (and next states) 82 | 83 | """ 84 | indices = np.random.randint(self.size - 1, size=batch_size) 85 | terminal = self.terminal.take(indices) 86 | 87 | states = {name: state.take(indices, axis=0) for name, state in self.states.items()} 88 | internals = [internal.take(indices, axis=0) for internal in self.internals] 89 | actions = {name: action.take(indices, axis=0) for name, action in self.actions.items()} 90 | terminal = self.terminal.take(indices) 91 | reward = self.reward.take(indices) 92 | next_states = {name: state.take(indices, axis=0) for name, state in self.next_states.items()} 93 | next_internals = [internal.take(indices, axis=0) for internal in self.next_internals] 94 | 95 | batch = dict(states=states, internals=internals, actions=actions, terminal=terminal, reward=reward, 96 | next_states=next_states, next_internals=next_internals) 97 | return batch 98 | 99 | def set_memory(self, states, internals, actions, terminal, reward, next_states, next_internals): 100 | """ 101 | Convenience function to set whole batches as memory content to bypass 102 | calling the insert function for every single experience. 103 | 104 | """ 105 | self.size = len(terminal) 106 | 107 | if len(terminal) == self.capacity: 108 | # Assign directly if capacity matches size. 109 | for name, state in states.items(): 110 | self.states[name] = np.asarray(state) 111 | for name, state in next_states.items(): 112 | self.next_states[name] = np.asarray(state) 113 | self.internals = [np.asarray(internal) for internal in internals] 114 | self.next_internals = [np.asarray(internal) for internal in next_internals] 115 | for name, action in actions.items(): 116 | self.actions[name] = np.asarray(action) 117 | self.terminal = np.asarray(terminal) 118 | self.reward = np.asarray(reward) 119 | # Filled capacity to point of index wrap 120 | self.index = 0 121 | 122 | else: 123 | # Otherwise partial assignment. 124 | if self.internals is None: 125 | self.internals = [np.zeros((self.capacity,) + internal.shape, internal.dtype) for internal 126 | in internals] 127 | if self.next_internals is None: 128 | self.next_internals = [np.zeros((self.capacity,) + internal.shape, internal.dtype) for internal 129 | in next_internals] 130 | 131 | for name, state in states.items(): 132 | self.states[name][:len(state)] = state 133 | for name, state in next_states.items(): 134 | self.next_states[name][:len(state)] = state 135 | for n, internal in enumerate(internals): 136 | self.internals[n][:len(internal)] = internal 137 | for n, next_internal in enumerate(next_internals): 138 | self.next_internals[n][:len(next_internal)] = next_internal 139 | for name, action in actions.items(): 140 | self.actions[name][:len(action)] = action 141 | self.terminal[:len(terminal)] = terminal 142 | self.reward[:len(reward)] = reward 143 | self.index = len(terminal) 144 | 145 | def update_batch(self, idxes, priorities): 146 | pass 147 | -------------------------------------------------------------------------------- /tensorforce/core/networks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from tensorforce.core.networks.layer import Layer, Nonlinearity, Dropout, Flatten, Identity, Layernorm, Pool2d, Embedding, Linear, Dense, \ 17 | Dueling, Conv1d, Conv2d, InternalLstm, Lstm 18 | from tensorforce.core.networks.network import Network, LayerBasedNetwork, LayeredNetwork 19 | 20 | 21 | layers = dict( 22 | nonlinearity=Nonlinearity, 23 | dropout=Dropout, 24 | flatten=Flatten, 25 | identity=Identity, 26 | layer_norm=Layernorm, 27 | pool2d=Pool2d, 28 | embedding=Embedding, 29 | linear=Linear, 30 | dense=Dense, 31 | dueling=Dueling, 32 | conv1d=Conv1d, 33 | conv2d=Conv2d, 34 | internal_lstm=InternalLstm, 35 | lstm=Lstm 36 | ) 37 | 38 | 39 | __all__ = [ 40 | 'layers', 41 | 'Layer', 42 | 'Nonlinearity', 43 | 'Identity', 44 | 'Layernorm', 45 | 'Dropout', 46 | 'Flatten', 47 | 'Pool2d', 48 | 'Embedding', 49 | 'Linear', 50 | 'Dense', 51 | 'Dueling', 52 | 'Conv1d', 53 | 'Conv2d', 54 | 'InternalLstm', 55 | 'Lstm', 56 | 'Network', 57 | 'LayerBasedNetwork', 58 | 'LayeredNetwork' 59 | ] 60 | -------------------------------------------------------------------------------- /tensorforce/core/networks/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/networks/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/networks/__pycache__/layer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/networks/__pycache__/layer.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/networks/__pycache__/network.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/networks/__pycache__/network.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/networks/network.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from collections import Counter 21 | import json 22 | import os 23 | 24 | import tensorflow as tf 25 | 26 | from tensorforce import util, TensorforceError 27 | from tensorforce.core.networks import Layer 28 | 29 | 30 | class Network(object): 31 | """ 32 | Base class for neural networks. 33 | """ 34 | 35 | def __init__(self, scope='network', summary_labels=None): 36 | self.summary_labels = set(summary_labels or ()) 37 | 38 | self.variables = dict() 39 | self.all_variables = dict() 40 | self.summaries = list() 41 | 42 | def custom_getter(getter, name, registered=False, **kwargs): 43 | variable = getter(name=name, registered=True, **kwargs) 44 | if not registered: 45 | self.all_variables[name] = variable 46 | if kwargs.get('trainable', True) and not name.startswith('optimization'): 47 | self.variables[name] = variable 48 | if 'variables' in self.summary_labels: 49 | summary = tf.summary.histogram(name=name, values=variable) 50 | self.summaries.append(summary) 51 | return variable 52 | 53 | self.apply = tf.make_template( 54 | name_=(scope + '/apply'), 55 | func_=self.tf_apply, 56 | custom_getter_=custom_getter 57 | ) 58 | self.regularization_loss = tf.make_template( 59 | name_=(scope + '/regularization-loss'), 60 | func_=self.tf_regularization_loss, 61 | custom_getter_=custom_getter 62 | ) 63 | 64 | def tf_apply(self, x, internals, update, return_internals=False): 65 | """ 66 | Creates the TensorFlow operations for applying the network to the given input. 67 | 68 | Args: 69 | x: Network input tensor or dict of input tensors. 70 | internals: List of prior internal state tensors 71 | update: Boolean tensor indicating whether this call happens during an update. 72 | return_internals: If true, also returns posterior internal state tensors 73 | 74 | Returns: 75 | Network output tensor, plus optionally list of posterior internal state tensors 76 | """ 77 | raise NotImplementedError 78 | 79 | def tf_regularization_loss(self): 80 | """ 81 | Creates the TensorFlow operations for the network regularization loss. 82 | 83 | Returns: 84 | Regularization loss tensor 85 | """ 86 | return None 87 | 88 | def internals_input(self): 89 | """ 90 | Returns the TensorFlow placeholders for internal state inputs. 91 | 92 | Returns: 93 | List of internal state input placeholders 94 | """ 95 | return list() 96 | 97 | def internals_init(self): 98 | """ 99 | Returns the TensorFlow tensors for internal state initializations. 100 | 101 | Returns: 102 | List of internal state initialization tensors 103 | """ 104 | return list() 105 | 106 | def get_variables(self, include_non_trainable=False): 107 | """ 108 | Returns the TensorFlow variables used by the network. 109 | 110 | Returns: 111 | List of variables 112 | """ 113 | if include_non_trainable: 114 | return [self.all_variables[key] for key in sorted(self.all_variables)] 115 | else: 116 | return [self.variables[key] for key in sorted(self.variables)] 117 | 118 | def get_summaries(self): 119 | """ 120 | Returns the TensorFlow summaries reported by the network. 121 | 122 | Returns: 123 | List of summaries 124 | """ 125 | return self.summaries 126 | 127 | @staticmethod 128 | def from_spec(spec, kwargs=None): 129 | """ 130 | Creates a network from a specification dict. 
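Example (an illustrative sketch; the layer types and sizes are hypothetical, and a plain
list spec is assumed to fall back to the default LayeredNetwork via util.get_object):
    network = Network.from_spec(
        spec=[
            dict(type='dense', size=64),
            dict(type='dense', size=32)
        ]
    )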
131 | """ 132 | network = util.get_object( 133 | obj=spec, 134 | default_object=LayeredNetwork, 135 | kwargs=kwargs 136 | ) 137 | assert isinstance(network, Network) 138 | return network 139 | 140 | 141 | class LayerBasedNetwork(Network): 142 | """ 143 | Base class for networks using tensorforce layers. 144 | """ 145 | 146 | def __init__(self, scope='layerbased-network', summary_labels=()): 147 | super(LayerBasedNetwork, self).__init__(scope=scope, summary_labels=summary_labels) 148 | self.layers = list() 149 | 150 | def add_layer(self, layer): 151 | self.layers.append(layer) 152 | 153 | def tf_regularization_loss(self): 154 | regularization_loss = super(LayerBasedNetwork, self).tf_regularization_loss() 155 | if regularization_loss is None: 156 | losses = list() 157 | else: 158 | losses = [regularization_loss] 159 | 160 | for layer in self.layers: 161 | regularization_loss = layer.regularization_loss() 162 | if regularization_loss is not None: 163 | losses.append(regularization_loss) 164 | 165 | if len(losses) > 0: 166 | return tf.add_n(inputs=losses) 167 | else: 168 | return None 169 | 170 | def internals_input(self): 171 | internals_input = super(LayerBasedNetwork, self).internals_input() 172 | for layer in self.layers: 173 | internals_input.extend(layer.internals_input()) 174 | return internals_input 175 | 176 | def internals_init(self): 177 | internals_init = super(LayerBasedNetwork, self).internals_init() 178 | for layer in self.layers: 179 | internals_init.extend(layer.internals_init()) 180 | return internals_init 181 | 182 | def get_variables(self, include_non_trainable=False): 183 | network_variables = super(LayerBasedNetwork, self).get_variables( 184 | include_non_trainable=include_non_trainable 185 | ) 186 | layer_variables = [ 187 | variable for layer in self.layers 188 | for variable in layer.get_variables(include_non_trainable=include_non_trainable) 189 | ] 190 | 191 | return network_variables + layer_variables 192 | 193 | def get_summaries(self): 194 | network_summaries = super(LayerBasedNetwork, self).get_summaries() 195 | layer_summaries = [summary for layer in self.layers for summary in layer.get_summaries()] 196 | 197 | return network_summaries + layer_summaries 198 | 199 | 200 | class LayeredNetwork(LayerBasedNetwork): 201 | """ 202 | Network consisting of a sequence of layers, which can be created from a specification dict. 203 | """ 204 | 205 | def __init__(self, layers_spec, scope='layered-network', summary_labels=()): 206 | """ 207 | Layered network. 
208 | 209 | Args: 210 | layers_spec: List of layer specification dicts 211 | """ 212 | super(LayeredNetwork, self).__init__(scope=scope, summary_labels=summary_labels) 213 | self.layers_spec = layers_spec 214 | layer_counter = Counter() 215 | 216 | for layer_spec in self.layers_spec: 217 | if isinstance(layer_spec['type'], str): 218 | name = layer_spec['type'] 219 | else: 220 | name = 'layer' 221 | scope = name + str(layer_counter[name]) 222 | layer_counter[name] += 1 223 | 224 | layer = Layer.from_spec( 225 | spec=layer_spec, 226 | kwargs=dict(scope=scope, summary_labels=summary_labels) 227 | ) 228 | self.add_layer(layer=layer) 229 | 230 | def tf_apply(self, x, internals, update, return_internals=False): 231 | if isinstance(x, dict): 232 | if len(x) != 1: 233 | raise TensorforceError('Layered network must have only one input, but {} given.'.format(len(x))) 234 | x = next(iter(x.values())) 235 | 236 | internal_outputs = list() 237 | index = 0 238 | for layer in self.layers: 239 | layer_internals = [internals[index + n] for n in range(layer.num_internals)] 240 | index += layer.num_internals 241 | x = layer.apply(x, update, *layer_internals) 242 | 243 | if not isinstance(x, tf.Tensor): 244 | internal_outputs.extend(x[1]) 245 | x = x[0] 246 | 247 | if return_internals: 248 | return x, internal_outputs 249 | else: 250 | return x 251 | 252 | @staticmethod 253 | def from_json(filename): 254 | """ 255 | Creates a layer_networkd_builder from a JSON. 256 | 257 | Args: 258 | filename: Path to configuration 259 | 260 | Returns: A layered_network_builder function with layers generated from the JSON 261 | """ 262 | path = os.path.join(os.getcwd(), filename) 263 | with open(path, 'r') as fp: 264 | config = json.load(fp=fp) 265 | return LayeredNetwork(layers_spec=config) 266 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | from tensorforce.core.optimizers.optimizer import Optimizer 17 | from tensorforce.core.optimizers.tf_optimizer import TFOptimizer 18 | from tensorforce.core.optimizers.meta_optimizer import MetaOptimizer 19 | from tensorforce.core.optimizers.global_optimizer import GlobalOptimizer 20 | 21 | 22 | # This can register any class inheriting from tf.train.Optimizer 23 | optimizers = dict( 24 | adadelta=TFOptimizer.get_wrapper(optimizer='adadelta'), 25 | adagrad=TFOptimizer.get_wrapper(optimizer='adagrad'), 26 | adam=TFOptimizer.get_wrapper(optimizer='adam'), 27 | nadam=TFOptimizer.get_wrapper(optimizer='nadam'), 28 | gradient_descent=TFOptimizer.get_wrapper(optimizer='gradient_descent'), 29 | momentum=TFOptimizer.get_wrapper(optimizer='momentum'), 30 | rmsprop=TFOptimizer.get_wrapper(optimizer='rmsprop'), 31 | # GlobalOptimizer not (yet) a valid choice 32 | ) 33 | 34 | 35 | __all__ = ['optimizers', 'Optimizer', 'TFOptimizer', 'MetaOptimizer', 'GlobalOptimizer'] 36 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/clipped_step.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/clipped_step.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/evolutionary.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/evolutionary.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/global_optimizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/global_optimizer.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/meta_optimizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/meta_optimizer.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/multi_step.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/multi_step.cpython-36.pyc -------------------------------------------------------------------------------- 
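A minimal usage sketch for the optimizer registry above, assuming the `Optimizer.from_spec` helper defined in optimizer.py and a TensorFlow 1.x runtime; the spec values are illustrative only:

    # Resolve an optimizer specification dict against the `optimizers` registry.
    # 'type' selects the registry entry; the remaining keys are forwarded to the
    # underlying tf.train optimizer constructor.
    from tensorforce.core.optimizers import Optimizer

    spec = dict(type='adam', learning_rate=1e-3)
    optimizer = Optimizer.from_spec(spec=spec)  # a TFOptimizer wrapping tf.train.AdamOptimizer

If a decaying learning rate is wanted, the `add_lr_decay` helper in lr_decay/tf_schedules.py rewrites such a spec beforehand, replacing the scalar `learning_rate` with a decayed tensor driven by `global_step`; that rewriting happens outside of `from_spec`.
--------------------------------------------------------------------------------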
/tensorforce/core/optimizers/__pycache__/natural_gradient.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/natural_gradient.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/optimized_step.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/optimized_step.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/optimizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/optimizer.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/synchronization.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/synchronization.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/__pycache__/tf_optimizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/__pycache__/tf_optimizer.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/global_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | import tensorflow as tf 21 | 22 | from tensorforce import util 23 | from tensorforce.core.optimizers import MetaOptimizer 24 | 25 | 26 | class GlobalOptimizer(MetaOptimizer): 27 | """ 28 | The global optimizer applies an optimizer to the local variables. In addition, it also 29 | applies the update a corresponding set of global variables and subsequently updates the local 30 | variables to the value of these global variables. 31 | Note: This is used for the current distributed mode, and will likely change with the next 32 | major version update. 
33 | """ 34 | 35 | def __init__(self, optimizer, summaries=None, summary_labels=None): 36 | """ 37 | Creates a new global optimizer instance. 38 | 39 | Args: 40 | optimizer: The optimizer which is modified by this meta optimizer. 41 | """ 42 | super(GlobalOptimizer, self).__init__( 43 | optimizer=optimizer, 44 | summaries=summaries, 45 | summary_labels=summary_labels 46 | ) 47 | 48 | def tf_step(self, time, variables, global_variables, **kwargs): 49 | """ 50 | Creates the TensorFlow operations for performing an optimization step. 51 | 52 | Args: 53 | time: Time tensor. 54 | variables: List of variables to optimize. 55 | global_variables: List of global variables to apply the proposed optimization step to. 56 | **kwargs: ??? coming soon 57 | 58 | Returns: 59 | List of delta tensors corresponding to the updates for each optimized variable. 60 | """ 61 | assert all(util.shape(global_var) == util.shape(local_var) for global_var, local_var 62 | in zip(global_variables, variables)) 63 | 64 | local_deltas = self.optimizer.step(time=time, variables=variables, **kwargs) 65 | 66 | with tf.control_dependencies(control_inputs=local_deltas): 67 | applied = self.optimizer.apply_step(variables=global_variables, deltas=local_deltas) 68 | 69 | with tf.control_dependencies(control_inputs=(applied,)): 70 | update_deltas = list() 71 | for global_var, local_var in zip(global_variables, variables): 72 | delta = global_var - local_var 73 | update_deltas.append(delta) 74 | 75 | applied = self.apply_step(variables=variables, deltas=update_deltas) 76 | 77 | # TODO: Update time, episode, etc (like in Synchronization)? 78 | 79 | with tf.control_dependencies(control_inputs=(applied,)): 80 | return [local_delta + update_delta for local_delta, update_delta in zip(local_deltas, update_deltas)] 81 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/lr_decay/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/lr_decay/__init__.py -------------------------------------------------------------------------------- /tensorforce/core/optimizers/lr_decay/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/lr_decay/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/lr_decay/__pycache__/tf_schedules.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/optimizers/lr_decay/__pycache__/tf_schedules.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/optimizers/lr_decay/tf_schedules.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | desc: learning rate decayer. 
5 | created: 2017.12.27 6 | @author: sam.dm 7 | """ 8 | 9 | import tensorflow as tf 10 | from tensorforce import util, TensorforceError 11 | 12 | 13 | def from_spec(spec, kwargs=None): 14 | lr_schedule = util.get_object( 15 | obj=spec, 16 | predefined_objects=lr_schedulers, 17 | kwargs=kwargs 18 | ) 19 | assert isinstance(lr_schedule, DecaySchedule) 20 | 21 | return lr_schedule 22 | 23 | 24 | def add_lr_decay(spec, global_step, kwargs=None): 25 | """ 26 | Creates an learning rate decayed instance from a optimizer specification dict. 27 | """ 28 | 29 | def parse_decay_conf(optimizer_spec, global_step): 30 | lr = optimizer_spec['learning_rate'] 31 | lr_schedule = optimizer_spec['lr_schedule'] 32 | if lr_schedule is None: 33 | del optimizer_spec['lr_schedule'] 34 | return optimizer_spec 35 | lr_schedule['global_step'] = global_step 36 | lr_decay_obj = from_spec(lr_schedule) 37 | optimizer_spec['learning_rate'] = lr_decay_obj(value=lr) 38 | pop_value = optimizer_spec.pop('lr_schedule', None) 39 | return optimizer_spec 40 | 41 | if 'optimizer' in spec: 42 | optimizer_spec = spec['optimizer'] 43 | if 'learning_rate' in optimizer_spec and 'lr_schedule' in optimizer_spec: 44 | spec['optimizer'] = parse_decay_conf(optimizer_spec, global_step) 45 | 46 | elif 'learning_rate' in spec and 'lr_schedule' in spec: 47 | spec = parse_decay_conf(spec, global_step) 48 | 49 | return spec 50 | 51 | class DecaySchedule(object): 52 | 53 | def __call(self, value): 54 | 55 | raise NotImplementedError() 56 | 57 | class Constant(DecaySchedule): 58 | def __init__(self, global_step=None): 59 | """ 60 | decayed_value = value 61 | """ 62 | 63 | self._global_step = global_step 64 | 65 | def __call__(self, value): 66 | 67 | return value 68 | 69 | 70 | class TFExponentialDecay(DecaySchedule): 71 | def __init__(self, global_step, decay_steps=20000, decay_rate=0.96, staircase=False): 72 | """ 73 | decayed_value = value * decay_rate ^ (global_step / decay_steps) 74 | """ 75 | 76 | self._global_step = global_step 77 | self._decay_steps = decay_steps 78 | self._decay_rate = decay_rate 79 | self._staircase = staircase 80 | 81 | def __call__(self, value): 82 | 83 | decayed_value = tf.train.exponential_decay(value, self._global_step, 84 | self._decay_steps, self._decay_rate, self._staircase) 85 | return decayed_value 86 | 87 | 88 | class TFInverseTimeDecay(DecaySchedule): 89 | def __init__(self, global_step, decay_steps=20000, decay_rate=0.96, staircase=False): 90 | """ 91 | decayed_value = value / (1 + decay_rate * t) 92 | """ 93 | 94 | self._global_step = global_step 95 | self._decay_steps = decay_steps 96 | self._decay_rate = decay_rate 97 | self._staircase = staircase 98 | 99 | def __call__(self, value): 100 | 101 | decayed_value = tf.train.inverse_time_decay(value, self._global_step, 102 | self._decay_steps, self._decay_rate, self._staircase) 103 | return decayed_value 104 | 105 | 106 | class TFNaturalExpDecay(DecaySchedule): 107 | def __init__(self, global_step, decay_steps=20000, decay_rate=0.96, staircase=False): 108 | """ 109 | decayed_value = value * exp(-decay_rate * (global_step / decay_steps)) 110 | """ 111 | 112 | self._global_step = global_step 113 | self._decay_steps = decay_steps 114 | self._decay_rate = decay_rate 115 | self._staircase = staircase 116 | 117 | def __call__(self, value): 118 | 119 | decayed_value = tf.train.natural_exp_decay(value, self._global_step, 120 | self._decay_steps, self._decay_rate, self._staircase) 121 | return decayed_value 122 | 123 | 124 | class TFPolynomialDecay(DecaySchedule): 125 | 
def __init__(self, global_step, decay_steps=20000, final_value=0.0001, 126 | power=1.0, cycle=False): 127 | """ 128 | global_step = min(global_step, decay_steps) 129 | decayed_final_value = (final_value - final_value) * 130 | (1 - global_step / decay_steps) ^ (power) + 131 | final_value 132 | """ 133 | 134 | self._global_step = global_step 135 | self._decay_steps = decay_steps 136 | self._final_value = final_value 137 | self._power = power 138 | self._cycle = cycle 139 | 140 | def __call__(self, value): 141 | decayed_value = tf.train.polynomial_decay(value, self._global_step, 142 | self._decay_steps, self._final_value, 143 | self._power, self._cycle) 144 | return decayed_value 145 | 146 | class LinearDecay(DecaySchedule): 147 | def __init__(self, global_step, max_decay_steps=20000, final_value=0.0001): 148 | """ 149 | decayed_value = init_value + (global_step / max_decay_steps) * ( 150 | init_value - final_value) 151 | """ 152 | 153 | self._global_step = global_step 154 | self._max_decay_steps = tf.constant(value=max_decay_steps, dtype=tf.int32) 155 | self._final_value = final_value 156 | self._first_pass = True 157 | 158 | def __call__(self, value): 159 | if self._first_pass: 160 | self._init_value = value 161 | self._first_pass = False 162 | 163 | self.fraction = tf.minimum(tf.divide(self._global_step, self._max_decay_steps), 1.0) 164 | 165 | return self._init_value + tf.multiply(self.fraction, self._final_value - self._init_value) 166 | 167 | 168 | lr_schedulers = { 169 | "constant": Constant, 170 | "exp_decay": TFExponentialDecay, 171 | "natural_exp_decay": TFNaturalExpDecay, 172 | "inverse_time_decay": TFInverseTimeDecay, 173 | "polynomial_decay": TFPolynomialDecay, 174 | "linear_decay": LinearDecay 175 | } 176 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/meta_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from tensorforce.core.optimizers import Optimizer 21 | 22 | 23 | class MetaOptimizer(Optimizer): 24 | """ 25 | A meta optimizer takes the optimization implemented by another optimizer and 26 | modifies/optimizes its proposed result. For example, line search might be applied to find a 27 | more optimal step size. 28 | """ 29 | 30 | def __init__(self, optimizer, **kwargs): 31 | """ 32 | Creates a new meta optimizer instance. 33 | 34 | Args: 35 | optimizer: The optimizer which is modified by this meta optimizer. 
36 | """ 37 | super(MetaOptimizer, self).__init__(**kwargs) 38 | 39 | self.optimizer = Optimizer.from_spec(spec=optimizer, kwargs=kwargs) 40 | 41 | def get_variables(self): 42 | return super(MetaOptimizer, self).get_variables() + self.optimizer.get_variables() 43 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | import tensorflow as tf 21 | 22 | from tensorforce import util, TensorforceError 23 | import tensorforce.core.optimizers 24 | 25 | 26 | class Optimizer(object): 27 | """ 28 | Generic TensorFlow optimizer which minimizes a not yet further specified expression, usually 29 | some kind of loss function. More generally, an optimizer can be considered as some method of 30 | updating a set of variables. 31 | """ 32 | 33 | def __init__(self, summaries=None, summary_labels=None): 34 | """ 35 | Creates a new optimizer instance. 36 | """ 37 | self.variables = dict() 38 | self.summaries = summaries 39 | if summary_labels is None: 40 | self.summary_labels = dict() 41 | else: 42 | self.summary_labels = summary_labels 43 | 44 | def custom_getter(getter, name, registered=False, **kwargs): 45 | variable = getter(name=name, registered=True, **kwargs) 46 | if not registered: 47 | assert kwargs.get('trainable', False) 48 | self.variables[name] = variable 49 | return variable 50 | 51 | # TensorFlow function 52 | self.step = tf.make_template( 53 | name_='step', 54 | func_=self.tf_step, 55 | custom_getter=custom_getter 56 | ) 57 | 58 | def tf_step(self, time, variables, **kwargs): 59 | """ 60 | Creates the TensorFlow operations for performing an optimization step. 61 | 62 | Args: 63 | time: Time tensor. 64 | variables: List of variables to optimize. 65 | **kwargs: Additional arguments depending on the specific optimizer implementation. 66 | For instance, often includes `fn_loss` if a loss function is optimized. 67 | 68 | Returns: 69 | List of delta tensors corresponding to the updates for each optimized variable. 70 | """ 71 | raise NotImplementedError 72 | 73 | def minimize(self, time, variables, **kwargs): 74 | """ 75 | Performs an optimization step. 76 | 77 | Args: 78 | time: Time tensor. 79 | variables: List of variables to optimize. 80 | **kwargs: Additional optimizer-specific arguments. The following arguments are used 81 | by some optimizers: 82 | - fn_loss: A callable returning the loss of the current model. 83 | - fn_kl_divergence: A callable returning the KL-divergence relative to the 84 | current model. 
85 | - return_estimated_improvement: Returns the estimated improvement resulting from 86 | the natural gradient calculation if true. 87 | - fn_reference: A callable returning the reference values necessary for comparison. 88 | - fn_compare: A callable comparing the current model to the reference model given 89 | by its values. 90 | - source_variables: List of source variables to synchronize with. 91 | - global_variables: List of global variables to apply the proposed optimization 92 | step to. 93 | 94 | 95 | Returns: 96 | The optimization operation. 97 | """ 98 | # Add training variable gradient histograms/scalars to summary output 99 | #if 'gradients' in self.summary_labels: 100 | if any(k in self.summary_labels for k in ['gradients', 'gradients_histogram', 'gradients_scalar']): 101 | valid = True 102 | if isinstance(self, tensorforce.core.optimizers.TFOptimizer): 103 | gradients = self.optimizer.compute_gradients(kwargs['fn_loss']()) 104 | elif isinstance(self.optimizer, tensorforce.core.optimizers.TFOptimizer): 105 | ## This section handles "Multi_step" and may handle others 106 | # if failure is found, add another elif to handle that case 107 | gradients = self.optimizer.optimizer.compute_gradients(kwargs['fn_loss']()) 108 | else: 109 | # Didn't find proper gradient information 110 | valid = False 111 | 112 | # Valid gradient data found, create summary data items 113 | if valid: 114 | for grad, var in gradients: 115 | if grad is not None: 116 | if any(k in self.summary_labels for k in ['gradients','gradients_scalar']): 117 | axes = list(range(len(grad.shape))) 118 | mean, var = tf.nn.moments(grad,axes) 119 | summary = tf.summary.scalar(name='gradients/' + var.name+ "/mean", tensor=mean) 120 | self.summaries.append(summary) 121 | summary = tf.summary.scalar(name='gradients/' + var.name+ "/variance", tensor=var) 122 | self.summaries.append(summary) 123 | if any(k in self.summary_labels for k in ['gradients', 'gradients_histogram']): 124 | summary = tf.summary.histogram(name='gradients/' + var.name, values=grad) 125 | self.summaries.append(summary) 126 | 127 | deltas = self.step(time=time, variables=variables, **kwargs) 128 | with tf.control_dependencies(control_inputs=deltas): 129 | return tf.no_op() 130 | 131 | def get_variables(self): 132 | """ 133 | Returns the TensorFlow variables used by the optimizer. 134 | 135 | Returns: 136 | List of variables. 137 | """ 138 | return [self.variables[key] for key in sorted(self.variables)] 139 | 140 | @staticmethod 141 | def from_spec(spec, kwargs=None): 142 | """ 143 | Creates an optimizer from a specification dict. 144 | """ 145 | optimizer = util.get_object( 146 | obj=spec, 147 | predefined_objects=tensorforce.core.optimizers.optimizers, 148 | kwargs=kwargs 149 | ) 150 | assert isinstance(optimizer, Optimizer) 151 | return optimizer 152 | 153 | def apply_step(self, variables, deltas): 154 | """ 155 | Applies step deltas to variable values. 156 | 157 | Args: 158 | variables: List of variables. 159 | deltas: List of deltas of same length. 160 | 161 | Returns: 162 | The step-applied operation. 
163 | """ 164 | if len(variables) != len(deltas): 165 | raise TensorforceError("Invalid variables and deltas lists.") 166 | return tf.group(*(variable.assign_add(delta=delta) for variable, delta in zip(variables, deltas))) 167 | -------------------------------------------------------------------------------- /tensorforce/core/optimizers/tf_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | import tensorflow as tf 21 | 22 | from tensorforce.core.optimizers import Optimizer 23 | 24 | 25 | class TFOptimizer(Optimizer): 26 | """ 27 | Wrapper class for TensorFlow optimizers. 28 | """ 29 | 30 | tf_optimizers = dict( 31 | adadelta=tf.train.AdadeltaOptimizer, 32 | adagrad=tf.train.AdagradOptimizer, 33 | adam=tf.train.AdamOptimizer, 34 | nadam=tf.contrib.opt.NadamOptimizer, 35 | gradient_descent=tf.train.GradientDescentOptimizer, 36 | momentum=tf.train.MomentumOptimizer, 37 | rmsprop=tf.train.RMSPropOptimizer 38 | ) 39 | 40 | @staticmethod 41 | def get_wrapper(optimizer): 42 | """ 43 | Returns a TFOptimizer constructor callable for the given optimizer name. 44 | 45 | Args: 46 | optimizer: The name of the optimizer, one of 'adadelta', 'adagrad', 'adam', 'nadam', 47 | 'gradient_descent', 'momentum', 'rmsprop'. 48 | 49 | Returns: 50 | The TFOptimizer constructor callable. 51 | """ 52 | def wrapper(**kwargs): 53 | return TFOptimizer(optimizer=optimizer, **kwargs) 54 | return wrapper 55 | 56 | def __init__(self, optimizer, summaries=None, summary_labels=None, **kwargs): 57 | """ 58 | Creates a new optimizer instance of a TensorFlow optimizer. 59 | 60 | Args: 61 | optimizer: The name of the optimizer, one of 'adadelta', 'adagrad', 'adam', 'nadam', 62 | 'gradient_descent', 'momentum', 'rmsprop'. 63 | **kwargs: Additional arguments passed on to the TensorFlow optimizer constructor. 64 | """ 65 | super(TFOptimizer, self).__init__(summaries=summaries, summary_labels=summary_labels) 66 | 67 | self.name = optimizer 68 | self.optimizer = TFOptimizer.tf_optimizers[optimizer](**kwargs) 69 | 70 | def tf_step(self, time, variables, fn_loss, **kwargs): 71 | """ 72 | Creates the TensorFlow operations for performing an optimization step. 73 | 74 | Args: 75 | time: Time tensor. 76 | variables: List of variables to optimize. 77 | fn_loss: A callable returning the loss of the current model. 78 | gradients: Gradients for update the variables when no fn_loss be given. 79 | **kwargs: Additional arguments, not used. 80 | 81 | Returns: 82 | List of delta tensors corresponding to the updates for each optimized variable. 
83 | """ 84 | if fn_loss is not None: 85 | loss = fn_loss() 86 | else: 87 | gradients = kwargs.get("gradients", None) 88 | assert gradients is not None and len(gradients) == len(variables) 89 | loss = tf.no_op() 90 | 91 | with tf.control_dependencies(control_inputs=(loss,)): 92 | # Trivial operation to enforce control dependency 93 | vars_before = [var + 0.0 for var in variables] 94 | 95 | with tf.control_dependencies(control_inputs=vars_before): 96 | if fn_loss is not None: 97 | applied = self.optimizer.minimize(loss=loss, var_list=variables) 98 | else: 99 | applied = self.optimizer.apply_gradients(zip(gradients, variables)) 100 | 101 | with tf.control_dependencies(control_inputs=(applied,)): 102 | return [var - var_before for var, var_before in zip(variables, vars_before)] 103 | 104 | def get_variables(self): 105 | optimizer_variables = super(TFOptimizer, self).get_variables() 106 | 107 | slots_variables = [ 108 | self.optimizer._slots[slot][key] 109 | for slot in sorted(self.optimizer._slots) 110 | for key in sorted(self.optimizer._slots[slot]) 111 | ] 112 | 113 | if self.name in ('adam', 'nadam'): 114 | additional_variables = [self.optimizer._beta1_power, self.optimizer._beta2_power] 115 | else: 116 | additional_variables = list() 117 | 118 | return optimizer_variables + slots_variables + additional_variables 119 | -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | from tensorforce.core.preprocessing.preprocessor import Preprocessor 17 | from tensorforce.core.preprocessing.standardize import Standardize 18 | from tensorforce.core.preprocessing.preprocessor_stack import PreprocessorStack 19 | 20 | 21 | preprocessors = dict( 22 | standardize=Standardize, 23 | ) 24 | 25 | 26 | __all__ = [ 27 | 'Preprocessor', 28 | 'Standardize', 29 | 'PreprocessorStack', 30 | 'preprocessors' 31 | ] 32 | -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/clip.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/clip.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/divide.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/divide.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/grayscale.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/grayscale.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/image_resize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/image_resize.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/normalize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/normalize.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/preprocessor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/preprocessor.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/preprocessor_stack.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/preprocessor_stack.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/running_standardize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/running_standardize.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/__pycache__/standardize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/core/preprocessing/__pycache__/standardize.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/preprocessor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | import tensorflow as tf 20 | 21 | 22 | class Preprocessor(object): 23 | 24 | def __init__(self, scope='preprocessor', summary_labels=None): 25 | self.summary_labels = set(summary_labels or ()) 26 | self.variables = dict() 27 | self.summaries = list() 28 | 29 | def custom_getter(getter, name, registered=False, **kwargs): 30 | variable = getter(name=name, registered=True, **kwargs) 31 | if not registered: 32 | self.variables[name] = variable 33 | return variable 34 | 35 | self.process = tf.make_template( 36 | name_=(scope + '/process'), 37 | func_=self.tf_process, 38 | custom_getter_=custom_getter 39 | ) 40 | 41 | def reset(self): 42 | pass 43 | 44 | def tf_process(self, tensor): 45 | """ 46 | Process state. 47 | 48 | Args: 49 | tensor: tensor to process. 50 | 51 | Returns: processed tensor. 52 | """ 53 | return tensor 54 | 55 | def processed_shape(self, shape): 56 | """ 57 | Shape of preprocessed state given original shape. 58 | 59 | Args: 60 | shape: original shape. 61 | 62 | Returns: processed tensor shape 63 | """ 64 | return shape 65 | 66 | def get_variables(self): 67 | """ 68 | Returns the TensorFlow variables used by the preprocessor. 69 | 70 | Returns: 71 | List of variables. 
72 | """ 73 | return [self.variables[key] for key in sorted(self.variables)] 74 | -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/preprocessor_stack.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from tensorforce import util 21 | from tensorforce.core.preprocessing import Preprocessor 22 | import tensorforce.core.preprocessing 23 | 24 | 25 | class PreprocessorStack(object): 26 | 27 | def __init__(self): 28 | self.preprocessors = list() 29 | 30 | def reset(self): 31 | for processor in self.preprocessors: 32 | processor.reset() 33 | 34 | def process(self, tensor): 35 | """ 36 | Process state. 37 | 38 | Args: 39 | tensor: tensor to process 40 | 41 | Returns: processed state 42 | 43 | """ 44 | for processor in self.preprocessors: 45 | tensor = processor.process(tensor=tensor) 46 | return tensor 47 | 48 | def processed_shape(self, shape): 49 | """ 50 | Shape of preprocessed state given original shape. 51 | 52 | Args: 53 | shape: original state shape 54 | 55 | Returns: processed state shape 56 | """ 57 | for processor in self.preprocessors: 58 | shape = processor.processed_shape(shape=shape) 59 | return shape 60 | 61 | def get_variables(self): 62 | return [variable for preprocessor in self.preprocessors for variable in preprocessor.get_variables()] 63 | 64 | @staticmethod 65 | def from_spec(spec): 66 | """ 67 | Creates a preprocessing stack from a specification dict. 68 | """ 69 | if isinstance(spec, dict): 70 | spec = [spec] 71 | 72 | stack = PreprocessorStack() 73 | for spec in spec: 74 | preprocessor = util.get_object( 75 | obj=spec, 76 | predefined_objects=tensorforce.core.preprocessing.preprocessors 77 | ) 78 | assert isinstance(preprocessor, Preprocessor) 79 | stack.preprocessors.append(preprocessor) 80 | 81 | return stack 82 | -------------------------------------------------------------------------------- /tensorforce/core/preprocessing/standardize.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import numpy as np 21 | import tensorflow as tf 22 | 23 | from tensorforce import util 24 | from tensorforce.core.preprocessing import Preprocessor 25 | 26 | 27 | class Standardize(Preprocessor): 28 | """ 29 | Standardize state. Subtract mean and divide by standard deviation. 30 | """ 31 | 32 | def __init__(self, mean=None, var=None, across_batch=False, scope='standardize', summary_labels=()): 33 | self.across_batch = across_batch 34 | self.mean = mean 35 | self.var = var 36 | 37 | super(Standardize, self).__init__(scope=scope, summary_labels=summary_labels) 38 | 39 | def tf_process(self, tensor): 40 | if self.mean is not None and self.var is not None: 41 | return (tensor - self.mean) / (self.var + util.epsilon) 42 | 43 | if self.across_batch: 44 | axes = tuple(range(util.rank(tensor))) 45 | else: 46 | axes = tuple(range(1, util.rank(tensor))) 47 | 48 | mean, variance = tf.nn.moments(x=tensor, axes=axes, keep_dims=True) 49 | return (tensor - mean) / tf.maximum(x=tf.sqrt(variance), y=util.epsilon) 50 | -------------------------------------------------------------------------------- /tensorforce/environments/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | 17 | from tensorforce.environments.environment import Environment 18 | from tensorforce.environments.meta_environment import MetaEnvironment 19 | from tensorforce.environments.classic_control import CartPole 20 | from tensorforce.environments.classic_control import Pendulum 21 | import six 22 | 23 | environments = dict( 24 | cart_pole=CartPole, 25 | pendulum=Pendulum 26 | ) 27 | 28 | __all__ = ['Environment', 'MetaEnvironment', 'Pendulum', 'CartPole'] -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/environment.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/gym_environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/gym_environment.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/meta_environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/meta_environment.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/minimal_test.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/minimal_test.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/oss_environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/oss_environment.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/__pycache__/table_environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/__pycache__/table_environment.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | """ 4 | desc: classic 
control environments. 5 | create: 2017.12.19 6 | modified by @sam.dm 7 | """ 8 | 9 | 10 | from tensorforce.environments.classic_control.cart_pole import CartPole 11 | from tensorforce.environments.classic_control.pendulum import Pendulum 12 | 13 | environments = dict( 14 | cart_pole=CartPole, 15 | pendulum=Pendulum, 16 | ) 17 | 18 | __all__ = ['Pendulum', 'CartPole'] 19 | -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/classic_control/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/__pycache__/cart_pole.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/classic_control/__pycache__/cart_pole.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/__pycache__/pendulum.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/environments/classic_control/__pycache__/pendulum.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/cart_pole.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | """ 4 | desc: classic cart-pole. 
5 | create: 2017.12.11 6 | author: @sam.dm 7 | """ 8 | 9 | 10 | import math 11 | import numpy as np 12 | import tensorforce.core.lib.env_seeding as seeding 13 | from tensorforce.environments import Environment 14 | 15 | 16 | class CartPole(Environment): 17 | 18 | def __init__(self): 19 | self.gravity = 9.8 20 | self.masscart = 1.0 21 | self.masspole = 0.1 22 | self.total_mass = (self.masspole + self.masscart) 23 | self.length = 0.5 24 | self.polemass_length = (self.masspole * self.length) 25 | self.force_mag = 10.0 26 | self.tau = 0.02 27 | 28 | # Angle at which to fail the episode 29 | self.theta_threshold_radians = 12 * 2 * math.pi / 360 30 | self.x_threshold = 2.4 31 | self.high = np.array([ 32 | self.x_threshold * 2, 33 | np.finfo(np.float32).max, 34 | self.theta_threshold_radians * 2, 35 | np.finfo(np.float32).max]) 36 | 37 | self.seed() 38 | self.state = None 39 | self.steps_beyond_done = None 40 | 41 | def __str__(self): 42 | return "CartPole" 43 | 44 | def seed(self, seed=None): 45 | self.np_random, seed = seeding.np_random(seed) 46 | return [seed] 47 | 48 | def execute(self, actions): 49 | assert self._action_contains(actions), "%r (%s) invalid"%(actions, type(actions)) 50 | state = self.state 51 | x, x_dot, theta, theta_dot = state 52 | force = self.force_mag if actions==1 else -self.force_mag 53 | costheta = math.cos(theta) 54 | sintheta = math.sin(theta) 55 | temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass 56 | thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass)) 57 | xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass 58 | x = x + self.tau * x_dot 59 | x_dot = x_dot + self.tau * xacc 60 | theta = theta + self.tau * theta_dot 61 | theta_dot = theta_dot + self.tau * thetaacc 62 | self.state = (x,x_dot,theta,theta_dot) 63 | done = x < -self.x_threshold \ 64 | or x > self.x_threshold \ 65 | or theta < -self.theta_threshold_radians \ 66 | or theta > self.theta_threshold_radians 67 | done = bool(done) 68 | 69 | if not done: 70 | reward = 1.0 71 | elif self.steps_beyond_done is None: 72 | # Pole just fell! 73 | self.steps_beyond_done = 0 74 | reward = 1.0 75 | else: 76 | if self.steps_beyond_done == 0: 77 | print("You are calling 'step()' even though this environment \ 78 | has already returned done = True. 
You should always call \ 79 | 'reset()' once you receive 'done = True' -- any further \ 80 | steps are undefined behavior.") 81 | self.steps_beyond_done += 1 82 | reward = 0.0 83 | 84 | return np.array(self.state), done, reward 85 | 86 | def reset(self): 87 | self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) 88 | self.steps_beyond_done = None 89 | return np.array(self.state) 90 | 91 | def close(self): 92 | self.state = None 93 | self.steps_beyond_done = None 94 | 95 | def _state_contains(self, state): 96 | cons = [np.abs(x)<=y for x,y,z in zip(state, self.high)] 97 | 98 | return all(cons) 99 | 100 | def _action_contains(self, action): 101 | cons = action>=0 and action < 2 102 | 103 | return cons 104 | 105 | @property 106 | def state_space(self): 107 | state = dict(shape=4, type='float') 108 | 109 | return state 110 | 111 | @property 112 | def action_space(self): 113 | action = dict(type='int', num_actions=2) 114 | 115 | return action 116 | -------------------------------------------------------------------------------- /tensorforce/environments/classic_control/pendulum.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | desc : the pendulum emulator 5 | create: 2017.12.11 6 | @author: sam.dm 7 | """ 8 | 9 | import numpy as np 10 | import tensorforce.core.lib.env_seeding as seeding 11 | from tensorforce.environments import Environment 12 | 13 | class Pendulum(Environment): 14 | def __init__(self): 15 | self.max_speed = 8 16 | self.max_torque = 2.0 17 | self.dt = 0.05 18 | 19 | self.high = np.array([1., 1., self.max_speed]) 20 | 21 | self.seed() 22 | 23 | def __str__(self): 24 | return "Pendulum" 25 | 26 | def seed(self, seed=None): 27 | self.np_random, seed = seeding.np_random(seed) 28 | return [seed] 29 | 30 | def execute(self,actions): 31 | th, thdot = self.state # th := theta 32 | 33 | g, m, l = 10.0, 1.0, 1.0 34 | dt = self.dt 35 | 36 | action = np.clip(actions, -self.max_torque, self.max_torque)[0] 37 | costs = self._angle_normalize(th)**2 + .1*thdot**2 + .001*(action**2) 38 | 39 | newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*action) * dt 40 | newth = th + newthdot*dt 41 | newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) 42 | 43 | self.state = np.array([newth, newthdot]) 44 | return self._get_obs(), False, -costs 45 | 46 | def reset(self): 47 | high = np.array([np.pi, 1]) 48 | self.state = self.np_random.uniform(low=-high, high=high) 49 | return self._get_obs() 50 | 51 | def _get_obs(self): 52 | theta, thetadot = self.state 53 | return np.array([np.cos(theta), np.sin(theta), thetadot]) 54 | 55 | def _angle_normalize(self, x): 56 | return (((x+np.pi) % (2*np.pi)) - np.pi) 57 | 58 | @property 59 | def state_space(self): 60 | state = dict(shape=3, type='float') 61 | 62 | return state 63 | 64 | @property 65 | def action_space(self): 66 | action = dict(type='float', min_value=-self.max_torque, max_value=self.max_torque) 67 | 68 | return action 69 | 70 | def state_contains(self, state): 71 | cons = [np.abs(x)<=y for x,y,z in zip(state, self.high)] 72 | return all(cons) 73 | 74 | def action_contains(self, action): 75 | cons = np.abs(action[0]) <= self.max_torque 76 | 77 | return cons 78 | 79 | def close(self): 80 | self.state = None 81 | -------------------------------------------------------------------------------- /tensorforce/environments/environment.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 
reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | 17 | from __future__ import absolute_import 18 | from __future__ import print_function 19 | from __future__ import division 20 | 21 | 22 | class Environment(object): 23 | """ 24 | Base environment class. 25 | """ 26 | 27 | def __str__(self): 28 | raise NotImplementedError 29 | 30 | def close(self): 31 | """ 32 | Close environment. No other method calls possible afterwards. 33 | """ 34 | pass 35 | 36 | def seed(self, seed): 37 | """ 38 | Sets the random seed of the environment to the given value (current time, if seed=None). 39 | Naturally deterministic Environments (e.g. ALE or some gym Envs) don't have to implement this method. 40 | Args: 41 | seed (int): The seed to use for initializing the pseudo-random number generator (default=epoch time in sec). 42 | Returns: The actual seed (int) used OR None if Environment did not override this method (no seeding supported). 43 | """ 44 | return None 45 | 46 | def reset(self): 47 | """ 48 | Reset environment and setup for new episode. 49 | 50 | Returns: 51 | initial state of reset environment. 52 | """ 53 | raise NotImplementedError 54 | 55 | def execute(self, actions): 56 | """ 57 | Executes action, observes next state(s) and reward. 58 | 59 | Args: 60 | actions: Actions to execute. 61 | 62 | Returns: 63 | (Dict of) next state(s), boolean indicating terminal, and reward signal. 64 | """ 65 | raise NotImplementedError 66 | 67 | @property 68 | def states(self): 69 | """ 70 | Return the state space. Might include subdicts if multiple states are available simultaneously. 71 | 72 | Returns: dict of state properties (shape and type). 73 | 74 | """ 75 | raise NotImplementedError 76 | 77 | @property 78 | def actions(self): 79 | """ 80 | Return the action space. Might include subdicts if multiple actions are available simultaneously. 81 | 82 | Returns: dict of action properties (continuous, number of actions) 83 | 84 | """ 85 | raise NotImplementedError 86 | -------------------------------------------------------------------------------- /tensorforce/environments/meta_environment.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import print_function 18 | from __future__ import division 19 | 20 | from tensorforce.environments.environment import Environment 21 | from tensorforce.exception import TensorforceError 22 | 23 | 24 | class MetaEnvironment(Environment): 25 | """ 26 | Base class for the unified IO interface 27 | """ 28 | 29 | def __init__(self, config): 30 | super(MetaEnvironment, self).__init__() 31 | self._parse(config) 32 | 33 | def _parse(self, config): 34 | """ 35 | Base configuration parsing shared by all IO types 36 | """ 37 | # Get the type of IO; 38 | # supported options are ('Table','DataHub','Gym','Universe','UserDef') 39 | if 'env_type' not in config: 40 | raise TensorforceError('cannot find env_type in configuration') 41 | self.env_type = config['env_type'] 42 | 43 | if 'env' in config: 44 | self.env_conf = config['env'] 45 | else: 46 | raise TensorforceError('cannot find env config') 47 | 48 | # Whether the task runs in interactive mode; 49 | # the default is non-interactive 50 | self.interactive = False 51 | if 'interactive' in self.env_conf: 52 | self.interactive = self.env_conf['interactive'] 53 | 54 | 55 | def parse_env_config(self): 56 | """ 57 | IO-specific parsing function 58 | """ 59 | raise NotImplementedError() 60 | 61 | def get_input_tensor(self): 62 | """ 63 | Initialize a dict with a single state input tensor, an action tensor and a reward tensor, 64 | or a dict of state input tensors if multiple states are provided. 65 | The return value is used to initialize the agent. 66 | """ 67 | raise NotImplementedError() 68 | 69 | def read(self): 70 | """ 71 | Read a batch of data for a model update. 72 | This method is only used in non-interactive mode; 73 | call execute() in interactive mode. 74 | """ 75 | raise NotImplementedError() 76 | 77 | def should_stop(self): 78 | """ 79 | In non-interactive mode, 80 | should_stop() is called in Runner.consumer() to determine whether to end the training loop. 81 | This method is only used in non-interactive mode. 82 | """ 83 | raise NotImplementedError() -------------------------------------------------------------------------------- /tensorforce/exception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | 17 | from __future__ import absolute_import 18 | from __future__ import print_function 19 | from __future__ import division 20 | 21 | 22 | class TensorforceError(Exception): 23 | """ 24 | Tensorforce error 25 | """ 26 | pass 27 | -------------------------------------------------------------------------------- /tensorforce/meta_parameter_recorder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io.
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import inspect 17 | import os 18 | import numpy as np 19 | import tensorflow as tf 20 | from tensorforce import TensorforceError 21 | 22 | 23 | class MetaParameterRecorder(object): 24 | """ 25 | Class to record MetaParameters as well as Summary/Description for TensorBoard (TEXT & FILE will come later) 26 | 27 | #### General: 28 | 29 | * format_type: used to configure data conversion for TensorBoard=0, TEXT & JSON (not Implemented), etc 30 | """ 31 | 32 | def __init__(self, current_frame): 33 | """ 34 | Init the MetaPrameterRecord with "Agent" parameters by passing inspect.currentframe() from Agent Class 35 | 36 | The Init will search back to find the parent class to capture all passed parameters and store 37 | them in "self.meta_params". 38 | 39 | NOTE: Currently only optimized for TensorBoard output 40 | 41 | TODO: Add JSON Export, TEXT EXPORT 42 | 43 | Args: 44 | current_frame: frame value from class to obtain metaparameters[= inspect.currentframe()] 45 | 46 | """ 47 | self.ignore_unknown_dtypes = False 48 | self.meta_params = dict() 49 | self.method_calling = inspect.getframeinfo(current_frame)[2] 50 | 51 | _, _, __, self.vals_current = inspect.getargvalues(current_frame) 52 | # self is the class name of the frame involved 53 | if 'self' in self.vals_current: 54 | self.recorded_class_type = self.vals_current['self'] 55 | # Add explicit AgentName item so class can be deleted 56 | self.meta_params['AgentName'] = str(self.vals_current['self']) 57 | 58 | frame_list = inspect.getouterframes(current_frame) 59 | 60 | for frame in frame_list: 61 | # Rather than frame.frame (named tuple), use [0] for python2 62 | args, varargs, keywords, vals =inspect.getargvalues(frame[0]) 63 | if 'self' in vals: 64 | if self.recorded_class_type == vals['self']: 65 | for i in args: 66 | self.meta_params[i] = vals[i] 67 | # Remove the "CLASS" from the dictionary, has no value "AgentName" contains STR of Class 68 | del self.meta_params['self'] 69 | 70 | def merge_custom(self, custom_dict): 71 | if type(custom_dict) is not dict: 72 | raise TensorforceError( 73 | "Error: MetaParameterRecorder 'meta_dict' must be passed a dictionary " 74 | "but was passed a type {} which is not supported.".format(str(type(custom_dict))) 75 | ) 76 | for key in custom_dict: 77 | if key in self.meta_params: 78 | raise TensorforceError( 79 | "Error: MetaParameterRecorder 'meta_dict' key {} conflicts with internal key," 80 | " please change passed key.".format(str(key)) 81 | ) 82 | self.meta_params[key] = custom_dict[key] 83 | # This line assumes the merge data came from summary_spec['meta_dict'], remove this from summary_spec 84 | del self.meta_params['summary_spec']['meta_dict'] 85 | 86 | def text_output(self, format_type=1): 87 | print('======================= ' + self.meta_params['AgentName'] + ' ====================================') 
88 | for key in self.meta_params: 89 | print( 90 | " ", 91 | key, 92 | type(self.meta_params[key]), 93 | "=", 94 | self.convert_data_to_string(self.meta_params[key], format_type=format_type) 95 | ) 96 | 97 | print('======================= ' + self.meta_params['AgentName'] + ' ====================================') 98 | 99 | def convert_dictionary_to_string(self, data, indent=0, format_type=0, separator=None, eol=None): 100 | data_string = "" 101 | add_separator = "" 102 | if eol is None: 103 | eol = os.linesep 104 | if separator is None: 105 | separator = ", " 106 | 107 | # This should not ever occur but here as a catch 108 | if type(data) is not dict: 109 | raise TensorforceError( 110 | "Error: MetaParameterRecorder Dictionary conversion was passed a type {}" 111 | " not supported.".format(str(type(data))) 112 | ) 113 | 114 | # TensorBoard 115 | if format_type == 0: 116 | label = "" 117 | div = "" 118 | 119 | if indent > 0: 120 | label = " | " 121 | div = "--- | " 122 | data_string += label + "Key | Value" + eol + div + "--- | ----" + eol 123 | 124 | for key in data: 125 | key_txt = key 126 | # TensorBoard 127 | if format_type == 0: 128 | key_txt = "**" + key + "**" 129 | key_value_sep = ' | ' 130 | if indent > 0: 131 | key_txt = " | " + key_txt 132 | 133 | converted_data = self.convert_data_to_string(data[key], separator=separator, indent=indent+1) 134 | data_string += add_separator + key_txt + key_value_sep + converted_data + eol 135 | 136 | return data_string 137 | 138 | def convert_list_to_string(self, data, indent=0, format_type=0, eol=None, count=True): 139 | data_string = "" 140 | if eol is None: 141 | eol = os.linesep 142 | 143 | # This should not ever occur but here as a catch 144 | if type(data) is not list: 145 | raise TensorforceError( 146 | "Error: MetaParameterRecorder List conversion was passed a type {}" 147 | " not supported.".format(str(type(data))) 148 | ) 149 | 150 | for index,line in enumerate(data): 151 | data_string_prefix = "" 152 | if count and indent == 0: 153 | data_string_prefix = str(index+1)+". 
" 154 | # TensorBoard 155 | if format_type == 0: 156 | # Only add indent for 2nd item and beyond as this is likely a dictionary entry 157 | if indent > 0 and index>0: 158 | data_string_prefix = " | "+data_string_prefix 159 | if index == (len(data)-1): 160 | append_eol = "" 161 | else: 162 | append_eol = eol 163 | data_string += data_string_prefix + self.convert_data_to_string(line, indent=indent+1) + append_eol 164 | 165 | return data_string 166 | 167 | def convert_ndarray_to_md(self, data, format_type=0, eol=None): 168 | data_string = "" 169 | data_string1 = "|Row|" 170 | data_string2 = "|:---:|" 171 | if eol is None: 172 | eol = os.linesep 173 | 174 | # This should not ever occur but here as a catch 175 | if type(data) is not np.ndarray: 176 | raise TensorforceError( 177 | "Error: MetaParameterRecorder ndarray conversion was passed" 178 | " a type {} not supported.".format(str(type(data))) 179 | ) 180 | 181 | shape = data.shape 182 | rank = data.ndim 183 | 184 | if rank == 2: 185 | for col in range(shape[1]): 186 | data_string1 += "Col-" + str(col) + "|" 187 | data_string2 += ":----:|" 188 | data_string += data_string1 + eol + data_string2 + eol 189 | 190 | for row in range(shape[0]): 191 | data_string += "|" + str(row) + "|" 192 | for col in range(shape[1]): 193 | data_string += str(data[row,col]) + "|" 194 | 195 | if row != (shape[0]-1): 196 | data_string += eol 197 | 198 | elif rank == 1: 199 | data_string += "|Row|Col-0|" + eol + "|:----:|:----:|" + eol 200 | 201 | for row in range(shape[0]): 202 | data_string += str(row) + "|" + str(data[row]) + "|" + eol 203 | 204 | return data_string 205 | 206 | def convert_data_to_string(self, data, indent=0, format_type=0, separator=None, eol=None): 207 | data_string = "" 208 | if type(data) is int: 209 | data_string = str(data) 210 | elif type(data) is float: 211 | data_string = str(data) 212 | elif type(data) is str: 213 | data_string = data 214 | elif type(data) is tuple: 215 | data_string = str(data) 216 | elif type(data) is list: 217 | data_string = self.convert_list_to_string(data, indent=indent, eol=eol) 218 | elif type(data) is bool: 219 | data_string = str(data) 220 | elif type(data) is dict: 221 | data_string = self.convert_dictionary_to_string(data, indent=indent, separator=separator) 222 | elif type(data) is np.ndarray: 223 | # TensorBoard 224 | if format_type == 0: 225 | data_string = self.convert_ndarray_to_md(data) 226 | else: 227 | data_string = str(data) 228 | elif data is None: 229 | data_string = "None" 230 | else: 231 | if not self.ignore_unknown_dtypes: 232 | data_string = "Error: MetaParameterRecorder Type conversion from type {} not supported.".\ 233 | format(str(type(data))) 234 | data_string += " ("+str(data)+") " 235 | else: 236 | # TensorBoard 237 | if format_type == 0: 238 | data_string = "**?**" 239 | 240 | return data_string 241 | 242 | def build_metagraph_list(self): 243 | """ 244 | Convert MetaParams into TF Summary Format and create summary_op 245 | 246 | Args: 247 | None 248 | 249 | Returns: 250 | Merged TF Op for TEXT summary elements, should only be executed once to reduce data duplication 251 | 252 | """ 253 | ops = [] 254 | 255 | self.ignore_unknown_dtypes = True 256 | for key in sorted(self.meta_params): 257 | value = self.convert_data_to_string(self.meta_params[key]) 258 | 259 | if len(value) == 0: 260 | continue 261 | if isinstance(value,str): 262 | ops.append(tf.summary.text(key, tf.convert_to_tensor(str(value)))) 263 | else: 264 | ops.append(tf.summary.text(key, 
tf.as_string(tf.convert_to_tensor(value)))) 265 | 266 | with tf.control_dependencies(tf.tuple(ops)): 267 | self.summary_merged = tf.summary.merge_all() 268 | 269 | return self.summary_merged 270 | -------------------------------------------------------------------------------- /tensorforce/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | from tensorforce.models.model import Model 16 | from tensorforce.models.deterministic_es_model import DeterministicESModel 17 | 18 | 19 | models = dict( 20 | deterministic_es_model=DeterministicESModel 21 | ) 22 | 23 | 24 | __all__ = [ 25 | 'Model', 26 | 'models', 27 | 'DeterministicESModel' 28 | ] 29 | -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/constant_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/constant_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/ddpg_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/ddpg_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/deterministic_es_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/deterministic_es_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/distribution_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/distribution_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/es_model.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/es_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/pg_log_prob_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/pg_log_prob_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/pg_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/pg_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/pg_prob_ratio_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/pg_prob_ratio_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/q_demo_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/q_demo_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/q_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/q_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/q_naf_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/q_naf_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/q_nstep_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/q_nstep_model.cpython-36.pyc -------------------------------------------------------------------------------- /tensorforce/models/__pycache__/random_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rec-agent/rec-rl/40e52bd5819e3b4e0be66ed550ab073e289fad77/tensorforce/models/__pycache__/random_model.cpython-36.pyc 
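
A minimal usage sketch for the MetaParameterRecorder defined in tensorforce/meta_parameter_recorder.py above. The DemoAgent class, its constructor arguments, and the summary-writer note are hypothetical illustrations, not code from this repository; they only show the intended call pattern: an agent-style class passes inspect.currentframe() from its own __init__ so the recorder can walk the outer frames and capture every constructor argument, then text_output() pretty-prints them and build_metagraph_list() produces the merged TensorBoard TEXT summary op.

import inspect
from tensorforce.meta_parameter_recorder import MetaParameterRecorder

class DemoAgent(object):
    # Hypothetical agent used only to illustrate the call pattern.
    def __init__(self, batch_size=32, learning_rate=1e-3, summarizer=None):
        # Capture every argument of this constructor; the recorder walks the
        # parent frames, so subclass __init__ arguments would be captured too.
        self.meta_param_recorder = MetaParameterRecorder(inspect.currentframe())

agent = DemoAgent(batch_size=64)
agent.meta_param_recorder.text_output()                        # print the captured parameters
summary_op = agent.meta_param_recorder.build_metagraph_list()  # merged TF TEXT summary op
# summary_op would then be evaluated once in a session and written with a tf.summary.FileWriter.
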
-------------------------------------------------------------------------------- /tensorforce/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 reinforce.io. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import importlib 17 | import logging 18 | import numpy as np 19 | import tensorflow as tf 20 | from tensorflow.core.util.event_pb2 import SessionLog 21 | 22 | from tensorforce import TensorforceError 23 | 24 | 25 | epsilon = 1e-6 26 | 27 | 28 | log_levels = dict( 29 | info=logging.INFO, 30 | debug=logging.DEBUG, 31 | critical=logging.CRITICAL, 32 | warning=logging.WARNING, 33 | fatal=logging.FATAL 34 | ) 35 | 36 | 37 | def prod(xs): 38 | """Computes the product along the elements in an iterable. Returns 1 for empty iterable. 39 | 40 | Args: 41 | xs: Iterable containing numbers. 42 | 43 | Returns: Product along iterable. 44 | 45 | """ 46 | p = 1 47 | for x in xs: 48 | p *= x 49 | return p 50 | 51 | 52 | def rank(x): 53 | return x.get_shape().ndims 54 | 55 | 56 | def shape(x, unknown=-1): 57 | return tuple(unknown if dims is None else dims for dims in x.get_shape().as_list()) 58 | 59 | 60 | def cumulative_discount(values, terminals, discount, cumulative_start=0.0): 61 | """ 62 | Compute cumulative discounts. 63 | Args: 64 | values: Values to discount 65 | terminals: Booleans indicating terminal states 66 | discount: Discount factor 67 | cumulative_start: Float or ndarray, estimated reward for state t + 1. Default 0.0 68 | 69 | Returns: 70 | dicounted_values: The cumulative discounted rewards. 71 | """ 72 | if discount == 0.0: 73 | return np.asarray(values) 74 | 75 | # cumulative start can either be a number or ndarray 76 | if type(cumulative_start) is np.ndarray: 77 | discounted_values = np.zeros((len(values),) + (cumulative_start.shape)) 78 | else: 79 | discounted_values = np.zeros(len(values)) 80 | 81 | cumulative = cumulative_start 82 | for n, (value, terminal) in reversed(list(enumerate(zip(values, terminals)))): 83 | if terminal: 84 | cumulative = np.zeros_like(cumulative_start, dtype=np.float32) 85 | cumulative = value + cumulative * discount 86 | discounted_values[n] = cumulative 87 | 88 | return discounted_values 89 | 90 | 91 | def np_dtype(dtype): 92 | """Translates dtype specifications in configurations to numpy data types. 93 | Args: 94 | dtype: String describing a numerical type (e.g. 'float') or numerical type primitive. 
95 | 96 | Returns: Numpy data type 97 | 98 | """ 99 | if dtype == 'float' or dtype == float or dtype == np.float32 or dtype == tf.float32: 100 | return np.float32 101 | elif dtype == 'int' or dtype == int or dtype == np.int32 or dtype == tf.int32: 102 | return np.int32 103 | elif dtype == 'bool' or dtype == bool or dtype == np.bool_ or dtype == tf.bool: 104 | return np.bool_ 105 | else: 106 | raise TensorforceError("Error: Type conversion from type {} not supported.".format(str(dtype))) 107 | 108 | 109 | def tf_dtype(dtype): 110 | """Translates dtype specifications in configurations to tensorflow data types. 111 | 112 | Args: 113 | dtype: String describing a numerical type (e.g. 'float'), numpy data type, 114 | or numerical type primitive. 115 | 116 | Returns: TensorFlow data type 117 | 118 | """ 119 | if dtype == 'float' or dtype == float or dtype == np.float32 or dtype == tf.float32: 120 | return tf.float32 121 | elif dtype == 'int' or dtype == int or dtype == np.int32 or dtype == tf.int32: 122 | return tf.int32 123 | elif dtype == 'bool' or dtype == bool or dtype == np.bool_ or dtype == tf.bool: 124 | return tf.bool 125 | else: 126 | raise TensorforceError("Error: Type conversion from type {} not supported.".format(str(dtype))) 127 | 128 | 129 | def unflatten(vector, shapes): 130 | i = 0 131 | arrays = [] 132 | for shape in shapes: 133 | size = np.prod(shape, dtype=np.int) 134 | array = vector[i:(i + size)].reshape(shape) 135 | arrays.append(array) 136 | i += size 137 | assert len(vector) == i, "Passed weight does not have the correct shape." 138 | return arrays 139 | 140 | 141 | def compute_ranks(x): 142 | """ 143 | Returns ranks in [0, len(x)) 144 | Note: This is different from scipy.stats.rankdata, which returns ranks in 145 | [1, len(x)]. 146 | """ 147 | assert x.ndim == 1 148 | ranks = np.empty(len(x), dtype=int) 149 | ranks[x.argsort()] = np.arange(len(x)) 150 | return ranks 151 | 152 | 153 | def compute_centered_ranks(x): 154 | y = compute_ranks(x.ravel()).reshape(x.shape).astype(np.float32) 155 | y /= (x.size - 1) 156 | y -= 0.5 157 | return y 158 | 159 | 160 | def itergroups(items, group_size): 161 | assert group_size >= 1 162 | group = [] 163 | for x in items: 164 | group.append(x) 165 | if len(group) == group_size: 166 | yield tuple(group) 167 | del group[:] 168 | if group: 169 | yield tuple(group) 170 | 171 | 172 | def batched_weighted_sum(weights, vecs, slice_size): 173 | total = 0 174 | num_items_summed = 0 175 | for batch_weights, batch_vecs in zip(itergroups(weights, slice_size), 176 | itergroups(vecs, slice_size)): 177 | assert len(batch_weights) == len(batch_vecs) <= slice_size 178 | total += np.dot(np.asarray(batch_weights, dtype=np.float32), 179 | np.asarray(batch_vecs, dtype=np.float32)) 180 | num_items_summed += len(batch_weights) 181 | return total, num_items_summed 182 | 183 | 184 | def run_with_location_trace(self, sess, op): 185 | run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 186 | run_metadata = tf.RunMetadata() 187 | sess.run(op, options=run_options, run_metadata=run_metadata) 188 | for device in run_metadata.step_stats.dev_stats: 189 | print(device.device) 190 | for node in device.node_stats: 191 | print(" ", node.node_name) 192 | 193 | 194 | 195 | def get_object(obj, predefined_objects=None, default_object=None, kwargs=None): 196 | """ 197 | Utility method to map some kind of object specification to its content, 198 | e.g. optimizer or baseline specifications to the respective classes. 
199 | 200 | Args: 201 | obj: A specification dict (value for key 'type' optionally specifies 202 | the object, options as follows), a module path (e.g., 203 | my_module.MyClass), a key in predefined_objects, or a callable 204 | (e.g., the class type object). 205 | predefined_objects: Dict containing predefined set of objects, 206 | accessible via their key 207 | default_object: Default object is no other is specified 208 | kwargs: Arguments for object creation 209 | 210 | Returns: The retrieved object 211 | 212 | """ 213 | args = () 214 | kwargs = dict() if kwargs is None else kwargs 215 | 216 | if isinstance(obj, dict): 217 | kwargs.update(obj) 218 | obj = kwargs.pop('type', None) 219 | 220 | if predefined_objects is not None and obj in predefined_objects: 221 | obj = predefined_objects[obj] 222 | elif isinstance(obj, str): 223 | if obj.find('.') != -1: 224 | module_name, function_name = obj.rsplit('.', 1) 225 | module = importlib.import_module(module_name) 226 | obj = getattr(module, function_name) 227 | else: 228 | predef_obj_keys = list(predefined_objects.keys()) 229 | raise TensorforceError("Error: object {} not found in predefined objects: {}".format(obj,predef_obj_keys)) 230 | elif callable(obj): 231 | pass 232 | elif default_object is not None: 233 | args = (obj,) 234 | obj = default_object 235 | else: 236 | # assumes the object is already instantiated 237 | return obj 238 | 239 | return obj(*args, **kwargs) 240 | 241 | 242 | class UpdateSummarySaverHook(tf.train.SummarySaverHook): 243 | 244 | def __init__(self, update_input, *args, **kwargs): 245 | super(UpdateSummarySaverHook, self).__init__(*args, **kwargs) 246 | self.update_input = update_input 247 | 248 | def before_run(self, run_context): 249 | self._request_summary = run_context.original_args[1] is not None and \ 250 | run_context.original_args[1].get(self.update_input, False) and \ 251 | (self._next_step is None or self._timer.should_trigger_for_step(self._next_step)) 252 | requests = {'global_step': self._global_step_tensor} 253 | if self._request_summary: 254 | if self._get_summary_op() is not None: 255 | requests['summary'] = self._get_summary_op() 256 | return tf.train.SessionRunArgs(requests) 257 | 258 | def after_run(self, run_context, run_values): 259 | if not self._summary_writer: 260 | return 261 | 262 | stale_global_step = run_values.results["global_step"] 263 | global_step = stale_global_step + 1 264 | if self._next_step is None or self._request_summary: 265 | global_step = run_context.session.run(self._global_step_tensor) 266 | 267 | if self._next_step is None: 268 | self._summary_writer.add_session_log(SessionLog(status=SessionLog.START), global_step) 269 | 270 | if "summary" in run_values.results: 271 | self._timer.update_last_triggered_step(global_step) 272 | for summary in run_values.results["summary"]: 273 | self._summary_writer.add_summary(summary, global_step) 274 | 275 | self._next_step = global_step + 1 276 | 277 | --------------------------------------------------------------------------------
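
To make the helper semantics in tensorforce/util.py concrete, here is a small self-contained usage sketch. The fake_registry and its 'adam' entry are illustrative stand-ins, not a registry defined in this repository; the printed results are approximate.

import numpy as np
import tensorforce.util as util

# cumulative_discount: discounted returns that reset at terminal states.
returns = util.cumulative_discount(
    values=[1.0, 1.0, 1.0, 1.0],
    terminals=[False, False, True, False],
    discount=0.9
)
# -> approximately [2.71, 1.9, 1.0, 1.0]

# np_dtype / tf_dtype: map configuration strings to concrete dtypes.
assert util.np_dtype('float') is np.float32

# get_object: resolve a spec dict against a dict of predefined objects.
fake_registry = dict(adam=lambda learning_rate: ('adam', learning_rate))  # illustrative only
optimizer = util.get_object(
    obj=dict(type='adam', learning_rate=1e-3),
    predefined_objects=fake_registry
)
# -> ('adam', 0.001)
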