├── .gitignore ├── .gitmodules ├── LICENSE.txt ├── README.md ├── VERSION ├── build.sh ├── buildspec.yml ├── docker ├── primary │ ├── Dockerfile.cpu │ ├── Dockerfile.gpu │ └── Dockerfile.gpu-legacy ├── requirements.txt └── secondary │ └── Dockerfile ├── files ├── changehostname.c ├── redis.conf ├── rl_coach.patch └── start.sh └── push.sh /.gitignore: -------------------------------------------------------------------------------- 1 | staging/ 2 | *.tmp -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | *This repository is archived as all needed code is in the [Simapp/Robomaker repository](https://github.com/aws-deepracer-community/deepracer-simapp).* 2 | 3 | # Deepracer Sagemaker Container 4 | 5 | This repository provides the build scripts required to build an AWS DeepRacer Sagemaker Container useful for local build / build in cloud infrastructure. 
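For example, the images can be built locally with the flags defined in `build.sh` further below (a sketch; adjust the architecture list and Docker Hub prefix to your own setup):

```bash
# Build the CPU image with the default "awsdeepracercommunity" prefix.
./build.sh -a cpu

# Build the GPU and Intel-optimized CPU variants under a custom prefix,
# bypassing the Docker build cache (-f).
./build.sh -a "gpu cpu-intel" -p mydockerhubuser -f

# Rebuild only the second stage (deepracer-sagemaker) from existing primary images.
./build.sh -2 -a cpu
```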
6 | 7 | The work is based on the Dockerfile and patches provided by AWS [https://github.com/awslabs/amazon-sagemaker-examples/tree/master/reinforcement_learning/rl_deepracer_robomaker_coach_gazebo] 8 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 5.2.1 2 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | trap ctrl_c INT 3 | 4 | function ctrl_c() { 5 | echo "Requested to stop." 6 | exit 1 7 | } 8 | 9 | set -e 10 | 11 | PREFIX="awsdeepracercommunity" 12 | ARCH="cpu gpu cpu-intel" 13 | 14 | while getopts ":2fa:p:" opt; do 15 | case $opt in 16 | 2) 17 | OPT_SECOND_STAGE_ONLY="OPT_SECOND_STAGE_ONLY" 18 | ;; 19 | p) 20 | PREFIX="$OPTARG" 21 | ;; 22 | a) 23 | ARCH="$OPTARG" 24 | ;; 25 | f) 26 | OPT_NOCACHE="--no-cache" 27 | ;; 28 | \?) 29 | echo "Invalid option -$OPTARG" >&2 30 | exit 1 31 | ;; 32 | esac 33 | done 34 | 35 | DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" 36 | cd $DIR 37 | VERSION=$(cat VERSION) 38 | 39 | echo "Preparing docker images for [$ARCH]" 40 | 41 | TF_VER="tensorflow==2.13.1\ntensorflow-probability==0.21.0" 42 | 43 | ## First stage 44 | if [[ -z "$OPT_SECOND_STAGE_ONLY" ]]; then 45 | 46 | for arch in $ARCH; do 47 | 48 | if [[ "$arch" == "gpu" ]]; then 49 | docker buildx build $OPT_NOCACHE . -t $PREFIX/sagemaker-tensorflow-container:$VERSION-$arch -f docker/primary/Dockerfile.gpu \ 50 | --build-arg TF_VER=$TF_VER 51 | elif [[ "$arch" == "cpu" ]]; then 52 | docker buildx build $OPT_NOCACHE . -t $PREFIX/sagemaker-tensorflow-container:$VERSION-$arch -f docker/primary/Dockerfile.cpu \ 53 | --build-arg TF_VER=$TF_VER 54 | elif [[ "$arch" == "cpu-intel" ]]; then 55 | TF_VER='intel-tensorflow==2.13.0\ntensorflow-probability==0.21.0' 56 | docker buildx build $OPT_NOCACHE . -t $PREFIX/sagemaker-tensorflow-container:$VERSION-$arch -f docker/primary/Dockerfile.cpu \ 57 | --build-arg TF_VER="$TF_VER" 58 | fi 59 | 60 | done 61 | 62 | fi 63 | cd $DIR 64 | 65 | ## Second stage 66 | for arch in $ARCH; do 67 | docker buildx build $OPT_NOCACHE -f docker/secondary/Dockerfile -t $PREFIX/deepracer-sagemaker:$VERSION-$arch . 
--build-arg version=$VERSION --build-arg arch=$arch --build-arg prefix=$PREFIX --build-arg IMG_VERSION=$VERSION 68 | done 69 | 70 | set +e 71 | -------------------------------------------------------------------------------- /buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | phases: 3 | install: 4 | commands: 5 | - pip install awscli wheel setuptools 6 | pre_build: 7 | commands: 8 | - docker login --username ${DOCKER_HUB_USER} --password ${DOCKER_HUB_KEY} 9 | build: 10 | commands: 11 | - bash build.sh -a "${CPU_FLAGS}" -p ${DOCKER_HUB_REPO} 12 | post_build: 13 | commands: 14 | - bash push.sh -a "${CPU_FLAGS}" -p ${DOCKER_HUB_REPO} -------------------------------------------------------------------------------- /docker/primary/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | ARG TF_VER 3 | LABEL maintainer="Amazon AI" 4 | 5 | # Prevent docker build get stopped by requesting user interaction 6 | ENV DEBIAN_FRONTEND=noninteractive 7 | ENV DEBCONF_NONINTERACTIVE_SEEN=true 8 | # Python won’t try to write .pyc or .pyo files on the import of source modules 9 | ENV PYTHONDONTWRITEBYTECODE=1 10 | ENV PYTHONUNBUFFERED=1 11 | # See http://bugs.python.org/issue19846 12 | ENV PYTHONIOENCODING=UTF-8 13 | ENV LANG=C.UTF-8 14 | ENV LC_ALL=C.UTF-8 15 | # Specify the location of module that contains the training logic for SageMaker 16 | # https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html 17 | ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main 18 | 19 | RUN apt-get update && \ 20 | apt-get install -y --no-install-recommends --allow-unauthenticated gnupg ca-certificates curl git wget vim openssh-client && \ 21 | rm -rf /var/lib/apt/lists/* 22 | 23 | RUN apt-get update \ 24 | && apt-get install -y --no-install-recommends --allow-unauthenticated \ 25 | python3-pip \ 26 | python3-setuptools \ 27 | python3-dev \ 28 | libgomp1 \ 29 | libfreetype6-dev \ 30 | libhdf5-serial-dev \ 31 | libpng-dev \ 32 | libzmq3-dev \ 33 | build-essential \ 34 | zlib1g-dev \ 35 | && rm -rf /var/lib/apt/lists/* \ 36 | && mkdir -p /var/run/sshd 37 | 38 | ENV PATH=/usr/local/nvidia/bin:$PATH 39 | 40 | WORKDIR / 41 | 42 | ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True 43 | ADD docker/requirements.txt / 44 | RUN echo "${TF_VER}" > /requirements-tf.txt 45 | RUN pip --no-cache-dir install --upgrade pip 'setuptools<65' \ 46 | && pip install --no-cache-dir -U -r requirements.txt -r requirements-tf.txt 47 | 48 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 49 | 50 | RUN chmod +x /usr/local/bin/deep_learning_container.py 51 | 52 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 53 | 54 | CMD ["bin/bash"] 55 | -------------------------------------------------------------------------------- /docker/primary/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04 2 | ARG TF_VER 3 | LABEL maintainer="Amazon AI" 4 | 5 | # Prevent docker build get stopped by requesting user interaction 6 | ENV DEBIAN_FRONTEND=noninteractive 7 | ENV DEBCONF_NONINTERACTIVE_SEEN=true 8 | # Python won’t try to write .pyc or .pyo files on the import of source modules 9 | ENV PYTHONDONTWRITEBYTECODE=1 10 | 
ENV PYTHONUNBUFFERED=1 11 | # See http://bugs.python.org/issue19846 12 | ENV PYTHONIOENCODING=UTF-8 13 | ENV LANG=C.UTF-8 14 | ENV LC_ALL=C.UTF-8 15 | # Specify the location of module that contains the training logic for SageMaker 16 | # https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html 17 | ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main 18 | 19 | RUN apt-get update && \ 20 | apt-get install -y --no-install-recommends --allow-unauthenticated gnupg ca-certificates curl git wget vim openssh-client && \ 21 | rm -rf /var/lib/apt/lists/* 22 | RUN apt-get update \ 23 | && apt-get install -y --no-install-recommends --allow-unauthenticated \ 24 | python3-pip \ 25 | python3-setuptools \ 26 | python3-dev \ 27 | cuda-command-line-tools-11-8 \ 28 | cuda-nvcc-11-8 \ 29 | libgomp1 \ 30 | libfreetype6-dev \ 31 | libhdf5-serial-dev \ 32 | libpng-dev \ 33 | libzmq3-dev \ 34 | build-essential \ 35 | zlib1g-dev \ 36 | && rm -rf /var/lib/apt/lists/* \ 37 | && mkdir -p /var/run/sshd 38 | 39 | # Set default NCCL parameters 40 | RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf 41 | 42 | ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH 43 | ENV PATH=/usr/local/openmpi/bin/:$PATH 44 | ENV PATH=/usr/local/nvidia/bin:$PATH 45 | 46 | WORKDIR / 47 | 48 | ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True 49 | ADD docker/requirements.txt / 50 | RUN echo "${TF_VER}" > /requirements-tf.txt 51 | RUN pip --no-cache-dir install --upgrade pip 'setuptools<65' \ 52 | && pip install --no-cache-dir -U -r requirements.txt -r requirements-tf.txt 53 | 54 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 55 | 56 | RUN chmod +x /usr/local/bin/deep_learning_container.py 57 | 58 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 59 | 60 | CMD ["bin/bash"] 61 | -------------------------------------------------------------------------------- /docker/primary/Dockerfile.gpu-legacy: -------------------------------------------------------------------------------- 1 | # Nvidia does not publish a TensorRT Runtime library for Ubuntu 18.04 with Cuda 10.1 support, so we stick with cuda 10.0. 2 | # https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/ 3 | FROM nvidia/cuda:10.0-base-ubuntu18.04 4 | 5 | LABEL maintainer="Amazon AI" 6 | 7 | # Prevent docker build get stopped by requesting user interaction 8 | ENV DEBIAN_FRONTEND=noninteractive 9 | ENV DEBCONF_NONINTERACTIVE_SEEN=true 10 | # Python won’t try to write .pyc or .pyo files on the import of source modules 11 | ENV PYTHONDONTWRITEBYTECODE=1 12 | ENV PYTHONUNBUFFERED=1 13 | # See http://bugs.python.org/issue19846 14 | ENV PYTHONIOENCODING=UTF-8 15 | ENV LANG=C.UTF-8 16 | ENV LC_ALL=C.UTF-8 17 | # Specify the location of module that contains the training logic for SageMaker 18 | # https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html 19 | ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main 20 | 21 | RUN distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID | sed 's/\.//') && \ 22 | apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/$distribution/x86_64/7fa2af80.pub 23 | 24 | # Install CUDNN 25 | RUN echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list 26 | 27 | # Define framework-related package sources 28 | ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/gpu/final/tensorflow_gpu-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl 29 | 30 | RUN apt-get update \ 31 | && apt-get install -y --no-install-recommends --allow-unauthenticated \ 32 | python3-dev \ 33 | python3-pip \ 34 | python3-setuptools \ 35 | python3-dev \ 36 | ca-certificates \ 37 | cuda-command-line-tools-10-0 \ 38 | cuda-cublas-dev-10-0 \ 39 | cuda-cudart-dev-10-0 \ 40 | cuda-cufft-dev-10-0 \ 41 | cuda-curand-dev-10-0 \ 42 | cuda-cusolver-dev-10-0 \ 43 | cuda-cusparse-dev-10-0 \ 44 | curl \ 45 | libcudnn7=7.6.5.32-1+cuda10.0 \ 46 | # TensorFlow doesn't require libnccl anymore but Open MPI still depends on it 47 | libnccl2=2.4.7-1+cuda10.0 \ 48 | libgomp1 \ 49 | libnccl-dev=2.4.7-1+cuda10.0 \ 50 | libfreetype6-dev \ 51 | libhdf5-serial-dev \ 52 | libpng-dev \ 53 | libzmq3-dev \ 54 | git \ 55 | wget \ 56 | vim \ 57 | build-essential \ 58 | openssh-client \ 59 | openssh-server \ 60 | zlib1g-dev \ 61 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 62 | # adds a new list which contains libnvinfer library, so it needs another 63 | # 'apt-get update' to retrieve that list before it can actually install the 64 | # library. 65 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 66 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 67 | && apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated \ 68 | nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 \ 69 | && apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated \ 70 | libnvinfer5=5.0.2-1+cuda10.0 \ 71 | && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \ 72 | && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \ 73 | && rm /usr/lib/x86_64-linux-gnu/libnvparsers* \ 74 | && rm -rf /var/lib/apt/lists/* \ 75 | && mkdir -p /var/run/sshd 76 | 77 | ########################################################################### 78 | # Horovod & its dependencies 79 | ########################################################################### 80 | 81 | # Set default NCCL parameters 82 | RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf 83 | 84 | ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH 85 | ENV PATH=/usr/local/openmpi/bin/:$PATH 86 | ENV PATH=/usr/local/nvidia/bin:$PATH 87 | 88 | # SSH login fix. Otherwise user is kicked off after login 89 | RUN mkdir -p /var/run/sshd \ 90 | && sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd 91 | 92 | # Create SSH key. 
93 | RUN mkdir -p /root/.ssh/ \ 94 | && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \ 95 | && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \ 96 | && printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config 97 | 98 | WORKDIR / 99 | 100 | RUN pip3 --no-cache-dir install --upgrade \ 101 | pip \ 102 | setuptools 103 | 104 | # Some TF tools expect a "python" binary 105 | RUN ln -s $(which python3) /usr/local/bin/python \ 106 | && ln -s $(which pip3) /usr/bin/pip 107 | 108 | RUN pip install --no-cache-dir -U \ 109 | numpy==1.17.4 \ 110 | scipy==1.2.2 \ 111 | scikit-learn==0.20.3 \ 112 | pandas==0.24.2 \ 113 | Pillow==7.0.0 \ 114 | h5py==2.9.0 \ 115 | keras_applications==1.0.8 \ 116 | keras_preprocessing==1.1.0 \ 117 | requests==2.22.0 \ 118 | keras==2.3.1 \ 119 | smdebug==0.7.2 \ 120 | sagemaker==1.50.17 \ 121 | sagemaker-experiments==0.1.7 \ 122 | "cryptography>=2.3" \ 123 | "sagemaker-tensorflow>=1.15,<1.16" \ 124 | "sagemaker-tensorflow-training>=2,<3" \ 125 | # Let's install TensorFlow separately in the end to avoid 126 | # the library version to be overwritten 127 | && pip install --force-reinstall --no-cache-dir -U \ 128 | tensorflow_gpu==1.15.2 \ 129 | && pip install --no-cache-dir -U \ 130 | awscli 131 | 132 | # Allow OpenSSH to talk to containers without asking for confirmation 133 | RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \ 134 | && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \ 135 | && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config 136 | 137 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 138 | 139 | RUN chmod +x /usr/local/bin/deep_learning_container.py 140 | 141 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 142 | 143 | CMD ["bin/bash"] -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dateutil==2.8.2 2 | annoy==1.17.3 3 | pillow==9.0.1 4 | matplotlib==3.7.4 5 | numpy==1.24.3 6 | pandas==2.0.3 7 | pygame==2.5.2 8 | scipy==1.8.0 9 | scikit-image==0.21.0 10 | futures==3.0.5 11 | boto3==1.34.12 12 | minio==7.2.0 13 | cryptography==41.0.7 14 | bokeh==3.1.1 15 | retrying==1.3.4 16 | eventlet==0.34.2 17 | flask==3.0.0 18 | gevent==23.9.1 19 | gunicorn==21.2.0 20 | h5py==3.10.0 21 | pytest==7.4.4 22 | pytest-cov==4.1.0 23 | netifaces 24 | sagemaker<2 25 | sagemaker-experiments==0.1.45 26 | sagemaker-tensorflow<3 27 | sagemaker-tensorflow-training>=2,<3 28 | sagemaker-containers>=2.7.1 29 | protobuf<3.20 -------------------------------------------------------------------------------- /docker/secondary/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG arch 2 | ARG version 3 | ARG prefix 4 | FROM ${prefix}/sagemaker-tensorflow-container:${version}-${arch} 5 | 6 | RUN apt-get update && apt-get install -y --no-install-recommends \ 7 | wget \ 8 | jq \ 9 | ffmpeg \ 10 | libjpeg-dev \ 11 | libxrender1 \ 12 | python3-opengl \ 13 | pkg-config \ 14 | xvfb && \ 15 | apt-get clean && \ 16 | rm -rf /var/lib/apt/lists/* 17 | 18 | # Install Redis. 
19 | RUN cd /tmp && \ 20 | wget https://download.redis.io/releases/redis-6.2.7.tar.gz && \ 21 | tar xvzf redis-6.2.7.tar.gz && \ 22 | cd redis-6.2.7 && \ 23 | make && \ 24 | make install && \ 25 | rm -rf /tmp/redis* 26 | 27 | RUN pip install -U --no-cache-dir --upgrade-strategy only-if-needed \ 28 | pyglet \ 29 | gym \ 30 | "redis>=4.4.4" \ 31 | "rl-coach-slim==1.0.0" \ 32 | "protobuf<3.20" \ 33 | awscli 34 | RUN wget https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -O /usr/local/lib/python3.8/dist-packages/google/protobuf/internal/builder.py 35 | 36 | COPY ./files/redis.conf /etc/redis/redis.conf 37 | COPY ./files/rl_coach.patch /opt/amazon/rl_coach.patch 38 | RUN patch -p1 -N --directory=/usr/local/lib/python3.8/dist-packages/ < /opt/amazon/rl_coach.patch 39 | 40 | ENV COACH_BACKEND=tensorflow 41 | 42 | # Copy workaround script for incorrect hostname 43 | COPY files/changehostname.c / 44 | COPY files/start.sh /usr/local/bin/start.sh 45 | RUN chmod +x /usr/local/bin/start.sh 46 | 47 | ENV PYTHONPATH /opt/amazon/:$PYTHONPATH 48 | ENV PATH /opt/ml/code/:$PATH 49 | WORKDIR /opt/ml/code 50 | 51 | # Tell sagemaker-containers where the launch point is for training job. 52 | ENV NODE_TYPE SAGEMAKER_TRAINING_WORKER 53 | 54 | ENV PYTHONUNBUFFERED 1 55 | 56 | # Versioning 57 | ARG IMG_VERSION 58 | LABEL maintainer "AWS DeepRacer Community - deepracing.io" 59 | LABEL version $IMG_VERSION 60 | 61 | # Starts framework 62 | ENTRYPOINT ["bash", "-m", "start.sh", "train"] 63 | -------------------------------------------------------------------------------- /files/changehostname.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <string.h> 3 | 4 | /* 5 | * Modifies gethostname to return algo-1, algo-2, etc. when running on SageMaker. 6 | * 7 | * Without this gethostname() on SageMaker returns 'aws', leading NCCL/MPI to think there is only one host, 8 | * not realizing that it needs to use NET/Socket. 9 | * 10 | * When docker container starts we read 'current_host' value from /opt/ml/input/config/resourceconfig.json 11 | * and replace PLACEHOLDER_HOSTNAME with it before compiling this code into a shared library. 12 | */ 13 | int gethostname(char *name, size_t len) 14 | { 15 | const char *val = PLACEHOLDER_HOSTNAME; 16 | strncpy(name, val, len); 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /files/redis.conf: -------------------------------------------------------------------------------- 1 | # Redis configuration file example. 2 | # 3 | # Note that in order to read the configuration file, Redis must be 4 | # started with the file path as first argument: 5 | # 6 | # ./redis-server /path/to/redis.conf 7 | 8 | # Note on units: when memory size is needed, it is possible to specify 9 | # it in the usual form of 1k 5GB 4M and so forth: 10 | # 11 | # 1k => 1000 bytes 12 | # 1kb => 1024 bytes 13 | # 1m => 1000000 bytes 14 | # 1mb => 1024*1024 bytes 15 | # 1g => 1000000000 bytes 16 | # 1gb => 1024*1024*1024 bytes 17 | # 18 | # units are case insensitive so 1GB 1Gb 1gB are all the same. 19 | 20 | ################################## INCLUDES ################################### 21 | 22 | # Include one or more other config files here. This is useful if you 23 | # have a standard template that goes to all Redis servers but also need 24 | # to customize a few per-server settings. Include files can include 25 | # other files, so use this wisely.
26 | # 27 | # Notice option "include" won't be rewritten by command "CONFIG REWRITE" 28 | # from admin or Redis Sentinel. Since Redis always uses the last processed 29 | # line as value of a configuration directive, you'd better put includes 30 | # at the beginning of this file to avoid overwriting config change at runtime. 31 | # 32 | # If instead you are interested in using includes to override configuration 33 | # options, it is better to use include as the last line. 34 | # 35 | # include /path/to/local.conf 36 | # include /path/to/other.conf 37 | 38 | ################################## MODULES ##################################### 39 | 40 | # Load modules at startup. If the server is not able to load modules 41 | # it will abort. It is possible to use multiple loadmodule directives. 42 | # 43 | # loadmodule /path/to/my_module.so 44 | # loadmodule /path/to/other_module.so 45 | 46 | ################################## NETWORK ##################################### 47 | 48 | # By default, if no "bind" configuration directive is specified, Redis listens 49 | # for connections from all the network interfaces available on the server. 50 | # It is possible to listen to just one or multiple selected interfaces using 51 | # the "bind" configuration directive, followed by one or more IP addresses. 52 | # 53 | # Examples: 54 | # 55 | # bind 192.168.1.100 10.0.0.1 56 | # bind 127.0.0.1 ::1 57 | # 58 | # ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the 59 | # internet, binding to all the interfaces is dangerous and will expose the 60 | # instance to everybody on the internet. So by default we uncomment the 61 | # following bind directive, that will force Redis to listen only into 62 | # the IPv4 loopback interface address (this means Redis will be able to 63 | # accept connections only from clients running into the same computer it 64 | # is running). 65 | # 66 | # IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES 67 | # JUST COMMENT THE FOLLOWING LINE. 68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | bind 0.0.0.0 70 | 71 | # Protected mode is a layer of security protection, in order to avoid that 72 | # Redis instances left open on the internet are accessed and exploited. 73 | # 74 | # When protected mode is on and if: 75 | # 76 | # 1) The server is not binding explicitly to a set of addresses using the 77 | # "bind" directive. 78 | # 2) No password is configured. 79 | # 80 | # The server only accepts connections from clients connecting from the 81 | # IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain 82 | # sockets. 83 | # 84 | # By default protected mode is enabled. You should disable it only if 85 | # you are sure you want clients from other hosts to connect to Redis 86 | # even if no authentication is configured, nor a specific set of interfaces 87 | # are explicitly listed using the "bind" directive. 88 | protected-mode yes 89 | 90 | # Accept connections on the specified port, default is 6379 (IANA #815344). 91 | # If port 0 is specified Redis will not listen on a TCP socket. 92 | port 6379 93 | 94 | # TCP listen() backlog. 95 | # 96 | # In high requests-per-second environments you need an high backlog in order 97 | # to avoid slow clients connections issues. Note that the Linux kernel 98 | # will silently truncate it to the value of /proc/sys/net/core/somaxconn so 99 | # make sure to raise both the value of somaxconn and tcp_max_syn_backlog 100 | # in order to get the desired effect. 
101 | tcp-backlog 512 102 | 103 | # Unix socket. 104 | # 105 | # Specify the path for the Unix socket that will be used to listen for 106 | # incoming connections. There is no default, so Redis will not listen 107 | # on a unix socket when not specified. 108 | # 109 | # unixsocket /tmp/redis.sock 110 | # unixsocketperm 700 111 | 112 | # Close the connection after a client is idle for N seconds (0 to disable) 113 | timeout 0 114 | 115 | # TCP keepalive. 116 | # 117 | # If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence 118 | # of communication. This is useful for two reasons: 119 | # 120 | # 1) Detect dead peers. 121 | # 2) Take the connection alive from the point of view of network 122 | # equipment in the middle. 123 | # 124 | # On Linux, the specified value (in seconds) is the period used to send ACKs. 125 | # Note that to close the connection the double of the time is needed. 126 | # On other kernels the period depends on the kernel configuration. 127 | # 128 | # A reasonable value for this option is 300 seconds, which is the new 129 | # Redis default starting with Redis 3.2.1. 130 | tcp-keepalive 300 131 | 132 | ################################# GENERAL ##################################### 133 | 134 | # By default Redis does not run as a daemon. Use 'yes' if you need it. 135 | # Note that Redis will write a pid file in /var/run/redis.pid when daemonized. 136 | daemonize no 137 | 138 | # If you run Redis from upstart or systemd, Redis can interact with your 139 | # supervision tree. Options: 140 | # supervised no - no supervision interaction 141 | # supervised upstart - signal upstart by putting Redis into SIGSTOP mode 142 | # supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET 143 | # supervised auto - detect upstart or systemd method based on 144 | # UPSTART_JOB or NOTIFY_SOCKET environment variables 145 | # Note: these supervision methods only signal "process is ready." 146 | # They do not enable continuous liveness pings back to your supervisor. 147 | supervised no 148 | 149 | # If a pid file is specified, Redis writes it where specified at startup 150 | # and removes it at exit. 151 | # 152 | # When the server runs non daemonized, no pid file is created if none is 153 | # specified in the configuration. When the server is daemonized, the pid file 154 | # is used even if not specified, defaulting to "/var/run/redis.pid". 155 | # 156 | # Creating a pid file is best effort: if Redis is not able to create it 157 | # nothing bad happens, the server will start and run normally. 158 | pidfile /var/run/redis_6379.pid 159 | 160 | # Specify the server verbosity level. 161 | # This can be one of: 162 | # debug (a lot of information, useful for development/testing) 163 | # verbose (many rarely useful info, but not a mess like the debug level) 164 | # notice (moderately verbose, what you want in production probably) 165 | # warning (only very important / critical messages are logged) 166 | loglevel notice 167 | 168 | # Specify the log file name. Also the empty string can be used to force 169 | # Redis to log on the standard output. Note that if you use standard 170 | # output for logging but daemonize, logs will be sent to /dev/null 171 | logfile "" 172 | 173 | # To enable logging to the system logger, just set 'syslog-enabled' to yes, 174 | # and optionally update the other syslog parameters to suit your needs. 175 | # syslog-enabled no 176 | 177 | # Specify the syslog identity. 178 | # syslog-ident redis 179 | 180 | # Specify the syslog facility. 
Must be USER or between LOCAL0-LOCAL7. 181 | # syslog-facility local0 182 | 183 | # Set the number of databases. The default database is DB 0, you can select 184 | # a different one on a per-connection basis using SELECT where 185 | # dbid is a number between 0 and 'databases'-1 186 | databases 16 187 | 188 | # By default Redis shows an ASCII art logo only when started to log to the 189 | # standard output and if the standard output is a TTY. Basically this means 190 | # that normally a logo is displayed only in interactive sessions. 191 | # 192 | # However it is possible to force the pre-4.0 behavior and always show a 193 | # ASCII art logo in startup logs by setting the following option to yes. 194 | always-show-logo yes 195 | 196 | ################################ SNAPSHOTTING ################################ 197 | # 198 | # Save the DB on disk: 199 | # 200 | # save 201 | # 202 | # Will save the DB if both the given number of seconds and the given 203 | # number of write operations against the DB occurred. 204 | # 205 | # In the example below the behaviour will be to save: 206 | # after 900 sec (15 min) if at least 1 key changed 207 | # after 300 sec (5 min) if at least 10 keys changed 208 | # after 60 sec if at least 10000 keys changed 209 | # 210 | # Note: you can disable saving completely by commenting out all "save" lines. 211 | # 212 | # It is also possible to remove all the previously configured save 213 | # points by adding a save directive with a single empty string argument 214 | # like in the following example: 215 | # 216 | # save "" 217 | 218 | save 900 1 219 | save 300 10 220 | save 60 10000 221 | 222 | # By default Redis will stop accepting writes if RDB snapshots are enabled 223 | # (at least one save point) and the latest background save failed. 224 | # This will make the user aware (in a hard way) that data is not persisting 225 | # on disk properly, otherwise chances are that no one will notice and some 226 | # disaster will happen. 227 | # 228 | # If the background saving process will start working again Redis will 229 | # automatically allow writes again. 230 | # 231 | # However if you have setup your proper monitoring of the Redis server 232 | # and persistence, you may want to disable this feature so that Redis will 233 | # continue to work as usual even if there are problems with disk, 234 | # permissions, and so forth. 235 | stop-writes-on-bgsave-error yes 236 | 237 | # Compress string objects using LZF when dump .rdb databases? 238 | # For default that's set to 'yes' as it's almost always a win. 239 | # If you want to save some CPU in the saving child set it to 'no' but 240 | # the dataset will likely be bigger if you have compressible values or keys. 241 | rdbcompression yes 242 | 243 | # Since version 5 of RDB a CRC64 checksum is placed at the end of the file. 244 | # This makes the format more resistant to corruption but there is a performance 245 | # hit to pay (around 10%) when saving and loading RDB files, so you can disable it 246 | # for maximum performances. 247 | # 248 | # RDB files created with checksum disabled have a checksum of zero that will 249 | # tell the loading code to skip the check. 250 | rdbchecksum yes 251 | 252 | # The filename where to dump the DB 253 | dbfilename dump.rdb 254 | 255 | # The working directory. 256 | # 257 | # The DB will be written inside this directory, with the filename specified 258 | # above using the 'dbfilename' configuration directive. 
259 | # 260 | # The Append Only File will also be created inside this directory. 261 | # 262 | # Note that you must specify a directory here, not a file name. 263 | dir ./ 264 | 265 | ################################# REPLICATION ################################# 266 | 267 | # Master-Replica replication. Use replicaof to make a Redis instance a copy of 268 | # another Redis server. A few things to understand ASAP about Redis replication. 269 | # 270 | # +------------------+ +---------------+ 271 | # | Master | ---> | Replica | 272 | # | (receive writes) | | (exact copy) | 273 | # +------------------+ +---------------+ 274 | # 275 | # 1) Redis replication is asynchronous, but you can configure a master to 276 | # stop accepting writes if it appears to be not connected with at least 277 | # a given number of replicas. 278 | # 2) Redis replicas are able to perform a partial resynchronization with the 279 | # master if the replication link is lost for a relatively small amount of 280 | # time. You may want to configure the replication backlog size (see the next 281 | # sections of this file) with a sensible value depending on your needs. 282 | # 3) Replication is automatic and does not need user intervention. After a 283 | # network partition replicas automatically try to reconnect to masters 284 | # and resynchronize with them. 285 | # 286 | # replicaof 287 | 288 | # If the master is password protected (using the "requirepass" configuration 289 | # directive below) it is possible to tell the replica to authenticate before 290 | # starting the replication synchronization process, otherwise the master will 291 | # refuse the replica request. 292 | # 293 | # masterauth 294 | 295 | # When a replica loses its connection with the master, or when the replication 296 | # is still in progress, the replica can act in two different ways: 297 | # 298 | # 1) if replica-serve-stale-data is set to 'yes' (the default) the replica will 299 | # still reply to client requests, possibly with out of date data, or the 300 | # data set may just be empty if this is the first synchronization. 301 | # 302 | # 2) if replica-serve-stale-data is set to 'no' the replica will reply with 303 | # an error "SYNC with master in progress" to all the kind of commands 304 | # but to INFO, replicaOF, AUTH, PING, SHUTDOWN, REPLCONF, ROLE, CONFIG, 305 | # SUBSCRIBE, UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB, 306 | # COMMAND, POST, HOST: and LATENCY. 307 | # 308 | replica-serve-stale-data yes 309 | 310 | # You can configure a replica instance to accept writes or not. Writing against 311 | # a replica instance may be useful to store some ephemeral data (because data 312 | # written on a replica will be easily deleted after resync with the master) but 313 | # may also cause problems if clients are writing to it because of a 314 | # misconfiguration. 315 | # 316 | # Since Redis 2.6 by default replicas are read-only. 317 | # 318 | # Note: read only replicas are not designed to be exposed to untrusted clients 319 | # on the internet. It's just a protection layer against misuse of the instance. 320 | # Still a read only replica exports by default all the administrative commands 321 | # such as CONFIG, DEBUG, and so forth. To a limited extent you can improve 322 | # security of read only replicas using 'rename-command' to shadow all the 323 | # administrative / dangerous commands. 324 | replica-read-only yes 325 | 326 | # Replication SYNC strategy: disk or socket. 
327 | # 328 | # ------------------------------------------------------- 329 | # WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY 330 | # ------------------------------------------------------- 331 | # 332 | # New replicas and reconnecting replicas that are not able to continue the replication 333 | # process just receiving differences, need to do what is called a "full 334 | # synchronization". An RDB file is transmitted from the master to the replicas. 335 | # The transmission can happen in two different ways: 336 | # 337 | # 1) Disk-backed: The Redis master creates a new process that writes the RDB 338 | # file on disk. Later the file is transferred by the parent 339 | # process to the replicas incrementally. 340 | # 2) Diskless: The Redis master creates a new process that directly writes the 341 | # RDB file to replica sockets, without touching the disk at all. 342 | # 343 | # With disk-backed replication, while the RDB file is generated, more replicas 344 | # can be queued and served with the RDB file as soon as the current child producing 345 | # the RDB file finishes its work. With diskless replication instead once 346 | # the transfer starts, new replicas arriving will be queued and a new transfer 347 | # will start when the current one terminates. 348 | # 349 | # When diskless replication is used, the master waits a configurable amount of 350 | # time (in seconds) before starting the transfer in the hope that multiple replicas 351 | # will arrive and the transfer can be parallelized. 352 | # 353 | # With slow disks and fast (large bandwidth) networks, diskless replication 354 | # works better. 355 | repl-diskless-sync yes 356 | 357 | # When diskless replication is enabled, it is possible to configure the delay 358 | # the server waits in order to spawn the child that transfers the RDB via socket 359 | # to the replicas. 360 | # 361 | # This is important since once the transfer starts, it is not possible to serve 362 | # new replicas arriving, that will be queued for the next RDB transfer, so the server 363 | # waits a delay in order to let more replicas arrive. 364 | # 365 | # The delay is specified in seconds, and by default is 5 seconds. To disable 366 | # it entirely just set it to 0 seconds and the transfer will start ASAP. 367 | repl-diskless-sync-delay 5 368 | 369 | # Replicas send PINGs to server in a predefined interval. It's possible to change 370 | # this interval with the repl_ping_replica_period option. The default value is 10 371 | # seconds. 372 | # 373 | repl-ping-replica-period 10 374 | 375 | # The following option sets the replication timeout for: 376 | # 377 | # 1) Bulk transfer I/O during SYNC, from the point of view of replica. 378 | # 2) Master timeout from the point of view of replicas (data, pings). 379 | # 3) Replica timeout from the point of view of masters (REPLCONF ACK pings). 380 | # 381 | # It is important to make sure that this value is greater than the value 382 | # specified for repl-ping-replica-period otherwise a timeout will be detected 383 | # every time there is low traffic between the master and the replica. 384 | # 385 | repl-timeout 60 386 | 387 | # Disable TCP_NODELAY on the replica socket after SYNC? 388 | # 389 | # If you select "yes" Redis will use a smaller number of TCP packets and 390 | # less bandwidth to send data to replicas. But this can add a delay for 391 | # the data to appear on the replica side, up to 40 milliseconds with 392 | # Linux kernels using a default configuration. 
393 | # 394 | # If you select "no" the delay for data to appear on the replica side will 395 | # be reduced but more bandwidth will be used for replication. 396 | # 397 | # By default we optimize for low latency, but in very high traffic conditions 398 | # or when the master and replicas are many hops away, turning this to "yes" may 399 | # be a good idea. 400 | repl-disable-tcp-nodelay no 401 | 402 | # Set the replication backlog size. The backlog is a buffer that accumulates 403 | # replica data when replicas are disconnected for some time, so that when a replica 404 | # wants to reconnect again, often a full resync is not needed, but a partial 405 | # resync is enough, just passing the portion of data the replica missed while 406 | # disconnected. 407 | # 408 | # The bigger the replication backlog, the longer the time the replica can be 409 | # disconnected and later be able to perform a partial resynchronization. 410 | # 411 | # The backlog is only allocated once there is at least a replica connected. 412 | # 413 | repl-backlog-size 500mb 414 | 415 | # After a master has no longer connected replicas for some time, the backlog 416 | # will be freed. The following option configures the amount of seconds that 417 | # need to elapse, starting from the time the last replica disconnected, for 418 | # the backlog buffer to be freed. 419 | # 420 | # Note that replicas never free the backlog for timeout, since they may be 421 | # promoted to masters later, and should be able to correctly "partially 422 | # resynchronize" with the replicas: hence they should always accumulate backlog. 423 | # 424 | # A value of 0 means to never release the backlog. 425 | # 426 | # repl-backlog-ttl 3600 427 | 428 | # The replica priority is an integer number published by Redis in the INFO output. 429 | # It is used by Redis Sentinel in order to select a replica to promote into a 430 | # master if the master is no longer working correctly. 431 | # 432 | # A replica with a low priority number is considered better for promotion, so 433 | # for instance if there are three replicas with priority 10, 100, 25 Sentinel will 434 | # pick the one with priority 10, that is the lowest. 435 | # 436 | # However a special priority of 0 marks the replica as not able to perform the 437 | # role of master, so a replica with priority of 0 will never be selected by 438 | # Redis Sentinel for promotion. 439 | # 440 | # By default the priority is 100. 441 | replica-priority 100 442 | 443 | # It is possible for a master to stop accepting writes if there are less than 444 | # N replicas connected, having a lag less or equal than M seconds. 445 | # 446 | # The N replicas need to be in "online" state. 447 | # 448 | # The lag in seconds, that must be <= the specified value, is calculated from 449 | # the last ping received from the replica, that is usually sent every second. 450 | # 451 | # This option does not GUARANTEE that N replicas will accept the write, but 452 | # will limit the window of exposure for lost writes in case not enough replicas 453 | # are available, to the specified number of seconds. 454 | # 455 | # For example to require at least 3 replicas with a lag <= 10 seconds use: 456 | # 457 | # min-replicas-to-write 3 458 | # min-replicas-max-lag 10 459 | # 460 | # Setting one or the other to 0 disables the feature. 461 | # 462 | # By default min-replicas-to-write is set to 0 (feature disabled) and 463 | # min-replicas-max-lag is set to 10. 
464 | 465 | # A Redis master is able to list the address and port of the attached 466 | # replicas in different ways. For example the "INFO replication" section 467 | # offers this information, which is used, among other tools, by 468 | # Redis Sentinel in order to discover replica instances. 469 | # Another place where this info is available is in the output of the 470 | # "ROLE" command of a master. 471 | # 472 | # The listed IP and address normally reported by a replica is obtained 473 | # in the following way: 474 | # 475 | # IP: The address is auto detected by checking the peer address 476 | # of the socket used by the replica to connect with the master. 477 | # 478 | # Port: The port is communicated by the replica during the replication 479 | # handshake, and is normally the port that the replica is using to 480 | # listen for connections. 481 | # 482 | # However when port forwarding or Network Address Translation (NAT) is 483 | # used, the replica may be actually reachable via different IP and port 484 | # pairs. The following two options can be used by a replica in order to 485 | # report to its master a specific set of IP and port, so that both INFO 486 | # and ROLE will report those values. 487 | # 488 | # There is no need to use both the options if you need to override just 489 | # the port or the IP address. 490 | # 491 | # replica-announce-ip 5.5.5.5 492 | # replica-announce-port 1234 493 | 494 | ################################## SECURITY ################################### 495 | 496 | # Require clients to issue AUTH before processing any other 497 | # commands. This might be useful in environments in which you do not trust 498 | # others with access to the host running redis-server. 499 | # 500 | # This should stay commented out for backward compatibility and because most 501 | # people do not need auth (e.g. they run their own servers). 502 | # 503 | # Warning: since Redis is pretty fast an outside user can try up to 504 | # 150k passwords per second against a good box. This means that you should 505 | # use a very strong password otherwise it will be very easy to break. 506 | # 507 | # requirepass foobared 508 | 509 | # Command renaming. 510 | # 511 | # It is possible to change the name of dangerous commands in a shared 512 | # environment. For instance the CONFIG command may be renamed into something 513 | # hard to guess so that it will still be available for internal-use tools 514 | # but not available for general clients. 515 | # 516 | # Example: 517 | # 518 | # rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 519 | # 520 | # It is also possible to completely kill a command by renaming it into 521 | # an empty string: 522 | # 523 | # rename-command CONFIG "" 524 | # 525 | # Please note that changing the name of commands that are logged into the 526 | # AOF file or transmitted to replicas may cause problems. 527 | 528 | ################################### CLIENTS #################################### 529 | 530 | # Set the max number of connected clients at the same time. By default 531 | # this limit is set to 10000 clients, however if the Redis server is not 532 | # able to configure the process file limit to allow for the specified limit 533 | # the max number of allowed clients is set to the current file limit 534 | # minus 32 (as Redis reserves a few file descriptors for internal uses). 535 | # 536 | # Once the limit is reached Redis will close all the new connections sending 537 | # an error 'max number of clients reached'. 
538 | # 539 | # maxclients 10000 540 | 541 | ############################## MEMORY MANAGEMENT ################################ 542 | 543 | # Set a memory usage limit to the specified amount of bytes. 544 | # When the memory limit is reached Redis will try to remove keys 545 | # according to the eviction policy selected (see maxmemory-policy). 546 | # 547 | # If Redis can't remove keys according to the policy, or if the policy is 548 | # set to 'noeviction', Redis will start to reply with errors to commands 549 | # that would use more memory, like SET, LPUSH, and so on, and will continue 550 | # to reply to read-only commands like GET. 551 | # 552 | # This option is usually useful when using Redis as an LRU or LFU cache, or to 553 | # set a hard memory limit for an instance (using the 'noeviction' policy). 554 | # 555 | # WARNING: If you have replicas attached to an instance with maxmemory on, 556 | # the size of the output buffers needed to feed the replicas are subtracted 557 | # from the used memory count, so that network problems / resyncs will 558 | # not trigger a loop where keys are evicted, and in turn the output 559 | # buffer of replicas is full with DELs of keys evicted triggering the deletion 560 | # of more keys, and so forth until the database is completely emptied. 561 | # 562 | # In short... if you have replicas attached it is suggested that you set a lower 563 | # limit for maxmemory so that there is some free RAM on the system for replica 564 | # output buffers (but this is not needed if the policy is 'noeviction'). 565 | # 566 | #maxmemory 1gb 567 | 568 | # MAXMEMORY POLICY: how Redis will select what to remove when maxmemory 569 | # is reached. You can select among five behaviors: 570 | # 571 | # volatile-lru -> Evict using approximated LRU among the keys with an expire set. 572 | # allkeys-lru -> Evict any key using approximated LRU. 573 | # volatile-lfu -> Evict using approximated LFU among the keys with an expire set. 574 | # allkeys-lfu -> Evict any key using approximated LFU. 575 | # volatile-random -> Remove a random key among the ones with an expire set. 576 | # allkeys-random -> Remove a random key, any key. 577 | # volatile-ttl -> Remove the key with the nearest expire time (minor TTL) 578 | # noeviction -> Don't evict anything, just return an error on write operations. 579 | # 580 | # LRU means Least Recently Used 581 | # LFU means Least Frequently Used 582 | # 583 | # Both LRU, LFU and volatile-ttl are implemented using approximated 584 | # randomized algorithms. 585 | # 586 | # Note: with any of the above policies, Redis will return an error on write 587 | # operations, when there are no suitable keys for eviction. 588 | # 589 | # At the date of writing these commands are: set setnx setex append 590 | # incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd 591 | # sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby 592 | # zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby 593 | # getset mset msetnx exec sort 594 | # 595 | # The default is: 596 | # 597 | maxmemory-policy volatile-lfu 598 | 599 | # LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated 600 | # algorithms (in order to save memory), so you can tune it for speed or 601 | # accuracy. For default Redis will check five keys and pick the one that was 602 | # used less recently, you can change the sample size using the following 603 | # configuration directive. 604 | # 605 | # The default of 5 produces good enough results. 
10 Approximates very closely 606 | # true LRU but costs more CPU. 3 is faster but not very accurate. 607 | # 608 | # maxmemory-samples 5 609 | 610 | # Starting from Redis 5, by default a replica will ignore its maxmemory setting 611 | # (unless it is promoted to master after a failover or manually). It means 612 | # that the eviction of keys will be just handled by the master, sending the 613 | # DEL commands to the replica as keys evict in the master side. 614 | # 615 | # This behavior ensures that masters and replicas stay consistent, and is usually 616 | # what you want, however if your replica is writable, or you want the replica to have 617 | # a different memory setting, and you are sure all the writes performed to the 618 | # replica are idempotent, then you may change this default (but be sure to understand 619 | # what you are doing). 620 | # 621 | # Note that since the replica by default does not evict, it may end using more 622 | # memory than the one set via maxmemory (there are certain buffers that may 623 | # be larger on the replica, or data structures may sometimes take more memory and so 624 | # forth). So make sure you monitor your replicas and make sure they have enough 625 | # memory to never hit a real out-of-memory condition before the master hits 626 | # the configured maxmemory setting. 627 | # 628 | # replica-ignore-maxmemory yes 629 | 630 | ############################# LAZY FREEING #################################### 631 | 632 | # Redis has two primitives to delete keys. One is called DEL and is a blocking 633 | # deletion of the object. It means that the server stops processing new commands 634 | # in order to reclaim all the memory associated with an object in a synchronous 635 | # way. If the key deleted is associated with a small object, the time needed 636 | # in order to execute the DEL command is very small and comparable to most other 637 | # O(1) or O(log_N) commands in Redis. However if the key is associated with an 638 | # aggregated value containing millions of elements, the server can block for 639 | # a long time (even seconds) in order to complete the operation. 640 | # 641 | # For the above reasons Redis also offers non blocking deletion primitives 642 | # such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and 643 | # FLUSHDB commands, in order to reclaim memory in background. Those commands 644 | # are executed in constant time. Another thread will incrementally free the 645 | # object in the background as fast as possible. 646 | # 647 | # DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled. 648 | # It's up to the design of the application to understand when it is a good 649 | # idea to use one or the other. However the Redis server sometimes has to 650 | # delete keys or flush the whole database as a side effect of other operations. 651 | # Specifically Redis deletes objects independently of a user call in the 652 | # following scenarios: 653 | # 654 | # 1) On eviction, because of the maxmemory and maxmemory policy configurations, 655 | # in order to make room for new data, without going over the specified 656 | # memory limit. 657 | # 2) Because of expire: when a key with an associated time to live (see the 658 | # EXPIRE command) must be deleted from memory. 659 | # 3) Because of a side effect of a command that stores data on a key that may 660 | # already exist. For example the RENAME command may delete the old key 661 | # content when it is replaced with another one. 
Similarly SUNIONSTORE 662 | # or SORT with STORE option may delete existing keys. The SET command 663 | # itself removes any old content of the specified key in order to replace 664 | # it with the specified string. 665 | # 4) During replication, when a replica performs a full resynchronization with 666 | # its master, the content of the whole database is removed in order to 667 | # load the RDB file just transferred. 668 | # 669 | # In all the above cases the default is to delete objects in a blocking way, 670 | # like if DEL was called. However you can configure each case specifically 671 | # in order to instead release memory in a non-blocking way like if UNLINK 672 | # was called, using the following configuration directives: 673 | 674 | lazyfree-lazy-eviction no 675 | lazyfree-lazy-expire no 676 | lazyfree-lazy-server-del no 677 | replica-lazy-flush no 678 | 679 | ############################## APPEND ONLY MODE ############################### 680 | 681 | # By default Redis asynchronously dumps the dataset on disk. This mode is 682 | # good enough in many applications, but an issue with the Redis process or 683 | # a power outage may result into a few minutes of writes lost (depending on 684 | # the configured save points). 685 | # 686 | # The Append Only File is an alternative persistence mode that provides 687 | # much better durability. For instance using the default data fsync policy 688 | # (see later in the config file) Redis can lose just one second of writes in a 689 | # dramatic event like a server power outage, or a single write if something 690 | # wrong with the Redis process itself happens, but the operating system is 691 | # still running correctly. 692 | # 693 | # AOF and RDB persistence can be enabled at the same time without problems. 694 | # If the AOF is enabled on startup Redis will load the AOF, that is the file 695 | # with the better durability guarantees. 696 | # 697 | # Please check http://redis.io/topics/persistence for more information. 698 | 699 | appendonly no 700 | 701 | # The name of the append only file (default: "appendonly.aof") 702 | 703 | appendfilename "appendonly.aof" 704 | 705 | # The fsync() call tells the Operating System to actually write data on disk 706 | # instead of waiting for more data in the output buffer. Some OS will really flush 707 | # data on disk, some other OS will just try to do it ASAP. 708 | # 709 | # Redis supports three different modes: 710 | # 711 | # no: don't fsync, just let the OS flush the data when it wants. Faster. 712 | # always: fsync after every write to the append only log. Slow, Safest. 713 | # everysec: fsync only one time every second. Compromise. 714 | # 715 | # The default is "everysec", as that's usually the right compromise between 716 | # speed and data safety. It's up to you to understand if you can relax this to 717 | # "no" that will let the operating system flush the output buffer when 718 | # it wants, for better performances (but if you can live with the idea of 719 | # some data loss consider the default persistence mode that's snapshotting), 720 | # or on the contrary, use "always" that's very slow but a bit safer than 721 | # everysec. 722 | # 723 | # More details please check the following article: 724 | # http://antirez.com/post/redis-persistence-demystified.html 725 | # 726 | # If unsure, use "everysec". 
727 | 728 | # appendfsync always 729 | appendfsync everysec 730 | # appendfsync no 731 | 732 | # When the AOF fsync policy is set to always or everysec, and a background 733 | # saving process (a background save or AOF log background rewriting) is 734 | # performing a lot of I/O against the disk, in some Linux configurations 735 | # Redis may block too long on the fsync() call. Note that there is no fix for 736 | # this currently, as even performing fsync in a different thread will block 737 | # our synchronous write(2) call. 738 | # 739 | # In order to mitigate this problem it's possible to use the following option 740 | # that will prevent fsync() from being called in the main process while a 741 | # BGSAVE or BGREWRITEAOF is in progress. 742 | # 743 | # This means that while another child is saving, the durability of Redis is 744 | # the same as "appendfsync none". In practical terms, this means that it is 745 | # possible to lose up to 30 seconds of log in the worst scenario (with the 746 | # default Linux settings). 747 | # 748 | # If you have latency problems turn this to "yes". Otherwise leave it as 749 | # "no" that is the safest pick from the point of view of durability. 750 | 751 | no-appendfsync-on-rewrite no 752 | 753 | # Automatic rewrite of the append only file. 754 | # Redis is able to automatically rewrite the log file implicitly calling 755 | # BGREWRITEAOF when the AOF log size grows by the specified percentage. 756 | # 757 | # This is how it works: Redis remembers the size of the AOF file after the 758 | # latest rewrite (if no rewrite has happened since the restart, the size of 759 | # the AOF at startup is used). 760 | # 761 | # This base size is compared to the current size. If the current size is 762 | # bigger than the specified percentage, the rewrite is triggered. Also 763 | # you need to specify a minimal size for the AOF file to be rewritten, this 764 | # is useful to avoid rewriting the AOF file even if the percentage increase 765 | # is reached but it is still pretty small. 766 | # 767 | # Specify a percentage of zero in order to disable the automatic AOF 768 | # rewrite feature. 769 | 770 | auto-aof-rewrite-percentage 0 771 | auto-aof-rewrite-min-size 64mb 772 | 773 | # An AOF file may be found to be truncated at the end during the Redis 774 | # startup process, when the AOF data gets loaded back into memory. 775 | # This may happen when the system where Redis is running 776 | # crashes, especially when an ext4 filesystem is mounted without the 777 | # data=ordered option (however this can't happen when Redis itself 778 | # crashes or aborts but the operating system still works correctly). 779 | # 780 | # Redis can either exit with an error when this happens, or load as much 781 | # data as possible (the default now) and start if the AOF file is found 782 | # to be truncated at the end. The following option controls this behavior. 783 | # 784 | # If aof-load-truncated is set to yes, a truncated AOF file is loaded and 785 | # the Redis server starts emitting a log to inform the user of the event. 786 | # Otherwise if the option is set to no, the server aborts with an error 787 | # and refuses to start. When the option is set to no, the user requires 788 | # to fix the AOF file using the "redis-check-aof" utility before to restart 789 | # the server. 790 | # 791 | # Note that if the AOF file will be found to be corrupted in the middle 792 | # the server will still exit with an error. 
This option only applies when 793 | # Redis will try to read more data from the AOF file but not enough bytes 794 | # will be found. 795 | aof-load-truncated yes 796 | 797 | # When rewriting the AOF file, Redis is able to use an RDB preamble in the 798 | # AOF file for faster rewrites and recoveries. When this option is turned 799 | # on the rewritten AOF file is composed of two different stanzas: 800 | # 801 | # [RDB file][AOF tail] 802 | # 803 | # When loading Redis recognizes that the AOF file starts with the "REDIS" 804 | # string and loads the prefixed RDB file, and continues loading the AOF 805 | # tail. 806 | aof-use-rdb-preamble yes 807 | 808 | ################################ LUA SCRIPTING ############################### 809 | 810 | # Max execution time of a Lua script in milliseconds. 811 | # 812 | # If the maximum execution time is reached Redis will log that a script is 813 | # still in execution after the maximum allowed time and will start to 814 | # reply to queries with an error. 815 | # 816 | # When a long running script exceeds the maximum execution time only the 817 | # SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be 818 | # used to stop a script that did not yet called write commands. The second 819 | # is the only way to shut down the server in the case a write command was 820 | # already issued by the script but the user doesn't want to wait for the natural 821 | # termination of the script. 822 | # 823 | # Set it to 0 or a negative value for unlimited execution without warnings. 824 | lua-time-limit 5000 825 | 826 | ################################ REDIS CLUSTER ############################### 827 | # 828 | # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 829 | # WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however 830 | # in order to mark it as "mature" we need to wait for a non trivial percentage 831 | # of users to deploy it in production. 832 | # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 833 | # 834 | # Normal Redis instances can't be part of a Redis Cluster; only nodes that are 835 | # started as cluster nodes can. In order to start a Redis instance as a 836 | # cluster node enable the cluster support uncommenting the following: 837 | # 838 | # cluster-enabled yes 839 | 840 | # Every cluster node has a cluster configuration file. This file is not 841 | # intended to be edited by hand. It is created and updated by Redis nodes. 842 | # Every Redis Cluster node requires a different cluster configuration file. 843 | # Make sure that instances running in the same system do not have 844 | # overlapping cluster configuration file names. 845 | # 846 | # cluster-config-file nodes-6379.conf 847 | 848 | # Cluster node timeout is the amount of milliseconds a node must be unreachable 849 | # for it to be considered in failure state. 850 | # Most other internal time limits are multiple of the node timeout. 851 | # 852 | # cluster-node-timeout 15000 853 | 854 | # A replica of a failing master will avoid to start a failover if its data 855 | # looks too old. 856 | # 857 | # There is no simple way for a replica to actually have an exact measure of 858 | # its "data age", so the following two checks are performed: 859 | # 860 | # 1) If there are multiple replicas able to failover, they exchange messages 861 | # in order to try to give an advantage to the replica with the best 862 | # replication offset (more data from the master processed). 
863 | # Replicas will try to get their rank by offset, and apply to the start 864 | # of the failover a delay proportional to their rank. 865 | # 866 | # 2) Every single replica computes the time of the last interaction with 867 | # its master. This can be the last ping or command received (if the master 868 | # is still in the "connected" state), or the time that elapsed since the 869 | # disconnection with the master (if the replication link is currently down). 870 | # If the last interaction is too old, the replica will not try to failover 871 | # at all. 872 | # 873 | # The point "2" can be tuned by user. Specifically a replica will not perform 874 | # the failover if, since the last interaction with the master, the time 875 | # elapsed is greater than: 876 | # 877 | # (node-timeout * replica-validity-factor) + repl-ping-replica-period 878 | # 879 | # So for example if node-timeout is 30 seconds, and the replica-validity-factor 880 | # is 10, and assuming a default repl-ping-replica-period of 10 seconds, the 881 | # replica will not try to failover if it was not able to talk with the master 882 | # for longer than 310 seconds. 883 | # 884 | # A large replica-validity-factor may allow replicas with too old data to failover 885 | # a master, while a too small value may prevent the cluster from being able to 886 | # elect a replica at all. 887 | # 888 | # For maximum availability, it is possible to set the replica-validity-factor 889 | # to a value of 0, which means, that replicas will always try to failover the 890 | # master regardless of the last time they interacted with the master. 891 | # (However they'll always try to apply a delay proportional to their 892 | # offset rank). 893 | # 894 | # Zero is the only value able to guarantee that when all the partitions heal 895 | # the cluster will always be able to continue. 896 | # 897 | # cluster-replica-validity-factor 10 898 | 899 | # Cluster replicas are able to migrate to orphaned masters, that are masters 900 | # that are left without working replicas. This improves the cluster ability 901 | # to resist to failures as otherwise an orphaned master can't be failed over 902 | # in case of failure if it has no working replicas. 903 | # 904 | # Replicas migrate to orphaned masters only if there are still at least a 905 | # given number of other working replicas for their old master. This number 906 | # is the "migration barrier". A migration barrier of 1 means that a replica 907 | # will migrate only if there is at least 1 other working replica for its master 908 | # and so forth. It usually reflects the number of replicas you want for every 909 | # master in your cluster. 910 | # 911 | # Default is 1 (replicas migrate only if their masters remain with at least 912 | # one replica). To disable migration just set it to a very large value. 913 | # A value of 0 can be set but is useful only for debugging and dangerous 914 | # in production. 915 | # 916 | # cluster-migration-barrier 1 917 | 918 | # By default Redis Cluster nodes stop accepting queries if they detect there 919 | # is at least an hash slot uncovered (no available node is serving it). 920 | # This way if the cluster is partially down (for example a range of hash slots 921 | # are no longer covered) all the cluster becomes, eventually, unavailable. 922 | # It automatically returns available as soon as all the slots are covered again. 
923 | # 924 | # However sometimes you want the subset of the cluster which is working, 925 | # to continue to accept queries for the part of the key space that is still 926 | # covered. In order to do so, just set the cluster-require-full-coverage 927 | # option to no. 928 | # 929 | # cluster-require-full-coverage yes 930 | 931 | # This option, when set to yes, prevents replicas from trying to failover its 932 | # master during master failures. However the master can still perform a 933 | # manual failover, if forced to do so. 934 | # 935 | # This is useful in different scenarios, especially in the case of multiple 936 | # data center operations, where we want one side to never be promoted if not 937 | # in the case of a total DC failure. 938 | # 939 | # cluster-replica-no-failover no 940 | 941 | # In order to setup your cluster make sure to read the documentation 942 | # available at http://redis.io web site. 943 | 944 | ########################## CLUSTER DOCKER/NAT support ######################## 945 | 946 | # In certain deployments, Redis Cluster nodes address discovery fails, because 947 | # addresses are NAT-ted or because ports are forwarded (the typical case is 948 | # Docker and other containers). 949 | # 950 | # In order to make Redis Cluster working in such environments, a static 951 | # configuration where each node knows its public address is needed. The 952 | # following two options are used for this scope, and are: 953 | # 954 | # * cluster-announce-ip 955 | # * cluster-announce-port 956 | # * cluster-announce-bus-port 957 | # 958 | # Each instruct the node about its address, client port, and cluster message 959 | # bus port. The information is then published in the header of the bus packets 960 | # so that other nodes will be able to correctly map the address of the node 961 | # publishing the information. 962 | # 963 | # If the above options are not used, the normal Redis Cluster auto-detection 964 | # will be used instead. 965 | # 966 | # Note that when remapped, the bus port may not be at the fixed offset of 967 | # clients port + 10000, so you can specify any port and bus-port depending 968 | # on how they get remapped. If the bus-port is not set, a fixed offset of 969 | # 10000 will be used as usually. 970 | # 971 | # Example: 972 | # 973 | # cluster-announce-ip 10.1.1.5 974 | # cluster-announce-port 6379 975 | # cluster-announce-bus-port 6380 976 | 977 | ################################## SLOW LOG ################################### 978 | 979 | # The Redis Slow Log is a system to log queries that exceeded a specified 980 | # execution time. The execution time does not include the I/O operations 981 | # like talking with the client, sending the reply and so forth, 982 | # but just the time needed to actually execute the command (this is the only 983 | # stage of command execution where the thread is blocked and can not serve 984 | # other requests in the meantime). 985 | # 986 | # You can configure the slow log with two parameters: one tells Redis 987 | # what is the execution time, in microseconds, to exceed in order for the 988 | # command to get logged, and the other parameter is the length of the 989 | # slow log. When a new command is logged the oldest one is removed from the 990 | # queue of logged commands. 991 | 992 | # The following time is expressed in microseconds, so 1000000 is equivalent 993 | # to one second. Note that a negative number disables the slow log, while 994 | # a value of zero forces the logging of every command. 
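# As an illustrative example only (not part of the shipped defaults): given the
# microsecond unit described above, a hypothetical "slowlog-log-slower-than 10000"
# would log every command taking longer than 10 milliseconds, whereas the negative
# value set below disables the slow log entirely. Logged entries can be inspected
# from redis-cli with "SLOWLOG GET" and cleared with "SLOWLOG RESET".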
995 | slowlog-log-slower-than -1 996 | 997 | # There is no limit to this length. Just be aware that it will consume memory. 998 | # You can reclaim memory used by the slow log with SLOWLOG RESET. 999 | slowlog-max-len 128 1000 | 1001 | ################################ LATENCY MONITOR ############################## 1002 | 1003 | # The Redis latency monitoring subsystem samples different operations 1004 | # at runtime in order to collect data related to possible sources of 1005 | # latency of a Redis instance. 1006 | # 1007 | # Via the LATENCY command this information is available to the user that can 1008 | # print graphs and obtain reports. 1009 | # 1010 | # The system only logs operations that were performed in a time equal or 1011 | # greater than the amount of milliseconds specified via the 1012 | # latency-monitor-threshold configuration directive. When its value is set 1013 | # to zero, the latency monitor is turned off. 1014 | # 1015 | # By default latency monitoring is disabled since it is mostly not needed 1016 | # if you don't have latency issues, and collecting data has a performance 1017 | # impact, that while very small, can be measured under big load. Latency 1018 | # monitoring can easily be enabled at runtime using the command 1019 | # "CONFIG SET latency-monitor-threshold " if needed. 1020 | latency-monitor-threshold 0 1021 | 1022 | ############################# EVENT NOTIFICATION ############################## 1023 | 1024 | # Redis can notify Pub/Sub clients about events happening in the key space. 1025 | # This feature is documented at http://redis.io/topics/notifications 1026 | # 1027 | # For instance if keyspace events notification is enabled, and a client 1028 | # performs a DEL operation on key "foo" stored in the Database 0, two 1029 | # messages will be published via Pub/Sub: 1030 | # 1031 | # PUBLISH __keyspace@0__:foo del 1032 | # PUBLISH __keyevent@0__:del foo 1033 | # 1034 | # It is possible to select the events that Redis will notify among a set 1035 | # of classes. Every class is identified by a single character: 1036 | # 1037 | # K Keyspace events, published with __keyspace@__ prefix. 1038 | # E Keyevent events, published with __keyevent@__ prefix. 1039 | # g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... 1040 | # $ String commands 1041 | # l List commands 1042 | # s Set commands 1043 | # h Hash commands 1044 | # z Sorted set commands 1045 | # x Expired events (events generated every time a key expires) 1046 | # e Evicted events (events generated when a key is evicted for maxmemory) 1047 | # A Alias for g$lshzxe, so that the "AKE" string means all the events. 1048 | # 1049 | # The "notify-keyspace-events" takes as argument a string that is composed 1050 | # of zero or multiple characters. The empty string means that notifications 1051 | # are disabled. 1052 | # 1053 | # Example: to enable list and generic events, from the point of view of the 1054 | # event name, use: 1055 | # 1056 | # notify-keyspace-events Elg 1057 | # 1058 | # Example 2: to get the stream of the expired keys subscribing to channel 1059 | # name __keyevent@0__:expired use: 1060 | # 1061 | # notify-keyspace-events Ex 1062 | # 1063 | # By default all notifications are disabled because most users don't need 1064 | # this feature and the feature has some overhead. Note that if you don't 1065 | # specify at least one of K or E, no events will be delivered. 
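# As a rough illustration (assuming a local redis-cli client; this example is
# not part of the shipped defaults): with "notify-keyspace-events Ex" configured,
# the stream of expired keys for database 0 could be observed with:
#
#   redis-cli psubscribe '__keyevent@0__:expired'
#
# The empty string set below keeps all notifications disabled.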
1066 | notify-keyspace-events "" 1067 | 1068 | ############################### ADVANCED CONFIG ############################### 1069 | 1070 | # Hashes are encoded using a memory efficient data structure when they have a 1071 | # small number of entries, and the biggest entry does not exceed a given 1072 | # threshold. These thresholds can be configured using the following directives. 1073 | hash-max-ziplist-entries 512 1074 | hash-max-ziplist-value 64 1075 | 1076 | # Lists are also encoded in a special way to save a lot of space. 1077 | # The number of entries allowed per internal list node can be specified 1078 | # as a fixed maximum size or a maximum number of elements. 1079 | # For a fixed maximum size, use -5 through -1, meaning: 1080 | # -5: max size: 64 Kb <-- not recommended for normal workloads 1081 | # -4: max size: 32 Kb <-- not recommended 1082 | # -3: max size: 16 Kb <-- probably not recommended 1083 | # -2: max size: 8 Kb <-- good 1084 | # -1: max size: 4 Kb <-- good 1085 | # Positive numbers mean store up to _exactly_ that number of elements 1086 | # per list node. 1087 | # The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), 1088 | # but if your use case is unique, adjust the settings as necessary. 1089 | list-max-ziplist-size -2 1090 | 1091 | # Lists may also be compressed. 1092 | # Compress depth is the number of quicklist ziplist nodes from *each* side of 1093 | # the list to *exclude* from compression. The head and tail of the list 1094 | # are always uncompressed for fast push/pop operations. Settings are: 1095 | # 0: disable all list compression 1096 | # 1: depth 1 means "don't start compressing until after 1 node into the list, 1097 | # going from either the head or tail" 1098 | # So: [head]->node->node->...->node->[tail] 1099 | # [head], [tail] will always be uncompressed; inner nodes will compress. 1100 | # 2: [head]->[next]->node->node->...->node->[prev]->[tail] 1101 | # 2 here means: don't compress head or head->next or tail->prev or tail, 1102 | # but compress all nodes between them. 1103 | # 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] 1104 | # etc. 1105 | list-compress-depth 0 1106 | 1107 | # Sets have a special encoding in just one case: when a set is composed 1108 | # of just strings that happen to be integers in radix 10 in the range 1109 | # of 64 bit signed integers. 1110 | # The following configuration setting sets the limit in the size of the 1111 | # set in order to use this special memory saving encoding. 1112 | set-max-intset-entries 512 1113 | 1114 | # Similarly to hashes and lists, sorted sets are also specially encoded in 1115 | # order to save a lot of space. This encoding is only used when the length and 1116 | # elements of a sorted set are below the following limits: 1117 | zset-max-ziplist-entries 128 1118 | zset-max-ziplist-value 64 1119 | 1120 | # HyperLogLog sparse representation bytes limit. The limit includes the 1121 | # 16 bytes header. When an HyperLogLog using the sparse representation crosses 1122 | # this limit, it is converted into the dense representation. 1123 | # 1124 | # A value greater than 16000 is totally useless, since at that point the 1125 | # dense representation is more memory efficient. 1126 | # 1127 | # The suggested value is ~ 3000 in order to have the benefits of 1128 | # the space efficient encoding without slowing down too much PFADD, 1129 | # which is O(N) with the sparse encoding. 
The value can be raised to 1130 | # ~ 10000 when CPU is not a concern, but space is, and the data set is 1131 | # composed of many HyperLogLogs with cardinality in the 0 - 15000 range. 1132 | hll-sparse-max-bytes 3000 1133 | 1134 | # Streams macro node max size / items. The stream data structure is a radix 1135 | # tree of big nodes that encode multiple items inside. Using this configuration 1136 | # it is possible to configure how big a single node can be in bytes, and the 1137 | # maximum number of items it may contain before switching to a new node when 1138 | # appending new stream entries. If any of the following settings are set to 1139 | # zero, the limit is ignored, so for instance it is possible to set just a 1140 | # max entires limit by setting max-bytes to 0 and max-entries to the desired 1141 | # value. 1142 | stream-node-max-bytes 4096 1143 | stream-node-max-entries 100 1144 | 1145 | # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in 1146 | # order to help rehashing the main Redis hash table (the one mapping top-level 1147 | # keys to values). The hash table implementation Redis uses (see dict.c) 1148 | # performs a lazy rehashing: the more operation you run into a hash table 1149 | # that is rehashing, the more rehashing "steps" are performed, so if the 1150 | # server is idle the rehashing is never complete and some more memory is used 1151 | # by the hash table. 1152 | # 1153 | # The default is to use this millisecond 10 times every second in order to 1154 | # actively rehash the main dictionaries, freeing memory when possible. 1155 | # 1156 | # If unsure: 1157 | # use "activerehashing no" if you have hard latency requirements and it is 1158 | # not a good thing in your environment that Redis can reply from time to time 1159 | # to queries with 2 milliseconds delay. 1160 | # 1161 | # use "activerehashing yes" if you don't have such hard requirements but 1162 | # want to free memory asap when possible. 1163 | activerehashing yes 1164 | 1165 | # The client output buffer limits can be used to force disconnection of clients 1166 | # that are not reading data from the server fast enough for some reason (a 1167 | # common reason is that a Pub/Sub client can't consume messages as fast as the 1168 | # publisher can produce them). 1169 | # 1170 | # The limit can be set differently for the three different classes of clients: 1171 | # 1172 | # normal -> normal clients including MONITOR clients 1173 | # replica -> replica clients 1174 | # pubsub -> clients subscribed to at least one pubsub channel or pattern 1175 | # 1176 | # The syntax of every client-output-buffer-limit directive is the following: 1177 | # 1178 | # client-output-buffer-limit 1179 | # 1180 | # A client is immediately disconnected once the hard limit is reached, or if 1181 | # the soft limit is reached and remains reached for the specified number of 1182 | # seconds (continuously). 1183 | # So for instance if the hard limit is 32 megabytes and the soft limit is 1184 | # 16 megabytes / 10 seconds, the client will get disconnected immediately 1185 | # if the size of the output buffers reach 32 megabytes, but will also get 1186 | # disconnected if the client reaches 16 megabytes and continuously overcomes 1187 | # the limit for 10 seconds. 
1188 | # 1189 | # By default normal clients are not limited because they don't receive data 1190 | # without asking (in a push way), but just after a request, so only 1191 | # asynchronous clients may create a scenario where data is requested faster 1192 | # than it can be read. 1193 | # 1194 | # Instead there is a default limit for pubsub and replica clients, since 1195 | # subscribers and replicas receive data in a push fashion. 1196 | # 1197 | # Both the hard and the soft limit can be disabled by setting them to zero. 1198 | client-output-buffer-limit normal 0 0 0 1199 | client-output-buffer-limit replica 1gb 512mb 60 1200 | client-output-buffer-limit pubsub 4gb 3gb 60 1201 | 1202 | # Client query buffers accumulate new commands. They are limited to a fixed 1203 | # amount by default in order to prevent a protocol desynchronization (for 1204 | # instance due to a bug in the client) from leading to unbound memory usage in 1205 | # the query buffer. However you can configure it here if you have very special 1206 | # needs, such as huge multi/exec requests or the like. 1207 | # 1208 | # client-query-buffer-limit 1gb 1209 | 1210 | # In the Redis protocol, bulk requests, that is, elements representing single 1211 | # strings, are normally limited to 512 mb. However you can change this limit 1212 | # here. 1213 | # 1214 | # proto-max-bulk-len 512mb 1215 | 1216 | # Redis calls an internal function to perform many background tasks, like 1217 | # closing connections of clients that have timed out, purging expired keys that are 1218 | # never requested, and so forth. 1219 | # 1220 | # Not all tasks are performed with the same frequency, but Redis checks for 1221 | # tasks to perform according to the specified "hz" value. 1222 | # 1223 | # By default "hz" is set to 10. Raising the value will use more CPU when 1224 | # Redis is idle, but at the same time will make Redis more responsive when 1225 | # there are many keys expiring at the same time, and timeouts may be 1226 | # handled with more precision. 1227 | # 1228 | # The range is between 1 and 500, however a value over 100 is usually not 1229 | # a good idea. Most users should use the default of 10 and raise this up to 1230 | # 100 only in environments where very low latency is required. 1231 | hz 10 1232 | 1233 | # Normally it is useful to have an HZ value which is proportional to the 1234 | # number of clients connected. This is useful, for instance, to 1235 | # avoid processing too many clients for each background task invocation, 1236 | # which helps to avoid latency spikes. 1237 | # 1238 | # Since the default HZ value is conservatively set to 10, Redis 1239 | # offers, and enables by default, the ability to use an adaptive HZ value 1240 | # which will temporarily rise when there are many connected clients. 1241 | # 1242 | # When dynamic HZ is enabled, the actual configured HZ will be used 1243 | # as a baseline, but multiples of the configured HZ value will actually be 1244 | # used as needed once more clients are connected. In this way an idle 1245 | # instance will use very little CPU time while a busy instance will be 1246 | # more responsive. 1247 | dynamic-hz yes 1248 | 1249 | # When a child rewrites the AOF file, if the following option is enabled 1250 | # the file will be fsync-ed every 32 MB of data generated. This is useful 1251 | # in order to commit the file to the disk more incrementally and avoid 1252 | # big latency spikes. 
1253 | aof-rewrite-incremental-fsync yes 1254 | 1255 | # When redis saves RDB file, if the following option is enabled 1256 | # the file will be fsync-ed every 32 MB of data generated. This is useful 1257 | # in order to commit the file to the disk more incrementally and avoid 1258 | # big latency spikes. 1259 | rdb-save-incremental-fsync yes 1260 | 1261 | # Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good 1262 | # idea to start with the default settings and only change them after investigating 1263 | # how to improve the performances and how the keys LFU change over time, which 1264 | # is possible to inspect via the OBJECT FREQ command. 1265 | # 1266 | # There are two tunable parameters in the Redis LFU implementation: the 1267 | # counter logarithm factor and the counter decay time. It is important to 1268 | # understand what the two parameters mean before changing them. 1269 | # 1270 | # The LFU counter is just 8 bits per key, it's maximum value is 255, so Redis 1271 | # uses a probabilistic increment with logarithmic behavior. Given the value 1272 | # of the old counter, when a key is accessed, the counter is incremented in 1273 | # this way: 1274 | # 1275 | # 1. A random number R between 0 and 1 is extracted. 1276 | # 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1). 1277 | # 3. The counter is incremented only if R < P. 1278 | # 1279 | # The default lfu-log-factor is 10. This is a table of how the frequency 1280 | # counter changes with a different number of accesses with different 1281 | # logarithmic factors: 1282 | # 1283 | # +--------+------------+------------+------------+------------+------------+ 1284 | # | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | 1285 | # +--------+------------+------------+------------+------------+------------+ 1286 | # | 0 | 104 | 255 | 255 | 255 | 255 | 1287 | # +--------+------------+------------+------------+------------+------------+ 1288 | # | 1 | 18 | 49 | 255 | 255 | 255 | 1289 | # +--------+------------+------------+------------+------------+------------+ 1290 | # | 10 | 10 | 18 | 142 | 255 | 255 | 1291 | # +--------+------------+------------+------------+------------+------------+ 1292 | # | 100 | 8 | 11 | 49 | 143 | 255 | 1293 | # +--------+------------+------------+------------+------------+------------+ 1294 | # 1295 | # NOTE: The above table was obtained by running the following commands: 1296 | # 1297 | # redis-benchmark -n 1000000 incr foo 1298 | # redis-cli object freq foo 1299 | # 1300 | # NOTE 2: The counter initial value is 5 in order to give new objects a chance 1301 | # to accumulate hits. 1302 | # 1303 | # The counter decay time is the time, in minutes, that must elapse in order 1304 | # for the key counter to be divided by two (or decremented if it has a value 1305 | # less <= 10). 1306 | # 1307 | # The default value for the lfu-decay-time is 1. A Special value of 0 means to 1308 | # decay the counter every time it happens to be scanned. 1309 | # 1310 | # lfu-log-factor 10 1311 | # lfu-decay-time 1 1312 | 1313 | ########################### ACTIVE DEFRAGMENTATION ####################### 1314 | # 1315 | # WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested 1316 | # even in production and manually tested by multiple engineers for some 1317 | # time. 1318 | # 1319 | # What is active defragmentation? 
1320 | # ------------------------------- 1321 | # 1322 | # Active (online) defragmentation allows a Redis server to compact the 1323 | # spaces left between small allocations and deallocations of data in memory, 1324 | # thus making it possible to reclaim memory. 1325 | # 1326 | # Fragmentation is a natural process that happens with every allocator (but 1327 | # less so with Jemalloc, fortunately) and certain workloads. Normally a server 1328 | # restart is needed in order to lower the fragmentation, or at least to flush 1329 | # away all the data and create it again. However thanks to this feature 1330 | # implemented by Oran Agra for Redis 4.0 this process can happen at runtime 1331 | # in a "hot" way, while the server is running. 1332 | # 1333 | # Basically when the fragmentation is over a certain level (see the 1334 | # configuration options below) Redis will start to create new copies of the 1335 | # values in contiguous memory regions by exploiting certain specific Jemalloc 1336 | # features (in order to understand if an allocation is causing fragmentation 1337 | # and to allocate it in a better place), and at the same time, will release the 1338 | # old copies of the data. This process, repeated incrementally for all the keys, 1339 | # will cause the fragmentation to drop back to normal values. 1340 | # 1341 | # Important things to understand: 1342 | # 1343 | # 1. This feature is disabled by default, and only works if you compiled Redis 1344 | # to use the copy of Jemalloc we ship with the source code of Redis. 1345 | # This is the default with Linux builds. 1346 | # 1347 | # 2. You never need to enable this feature if you don't have fragmentation 1348 | # issues. 1349 | # 1350 | # 3. Once you experience fragmentation, you can enable this feature when 1351 | # needed with the command "CONFIG SET activedefrag yes". 1352 | # 1353 | # The configuration parameters make it possible to fine-tune the behavior of the 1354 | # defragmentation process. If you are not sure about what they mean it is 1355 | # a good idea to leave the defaults untouched. 
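# As a rough illustration (not part of the upstream commentary): before enabling
# the feature, the current fragmentation level can be checked at runtime with
# "redis-cli INFO memory" (see the mem_fragmentation_ratio field), and active
# defragmentation can then be turned on without a restart using
# "CONFIG SET activedefrag yes" as described above.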
1356 | 1357 | # Enabled active defragmentation 1358 | # activedefrag yes 1359 | 1360 | # Minimum amount of fragmentation waste to start active defrag 1361 | # active-defrag-ignore-bytes 100mb 1362 | 1363 | # Minimum percentage of fragmentation to start active defrag 1364 | # active-defrag-threshold-lower 10 1365 | 1366 | # Maximum percentage of fragmentation at which we use maximum effort 1367 | # active-defrag-threshold-upper 100 1368 | 1369 | # Minimal effort for defrag in CPU percentage 1370 | # active-defrag-cycle-min 5 1371 | 1372 | # Maximal effort for defrag in CPU percentage 1373 | # active-defrag-cycle-max 75 1374 | 1375 | # Maximum number of set/hash/zset/list fields that will be processed from 1376 | # the main dictionary scan 1377 | # active-defrag-max-scan-fields 1000 1378 | 1379 | -------------------------------------------------------------------------------- /files/rl_coach.patch: -------------------------------------------------------------------------------- 1 | diff --git a/rl_coach/agents/actor_critic_agent.py b/rl_coach/agents/actor_critic_agent.py 2 | index 35c8bf9..4f3ce60 100644 3 | --- a/rl_coach/agents/actor_critic_agent.py 4 | +++ b/rl_coach/agents/actor_critic_agent.py 5 | @@ -94,11 +94,14 @@ class ActorCriticAgentParameters(AgentParameters): 6 | class ActorCriticAgent(PolicyOptimizationAgent): 7 | def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): 8 | super().__init__(agent_parameters, parent) 9 | + print("[RL] ActorCriticAgent init") 10 | self.last_gradient_update_step_idx = 0 11 | self.action_advantages = self.register_signal('Advantages') 12 | self.state_values = self.register_signal('Values') 13 | self.value_loss = self.register_signal('Value Loss') 14 | self.policy_loss = self.register_signal('Policy Loss') 15 | + print("[RL] ActorCriticAgent init successful") 16 | + 17 | 18 | # Discounting function used to calculate discounted returns. 19 | def discount(self, x, gamma): 20 | diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py 21 | index 866fe8a..cf0873a 100644 22 | --- a/rl_coach/agents/agent.py 23 | +++ b/rl_coach/agents/agent.py 24 | @@ -28,6 +28,8 @@ from rl_coach.base_parameters import AgentParameters, Device, DeviceType, Distri 25 | from rl_coach.core_types import RunPhase, PredictionType, EnvironmentEpisodes, ActionType, Batch, Episode, StateType 26 | from rl_coach.core_types import Transition, ActionInfo, TrainingSteps, EnvironmentSteps, EnvResponse 27 | from rl_coach.logger import screen, Logger, EpisodeLogger 28 | +from rl_coach.memories.memory import Memory 29 | +from rl_coach.memories.non_episodic.experience_replay import ExperienceReplay 30 | from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay 31 | from rl_coach.saver import SaverCollection 32 | from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace 33 | @@ -74,7 +76,7 @@ class Agent(AgentInterface): 34 | self.imitation = False 35 | self.agent_logger = Logger() 36 | self.agent_episode_logger = EpisodeLogger() 37 | - 38 | + print("[RL] Created agent loggers") 39 | # get the memory 40 | # - distributed training + shared memory: 41 | # * is chief? -> create the memory and add it to the scratchpad 42 | @@ -84,22 +86,30 @@ class Agent(AgentInterface): 43 | memory_name = self.ap.memory.path.split(':')[1] 44 | self.memory_lookup_name = self.full_name_id + '.' 
+ memory_name 45 | if self.shared_memory and not self.is_chief: 46 | + print("[RL] Creating shared memory") 47 | self.memory = self.shared_memory_scratchpad.get(self.memory_lookup_name) 48 | else: 49 | + print("[RL] Dynamic import of memory: ", self.ap.memory) 50 | # modules 51 | self.memory = dynamic_import_and_instantiate_module_from_params(self.ap.memory) 52 | + print("[RL] Dynamically imported of memory", self.memory) 53 | 54 | if hasattr(self.ap.memory, 'memory_backend_params'): 55 | + print("[RL] Getting memory backend", self.ap.memory.memory_backend_params) 56 | self.memory_backend = get_memory_backend(self.ap.memory.memory_backend_params) 57 | + print("[RL] Memory backend", self.memory_backend) 58 | 59 | if self.ap.memory.memory_backend_params.run_type != 'trainer': 60 | + print("[RL] Setting memory backend", self.memory_backend) 61 | self.memory.set_memory_backend(self.memory_backend) 62 | 63 | if self.shared_memory and self.is_chief: 64 | + print("[RL] Shared memory scratchpad") 65 | self.shared_memory_scratchpad.add(self.memory_lookup_name, self.memory) 66 | 67 | # set devices 68 | if type(agent_parameters.task_parameters) == DistributedTaskParameters: 69 | + print("[RL] Setting distributed devices") 70 | self.has_global = True 71 | self.replicated_device = agent_parameters.task_parameters.device 72 | self.worker_device = "/job:worker/task:{}".format(self.task_id) 73 | @@ -108,6 +118,7 @@ class Agent(AgentInterface): 74 | else: 75 | self.worker_device += "/device:GPU:0" 76 | else: 77 | + print("[RL] Setting devices") 78 | self.has_global = False 79 | self.replicated_device = None 80 | if agent_parameters.task_parameters.use_cpu: 81 | @@ -115,7 +126,7 @@ class Agent(AgentInterface): 82 | else: 83 | self.worker_device = [Device(DeviceType.GPU, i) 84 | for i in range(agent_parameters.task_parameters.num_gpu)] 85 | - 86 | + print("[RL] Setting filters") 87 | # filters 88 | self.input_filter = self.ap.input_filter 89 | self.input_filter.set_name('input_filter') 90 | @@ -134,21 +145,26 @@ class Agent(AgentInterface): 91 | # 3. 
Single worker (=both TF and Mxnet) - no data sharing needed + numpy arithmetic backend 92 | 93 | if hasattr(self.ap.memory, 'memory_backend_params') and self.ap.algorithm.distributed_coach_synchronization_type: 94 | + print("[RL] Setting filter devices: distributed") 95 | self.input_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params, mode='numpy') 96 | self.output_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params, mode='numpy') 97 | self.pre_network_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params, mode='numpy') 98 | elif (type(agent_parameters.task_parameters) == DistributedTaskParameters and 99 | agent_parameters.task_parameters.framework_type == Frameworks.tensorflow): 100 | + print("[RL] Setting filter devices: tf") 101 | self.input_filter.set_device(device, mode='tf') 102 | self.output_filter.set_device(device, mode='tf') 103 | self.pre_network_filter.set_device(device, mode='tf') 104 | else: 105 | + print("[RL] Setting filter devices: numpy") 106 | self.input_filter.set_device(device, mode='numpy') 107 | self.output_filter.set_device(device, mode='numpy') 108 | self.pre_network_filter.set_device(device, mode='numpy') 109 | 110 | # initialize all internal variables 111 | + print("[RL] Setting Phase") 112 | self._phase = RunPhase.HEATUP 113 | + print("[RL] After setting Phase") 114 | self.total_shaped_reward_in_current_episode = 0 115 | self.total_reward_in_current_episode = 0 116 | self.total_steps_counter = 0 117 | @@ -180,7 +196,7 @@ class Agent(AgentInterface): 118 | # environment parameters 119 | self.spaces = None 120 | self.in_action_space = self.ap.algorithm.in_action_space 121 | - 122 | + print("[RL] Setting signals") 123 | # signals 124 | self.episode_signals = [] 125 | self.step_signals = [] 126 | @@ -195,6 +211,8 @@ class Agent(AgentInterface): 127 | 128 | # batch rl 129 | self.ope_manager = OpeManager() if self.ap.is_batch_rl_training else None 130 | + print("[RL] Agent init successful") 131 | + 132 | 133 | @property 134 | def parent(self) -> 'LevelManager': 135 | @@ -572,7 +590,8 @@ class Agent(AgentInterface): 136 | self.current_episode += 1 137 | 138 | if self.phase != RunPhase.TEST: 139 | - if isinstance(self.memory, EpisodicExperienceReplay): 140 | + if isinstance(self.memory, EpisodicExperienceReplay) or \ 141 | + (isinstance(self.memory, Memory) and not isinstance(self.memory, ExperienceReplay)): 142 | self.call_memory('store_episode', self.current_episode_buffer) 143 | elif self.ap.algorithm.store_transitions_only_when_episodes_are_terminated: 144 | for transition in self.current_episode_buffer.transitions: 145 | @@ -618,7 +637,8 @@ class Agent(AgentInterface): 146 | self.input_filter.reset() 147 | self.output_filter.reset() 148 | self.pre_network_filter.reset() 149 | - if isinstance(self.memory, EpisodicExperienceReplay): 150 | + if isinstance(self.memory, EpisodicExperienceReplay) or \ 151 | + (isinstance(self.memory, Memory) and not isinstance(self.memory, ExperienceReplay)): 152 | self.call_memory('verify_last_episode_is_closed') 153 | 154 | for network in self.networks.values(): 155 | @@ -953,7 +973,7 @@ class Agent(AgentInterface): 156 | # for episodic memories we keep the transitions in a local buffer until the episode is ended. 
157 | # for regular memories we insert the transitions directly to the memory 158 | self.current_episode_buffer.insert(transition) 159 | - if not isinstance(self.memory, EpisodicExperienceReplay) \ 160 | + if isinstance(self.memory, ExperienceReplay) \ 161 | and not self.ap.algorithm.store_transitions_only_when_episodes_are_terminated: 162 | self.call_memory('store', transition) 163 | 164 | diff --git a/rl_coach/agents/clipped_ppo_agent.py b/rl_coach/agents/clipped_ppo_agent.py 165 | index cc29f33..4f1a7d9 100644 166 | --- a/rl_coach/agents/clipped_ppo_agent.py 167 | +++ b/rl_coach/agents/clipped_ppo_agent.py 168 | @@ -182,7 +182,7 @@ class ClippedPPOAgent(ActorCriticAgent): 169 | screen.warning("WARNING: The requested policy gradient rescaler is not available") 170 | 171 | # standardize 172 | - advantages = (advantages - np.mean(advantages)) / np.std(advantages) 173 | + advantages = (advantages - np.mean(advantages)) / (np.std(advantages) + 1e-8) 174 | 175 | for transition, advantage, value_target in zip(batch.transitions, advantages, value_targets): 176 | transition.info['advantage'] = advantage 177 | diff --git a/rl_coach/architectures/architecture.py b/rl_coach/architectures/architecture.py 178 | index 90dbd6e..8d457a9 100644 179 | --- a/rl_coach/architectures/architecture.py 180 | +++ b/rl_coach/architectures/architecture.py 181 | @@ -46,8 +46,9 @@ class Architecture(object): 182 | """ 183 | self.spaces = spaces 184 | self.name = name 185 | - self.network_wrapper_name = self.name.split('/')[0] # e.g. 'main/online' --> 'main' 186 | - self.full_name = "{}/{}".format(agent_parameters.full_name_id, name) 187 | + self.network_wrapper_name = self.name.split('/')[1] # e.g. 'main/online' --> 'main' 188 | + self.full_name = "{}/{}".format(agent_parameters.full_name_id, '/'.join(name.split('/')[1:])) 189 | + # self.full_name = "{}/{}".format(agent_parameters.full_name_id, name) 190 | self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name] 191 | self.batch_size = self.network_parameters.batch_size 192 | self.learning_rate = self.network_parameters.learning_rate 193 | diff --git a/rl_coach/architectures/network_wrapper.py b/rl_coach/architectures/network_wrapper.py 194 | index dfefc41..a31dbf4 100644 195 | --- a/rl_coach/architectures/network_wrapper.py 196 | +++ b/rl_coach/architectures/network_wrapper.py 197 | @@ -68,7 +68,7 @@ class NetworkWrapper(object): 198 | self.global_network = general_network(variable_scope=variable_scope, 199 | devices=force_list(replicated_device), 200 | agent_parameters=agent_parameters, 201 | - name='{}/global'.format(name), 202 | + name='{}/{}/global'.format(agent_parameters.name, name), 203 | global_network=None, 204 | network_is_local=False, 205 | spaces=spaces, 206 | @@ -79,7 +79,7 @@ class NetworkWrapper(object): 207 | self.online_network = general_network(variable_scope=variable_scope, 208 | devices=force_list(worker_device), 209 | agent_parameters=agent_parameters, 210 | - name='{}/online'.format(name), 211 | + name='{}/{}/online'.format(agent_parameters.name,name), 212 | global_network=self.global_network, 213 | network_is_local=True, 214 | spaces=spaces, 215 | @@ -91,7 +91,7 @@ class NetworkWrapper(object): 216 | self.target_network = general_network(variable_scope=variable_scope, 217 | devices=force_list(worker_device), 218 | agent_parameters=agent_parameters, 219 | - name='{}/target'.format(name), 220 | + name='{}/{}/target'.format(agent_parameters.name, name), 221 | global_network=self.global_network, 222 | 
network_is_local=True, 223 | spaces=spaces, 224 | diff --git a/rl_coach/architectures/tensorflow_components/architecture.py b/rl_coach/architectures/tensorflow_components/architecture.py 225 | index 68420fe..f847d8a 100644 226 | --- a/rl_coach/architectures/tensorflow_components/architecture.py 227 | +++ b/rl_coach/architectures/tensorflow_components/architecture.py 228 | @@ -28,21 +28,21 @@ from rl_coach.saver import SaverCollection 229 | from rl_coach.spaces import SpacesDefinition 230 | from rl_coach.utils import force_list, squeeze_list, start_shell_command_and_wait 231 | 232 | - 233 | +tf.compat.v1.disable_resource_variables() 234 | def variable_summaries(var): 235 | """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" 236 | - with tf.name_scope('summaries'): 237 | + with tf.compat.v1.name_scope('summaries'): 238 | layer_weight_name = '_'.join(var.name.split('/')[-3:])[:-2] 239 | 240 | - with tf.name_scope(layer_weight_name): 241 | + with tf.compat.v1.name_scope(layer_weight_name): 242 | mean = tf.reduce_mean(var) 243 | - tf.summary.scalar('mean', mean) 244 | - with tf.name_scope('stddev'): 245 | + tf.compat.v1.summary.scalar('mean', mean) 246 | + with tf.compat.v1.name_scope('stddev'): 247 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) 248 | - tf.summary.scalar('stddev', stddev) 249 | - tf.summary.scalar('max', tf.reduce_max(var)) 250 | - tf.summary.scalar('min', tf.reduce_min(var)) 251 | - tf.summary.histogram('histogram', var) 252 | + tf.compat.v1.summary.scalar('stddev', stddev) 253 | + tf.compat.v1.summary.scalar('max', tf.reduce_max(var)) 254 | + tf.compat.v1.summary.scalar('min', tf.reduce_min(var)) 255 | + tf.compat.v1.summary.histogram('histogram', var) 256 | 257 | 258 | def local_getter(getter, name, *args, **kwargs): 259 | @@ -52,7 +52,7 @@ def local_getter(getter, name, *args, **kwargs): 260 | between workers. these variables are also assumed to be non-trainable (the optimizer does not apply gradients to 261 | these variables), but we can calculate the gradients wrt these variables, and we can update their content. 
262 | """ 263 | - kwargs['collections'] = [tf.GraphKeys.LOCAL_VARIABLES] 264 | + kwargs['collections'] = [tf.compat.v1.GraphKeys.LOCAL_VARIABLES] 265 | return getter(name, *args, **kwargs) 266 | 267 | 268 | @@ -96,17 +96,17 @@ class TensorFlowArchitecture(Architecture): 269 | 270 | self.optimizer_type = self.network_parameters.optimizer_type 271 | if self.ap.task_parameters.seed is not None: 272 | - tf.set_random_seed(self.ap.task_parameters.seed) 273 | - with tf.variable_scope("/".join(self.name.split("/")[1:]), initializer=tf.contrib.layers.xavier_initializer(), 274 | + tf.compat.v1.set_random_seed(self.ap.task_parameters.seed) 275 | + with tf.compat.v1.variable_scope("/".join(self.name.split("/")[2:]), initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), 276 | custom_getter=local_getter if network_is_local and global_network else None): 277 | - self.global_step = tf.train.get_or_create_global_step() 278 | + self.global_step = tf.compat.v1.train.get_or_create_global_step() 279 | 280 | # build the network 281 | self.weights = self.get_model() 282 | 283 | # create the placeholder for the assigning gradients and some tensorboard summaries for the weights 284 | for idx, var in enumerate(self.weights): 285 | - placeholder = tf.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder') 286 | + placeholder = tf.compat.v1.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder') 287 | self.weights_placeholders.append(placeholder) 288 | if self.ap.visualization.tensorboard: 289 | variable_summaries(var) 290 | @@ -128,14 +128,14 @@ class TensorFlowArchitecture(Architecture): 291 | self.reset_internal_memory() 292 | 293 | if self.ap.visualization.tensorboard: 294 | - current_scope_summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, 295 | - scope=tf.contrib.framework.get_name_scope()) 296 | - self.merged = tf.summary.merge(current_scope_summaries) 297 | + current_scope_summaries = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.SUMMARIES, 298 | + scope=tf.get_current_name_scope()) 299 | + self.merged = tf.compat.v1.summary.merge(current_scope_summaries) 300 | 301 | # initialize or restore model 302 | self.init_op = tf.group( 303 | - tf.global_variables_initializer(), 304 | - tf.local_variables_initializer() 305 | + tf.compat.v1.global_variables_initializer(), 306 | + tf.compat.v1.local_variables_initializer() 307 | ) 308 | 309 | # set the fetches for training 310 | @@ -171,14 +171,14 @@ class TensorFlowArchitecture(Architecture): 311 | Create locks for synchronizing the different workers during training 312 | :return: None 313 | """ 314 | - self.lock_counter = tf.get_variable("lock_counter", [], tf.int32, 315 | - initializer=tf.constant_initializer(0, dtype=tf.int32), 316 | + self.lock_counter = tf.compat.v1.get_variable("lock_counter", [], tf.int32, 317 | + initializer=tf.compat.v1.constant_initializer(0, dtype=tf.int32), 318 | trainable=False) 319 | self.lock = self.lock_counter.assign_add(1, use_locking=True) 320 | self.lock_init = self.lock_counter.assign(0) 321 | 322 | - self.release_counter = tf.get_variable("release_counter", [], tf.int32, 323 | - initializer=tf.constant_initializer(0, dtype=tf.int32), 324 | + self.release_counter = tf.compat.v1.get_variable("release_counter", [], tf.int32, 325 | + initializer=tf.compat.v1.constant_initializer(0, dtype=tf.int32), 326 | trainable=False) 327 | self.release = self.release_counter.assign_add(1, use_locking=True) 328 | self.release_decrement = 
self.release_counter.assign_add(-1, use_locking=True) 329 | @@ -191,7 +191,7 @@ class TensorFlowArchitecture(Architecture): 330 | """ 331 | 332 | self.tensor_gradients = tf.gradients(self.total_loss, self.weights) 333 | - self.gradients_norm = tf.global_norm(self.tensor_gradients) 334 | + self.gradients_norm = tf.linalg.global_norm(self.tensor_gradients) 335 | 336 | # gradient clipping 337 | if self.network_parameters.clip_gradients is not None and self.network_parameters.clip_gradients != 0: 338 | @@ -205,7 +205,7 @@ class TensorFlowArchitecture(Architecture): 339 | # gradients of the outputs w.r.t. the inputs 340 | self.gradients_wrt_inputs = [{name: tf.gradients(output, input_ph) for name, input_ph in 341 | self.inputs.items()} for output in self.outputs] 342 | - self.gradients_weights_ph = [tf.placeholder('float32', self.outputs[i].shape, 'output_gradient_weights') 343 | + self.gradients_weights_ph = [tf.compat.v1.placeholder('float32', self.outputs[i].shape, 'output_gradient_weights') 344 | for i in range(len(self.outputs))] 345 | self.weighted_gradients = [] 346 | for i in range(len(self.outputs)): 347 | @@ -270,7 +270,7 @@ class TensorFlowArchitecture(Architecture): 348 | elif self.network_is_trainable: 349 | # not any of the above but is trainable? -> create an operation for applying the gradients to 350 | # this network weights 351 | - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=self.full_name) 352 | + update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS, scope=self.full_name) 353 | 354 | with tf.control_dependencies(update_ops): 355 | self.update_weights_from_batch_gradients = self.optimizer.apply_gradients( 356 | @@ -288,10 +288,10 @@ class TensorFlowArchitecture(Architecture): 357 | if self.ap.visualization.tensorboard: 358 | # Write the merged summaries to the current experiment directory 359 | if not task_is_distributed: 360 | - self.train_writer = tf.summary.FileWriter(self.ap.task_parameters.experiment_path + '/tensorboard') 361 | + self.train_writer = tf.compat.v1.summary.FileWriter(self.ap.task_parameters.experiment_path + '/tensorboard') 362 | self.train_writer.add_graph(self.sess.graph) 363 | elif self.network_is_local: 364 | - self.train_writer = tf.summary.FileWriter(self.ap.task_parameters.experiment_path + 365 | + self.train_writer = tf.compat.v1.summary.FileWriter(self.ap.task_parameters.experiment_path + 366 | '/tensorboard/worker{}'.format(self.ap.task_parameters.task_index)) 367 | self.train_writer.add_graph(self.sess.graph) 368 | 369 | diff --git a/rl_coach/architectures/tensorflow_components/distributed_tf_utils.py b/rl_coach/architectures/tensorflow_components/distributed_tf_utils.py 370 | index bbbbc0f..39403a6 100644 371 | --- a/rl_coach/architectures/tensorflow_components/distributed_tf_utils.py 372 | +++ b/rl_coach/architectures/tensorflow_components/distributed_tf_utils.py 373 | @@ -36,7 +36,7 @@ def create_cluster_spec(parameters_server: str, workers: str) -> tf.train.Cluste 374 | return cluster_spec 375 | 376 | 377 | -def create_and_start_parameters_server(cluster_spec: tf.train.ClusterSpec, config: tf.ConfigProto=None) -> None: 378 | +def create_and_start_parameters_server(cluster_spec: tf.train.ClusterSpec, config: tf.compat.v1.ConfigProto=None) -> None: 379 | """ 380 | Create and start a parameter server 381 | :param cluster_spec: the ClusterSpec object representing the cluster 382 | @@ -44,14 +44,14 @@ def create_and_start_parameters_server(cluster_spec: tf.train.ClusterSpec, confi 383 | :return: None 384 | 
""" 385 | # create a server object for the parameter server 386 | - server = tf.train.Server(cluster_spec, job_name="ps", task_index=0, config=config) 387 | + server = tf.distribute.Server(cluster_spec, job_name="ps", task_index=0, config=config) 388 | 389 | # wait for the server to finish 390 | server.join() 391 | 392 | 393 | def create_worker_server_and_device(cluster_spec: tf.train.ClusterSpec, task_index: int, 394 | - use_cpu: bool=True, config: tf.ConfigProto=None) -> Tuple[str, tf.device]: 395 | + use_cpu: bool=True, config: tf.compat.v1.ConfigProto=None) -> Tuple[str, tf.device]: 396 | """ 397 | Creates a worker server and a device setter used to assign the workers operations to 398 | :param cluster_spec: a ClusterSpec object representing the cluster 399 | @@ -61,7 +61,7 @@ def create_worker_server_and_device(cluster_spec: tf.train.ClusterSpec, task_ind 400 | :return: the target string for the tf.Session and the worker device setter object 401 | """ 402 | # Create and start a worker 403 | - server = tf.train.Server(cluster_spec, job_name="worker", task_index=task_index, config=config) 404 | + server = tf.distribute.Server(cluster_spec, job_name="worker", task_index=task_index, config=config) 405 | 406 | # Assign ops to the local worker 407 | worker_device = "/job:worker/task:{}".format(task_index) 408 | @@ -69,13 +69,13 @@ def create_worker_server_and_device(cluster_spec: tf.train.ClusterSpec, task_ind 409 | worker_device += "/cpu:0" 410 | else: 411 | worker_device += "/device:GPU:0" 412 | - device = tf.train.replica_device_setter(worker_device=worker_device, cluster=cluster_spec) 413 | + device = tf.compat.v1.train.replica_device_setter(worker_device=worker_device, cluster=cluster_spec) 414 | 415 | return server.target, device 416 | 417 | 418 | -def create_monitored_session(target: tf.train.Server, task_index: int, 419 | - checkpoint_dir: str, checkpoint_save_secs: int, config: tf.ConfigProto=None) -> tf.Session: 420 | +def create_monitored_session(target: tf.distribute.Server, task_index: int, 421 | + checkpoint_dir: str, checkpoint_save_secs: int, config: tf.compat.v1.ConfigProto=None) -> tf.compat.v1.Session: 422 | """ 423 | Create a monitored session for the worker 424 | :param target: the target string for the tf.Session 425 | @@ -89,7 +89,7 @@ def create_monitored_session(target: tf.train.Server, task_index: int, 426 | is_chief = task_index == 0 427 | 428 | # Create the monitored session 429 | - sess = tf.train.MonitoredTrainingSession( 430 | + sess = tf.compat.v1.train.MonitoredTrainingSession( 431 | master=target, 432 | is_chief=is_chief, 433 | hooks=[], 434 | diff --git a/rl_coach/architectures/tensorflow_components/embedders/embedder.py b/rl_coach/architectures/tensorflow_components/embedders/embedder.py 435 | index 13544c9..97dca64 100644 436 | --- a/rl_coach/architectures/tensorflow_components/embedders/embedder.py 437 | +++ b/rl_coach/architectures/tensorflow_components/embedders/embedder.py 438 | @@ -75,15 +75,15 @@ class InputEmbedder(object): 439 | activation_function=self.activation_function, 440 | dropout_rate=self.dropout_rate)) 441 | 442 | - def __call__(self, prev_input_placeholder: tf.placeholder=None) -> Tuple[tf.Tensor, tf.Tensor]: 443 | + def __call__(self, prev_input_placeholder: tf.compat.v1.placeholder=None) -> Tuple[tf.Tensor, tf.Tensor]: 444 | """ 445 | Wrapper for building the module graph including scoping and loss creation 446 | :param prev_input_placeholder: the input to the graph 447 | :return: the input placeholder and the output of the last 
layer 448 | """ 449 | - with tf.variable_scope(self.get_name()): 450 | + with tf.compat.v1.variable_scope(self.get_name()): 451 | if prev_input_placeholder is None: 452 | - self.input = tf.placeholder("float", shape=[None] + self.input_size, name=self.get_name()) 453 | + self.input = tf.compat.v1.placeholder("float", shape=[None] + self.input_size, name=self.get_name()) 454 | else: 455 | self.input = prev_input_placeholder 456 | self._build_module() 457 | @@ -116,8 +116,8 @@ class InputEmbedder(object): 458 | is_training=self.is_training) 459 | )) 460 | 461 | - self.output = tf.contrib.layers.flatten(self.layers[-1]) 462 | - 463 | + self.output = tf.keras.layers.Flatten()(self.layers[-1]) 464 | + 465 | @property 466 | def input_size(self) -> List[int]: 467 | return self._input_size 468 | diff --git a/rl_coach/architectures/tensorflow_components/general_network.py b/rl_coach/architectures/tensorflow_components/general_network.py 469 | index 8821ac6..61b9472 100644 470 | --- a/rl_coach/architectures/tensorflow_components/general_network.py 471 | +++ b/rl_coach/architectures/tensorflow_components/general_network.py 472 | @@ -32,7 +32,8 @@ from rl_coach.logger import screen 473 | from rl_coach.spaces import SpacesDefinition, PlanarMapsObservationSpace, TensorObservationSpace 474 | from rl_coach.utils import get_all_subclasses, dynamic_import_and_instantiate_module_from_params, indent_string 475 | 476 | - 477 | +tf.compat.v1.disable_resource_variables() 478 | +tf.compat.v1.disable_eager_execution() 479 | class GeneralTensorFlowNetwork(TensorFlowArchitecture): 480 | """ 481 | A generalized version of all possible networks implemented using tensorflow. 482 | @@ -64,11 +65,11 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 483 | # variable_scope() call and also recover the name space using name_scope 484 | if variable_scope in GeneralTensorFlowNetwork.variable_scopes_dict: 485 | variable_scope = GeneralTensorFlowNetwork.variable_scopes_dict[variable_scope] 486 | - with tf.variable_scope(variable_scope, auxiliary_name_scope=False) as vs: 487 | - with tf.name_scope(vs.original_name_scope): 488 | + with tf.compat.v1.variable_scope(variable_scope, auxiliary_name_scope=False) as vs: 489 | + with tf.compat.v1.name_scope(vs.original_name_scope): 490 | return construct_on_device() 491 | else: 492 | - with tf.variable_scope(variable_scope, auxiliary_name_scope=True) as vs: 493 | + with tf.compat.v1.variable_scope(variable_scope, auxiliary_name_scope=True) as vs: 494 | # Add variable_scope object to dictionary for next call to construct 495 | GeneralTensorFlowNetwork.variable_scopes_dict[variable_scope] = vs 496 | return construct_on_device() 497 | @@ -105,7 +106,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 498 | """ 499 | self.global_network = global_network 500 | self.network_is_local = network_is_local 501 | - self.network_wrapper_name = name.split('/')[0] 502 | + self.network_wrapper_name = name.split('/')[1] 503 | self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name] 504 | self.num_heads_per_network = 1 if self.network_parameters.use_separate_networks_per_head else \ 505 | len(self.network_parameters.heads_parameters) 506 | @@ -237,12 +238,12 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 507 | raise ValueError("Exactly one middleware type should be defined") 508 | 509 | # ops for defining the training / testing phase 510 | - self.is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 511 | - 
self.is_training_placeholder = tf.placeholder("bool") 512 | - self.assign_is_training = tf.assign(self.is_training, self.is_training_placeholder) 513 | + self.is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 514 | + self.is_training_placeholder = tf.compat.v1.placeholder("bool") 515 | + self.assign_is_training = tf.compat.v1.assign(self.is_training, self.is_training_placeholder) 516 | 517 | for network_idx in range(self.num_networks): 518 | - with tf.variable_scope('network_{}'.format(network_idx)): 519 | + with tf.compat.v1.variable_scope('network_{}'.format(network_idx)): 520 | 521 | #################### 522 | # Input Embeddings # 523 | @@ -310,12 +311,12 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 524 | 525 | # rescale the gradients from the head 526 | self.gradients_from_head_rescalers.append( 527 | - tf.get_variable('gradients_from_head_{}-{}_rescalers'.format(head_idx, head_copy_idx), 528 | + tf.compat.v1.get_variable('gradients_from_head_{}-{}_rescalers'.format(head_idx, head_copy_idx), 529 | initializer=float(head_params.rescale_gradient_from_head_by_factor), 530 | dtype=tf.float32)) 531 | 532 | self.gradients_from_head_rescalers_placeholders.append( 533 | - tf.placeholder('float', 534 | + tf.compat.v1.placeholder('float', 535 | name='gradients_from_head_{}-{}_rescalers'.format(head_type_idx, head_copy_idx))) 536 | 537 | self.update_head_rescaler_value_ops.append(self.gradients_from_head_rescalers[head_count].assign( 538 | @@ -343,13 +344,13 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 539 | 540 | # model weights 541 | if not self.distributed_training or self.network_is_global: 542 | - self.weights = [var for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.full_name) if 543 | - 'global_step' not in var.name] 544 | + self.weights = [var for var in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=self.full_name) if 545 | + 'global_step' not in var.name and 'Variable:0' not in var.name] 546 | else: 547 | - self.weights = [var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.full_name)] 548 | + self.weights = [var for var in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=self.full_name)] 549 | 550 | # Losses 551 | - self.losses = tf.losses.get_losses(self.full_name) 552 | + self.losses = tf.compat.v1.losses.get_losses(self.full_name) 553 | 554 | # L2 regularization 555 | if self.network_parameters.l2_regularization != 0: 556 | @@ -363,7 +364,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 557 | # Learning rate 558 | if self.network_parameters.learning_rate_decay_rate != 0: 559 | self.adaptive_learning_rate_scheme = \ 560 | - tf.train.exponential_decay( 561 | + tf.compat.v1.train.exponential_decay( 562 | self.network_parameters.learning_rate, 563 | self.global_step, 564 | decay_steps=self.network_parameters.learning_rate_decay_steps, 565 | @@ -388,17 +389,14 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 566 | # -> create an optimizer 567 | 568 | if self.network_parameters.optimizer_type == 'Adam': 569 | - self.optimizer = tf.train.AdamOptimizer(learning_rate=self.current_learning_rate, 570 | + self.optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=self.current_learning_rate, 571 | beta1=self.network_parameters.adam_optimizer_beta1, 572 | beta2=self.network_parameters.adam_optimizer_beta2, 573 | epsilon=self.network_parameters.optimizer_epsilon) 574 | elif 
self.network_parameters.optimizer_type == 'RMSProp': 575 | - self.optimizer = tf.train.RMSPropOptimizer(self.current_learning_rate, 576 | + self.optimizer = tf.compat.v1.train.RMSPropOptimizer(self.current_learning_rate, 577 | decay=self.network_parameters.rms_prop_optimizer_decay, 578 | epsilon=self.network_parameters.optimizer_epsilon) 579 | - elif self.network_parameters.optimizer_type == 'LBFGS': 580 | - self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(self.total_loss, method='L-BFGS-B', 581 | - options={'maxiter': 25}) 582 | else: 583 | raise Exception("{} is not a valid optimizer type".format(self.network_parameters.optimizer_type)) 584 | 585 | diff --git a/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py b/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py 586 | index d31fa3d..eebfa3f 100644 587 | --- a/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py 588 | +++ b/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py 589 | @@ -40,9 +40,9 @@ class ACERPolicyHead(Head): 590 | if hasattr(agent_parameters.algorithm, 'beta_entropy'): 591 | # we set the beta value as a tf variable so it can be updated later if needed 592 | self.beta = tf.Variable(float(agent_parameters.algorithm.beta_entropy), 593 | - trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 594 | - self.beta_placeholder = tf.placeholder('float') 595 | - self.set_beta = tf.assign(self.beta, self.beta_placeholder) 596 | + trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 597 | + self.beta_placeholder = tf.compat.v1.placeholder('float') 598 | + self.set_beta = tf.compat.v1.assign(self.beta, self.beta_placeholder) 599 | 600 | def _build_module(self, input_layer): 601 | if isinstance(self.spaces.action, DiscreteActionSpace): 602 | @@ -58,18 +58,18 @@ class ACERPolicyHead(Head): 603 | self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')] 604 | 605 | # Truncated importance sampling with bias corrections 606 | - importance_sampling_weight = tf.placeholder(tf.float32, [None, self.num_actions], 607 | + importance_sampling_weight = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], 608 | name='{}_importance_sampling_weight'.format(self.get_name())) 609 | self.input.append(importance_sampling_weight) 610 | - importance_sampling_weight_i = tf.placeholder(tf.float32, [None], 611 | + importance_sampling_weight_i = tf.compat.v1.placeholder(tf.float32, [None], 612 | name='{}_importance_sampling_weight_i'.format(self.get_name())) 613 | self.input.append(importance_sampling_weight_i) 614 | 615 | - V_values = tf.placeholder(tf.float32, [None], name='{}_V_values'.format(self.get_name())) 616 | + V_values = tf.compat.v1.placeholder(tf.float32, [None], name='{}_V_values'.format(self.get_name())) 617 | self.target.append(V_values) 618 | - Q_values = tf.placeholder(tf.float32, [None, self.num_actions], name='{}_Q_values'.format(self.get_name())) 619 | + Q_values = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], name='{}_Q_values'.format(self.get_name())) 620 | self.input.append(Q_values) 621 | - Q_retrace = tf.placeholder(tf.float32, [None], name='{}_Q_retrace'.format(self.get_name())) 622 | + Q_retrace = tf.compat.v1.placeholder(tf.float32, [None], name='{}_Q_retrace'.format(self.get_name())) 623 | self.input.append(Q_retrace) 624 | 625 | action_log_probs_wrt_policy = self.policy_distribution.log_prob(self.actions) 626 | @@ -78,7 +78,7 @@ class ACERPolicyHead(Head): 
627 | * tf.minimum(self.ap.algorithm.importance_weight_truncation, 628 | importance_sampling_weight_i)) 629 | 630 | - log_probs_wrt_policy = tf.log(self.policy_probs + eps) 631 | + log_probs_wrt_policy = tf.math.log(self.policy_probs + eps) 632 | bias_correction_gain = tf.reduce_sum(log_probs_wrt_policy 633 | * (Q_values - tf.expand_dims(V_values, 1)) 634 | * tf.nn.relu(1.0 - (self.ap.algorithm.importance_weight_truncation 635 | @@ -88,15 +88,15 @@ class ACERPolicyHead(Head): 636 | self.bias_correction_loss = -tf.reduce_mean(bias_correction_gain) 637 | 638 | self.loss = self.probability_loss + self.bias_correction_loss 639 | - tf.losses.add_loss(self.loss) 640 | + tf.compat.v1.losses.add_loss(self.loss) 641 | 642 | # Trust region 643 | - batch_size = tf.to_float(tf.shape(input_layer)[0]) 644 | - average_policy = tf.placeholder(tf.float32, [None, self.num_actions], 645 | + batch_size = tf.cast(tf.shape(input_layer)[0], dtype=tf.float32) 646 | + average_policy = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], 647 | name='{}_average_policy'.format(self.get_name())) 648 | self.input.append(average_policy) 649 | - average_policy_distribution = tf.contrib.distributions.Categorical(probs=(average_policy + eps)) 650 | - self.kl_divergence = tf.reduce_mean(tf.distributions.kl_divergence(average_policy_distribution, 651 | + average_policy_distribution = tf.compat.v1.distributions.Categorical(probs=(average_policy + eps)) 652 | + self.kl_divergence = tf.reduce_mean(tf.compat.v1.distributions.kl_divergence(average_policy_distribution, 653 | self.policy_distribution)) 654 | if self.ap.algorithm.use_trust_region_optimization: 655 | @tf.custom_gradient 656 | @@ -114,12 +114,12 @@ class ACERPolicyHead(Head): 657 | 658 | def _build_discrete_net(self, input_layer, action_space): 659 | self.num_actions = len(action_space.actions) 660 | - self.actions = tf.placeholder(tf.int32, [None], name='{}_actions'.format(self.get_name())) 661 | + self.actions = tf.compat.v1.placeholder(tf.int32, [None], name='{}_actions'.format(self.get_name())) 662 | self.input.append(self.actions) 663 | 664 | policy_values = self.dense_layer(self.num_actions)(input_layer, name='fc') 665 | self.policy_probs = tf.nn.softmax(policy_values, name='{}_policy'.format(self.get_name())) 666 | 667 | # (the + eps is to prevent probability 0 which will cause the log later on to be -inf) 668 | - self.policy_distribution = tf.contrib.distributions.Categorical(probs=(self.policy_probs + eps)) 669 | + self.policy_distribution = tf.compat.v1.distributions.Categorical(probs=(self.policy_probs + eps)) 670 | self.output = self.policy_probs 671 | diff --git a/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py b/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py 672 | index b573fe5..ee44176 100644 673 | --- a/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py 674 | +++ b/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py 675 | @@ -45,11 +45,11 @@ class CategoricalQHead(QHead): 676 | self.output = tf.nn.softmax(values_distribution) 677 | 678 | # calculate cross entropy loss 679 | - self.distributions = tf.placeholder(tf.float32, shape=(None, self.num_actions, self.num_atoms), 680 | + self.distributions = tf.compat.v1.placeholder(tf.float32, shape=(None, self.num_actions, self.num_atoms), 681 | name="distributions") 682 | self.target = self.distributions 683 | - self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, 
logits=values_distribution) 684 | - tf.losses.add_loss(self.loss) 685 | + self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(self.target), logits=values_distribution) 686 | + tf.compat.v1.losses.add_loss(self.loss) 687 | 688 | self.q_values = tf.tensordot(tf.cast(self.output, tf.float64), self.z_values, 1) 689 | 690 | diff --git a/rl_coach/architectures/tensorflow_components/heads/cil_head.py b/rl_coach/architectures/tensorflow_components/heads/cil_head.py 691 | index f3ae003..25a8ae6 100644 692 | --- a/rl_coach/architectures/tensorflow_components/heads/cil_head.py 693 | +++ b/rl_coach/architectures/tensorflow_components/heads/cil_head.py 694 | @@ -39,9 +39,9 @@ class RegressionHead(Head): 695 | self.num_actions = len(self.spaces.action.actions) 696 | self.return_type = QActionStateValue 697 | if agent_parameters.network_wrappers[self.network_name].replace_mse_with_huber_loss: 698 | - self.loss_type = tf.losses.huber_loss 699 | + self.loss_type = tf.compat.v1.losses.huber_loss 700 | else: 701 | - self.loss_type = tf.losses.mean_squared_error 702 | + self.loss_type = tf.compat.v1.losses.mean_squared_error 703 | self.output_bias_initializer = output_bias_initializer 704 | 705 | def _build_module(self, input_layer): 706 | diff --git a/rl_coach/architectures/tensorflow_components/heads/classification_head.py b/rl_coach/architectures/tensorflow_components/heads/classification_head.py 707 | index 6f6af7c..5a9da70 100644 708 | --- a/rl_coach/architectures/tensorflow_components/heads/classification_head.py 709 | +++ b/rl_coach/architectures/tensorflow_components/heads/classification_head.py 710 | @@ -47,9 +47,9 @@ class ClassificationHead(Head): 711 | self.output = tf.nn.softmax(self.class_values) 712 | 713 | # calculate cross entropy loss 714 | - self.target = tf.placeholder(tf.float32, shape=(None, self.num_actions), name="target") 715 | - self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=self.class_values) 716 | - tf.losses.add_loss(self.loss) 717 | + self.target = tf.compat.v1.placeholder(tf.float32, shape=(None, self.num_actions), name="target") 718 | + self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(self.target), logits=self.class_values) 719 | + tf.compat.v1.losses.add_loss(self.loss) 720 | 721 | def __str__(self): 722 | result = [ 723 | diff --git a/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py b/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py 724 | index 6462f83..1c43988 100644 725 | --- a/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py 726 | +++ b/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py 727 | @@ -65,11 +65,11 @@ class DNDQHead(QHead): 728 | self.softmax = self.add_softmax_with_temperature() 729 | 730 | def _q_value(self, input_layer, action): 731 | - result = tf.py_func(self.DND.query, 732 | + result = tf.compat.v1.py_func(self.DND.query, 733 | [input_layer, action, self.number_of_nn], 734 | [tf.float64, tf.float64, tf.int64]) 735 | - self.dnd_embeddings[action] = tf.to_float(result[0]) 736 | - self.dnd_values[action] = tf.to_float(result[1]) 737 | + self.dnd_embeddings[action] = tf.cast(result[0], dtype=tf.float32) 738 | + self.dnd_values[action] = tf.cast(result[1], dtype=tf.float32) 739 | self.dnd_indices[action] = result[2] 740 | 741 | # DND calculation 742 | @@ -77,7 +77,7 @@ class DNDQHead(QHead): 743 | distances = tf.reduce_sum(square_diff, axis=2) + [self.l2_norm_added_delta] 744 | self.dnd_distances[action] = distances 
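For reference, the DND readout being patched in this hunk computes an inverse-distance weighted average over the values returned by the dictionary query (the weights / normalised_weights lines that follow). A minimal standalone sketch of that computation, assuming plain float32 tensors rather than rl_coach's DND class (hypothetical function and argument names, not part of the patch):

import tensorflow as tf

def dnd_q_value(dnd_values, distances):
    # distances: [batch, k] distances to the k nearest stored keys (already offset
    # by a small delta so the reciprocal below cannot divide by zero)
    # dnd_values: [batch, k] Q-values stored alongside those keys
    weights = 1.0 / distances
    normalised_weights = weights / tf.reduce_sum(weights, axis=1, keepdims=True)
    return tf.reduce_sum(dnd_values * normalised_weights, axis=1)  # [batch]
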
745 | weights = 1.0 / distances 746 | - normalised_weights = weights / tf.reduce_sum(weights, axis=1, keep_dims=True) 747 | + normalised_weights = weights / tf.reduce_sum(weights, axis=1, keepdims=True) 748 | q_value = tf.reduce_sum(self.dnd_values[action] * normalised_weights, axis=1) 749 | q_value.set_shape((None,)) 750 | return q_value 751 | diff --git a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py 752 | index 92692ab..d612256 100644 753 | --- a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py 754 | +++ b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py 755 | @@ -32,12 +32,12 @@ class DuelingQHead(QHead): 756 | 757 | def _build_module(self, input_layer): 758 | # state value tower - V 759 | - with tf.variable_scope("state_value"): 760 | + with tf.compat.v1.variable_scope("state_value"): 761 | self.state_value = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') 762 | self.state_value = self.dense_layer(1)(self.state_value, name='fc2') 763 | 764 | # action advantage tower - A 765 | - with tf.variable_scope("action_advantage"): 766 | + with tf.compat.v1.variable_scope("action_advantage"): 767 | self.action_advantage = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') 768 | self.action_advantage = self.dense_layer(self.num_actions)(self.action_advantage, name='fc2') 769 | self.action_mean = tf.reduce_mean(self.action_advantage, axis=1, keepdims=True) 770 | diff --git a/rl_coach/architectures/tensorflow_components/heads/head.py b/rl_coach/architectures/tensorflow_components/heads/head.py 771 | index e971889..82fbabb 100644 772 | --- a/rl_coach/architectures/tensorflow_components/heads/head.py 773 | +++ b/rl_coach/architectures/tensorflow_components/heads/head.py 774 | @@ -50,7 +50,7 @@ class Head(object): 775 | self.loss_type = [] 776 | self.regularizations = [] 777 | self.loss_weight = tf.Variable([float(w) for w in force_list(loss_weight)], 778 | - trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 779 | + trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 780 | self.target = [] 781 | self.importance_weight = [] 782 | self.input = [] 783 | @@ -73,7 +73,7 @@ class Head(object): 784 | :return: the output of the last layer and the target placeholder 785 | """ 786 | 787 | - with tf.variable_scope(self.get_name(), initializer=tf.contrib.layers.xavier_initializer()): 788 | + with tf.compat.v1.variable_scope(self.get_name(), initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform")): 789 | self._build_module(squeeze_tensor(input_layer)) 790 | 791 | self.output = force_list(self.output) 792 | @@ -126,7 +126,7 @@ class Head(object): 793 | 794 | # there are heads that define the loss internally, but we need to create additional placeholders for them 795 | for idx in range(len(self.loss)): 796 | - importance_weight = tf.placeholder('float', 797 | + importance_weight = tf.compat.v1.placeholder('float', 798 | [None] + [1] * (len(self.target[idx].shape) - 1), 799 | '{}_importance_weight'.format(self.get_name())) 800 | self.importance_weight.append(importance_weight) 801 | @@ -134,12 +134,12 @@ class Head(object): 802 | # add losses and target placeholder 803 | for idx in range(len(self.loss_type)): 804 | # create target placeholder 805 | - target = tf.placeholder('float', self.output[idx].shape, 
'{}_target'.format(self.get_name())) 806 | + target = tf.compat.v1.placeholder('float', self.output[idx].shape, '{}_target'.format(self.get_name())) 807 | self.target.append(target) 808 | 809 | # create importance sampling weights placeholder 810 | num_target_dims = len(self.target[idx].shape) 811 | - importance_weight = tf.placeholder('float', [None] + [1] * (num_target_dims - 1), 812 | + importance_weight = tf.compat.v1.placeholder('float', [None] + [1] * (num_target_dims - 1), 813 | '{}_importance_weight'.format(self.get_name())) 814 | self.importance_weight.append(importance_weight) 815 | 816 | @@ -153,13 +153,13 @@ class Head(object): 817 | loss = tf.reduce_mean(loss_weight*tf.reduce_sum(loss, axis=list(range(1, num_target_dims)))) 818 | 819 | # we add the loss to the losses collection and later we will extract it in general_network 820 | - tf.losses.add_loss(loss) 821 | + tf.compat.v1.losses.add_loss(loss) 822 | self.loss.append(loss) 823 | 824 | # add regularizations 825 | for regularization in self.regularizations: 826 | self.loss.append(regularization) 827 | - tf.losses.add_loss(regularization) 828 | + tf.compat.v1.losses.add_loss(regularization) 829 | 830 | @classmethod 831 | def path(cls): 832 | diff --git a/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py b/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py 833 | index 647abc3..2d95d25 100644 834 | --- a/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py 835 | +++ b/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py 836 | @@ -39,27 +39,27 @@ class MeasurementsPredictionHead(Head): 837 | def _build_module(self, input_layer): 838 | # This is almost exactly the same as Dueling Network but we predict the future measurements for each action 839 | # actions expectation tower (expectation stream) - E 840 | - with tf.variable_scope("expectation_stream"): 841 | + with tf.compat.v1.variable_scope("expectation_stream"): 842 | expectation_stream = self.dense_layer(256)(input_layer, activation=self.activation_function, name='fc1') 843 | expectation_stream = self.dense_layer(self.multi_step_measurements_size)(expectation_stream, name='output') 844 | expectation_stream = tf.expand_dims(expectation_stream, axis=1) 845 | 846 | # action fine differences tower (action stream) - A 847 | - with tf.variable_scope("action_stream"): 848 | + with tf.compat.v1.variable_scope("action_stream"): 849 | action_stream = self.dense_layer(256)(input_layer, activation=self.activation_function, name='fc1') 850 | action_stream = self.dense_layer(self.num_actions * self.multi_step_measurements_size)(action_stream, 851 | name='output') 852 | action_stream = tf.reshape(action_stream, 853 | (tf.shape(action_stream)[0], self.num_actions, self.multi_step_measurements_size)) 854 | - action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keepdims=True) 855 | + action_stream = action_stream - tf.reduce_mean(action_stream, axis=1, keepdims=True) 856 | 857 | # merge to future measurements predictions 858 | self.output = tf.add(expectation_stream, action_stream, name='output') 859 | - self.target = tf.placeholder(tf.float32, [None, self.num_actions, self.multi_step_measurements_size], 860 | + self.target = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions, self.multi_step_measurements_size], 861 | name="targets") 862 | - targets_nonan = tf.where(tf.is_nan(self.target), self.output, self.target) 863 | - 
self.loss = tf.reduce_sum(tf.reduce_mean(tf.square(targets_nonan - self.output), reduction_indices=0)) 864 | - tf.losses.add_loss(self.loss_weight[0] * self.loss) 865 | + targets_nonan = tf.compat.v1.where(tf.math.is_nan(self.target), self.output, self.target) 866 | + self.loss = tf.reduce_sum(tf.reduce_mean(tf.square(targets_nonan - self.output), axis=0)) 867 | + tf.compat.v1.losses.add_loss(self.loss_weight[0] * self.loss) 868 | 869 | def __str__(self): 870 | result = [ 871 | diff --git a/rl_coach/architectures/tensorflow_components/heads/naf_head.py b/rl_coach/architectures/tensorflow_components/heads/naf_head.py 872 | index 9071fed..2bcb724 100644 873 | --- a/rl_coach/architectures/tensorflow_components/heads/naf_head.py 874 | +++ b/rl_coach/architectures/tensorflow_components/heads/naf_head.py 875 | @@ -38,13 +38,13 @@ class NAFHead(Head): 876 | self.output_scale = self.spaces.action.max_abs_range 877 | self.return_type = QActionStateValue 878 | if agent_parameters.network_wrappers[self.network_name].replace_mse_with_huber_loss: 879 | - self.loss_type = tf.losses.huber_loss 880 | + self.loss_type = tf.compat.v1.losses.huber_loss 881 | else: 882 | - self.loss_type = tf.losses.mean_squared_error 883 | + self.loss_type = tf.compat.v1.losses.mean_squared_error 884 | 885 | def _build_module(self, input_layer): 886 | # NAF 887 | - self.action = tf.placeholder(tf.float32, [None, self.num_actions], name="action") 888 | + self.action = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], name="action") 889 | self.input = self.action 890 | 891 | # V Head 892 | diff --git a/rl_coach/architectures/tensorflow_components/heads/policy_head.py b/rl_coach/architectures/tensorflow_components/heads/policy_head.py 893 | index 540bd1a..792d52f 100644 894 | --- a/rl_coach/architectures/tensorflow_components/heads/policy_head.py 895 | +++ b/rl_coach/architectures/tensorflow_components/heads/policy_head.py 896 | @@ -16,6 +16,7 @@ 897 | 898 | import numpy as np 899 | import tensorflow as tf 900 | +import tensorflow_probability as tfp 901 | 902 | from rl_coach.architectures.tensorflow_components.layers import Dense 903 | from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer 904 | @@ -44,9 +45,9 @@ class PolicyHead(Head): 905 | if hasattr(agent_parameters.algorithm, 'beta_entropy'): 906 | # we set the beta value as a tf variable so it can be updated later if needed 907 | self.beta = tf.Variable(float(agent_parameters.algorithm.beta_entropy), 908 | - trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 909 | - self.beta_placeholder = tf.placeholder('float') 910 | - self.set_beta = tf.assign(self.beta, self.beta_placeholder) 911 | + trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 912 | + self.beta_placeholder = tf.compat.v1.placeholder('float') 913 | + self.set_beta = tf.compat.v1.assign(self.beta, self.beta_placeholder) 914 | 915 | # a scalar weight that penalizes high activation values (before the activation function) for the final layer 916 | if hasattr(agent_parameters.algorithm, 'action_penalty'): 917 | @@ -64,7 +65,7 @@ class PolicyHead(Head): 918 | 919 | # create a compound action network 920 | for action_space_idx, action_space in enumerate(action_spaces): 921 | - with tf.variable_scope("sub_action_{}".format(action_space_idx)): 922 | + with tf.compat.v1.variable_scope("sub_action_{}".format(action_space_idx)): 923 | if isinstance(action_space, DiscreteActionSpace): 924 | # create a discrete action network (softmax 
probabilities output) 925 | self._build_discrete_net(input_layer, action_space) 926 | @@ -81,27 +82,27 @@ class PolicyHead(Head): 927 | # calculate loss 928 | self.action_log_probs_wrt_policy = \ 929 | tf.add_n([dist.log_prob(action) for dist, action in zip(self.policy_distributions, self.actions)]) 930 | - self.advantages = tf.placeholder(tf.float32, [None], name="advantages") 931 | + self.advantages = tf.compat.v1.placeholder(tf.float32, [None], name="advantages") 932 | self.target = self.advantages 933 | self.loss = -tf.reduce_mean(self.action_log_probs_wrt_policy * self.advantages) 934 | - tf.losses.add_loss(self.loss_weight[0] * self.loss) 935 | + tf.compat.v1.losses.add_loss(self.loss_weight[0] * self.loss) 936 | 937 | def _build_discrete_net(self, input_layer, action_space): 938 | num_actions = len(action_space.actions) 939 | - self.actions.append(tf.placeholder(tf.int32, [None], name="actions")) 940 | + self.actions.append(tf.compat.v1.placeholder(tf.int32, [None], name="actions")) 941 | 942 | policy_values = self.dense_layer(num_actions)(input_layer, name='fc') 943 | self.policy_probs = tf.nn.softmax(policy_values, name="policy") 944 | 945 | # define the distributions for the policy and the old policy 946 | # (the + eps is to prevent probability 0 which will cause the log later on to be -inf) 947 | - policy_distribution = tf.contrib.distributions.Categorical(probs=(self.policy_probs + eps)) 948 | + policy_distribution = tf.compat.v1.distributions.Categorical(probs=(self.policy_probs + eps)) 949 | self.policy_distributions.append(policy_distribution) 950 | self.output.append(self.policy_probs) 951 | 952 | def _build_continuous_net(self, input_layer, action_space): 953 | num_actions = action_space.shape 954 | - self.actions.append(tf.placeholder(tf.float32, [None, num_actions], name="actions")) 955 | + self.actions.append(tf.compat.v1.placeholder(tf.float32, [None, num_actions], name="actions")) 956 | 957 | # output activation function 958 | if np.all(action_space.max_abs_range < np.inf): 959 | @@ -135,14 +136,14 @@ class PolicyHead(Head): 960 | # it as not trainable puts it for some reason in the global variables collections. If this is not done, 961 | # the variable won't be initialized and when working with multiple workers they will get stuck. 
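The comment above is the reason several heads in this patch keep manually created variables in the local-variables collection; a rough sketch of that pattern in graph mode, with a hypothetical variable name and shape (not the patched rl_coach code):

import tensorflow as tf
tf.compat.v1.disable_eager_execution()

# Registering the variable only in LOCAL_VARIABLES keeps it off the parameter server
# in distributed runs; tf.compat.v1.local_variables_initializer() then initialises it
# separately on every worker.
policy_std_example = tf.compat.v1.Variable(
    tf.ones([4], dtype=tf.float32),  # hypothetical shape, illustration only
    trainable=False,
    name='policy_stdev_example',
    collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES])
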
962 | self.policy_std = tf.Variable(np.ones(num_actions), dtype='float32', trainable=False, 963 | - name='policy_stdev', collections=[tf.GraphKeys.LOCAL_VARIABLES]) 964 | + name='policy_stdev', collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 965 | 966 | # assign op for the policy std 967 | - self.policy_std_placeholder = tf.placeholder('float32', (num_actions,)) 968 | - self.assign_policy_std = tf.assign(self.policy_std, self.policy_std_placeholder) 969 | + self.policy_std_placeholder = tf.compat.v1.placeholder('float32', (num_actions,)) 970 | + self.assign_policy_std = tf.compat.v1.assign(self.policy_std, self.policy_std_placeholder) 971 | 972 | # define the distributions for the policy and the old policy 973 | - policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.policy_mean, self.policy_std) 974 | + policy_distribution = tfp.distributions.MultivariateNormalDiag(self.policy_mean, self.policy_std) 975 | self.policy_distributions.append(policy_distribution) 976 | 977 | if self.is_local: 978 | diff --git a/rl_coach/architectures/tensorflow_components/heads/ppo_head.py b/rl_coach/architectures/tensorflow_components/heads/ppo_head.py 979 | index 63f95a3..1d17a9d 100644 980 | --- a/rl_coach/architectures/tensorflow_components/heads/ppo_head.py 981 | +++ b/rl_coach/architectures/tensorflow_components/heads/ppo_head.py 982 | @@ -16,6 +16,7 @@ 983 | 984 | import numpy as np 985 | import tensorflow as tf 986 | +import tensorflow_probability as tfp 987 | 988 | from rl_coach.architectures.tensorflow_components.layers import Dense 989 | from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer 990 | @@ -25,6 +26,11 @@ from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace 991 | from rl_coach.spaces import SpacesDefinition 992 | from rl_coach.utils import eps 993 | 994 | +# Since we are using log prob it is possible to encounter a 0 log 0 condition 995 | +# which will tank the training by producing NaNs, therefore it is necessary 996 | +# to add a zero offset to all networks with discrete distributions to prevent 997 | +# this issue 998 | +ZERO_OFFSET = 1e-8 999 | 1000 | class PPOHead(Head): 1001 | def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, 1002 | @@ -41,8 +47,8 @@ class PPOHead(Head): 1003 | # kl coefficient and its corresponding assignment operation and placeholder 1004 | self.kl_coefficient = tf.Variable(agent_parameters.algorithm.initial_kl_coefficient, 1005 | trainable=False, name='kl_coefficient') 1006 | - self.kl_coefficient_ph = tf.placeholder('float', name='kl_coefficient_ph') 1007 | - self.assign_kl_coefficient = tf.assign(self.kl_coefficient, self.kl_coefficient_ph) 1008 | + self.kl_coefficient_ph = tf.compat.v1.placeholder('float', name='kl_coefficient_ph') 1009 | + self.assign_kl_coefficient = tf.compat.v1.assign(self.kl_coefficient, self.kl_coefficient_ph) 1010 | self.kl_cutoff = 2 * agent_parameters.algorithm.target_kl_divergence 1011 | self.high_kl_penalty_coefficient = agent_parameters.algorithm.high_kl_penalty_coefficient 1012 | 1013 | @@ -63,7 +69,11 @@ class PPOHead(Head): 1014 | 1015 | # Used by regular PPO only 1016 | # add kl divergence regularization 1017 | - self.kl_divergence = tf.reduce_mean(tf.distributions.kl_divergence(self.old_policy_distribution, self.policy_distribution)) 1018 | + if isinstance(self.spaces.action, DiscreteActionSpace): 1019 | + self.kl_divergence =
tf.reduce_mean(tf.compat.v1.distributions.kl_divergence(self.old_policy_distribution, self.policy_distribution)) 1020 | + else: 1021 | + self.kl_divergence = tf.reduce_mean(tfp.distributions.kl_divergence(self.old_policy_distribution, self.policy_distribution)) 1022 | + 1023 | 1024 | if self.use_kl_regularization: 1025 | # no clipping => use kl regularization 1026 | @@ -72,12 +82,12 @@ class PPOHead(Head): 1027 | tf.square(tf.maximum(0.0, self.kl_divergence - self.kl_cutoff))] 1028 | 1029 | # calculate surrogate loss 1030 | - self.advantages = tf.placeholder(tf.float32, [None], name="advantages") 1031 | + self.advantages = tf.compat.v1.placeholder(tf.float32, [None], name="advantages") 1032 | self.target = self.advantages 1033 | # action_probs_wrt_old_policy != 0 because it is e^... 1034 | self.likelihood_ratio = tf.exp(self.action_probs_wrt_policy - self.action_probs_wrt_old_policy) 1035 | if self.clip_likelihood_ratio_using_epsilon is not None: 1036 | - self.clip_param_rescaler = tf.placeholder(tf.float32, ()) 1037 | + self.clip_param_rescaler = tf.compat.v1.placeholder(tf.float32, ()) 1038 | self.input.append(self.clip_param_rescaler) 1039 | max_value = 1 + self.clip_likelihood_ratio_using_epsilon * self.clip_param_rescaler 1040 | min_value = 1 - self.clip_likelihood_ratio_using_epsilon * self.clip_param_rescaler 1041 | @@ -95,51 +105,51 @@ class PPOHead(Head): 1042 | self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')] 1043 | 1044 | self.loss = self.surrogate_loss 1045 | - tf.losses.add_loss(self.loss) 1046 | + tf.compat.v1.losses.add_loss(self.loss) 1047 | 1048 | def _build_discrete_net(self, input_layer, action_space): 1049 | num_actions = len(action_space.actions) 1050 | - self.actions = tf.placeholder(tf.int32, [None], name="actions") 1051 | + self.actions = tf.compat.v1.placeholder(tf.int32, [None], name="actions") 1052 | 1053 | - self.old_policy_mean = tf.placeholder(tf.float32, [None, num_actions], "old_policy_mean") 1054 | - self.old_policy_std = tf.placeholder(tf.float32, [None, num_actions], "old_policy_std") 1055 | + self.old_policy_mean = tf.compat.v1.placeholder(tf.float32, [None, num_actions], "old_policy_mean") 1056 | + self.old_policy_std = tf.compat.v1.placeholder(tf.float32, [None, num_actions], "old_policy_std") 1057 | 1058 | # Policy Head 1059 | self.input = [self.actions, self.old_policy_mean] 1060 | policy_values = self.dense_layer(num_actions)(input_layer, name='policy_fc') 1061 | - self.policy_mean = tf.nn.softmax(policy_values, name="policy") 1062 | + # Prevent distributions with 0 values 1063 | + self.policy_mean = tf.maximum(tf.nn.softmax(policy_values, name="policy"), ZERO_OFFSET) 1064 | 1065 | # define the distributions for the policy and the old policy 1066 | - self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean) 1067 | - self.old_policy_distribution = tf.contrib.distributions.Categorical(probs=self.old_policy_mean) 1068 | + self.policy_distribution = tf.compat.v1.distributions.Categorical(probs=self.policy_mean) 1069 | + self.old_policy_distribution = tf.compat.v1.distributions.Categorical(probs=self.old_policy_mean) 1070 | 1071 | self.output = self.policy_mean 1072 | 1073 | def _build_continuous_net(self, input_layer, action_space): 1074 | num_actions = action_space.shape[0] 1075 | - self.actions = tf.placeholder(tf.float32, [None, num_actions], name="actions") 1076 | + self.actions = tf.compat.v1.placeholder(tf.float32, [None, num_actions], name="actions") 1077 | 1078 | - 
self.old_policy_mean = tf.placeholder(tf.float32, [None, num_actions], "old_policy_mean") 1079 | - self.old_policy_std = tf.placeholder(tf.float32, [None, num_actions], "old_policy_std") 1080 | + self.old_policy_mean = tf.compat.v1.placeholder(tf.float32, [None, num_actions], "old_policy_mean") 1081 | + self.old_policy_std = tf.compat.v1.placeholder(tf.float32, [None, num_actions], "old_policy_std") 1082 | 1083 | self.input = [self.actions, self.old_policy_mean, self.old_policy_std] 1084 | - self.policy_mean = self.dense_layer(num_actions)(input_layer, name='policy_mean', 1085 | - kernel_initializer=normalized_columns_initializer(0.01)) 1086 | - 1087 | + self.policy_mean = tf.identity(self.dense_layer(num_actions)(input_layer, name='policy_mean', 1088 | + kernel_initializer=normalized_columns_initializer(0.01)), name="policy") 1089 | # for local networks in distributed settings, we need to move variables we create manually to the 1090 | # tf.GraphKeys.LOCAL_VARIABLES collection, since the variable scope custom getter which is set in 1091 | # Architecture does not apply to them 1092 | if self.is_local and isinstance(self.ap.task_parameters, DistributedTaskParameters): 1093 | self.policy_logstd = tf.Variable(np.zeros((1, num_actions)), dtype='float32', 1094 | - collections=[tf.GraphKeys.LOCAL_VARIABLES], name="policy_log_std") 1095 | + collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES], name="policy_log_std") 1096 | else: 1097 | self.policy_logstd = tf.Variable(np.zeros((1, num_actions)), dtype='float32', name="policy_log_std") 1098 | 1099 | - self.policy_std = tf.tile(tf.exp(self.policy_logstd), [tf.shape(input_layer)[0], 1], name='policy_std') 1100 | + self.policy_std = tf.tile(tf.exp(tf.clip_by_value(self.policy_logstd, -20.0, 3.0)), [tf.shape(input_layer)[0], 1], name='policy_std') 1101 | 1102 | # define the distributions for the policy and the old policy 1103 | - self.policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.policy_mean, self.policy_std + eps) 1104 | - self.old_policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.old_policy_mean, self.old_policy_std + eps) 1105 | + self.policy_distribution = tfp.distributions.MultivariateNormalDiag(self.policy_mean, self.policy_std + eps) 1106 | + self.old_policy_distribution = tfp.distributions.MultivariateNormalDiag(self.old_policy_mean, self.old_policy_std + eps) 1107 | 1108 | self.output = [self.policy_mean, self.policy_std] 1109 | 1110 | diff --git a/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py b/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py 1111 | index e2abbfc..1907fe9 100644 1112 | --- a/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py 1113 | +++ b/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py 1114 | @@ -35,12 +35,12 @@ class PPOVHead(Head): 1115 | self.output_bias_initializer = output_bias_initializer 1116 | 1117 | def _build_module(self, input_layer): 1118 | - self.old_policy_value = tf.placeholder(tf.float32, [None], "old_policy_values") 1119 | + self.old_policy_value = tf.compat.v1.placeholder(tf.float32, [None], "old_policy_values") 1120 | self.input = [self.old_policy_value] 1121 | self.output = self.dense_layer(1)(input_layer, name='output', 1122 | kernel_initializer=normalized_columns_initializer(1.0), 1123 | bias_initializer=self.output_bias_initializer) 1124 | - self.target = self.total_return = tf.placeholder(tf.float32, [None], name="total_return") 1125 | + self.target = self.total_return = 
tf.compat.v1.placeholder(tf.float32, [None], name="total_return") 1126 | 1127 | value_loss_1 = tf.square(self.output - self.target) 1128 | value_loss_2 = tf.square(self.old_policy_value + 1129 | @@ -49,7 +49,7 @@ class PPOVHead(Head): 1130 | self.clip_likelihood_ratio_using_epsilon) - self.target) 1131 | self.vf_loss = tf.reduce_mean(tf.maximum(value_loss_1, value_loss_2)) 1132 | self.loss = self.vf_loss 1133 | - tf.losses.add_loss(self.loss) 1134 | + tf.compat.v1.losses.add_loss(self.loss) 1135 | 1136 | def __str__(self): 1137 | result = [ 1138 | diff --git a/rl_coach/architectures/tensorflow_components/heads/q_head.py b/rl_coach/architectures/tensorflow_components/heads/q_head.py 1139 | index 0bd120b..2a9470b 100644 1140 | --- a/rl_coach/architectures/tensorflow_components/heads/q_head.py 1141 | +++ b/rl_coach/architectures/tensorflow_components/heads/q_head.py 1142 | @@ -42,9 +42,9 @@ class QHead(Head): 1143 | ) 1144 | self.return_type = QActionStateValue 1145 | if agent_parameters.network_wrappers[self.network_name].replace_mse_with_huber_loss: 1146 | - self.loss_type = tf.losses.huber_loss 1147 | + self.loss_type = tf.compat.v1.losses.huber_loss 1148 | else: 1149 | - self.loss_type = tf.losses.mean_squared_error 1150 | + self.loss_type = tf.compat.v1.losses.mean_squared_error 1151 | 1152 | self.output_bias_initializer = output_bias_initializer 1153 | 1154 | diff --git a/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py b/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py 1155 | index 4e32e91..9bce7dc 100644 1156 | --- a/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py 1157 | +++ b/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py 1158 | @@ -38,8 +38,8 @@ class QuantileRegressionQHead(QHead): 1159 | self.loss_type = [] 1160 | 1161 | def _build_module(self, input_layer): 1162 | - self.actions = tf.placeholder(tf.int32, [None, 2], name="actions") 1163 | - self.quantile_midpoints = tf.placeholder(tf.float32, [None, self.num_atoms], name="quantile_midpoints") 1164 | + self.actions = tf.compat.v1.placeholder(tf.int32, [None, 2], name="actions") 1165 | + self.quantile_midpoints = tf.compat.v1.placeholder(tf.float32, [None, self.num_atoms], name="quantile_midpoints") 1166 | self.input = [self.actions, self.quantile_midpoints] 1167 | 1168 | # the output of the head is the N unordered quantile locations {theta_1, ..., theta_N} 1169 | @@ -48,7 +48,7 @@ class QuantileRegressionQHead(QHead): 1170 | quantiles_locations = tf.reshape(quantiles_locations, (tf.shape(quantiles_locations)[0], self.num_actions, self.num_atoms)) 1171 | self.output = quantiles_locations 1172 | 1173 | - self.quantiles = tf.placeholder(tf.float32, shape=(None, self.num_atoms), name="quantiles") 1174 | + self.quantiles = tf.compat.v1.placeholder(tf.float32, shape=(None, self.num_atoms), name="quantiles") 1175 | self.target = self.quantiles 1176 | 1177 | # only the quantiles of the taken action are taken into account 1178 | @@ -73,7 +73,7 @@ class QuantileRegressionQHead(QHead): 1179 | # Quantile regression loss (the probability for each quantile is 1/num_quantiles) 1180 | quantile_regression_loss = tf.reduce_sum(quantile_huber_loss) / float(self.num_atoms) 1181 | self.loss = quantile_regression_loss 1182 | - tf.losses.add_loss(self.loss) 1183 | + tf.compat.v1.losses.add_loss(self.loss) 1184 | 1185 | self.q_values = tf.tensordot(tf.cast(self.output, tf.float64), self.quantile_probabilities, 1) 1186 | 1187 
| diff --git a/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py b/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py 1188 | index f7f0ba4..b138fc7 100644 1189 | --- a/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py 1190 | +++ b/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py 1191 | @@ -37,13 +37,13 @@ class RainbowQHead(QHead): 1192 | 1193 | def _build_module(self, input_layer): 1194 | # state value tower - V 1195 | - with tf.variable_scope("state_value"): 1196 | + with tf.compat.v1.variable_scope("state_value"): 1197 | state_value = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') 1198 | state_value = self.dense_layer(self.num_atoms)(state_value, name='fc2') 1199 | state_value = tf.expand_dims(state_value, axis=1) 1200 | 1201 | # action advantage tower - A 1202 | - with tf.variable_scope("action_advantage"): 1203 | + with tf.compat.v1.variable_scope("action_advantage"): 1204 | action_advantage = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') 1205 | action_advantage = self.dense_layer(self.num_actions * self.num_atoms)(action_advantage, name='fc2') 1206 | action_advantage = tf.reshape(action_advantage, (tf.shape(input_layer)[0], self.num_actions, 1207 | @@ -58,11 +58,11 @@ class RainbowQHead(QHead): 1208 | self.output = tf.nn.softmax(values_distribution) 1209 | 1210 | # calculate cross entropy loss 1211 | - self.distributions = tf.placeholder(tf.float32, shape=(None, self.num_actions, self.num_atoms), 1212 | + self.distributions = tf.compat.v1.placeholder(tf.float32, shape=(None, self.num_actions, self.num_atoms), 1213 | name="distributions") 1214 | self.target = self.distributions 1215 | - self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution) 1216 | - tf.losses.add_loss(self.loss) 1217 | + self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(self.target), logits=values_distribution) 1218 | + tf.compat.v1.losses.add_loss(self.loss) 1219 | 1220 | self.q_values = tf.tensordot(tf.cast(self.output, tf.float64), self.z_values, 1) 1221 | 1222 | diff --git a/rl_coach/architectures/tensorflow_components/heads/sac_head.py b/rl_coach/architectures/tensorflow_components/heads/sac_head.py 1223 | index aad9bfc..dfd58c3 100644 1224 | --- a/rl_coach/architectures/tensorflow_components/heads/sac_head.py 1225 | +++ b/rl_coach/architectures/tensorflow_components/heads/sac_head.py 1226 | @@ -15,6 +15,7 @@ 1227 | # 1228 | 1229 | import tensorflow as tf 1230 | +import tensorflow_probability as tfp 1231 | 1232 | from rl_coach.architectures.tensorflow_components.layers import Dense 1233 | from rl_coach.architectures.tensorflow_components.heads.head import Head 1234 | @@ -39,7 +40,7 @@ class SACPolicyHead(Head): 1235 | self.squash = squash # squashing using tanh 1236 | 1237 | def _build_module(self, input_layer): 1238 | - self.given_raw_actions = tf.placeholder(tf.float32, [None, self.num_actions], name="actions") 1239 | + self.given_raw_actions = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], name="actions") 1240 | self.input = [self.given_raw_actions] 1241 | self.output = [] 1242 | 1243 | @@ -55,7 +56,7 @@ class SACPolicyHead(Head): 1244 | ''' 1245 | if not self.squash: 1246 | return 0 1247 | - return tf.reduce_sum(tf.log(1 - tf.tanh(actions) ** 2 + eps), axis=1) 1248 | + return tf.reduce_sum(tf.math.log(1 - tf.tanh(actions) ** 2 + eps), axis=1) 1249 | 1250 | def 
_build_continuous_net(self, input_layer, action_space): 1251 | num_actions = action_space.shape[0] 1252 | @@ -70,8 +71,7 @@ class SACPolicyHead(Head): 1253 | 1254 | # define the distributions for the policy 1255 | # Tensorflow's multivariate normal distribution supports reparameterization 1256 | - tfd = tf.contrib.distributions 1257 | - self.policy_distribution = tfd.MultivariateNormalDiag(loc=self.policy_mean, 1258 | + self.policy_distribution = tfp.distributions.MultivariateNormalDiag(loc=self.policy_mean, 1259 | scale_diag=tf.exp(self.policy_log_std)) 1260 | 1261 | # define network outputs 1262 | diff --git a/rl_coach/architectures/tensorflow_components/heads/sac_q_head.py b/rl_coach/architectures/tensorflow_components/heads/sac_q_head.py 1263 | index dbac165..d6b4e9d 100644 1264 | --- a/rl_coach/architectures/tensorflow_components/heads/sac_q_head.py 1265 | +++ b/rl_coach/architectures/tensorflow_components/heads/sac_q_head.py 1266 | @@ -48,15 +48,15 @@ class SACQHead(Head): 1267 | # state is the observation fed through the input_layer, action is fed through placeholder to the header 1268 | # each is calculating q value : q1(s,a) and q2(s,a) 1269 | # the output of the head is min(q1,q2) 1270 | - self.actions = tf.placeholder(tf.float32, [None, self.num_actions], name="actions") 1271 | - self.target = tf.placeholder(tf.float32, [None, 1], name="q_targets") 1272 | + self.actions = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], name="actions") 1273 | + self.target = tf.compat.v1.placeholder(tf.float32, [None, 1], name="q_targets") 1274 | self.input = [self.actions] 1275 | self.output = [] 1276 | # Note (1) : in the author's implementation of sac (in rllab) they summarize the embedding of observation and 1277 | # action (broadcasting the bias) in the first layer of the network. 
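As a rough illustration of the embedding summation described in the note above (a hedged sketch with an arbitrary layer size, not the patched rl_coach code), the first hidden layer of each Q head can be formed by embedding the observation and the action separately and adding the results:

import tensorflow as tf
tf.compat.v1.disable_eager_execution()

def q_head_first_layer(observation, actions, units=256):
    # Embed state and action separately, then sum, so the action enters the network
    # in the first layer (the dense-layer bias broadcasts over the summed embedding).
    obs_emb = tf.compat.v1.layers.dense(observation, units, activation=tf.nn.relu, name='obs_emb')
    act_emb = tf.compat.v1.layers.dense(actions, units, activation=tf.nn.relu, name='act_emb')
    return obs_emb + act_emb
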
1278 | 1279 | # build q1 network head 1280 | - with tf.variable_scope("q1_head"): 1281 | + with tf.compat.v1.variable_scope("q1_head"): 1282 | layer_size = self.network_layers_sizes[0] 1283 | qi_obs_emb = self.dense_layer(layer_size)(input_layer, activation=self.activation_function) 1284 | qi_act_emb = self.dense_layer(layer_size)(self.actions, activation=self.activation_function) 1285 | @@ -68,7 +68,7 @@ class SACQHead(Head): 1286 | bias_initializer=self.output_bias_initializer) 1287 | 1288 | # build q2 network head 1289 | - with tf.variable_scope("q2_head"): 1290 | + with tf.compat.v1.variable_scope("q2_head"): 1291 | layer_size = self.network_layers_sizes[0] 1292 | qi_obs_emb = self.dense_layer(layer_size)(input_layer, activation=self.activation_function) 1293 | qi_act_emb = self.dense_layer(layer_size)(self.actions, activation=self.activation_function) 1294 | @@ -93,7 +93,7 @@ class SACQHead(Head): 1295 | self.q2_loss = 0.5*tf.reduce_mean(tf.square(self.q2_output - self.target)) 1296 | # eventually both losses are depends on different parameters so we can sum them up 1297 | self.loss = self.q1_loss+self.q2_loss 1298 | - tf.losses.add_loss(self.loss) 1299 | + tf.compat.v1.losses.add_loss(self.loss) 1300 | 1301 | def __str__(self): 1302 | result = [ 1303 | diff --git a/rl_coach/architectures/tensorflow_components/heads/td3_v_head.py b/rl_coach/architectures/tensorflow_components/heads/td3_v_head.py 1304 | index 1457e32..8420f56 100644 1305 | --- a/rl_coach/architectures/tensorflow_components/heads/td3_v_head.py 1306 | +++ b/rl_coach/architectures/tensorflow_components/heads/td3_v_head.py 1307 | @@ -40,7 +40,7 @@ class TD3VHead(Head): 1308 | def _build_module(self, input_layer): 1309 | # Standard V Network 1310 | q_outputs = [] 1311 | - self.target = tf.placeholder(tf.float32, shape=(None, 1), name="q_networks_min_placeholder") 1312 | + self.target = tf.compat.v1.placeholder(tf.float32, shape=(None, 1), name="q_networks_min_placeholder") 1313 | 1314 | for i in range(input_layer.shape[0]): # assuming that the actual size is 2, as there are two critic networks 1315 | if self.initializer == 'normalized_columns': 1316 | @@ -57,7 +57,7 @@ class TD3VHead(Head): 1317 | self.output.append(tf.reduce_min(q_outputs, axis=0)) 1318 | self.output.append(tf.reduce_mean(self.output[0])) 1319 | self.loss = sum(self.loss) 1320 | - tf.losses.add_loss(self.loss) 1321 | + tf.compat.v1.losses.add_loss(self.loss) 1322 | 1323 | def __str__(self): 1324 | result = [ 1325 | diff --git a/rl_coach/architectures/tensorflow_components/heads/v_head.py b/rl_coach/architectures/tensorflow_components/heads/v_head.py 1326 | index 16ff185..1f0d00a 100644 1327 | --- a/rl_coach/architectures/tensorflow_components/heads/v_head.py 1328 | +++ b/rl_coach/architectures/tensorflow_components/heads/v_head.py 1329 | @@ -33,9 +33,9 @@ class VHead(Head): 1330 | self.return_type = VStateValue 1331 | 1332 | if agent_parameters.network_wrappers[self.network_name.split('/')[0]].replace_mse_with_huber_loss: 1333 | - self.loss_type = tf.losses.huber_loss 1334 | + self.loss_type = tf.compat.v1.losses.huber_loss 1335 | else: 1336 | - self.loss_type = tf.losses.mean_squared_error 1337 | + self.loss_type = tf.compat.v1.losses.mean_squared_error 1338 | 1339 | self.initializer = initializer 1340 | self.output_bias_initializer = output_bias_initializer 1341 | diff --git a/rl_coach/architectures/tensorflow_components/layers.py b/rl_coach/architectures/tensorflow_components/layers.py 1342 | index 91c0c30..6fb60a8 100644 1343 | --- 
a/rl_coach/architectures/tensorflow_components/layers.py 1344 | +++ b/rl_coach/architectures/tensorflow_components/layers.py 1345 | @@ -22,7 +22,7 @@ import tensorflow as tf 1346 | from rl_coach.architectures import layers 1347 | from rl_coach.architectures.tensorflow_components import utils 1348 | 1349 | - 1350 | +tf.compat.v1.disable_resource_variables() 1351 | def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dropout_rate, is_training, name): 1352 | layers = [input_layer] 1353 | 1354 | @@ -32,7 +32,7 @@ def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dr 1355 | # batchnorm 1356 | if batchnorm: 1357 | layers.append( 1358 | - tf.layers.batch_normalization(layers[-1], name="{}_batchnorm".format(name), training=is_training) 1359 | + tf.compat.v1.layers.batch_normalization(layers[-1], name="{}_batchnorm".format(name), training=is_training) 1360 | ) 1361 | 1362 | # activation 1363 | @@ -46,7 +46,7 @@ def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dr 1364 | # dropout 1365 | if dropout_rate > 0: 1366 | layers.append( 1367 | - tf.layers.dropout(layers[-1], dropout_rate, name="{}_dropout".format(name), training=is_training) 1368 | + tf.compat.v1.layers.dropout(layers[-1], dropout_rate, name="{}_dropout".format(name), training=is_training) 1369 | ) 1370 | 1371 | # remove the input layer from the layers list 1372 | @@ -116,7 +116,7 @@ class Conv2d(layers.Conv2d): 1373 | :param name: layer name 1374 | :return: conv2d layer 1375 | """ 1376 | - return tf.layers.conv2d(input_layer, filters=self.num_filters, kernel_size=self.kernel_size, 1377 | + return tf.compat.v1.layers.conv2d(input_layer, filters=self.num_filters, kernel_size=self.kernel_size, 1378 | strides=self.strides, data_format='channels_last', name=name) 1379 | 1380 | @staticmethod 1381 | @@ -177,8 +177,8 @@ class Dense(layers.Dense): 1382 | :return: dense layer 1383 | """ 1384 | if bias_initializer is None: 1385 | - bias_initializer = tf.zeros_initializer() 1386 | - return tf.layers.dense(input_layer, self.units, name=name, kernel_initializer=kernel_initializer, 1387 | + bias_initializer = tf.compat.v1.zeros_initializer() 1388 | + return tf.compat.v1.layers.dense(input_layer, self.units, name=name, kernel_initializer=kernel_initializer, 1389 | activation=activation, bias_initializer=bias_initializer) 1390 | 1391 | @staticmethod 1392 | @@ -222,8 +222,8 @@ class NoisyNetDense(layers.NoisyNetDense): 1393 | def _factorized_noise(inputs, outputs): 1394 | # TODO: use factorized noise only for compute intensive algos (e.g. DQN). 1395 | # lighter algos (e.g. 
DQN) should not use it 1396 | - noise1 = _f(tf.random_normal((inputs, 1))) 1397 | - noise2 = _f(tf.random_normal((1, outputs))) 1398 | + noise1 = _f(tf.random.normal((inputs, 1))) 1399 | + noise2 = _f(tf.random.normal((1, outputs))) 1400 | return tf.matmul(noise1, noise2) 1401 | 1402 | num_inputs = input_layer.get_shape()[-1].value 1403 | @@ -233,22 +233,22 @@ class NoisyNetDense(layers.NoisyNetDense): 1404 | activation = activation if activation is not None else (lambda x: x) 1405 | 1406 | if kernel_initializer is None: 1407 | - kernel_mean_initializer = tf.random_uniform_initializer(-stddev, stddev) 1408 | - kernel_stddev_initializer = tf.random_uniform_initializer(-stddev * self.sigma0, stddev * self.sigma0) 1409 | + kernel_mean_initializer = tf.compat.v1.random_uniform_initializer(-stddev, stddev) 1410 | + kernel_stddev_initializer = tf.compat.v1.random_uniform_initializer(-stddev * self.sigma0, stddev * self.sigma0) 1411 | else: 1412 | kernel_mean_initializer = kernel_stddev_initializer = kernel_initializer 1413 | if bias_initializer is None: 1414 | - bias_initializer = tf.zeros_initializer() 1415 | - with tf.variable_scope(None, default_name=name): 1416 | - weight_mean = tf.get_variable('weight_mean', shape=(num_inputs, num_outputs), 1417 | + bias_initializer = tf.compat.v1.zeros_initializer() 1418 | + with tf.compat.v1.variable_scope(None, default_name=name): 1419 | + weight_mean = tf.compat.v1.get_variable('weight_mean', shape=(num_inputs, num_outputs), 1420 | initializer=kernel_mean_initializer) 1421 | - bias_mean = tf.get_variable('bias_mean', shape=(num_outputs,), initializer=bias_initializer) 1422 | + bias_mean = tf.compat.v1.get_variable('bias_mean', shape=(num_outputs,), initializer=bias_initializer) 1423 | 1424 | - weight_stddev = tf.get_variable('weight_stddev', shape=(num_inputs, num_outputs), 1425 | + weight_stddev = tf.compat.v1.get_variable('weight_stddev', shape=(num_inputs, num_outputs), 1426 | initializer=kernel_stddev_initializer) 1427 | - bias_stddev = tf.get_variable('bias_stddev', shape=(num_outputs,), 1428 | + bias_stddev = tf.compat.v1.get_variable('bias_stddev', shape=(num_outputs,), 1429 | initializer=kernel_stddev_initializer) 1430 | - bias_noise = _f(tf.random_normal((num_outputs,))) 1431 | + bias_noise = _f(tf.random.normal((num_outputs,))) 1432 | weight_noise = _factorized_noise(num_inputs, num_outputs) 1433 | 1434 | bias = bias_mean + bias_stddev * bias_noise 1435 | diff --git a/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py 1436 | index 6ca9cd7..f4005d9 100644 1437 | --- a/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py 1438 | +++ b/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py 1439 | @@ -57,17 +57,17 @@ class LSTMMiddleware(Middleware): 1440 | )) 1441 | 1442 | # add the LSTM layer 1443 | - lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.number_of_lstm_cells, state_is_tuple=True) 1444 | + lstm_cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(self.number_of_lstm_cells, state_is_tuple=True) 1445 | self.c_init = np.zeros((1, lstm_cell.state_size.c), np.float32) 1446 | self.h_init = np.zeros((1, lstm_cell.state_size.h), np.float32) 1447 | self.state_init = [self.c_init, self.h_init] 1448 | - self.c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c]) 1449 | - self.h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h]) 1450 | + self.c_in = tf.compat.v1.placeholder(tf.float32, [1, 
lstm_cell.state_size.c]) 1451 | + self.h_in = tf.compat.v1.placeholder(tf.float32, [1, lstm_cell.state_size.h]) 1452 | self.state_in = (self.c_in, self.h_in) 1453 | rnn_in = tf.expand_dims(self.layers[-1], [0]) 1454 | step_size = tf.shape(self.layers[-1])[:1] 1455 | - state_in = tf.nn.rnn_cell.LSTMStateTuple(self.c_in, self.h_in) 1456 | - lstm_outputs, lstm_state = tf.nn.dynamic_rnn( 1457 | + state_in = tf.compat.v1.nn.rnn_cell.LSTMStateTuple(self.c_in, self.h_in) 1458 | + lstm_outputs, lstm_state = tf.compat.v1.nn.dynamic_rnn( 1459 | lstm_cell, rnn_in, initial_state=state_in, sequence_length=step_size, time_major=False) 1460 | lstm_c, lstm_h = lstm_state 1461 | self.state_out = (lstm_c[:1, :], lstm_h[:1, :]) 1462 | diff --git a/rl_coach/architectures/tensorflow_components/middlewares/middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/middleware.py 1463 | index 64c578f..b52d262 100644 1464 | --- a/rl_coach/architectures/tensorflow_components/middlewares/middleware.py 1465 | +++ b/rl_coach/architectures/tensorflow_components/middlewares/middleware.py 1466 | @@ -71,7 +71,7 @@ class Middleware(object): 1467 | :param input_layer: the input to the graph 1468 | :return: the input placeholder and the output of the last layer 1469 | """ 1470 | - with tf.variable_scope(self.get_name()): 1471 | + with tf.compat.v1.variable_scope(self.get_name()): 1472 | self.input = input_layer 1473 | self._build_module() 1474 | 1475 | diff --git a/rl_coach/architectures/tensorflow_components/savers.py b/rl_coach/architectures/tensorflow_components/savers.py 1476 | index 531c523..ae92826 100644 1477 | --- a/rl_coach/architectures/tensorflow_components/savers.py 1478 | +++ b/rl_coach/architectures/tensorflow_components/savers.py 1479 | @@ -28,22 +28,22 @@ class GlobalVariableSaver(Saver): 1480 | self._names = [name] 1481 | # if graph is finalized, savers must have already already been added. This happens 1482 | # in the case of a MonitoredSession 1483 | - self._variables = tf.global_variables() 1484 | + self._variables = tf.compat.v1.trainable_variables() 1485 | 1486 | # target network is never saved or restored directly from checkpoint, so we are removing all its variables from the list 1487 | # the target network would be synched back from the online network in graph_manager.improve(...), at the beginning of the run flow. 1488 | - self._variables = [v for v in self._variables if "/target" not in v.name] 1489 | + self._variables = [v for v in self._variables if ('/target' not in v.name and name.split('/')[0]+'/' in v.name)] 1490 | 1491 | # Using a placeholder to update the variable during restore to avoid memory leak. 
1492 | # Ref: https://github.com/tensorflow/tensorflow/issues/4151 1493 | self._variable_placeholders = [] 1494 | self._variable_update_ops = [] 1495 | for v in self._variables: 1496 | - variable_placeholder = tf.placeholder(v.dtype, shape=v.get_shape()) 1497 | + variable_placeholder = tf.compat.v1.placeholder(v.dtype, shape=v.get_shape()) 1498 | self._variable_placeholders.append(variable_placeholder) 1499 | self._variable_update_ops.append(v.assign(variable_placeholder)) 1500 | 1501 | - self._saver = tf.train.Saver(self._variables, max_to_keep=None) 1502 | + self._saver = tf.compat.v1.train.Saver(self._variables, max_to_keep=None) 1503 | 1504 | @property 1505 | def path(self): 1506 | @@ -118,7 +118,7 @@ class GlobalVariableSaver(Saver): 1507 | # We don't use saver.restore() because checkpoint is loaded to online 1508 | # network, but if the checkpoint is from the global network, a namespace 1509 | # mismatch exists and variable name must be modified before loading. 1510 | - reader = tf.contrib.framework.load_checkpoint(restore_path) 1511 | + reader = tf.train.load_checkpoint(restore_path) 1512 | for var_name, _ in reader.get_variable_to_shape_map().items(): 1513 | yield var_name, reader.get_tensor(var_name) 1514 | 1515 | diff --git a/rl_coach/architectures/tensorflow_components/shared_variables.py b/rl_coach/architectures/tensorflow_components/shared_variables.py 1516 | index fe805af..5278b70 100644 1517 | --- a/rl_coach/architectures/tensorflow_components/shared_variables.py 1518 | +++ b/rl_coach/architectures/tensorflow_components/shared_variables.py 1519 | @@ -21,7 +21,7 @@ import tensorflow as tf 1520 | 1521 | from rl_coach.utilities.shared_running_stats import SharedRunningStats 1522 | 1523 | - 1524 | +tf.compat.v1.disable_resource_variables() 1525 | class TFSharedRunningStats(SharedRunningStats): 1526 | def __init__(self, replicated_device=None, epsilon=1e-2, name="", create_ops=True, pubsub_params=None): 1527 | super().__init__(name=name, pubsub_params=pubsub_params) 1528 | @@ -42,39 +42,39 @@ class TFSharedRunningStats(SharedRunningStats): 1529 | """ 1530 | 1531 | self.clip_values = clip_values 1532 | - with tf.variable_scope(self.name): 1533 | - self._sum = tf.get_variable( 1534 | + with tf.compat.v1.variable_scope(self.name): 1535 | + self._sum = tf.compat.v1.get_variable( 1536 | dtype=tf.float64, 1537 | - initializer=tf.constant_initializer(0.0), 1538 | + initializer=tf.compat.v1.constant_initializer(0.0), 1539 | name="running_sum", trainable=False, shape=shape, validate_shape=False, 1540 | - collections=[tf.GraphKeys.GLOBAL_VARIABLES]) 1541 | - self._sum_squares = tf.get_variable( 1542 | + collections=[tf.compat.v1.GraphKeys.GLOBAL_VARIABLES]) 1543 | + self._sum_squares = tf.compat.v1.get_variable( 1544 | dtype=tf.float64, 1545 | - initializer=tf.constant_initializer(self.epsilon), 1546 | + initializer=tf.compat.v1.constant_initializer(self.epsilon), 1547 | name="running_sum_squares", trainable=False, shape=shape, validate_shape=False, 1548 | - collections=[tf.GraphKeys.GLOBAL_VARIABLES]) 1549 | - self._count = tf.get_variable( 1550 | + collections=[tf.compat.v1.GraphKeys.GLOBAL_VARIABLES]) 1551 | + self._count = tf.compat.v1.get_variable( 1552 | dtype=tf.float64, 1553 | shape=(), 1554 | - initializer=tf.constant_initializer(self.epsilon), 1555 | - name="count", trainable=False, collections=[tf.GraphKeys.GLOBAL_VARIABLES]) 1556 | + initializer=tf.compat.v1.constant_initializer(self.epsilon), 1557 | + name="count", trainable=False, 
collections=[tf.compat.v1.GraphKeys.GLOBAL_VARIABLES]) 1558 | 1559 | self._shape = None 1560 | - self._mean = tf.div(self._sum, self._count, name="mean") 1561 | + self._mean = tf.compat.v1.div(self._sum, self._count, name="mean") 1562 | self._std = tf.sqrt(tf.maximum((self._sum_squares - self._count * tf.square(self._mean)) 1563 | / tf.maximum(self._count-1, 1), self.epsilon), name="stdev") 1564 | self.tf_mean = tf.cast(self._mean, 'float32') 1565 | self.tf_std = tf.cast(self._std, 'float32') 1566 | 1567 | - self.new_sum = tf.placeholder(dtype=tf.float64, name='sum') 1568 | - self.new_sum_squares = tf.placeholder(dtype=tf.float64, name='var') 1569 | - self.newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count') 1570 | + self.new_sum = tf.compat.v1.placeholder(dtype=tf.float64, name='sum') 1571 | + self.new_sum_squares = tf.compat.v1.placeholder(dtype=tf.float64, name='var') 1572 | + self.newcount = tf.compat.v1.placeholder(shape=[], dtype=tf.float64, name='count') 1573 | 1574 | - self._inc_sum = tf.assign_add(self._sum, self.new_sum, use_locking=True) 1575 | - self._inc_sum_squares = tf.assign_add(self._sum_squares, self.new_sum_squares, use_locking=True) 1576 | - self._inc_count = tf.assign_add(self._count, self.newcount, use_locking=True) 1577 | + self._inc_sum = tf.compat.v1.assign_add(self._sum, self.new_sum, use_locking=True) 1578 | + self._inc_sum_squares = tf.compat.v1.assign_add(self._sum_squares, self.new_sum_squares, use_locking=True) 1579 | + self._inc_count = tf.compat.v1.assign_add(self._count, self.newcount, use_locking=True) 1580 | 1581 | - self.raw_obs = tf.placeholder(dtype=tf.float64, name='raw_obs') 1582 | + self.raw_obs = tf.compat.v1.placeholder(dtype=tf.float64, name='raw_obs') 1583 | self.normalized_obs = (self.raw_obs - self._mean) / self._std 1584 | if self.clip_values is not None: 1585 | self.clipped_obs = tf.clip_by_value(self.normalized_obs, self.clip_values[0], self.clip_values[1]) 1586 | diff --git a/rl_coach/core_types.py b/rl_coach/core_types.py 1587 | index c173318..58fd0bc 100644 1588 | --- a/rl_coach/core_types.py 1589 | +++ b/rl_coach/core_types.py 1590 | @@ -182,6 +182,7 @@ class RunPhase(Enum): 1591 | TRAIN = "Training" 1592 | TEST = "Testing" 1593 | UNDEFINED = "Undefined" 1594 | + WAITING = "Waiting" 1595 | 1596 | 1597 | # transitions 1598 | diff --git a/rl_coach/data_stores/s3_data_store.py b/rl_coach/data_stores/s3_data_store.py 1599 | index 959422a..42737dd 100644 1600 | --- a/rl_coach/data_stores/s3_data_store.py 1601 | +++ b/rl_coach/data_stores/s3_data_store.py 1602 | @@ -17,7 +17,7 @@ 1603 | 1604 | from rl_coach.data_stores.data_store import DataStore, DataStoreParameters 1605 | from minio import Minio 1606 | -from minio.error import ResponseError 1607 | +from minio.error import InvalidResponseError 1608 | from configparser import ConfigParser, Error 1609 | from rl_coach.checkpoint import CheckpointStateFile 1610 | from rl_coach.data_stores.data_store import SyncFiles 1611 | @@ -133,7 +133,7 @@ class S3DataStore(DataStore): 1612 | for filename in os.listdir(os.path.join(self.params.expt_dir, 'gifs')): 1613 | self.mc.fput_object(self.params.bucket_name, filename, os.path.join(self.params.expt_dir, 'gifs', filename)) 1614 | 1615 | - except ResponseError as e: 1616 | + except InvalidResponseError as e: 1617 | print("Got exception: %s\n while saving to S3", e) 1618 | 1619 | def load_from_store(self): 1620 | @@ -189,7 +189,7 @@ class S3DataStore(DataStore): 1621 | if not os.path.exists(filename): 1622 | 
self.mc.fget_object(obj.bucket_name, obj.object_name, filename) 1623 | 1624 | - except ResponseError as e: 1625 | + except InvalidResponseError as e: 1626 | print("Got exception: %s\n while loading from S3", e) 1627 | 1628 | def setup_checkpoint_dir(self, crd=None): 1629 | diff --git a/rl_coach/filters/observation/observation_normalization_filter.py b/rl_coach/filters/observation/observation_normalization_filter.py 1630 | index 791b345..db9e104 100644 1631 | --- a/rl_coach/filters/observation/observation_normalization_filter.py 1632 | +++ b/rl_coach/filters/observation/observation_normalization_filter.py 1633 | @@ -87,4 +87,3 @@ class ObservationNormalizationFilter(ObservationFilter): 1634 | 1635 | def restore_state_from_checkpoint(self, checkpoint_dir: str, checkpoint_prefix: str): 1636 | self.running_observation_stats.restore_state_from_checkpoint(checkpoint_dir, checkpoint_prefix) 1637 | - 1638 | \ No newline at end of file 1639 | diff --git a/rl_coach/graph_managers/graph_manager.py b/rl_coach/graph_managers/graph_manager.py 1640 | index 60afcee..10ae965 100644 1641 | --- a/rl_coach/graph_managers/graph_manager.py 1642 | +++ b/rl_coach/graph_managers/graph_manager.py 1643 | @@ -173,7 +173,7 @@ class GraphManager(object): 1644 | @staticmethod 1645 | def _create_worker_or_parameters_server_tf(task_parameters: DistributedTaskParameters): 1646 | import tensorflow as tf 1647 | - config = tf.ConfigProto() 1648 | + config = tf.compat.v1.ConfigProto() 1649 | config.allow_soft_placement = True # allow placing ops on cpu if they are not fit for gpu 1650 | config.gpu_options.allow_growth = True # allow the gpu memory allocated for the worker to grow if needed 1651 | config.gpu_options.per_process_gpu_memory_fraction = 0.2 1652 | @@ -212,7 +212,7 @@ class GraphManager(object): 1653 | 1654 | def _create_session_tf(self, task_parameters: TaskParameters): 1655 | import tensorflow as tf 1656 | - config = tf.ConfigProto() 1657 | + config = tf.compat.v1.ConfigProto() 1658 | config.allow_soft_placement = True # allow placing ops on cpu if they are not fit for gpu 1659 | config.gpu_options.allow_growth = True # allow the gpu memory allocated for the worker to grow if needed 1660 | # config.gpu_options.per_process_gpu_memory_fraction = 0.2 1661 | @@ -241,7 +241,7 @@ class GraphManager(object): 1662 | self.set_session(self.sess) 1663 | else: 1664 | # regular session 1665 | - self.sess = tf.Session(config=config) 1666 | + self.sess = tf.compat.v1.Session(config=config) 1667 | # set the session for all the modules 1668 | self.set_session(self.sess) 1669 | 1670 | @@ -278,7 +278,7 @@ class GraphManager(object): 1671 | import tensorflow as tf 1672 | 1673 | # write graph 1674 | - tf.train.write_graph(tf.get_default_graph(), 1675 | + tf.io.write_graph(tf.compat.v1.get_default_graph(), 1676 | logdir=self.task_parameters.checkpoint_save_dir, 1677 | name='graphdef.pb', 1678 | as_text=False) 1679 | diff --git a/rl_coach/presets/Acrobot_DDQN_BCQ_BatchRL.py b/rl_coach/presets/Acrobot_DDQN_BCQ_BatchRL.py 1680 | index cda8a45..43bd5a9 100644 1681 | --- a/rl_coach/presets/Acrobot_DDQN_BCQ_BatchRL.py 1682 | +++ b/rl_coach/presets/Acrobot_DDQN_BCQ_BatchRL.py 1683 | @@ -34,7 +34,7 @@ schedule_params.heatup_steps = EnvironmentSteps(DATASET_SIZE) 1684 | agent_params = DDQNBCQAgentParameters() 1685 | agent_params.network_wrappers['main'].batch_size = 128 1686 | # TODO cross-DL framework abstraction for a constant initializer? 
1687 | -agent_params.network_wrappers['main'].heads_parameters = [QHeadParameters(output_bias_initializer=tf.constant_initializer(-100))] 1688 | +agent_params.network_wrappers['main'].heads_parameters = [QHeadParameters(output_bias_initializer=tf.compat.v1.constant_initializer(-100))] 1689 | 1690 | agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps( 1691 | 100) 1692 | @@ -77,7 +77,7 @@ experience_generating_agent_params.network_wrappers['main'].learning_rate = 0.00 1693 | experience_generating_agent_params.network_wrappers['main'].batch_size = 128 1694 | experience_generating_agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False 1695 | experience_generating_agent_params.network_wrappers['main'].heads_parameters = \ 1696 | -[QHeadParameters(output_bias_initializer=tf.constant_initializer(-100))] 1697 | +[QHeadParameters(output_bias_initializer=tf.compat.v1.constant_initializer(-100))] 1698 | 1699 | # ER size 1700 | experience_generating_agent_params.memory = EpisodicExperienceReplayParameters() 1701 | diff --git a/rl_coach/tests/agents/test_agent_external_communication.py b/rl_coach/tests/agents/test_agent_external_communication.py 1702 | index 77f0a89..aa6a78b 100644 1703 | --- a/rl_coach/tests/agents/test_agent_external_communication.py 1704 | +++ b/rl_coach/tests/agents/test_agent_external_communication.py 1705 | @@ -12,7 +12,7 @@ logging.set_verbosity(logging.INFO) 1706 | 1707 | @pytest.mark.unit_test 1708 | def test_get_QActionStateValue_predictions(): 1709 | - tf.reset_default_graph() 1710 | + tf.compat.v1.reset_default_graph() 1711 | from rl_coach.presets.CartPole_DQN import graph_manager as cartpole_dqn_graph_manager 1712 | assert cartpole_dqn_graph_manager 1713 | cartpole_dqn_graph_manager.create_graph(task_parameters= 1714 | diff --git a/rl_coach/tests/architectures/tensorflow_components/embedders/test_identity_embedder.py b/rl_coach/tests/architectures/tensorflow_components/embedders/test_identity_embedder.py 1715 | index 23ca834..337e549 100644 1716 | --- a/rl_coach/tests/architectures/tensorflow_components/embedders/test_identity_embedder.py 1717 | +++ b/rl_coach/tests/architectures/tensorflow_components/embedders/test_identity_embedder.py 1718 | @@ -15,7 +15,7 @@ logging.set_verbosity(logging.INFO) 1719 | 1720 | @pytest.fixture 1721 | def reset(): 1722 | - tf.reset_default_graph() 1723 | + tf.compat.v1.reset_default_graph() 1724 | 1725 | 1726 | @pytest.mark.unit_test 1727 | @@ -23,17 +23,17 @@ def test_embedder(reset): 1728 | embedder = VectorEmbedder(np.array([10, 10]), name="test", scheme=EmbedderScheme.Empty) 1729 | 1730 | # make sure the ops where not created yet 1731 | - assert len(tf.get_default_graph().get_operations()) == 0 1732 | + assert len(tf.compat.v1.get_default_graph().get_operations()) == 0 1733 | 1734 | # call the embedder 1735 | input_ph, output_ph = embedder() 1736 | 1737 | # make sure that now the ops were created 1738 | - assert len(tf.get_default_graph().get_operations()) > 0 1739 | + assert len(tf.compat.v1.get_default_graph().get_operations()) > 0 1740 | 1741 | # try feeding a batch of one example # TODO: consider auto converting to batch 1742 | input = np.random.rand(1, 10, 10) 1743 | - sess = tf.Session() 1744 | + sess = tf.compat.v1.Session() 1745 | output = sess.run(embedder.output, {embedder.input: input}) 1746 | assert output.shape == (1, 100) # should have flattened the input 1747 | 1748 | diff --git a/rl_coach/tests/architectures/tensorflow_components/embedders/test_image_embedder.py 
b/rl_coach/tests/architectures/tensorflow_components/embedders/test_image_embedder.py 1749 | index 65076d1..efc7584 100644 1750 | --- a/rl_coach/tests/architectures/tensorflow_components/embedders/test_image_embedder.py 1751 | +++ b/rl_coach/tests/architectures/tensorflow_components/embedders/test_image_embedder.py 1752 | @@ -12,7 +12,7 @@ logging.set_verbosity(logging.INFO) 1753 | 1754 | @pytest.fixture 1755 | def reset(): 1756 | - tf.reset_default_graph() 1757 | + tf.compat.v1.reset_default_graph() 1758 | 1759 | 1760 | @pytest.mark.unit_test 1761 | @@ -26,24 +26,24 @@ def test_embedder(reset): 1762 | embedder = ImageEmbedder(np.array([10, 100, 100, 100]), name="test") 1763 | 1764 | 1765 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1766 | - pre_ops = len(tf.get_default_graph().get_operations()) 1767 | + is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1768 | + pre_ops = len(tf.compat.v1.get_default_graph().get_operations()) 1769 | # creating a simple image embedder 1770 | embedder = ImageEmbedder(np.array([100, 100, 10]), name="test", is_training=is_training) 1771 | 1772 | # make sure the only the is_training op is creates 1773 | - assert len(tf.get_default_graph().get_operations()) == pre_ops 1774 | + assert len(tf.compat.v1.get_default_graph().get_operations()) == pre_ops 1775 | 1776 | # call the embedder 1777 | input_ph, output_ph = embedder() 1778 | 1779 | # make sure that now the ops were created 1780 | - assert len(tf.get_default_graph().get_operations()) > pre_ops 1781 | + assert len(tf.compat.v1.get_default_graph().get_operations()) > pre_ops 1782 | 1783 | # try feeding a batch of one example 1784 | input = np.random.rand(1, 100, 100, 10) 1785 | - sess = tf.Session() 1786 | - sess.run(tf.global_variables_initializer()) 1787 | + sess = tf.compat.v1.Session() 1788 | + sess.run(tf.compat.v1.global_variables_initializer()) 1789 | output = sess.run(embedder.output, {embedder.input: input}) 1790 | assert output.shape == (1, 5184) 1791 | 1792 | @@ -58,7 +58,7 @@ def test_embedder(reset): 1793 | @pytest.mark.unit_test 1794 | def test_complex_embedder(reset): 1795 | # creating a deep vector embedder 1796 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1797 | + is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1798 | embedder = ImageEmbedder(np.array([100, 100, 10]), name="test", scheme=EmbedderScheme.Deep, 1799 | is_training=is_training) 1800 | 1801 | @@ -67,8 +67,8 @@ def test_complex_embedder(reset): 1802 | 1803 | # try feeding a batch of one example 1804 | input = np.random.rand(1, 100, 100, 10) 1805 | - sess = tf.Session() 1806 | - sess.run(tf.global_variables_initializer()) 1807 | + sess = tf.compat.v1.Session() 1808 | + sess.run(tf.compat.v1.global_variables_initializer()) 1809 | output = sess.run(embedder.output, {embedder.input: input}) 1810 | assert output.shape == (1, 256) # should have flattened the input 1811 | 1812 | @@ -76,7 +76,7 @@ def test_complex_embedder(reset): 1813 | @pytest.mark.unit_test 1814 | def test_activation_function(reset): 1815 | # creating a deep image embedder with relu 1816 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1817 | + is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1818 | embedder = ImageEmbedder(np.array([100, 100, 10]), 
name="relu", scheme=EmbedderScheme.Deep, 1819 | activation_function=tf.nn.relu, is_training=is_training) 1820 | 1821 | @@ -85,8 +85,8 @@ def test_activation_function(reset): 1822 | 1823 | # try feeding a batch of one example 1824 | input = np.random.rand(1, 100, 100, 10) 1825 | - sess = tf.Session() 1826 | - sess.run(tf.global_variables_initializer()) 1827 | + sess = tf.compat.v1.Session() 1828 | + sess.run(tf.compat.v1.global_variables_initializer()) 1829 | output = sess.run(embedder.output, {embedder.input: input}) 1830 | assert np.all(output >= 0) # should have flattened the input 1831 | 1832 | @@ -99,7 +99,7 @@ def test_activation_function(reset): 1833 | 1834 | # try feeding a batch of one example 1835 | input = np.random.rand(1, 100, 100, 10) 1836 | - sess = tf.Session() 1837 | - sess.run(tf.global_variables_initializer()) 1838 | + sess = tf.compat.v1.Session() 1839 | + sess.run(tf.compat.v1.global_variables_initializer()) 1840 | output = sess.run(embedder_tanh.output, {embedder_tanh.input: input}) 1841 | assert np.all(output >= -1) and np.all(output <= 1) 1842 | diff --git a/rl_coach/tests/architectures/tensorflow_components/embedders/test_vector_embedder.py b/rl_coach/tests/architectures/tensorflow_components/embedders/test_vector_embedder.py 1843 | index 73482f9..400a738 100644 1844 | --- a/rl_coach/tests/architectures/tensorflow_components/embedders/test_vector_embedder.py 1845 | +++ b/rl_coach/tests/architectures/tensorflow_components/embedders/test_vector_embedder.py 1846 | @@ -12,7 +12,7 @@ logging.set_verbosity(logging.INFO) 1847 | 1848 | @pytest.fixture 1849 | def reset(): 1850 | - tf.reset_default_graph() 1851 | + tf.compat.v1.reset_default_graph() 1852 | 1853 | 1854 | @pytest.mark.unit_test 1855 | @@ -22,24 +22,24 @@ def test_embedder(reset): 1856 | embedder = VectorEmbedder(np.array([10, 10]), name="test") 1857 | 1858 | # creating a simple vector embedder 1859 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1860 | - pre_ops = len(tf.get_default_graph().get_operations()) 1861 | + is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1862 | + pre_ops = len(tf.compat.v1.get_default_graph().get_operations()) 1863 | 1864 | embedder = VectorEmbedder(np.array([10]), name="test", is_training=is_training) 1865 | 1866 | # make sure the ops where not created yet 1867 | - assert len(tf.get_default_graph().get_operations()) == pre_ops 1868 | + assert len(tf.compat.v1.get_default_graph().get_operations()) == pre_ops 1869 | 1870 | # call the embedder 1871 | input_ph, output_ph = embedder() 1872 | 1873 | # make sure that now the ops were created 1874 | - assert len(tf.get_default_graph().get_operations()) > pre_ops 1875 | + assert len(tf.compat.v1.get_default_graph().get_operations()) > pre_ops 1876 | 1877 | # try feeding a batch of one example 1878 | input = np.random.rand(1, 10) 1879 | - sess = tf.Session() 1880 | - sess.run(tf.global_variables_initializer()) 1881 | + sess = tf.compat.v1.Session() 1882 | + sess.run(tf.compat.v1.global_variables_initializer()) 1883 | output = sess.run(embedder.output, {embedder.input: input}) 1884 | assert output.shape == (1, 256) 1885 | 1886 | @@ -54,7 +54,7 @@ def test_embedder(reset): 1887 | @pytest.mark.unit_test 1888 | def test_complex_embedder(reset): 1889 | # creating a deep vector embedder 1890 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1891 | + is_training = tf.Variable(False, trainable=False, 
collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1892 | embedder = VectorEmbedder(np.array([10]), name="test", scheme=EmbedderScheme.Deep, is_training=is_training) 1893 | 1894 | # call the embedder 1895 | @@ -62,8 +62,8 @@ def test_complex_embedder(reset): 1896 | 1897 | # try feeding a batch of one example 1898 | input = np.random.rand(1, 10) 1899 | - sess = tf.Session() 1900 | - sess.run(tf.global_variables_initializer()) 1901 | + sess = tf.compat.v1.Session() 1902 | + sess.run(tf.compat.v1.global_variables_initializer()) 1903 | output = sess.run(embedder.output, {embedder.input: input}) 1904 | assert output.shape == (1, 128) # should have flattened the input 1905 | 1906 | @@ -71,7 +71,7 @@ def test_complex_embedder(reset): 1907 | @pytest.mark.unit_test 1908 | def test_activation_function(reset): 1909 | # creating a deep vector embedder with relu 1910 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1911 | + is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1912 | embedder = VectorEmbedder(np.array([10]), name="relu", scheme=EmbedderScheme.Deep, 1913 | activation_function=tf.nn.relu, is_training=is_training) 1914 | 1915 | @@ -80,8 +80,8 @@ def test_activation_function(reset): 1916 | 1917 | # try feeding a batch of one example 1918 | input = np.random.rand(1, 10) 1919 | - sess = tf.Session() 1920 | - sess.run(tf.global_variables_initializer()) 1921 | + sess = tf.compat.v1.Session() 1922 | + sess.run(tf.compat.v1.global_variables_initializer()) 1923 | output = sess.run(embedder.output, {embedder.input: input}) 1924 | assert np.all(output >= 0) # should have flattened the input 1925 | 1926 | @@ -94,7 +94,7 @@ def test_activation_function(reset): 1927 | 1928 | # try feeding a batch of one example 1929 | input = np.random.rand(1, 10) 1930 | - sess = tf.Session() 1931 | - sess.run(tf.global_variables_initializer()) 1932 | + sess = tf.compat.v1.Session() 1933 | + sess.run(tf.compat.v1.global_variables_initializer()) 1934 | output = sess.run(embedder_tanh.output, {embedder_tanh.input: input}) 1935 | assert np.all(output >= -1) and np.all(output <= 1) 1936 | diff --git a/rl_coach/tests/graph_managers/test_basic_rl_graph_manager.py b/rl_coach/tests/graph_managers/test_basic_rl_graph_manager.py 1937 | index 4e30312..c10a99f 100644 1938 | --- a/rl_coach/tests/graph_managers/test_basic_rl_graph_manager.py 1939 | +++ b/rl_coach/tests/graph_managers/test_basic_rl_graph_manager.py 1940 | @@ -14,7 +14,7 @@ logging.set_verbosity(logging.INFO) 1941 | 1942 | @pytest.mark.unit_test 1943 | def test_basic_rl_graph_manager_with_pong_a3c(): 1944 | - tf.reset_default_graph() 1945 | + tf.compat.v1.reset_default_graph() 1946 | from rl_coach.presets.Atari_A3C import graph_manager 1947 | assert graph_manager 1948 | graph_manager.env_params.level = "PongDeterministic-v4" 1949 | @@ -25,7 +25,7 @@ def test_basic_rl_graph_manager_with_pong_a3c(): 1950 | 1951 | @pytest.mark.unit_test 1952 | def test_basic_rl_graph_manager_with_pong_nec(): 1953 | - tf.reset_default_graph() 1954 | + tf.compat.v1.reset_default_graph() 1955 | from rl_coach.presets.Atari_NEC import graph_manager 1956 | assert graph_manager 1957 | graph_manager.env_params.level = "PongDeterministic-v4" 1958 | @@ -36,7 +36,7 @@ def test_basic_rl_graph_manager_with_pong_nec(): 1959 | 1960 | @pytest.mark.unit_test 1961 | def test_basic_rl_graph_manager_with_cartpole_dqn(): 1962 | - tf.reset_default_graph() 1963 | + tf.compat.v1.reset_default_graph() 1964 | 
from rl_coach.presets.CartPole_DQN import graph_manager 1965 | assert graph_manager 1966 | graph_manager.create_graph(task_parameters=TaskParameters(framework_type=Frameworks.tensorflow, 1967 | @@ -46,7 +46,7 @@ def test_basic_rl_graph_manager_with_cartpole_dqn(): 1968 | # Test for identifying memory leak in restore_checkpoint 1969 | @pytest.mark.unit_test 1970 | def test_basic_rl_graph_manager_with_cartpole_dqn_and_repeated_checkpoint_restore(): 1971 | - tf.reset_default_graph() 1972 | + tf.compat.v1.reset_default_graph() 1973 | from rl_coach.presets.CartPole_DQN import graph_manager 1974 | assert graph_manager 1975 | graph_manager.create_graph(task_parameters=TaskParameters(framework_type=Frameworks.tensorflow, 1976 | diff --git a/rl_coach/tests/memories/test_differential_neural_dictionary.py b/rl_coach/tests/memories/test_differential_neural_dictionary.py 1977 | index 461b4e5..eb1a0be 100644 1978 | --- a/rl_coach/tests/memories/test_differential_neural_dictionary.py 1979 | +++ b/rl_coach/tests/memories/test_differential_neural_dictionary.py 1980 | @@ -43,8 +43,8 @@ def test_random_sample_from_dnd(dnd: QDND): 1981 | # calculate_normalization_factor 1982 | sampled_embeddings = dnd.sample_embeddings(NUM_SAMPLED_EMBEDDINGS) 1983 | coefficient = 1/(NUM_SAMPLED_EMBEDDINGS * (NUM_SAMPLED_EMBEDDINGS - 1.0)) 1984 | - tf_current_embedding = tf.placeholder(tf.float32, shape=(EMBEDDING_SIZE), name='current_embedding') 1985 | - tf_other_embeddings = tf.placeholder(tf.float32, shape=(NUM_SAMPLED_EMBEDDINGS - 1, EMBEDDING_SIZE), name='other_embeddings') 1986 | + tf_current_embedding = tf.compat.v1.placeholder(tf.float32, shape=(EMBEDDING_SIZE), name='current_embedding') 1987 | + tf_other_embeddings = tf.compat.v1.placeholder(tf.float32, shape=(NUM_SAMPLED_EMBEDDINGS - 1, EMBEDDING_SIZE), name='other_embeddings') 1988 | 1989 | sub = tf_current_embedding - tf_other_embeddings 1990 | square = tf.square(sub) 1991 | @@ -55,7 +55,7 @@ def test_random_sample_from_dnd(dnd: QDND): 1992 | ########################### 1993 | # more efficient method 1994 | ########################### 1995 | - sampled_embeddings_expanded = tf.placeholder( 1996 | + sampled_embeddings_expanded = tf.compat.v1.placeholder( 1997 | tf.float32, shape=(1, NUM_SAMPLED_EMBEDDINGS, EMBEDDING_SIZE), name='sampled_embeddings_expanded') 1998 | sampled_embeddings_tiled = tf.tile(sampled_embeddings_expanded, (sampled_embeddings_expanded.shape[1], 1, 1)) 1999 | sampled_embeddings_transposed = tf.transpose(sampled_embeddings_tiled, (1, 0, 2)) 2000 | @@ -63,11 +63,11 @@ def test_random_sample_from_dnd(dnd: QDND): 2001 | square2 = tf.square(sub2) 2002 | result2 = tf.reduce_sum(square2) 2003 | 2004 | - config = tf.ConfigProto() 2005 | + config = tf.compat.v1.ConfigProto() 2006 | config.allow_soft_placement = True # allow placing ops on cpu if they are not fit for gpu 2007 | config.gpu_options.allow_growth = True # allow the gpu memory allocated for the worker to grow if needed 2008 | 2009 | - sess = tf.Session(config=config) 2010 | + sess = tf.compat.v1.Session(config=config) 2011 | 2012 | sum1 = 0 2013 | start = time.time() 2014 | diff --git a/rl_coach/tests/test_global_variable_saver.py b/rl_coach/tests/test_global_variable_saver.py 2015 | index 19da034..47e3f23 100644 2016 | --- a/rl_coach/tests/test_global_variable_saver.py 2017 | +++ b/rl_coach/tests/test_global_variable_saver.py 2018 | @@ -19,7 +19,7 @@ def name(): 2019 | 2020 | @pytest.fixture 2021 | def variable(shape, name): 2022 | - tf.reset_default_graph() 2023 | + 
tf.compat.v1.reset_default_graph() 2024 | return tf.Variable(tf.zeros(shape), name=name) 2025 | 2026 | 2027 | @@ -36,8 +36,8 @@ def assert_arrays_ones_shape(arrays, shape, name): 2028 | 2029 | @pytest.mark.unit_test 2030 | def test_global_variable_saver_to_arrays(variable, name, shape): 2031 | - with tf.Session() as session: 2032 | - session.run(tf.global_variables_initializer()) 2033 | + with tf.compat.v1.Session() as session: 2034 | + session.run(tf.compat.v1.global_variables_initializer()) 2035 | session.run(variable.assign(tf.ones(shape))) 2036 | 2037 | saver = GlobalVariableSaver("name") 2038 | @@ -47,8 +47,8 @@ def test_global_variable_saver_to_arrays(variable, name, shape): 2039 | 2040 | @pytest.mark.unit_test 2041 | def test_global_variable_saver_from_arrays(variable, name, shape): 2042 | - with tf.Session() as session: 2043 | - session.run(tf.global_variables_initializer()) 2044 | + with tf.compat.v1.Session() as session: 2045 | + session.run(tf.compat.v1.global_variables_initializer()) 2046 | 2047 | saver = GlobalVariableSaver("name") 2048 | saver.from_arrays(session, {name: np.ones(shape)}) 2049 | @@ -58,8 +58,8 @@ def test_global_variable_saver_from_arrays(variable, name, shape): 2050 | 2051 | @pytest.mark.unit_test 2052 | def test_global_variable_saver_to_string(variable, name, shape): 2053 | - with tf.Session() as session: 2054 | - session.run(tf.global_variables_initializer()) 2055 | + with tf.compat.v1.Session() as session: 2056 | + session.run(tf.compat.v1.global_variables_initializer()) 2057 | session.run(variable.assign(tf.ones(shape))) 2058 | 2059 | saver = GlobalVariableSaver("name") 2060 | @@ -70,8 +70,8 @@ def test_global_variable_saver_to_string(variable, name, shape): 2061 | 2062 | @pytest.mark.unit_test 2063 | def test_global_variable_saver_from_string(variable, name, shape): 2064 | - with tf.Session() as session: 2065 | - session.run(tf.global_variables_initializer()) 2066 | + with tf.compat.v1.Session() as session: 2067 | + session.run(tf.compat.v1.global_variables_initializer()) 2068 | 2069 | saver = GlobalVariableSaver("name") 2070 | saver.from_string(session, pickle.dumps({name: np.ones(shape)}, protocol=-1)) 2071 | -------------------------------------------------------------------------------- /files/start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ $1 == 'train' ] 4 | then 5 | # Remove all nvidia gl libraries if they exists to run training in SageMaker. 6 | rm -rf /usr/local/nvidia/lib/libGL* 7 | rm -rf /usr/local/nvidia/lib/libEGL* 8 | rm -rf /usr/local/nvidia/lib/libOpenGL* 9 | rm -rf /usr/local/nvidia/lib64/libGL* 10 | rm -rf /usr/local/nvidia/lib64/libEGL* 11 | rm -rf /usr/local/nvidia/lib64/libOpenGL* 12 | 13 | CURRENT_HOST=$(jq .current_host /opt/ml/input/config/resourceconfig.json) 14 | 15 | sed -ie "s/PLACEHOLDER_HOSTNAME/$CURRENT_HOST/g" /changehostname.c 16 | 17 | gcc -o /changehostname.o -c -fPIC -Wall /changehostname.c 18 | gcc -o /libchangehostname.so -shared -export-dynamic /changehostname.o -ldl 19 | redis-server /etc/redis/redis.conf & 20 | LD_PRELOAD=/libchangehostname.so xvfb-run --auto-servernum -s "-screen 0 1024x768x16" train 21 | elif [ $1 == 'serve' ] 22 | then 23 | serve 24 | fi 25 | -------------------------------------------------------------------------------- /push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | trap ctrl_c INT 3 | 4 | function ctrl_c() { 5 | echo "Requested to stop." 
6 | exit 1 7 | } 8 | 9 | PREFIX="local" 10 | VERSION=$(cat VERSION) 11 | 12 | ARCH="cpu gpu cpu-intel" 13 | 14 | while getopts "p:a:" opt; do 15 | case $opt in 16 | p) PREFIX="$OPTARG" 17 | ;; 18 | a) ARCH="$OPTARG" 19 | ;; 20 | \?) echo "Invalid option -$OPTARG" >&2 21 | exit 1 22 | ;; 23 | esac 24 | done 25 | 26 | echo "Pushing docker images for [$ARCH]" 27 | 28 | for A in $ARCH; do 29 | echo "Pushing $PREFIX/deepracer-sagemaker:$VERSION-$A" 30 | docker push $PREFIX/deepracer-sagemaker:$VERSION-$A 31 | done 32 | --------------------------------------------------------------------------------
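Example invocation of push.sh (a minimal sketch; "my-registry" and the architecture list are illustrative assumptions, not values taken from this repository):

    # Pushes my-registry/deepracer-sagemaker:<VERSION>-cpu and my-registry/deepracer-sagemaker:<VERSION>-gpu,
    # where <VERSION> is read from the VERSION file by the script and -a overrides the default
    # architecture list ("cpu gpu cpu-intel"). The matching images are assumed to exist locally.
    ./push.sh -p my-registry -a "cpu gpu"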