├── .gitignore ├── .gitmodules ├── LICENSE.txt ├── README.md ├── VERSION ├── build.sh ├── buildspec.yml ├── docker ├── primary │ ├── Dockerfile.cpu │ ├── Dockerfile.gpu │ └── Dockerfile.gpu-legacy ├── requirements.txt └── secondary │ └── Dockerfile ├── files ├── changehostname.c ├── redis.conf ├── rl_coach.patch └── start.sh └── push.sh /.gitignore: -------------------------------------------------------------------------------- 1 | staging/ 2 | *.tmp -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | *This repository is archived as all needed code is in the [Simapp/Robomaker repository](https://github.com/aws-deepracer-community/deepracer-simapp).* 2 | 3 | # Deepracer Sagemaker Container 4 | 5 | This repository provides the build scripts required to build an AWS DeepRacer Sagemaker Container useful for local build / build in cloud infrastructure. 
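For example, the images can be built locally with the flags defined in `build.sh` further below (a sketch; adjust the architecture list and Docker Hub prefix to your own setup):

```bash
# Build the CPU image with the default "awsdeepracercommunity" prefix.
./build.sh -a cpu

# Build the GPU and Intel-optimized CPU variants under a custom prefix,
# bypassing the Docker build cache (-f).
./build.sh -a "gpu cpu-intel" -p mydockerhubuser -f

# Rebuild only the second stage (deepracer-sagemaker) from existing primary images.
./build.sh -2 -a cpu
```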
6 | 7 | The work is based on the Dockerfile and patches provided by AWS [https://github.com/awslabs/amazon-sagemaker-examples/tree/master/reinforcement_learning/rl_deepracer_robomaker_coach_gazebo] 8 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 5.2.1 2 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | trap ctrl_c INT 3 | 4 | function ctrl_c() { 5 | echo "Requested to stop." 6 | exit 1 7 | } 8 | 9 | set -e 10 | 11 | PREFIX="awsdeepracercommunity" 12 | ARCH="cpu gpu cpu-intel" 13 | 14 | while getopts ":2fa:p:" opt; do 15 | case $opt in 16 | 2) 17 | OPT_SECOND_STAGE_ONLY="OPT_SECOND_STAGE_ONLY" 18 | ;; 19 | p) 20 | PREFIX="$OPTARG" 21 | ;; 22 | a) 23 | ARCH="$OPTARG" 24 | ;; 25 | f) 26 | OPT_NOCACHE="--no-cache" 27 | ;; 28 | \?) 29 | echo "Invalid option -$OPTARG" >&2 30 | exit 1 31 | ;; 32 | esac 33 | done 34 | 35 | DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" 36 | cd $DIR 37 | VERSION=$(cat VERSION) 38 | 39 | echo "Preparing docker images for [$ARCH]" 40 | 41 | TF_VER="tensorflow==2.13.1\ntensorflow-probability==0.21.0" 42 | 43 | ## First stage 44 | if [[ -z "$OPT_SECOND_STAGE_ONLY" ]]; then 45 | 46 | for arch in $ARCH; do 47 | 48 | if [[ "$arch" == "gpu" ]]; then 49 | docker buildx build $OPT_NOCACHE . -t $PREFIX/sagemaker-tensorflow-container:$VERSION-$arch -f docker/primary/Dockerfile.gpu \ 50 | --build-arg TF_VER=$TF_VER 51 | elif [[ "$arch" == "cpu" ]]; then 52 | docker buildx build $OPT_NOCACHE . -t $PREFIX/sagemaker-tensorflow-container:$VERSION-$arch -f docker/primary/Dockerfile.cpu \ 53 | --build-arg TF_VER=$TF_VER 54 | elif [[ "$arch" == "cpu-intel" ]]; then 55 | TF_VER='intel-tensorflow==2.13.0\ntensorflow-probability==0.21.0' 56 | docker buildx build $OPT_NOCACHE . -t $PREFIX/sagemaker-tensorflow-container:$VERSION-$arch -f docker/primary/Dockerfile.cpu \ 57 | --build-arg TF_VER="$TF_VER" 58 | fi 59 | 60 | done 61 | 62 | fi 63 | cd $DIR 64 | 65 | ## Second stage 66 | for arch in $ARCH; do 67 | docker buildx build $OPT_NOCACHE -f docker/secondary/Dockerfile -t $PREFIX/deepracer-sagemaker:$VERSION-$arch . 
--build-arg version=$VERSION --build-arg arch=$arch --build-arg prefix=$PREFIX --build-arg IMG_VERSION=$VERSION 68 | done 69 | 70 | set +e 71 | -------------------------------------------------------------------------------- /buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | phases: 3 | install: 4 | commands: 5 | - pip install awscli wheel setuptools 6 | pre_build: 7 | commands: 8 | - docker login --username ${DOCKER_HUB_USER} --password ${DOCKER_HUB_KEY} 9 | build: 10 | commands: 11 | - bash build.sh -a "${CPU_FLAGS}" -p ${DOCKER_HUB_REPO} 12 | post_build: 13 | commands: 14 | - bash push.sh -a "${CPU_FLAGS}" -p ${DOCKER_HUB_REPO} -------------------------------------------------------------------------------- /docker/primary/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | ARG TF_VER 3 | LABEL maintainer="Amazon AI" 4 | 5 | # Prevent docker build get stopped by requesting user interaction 6 | ENV DEBIAN_FRONTEND=noninteractive 7 | ENV DEBCONF_NONINTERACTIVE_SEEN=true 8 | # Python won’t try to write .pyc or .pyo files on the import of source modules 9 | ENV PYTHONDONTWRITEBYTECODE=1 10 | ENV PYTHONUNBUFFERED=1 11 | # See http://bugs.python.org/issue19846 12 | ENV PYTHONIOENCODING=UTF-8 13 | ENV LANG=C.UTF-8 14 | ENV LC_ALL=C.UTF-8 15 | # Specify the location of module that contains the training logic for SageMaker 16 | # https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html 17 | ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main 18 | 19 | RUN apt-get update && \ 20 | apt-get install -y --no-install-recommends --allow-unauthenticated gnupg ca-certificates curl git wget vim openssh-client && \ 21 | rm -rf /var/lib/apt/lists/* 22 | 23 | RUN apt-get update \ 24 | && apt-get install -y --no-install-recommends --allow-unauthenticated \ 25 | python3-pip \ 26 | python3-setuptools \ 27 | python3-dev \ 28 | libgomp1 \ 29 | libfreetype6-dev \ 30 | libhdf5-serial-dev \ 31 | libpng-dev \ 32 | libzmq3-dev \ 33 | build-essential \ 34 | zlib1g-dev \ 35 | && rm -rf /var/lib/apt/lists/* \ 36 | && mkdir -p /var/run/sshd 37 | 38 | ENV PATH=/usr/local/nvidia/bin:$PATH 39 | 40 | WORKDIR / 41 | 42 | ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True 43 | ADD docker/requirements.txt / 44 | RUN echo "${TF_VER}" > /requirements-tf.txt 45 | RUN pip --no-cache-dir install --upgrade pip 'setuptools<65' \ 46 | && pip install --no-cache-dir -U -r requirements.txt -r requirements-tf.txt 47 | 48 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 49 | 50 | RUN chmod +x /usr/local/bin/deep_learning_container.py 51 | 52 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 53 | 54 | CMD ["bin/bash"] 55 | -------------------------------------------------------------------------------- /docker/primary/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04 2 | ARG TF_VER 3 | LABEL maintainer="Amazon AI" 4 | 5 | # Prevent docker build get stopped by requesting user interaction 6 | ENV DEBIAN_FRONTEND=noninteractive 7 | ENV DEBCONF_NONINTERACTIVE_SEEN=true 8 | # Python won’t try to write .pyc or .pyo files on the import of source modules 9 | ENV PYTHONDONTWRITEBYTECODE=1 10 | 
ENV PYTHONUNBUFFERED=1 11 | # See http://bugs.python.org/issue19846 12 | ENV PYTHONIOENCODING=UTF-8 13 | ENV LANG=C.UTF-8 14 | ENV LC_ALL=C.UTF-8 15 | # Specify the location of module that contains the training logic for SageMaker 16 | # https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html 17 | ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main 18 | 19 | RUN apt-get update && \ 20 | apt-get install -y --no-install-recommends --allow-unauthenticated gnupg ca-certificates curl git wget vim openssh-client && \ 21 | rm -rf /var/lib/apt/lists/* 22 | RUN apt-get update \ 23 | && apt-get install -y --no-install-recommends --allow-unauthenticated \ 24 | python3-pip \ 25 | python3-setuptools \ 26 | python3-dev \ 27 | cuda-command-line-tools-11-8 \ 28 | cuda-nvcc-11-8 \ 29 | libgomp1 \ 30 | libfreetype6-dev \ 31 | libhdf5-serial-dev \ 32 | libpng-dev \ 33 | libzmq3-dev \ 34 | build-essential \ 35 | zlib1g-dev \ 36 | && rm -rf /var/lib/apt/lists/* \ 37 | && mkdir -p /var/run/sshd 38 | 39 | # Set default NCCL parameters 40 | RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf 41 | 42 | ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH 43 | ENV PATH=/usr/local/openmpi/bin/:$PATH 44 | ENV PATH=/usr/local/nvidia/bin:$PATH 45 | 46 | WORKDIR / 47 | 48 | ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True 49 | ADD docker/requirements.txt / 50 | RUN echo "${TF_VER}" > /requirements-tf.txt 51 | RUN pip --no-cache-dir install --upgrade pip 'setuptools<65' \ 52 | && pip install --no-cache-dir -U -r requirements.txt -r requirements-tf.txt 53 | 54 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 55 | 56 | RUN chmod +x /usr/local/bin/deep_learning_container.py 57 | 58 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 59 | 60 | CMD ["bin/bash"] 61 | -------------------------------------------------------------------------------- /docker/primary/Dockerfile.gpu-legacy: -------------------------------------------------------------------------------- 1 | # Nvidia does not publish a TensorRT Runtime library for Ubuntu 18.04 with Cuda 10.1 support, so we stick with cuda 10.0. 2 | # https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/ 3 | FROM nvidia/cuda:10.0-base-ubuntu18.04 4 | 5 | LABEL maintainer="Amazon AI" 6 | 7 | # Prevent docker build get stopped by requesting user interaction 8 | ENV DEBIAN_FRONTEND=noninteractive 9 | ENV DEBCONF_NONINTERACTIVE_SEEN=true 10 | # Python won’t try to write .pyc or .pyo files on the import of source modules 11 | ENV PYTHONDONTWRITEBYTECODE=1 12 | ENV PYTHONUNBUFFERED=1 13 | # See http://bugs.python.org/issue19846 14 | ENV PYTHONIOENCODING=UTF-8 15 | ENV LANG=C.UTF-8 16 | ENV LC_ALL=C.UTF-8 17 | # Specify the location of module that contains the training logic for SageMaker 18 | # https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html 19 | ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main 20 | 21 | RUN distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID | sed 's/\.//') && \ 22 | apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/$distribution/x86_64/7fa2af80.pub 23 | 24 | # Install CUDNN 25 | RUN echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list 26 | 27 | # Define framework-related package sources 28 | ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/gpu/final/tensorflow_gpu-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl 29 | 30 | RUN apt-get update \ 31 | && apt-get install -y --no-install-recommends --allow-unauthenticated \ 32 | python3-dev \ 33 | python3-pip \ 34 | python3-setuptools \ 35 | python3-dev \ 36 | ca-certificates \ 37 | cuda-command-line-tools-10-0 \ 38 | cuda-cublas-dev-10-0 \ 39 | cuda-cudart-dev-10-0 \ 40 | cuda-cufft-dev-10-0 \ 41 | cuda-curand-dev-10-0 \ 42 | cuda-cusolver-dev-10-0 \ 43 | cuda-cusparse-dev-10-0 \ 44 | curl \ 45 | libcudnn7=7.6.5.32-1+cuda10.0 \ 46 | # TensorFlow doesn't require libnccl anymore but Open MPI still depends on it 47 | libnccl2=2.4.7-1+cuda10.0 \ 48 | libgomp1 \ 49 | libnccl-dev=2.4.7-1+cuda10.0 \ 50 | libfreetype6-dev \ 51 | libhdf5-serial-dev \ 52 | libpng-dev \ 53 | libzmq3-dev \ 54 | git \ 55 | wget \ 56 | vim \ 57 | build-essential \ 58 | openssh-client \ 59 | openssh-server \ 60 | zlib1g-dev \ 61 | # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 62 | # adds a new list which contains libnvinfer library, so it needs another 63 | # 'apt-get update' to retrieve that list before it can actually install the 64 | # library. 65 | # We don't install libnvinfer-dev since we don't need to build against TensorRT, 66 | # and libnvinfer4 doesn't contain libnvinfer.a static library. 67 | && apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated \ 68 | nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 \ 69 | && apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated \ 70 | libnvinfer5=5.0.2-1+cuda10.0 \ 71 | && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \ 72 | && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \ 73 | && rm /usr/lib/x86_64-linux-gnu/libnvparsers* \ 74 | && rm -rf /var/lib/apt/lists/* \ 75 | && mkdir -p /var/run/sshd 76 | 77 | ########################################################################### 78 | # Horovod & its dependencies 79 | ########################################################################### 80 | 81 | # Set default NCCL parameters 82 | RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf 83 | 84 | ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH 85 | ENV PATH=/usr/local/openmpi/bin/:$PATH 86 | ENV PATH=/usr/local/nvidia/bin:$PATH 87 | 88 | # SSH login fix. Otherwise user is kicked off after login 89 | RUN mkdir -p /var/run/sshd \ 90 | && sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd 91 | 92 | # Create SSH key. 
93 | RUN mkdir -p /root/.ssh/ \ 94 | && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \ 95 | && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \ 96 | && printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config 97 | 98 | WORKDIR / 99 | 100 | RUN pip3 --no-cache-dir install --upgrade \ 101 | pip \ 102 | setuptools 103 | 104 | # Some TF tools expect a "python" binary 105 | RUN ln -s $(which python3) /usr/local/bin/python \ 106 | && ln -s $(which pip3) /usr/bin/pip 107 | 108 | RUN pip install --no-cache-dir -U \ 109 | numpy==1.17.4 \ 110 | scipy==1.2.2 \ 111 | scikit-learn==0.20.3 \ 112 | pandas==0.24.2 \ 113 | Pillow==7.0.0 \ 114 | h5py==2.9.0 \ 115 | keras_applications==1.0.8 \ 116 | keras_preprocessing==1.1.0 \ 117 | requests==2.22.0 \ 118 | keras==2.3.1 \ 119 | smdebug==0.7.2 \ 120 | sagemaker==1.50.17 \ 121 | sagemaker-experiments==0.1.7 \ 122 | "cryptography>=2.3" \ 123 | "sagemaker-tensorflow>=1.15,<1.16" \ 124 | "sagemaker-tensorflow-training>=2,<3" \ 125 | # Let's install TensorFlow separately in the end to avoid 126 | # the library version to be overwritten 127 | && pip install --force-reinstall --no-cache-dir -U \ 128 | tensorflow_gpu==1.15.2 \ 129 | && pip install --no-cache-dir -U \ 130 | awscli 131 | 132 | # Allow OpenSSH to talk to containers without asking for confirmation 133 | RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \ 134 | && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \ 135 | && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config 136 | 137 | ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py 138 | 139 | RUN chmod +x /usr/local/bin/deep_learning_container.py 140 | 141 | RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt 142 | 143 | CMD ["bin/bash"] -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dateutil==2.8.2 2 | annoy==1.17.3 3 | pillow==9.0.1 4 | matplotlib==3.7.4 5 | numpy==1.24.3 6 | pandas==2.0.3 7 | pygame==2.5.2 8 | scipy==1.8.0 9 | scikit-image==0.21.0 10 | futures==3.0.5 11 | boto3==1.34.12 12 | minio==7.2.0 13 | cryptography==41.0.7 14 | bokeh==3.1.1 15 | retrying==1.3.4 16 | eventlet==0.34.2 17 | flask==3.0.0 18 | gevent==23.9.1 19 | gunicorn==21.2.0 20 | h5py==3.10.0 21 | pytest==7.4.4 22 | pytest-cov==4.1.0 23 | netifaces 24 | sagemaker<2 25 | sagemaker-experiments==0.1.45 26 | sagemaker-tensorflow<3 27 | sagemaker-tensorflow-training>=2,<3 28 | sagemaker-containers>=2.7.1 29 | protobuf<3.20 -------------------------------------------------------------------------------- /docker/secondary/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG arch 2 | ARG version 3 | ARG prefix 4 | FROM ${prefix}/sagemaker-tensorflow-container:${version}-${arch} 5 | 6 | RUN apt-get update && apt-get install -y --no-install-recommends \ 7 | wget \ 8 | jq \ 9 | ffmpeg \ 10 | libjpeg-dev \ 11 | libxrender1 \ 12 | python3-opengl \ 13 | pkg-config \ 14 | xvfb && \ 15 | apt-get clean && \ 16 | rm -rf /var/lib/apt/lists/* 17 | 18 | # Install Redis. 
19 | RUN cd /tmp && \ 20 | wget https://download.redis.io/releases/redis-6.2.7.tar.gz && \ 21 | tar xvzf redis-6.2.7.tar.gz && \ 22 | cd redis-6.2.7 && \ 23 | make && \ 24 | make install && \ 25 | rm -rf /tmp/redis* 26 | 27 | RUN pip install -U --no-cache-dir --upgrade-strategy only-if-needed \ 28 | pyglet \ 29 | gym \ 30 | "redis>=4.4.4" \ 31 | "rl-coach-slim==1.0.0" \ 32 | "protobuf<3.20" \ 33 | awscli 34 | RUN wget https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -O /usr/local/lib/python3.8/dist-packages/google/protobuf/internal/builder.py 35 | 36 | COPY ./files/redis.conf /etc/redis/redis.conf 37 | COPY ./files/rl_coach.patch /opt/amazon/rl_coach.patch 38 | RUN patch -p1 -N --directory=/usr/local/lib/python3.8/dist-packages/ < /opt/amazon/rl_coach.patch 39 | 40 | ENV COACH_BACKEND=tensorflow 41 | 42 | # Copy workaround script for incorrect hostname 43 | COPY files/changehostname.c / 44 | COPY files/start.sh /usr/local/bin/start.sh 45 | RUN chmod +x /usr/local/bin/start.sh 46 | 47 | ENV PYTHONPATH /opt/amazon/:$PYTHONPATH 48 | ENV PATH /opt/ml/code/:$PATH 49 | WORKDIR /opt/ml/code 50 | 51 | # Tell sagemaker-containers where the launch point is for training job. 52 | ENV NODE_TYPE SAGEMAKER_TRAINING_WORKER 53 | 54 | ENV PYTHONUNBUFFERED 1 55 | 56 | # Versioning 57 | ARG IMG_VERSION 58 | LABEL maintainer "AWS DeepRacer Community - deepracing.io" 59 | LABEL version $IMG_VERSION 60 | 61 | # Starts framework 62 | ENTRYPOINT ["bash", "-m", "start.sh", "train"] 63 | -------------------------------------------------------------------------------- /files/changehostname.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <string.h> 3 | 4 | /* 5 | * Modifies gethostname to return algo-1, algo-2, etc. when running on SageMaker. 6 | * 7 | * Without this gethostname() on SageMaker returns 'aws', leading NCCL/MPI to think there is only one host, 8 | * not realizing that it needs to use NET/Socket. 9 | * 10 | * When docker container starts we read 'current_host' value from /opt/ml/input/config/resourceconfig.json 11 | * and replace PLACEHOLDER_HOSTNAME with it before compiling this code into a shared library. 12 | */ 13 | int gethostname(char *name, size_t len) 14 | { 15 | const char *val = PLACEHOLDER_HOSTNAME; 16 | strncpy(name, val, len); 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /files/redis.conf: -------------------------------------------------------------------------------- 1 | # Redis configuration file example. 2 | # 3 | # Note that in order to read the configuration file, Redis must be 4 | # started with the file path as first argument: 5 | # 6 | # ./redis-server /path/to/redis.conf 7 | 8 | # Note on units: when memory size is needed, it is possible to specify 9 | # it in the usual form of 1k 5GB 4M and so forth: 10 | # 11 | # 1k => 1000 bytes 12 | # 1kb => 1024 bytes 13 | # 1m => 1000000 bytes 14 | # 1mb => 1024*1024 bytes 15 | # 1g => 1000000000 bytes 16 | # 1gb => 1024*1024*1024 bytes 17 | # 18 | # units are case insensitive so 1GB 1Gb 1gB are all the same. 19 | 20 | ################################## INCLUDES ################################### 21 | 22 | # Include one or more other config files here. This is useful if you 23 | # have a standard template that goes to all Redis servers but also need 24 | # to customize a few per-server settings. Include files can include 25 | # other files, so use this wisely.
26 | # 27 | # Notice option "include" won't be rewritten by command "CONFIG REWRITE" 28 | # from admin or Redis Sentinel. Since Redis always uses the last processed 29 | # line as value of a configuration directive, you'd better put includes 30 | # at the beginning of this file to avoid overwriting config change at runtime. 31 | # 32 | # If instead you are interested in using includes to override configuration 33 | # options, it is better to use include as the last line. 34 | # 35 | # include /path/to/local.conf 36 | # include /path/to/other.conf 37 | 38 | ################################## MODULES ##################################### 39 | 40 | # Load modules at startup. If the server is not able to load modules 41 | # it will abort. It is possible to use multiple loadmodule directives. 42 | # 43 | # loadmodule /path/to/my_module.so 44 | # loadmodule /path/to/other_module.so 45 | 46 | ################################## NETWORK ##################################### 47 | 48 | # By default, if no "bind" configuration directive is specified, Redis listens 49 | # for connections from all the network interfaces available on the server. 50 | # It is possible to listen to just one or multiple selected interfaces using 51 | # the "bind" configuration directive, followed by one or more IP addresses. 52 | # 53 | # Examples: 54 | # 55 | # bind 192.168.1.100 10.0.0.1 56 | # bind 127.0.0.1 ::1 57 | # 58 | # ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the 59 | # internet, binding to all the interfaces is dangerous and will expose the 60 | # instance to everybody on the internet. So by default we uncomment the 61 | # following bind directive, that will force Redis to listen only into 62 | # the IPv4 loopback interface address (this means Redis will be able to 63 | # accept connections only from clients running into the same computer it 64 | # is running). 65 | # 66 | # IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES 67 | # JUST COMMENT THE FOLLOWING LINE. 68 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | bind 0.0.0.0 70 | 71 | # Protected mode is a layer of security protection, in order to avoid that 72 | # Redis instances left open on the internet are accessed and exploited. 73 | # 74 | # When protected mode is on and if: 75 | # 76 | # 1) The server is not binding explicitly to a set of addresses using the 77 | # "bind" directive. 78 | # 2) No password is configured. 79 | # 80 | # The server only accepts connections from clients connecting from the 81 | # IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain 82 | # sockets. 83 | # 84 | # By default protected mode is enabled. You should disable it only if 85 | # you are sure you want clients from other hosts to connect to Redis 86 | # even if no authentication is configured, nor a specific set of interfaces 87 | # are explicitly listed using the "bind" directive. 88 | protected-mode yes 89 | 90 | # Accept connections on the specified port, default is 6379 (IANA #815344). 91 | # If port 0 is specified Redis will not listen on a TCP socket. 92 | port 6379 93 | 94 | # TCP listen() backlog. 95 | # 96 | # In high requests-per-second environments you need an high backlog in order 97 | # to avoid slow clients connections issues. Note that the Linux kernel 98 | # will silently truncate it to the value of /proc/sys/net/core/somaxconn so 99 | # make sure to raise both the value of somaxconn and tcp_max_syn_backlog 100 | # in order to get the desired effect. 
101 | tcp-backlog 512 102 | 103 | # Unix socket. 104 | # 105 | # Specify the path for the Unix socket that will be used to listen for 106 | # incoming connections. There is no default, so Redis will not listen 107 | # on a unix socket when not specified. 108 | # 109 | # unixsocket /tmp/redis.sock 110 | # unixsocketperm 700 111 | 112 | # Close the connection after a client is idle for N seconds (0 to disable) 113 | timeout 0 114 | 115 | # TCP keepalive. 116 | # 117 | # If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence 118 | # of communication. This is useful for two reasons: 119 | # 120 | # 1) Detect dead peers. 121 | # 2) Take the connection alive from the point of view of network 122 | # equipment in the middle. 123 | # 124 | # On Linux, the specified value (in seconds) is the period used to send ACKs. 125 | # Note that to close the connection the double of the time is needed. 126 | # On other kernels the period depends on the kernel configuration. 127 | # 128 | # A reasonable value for this option is 300 seconds, which is the new 129 | # Redis default starting with Redis 3.2.1. 130 | tcp-keepalive 300 131 | 132 | ################################# GENERAL ##################################### 133 | 134 | # By default Redis does not run as a daemon. Use 'yes' if you need it. 135 | # Note that Redis will write a pid file in /var/run/redis.pid when daemonized. 136 | daemonize no 137 | 138 | # If you run Redis from upstart or systemd, Redis can interact with your 139 | # supervision tree. Options: 140 | # supervised no - no supervision interaction 141 | # supervised upstart - signal upstart by putting Redis into SIGSTOP mode 142 | # supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET 143 | # supervised auto - detect upstart or systemd method based on 144 | # UPSTART_JOB or NOTIFY_SOCKET environment variables 145 | # Note: these supervision methods only signal "process is ready." 146 | # They do not enable continuous liveness pings back to your supervisor. 147 | supervised no 148 | 149 | # If a pid file is specified, Redis writes it where specified at startup 150 | # and removes it at exit. 151 | # 152 | # When the server runs non daemonized, no pid file is created if none is 153 | # specified in the configuration. When the server is daemonized, the pid file 154 | # is used even if not specified, defaulting to "/var/run/redis.pid". 155 | # 156 | # Creating a pid file is best effort: if Redis is not able to create it 157 | # nothing bad happens, the server will start and run normally. 158 | pidfile /var/run/redis_6379.pid 159 | 160 | # Specify the server verbosity level. 161 | # This can be one of: 162 | # debug (a lot of information, useful for development/testing) 163 | # verbose (many rarely useful info, but not a mess like the debug level) 164 | # notice (moderately verbose, what you want in production probably) 165 | # warning (only very important / critical messages are logged) 166 | loglevel notice 167 | 168 | # Specify the log file name. Also the empty string can be used to force 169 | # Redis to log on the standard output. Note that if you use standard 170 | # output for logging but daemonize, logs will be sent to /dev/null 171 | logfile "" 172 | 173 | # To enable logging to the system logger, just set 'syslog-enabled' to yes, 174 | # and optionally update the other syslog parameters to suit your needs. 175 | # syslog-enabled no 176 | 177 | # Specify the syslog identity. 178 | # syslog-ident redis 179 | 180 | # Specify the syslog facility. 
Must be USER or between LOCAL0-LOCAL7. 181 | # syslog-facility local0 182 | 183 | # Set the number of databases. The default database is DB 0, you can select 184 | # a different one on a per-connection basis using SELECT where 185 | # dbid is a number between 0 and 'databases'-1 186 | databases 16 187 | 188 | # By default Redis shows an ASCII art logo only when started to log to the 189 | # standard output and if the standard output is a TTY. Basically this means 190 | # that normally a logo is displayed only in interactive sessions. 191 | # 192 | # However it is possible to force the pre-4.0 behavior and always show a 193 | # ASCII art logo in startup logs by setting the following option to yes. 194 | always-show-logo yes 195 | 196 | ################################ SNAPSHOTTING ################################ 197 | # 198 | # Save the DB on disk: 199 | # 200 | # save 201 | # 202 | # Will save the DB if both the given number of seconds and the given 203 | # number of write operations against the DB occurred. 204 | # 205 | # In the example below the behaviour will be to save: 206 | # after 900 sec (15 min) if at least 1 key changed 207 | # after 300 sec (5 min) if at least 10 keys changed 208 | # after 60 sec if at least 10000 keys changed 209 | # 210 | # Note: you can disable saving completely by commenting out all "save" lines. 211 | # 212 | # It is also possible to remove all the previously configured save 213 | # points by adding a save directive with a single empty string argument 214 | # like in the following example: 215 | # 216 | # save "" 217 | 218 | save 900 1 219 | save 300 10 220 | save 60 10000 221 | 222 | # By default Redis will stop accepting writes if RDB snapshots are enabled 223 | # (at least one save point) and the latest background save failed. 224 | # This will make the user aware (in a hard way) that data is not persisting 225 | # on disk properly, otherwise chances are that no one will notice and some 226 | # disaster will happen. 227 | # 228 | # If the background saving process will start working again Redis will 229 | # automatically allow writes again. 230 | # 231 | # However if you have setup your proper monitoring of the Redis server 232 | # and persistence, you may want to disable this feature so that Redis will 233 | # continue to work as usual even if there are problems with disk, 234 | # permissions, and so forth. 235 | stop-writes-on-bgsave-error yes 236 | 237 | # Compress string objects using LZF when dump .rdb databases? 238 | # For default that's set to 'yes' as it's almost always a win. 239 | # If you want to save some CPU in the saving child set it to 'no' but 240 | # the dataset will likely be bigger if you have compressible values or keys. 241 | rdbcompression yes 242 | 243 | # Since version 5 of RDB a CRC64 checksum is placed at the end of the file. 244 | # This makes the format more resistant to corruption but there is a performance 245 | # hit to pay (around 10%) when saving and loading RDB files, so you can disable it 246 | # for maximum performances. 247 | # 248 | # RDB files created with checksum disabled have a checksum of zero that will 249 | # tell the loading code to skip the check. 250 | rdbchecksum yes 251 | 252 | # The filename where to dump the DB 253 | dbfilename dump.rdb 254 | 255 | # The working directory. 256 | # 257 | # The DB will be written inside this directory, with the filename specified 258 | # above using the 'dbfilename' configuration directive. 
259 | # 260 | # The Append Only File will also be created inside this directory. 261 | # 262 | # Note that you must specify a directory here, not a file name. 263 | dir ./ 264 | 265 | ################################# REPLICATION ################################# 266 | 267 | # Master-Replica replication. Use replicaof to make a Redis instance a copy of 268 | # another Redis server. A few things to understand ASAP about Redis replication. 269 | # 270 | # +------------------+ +---------------+ 271 | # | Master | ---> | Replica | 272 | # | (receive writes) | | (exact copy) | 273 | # +------------------+ +---------------+ 274 | # 275 | # 1) Redis replication is asynchronous, but you can configure a master to 276 | # stop accepting writes if it appears to be not connected with at least 277 | # a given number of replicas. 278 | # 2) Redis replicas are able to perform a partial resynchronization with the 279 | # master if the replication link is lost for a relatively small amount of 280 | # time. You may want to configure the replication backlog size (see the next 281 | # sections of this file) with a sensible value depending on your needs. 282 | # 3) Replication is automatic and does not need user intervention. After a 283 | # network partition replicas automatically try to reconnect to masters 284 | # and resynchronize with them. 285 | # 286 | # replicaof 287 | 288 | # If the master is password protected (using the "requirepass" configuration 289 | # directive below) it is possible to tell the replica to authenticate before 290 | # starting the replication synchronization process, otherwise the master will 291 | # refuse the replica request. 292 | # 293 | # masterauth 294 | 295 | # When a replica loses its connection with the master, or when the replication 296 | # is still in progress, the replica can act in two different ways: 297 | # 298 | # 1) if replica-serve-stale-data is set to 'yes' (the default) the replica will 299 | # still reply to client requests, possibly with out of date data, or the 300 | # data set may just be empty if this is the first synchronization. 301 | # 302 | # 2) if replica-serve-stale-data is set to 'no' the replica will reply with 303 | # an error "SYNC with master in progress" to all the kind of commands 304 | # but to INFO, replicaOF, AUTH, PING, SHUTDOWN, REPLCONF, ROLE, CONFIG, 305 | # SUBSCRIBE, UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB, 306 | # COMMAND, POST, HOST: and LATENCY. 307 | # 308 | replica-serve-stale-data yes 309 | 310 | # You can configure a replica instance to accept writes or not. Writing against 311 | # a replica instance may be useful to store some ephemeral data (because data 312 | # written on a replica will be easily deleted after resync with the master) but 313 | # may also cause problems if clients are writing to it because of a 314 | # misconfiguration. 315 | # 316 | # Since Redis 2.6 by default replicas are read-only. 317 | # 318 | # Note: read only replicas are not designed to be exposed to untrusted clients 319 | # on the internet. It's just a protection layer against misuse of the instance. 320 | # Still a read only replica exports by default all the administrative commands 321 | # such as CONFIG, DEBUG, and so forth. To a limited extent you can improve 322 | # security of read only replicas using 'rename-command' to shadow all the 323 | # administrative / dangerous commands. 324 | replica-read-only yes 325 | 326 | # Replication SYNC strategy: disk or socket. 
327 | # 328 | # ------------------------------------------------------- 329 | # WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY 330 | # ------------------------------------------------------- 331 | # 332 | # New replicas and reconnecting replicas that are not able to continue the replication 333 | # process just receiving differences, need to do what is called a "full 334 | # synchronization". An RDB file is transmitted from the master to the replicas. 335 | # The transmission can happen in two different ways: 336 | # 337 | # 1) Disk-backed: The Redis master creates a new process that writes the RDB 338 | # file on disk. Later the file is transferred by the parent 339 | # process to the replicas incrementally. 340 | # 2) Diskless: The Redis master creates a new process that directly writes the 341 | # RDB file to replica sockets, without touching the disk at all. 342 | # 343 | # With disk-backed replication, while the RDB file is generated, more replicas 344 | # can be queued and served with the RDB file as soon as the current child producing 345 | # the RDB file finishes its work. With diskless replication instead once 346 | # the transfer starts, new replicas arriving will be queued and a new transfer 347 | # will start when the current one terminates. 348 | # 349 | # When diskless replication is used, the master waits a configurable amount of 350 | # time (in seconds) before starting the transfer in the hope that multiple replicas 351 | # will arrive and the transfer can be parallelized. 352 | # 353 | # With slow disks and fast (large bandwidth) networks, diskless replication 354 | # works better. 355 | repl-diskless-sync yes 356 | 357 | # When diskless replication is enabled, it is possible to configure the delay 358 | # the server waits in order to spawn the child that transfers the RDB via socket 359 | # to the replicas. 360 | # 361 | # This is important since once the transfer starts, it is not possible to serve 362 | # new replicas arriving, that will be queued for the next RDB transfer, so the server 363 | # waits a delay in order to let more replicas arrive. 364 | # 365 | # The delay is specified in seconds, and by default is 5 seconds. To disable 366 | # it entirely just set it to 0 seconds and the transfer will start ASAP. 367 | repl-diskless-sync-delay 5 368 | 369 | # Replicas send PINGs to server in a predefined interval. It's possible to change 370 | # this interval with the repl_ping_replica_period option. The default value is 10 371 | # seconds. 372 | # 373 | repl-ping-replica-period 10 374 | 375 | # The following option sets the replication timeout for: 376 | # 377 | # 1) Bulk transfer I/O during SYNC, from the point of view of replica. 378 | # 2) Master timeout from the point of view of replicas (data, pings). 379 | # 3) Replica timeout from the point of view of masters (REPLCONF ACK pings). 380 | # 381 | # It is important to make sure that this value is greater than the value 382 | # specified for repl-ping-replica-period otherwise a timeout will be detected 383 | # every time there is low traffic between the master and the replica. 384 | # 385 | repl-timeout 60 386 | 387 | # Disable TCP_NODELAY on the replica socket after SYNC? 388 | # 389 | # If you select "yes" Redis will use a smaller number of TCP packets and 390 | # less bandwidth to send data to replicas. But this can add a delay for 391 | # the data to appear on the replica side, up to 40 milliseconds with 392 | # Linux kernels using a default configuration. 
393 | # 394 | # If you select "no" the delay for data to appear on the replica side will 395 | # be reduced but more bandwidth will be used for replication. 396 | # 397 | # By default we optimize for low latency, but in very high traffic conditions 398 | # or when the master and replicas are many hops away, turning this to "yes" may 399 | # be a good idea. 400 | repl-disable-tcp-nodelay no 401 | 402 | # Set the replication backlog size. The backlog is a buffer that accumulates 403 | # replica data when replicas are disconnected for some time, so that when a replica 404 | # wants to reconnect again, often a full resync is not needed, but a partial 405 | # resync is enough, just passing the portion of data the replica missed while 406 | # disconnected. 407 | # 408 | # The bigger the replication backlog, the longer the time the replica can be 409 | # disconnected and later be able to perform a partial resynchronization. 410 | # 411 | # The backlog is only allocated once there is at least a replica connected. 412 | # 413 | repl-backlog-size 500mb 414 | 415 | # After a master has no longer connected replicas for some time, the backlog 416 | # will be freed. The following option configures the amount of seconds that 417 | # need to elapse, starting from the time the last replica disconnected, for 418 | # the backlog buffer to be freed. 419 | # 420 | # Note that replicas never free the backlog for timeout, since they may be 421 | # promoted to masters later, and should be able to correctly "partially 422 | # resynchronize" with the replicas: hence they should always accumulate backlog. 423 | # 424 | # A value of 0 means to never release the backlog. 425 | # 426 | # repl-backlog-ttl 3600 427 | 428 | # The replica priority is an integer number published by Redis in the INFO output. 429 | # It is used by Redis Sentinel in order to select a replica to promote into a 430 | # master if the master is no longer working correctly. 431 | # 432 | # A replica with a low priority number is considered better for promotion, so 433 | # for instance if there are three replicas with priority 10, 100, 25 Sentinel will 434 | # pick the one with priority 10, that is the lowest. 435 | # 436 | # However a special priority of 0 marks the replica as not able to perform the 437 | # role of master, so a replica with priority of 0 will never be selected by 438 | # Redis Sentinel for promotion. 439 | # 440 | # By default the priority is 100. 441 | replica-priority 100 442 | 443 | # It is possible for a master to stop accepting writes if there are less than 444 | # N replicas connected, having a lag less or equal than M seconds. 445 | # 446 | # The N replicas need to be in "online" state. 447 | # 448 | # The lag in seconds, that must be <= the specified value, is calculated from 449 | # the last ping received from the replica, that is usually sent every second. 450 | # 451 | # This option does not GUARANTEE that N replicas will accept the write, but 452 | # will limit the window of exposure for lost writes in case not enough replicas 453 | # are available, to the specified number of seconds. 454 | # 455 | # For example to require at least 3 replicas with a lag <= 10 seconds use: 456 | # 457 | # min-replicas-to-write 3 458 | # min-replicas-max-lag 10 459 | # 460 | # Setting one or the other to 0 disables the feature. 461 | # 462 | # By default min-replicas-to-write is set to 0 (feature disabled) and 463 | # min-replicas-max-lag is set to 10. 
464 | 465 | # A Redis master is able to list the address and port of the attached 466 | # replicas in different ways. For example the "INFO replication" section 467 | # offers this information, which is used, among other tools, by 468 | # Redis Sentinel in order to discover replica instances. 469 | # Another place where this info is available is in the output of the 470 | # "ROLE" command of a master. 471 | # 472 | # The listed IP and address normally reported by a replica is obtained 473 | # in the following way: 474 | # 475 | # IP: The address is auto detected by checking the peer address 476 | # of the socket used by the replica to connect with the master. 477 | # 478 | # Port: The port is communicated by the replica during the replication 479 | # handshake, and is normally the port that the replica is using to 480 | # listen for connections. 481 | # 482 | # However when port forwarding or Network Address Translation (NAT) is 483 | # used, the replica may be actually reachable via different IP and port 484 | # pairs. The following two options can be used by a replica in order to 485 | # report to its master a specific set of IP and port, so that both INFO 486 | # and ROLE will report those values. 487 | # 488 | # There is no need to use both the options if you need to override just 489 | # the port or the IP address. 490 | # 491 | # replica-announce-ip 5.5.5.5 492 | # replica-announce-port 1234 493 | 494 | ################################## SECURITY ################################### 495 | 496 | # Require clients to issue AUTH before processing any other 497 | # commands. This might be useful in environments in which you do not trust 498 | # others with access to the host running redis-server. 499 | # 500 | # This should stay commented out for backward compatibility and because most 501 | # people do not need auth (e.g. they run their own servers). 502 | # 503 | # Warning: since Redis is pretty fast an outside user can try up to 504 | # 150k passwords per second against a good box. This means that you should 505 | # use a very strong password otherwise it will be very easy to break. 506 | # 507 | # requirepass foobared 508 | 509 | # Command renaming. 510 | # 511 | # It is possible to change the name of dangerous commands in a shared 512 | # environment. For instance the CONFIG command may be renamed into something 513 | # hard to guess so that it will still be available for internal-use tools 514 | # but not available for general clients. 515 | # 516 | # Example: 517 | # 518 | # rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 519 | # 520 | # It is also possible to completely kill a command by renaming it into 521 | # an empty string: 522 | # 523 | # rename-command CONFIG "" 524 | # 525 | # Please note that changing the name of commands that are logged into the 526 | # AOF file or transmitted to replicas may cause problems. 527 | 528 | ################################### CLIENTS #################################### 529 | 530 | # Set the max number of connected clients at the same time. By default 531 | # this limit is set to 10000 clients, however if the Redis server is not 532 | # able to configure the process file limit to allow for the specified limit 533 | # the max number of allowed clients is set to the current file limit 534 | # minus 32 (as Redis reserves a few file descriptors for internal uses). 535 | # 536 | # Once the limit is reached Redis will close all the new connections sending 537 | # an error 'max number of clients reached'. 
538 | # 539 | # maxclients 10000 540 | 541 | ############################## MEMORY MANAGEMENT ################################ 542 | 543 | # Set a memory usage limit to the specified amount of bytes. 544 | # When the memory limit is reached Redis will try to remove keys 545 | # according to the eviction policy selected (see maxmemory-policy). 546 | # 547 | # If Redis can't remove keys according to the policy, or if the policy is 548 | # set to 'noeviction', Redis will start to reply with errors to commands 549 | # that would use more memory, like SET, LPUSH, and so on, and will continue 550 | # to reply to read-only commands like GET. 551 | # 552 | # This option is usually useful when using Redis as an LRU or LFU cache, or to 553 | # set a hard memory limit for an instance (using the 'noeviction' policy). 554 | # 555 | # WARNING: If you have replicas attached to an instance with maxmemory on, 556 | # the size of the output buffers needed to feed the replicas are subtracted 557 | # from the used memory count, so that network problems / resyncs will 558 | # not trigger a loop where keys are evicted, and in turn the output 559 | # buffer of replicas is full with DELs of keys evicted triggering the deletion 560 | # of more keys, and so forth until the database is completely emptied. 561 | # 562 | # In short... if you have replicas attached it is suggested that you set a lower 563 | # limit for maxmemory so that there is some free RAM on the system for replica 564 | # output buffers (but this is not needed if the policy is 'noeviction'). 565 | # 566 | #maxmemory 1gb 567 | 568 | # MAXMEMORY POLICY: how Redis will select what to remove when maxmemory 569 | # is reached. You can select among five behaviors: 570 | # 571 | # volatile-lru -> Evict using approximated LRU among the keys with an expire set. 572 | # allkeys-lru -> Evict any key using approximated LRU. 573 | # volatile-lfu -> Evict using approximated LFU among the keys with an expire set. 574 | # allkeys-lfu -> Evict any key using approximated LFU. 575 | # volatile-random -> Remove a random key among the ones with an expire set. 576 | # allkeys-random -> Remove a random key, any key. 577 | # volatile-ttl -> Remove the key with the nearest expire time (minor TTL) 578 | # noeviction -> Don't evict anything, just return an error on write operations. 579 | # 580 | # LRU means Least Recently Used 581 | # LFU means Least Frequently Used 582 | # 583 | # Both LRU, LFU and volatile-ttl are implemented using approximated 584 | # randomized algorithms. 585 | # 586 | # Note: with any of the above policies, Redis will return an error on write 587 | # operations, when there are no suitable keys for eviction. 588 | # 589 | # At the date of writing these commands are: set setnx setex append 590 | # incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd 591 | # sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby 592 | # zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby 593 | # getset mset msetnx exec sort 594 | # 595 | # The default is: 596 | # 597 | maxmemory-policy volatile-lfu 598 | 599 | # LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated 600 | # algorithms (in order to save memory), so you can tune it for speed or 601 | # accuracy. For default Redis will check five keys and pick the one that was 602 | # used less recently, you can change the sample size using the following 603 | # configuration directive. 604 | # 605 | # The default of 5 produces good enough results. 
10 Approximates very closely 606 | # true LRU but costs more CPU. 3 is faster but not very accurate. 607 | # 608 | # maxmemory-samples 5 609 | 610 | # Starting from Redis 5, by default a replica will ignore its maxmemory setting 611 | # (unless it is promoted to master after a failover or manually). It means 612 | # that the eviction of keys will be just handled by the master, sending the 613 | # DEL commands to the replica as keys evict in the master side. 614 | # 615 | # This behavior ensures that masters and replicas stay consistent, and is usually 616 | # what you want, however if your replica is writable, or you want the replica to have 617 | # a different memory setting, and you are sure all the writes performed to the 618 | # replica are idempotent, then you may change this default (but be sure to understand 619 | # what you are doing). 620 | # 621 | # Note that since the replica by default does not evict, it may end using more 622 | # memory than the one set via maxmemory (there are certain buffers that may 623 | # be larger on the replica, or data structures may sometimes take more memory and so 624 | # forth). So make sure you monitor your replicas and make sure they have enough 625 | # memory to never hit a real out-of-memory condition before the master hits 626 | # the configured maxmemory setting. 627 | # 628 | # replica-ignore-maxmemory yes 629 | 630 | ############################# LAZY FREEING #################################### 631 | 632 | # Redis has two primitives to delete keys. One is called DEL and is a blocking 633 | # deletion of the object. It means that the server stops processing new commands 634 | # in order to reclaim all the memory associated with an object in a synchronous 635 | # way. If the key deleted is associated with a small object, the time needed 636 | # in order to execute the DEL command is very small and comparable to most other 637 | # O(1) or O(log_N) commands in Redis. However if the key is associated with an 638 | # aggregated value containing millions of elements, the server can block for 639 | # a long time (even seconds) in order to complete the operation. 640 | # 641 | # For the above reasons Redis also offers non blocking deletion primitives 642 | # such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and 643 | # FLUSHDB commands, in order to reclaim memory in background. Those commands 644 | # are executed in constant time. Another thread will incrementally free the 645 | # object in the background as fast as possible. 646 | # 647 | # DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled. 648 | # It's up to the design of the application to understand when it is a good 649 | # idea to use one or the other. However the Redis server sometimes has to 650 | # delete keys or flush the whole database as a side effect of other operations. 651 | # Specifically Redis deletes objects independently of a user call in the 652 | # following scenarios: 653 | # 654 | # 1) On eviction, because of the maxmemory and maxmemory policy configurations, 655 | # in order to make room for new data, without going over the specified 656 | # memory limit. 657 | # 2) Because of expire: when a key with an associated time to live (see the 658 | # EXPIRE command) must be deleted from memory. 659 | # 3) Because of a side effect of a command that stores data on a key that may 660 | # already exist. For example the RENAME command may delete the old key 661 | # content when it is replaced with another one. 
Similarly SUNIONSTORE 662 | # or SORT with STORE option may delete existing keys. The SET command 663 | # itself removes any old content of the specified key in order to replace 664 | # it with the specified string. 665 | # 4) During replication, when a replica performs a full resynchronization with 666 | # its master, the content of the whole database is removed in order to 667 | # load the RDB file just transferred. 668 | # 669 | # In all the above cases the default is to delete objects in a blocking way, 670 | # like if DEL was called. However you can configure each case specifically 671 | # in order to instead release memory in a non-blocking way like if UNLINK 672 | # was called, using the following configuration directives: 673 | 674 | lazyfree-lazy-eviction no 675 | lazyfree-lazy-expire no 676 | lazyfree-lazy-server-del no 677 | replica-lazy-flush no 678 | 679 | ############################## APPEND ONLY MODE ############################### 680 | 681 | # By default Redis asynchronously dumps the dataset on disk. This mode is 682 | # good enough in many applications, but an issue with the Redis process or 683 | # a power outage may result into a few minutes of writes lost (depending on 684 | # the configured save points). 685 | # 686 | # The Append Only File is an alternative persistence mode that provides 687 | # much better durability. For instance using the default data fsync policy 688 | # (see later in the config file) Redis can lose just one second of writes in a 689 | # dramatic event like a server power outage, or a single write if something 690 | # wrong with the Redis process itself happens, but the operating system is 691 | # still running correctly. 692 | # 693 | # AOF and RDB persistence can be enabled at the same time without problems. 694 | # If the AOF is enabled on startup Redis will load the AOF, that is the file 695 | # with the better durability guarantees. 696 | # 697 | # Please check http://redis.io/topics/persistence for more information. 698 | 699 | appendonly no 700 | 701 | # The name of the append only file (default: "appendonly.aof") 702 | 703 | appendfilename "appendonly.aof" 704 | 705 | # The fsync() call tells the Operating System to actually write data on disk 706 | # instead of waiting for more data in the output buffer. Some OS will really flush 707 | # data on disk, some other OS will just try to do it ASAP. 708 | # 709 | # Redis supports three different modes: 710 | # 711 | # no: don't fsync, just let the OS flush the data when it wants. Faster. 712 | # always: fsync after every write to the append only log. Slow, Safest. 713 | # everysec: fsync only one time every second. Compromise. 714 | # 715 | # The default is "everysec", as that's usually the right compromise between 716 | # speed and data safety. It's up to you to understand if you can relax this to 717 | # "no" that will let the operating system flush the output buffer when 718 | # it wants, for better performances (but if you can live with the idea of 719 | # some data loss consider the default persistence mode that's snapshotting), 720 | # or on the contrary, use "always" that's very slow but a bit safer than 721 | # everysec. 722 | # 723 | # More details please check the following article: 724 | # http://antirez.com/post/redis-persistence-demystified.html 725 | # 726 | # If unsure, use "everysec". 
727 | 728 | # appendfsync always 729 | appendfsync everysec 730 | # appendfsync no 731 | 732 | # When the AOF fsync policy is set to always or everysec, and a background 733 | # saving process (a background save or AOF log background rewriting) is 734 | # performing a lot of I/O against the disk, in some Linux configurations 735 | # Redis may block too long on the fsync() call. Note that there is no fix for 736 | # this currently, as even performing fsync in a different thread will block 737 | # our synchronous write(2) call. 738 | # 739 | # In order to mitigate this problem it's possible to use the following option 740 | # that will prevent fsync() from being called in the main process while a 741 | # BGSAVE or BGREWRITEAOF is in progress. 742 | # 743 | # This means that while another child is saving, the durability of Redis is 744 | # the same as "appendfsync none". In practical terms, this means that it is 745 | # possible to lose up to 30 seconds of log in the worst scenario (with the 746 | # default Linux settings). 747 | # 748 | # If you have latency problems turn this to "yes". Otherwise leave it as 749 | # "no" that is the safest pick from the point of view of durability. 750 | 751 | no-appendfsync-on-rewrite no 752 | 753 | # Automatic rewrite of the append only file. 754 | # Redis is able to automatically rewrite the log file implicitly calling 755 | # BGREWRITEAOF when the AOF log size grows by the specified percentage. 756 | # 757 | # This is how it works: Redis remembers the size of the AOF file after the 758 | # latest rewrite (if no rewrite has happened since the restart, the size of 759 | # the AOF at startup is used). 760 | # 761 | # This base size is compared to the current size. If the current size is 762 | # bigger than the specified percentage, the rewrite is triggered. Also 763 | # you need to specify a minimal size for the AOF file to be rewritten, this 764 | # is useful to avoid rewriting the AOF file even if the percentage increase 765 | # is reached but it is still pretty small. 766 | # 767 | # Specify a percentage of zero in order to disable the automatic AOF 768 | # rewrite feature. 769 | 770 | auto-aof-rewrite-percentage 0 771 | auto-aof-rewrite-min-size 64mb 772 | 773 | # An AOF file may be found to be truncated at the end during the Redis 774 | # startup process, when the AOF data gets loaded back into memory. 775 | # This may happen when the system where Redis is running 776 | # crashes, especially when an ext4 filesystem is mounted without the 777 | # data=ordered option (however this can't happen when Redis itself 778 | # crashes or aborts but the operating system still works correctly). 779 | # 780 | # Redis can either exit with an error when this happens, or load as much 781 | # data as possible (the default now) and start if the AOF file is found 782 | # to be truncated at the end. The following option controls this behavior. 783 | # 784 | # If aof-load-truncated is set to yes, a truncated AOF file is loaded and 785 | # the Redis server starts emitting a log to inform the user of the event. 786 | # Otherwise if the option is set to no, the server aborts with an error 787 | # and refuses to start. When the option is set to no, the user requires 788 | # to fix the AOF file using the "redis-check-aof" utility before to restart 789 | # the server. 790 | # 791 | # Note that if the AOF file will be found to be corrupted in the middle 792 | # the server will still exit with an error. 
This option only applies when 793 | # Redis will try to read more data from the AOF file but not enough bytes 794 | # will be found. 795 | aof-load-truncated yes 796 | 797 | # When rewriting the AOF file, Redis is able to use an RDB preamble in the 798 | # AOF file for faster rewrites and recoveries. When this option is turned 799 | # on the rewritten AOF file is composed of two different stanzas: 800 | # 801 | # [RDB file][AOF tail] 802 | # 803 | # When loading Redis recognizes that the AOF file starts with the "REDIS" 804 | # string and loads the prefixed RDB file, and continues loading the AOF 805 | # tail. 806 | aof-use-rdb-preamble yes 807 | 808 | ################################ LUA SCRIPTING ############################### 809 | 810 | # Max execution time of a Lua script in milliseconds. 811 | # 812 | # If the maximum execution time is reached Redis will log that a script is 813 | # still in execution after the maximum allowed time and will start to 814 | # reply to queries with an error. 815 | # 816 | # When a long running script exceeds the maximum execution time only the 817 | # SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be 818 | # used to stop a script that did not yet called write commands. The second 819 | # is the only way to shut down the server in the case a write command was 820 | # already issued by the script but the user doesn't want to wait for the natural 821 | # termination of the script. 822 | # 823 | # Set it to 0 or a negative value for unlimited execution without warnings. 824 | lua-time-limit 5000 825 | 826 | ################################ REDIS CLUSTER ############################### 827 | # 828 | # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 829 | # WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however 830 | # in order to mark it as "mature" we need to wait for a non trivial percentage 831 | # of users to deploy it in production. 832 | # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 833 | # 834 | # Normal Redis instances can't be part of a Redis Cluster; only nodes that are 835 | # started as cluster nodes can. In order to start a Redis instance as a 836 | # cluster node enable the cluster support uncommenting the following: 837 | # 838 | # cluster-enabled yes 839 | 840 | # Every cluster node has a cluster configuration file. This file is not 841 | # intended to be edited by hand. It is created and updated by Redis nodes. 842 | # Every Redis Cluster node requires a different cluster configuration file. 843 | # Make sure that instances running in the same system do not have 844 | # overlapping cluster configuration file names. 845 | # 846 | # cluster-config-file nodes-6379.conf 847 | 848 | # Cluster node timeout is the amount of milliseconds a node must be unreachable 849 | # for it to be considered in failure state. 850 | # Most other internal time limits are multiple of the node timeout. 851 | # 852 | # cluster-node-timeout 15000 853 | 854 | # A replica of a failing master will avoid to start a failover if its data 855 | # looks too old. 856 | # 857 | # There is no simple way for a replica to actually have an exact measure of 858 | # its "data age", so the following two checks are performed: 859 | # 860 | # 1) If there are multiple replicas able to failover, they exchange messages 861 | # in order to try to give an advantage to the replica with the best 862 | # replication offset (more data from the master processed). 
863 | # Replicas will try to get their rank by offset, and apply to the start 864 | # of the failover a delay proportional to their rank. 865 | # 866 | # 2) Every single replica computes the time of the last interaction with 867 | # its master. This can be the last ping or command received (if the master 868 | # is still in the "connected" state), or the time that elapsed since the 869 | # disconnection with the master (if the replication link is currently down). 870 | # If the last interaction is too old, the replica will not try to failover 871 | # at all. 872 | # 873 | # The point "2" can be tuned by user. Specifically a replica will not perform 874 | # the failover if, since the last interaction with the master, the time 875 | # elapsed is greater than: 876 | # 877 | # (node-timeout * replica-validity-factor) + repl-ping-replica-period 878 | # 879 | # So for example if node-timeout is 30 seconds, and the replica-validity-factor 880 | # is 10, and assuming a default repl-ping-replica-period of 10 seconds, the 881 | # replica will not try to failover if it was not able to talk with the master 882 | # for longer than 310 seconds. 883 | # 884 | # A large replica-validity-factor may allow replicas with too old data to failover 885 | # a master, while a too small value may prevent the cluster from being able to 886 | # elect a replica at all. 887 | # 888 | # For maximum availability, it is possible to set the replica-validity-factor 889 | # to a value of 0, which means, that replicas will always try to failover the 890 | # master regardless of the last time they interacted with the master. 891 | # (However they'll always try to apply a delay proportional to their 892 | # offset rank). 893 | # 894 | # Zero is the only value able to guarantee that when all the partitions heal 895 | # the cluster will always be able to continue. 896 | # 897 | # cluster-replica-validity-factor 10 898 | 899 | # Cluster replicas are able to migrate to orphaned masters, that are masters 900 | # that are left without working replicas. This improves the cluster ability 901 | # to resist to failures as otherwise an orphaned master can't be failed over 902 | # in case of failure if it has no working replicas. 903 | # 904 | # Replicas migrate to orphaned masters only if there are still at least a 905 | # given number of other working replicas for their old master. This number 906 | # is the "migration barrier". A migration barrier of 1 means that a replica 907 | # will migrate only if there is at least 1 other working replica for its master 908 | # and so forth. It usually reflects the number of replicas you want for every 909 | # master in your cluster. 910 | # 911 | # Default is 1 (replicas migrate only if their masters remain with at least 912 | # one replica). To disable migration just set it to a very large value. 913 | # A value of 0 can be set but is useful only for debugging and dangerous 914 | # in production. 915 | # 916 | # cluster-migration-barrier 1 917 | 918 | # By default Redis Cluster nodes stop accepting queries if they detect there 919 | # is at least an hash slot uncovered (no available node is serving it). 920 | # This way if the cluster is partially down (for example a range of hash slots 921 | # are no longer covered) all the cluster becomes, eventually, unavailable. 922 | # It automatically returns available as soon as all the slots are covered again. 
923 | # 924 | # However sometimes you want the subset of the cluster which is working, 925 | # to continue to accept queries for the part of the key space that is still 926 | # covered. In order to do so, just set the cluster-require-full-coverage 927 | # option to no. 928 | # 929 | # cluster-require-full-coverage yes 930 | 931 | # This option, when set to yes, prevents replicas from trying to failover its 932 | # master during master failures. However the master can still perform a 933 | # manual failover, if forced to do so. 934 | # 935 | # This is useful in different scenarios, especially in the case of multiple 936 | # data center operations, where we want one side to never be promoted if not 937 | # in the case of a total DC failure. 938 | # 939 | # cluster-replica-no-failover no 940 | 941 | # In order to setup your cluster make sure to read the documentation 942 | # available at http://redis.io web site. 943 | 944 | ########################## CLUSTER DOCKER/NAT support ######################## 945 | 946 | # In certain deployments, Redis Cluster nodes address discovery fails, because 947 | # addresses are NAT-ted or because ports are forwarded (the typical case is 948 | # Docker and other containers). 949 | # 950 | # In order to make Redis Cluster working in such environments, a static 951 | # configuration where each node knows its public address is needed. The 952 | # following two options are used for this scope, and are: 953 | # 954 | # * cluster-announce-ip 955 | # * cluster-announce-port 956 | # * cluster-announce-bus-port 957 | # 958 | # Each instruct the node about its address, client port, and cluster message 959 | # bus port. The information is then published in the header of the bus packets 960 | # so that other nodes will be able to correctly map the address of the node 961 | # publishing the information. 962 | # 963 | # If the above options are not used, the normal Redis Cluster auto-detection 964 | # will be used instead. 965 | # 966 | # Note that when remapped, the bus port may not be at the fixed offset of 967 | # clients port + 10000, so you can specify any port and bus-port depending 968 | # on how they get remapped. If the bus-port is not set, a fixed offset of 969 | # 10000 will be used as usually. 970 | # 971 | # Example: 972 | # 973 | # cluster-announce-ip 10.1.1.5 974 | # cluster-announce-port 6379 975 | # cluster-announce-bus-port 6380 976 | 977 | ################################## SLOW LOG ################################### 978 | 979 | # The Redis Slow Log is a system to log queries that exceeded a specified 980 | # execution time. The execution time does not include the I/O operations 981 | # like talking with the client, sending the reply and so forth, 982 | # but just the time needed to actually execute the command (this is the only 983 | # stage of command execution where the thread is blocked and can not serve 984 | # other requests in the meantime). 985 | # 986 | # You can configure the slow log with two parameters: one tells Redis 987 | # what is the execution time, in microseconds, to exceed in order for the 988 | # command to get logged, and the other parameter is the length of the 989 | # slow log. When a new command is logged the oldest one is removed from the 990 | # queue of logged commands. 991 | 992 | # The following time is expressed in microseconds, so 1000000 is equivalent 993 | # to one second. Note that a negative number disables the slow log, while 994 | # a value of zero forces the logging of every command. 
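# As an illustrative example only (not part of the shipped defaults): given the
# microsecond unit described above, a hypothetical "slowlog-log-slower-than 10000"
# would log every command taking longer than 10 milliseconds, whereas the negative
# value set below disables the slow log entirely. Logged entries can be inspected
# from redis-cli with "SLOWLOG GET" and cleared with "SLOWLOG RESET".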
995 | slowlog-log-slower-than -1 996 | 997 | # There is no limit to this length. Just be aware that it will consume memory. 998 | # You can reclaim memory used by the slow log with SLOWLOG RESET. 999 | slowlog-max-len 128 1000 | 1001 | ################################ LATENCY MONITOR ############################## 1002 | 1003 | # The Redis latency monitoring subsystem samples different operations 1004 | # at runtime in order to collect data related to possible sources of 1005 | # latency of a Redis instance. 1006 | # 1007 | # Via the LATENCY command this information is available to the user that can 1008 | # print graphs and obtain reports. 1009 | # 1010 | # The system only logs operations that were performed in a time equal or 1011 | # greater than the amount of milliseconds specified via the 1012 | # latency-monitor-threshold configuration directive. When its value is set 1013 | # to zero, the latency monitor is turned off. 1014 | # 1015 | # By default latency monitoring is disabled since it is mostly not needed 1016 | # if you don't have latency issues, and collecting data has a performance 1017 | # impact, that while very small, can be measured under big load. Latency 1018 | # monitoring can easily be enabled at runtime using the command 1019 | # "CONFIG SET latency-monitor-threshold " if needed. 1020 | latency-monitor-threshold 0 1021 | 1022 | ############################# EVENT NOTIFICATION ############################## 1023 | 1024 | # Redis can notify Pub/Sub clients about events happening in the key space. 1025 | # This feature is documented at http://redis.io/topics/notifications 1026 | # 1027 | # For instance if keyspace events notification is enabled, and a client 1028 | # performs a DEL operation on key "foo" stored in the Database 0, two 1029 | # messages will be published via Pub/Sub: 1030 | # 1031 | # PUBLISH __keyspace@0__:foo del 1032 | # PUBLISH __keyevent@0__:del foo 1033 | # 1034 | # It is possible to select the events that Redis will notify among a set 1035 | # of classes. Every class is identified by a single character: 1036 | # 1037 | # K Keyspace events, published with __keyspace@__ prefix. 1038 | # E Keyevent events, published with __keyevent@__ prefix. 1039 | # g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... 1040 | # $ String commands 1041 | # l List commands 1042 | # s Set commands 1043 | # h Hash commands 1044 | # z Sorted set commands 1045 | # x Expired events (events generated every time a key expires) 1046 | # e Evicted events (events generated when a key is evicted for maxmemory) 1047 | # A Alias for g$lshzxe, so that the "AKE" string means all the events. 1048 | # 1049 | # The "notify-keyspace-events" takes as argument a string that is composed 1050 | # of zero or multiple characters. The empty string means that notifications 1051 | # are disabled. 1052 | # 1053 | # Example: to enable list and generic events, from the point of view of the 1054 | # event name, use: 1055 | # 1056 | # notify-keyspace-events Elg 1057 | # 1058 | # Example 2: to get the stream of the expired keys subscribing to channel 1059 | # name __keyevent@0__:expired use: 1060 | # 1061 | # notify-keyspace-events Ex 1062 | # 1063 | # By default all notifications are disabled because most users don't need 1064 | # this feature and the feature has some overhead. Note that if you don't 1065 | # specify at least one of K or E, no events will be delivered. 
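# As a rough illustration (assuming a local redis-cli client; this example is
# not part of the shipped defaults): with "notify-keyspace-events Ex" configured,
# the stream of expired keys for database 0 could be observed with:
#
#   redis-cli psubscribe '__keyevent@0__:expired'
#
# The empty string set below keeps all notifications disabled.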
1066 | notify-keyspace-events "" 1067 | 1068 | ############################### ADVANCED CONFIG ############################### 1069 | 1070 | # Hashes are encoded using a memory efficient data structure when they have a 1071 | # small number of entries, and the biggest entry does not exceed a given 1072 | # threshold. These thresholds can be configured using the following directives. 1073 | hash-max-ziplist-entries 512 1074 | hash-max-ziplist-value 64 1075 | 1076 | # Lists are also encoded in a special way to save a lot of space. 1077 | # The number of entries allowed per internal list node can be specified 1078 | # as a fixed maximum size or a maximum number of elements. 1079 | # For a fixed maximum size, use -5 through -1, meaning: 1080 | # -5: max size: 64 Kb <-- not recommended for normal workloads 1081 | # -4: max size: 32 Kb <-- not recommended 1082 | # -3: max size: 16 Kb <-- probably not recommended 1083 | # -2: max size: 8 Kb <-- good 1084 | # -1: max size: 4 Kb <-- good 1085 | # Positive numbers mean store up to _exactly_ that number of elements 1086 | # per list node. 1087 | # The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), 1088 | # but if your use case is unique, adjust the settings as necessary. 1089 | list-max-ziplist-size -2 1090 | 1091 | # Lists may also be compressed. 1092 | # Compress depth is the number of quicklist ziplist nodes from *each* side of 1093 | # the list to *exclude* from compression. The head and tail of the list 1094 | # are always uncompressed for fast push/pop operations. Settings are: 1095 | # 0: disable all list compression 1096 | # 1: depth 1 means "don't start compressing until after 1 node into the list, 1097 | # going from either the head or tail" 1098 | # So: [head]->node->node->...->node->[tail] 1099 | # [head], [tail] will always be uncompressed; inner nodes will compress. 1100 | # 2: [head]->[next]->node->node->...->node->[prev]->[tail] 1101 | # 2 here means: don't compress head or head->next or tail->prev or tail, 1102 | # but compress all nodes between them. 1103 | # 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] 1104 | # etc. 1105 | list-compress-depth 0 1106 | 1107 | # Sets have a special encoding in just one case: when a set is composed 1108 | # of just strings that happen to be integers in radix 10 in the range 1109 | # of 64 bit signed integers. 1110 | # The following configuration setting sets the limit in the size of the 1111 | # set in order to use this special memory saving encoding. 1112 | set-max-intset-entries 512 1113 | 1114 | # Similarly to hashes and lists, sorted sets are also specially encoded in 1115 | # order to save a lot of space. This encoding is only used when the length and 1116 | # elements of a sorted set are below the following limits: 1117 | zset-max-ziplist-entries 128 1118 | zset-max-ziplist-value 64 1119 | 1120 | # HyperLogLog sparse representation bytes limit. The limit includes the 1121 | # 16 bytes header. When an HyperLogLog using the sparse representation crosses 1122 | # this limit, it is converted into the dense representation. 1123 | # 1124 | # A value greater than 16000 is totally useless, since at that point the 1125 | # dense representation is more memory efficient. 1126 | # 1127 | # The suggested value is ~ 3000 in order to have the benefits of 1128 | # the space efficient encoding without slowing down too much PFADD, 1129 | # which is O(N) with the sparse encoding. 
The value can be raised to 1130 | # ~ 10000 when CPU is not a concern, but space is, and the data set is 1131 | # composed of many HyperLogLogs with cardinality in the 0 - 15000 range. 1132 | hll-sparse-max-bytes 3000 1133 | 1134 | # Streams macro node max size / items. The stream data structure is a radix 1135 | # tree of big nodes that encode multiple items inside. Using this configuration 1136 | # it is possible to configure how big a single node can be in bytes, and the 1137 | # maximum number of items it may contain before switching to a new node when 1138 | # appending new stream entries. If any of the following settings are set to 1139 | # zero, the limit is ignored, so for instance it is possible to set just a 1140 | # max entires limit by setting max-bytes to 0 and max-entries to the desired 1141 | # value. 1142 | stream-node-max-bytes 4096 1143 | stream-node-max-entries 100 1144 | 1145 | # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in 1146 | # order to help rehashing the main Redis hash table (the one mapping top-level 1147 | # keys to values). The hash table implementation Redis uses (see dict.c) 1148 | # performs a lazy rehashing: the more operation you run into a hash table 1149 | # that is rehashing, the more rehashing "steps" are performed, so if the 1150 | # server is idle the rehashing is never complete and some more memory is used 1151 | # by the hash table. 1152 | # 1153 | # The default is to use this millisecond 10 times every second in order to 1154 | # actively rehash the main dictionaries, freeing memory when possible. 1155 | # 1156 | # If unsure: 1157 | # use "activerehashing no" if you have hard latency requirements and it is 1158 | # not a good thing in your environment that Redis can reply from time to time 1159 | # to queries with 2 milliseconds delay. 1160 | # 1161 | # use "activerehashing yes" if you don't have such hard requirements but 1162 | # want to free memory asap when possible. 1163 | activerehashing yes 1164 | 1165 | # The client output buffer limits can be used to force disconnection of clients 1166 | # that are not reading data from the server fast enough for some reason (a 1167 | # common reason is that a Pub/Sub client can't consume messages as fast as the 1168 | # publisher can produce them). 1169 | # 1170 | # The limit can be set differently for the three different classes of clients: 1171 | # 1172 | # normal -> normal clients including MONITOR clients 1173 | # replica -> replica clients 1174 | # pubsub -> clients subscribed to at least one pubsub channel or pattern 1175 | # 1176 | # The syntax of every client-output-buffer-limit directive is the following: 1177 | # 1178 | # client-output-buffer-limit 1179 | # 1180 | # A client is immediately disconnected once the hard limit is reached, or if 1181 | # the soft limit is reached and remains reached for the specified number of 1182 | # seconds (continuously). 1183 | # So for instance if the hard limit is 32 megabytes and the soft limit is 1184 | # 16 megabytes / 10 seconds, the client will get disconnected immediately 1185 | # if the size of the output buffers reach 32 megabytes, but will also get 1186 | # disconnected if the client reaches 16 megabytes and continuously overcomes 1187 | # the limit for 10 seconds. 
1188 | # 1189 | # By default normal clients are not limited because they don't receive data 1190 | # without asking (in a push way), but just after a request, so only 1191 | # asynchronous clients may create a scenario where data is requested faster 1192 | # than it can be read. 1193 | # 1194 | # Instead there is a default limit for pubsub and replica clients, since 1195 | # subscribers and replicas receive data in a push fashion. 1196 | # 1197 | # Both the hard and the soft limit can be disabled by setting them to zero. 1198 | client-output-buffer-limit normal 0 0 0 1199 | client-output-buffer-limit replica 1gb 512mb 60 1200 | client-output-buffer-limit pubsub 4gb 3gb 60 1201 | 1202 | # Client query buffers accumulate new commands. They are limited to a fixed 1203 | # amount by default in order to prevent a protocol desynchronization (for 1204 | # instance due to a bug in the client) from leading to unbound memory usage in 1205 | # the query buffer. However you can configure it here if you have very special 1206 | # needs, such as huge multi/exec requests or the like. 1207 | # 1208 | # client-query-buffer-limit 1gb 1209 | 1210 | # In the Redis protocol, bulk requests, that is, elements representing single 1211 | # strings, are normally limited to 512 mb. However you can change this limit 1212 | # here. 1213 | # 1214 | # proto-max-bulk-len 512mb 1215 | 1216 | # Redis calls an internal function to perform many background tasks, like 1217 | # closing connections of clients that have timed out, purging expired keys that are 1218 | # never requested, and so forth. 1219 | # 1220 | # Not all tasks are performed with the same frequency, but Redis checks for 1221 | # tasks to perform according to the specified "hz" value. 1222 | # 1223 | # By default "hz" is set to 10. Raising the value will use more CPU when 1224 | # Redis is idle, but at the same time will make Redis more responsive when 1225 | # there are many keys expiring at the same time, and timeouts may be 1226 | # handled with more precision. 1227 | # 1228 | # The range is between 1 and 500, however a value over 100 is usually not 1229 | # a good idea. Most users should use the default of 10 and raise this up to 1230 | # 100 only in environments where very low latency is required. 1231 | hz 10 1232 | 1233 | # Normally it is useful to have an HZ value which is proportional to the 1234 | # number of clients connected. This is useful, for instance, to 1235 | # avoid processing too many clients for each background task invocation, 1236 | # which helps to avoid latency spikes. 1237 | # 1238 | # Since the default HZ value is conservatively set to 10, Redis 1239 | # offers, and enables by default, the ability to use an adaptive HZ value 1240 | # which will temporarily rise when there are many connected clients. 1241 | # 1242 | # When dynamic HZ is enabled, the actual configured HZ will be used 1243 | # as a baseline, but multiples of the configured HZ value will actually be 1244 | # used as needed once more clients are connected. In this way an idle 1245 | # instance will use very little CPU time while a busy instance will be 1246 | # more responsive. 1247 | dynamic-hz yes 1248 | 1249 | # When a child rewrites the AOF file, if the following option is enabled 1250 | # the file will be fsync-ed every 32 MB of data generated. This is useful 1251 | # in order to commit the file to the disk more incrementally and avoid 1252 | # big latency spikes. 
1253 | aof-rewrite-incremental-fsync yes 1254 | 1255 | # When redis saves RDB file, if the following option is enabled 1256 | # the file will be fsync-ed every 32 MB of data generated. This is useful 1257 | # in order to commit the file to the disk more incrementally and avoid 1258 | # big latency spikes. 1259 | rdb-save-incremental-fsync yes 1260 | 1261 | # Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good 1262 | # idea to start with the default settings and only change them after investigating 1263 | # how to improve the performances and how the keys LFU change over time, which 1264 | # is possible to inspect via the OBJECT FREQ command. 1265 | # 1266 | # There are two tunable parameters in the Redis LFU implementation: the 1267 | # counter logarithm factor and the counter decay time. It is important to 1268 | # understand what the two parameters mean before changing them. 1269 | # 1270 | # The LFU counter is just 8 bits per key, it's maximum value is 255, so Redis 1271 | # uses a probabilistic increment with logarithmic behavior. Given the value 1272 | # of the old counter, when a key is accessed, the counter is incremented in 1273 | # this way: 1274 | # 1275 | # 1. A random number R between 0 and 1 is extracted. 1276 | # 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1). 1277 | # 3. The counter is incremented only if R < P. 1278 | # 1279 | # The default lfu-log-factor is 10. This is a table of how the frequency 1280 | # counter changes with a different number of accesses with different 1281 | # logarithmic factors: 1282 | # 1283 | # +--------+------------+------------+------------+------------+------------+ 1284 | # | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | 1285 | # +--------+------------+------------+------------+------------+------------+ 1286 | # | 0 | 104 | 255 | 255 | 255 | 255 | 1287 | # +--------+------------+------------+------------+------------+------------+ 1288 | # | 1 | 18 | 49 | 255 | 255 | 255 | 1289 | # +--------+------------+------------+------------+------------+------------+ 1290 | # | 10 | 10 | 18 | 142 | 255 | 255 | 1291 | # +--------+------------+------------+------------+------------+------------+ 1292 | # | 100 | 8 | 11 | 49 | 143 | 255 | 1293 | # +--------+------------+------------+------------+------------+------------+ 1294 | # 1295 | # NOTE: The above table was obtained by running the following commands: 1296 | # 1297 | # redis-benchmark -n 1000000 incr foo 1298 | # redis-cli object freq foo 1299 | # 1300 | # NOTE 2: The counter initial value is 5 in order to give new objects a chance 1301 | # to accumulate hits. 1302 | # 1303 | # The counter decay time is the time, in minutes, that must elapse in order 1304 | # for the key counter to be divided by two (or decremented if it has a value 1305 | # less <= 10). 1306 | # 1307 | # The default value for the lfu-decay-time is 1. A Special value of 0 means to 1308 | # decay the counter every time it happens to be scanned. 1309 | # 1310 | # lfu-log-factor 10 1311 | # lfu-decay-time 1 1312 | 1313 | ########################### ACTIVE DEFRAGMENTATION ####################### 1314 | # 1315 | # WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested 1316 | # even in production and manually tested by multiple engineers for some 1317 | # time. 1318 | # 1319 | # What is active defragmentation? 
1320 | # ------------------------------- 1321 | # 1322 | # Active (online) defragmentation allows a Redis server to compact the 1323 | # spaces left between small allocations and deallocations of data in memory, 1324 | # thus making it possible to reclaim memory. 1325 | # 1326 | # Fragmentation is a natural process that happens with every allocator (but 1327 | # less so with Jemalloc, fortunately) and certain workloads. Normally a server 1328 | # restart is needed in order to lower the fragmentation, or at least to flush 1329 | # away all the data and create it again. However thanks to this feature 1330 | # implemented by Oran Agra for Redis 4.0 this process can happen at runtime 1331 | # in a "hot" way, while the server is running. 1332 | # 1333 | # Basically when the fragmentation is over a certain level (see the 1334 | # configuration options below) Redis will start to create new copies of the 1335 | # values in contiguous memory regions by exploiting certain specific Jemalloc 1336 | # features (in order to understand if an allocation is causing fragmentation 1337 | # and to allocate it in a better place), and at the same time, will release the 1338 | # old copies of the data. This process, repeated incrementally for all the keys, 1339 | # will cause the fragmentation to drop back to normal values. 1340 | # 1341 | # Important things to understand: 1342 | # 1343 | # 1. This feature is disabled by default, and only works if you compiled Redis 1344 | # to use the copy of Jemalloc we ship with the source code of Redis. 1345 | # This is the default with Linux builds. 1346 | # 1347 | # 2. You never need to enable this feature if you don't have fragmentation 1348 | # issues. 1349 | # 1350 | # 3. Once you experience fragmentation, you can enable this feature when 1351 | # needed with the command "CONFIG SET activedefrag yes". 1352 | # 1353 | # The configuration parameters make it possible to fine-tune the behavior of the 1354 | # defragmentation process. If you are not sure about what they mean it is 1355 | # a good idea to leave the defaults untouched. 
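# As a rough illustration (not part of the upstream commentary): before enabling
# the feature, the current fragmentation level can be checked at runtime with
# "redis-cli INFO memory" (see the mem_fragmentation_ratio field), and active
# defragmentation can then be turned on without a restart using
# "CONFIG SET activedefrag yes" as described above.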
1356 | 1357 | # Enabled active defragmentation 1358 | # activedefrag yes 1359 | 1360 | # Minimum amount of fragmentation waste to start active defrag 1361 | # active-defrag-ignore-bytes 100mb 1362 | 1363 | # Minimum percentage of fragmentation to start active defrag 1364 | # active-defrag-threshold-lower 10 1365 | 1366 | # Maximum percentage of fragmentation at which we use maximum effort 1367 | # active-defrag-threshold-upper 100 1368 | 1369 | # Minimal effort for defrag in CPU percentage 1370 | # active-defrag-cycle-min 5 1371 | 1372 | # Maximal effort for defrag in CPU percentage 1373 | # active-defrag-cycle-max 75 1374 | 1375 | # Maximum number of set/hash/zset/list fields that will be processed from 1376 | # the main dictionary scan 1377 | # active-defrag-max-scan-fields 1000 1378 | 1379 | -------------------------------------------------------------------------------- /files/rl_coach.patch: -------------------------------------------------------------------------------- 1 | diff --git a/rl_coach/agents/actor_critic_agent.py b/rl_coach/agents/actor_critic_agent.py 2 | index 35c8bf9..4f3ce60 100644 3 | --- a/rl_coach/agents/actor_critic_agent.py 4 | +++ b/rl_coach/agents/actor_critic_agent.py 5 | @@ -94,11 +94,14 @@ class ActorCriticAgentParameters(AgentParameters): 6 | class ActorCriticAgent(PolicyOptimizationAgent): 7 | def __init__(self, agent_parameters, parent: Union['LevelManager', 'CompositeAgent']=None): 8 | super().__init__(agent_parameters, parent) 9 | + print("[RL] ActorCriticAgent init") 10 | self.last_gradient_update_step_idx = 0 11 | self.action_advantages = self.register_signal('Advantages') 12 | self.state_values = self.register_signal('Values') 13 | self.value_loss = self.register_signal('Value Loss') 14 | self.policy_loss = self.register_signal('Policy Loss') 15 | + print("[RL] ActorCriticAgent init successful") 16 | + 17 | 18 | # Discounting function used to calculate discounted returns. 19 | def discount(self, x, gamma): 20 | diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py 21 | index 866fe8a..cf0873a 100644 22 | --- a/rl_coach/agents/agent.py 23 | +++ b/rl_coach/agents/agent.py 24 | @@ -28,6 +28,8 @@ from rl_coach.base_parameters import AgentParameters, Device, DeviceType, Distri 25 | from rl_coach.core_types import RunPhase, PredictionType, EnvironmentEpisodes, ActionType, Batch, Episode, StateType 26 | from rl_coach.core_types import Transition, ActionInfo, TrainingSteps, EnvironmentSteps, EnvResponse 27 | from rl_coach.logger import screen, Logger, EpisodeLogger 28 | +from rl_coach.memories.memory import Memory 29 | +from rl_coach.memories.non_episodic.experience_replay import ExperienceReplay 30 | from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay 31 | from rl_coach.saver import SaverCollection 32 | from rl_coach.spaces import SpacesDefinition, VectorObservationSpace, GoalsSpace, AttentionActionSpace 33 | @@ -74,7 +76,7 @@ class Agent(AgentInterface): 34 | self.imitation = False 35 | self.agent_logger = Logger() 36 | self.agent_episode_logger = EpisodeLogger() 37 | - 38 | + print("[RL] Created agent loggers") 39 | # get the memory 40 | # - distributed training + shared memory: 41 | # * is chief? -> create the memory and add it to the scratchpad 42 | @@ -84,22 +86,30 @@ class Agent(AgentInterface): 43 | memory_name = self.ap.memory.path.split(':')[1] 44 | self.memory_lookup_name = self.full_name_id + '.' 
+ memory_name 45 | if self.shared_memory and not self.is_chief: 46 | + print("[RL] Creating shared memory") 47 | self.memory = self.shared_memory_scratchpad.get(self.memory_lookup_name) 48 | else: 49 | + print("[RL] Dynamic import of memory: ", self.ap.memory) 50 | # modules 51 | self.memory = dynamic_import_and_instantiate_module_from_params(self.ap.memory) 52 | + print("[RL] Dynamically imported of memory", self.memory) 53 | 54 | if hasattr(self.ap.memory, 'memory_backend_params'): 55 | + print("[RL] Getting memory backend", self.ap.memory.memory_backend_params) 56 | self.memory_backend = get_memory_backend(self.ap.memory.memory_backend_params) 57 | + print("[RL] Memory backend", self.memory_backend) 58 | 59 | if self.ap.memory.memory_backend_params.run_type != 'trainer': 60 | + print("[RL] Setting memory backend", self.memory_backend) 61 | self.memory.set_memory_backend(self.memory_backend) 62 | 63 | if self.shared_memory and self.is_chief: 64 | + print("[RL] Shared memory scratchpad") 65 | self.shared_memory_scratchpad.add(self.memory_lookup_name, self.memory) 66 | 67 | # set devices 68 | if type(agent_parameters.task_parameters) == DistributedTaskParameters: 69 | + print("[RL] Setting distributed devices") 70 | self.has_global = True 71 | self.replicated_device = agent_parameters.task_parameters.device 72 | self.worker_device = "/job:worker/task:{}".format(self.task_id) 73 | @@ -108,6 +118,7 @@ class Agent(AgentInterface): 74 | else: 75 | self.worker_device += "/device:GPU:0" 76 | else: 77 | + print("[RL] Setting devices") 78 | self.has_global = False 79 | self.replicated_device = None 80 | if agent_parameters.task_parameters.use_cpu: 81 | @@ -115,7 +126,7 @@ class Agent(AgentInterface): 82 | else: 83 | self.worker_device = [Device(DeviceType.GPU, i) 84 | for i in range(agent_parameters.task_parameters.num_gpu)] 85 | - 86 | + print("[RL] Setting filters") 87 | # filters 88 | self.input_filter = self.ap.input_filter 89 | self.input_filter.set_name('input_filter') 90 | @@ -134,21 +145,26 @@ class Agent(AgentInterface): 91 | # 3. 
Single worker (=both TF and Mxnet) - no data sharing needed + numpy arithmetic backend 92 | 93 | if hasattr(self.ap.memory, 'memory_backend_params') and self.ap.algorithm.distributed_coach_synchronization_type: 94 | + print("[RL] Setting filter devices: distributed") 95 | self.input_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params, mode='numpy') 96 | self.output_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params, mode='numpy') 97 | self.pre_network_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params, mode='numpy') 98 | elif (type(agent_parameters.task_parameters) == DistributedTaskParameters and 99 | agent_parameters.task_parameters.framework_type == Frameworks.tensorflow): 100 | + print("[RL] Setting filter devices: tf") 101 | self.input_filter.set_device(device, mode='tf') 102 | self.output_filter.set_device(device, mode='tf') 103 | self.pre_network_filter.set_device(device, mode='tf') 104 | else: 105 | + print("[RL] Setting filter devices: numpy") 106 | self.input_filter.set_device(device, mode='numpy') 107 | self.output_filter.set_device(device, mode='numpy') 108 | self.pre_network_filter.set_device(device, mode='numpy') 109 | 110 | # initialize all internal variables 111 | + print("[RL] Setting Phase") 112 | self._phase = RunPhase.HEATUP 113 | + print("[RL] After setting Phase") 114 | self.total_shaped_reward_in_current_episode = 0 115 | self.total_reward_in_current_episode = 0 116 | self.total_steps_counter = 0 117 | @@ -180,7 +196,7 @@ class Agent(AgentInterface): 118 | # environment parameters 119 | self.spaces = None 120 | self.in_action_space = self.ap.algorithm.in_action_space 121 | - 122 | + print("[RL] Setting signals") 123 | # signals 124 | self.episode_signals = [] 125 | self.step_signals = [] 126 | @@ -195,6 +211,8 @@ class Agent(AgentInterface): 127 | 128 | # batch rl 129 | self.ope_manager = OpeManager() if self.ap.is_batch_rl_training else None 130 | + print("[RL] Agent init successful") 131 | + 132 | 133 | @property 134 | def parent(self) -> 'LevelManager': 135 | @@ -572,7 +590,8 @@ class Agent(AgentInterface): 136 | self.current_episode += 1 137 | 138 | if self.phase != RunPhase.TEST: 139 | - if isinstance(self.memory, EpisodicExperienceReplay): 140 | + if isinstance(self.memory, EpisodicExperienceReplay) or \ 141 | + (isinstance(self.memory, Memory) and not isinstance(self.memory, ExperienceReplay)): 142 | self.call_memory('store_episode', self.current_episode_buffer) 143 | elif self.ap.algorithm.store_transitions_only_when_episodes_are_terminated: 144 | for transition in self.current_episode_buffer.transitions: 145 | @@ -618,7 +637,8 @@ class Agent(AgentInterface): 146 | self.input_filter.reset() 147 | self.output_filter.reset() 148 | self.pre_network_filter.reset() 149 | - if isinstance(self.memory, EpisodicExperienceReplay): 150 | + if isinstance(self.memory, EpisodicExperienceReplay) or \ 151 | + (isinstance(self.memory, Memory) and not isinstance(self.memory, ExperienceReplay)): 152 | self.call_memory('verify_last_episode_is_closed') 153 | 154 | for network in self.networks.values(): 155 | @@ -953,7 +973,7 @@ class Agent(AgentInterface): 156 | # for episodic memories we keep the transitions in a local buffer until the episode is ended. 
157 | # for regular memories we insert the transitions directly to the memory 158 | self.current_episode_buffer.insert(transition) 159 | - if not isinstance(self.memory, EpisodicExperienceReplay) \ 160 | + if isinstance(self.memory, ExperienceReplay) \ 161 | and not self.ap.algorithm.store_transitions_only_when_episodes_are_terminated: 162 | self.call_memory('store', transition) 163 | 164 | diff --git a/rl_coach/agents/clipped_ppo_agent.py b/rl_coach/agents/clipped_ppo_agent.py 165 | index cc29f33..4f1a7d9 100644 166 | --- a/rl_coach/agents/clipped_ppo_agent.py 167 | +++ b/rl_coach/agents/clipped_ppo_agent.py 168 | @@ -182,7 +182,7 @@ class ClippedPPOAgent(ActorCriticAgent): 169 | screen.warning("WARNING: The requested policy gradient rescaler is not available") 170 | 171 | # standardize 172 | - advantages = (advantages - np.mean(advantages)) / np.std(advantages) 173 | + advantages = (advantages - np.mean(advantages)) / (np.std(advantages) + 1e-8) 174 | 175 | for transition, advantage, value_target in zip(batch.transitions, advantages, value_targets): 176 | transition.info['advantage'] = advantage 177 | diff --git a/rl_coach/architectures/architecture.py b/rl_coach/architectures/architecture.py 178 | index 90dbd6e..8d457a9 100644 179 | --- a/rl_coach/architectures/architecture.py 180 | +++ b/rl_coach/architectures/architecture.py 181 | @@ -46,8 +46,9 @@ class Architecture(object): 182 | """ 183 | self.spaces = spaces 184 | self.name = name 185 | - self.network_wrapper_name = self.name.split('/')[0] # e.g. 'main/online' --> 'main' 186 | - self.full_name = "{}/{}".format(agent_parameters.full_name_id, name) 187 | + self.network_wrapper_name = self.name.split('/')[1] # e.g. 'main/online' --> 'main' 188 | + self.full_name = "{}/{}".format(agent_parameters.full_name_id, '/'.join(name.split('/')[1:])) 189 | + # self.full_name = "{}/{}".format(agent_parameters.full_name_id, name) 190 | self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name] 191 | self.batch_size = self.network_parameters.batch_size 192 | self.learning_rate = self.network_parameters.learning_rate 193 | diff --git a/rl_coach/architectures/network_wrapper.py b/rl_coach/architectures/network_wrapper.py 194 | index dfefc41..a31dbf4 100644 195 | --- a/rl_coach/architectures/network_wrapper.py 196 | +++ b/rl_coach/architectures/network_wrapper.py 197 | @@ -68,7 +68,7 @@ class NetworkWrapper(object): 198 | self.global_network = general_network(variable_scope=variable_scope, 199 | devices=force_list(replicated_device), 200 | agent_parameters=agent_parameters, 201 | - name='{}/global'.format(name), 202 | + name='{}/{}/global'.format(agent_parameters.name, name), 203 | global_network=None, 204 | network_is_local=False, 205 | spaces=spaces, 206 | @@ -79,7 +79,7 @@ class NetworkWrapper(object): 207 | self.online_network = general_network(variable_scope=variable_scope, 208 | devices=force_list(worker_device), 209 | agent_parameters=agent_parameters, 210 | - name='{}/online'.format(name), 211 | + name='{}/{}/online'.format(agent_parameters.name,name), 212 | global_network=self.global_network, 213 | network_is_local=True, 214 | spaces=spaces, 215 | @@ -91,7 +91,7 @@ class NetworkWrapper(object): 216 | self.target_network = general_network(variable_scope=variable_scope, 217 | devices=force_list(worker_device), 218 | agent_parameters=agent_parameters, 219 | - name='{}/target'.format(name), 220 | + name='{}/{}/target'.format(agent_parameters.name, name), 221 | global_network=self.global_network, 222 | 
network_is_local=True, 223 | spaces=spaces, 224 | diff --git a/rl_coach/architectures/tensorflow_components/architecture.py b/rl_coach/architectures/tensorflow_components/architecture.py 225 | index 68420fe..f847d8a 100644 226 | --- a/rl_coach/architectures/tensorflow_components/architecture.py 227 | +++ b/rl_coach/architectures/tensorflow_components/architecture.py 228 | @@ -28,21 +28,21 @@ from rl_coach.saver import SaverCollection 229 | from rl_coach.spaces import SpacesDefinition 230 | from rl_coach.utils import force_list, squeeze_list, start_shell_command_and_wait 231 | 232 | - 233 | +tf.compat.v1.disable_resource_variables() 234 | def variable_summaries(var): 235 | """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" 236 | - with tf.name_scope('summaries'): 237 | + with tf.compat.v1.name_scope('summaries'): 238 | layer_weight_name = '_'.join(var.name.split('/')[-3:])[:-2] 239 | 240 | - with tf.name_scope(layer_weight_name): 241 | + with tf.compat.v1.name_scope(layer_weight_name): 242 | mean = tf.reduce_mean(var) 243 | - tf.summary.scalar('mean', mean) 244 | - with tf.name_scope('stddev'): 245 | + tf.compat.v1.summary.scalar('mean', mean) 246 | + with tf.compat.v1.name_scope('stddev'): 247 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) 248 | - tf.summary.scalar('stddev', stddev) 249 | - tf.summary.scalar('max', tf.reduce_max(var)) 250 | - tf.summary.scalar('min', tf.reduce_min(var)) 251 | - tf.summary.histogram('histogram', var) 252 | + tf.compat.v1.summary.scalar('stddev', stddev) 253 | + tf.compat.v1.summary.scalar('max', tf.reduce_max(var)) 254 | + tf.compat.v1.summary.scalar('min', tf.reduce_min(var)) 255 | + tf.compat.v1.summary.histogram('histogram', var) 256 | 257 | 258 | def local_getter(getter, name, *args, **kwargs): 259 | @@ -52,7 +52,7 @@ def local_getter(getter, name, *args, **kwargs): 260 | between workers. these variables are also assumed to be non-trainable (the optimizer does not apply gradients to 261 | these variables), but we can calculate the gradients wrt these variables, and we can update their content. 
262 | """ 263 | - kwargs['collections'] = [tf.GraphKeys.LOCAL_VARIABLES] 264 | + kwargs['collections'] = [tf.compat.v1.GraphKeys.LOCAL_VARIABLES] 265 | return getter(name, *args, **kwargs) 266 | 267 | 268 | @@ -96,17 +96,17 @@ class TensorFlowArchitecture(Architecture): 269 | 270 | self.optimizer_type = self.network_parameters.optimizer_type 271 | if self.ap.task_parameters.seed is not None: 272 | - tf.set_random_seed(self.ap.task_parameters.seed) 273 | - with tf.variable_scope("/".join(self.name.split("/")[1:]), initializer=tf.contrib.layers.xavier_initializer(), 274 | + tf.compat.v1.set_random_seed(self.ap.task_parameters.seed) 275 | + with tf.compat.v1.variable_scope("/".join(self.name.split("/")[2:]), initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), 276 | custom_getter=local_getter if network_is_local and global_network else None): 277 | - self.global_step = tf.train.get_or_create_global_step() 278 | + self.global_step = tf.compat.v1.train.get_or_create_global_step() 279 | 280 | # build the network 281 | self.weights = self.get_model() 282 | 283 | # create the placeholder for the assigning gradients and some tensorboard summaries for the weights 284 | for idx, var in enumerate(self.weights): 285 | - placeholder = tf.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder') 286 | + placeholder = tf.compat.v1.placeholder(tf.float32, shape=var.get_shape(), name=str(idx) + '_holder') 287 | self.weights_placeholders.append(placeholder) 288 | if self.ap.visualization.tensorboard: 289 | variable_summaries(var) 290 | @@ -128,14 +128,14 @@ class TensorFlowArchitecture(Architecture): 291 | self.reset_internal_memory() 292 | 293 | if self.ap.visualization.tensorboard: 294 | - current_scope_summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, 295 | - scope=tf.contrib.framework.get_name_scope()) 296 | - self.merged = tf.summary.merge(current_scope_summaries) 297 | + current_scope_summaries = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.SUMMARIES, 298 | + scope=tf.get_current_name_scope()) 299 | + self.merged = tf.compat.v1.summary.merge(current_scope_summaries) 300 | 301 | # initialize or restore model 302 | self.init_op = tf.group( 303 | - tf.global_variables_initializer(), 304 | - tf.local_variables_initializer() 305 | + tf.compat.v1.global_variables_initializer(), 306 | + tf.compat.v1.local_variables_initializer() 307 | ) 308 | 309 | # set the fetches for training 310 | @@ -171,14 +171,14 @@ class TensorFlowArchitecture(Architecture): 311 | Create locks for synchronizing the different workers during training 312 | :return: None 313 | """ 314 | - self.lock_counter = tf.get_variable("lock_counter", [], tf.int32, 315 | - initializer=tf.constant_initializer(0, dtype=tf.int32), 316 | + self.lock_counter = tf.compat.v1.get_variable("lock_counter", [], tf.int32, 317 | + initializer=tf.compat.v1.constant_initializer(0, dtype=tf.int32), 318 | trainable=False) 319 | self.lock = self.lock_counter.assign_add(1, use_locking=True) 320 | self.lock_init = self.lock_counter.assign(0) 321 | 322 | - self.release_counter = tf.get_variable("release_counter", [], tf.int32, 323 | - initializer=tf.constant_initializer(0, dtype=tf.int32), 324 | + self.release_counter = tf.compat.v1.get_variable("release_counter", [], tf.int32, 325 | + initializer=tf.compat.v1.constant_initializer(0, dtype=tf.int32), 326 | trainable=False) 327 | self.release = self.release_counter.assign_add(1, use_locking=True) 328 | self.release_decrement = 
self.release_counter.assign_add(-1, use_locking=True) 329 | @@ -191,7 +191,7 @@ class TensorFlowArchitecture(Architecture): 330 | """ 331 | 332 | self.tensor_gradients = tf.gradients(self.total_loss, self.weights) 333 | - self.gradients_norm = tf.global_norm(self.tensor_gradients) 334 | + self.gradients_norm = tf.linalg.global_norm(self.tensor_gradients) 335 | 336 | # gradient clipping 337 | if self.network_parameters.clip_gradients is not None and self.network_parameters.clip_gradients != 0: 338 | @@ -205,7 +205,7 @@ class TensorFlowArchitecture(Architecture): 339 | # gradients of the outputs w.r.t. the inputs 340 | self.gradients_wrt_inputs = [{name: tf.gradients(output, input_ph) for name, input_ph in 341 | self.inputs.items()} for output in self.outputs] 342 | - self.gradients_weights_ph = [tf.placeholder('float32', self.outputs[i].shape, 'output_gradient_weights') 343 | + self.gradients_weights_ph = [tf.compat.v1.placeholder('float32', self.outputs[i].shape, 'output_gradient_weights') 344 | for i in range(len(self.outputs))] 345 | self.weighted_gradients = [] 346 | for i in range(len(self.outputs)): 347 | @@ -270,7 +270,7 @@ class TensorFlowArchitecture(Architecture): 348 | elif self.network_is_trainable: 349 | # not any of the above but is trainable? -> create an operation for applying the gradients to 350 | # this network weights 351 | - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=self.full_name) 352 | + update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS, scope=self.full_name) 353 | 354 | with tf.control_dependencies(update_ops): 355 | self.update_weights_from_batch_gradients = self.optimizer.apply_gradients( 356 | @@ -288,10 +288,10 @@ class TensorFlowArchitecture(Architecture): 357 | if self.ap.visualization.tensorboard: 358 | # Write the merged summaries to the current experiment directory 359 | if not task_is_distributed: 360 | - self.train_writer = tf.summary.FileWriter(self.ap.task_parameters.experiment_path + '/tensorboard') 361 | + self.train_writer = tf.compat.v1.summary.FileWriter(self.ap.task_parameters.experiment_path + '/tensorboard') 362 | self.train_writer.add_graph(self.sess.graph) 363 | elif self.network_is_local: 364 | - self.train_writer = tf.summary.FileWriter(self.ap.task_parameters.experiment_path + 365 | + self.train_writer = tf.compat.v1.summary.FileWriter(self.ap.task_parameters.experiment_path + 366 | '/tensorboard/worker{}'.format(self.ap.task_parameters.task_index)) 367 | self.train_writer.add_graph(self.sess.graph) 368 | 369 | diff --git a/rl_coach/architectures/tensorflow_components/distributed_tf_utils.py b/rl_coach/architectures/tensorflow_components/distributed_tf_utils.py 370 | index bbbbc0f..39403a6 100644 371 | --- a/rl_coach/architectures/tensorflow_components/distributed_tf_utils.py 372 | +++ b/rl_coach/architectures/tensorflow_components/distributed_tf_utils.py 373 | @@ -36,7 +36,7 @@ def create_cluster_spec(parameters_server: str, workers: str) -> tf.train.Cluste 374 | return cluster_spec 375 | 376 | 377 | -def create_and_start_parameters_server(cluster_spec: tf.train.ClusterSpec, config: tf.ConfigProto=None) -> None: 378 | +def create_and_start_parameters_server(cluster_spec: tf.train.ClusterSpec, config: tf.compat.v1.ConfigProto=None) -> None: 379 | """ 380 | Create and start a parameter server 381 | :param cluster_spec: the ClusterSpec object representing the cluster 382 | @@ -44,14 +44,14 @@ def create_and_start_parameters_server(cluster_spec: tf.train.ClusterSpec, confi 383 | :return: None 384 | 
""" 385 | # create a server object for the parameter server 386 | - server = tf.train.Server(cluster_spec, job_name="ps", task_index=0, config=config) 387 | + server = tf.distribute.Server(cluster_spec, job_name="ps", task_index=0, config=config) 388 | 389 | # wait for the server to finish 390 | server.join() 391 | 392 | 393 | def create_worker_server_and_device(cluster_spec: tf.train.ClusterSpec, task_index: int, 394 | - use_cpu: bool=True, config: tf.ConfigProto=None) -> Tuple[str, tf.device]: 395 | + use_cpu: bool=True, config: tf.compat.v1.ConfigProto=None) -> Tuple[str, tf.device]: 396 | """ 397 | Creates a worker server and a device setter used to assign the workers operations to 398 | :param cluster_spec: a ClusterSpec object representing the cluster 399 | @@ -61,7 +61,7 @@ def create_worker_server_and_device(cluster_spec: tf.train.ClusterSpec, task_ind 400 | :return: the target string for the tf.Session and the worker device setter object 401 | """ 402 | # Create and start a worker 403 | - server = tf.train.Server(cluster_spec, job_name="worker", task_index=task_index, config=config) 404 | + server = tf.distribute.Server(cluster_spec, job_name="worker", task_index=task_index, config=config) 405 | 406 | # Assign ops to the local worker 407 | worker_device = "/job:worker/task:{}".format(task_index) 408 | @@ -69,13 +69,13 @@ def create_worker_server_and_device(cluster_spec: tf.train.ClusterSpec, task_ind 409 | worker_device += "/cpu:0" 410 | else: 411 | worker_device += "/device:GPU:0" 412 | - device = tf.train.replica_device_setter(worker_device=worker_device, cluster=cluster_spec) 413 | + device = tf.compat.v1.train.replica_device_setter(worker_device=worker_device, cluster=cluster_spec) 414 | 415 | return server.target, device 416 | 417 | 418 | -def create_monitored_session(target: tf.train.Server, task_index: int, 419 | - checkpoint_dir: str, checkpoint_save_secs: int, config: tf.ConfigProto=None) -> tf.Session: 420 | +def create_monitored_session(target: tf.distribute.Server, task_index: int, 421 | + checkpoint_dir: str, checkpoint_save_secs: int, config: tf.compat.v1.ConfigProto=None) -> tf.compat.v1.Session: 422 | """ 423 | Create a monitored session for the worker 424 | :param target: the target string for the tf.Session 425 | @@ -89,7 +89,7 @@ def create_monitored_session(target: tf.train.Server, task_index: int, 426 | is_chief = task_index == 0 427 | 428 | # Create the monitored session 429 | - sess = tf.train.MonitoredTrainingSession( 430 | + sess = tf.compat.v1.train.MonitoredTrainingSession( 431 | master=target, 432 | is_chief=is_chief, 433 | hooks=[], 434 | diff --git a/rl_coach/architectures/tensorflow_components/embedders/embedder.py b/rl_coach/architectures/tensorflow_components/embedders/embedder.py 435 | index 13544c9..97dca64 100644 436 | --- a/rl_coach/architectures/tensorflow_components/embedders/embedder.py 437 | +++ b/rl_coach/architectures/tensorflow_components/embedders/embedder.py 438 | @@ -75,15 +75,15 @@ class InputEmbedder(object): 439 | activation_function=self.activation_function, 440 | dropout_rate=self.dropout_rate)) 441 | 442 | - def __call__(self, prev_input_placeholder: tf.placeholder=None) -> Tuple[tf.Tensor, tf.Tensor]: 443 | + def __call__(self, prev_input_placeholder: tf.compat.v1.placeholder=None) -> Tuple[tf.Tensor, tf.Tensor]: 444 | """ 445 | Wrapper for building the module graph including scoping and loss creation 446 | :param prev_input_placeholder: the input to the graph 447 | :return: the input placeholder and the output of the last 
layer 448 | """ 449 | - with tf.variable_scope(self.get_name()): 450 | + with tf.compat.v1.variable_scope(self.get_name()): 451 | if prev_input_placeholder is None: 452 | - self.input = tf.placeholder("float", shape=[None] + self.input_size, name=self.get_name()) 453 | + self.input = tf.compat.v1.placeholder("float", shape=[None] + self.input_size, name=self.get_name()) 454 | else: 455 | self.input = prev_input_placeholder 456 | self._build_module() 457 | @@ -116,8 +116,8 @@ class InputEmbedder(object): 458 | is_training=self.is_training) 459 | )) 460 | 461 | - self.output = tf.contrib.layers.flatten(self.layers[-1]) 462 | - 463 | + self.output = tf.keras.layers.Flatten()(self.layers[-1]) 464 | + 465 | @property 466 | def input_size(self) -> List[int]: 467 | return self._input_size 468 | diff --git a/rl_coach/architectures/tensorflow_components/general_network.py b/rl_coach/architectures/tensorflow_components/general_network.py 469 | index 8821ac6..61b9472 100644 470 | --- a/rl_coach/architectures/tensorflow_components/general_network.py 471 | +++ b/rl_coach/architectures/tensorflow_components/general_network.py 472 | @@ -32,7 +32,8 @@ from rl_coach.logger import screen 473 | from rl_coach.spaces import SpacesDefinition, PlanarMapsObservationSpace, TensorObservationSpace 474 | from rl_coach.utils import get_all_subclasses, dynamic_import_and_instantiate_module_from_params, indent_string 475 | 476 | - 477 | +tf.compat.v1.disable_resource_variables() 478 | +tf.compat.v1.disable_eager_execution() 479 | class GeneralTensorFlowNetwork(TensorFlowArchitecture): 480 | """ 481 | A generalized version of all possible networks implemented using tensorflow. 482 | @@ -64,11 +65,11 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 483 | # variable_scope() call and also recover the name space using name_scope 484 | if variable_scope in GeneralTensorFlowNetwork.variable_scopes_dict: 485 | variable_scope = GeneralTensorFlowNetwork.variable_scopes_dict[variable_scope] 486 | - with tf.variable_scope(variable_scope, auxiliary_name_scope=False) as vs: 487 | - with tf.name_scope(vs.original_name_scope): 488 | + with tf.compat.v1.variable_scope(variable_scope, auxiliary_name_scope=False) as vs: 489 | + with tf.compat.v1.name_scope(vs.original_name_scope): 490 | return construct_on_device() 491 | else: 492 | - with tf.variable_scope(variable_scope, auxiliary_name_scope=True) as vs: 493 | + with tf.compat.v1.variable_scope(variable_scope, auxiliary_name_scope=True) as vs: 494 | # Add variable_scope object to dictionary for next call to construct 495 | GeneralTensorFlowNetwork.variable_scopes_dict[variable_scope] = vs 496 | return construct_on_device() 497 | @@ -105,7 +106,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 498 | """ 499 | self.global_network = global_network 500 | self.network_is_local = network_is_local 501 | - self.network_wrapper_name = name.split('/')[0] 502 | + self.network_wrapper_name = name.split('/')[1] 503 | self.network_parameters = agent_parameters.network_wrappers[self.network_wrapper_name] 504 | self.num_heads_per_network = 1 if self.network_parameters.use_separate_networks_per_head else \ 505 | len(self.network_parameters.heads_parameters) 506 | @@ -237,12 +238,12 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 507 | raise ValueError("Exactly one middleware type should be defined") 508 | 509 | # ops for defining the training / testing phase 510 | - self.is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 511 | - 
self.is_training_placeholder = tf.placeholder("bool") 512 | - self.assign_is_training = tf.assign(self.is_training, self.is_training_placeholder) 513 | + self.is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 514 | + self.is_training_placeholder = tf.compat.v1.placeholder("bool") 515 | + self.assign_is_training = tf.compat.v1.assign(self.is_training, self.is_training_placeholder) 516 | 517 | for network_idx in range(self.num_networks): 518 | - with tf.variable_scope('network_{}'.format(network_idx)): 519 | + with tf.compat.v1.variable_scope('network_{}'.format(network_idx)): 520 | 521 | #################### 522 | # Input Embeddings # 523 | @@ -310,12 +311,12 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 524 | 525 | # rescale the gradients from the head 526 | self.gradients_from_head_rescalers.append( 527 | - tf.get_variable('gradients_from_head_{}-{}_rescalers'.format(head_idx, head_copy_idx), 528 | + tf.compat.v1.get_variable('gradients_from_head_{}-{}_rescalers'.format(head_idx, head_copy_idx), 529 | initializer=float(head_params.rescale_gradient_from_head_by_factor), 530 | dtype=tf.float32)) 531 | 532 | self.gradients_from_head_rescalers_placeholders.append( 533 | - tf.placeholder('float', 534 | + tf.compat.v1.placeholder('float', 535 | name='gradients_from_head_{}-{}_rescalers'.format(head_type_idx, head_copy_idx))) 536 | 537 | self.update_head_rescaler_value_ops.append(self.gradients_from_head_rescalers[head_count].assign( 538 | @@ -343,13 +344,13 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 539 | 540 | # model weights 541 | if not self.distributed_training or self.network_is_global: 542 | - self.weights = [var for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.full_name) if 543 | - 'global_step' not in var.name] 544 | + self.weights = [var for var in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=self.full_name) if 545 | + 'global_step' not in var.name and 'Variable:0' not in var.name] 546 | else: 547 | - self.weights = [var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.full_name)] 548 | + self.weights = [var for var in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=self.full_name)] 549 | 550 | # Losses 551 | - self.losses = tf.losses.get_losses(self.full_name) 552 | + self.losses = tf.compat.v1.losses.get_losses(self.full_name) 553 | 554 | # L2 regularization 555 | if self.network_parameters.l2_regularization != 0: 556 | @@ -363,7 +364,7 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 557 | # Learning rate 558 | if self.network_parameters.learning_rate_decay_rate != 0: 559 | self.adaptive_learning_rate_scheme = \ 560 | - tf.train.exponential_decay( 561 | + tf.compat.v1.train.exponential_decay( 562 | self.network_parameters.learning_rate, 563 | self.global_step, 564 | decay_steps=self.network_parameters.learning_rate_decay_steps, 565 | @@ -388,17 +389,14 @@ class GeneralTensorFlowNetwork(TensorFlowArchitecture): 566 | # -> create an optimizer 567 | 568 | if self.network_parameters.optimizer_type == 'Adam': 569 | - self.optimizer = tf.train.AdamOptimizer(learning_rate=self.current_learning_rate, 570 | + self.optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=self.current_learning_rate, 571 | beta1=self.network_parameters.adam_optimizer_beta1, 572 | beta2=self.network_parameters.adam_optimizer_beta2, 573 | epsilon=self.network_parameters.optimizer_epsilon) 574 | elif 
self.network_parameters.optimizer_type == 'RMSProp': 575 | - self.optimizer = tf.train.RMSPropOptimizer(self.current_learning_rate, 576 | + self.optimizer = tf.compat.v1.train.RMSPropOptimizer(self.current_learning_rate, 577 | decay=self.network_parameters.rms_prop_optimizer_decay, 578 | epsilon=self.network_parameters.optimizer_epsilon) 579 | - elif self.network_parameters.optimizer_type == 'LBFGS': 580 | - self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(self.total_loss, method='L-BFGS-B', 581 | - options={'maxiter': 25}) 582 | else: 583 | raise Exception("{} is not a valid optimizer type".format(self.network_parameters.optimizer_type)) 584 | 585 | diff --git a/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py b/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py 586 | index d31fa3d..eebfa3f 100644 587 | --- a/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py 588 | +++ b/rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py 589 | @@ -40,9 +40,9 @@ class ACERPolicyHead(Head): 590 | if hasattr(agent_parameters.algorithm, 'beta_entropy'): 591 | # we set the beta value as a tf variable so it can be updated later if needed 592 | self.beta = tf.Variable(float(agent_parameters.algorithm.beta_entropy), 593 | - trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 594 | - self.beta_placeholder = tf.placeholder('float') 595 | - self.set_beta = tf.assign(self.beta, self.beta_placeholder) 596 | + trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 597 | + self.beta_placeholder = tf.compat.v1.placeholder('float') 598 | + self.set_beta = tf.compat.v1.assign(self.beta, self.beta_placeholder) 599 | 600 | def _build_module(self, input_layer): 601 | if isinstance(self.spaces.action, DiscreteActionSpace): 602 | @@ -58,18 +58,18 @@ class ACERPolicyHead(Head): 603 | self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')] 604 | 605 | # Truncated importance sampling with bias corrections 606 | - importance_sampling_weight = tf.placeholder(tf.float32, [None, self.num_actions], 607 | + importance_sampling_weight = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], 608 | name='{}_importance_sampling_weight'.format(self.get_name())) 609 | self.input.append(importance_sampling_weight) 610 | - importance_sampling_weight_i = tf.placeholder(tf.float32, [None], 611 | + importance_sampling_weight_i = tf.compat.v1.placeholder(tf.float32, [None], 612 | name='{}_importance_sampling_weight_i'.format(self.get_name())) 613 | self.input.append(importance_sampling_weight_i) 614 | 615 | - V_values = tf.placeholder(tf.float32, [None], name='{}_V_values'.format(self.get_name())) 616 | + V_values = tf.compat.v1.placeholder(tf.float32, [None], name='{}_V_values'.format(self.get_name())) 617 | self.target.append(V_values) 618 | - Q_values = tf.placeholder(tf.float32, [None, self.num_actions], name='{}_Q_values'.format(self.get_name())) 619 | + Q_values = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], name='{}_Q_values'.format(self.get_name())) 620 | self.input.append(Q_values) 621 | - Q_retrace = tf.placeholder(tf.float32, [None], name='{}_Q_retrace'.format(self.get_name())) 622 | + Q_retrace = tf.compat.v1.placeholder(tf.float32, [None], name='{}_Q_retrace'.format(self.get_name())) 623 | self.input.append(Q_retrace) 624 | 625 | action_log_probs_wrt_policy = self.policy_distribution.log_prob(self.actions) 626 | @@ -78,7 +78,7 @@ class ACERPolicyHead(Head): 
627 | * tf.minimum(self.ap.algorithm.importance_weight_truncation, 628 | importance_sampling_weight_i)) 629 | 630 | - log_probs_wrt_policy = tf.log(self.policy_probs + eps) 631 | + log_probs_wrt_policy = tf.math.log(self.policy_probs + eps) 632 | bias_correction_gain = tf.reduce_sum(log_probs_wrt_policy 633 | * (Q_values - tf.expand_dims(V_values, 1)) 634 | * tf.nn.relu(1.0 - (self.ap.algorithm.importance_weight_truncation 635 | @@ -88,15 +88,15 @@ class ACERPolicyHead(Head): 636 | self.bias_correction_loss = -tf.reduce_mean(bias_correction_gain) 637 | 638 | self.loss = self.probability_loss + self.bias_correction_loss 639 | - tf.losses.add_loss(self.loss) 640 | + tf.compat.v1.losses.add_loss(self.loss) 641 | 642 | # Trust region 643 | - batch_size = tf.to_float(tf.shape(input_layer)[0]) 644 | - average_policy = tf.placeholder(tf.float32, [None, self.num_actions], 645 | + batch_size = tf.cast(tf.shape(input_layer)[0], dtype=tf.float32) 646 | + average_policy = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], 647 | name='{}_average_policy'.format(self.get_name())) 648 | self.input.append(average_policy) 649 | - average_policy_distribution = tf.contrib.distributions.Categorical(probs=(average_policy + eps)) 650 | - self.kl_divergence = tf.reduce_mean(tf.distributions.kl_divergence(average_policy_distribution, 651 | + average_policy_distribution = tf.compat.v1.distributions.Categorical(probs=(average_policy + eps)) 652 | + self.kl_divergence = tf.reduce_mean(tf.compat.v1.distributions.kl_divergence(average_policy_distribution, 653 | self.policy_distribution)) 654 | if self.ap.algorithm.use_trust_region_optimization: 655 | @tf.custom_gradient 656 | @@ -114,12 +114,12 @@ class ACERPolicyHead(Head): 657 | 658 | def _build_discrete_net(self, input_layer, action_space): 659 | self.num_actions = len(action_space.actions) 660 | - self.actions = tf.placeholder(tf.int32, [None], name='{}_actions'.format(self.get_name())) 661 | + self.actions = tf.compat.v1.placeholder(tf.int32, [None], name='{}_actions'.format(self.get_name())) 662 | self.input.append(self.actions) 663 | 664 | policy_values = self.dense_layer(self.num_actions)(input_layer, name='fc') 665 | self.policy_probs = tf.nn.softmax(policy_values, name='{}_policy'.format(self.get_name())) 666 | 667 | # (the + eps is to prevent probability 0 which will cause the log later on to be -inf) 668 | - self.policy_distribution = tf.contrib.distributions.Categorical(probs=(self.policy_probs + eps)) 669 | + self.policy_distribution = tf.compat.v1.distributions.Categorical(probs=(self.policy_probs + eps)) 670 | self.output = self.policy_probs 671 | diff --git a/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py b/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py 672 | index b573fe5..ee44176 100644 673 | --- a/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py 674 | +++ b/rl_coach/architectures/tensorflow_components/heads/categorical_q_head.py 675 | @@ -45,11 +45,11 @@ class CategoricalQHead(QHead): 676 | self.output = tf.nn.softmax(values_distribution) 677 | 678 | # calculate cross entropy loss 679 | - self.distributions = tf.placeholder(tf.float32, shape=(None, self.num_actions, self.num_atoms), 680 | + self.distributions = tf.compat.v1.placeholder(tf.float32, shape=(None, self.num_actions, self.num_atoms), 681 | name="distributions") 682 | self.target = self.distributions 683 | - self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, 
logits=values_distribution) 684 | - tf.losses.add_loss(self.loss) 685 | + self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(self.target), logits=values_distribution) 686 | + tf.compat.v1.losses.add_loss(self.loss) 687 | 688 | self.q_values = tf.tensordot(tf.cast(self.output, tf.float64), self.z_values, 1) 689 | 690 | diff --git a/rl_coach/architectures/tensorflow_components/heads/cil_head.py b/rl_coach/architectures/tensorflow_components/heads/cil_head.py 691 | index f3ae003..25a8ae6 100644 692 | --- a/rl_coach/architectures/tensorflow_components/heads/cil_head.py 693 | +++ b/rl_coach/architectures/tensorflow_components/heads/cil_head.py 694 | @@ -39,9 +39,9 @@ class RegressionHead(Head): 695 | self.num_actions = len(self.spaces.action.actions) 696 | self.return_type = QActionStateValue 697 | if agent_parameters.network_wrappers[self.network_name].replace_mse_with_huber_loss: 698 | - self.loss_type = tf.losses.huber_loss 699 | + self.loss_type = tf.compat.v1.losses.huber_loss 700 | else: 701 | - self.loss_type = tf.losses.mean_squared_error 702 | + self.loss_type = tf.compat.v1.losses.mean_squared_error 703 | self.output_bias_initializer = output_bias_initializer 704 | 705 | def _build_module(self, input_layer): 706 | diff --git a/rl_coach/architectures/tensorflow_components/heads/classification_head.py b/rl_coach/architectures/tensorflow_components/heads/classification_head.py 707 | index 6f6af7c..5a9da70 100644 708 | --- a/rl_coach/architectures/tensorflow_components/heads/classification_head.py 709 | +++ b/rl_coach/architectures/tensorflow_components/heads/classification_head.py 710 | @@ -47,9 +47,9 @@ class ClassificationHead(Head): 711 | self.output = tf.nn.softmax(self.class_values) 712 | 713 | # calculate cross entropy loss 714 | - self.target = tf.placeholder(tf.float32, shape=(None, self.num_actions), name="target") 715 | - self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=self.class_values) 716 | - tf.losses.add_loss(self.loss) 717 | + self.target = tf.compat.v1.placeholder(tf.float32, shape=(None, self.num_actions), name="target") 718 | + self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(self.target), logits=self.class_values) 719 | + tf.compat.v1.losses.add_loss(self.loss) 720 | 721 | def __str__(self): 722 | result = [ 723 | diff --git a/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py b/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py 724 | index 6462f83..1c43988 100644 725 | --- a/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py 726 | +++ b/rl_coach/architectures/tensorflow_components/heads/dnd_q_head.py 727 | @@ -65,11 +65,11 @@ class DNDQHead(QHead): 728 | self.softmax = self.add_softmax_with_temperature() 729 | 730 | def _q_value(self, input_layer, action): 731 | - result = tf.py_func(self.DND.query, 732 | + result = tf.compat.v1.py_func(self.DND.query, 733 | [input_layer, action, self.number_of_nn], 734 | [tf.float64, tf.float64, tf.int64]) 735 | - self.dnd_embeddings[action] = tf.to_float(result[0]) 736 | - self.dnd_values[action] = tf.to_float(result[1]) 737 | + self.dnd_embeddings[action] = tf.cast(result[0], dtype=tf.float32) 738 | + self.dnd_values[action] = tf.cast(result[1], dtype=tf.float32) 739 | self.dnd_indices[action] = result[2] 740 | 741 | # DND calculation 742 | @@ -77,7 +77,7 @@ class DNDQHead(QHead): 743 | distances = tf.reduce_sum(square_diff, axis=2) + [self.l2_norm_added_delta] 744 | self.dnd_distances[action] = distances 
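For reference, the DND readout being patched in this hunk computes an inverse-distance weighted average over the values returned by the dictionary query (the weights / normalised_weights lines that follow). A minimal standalone sketch of that computation, assuming plain float32 tensors rather than rl_coach's DND class (hypothetical function and argument names, not part of the patch):

import tensorflow as tf

def dnd_q_value(dnd_values, distances):
    # distances: [batch, k] distances to the k nearest stored keys (already offset
    # by a small delta so the reciprocal below cannot divide by zero)
    # dnd_values: [batch, k] Q-values stored alongside those keys
    weights = 1.0 / distances
    normalised_weights = weights / tf.reduce_sum(weights, axis=1, keepdims=True)
    return tf.reduce_sum(dnd_values * normalised_weights, axis=1)  # [batch]
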
745 | weights = 1.0 / distances 746 | - normalised_weights = weights / tf.reduce_sum(weights, axis=1, keep_dims=True) 747 | + normalised_weights = weights / tf.reduce_sum(weights, axis=1, keepdims=True) 748 | q_value = tf.reduce_sum(self.dnd_values[action] * normalised_weights, axis=1) 749 | q_value.set_shape((None,)) 750 | return q_value 751 | diff --git a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py 752 | index 92692ab..d612256 100644 753 | --- a/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py 754 | +++ b/rl_coach/architectures/tensorflow_components/heads/dueling_q_head.py 755 | @@ -32,12 +32,12 @@ class DuelingQHead(QHead): 756 | 757 | def _build_module(self, input_layer): 758 | # state value tower - V 759 | - with tf.variable_scope("state_value"): 760 | + with tf.compat.v1.variable_scope("state_value"): 761 | self.state_value = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') 762 | self.state_value = self.dense_layer(1)(self.state_value, name='fc2') 763 | 764 | # action advantage tower - A 765 | - with tf.variable_scope("action_advantage"): 766 | + with tf.compat.v1.variable_scope("action_advantage"): 767 | self.action_advantage = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') 768 | self.action_advantage = self.dense_layer(self.num_actions)(self.action_advantage, name='fc2') 769 | self.action_mean = tf.reduce_mean(self.action_advantage, axis=1, keepdims=True) 770 | diff --git a/rl_coach/architectures/tensorflow_components/heads/head.py b/rl_coach/architectures/tensorflow_components/heads/head.py 771 | index e971889..82fbabb 100644 772 | --- a/rl_coach/architectures/tensorflow_components/heads/head.py 773 | +++ b/rl_coach/architectures/tensorflow_components/heads/head.py 774 | @@ -50,7 +50,7 @@ class Head(object): 775 | self.loss_type = [] 776 | self.regularizations = [] 777 | self.loss_weight = tf.Variable([float(w) for w in force_list(loss_weight)], 778 | - trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 779 | + trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 780 | self.target = [] 781 | self.importance_weight = [] 782 | self.input = [] 783 | @@ -73,7 +73,7 @@ class Head(object): 784 | :return: the output of the last layer and the target placeholder 785 | """ 786 | 787 | - with tf.variable_scope(self.get_name(), initializer=tf.contrib.layers.xavier_initializer()): 788 | + with tf.compat.v1.variable_scope(self.get_name(), initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform")): 789 | self._build_module(squeeze_tensor(input_layer)) 790 | 791 | self.output = force_list(self.output) 792 | @@ -126,7 +126,7 @@ class Head(object): 793 | 794 | # there are heads that define the loss internally, but we need to create additional placeholders for them 795 | for idx in range(len(self.loss)): 796 | - importance_weight = tf.placeholder('float', 797 | + importance_weight = tf.compat.v1.placeholder('float', 798 | [None] + [1] * (len(self.target[idx].shape) - 1), 799 | '{}_importance_weight'.format(self.get_name())) 800 | self.importance_weight.append(importance_weight) 801 | @@ -134,12 +134,12 @@ class Head(object): 802 | # add losses and target placeholder 803 | for idx in range(len(self.loss_type)): 804 | # create target placeholder 805 | - target = tf.placeholder('float', self.output[idx].shape, 
'{}_target'.format(self.get_name())) 806 | + target = tf.compat.v1.placeholder('float', self.output[idx].shape, '{}_target'.format(self.get_name())) 807 | self.target.append(target) 808 | 809 | # create importance sampling weights placeholder 810 | num_target_dims = len(self.target[idx].shape) 811 | - importance_weight = tf.placeholder('float', [None] + [1] * (num_target_dims - 1), 812 | + importance_weight = tf.compat.v1.placeholder('float', [None] + [1] * (num_target_dims - 1), 813 | '{}_importance_weight'.format(self.get_name())) 814 | self.importance_weight.append(importance_weight) 815 | 816 | @@ -153,13 +153,13 @@ class Head(object): 817 | loss = tf.reduce_mean(loss_weight*tf.reduce_sum(loss, axis=list(range(1, num_target_dims)))) 818 | 819 | # we add the loss to the losses collection and later we will extract it in general_network 820 | - tf.losses.add_loss(loss) 821 | + tf.compat.v1.losses.add_loss(loss) 822 | self.loss.append(loss) 823 | 824 | # add regularizations 825 | for regularization in self.regularizations: 826 | self.loss.append(regularization) 827 | - tf.losses.add_loss(regularization) 828 | + tf.compat.v1.losses.add_loss(regularization) 829 | 830 | @classmethod 831 | def path(cls): 832 | diff --git a/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py b/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py 833 | index 647abc3..2d95d25 100644 834 | --- a/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py 835 | +++ b/rl_coach/architectures/tensorflow_components/heads/measurements_prediction_head.py 836 | @@ -39,27 +39,27 @@ class MeasurementsPredictionHead(Head): 837 | def _build_module(self, input_layer): 838 | # This is almost exactly the same as Dueling Network but we predict the future measurements for each action 839 | # actions expectation tower (expectation stream) - E 840 | - with tf.variable_scope("expectation_stream"): 841 | + with tf.compat.v1.variable_scope("expectation_stream"): 842 | expectation_stream = self.dense_layer(256)(input_layer, activation=self.activation_function, name='fc1') 843 | expectation_stream = self.dense_layer(self.multi_step_measurements_size)(expectation_stream, name='output') 844 | expectation_stream = tf.expand_dims(expectation_stream, axis=1) 845 | 846 | # action fine differences tower (action stream) - A 847 | - with tf.variable_scope("action_stream"): 848 | + with tf.compat.v1.variable_scope("action_stream"): 849 | action_stream = self.dense_layer(256)(input_layer, activation=self.activation_function, name='fc1') 850 | action_stream = self.dense_layer(self.num_actions * self.multi_step_measurements_size)(action_stream, 851 | name='output') 852 | action_stream = tf.reshape(action_stream, 853 | (tf.shape(action_stream)[0], self.num_actions, self.multi_step_measurements_size)) 854 | - action_stream = action_stream - tf.reduce_mean(action_stream, reduction_indices=1, keepdims=True) 855 | + action_stream = action_stream - tf.reduce_mean(action_stream, axis=1, keepdims=True) 856 | 857 | # merge to future measurements predictions 858 | self.output = tf.add(expectation_stream, action_stream, name='output') 859 | - self.target = tf.placeholder(tf.float32, [None, self.num_actions, self.multi_step_measurements_size], 860 | + self.target = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions, self.multi_step_measurements_size], 861 | name="targets") 862 | - targets_nonan = tf.where(tf.is_nan(self.target), self.output, self.target) 863 | - 
self.loss = tf.reduce_sum(tf.reduce_mean(tf.square(targets_nonan - self.output), reduction_indices=0)) 864 | - tf.losses.add_loss(self.loss_weight[0] * self.loss) 865 | + targets_nonan = tf.compat.v1.where(tf.math.is_nan(self.target), self.output, self.target) 866 | + self.loss = tf.reduce_sum(tf.reduce_mean(tf.square(targets_nonan - self.output), axis=0)) 867 | + tf.compat.v1.losses.add_loss(self.loss_weight[0] * self.loss) 868 | 869 | def __str__(self): 870 | result = [ 871 | diff --git a/rl_coach/architectures/tensorflow_components/heads/naf_head.py b/rl_coach/architectures/tensorflow_components/heads/naf_head.py 872 | index 9071fed..2bcb724 100644 873 | --- a/rl_coach/architectures/tensorflow_components/heads/naf_head.py 874 | +++ b/rl_coach/architectures/tensorflow_components/heads/naf_head.py 875 | @@ -38,13 +38,13 @@ class NAFHead(Head): 876 | self.output_scale = self.spaces.action.max_abs_range 877 | self.return_type = QActionStateValue 878 | if agent_parameters.network_wrappers[self.network_name].replace_mse_with_huber_loss: 879 | - self.loss_type = tf.losses.huber_loss 880 | + self.loss_type = tf.compat.v1.losses.huber_loss 881 | else: 882 | - self.loss_type = tf.losses.mean_squared_error 883 | + self.loss_type = tf.compat.v1.losses.mean_squared_error 884 | 885 | def _build_module(self, input_layer): 886 | # NAF 887 | - self.action = tf.placeholder(tf.float32, [None, self.num_actions], name="action") 888 | + self.action = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], name="action") 889 | self.input = self.action 890 | 891 | # V Head 892 | diff --git a/rl_coach/architectures/tensorflow_components/heads/policy_head.py b/rl_coach/architectures/tensorflow_components/heads/policy_head.py 893 | index 540bd1a..792d52f 100644 894 | --- a/rl_coach/architectures/tensorflow_components/heads/policy_head.py 895 | +++ b/rl_coach/architectures/tensorflow_components/heads/policy_head.py 896 | @@ -16,6 +16,7 @@ 897 | 898 | import numpy as np 899 | import tensorflow as tf 900 | +import tensorflow_probability as tfp 901 | 902 | from rl_coach.architectures.tensorflow_components.layers import Dense 903 | from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer 904 | @@ -44,9 +45,9 @@ class PolicyHead(Head): 905 | if hasattr(agent_parameters.algorithm, 'beta_entropy'): 906 | # we set the beta value as a tf variable so it can be updated later if needed 907 | self.beta = tf.Variable(float(agent_parameters.algorithm.beta_entropy), 908 | - trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 909 | - self.beta_placeholder = tf.placeholder('float') 910 | - self.set_beta = tf.assign(self.beta, self.beta_placeholder) 911 | + trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 912 | + self.beta_placeholder = tf.compat.v1.placeholder('float') 913 | + self.set_beta = tf.compat.v1.assign(self.beta, self.beta_placeholder) 914 | 915 | # a scalar weight that penalizes high activation values (before the activation function) for the final layer 916 | if hasattr(agent_parameters.algorithm, 'action_penalty'): 917 | @@ -64,7 +65,7 @@ class PolicyHead(Head): 918 | 919 | # create a compound action network 920 | for action_space_idx, action_space in enumerate(action_spaces): 921 | - with tf.variable_scope("sub_action_{}".format(action_space_idx)): 922 | + with tf.compat.v1.variable_scope("sub_action_{}".format(action_space_idx)): 923 | if isinstance(action_space, DiscreteActionSpace): 924 | # create a discrete action network (softmax 
probabilities output) 925 | self._build_discrete_net(input_layer, action_space) 926 | @@ -81,27 +82,27 @@ class PolicyHead(Head): 927 | # calculate loss 928 | self.action_log_probs_wrt_policy = \ 929 | tf.add_n([dist.log_prob(action) for dist, action in zip(self.policy_distributions, self.actions)]) 930 | - self.advantages = tf.placeholder(tf.float32, [None], name="advantages") 931 | + self.advantages = tf.compat.v1.placeholder(tf.float32, [None], name="advantages") 932 | self.target = self.advantages 933 | self.loss = -tf.reduce_mean(self.action_log_probs_wrt_policy * self.advantages) 934 | - tf.losses.add_loss(self.loss_weight[0] * self.loss) 935 | + tf.compat.v1.losses.add_loss(self.loss_weight[0] * self.loss) 936 | 937 | def _build_discrete_net(self, input_layer, action_space): 938 | num_actions = len(action_space.actions) 939 | - self.actions.append(tf.placeholder(tf.int32, [None], name="actions")) 940 | + self.actions.append(tf.compat.v1.placeholder(tf.int32, [None], name="actions")) 941 | 942 | policy_values = self.dense_layer(num_actions)(input_layer, name='fc') 943 | self.policy_probs = tf.nn.softmax(policy_values, name="policy") 944 | 945 | # define the distributions for the policy and the old policy 946 | # (the + eps is to prevent probability 0 which will cause the log later on to be -inf) 947 | - policy_distribution = tf.contrib.distributions.Categorical(probs=(self.policy_probs + eps)) 948 | + policy_distribution = tf.compat.v1.distributions.Categorical(probs=(self.policy_probs + eps)) 949 | self.policy_distributions.append(policy_distribution) 950 | self.output.append(self.policy_probs) 951 | 952 | def _build_continuous_net(self, input_layer, action_space): 953 | num_actions = action_space.shape 954 | - self.actions.append(tf.placeholder(tf.float32, [None, num_actions], name="actions")) 955 | + self.actions.append(tf.compat.v1.placeholder(tf.float32, [None, num_actions], name="actions")) 956 | 957 | # output activation function 958 | if np.all(action_space.max_abs_range < np.inf): 959 | @@ -135,14 +136,14 @@ class PolicyHead(Head): 960 | # it as not trainable puts it for some reason in the global variables collections. If this is not done, 961 | # the variable won't be initialized and when working with multiple workers they will get stuck. 
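The comment above is the reason several heads in this patch keep manually created variables in the local-variables collection; a rough sketch of that pattern in graph mode, with a hypothetical variable name and shape (not the patched rl_coach code):

import tensorflow as tf
tf.compat.v1.disable_eager_execution()

# Registering the variable only in LOCAL_VARIABLES keeps it off the parameter server
# in distributed runs; tf.compat.v1.local_variables_initializer() then initialises it
# separately on every worker.
policy_std_example = tf.compat.v1.Variable(
    tf.ones([4], dtype=tf.float32),  # hypothetical shape, illustration only
    trainable=False,
    name='policy_stdev_example',
    collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES])
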
962 | self.policy_std = tf.Variable(np.ones(num_actions), dtype='float32', trainable=False, 963 | - name='policy_stdev', collections=[tf.GraphKeys.LOCAL_VARIABLES]) 964 | + name='policy_stdev', collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 965 | 966 | # assign op for the policy std 967 | - self.policy_std_placeholder = tf.placeholder('float32', (num_actions,)) 968 | - self.assign_policy_std = tf.assign(self.policy_std, self.policy_std_placeholder) 969 | + self.policy_std_placeholder = tf.compat.v1.placeholder('float32', (num_actions,)) 970 | + self.assign_policy_std = tf.compat.v1.assign(self.policy_std, self.policy_std_placeholder) 971 | 972 | # define the distributions for the policy and the old policy 973 | - policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.policy_mean, self.policy_std) 974 | + policy_distribution = tfp.distributions.MultivariateNormalDiag(self.policy_mean, self.policy_std) 975 | self.policy_distributions.append(policy_distribution) 976 | 977 | if self.is_local: 978 | diff --git a/rl_coach/architectures/tensorflow_components/heads/ppo_head.py b/rl_coach/architectures/tensorflow_components/heads/ppo_head.py 979 | index 63f95a3..1d17a9d 100644 980 | --- a/rl_coach/architectures/tensorflow_components/heads/ppo_head.py 981 | +++ b/rl_coach/architectures/tensorflow_components/heads/ppo_head.py 982 | @@ -16,6 +16,7 @@ 983 | 984 | import numpy as np 985 | import tensorflow as tf 986 | +import tensorflow_probability as tfp 987 | 988 | from rl_coach.architectures.tensorflow_components.layers import Dense 989 | from rl_coach.architectures.tensorflow_components.heads.head import Head, normalized_columns_initializer 990 | @@ -25,6 +26,11 @@ from rl_coach.spaces import BoxActionSpace, DiscreteActionSpace 991 | from rl_coach.spaces import SpacesDefinition 992 | from rl_coach.utils import eps 993 | 994 | +# Since we are using log prob it is possible to encounter a 0 log 0 condition 995 | +# which will tank the training by producing NaNs, therefore it is necessary 996 | +# to add a zero offset to all networks with discrete distributions to prevent 997 | +# this issue 998 | +ZERO_OFFSET = 1e-8 999 | 1000 | class PPOHead(Head): 1001 | def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition, network_name: str, 1002 | @@ -41,8 +47,8 @@ class PPOHead(Head): 1003 | # kl coefficient and its corresponding assignment operation and placeholder 1004 | self.kl_coefficient = tf.Variable(agent_parameters.algorithm.initial_kl_coefficient, 1005 | trainable=False, name='kl_coefficient') 1006 | - self.kl_coefficient_ph = tf.placeholder('float', name='kl_coefficient_ph') 1007 | - self.assign_kl_coefficient = tf.assign(self.kl_coefficient, self.kl_coefficient_ph) 1008 | + self.kl_coefficient_ph = tf.compat.v1.placeholder('float', name='kl_coefficient_ph') 1009 | + self.assign_kl_coefficient = tf.compat.v1.assign(self.kl_coefficient, self.kl_coefficient_ph) 1010 | self.kl_cutoff = 2 * agent_parameters.algorithm.target_kl_divergence 1011 | self.high_kl_penalty_coefficient = agent_parameters.algorithm.high_kl_penalty_coefficient 1012 | 1013 | @@ -63,7 +69,11 @@ class PPOHead(Head): 1014 | 1015 | # Used by regular PPO only 1016 | # add kl divergence regularization 1017 | - self.kl_divergence = tf.reduce_mean(tf.distributions.kl_divergence(self.old_policy_distribution, self.policy_distribution)) 1018 | + if isinstance(self.spaces.action, DiscreteActionSpace): 1019 | + self.kl_divergence =
tf.reduce_mean(tf.compat.v1.distributions.kl_divergence(self.old_policy_distribution, self.policy_distribution)) 1020 | + else: 1021 | + self.kl_divergence = tf.reduce_mean(tfp.distributions.kl_divergence(self.old_policy_distribution, self.policy_distribution)) 1022 | + 1023 | 1024 | if self.use_kl_regularization: 1025 | # no clipping => use kl regularization 1026 | @@ -72,12 +82,12 @@ class PPOHead(Head): 1027 | tf.square(tf.maximum(0.0, self.kl_divergence - self.kl_cutoff))] 1028 | 1029 | # calculate surrogate loss 1030 | - self.advantages = tf.placeholder(tf.float32, [None], name="advantages") 1031 | + self.advantages = tf.compat.v1.placeholder(tf.float32, [None], name="advantages") 1032 | self.target = self.advantages 1033 | # action_probs_wrt_old_policy != 0 because it is e^... 1034 | self.likelihood_ratio = tf.exp(self.action_probs_wrt_policy - self.action_probs_wrt_old_policy) 1035 | if self.clip_likelihood_ratio_using_epsilon is not None: 1036 | - self.clip_param_rescaler = tf.placeholder(tf.float32, ()) 1037 | + self.clip_param_rescaler = tf.compat.v1.placeholder(tf.float32, ()) 1038 | self.input.append(self.clip_param_rescaler) 1039 | max_value = 1 + self.clip_likelihood_ratio_using_epsilon * self.clip_param_rescaler 1040 | min_value = 1 - self.clip_likelihood_ratio_using_epsilon * self.clip_param_rescaler 1041 | @@ -95,51 +105,51 @@ class PPOHead(Head): 1042 | self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')] 1043 | 1044 | self.loss = self.surrogate_loss 1045 | - tf.losses.add_loss(self.loss) 1046 | + tf.compat.v1.losses.add_loss(self.loss) 1047 | 1048 | def _build_discrete_net(self, input_layer, action_space): 1049 | num_actions = len(action_space.actions) 1050 | - self.actions = tf.placeholder(tf.int32, [None], name="actions") 1051 | + self.actions = tf.compat.v1.placeholder(tf.int32, [None], name="actions") 1052 | 1053 | - self.old_policy_mean = tf.placeholder(tf.float32, [None, num_actions], "old_policy_mean") 1054 | - self.old_policy_std = tf.placeholder(tf.float32, [None, num_actions], "old_policy_std") 1055 | + self.old_policy_mean = tf.compat.v1.placeholder(tf.float32, [None, num_actions], "old_policy_mean") 1056 | + self.old_policy_std = tf.compat.v1.placeholder(tf.float32, [None, num_actions], "old_policy_std") 1057 | 1058 | # Policy Head 1059 | self.input = [self.actions, self.old_policy_mean] 1060 | policy_values = self.dense_layer(num_actions)(input_layer, name='policy_fc') 1061 | - self.policy_mean = tf.nn.softmax(policy_values, name="policy") 1062 | + # Prevent distributions with 0 values 1063 | + self.policy_mean = tf.maximum(tf.nn.softmax(policy_values, name="policy"), ZERO_OFFSET) 1064 | 1065 | # define the distributions for the policy and the old policy 1066 | - self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean) 1067 | - self.old_policy_distribution = tf.contrib.distributions.Categorical(probs=self.old_policy_mean) 1068 | + self.policy_distribution = tf.compat.v1.distributions.Categorical(probs=self.policy_mean) 1069 | + self.old_policy_distribution = tf.compat.v1.distributions.Categorical(probs=self.old_policy_mean) 1070 | 1071 | self.output = self.policy_mean 1072 | 1073 | def _build_continuous_net(self, input_layer, action_space): 1074 | num_actions = action_space.shape[0] 1075 | - self.actions = tf.placeholder(tf.float32, [None, num_actions], name="actions") 1076 | + self.actions = tf.compat.v1.placeholder(tf.float32, [None, num_actions], name="actions") 1077 | 1078 | - 
self.old_policy_mean = tf.placeholder(tf.float32, [None, num_actions], "old_policy_mean") 1079 | - self.old_policy_std = tf.placeholder(tf.float32, [None, num_actions], "old_policy_std") 1080 | + self.old_policy_mean = tf.compat.v1.placeholder(tf.float32, [None, num_actions], "old_policy_mean") 1081 | + self.old_policy_std = tf.compat.v1.placeholder(tf.float32, [None, num_actions], "old_policy_std") 1082 | 1083 | self.input = [self.actions, self.old_policy_mean, self.old_policy_std] 1084 | - self.policy_mean = self.dense_layer(num_actions)(input_layer, name='policy_mean', 1085 | - kernel_initializer=normalized_columns_initializer(0.01)) 1086 | - 1087 | + self.policy_mean = tf.identity(self.dense_layer(num_actions)(input_layer, name='policy_mean', 1088 | + kernel_initializer=normalized_columns_initializer(0.01)), name="policy") 1089 | # for local networks in distributed settings, we need to move variables we create manually to the 1090 | # tf.GraphKeys.LOCAL_VARIABLES collection, since the variable scope custom getter which is set in 1091 | # Architecture does not apply to them 1092 | if self.is_local and isinstance(self.ap.task_parameters, DistributedTaskParameters): 1093 | self.policy_logstd = tf.Variable(np.zeros((1, num_actions)), dtype='float32', 1094 | - collections=[tf.GraphKeys.LOCAL_VARIABLES], name="policy_log_std") 1095 | + collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES], name="policy_log_std") 1096 | else: 1097 | self.policy_logstd = tf.Variable(np.zeros((1, num_actions)), dtype='float32', name="policy_log_std") 1098 | 1099 | - self.policy_std = tf.tile(tf.exp(self.policy_logstd), [tf.shape(input_layer)[0], 1], name='policy_std') 1100 | + self.policy_std = tf.tile(tf.exp(tf.clip_by_value(self.policy_logstd, -20.0, 3.0)), [tf.shape(input_layer)[0], 1], name='policy_std') 1101 | 1102 | # define the distributions for the policy and the old policy 1103 | - self.policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.policy_mean, self.policy_std + eps) 1104 | - self.old_policy_distribution = tf.contrib.distributions.MultivariateNormalDiag(self.old_policy_mean, self.old_policy_std + eps) 1105 | + self.policy_distribution = tfp.distributions.MultivariateNormalDiag(self.policy_mean, self.policy_std + eps) 1106 | + self.old_policy_distribution = tfp.distributions.MultivariateNormalDiag(self.old_policy_mean, self.old_policy_std + eps) 1107 | 1108 | self.output = [self.policy_mean, self.policy_std] 1109 | 1110 | diff --git a/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py b/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py 1111 | index e2abbfc..1907fe9 100644 1112 | --- a/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py 1113 | +++ b/rl_coach/architectures/tensorflow_components/heads/ppo_v_head.py 1114 | @@ -35,12 +35,12 @@ class PPOVHead(Head): 1115 | self.output_bias_initializer = output_bias_initializer 1116 | 1117 | def _build_module(self, input_layer): 1118 | - self.old_policy_value = tf.placeholder(tf.float32, [None], "old_policy_values") 1119 | + self.old_policy_value = tf.compat.v1.placeholder(tf.float32, [None], "old_policy_values") 1120 | self.input = [self.old_policy_value] 1121 | self.output = self.dense_layer(1)(input_layer, name='output', 1122 | kernel_initializer=normalized_columns_initializer(1.0), 1123 | bias_initializer=self.output_bias_initializer) 1124 | - self.target = self.total_return = tf.placeholder(tf.float32, [None], name="total_return") 1125 | + self.target = self.total_return = 
tf.compat.v1.placeholder(tf.float32, [None], name="total_return") 1126 | 1127 | value_loss_1 = tf.square(self.output - self.target) 1128 | value_loss_2 = tf.square(self.old_policy_value + 1129 | @@ -49,7 +49,7 @@ class PPOVHead(Head): 1130 | self.clip_likelihood_ratio_using_epsilon) - self.target) 1131 | self.vf_loss = tf.reduce_mean(tf.maximum(value_loss_1, value_loss_2)) 1132 | self.loss = self.vf_loss 1133 | - tf.losses.add_loss(self.loss) 1134 | + tf.compat.v1.losses.add_loss(self.loss) 1135 | 1136 | def __str__(self): 1137 | result = [ 1138 | diff --git a/rl_coach/architectures/tensorflow_components/heads/q_head.py b/rl_coach/architectures/tensorflow_components/heads/q_head.py 1139 | index 0bd120b..2a9470b 100644 1140 | --- a/rl_coach/architectures/tensorflow_components/heads/q_head.py 1141 | +++ b/rl_coach/architectures/tensorflow_components/heads/q_head.py 1142 | @@ -42,9 +42,9 @@ class QHead(Head): 1143 | ) 1144 | self.return_type = QActionStateValue 1145 | if agent_parameters.network_wrappers[self.network_name].replace_mse_with_huber_loss: 1146 | - self.loss_type = tf.losses.huber_loss 1147 | + self.loss_type = tf.compat.v1.losses.huber_loss 1148 | else: 1149 | - self.loss_type = tf.losses.mean_squared_error 1150 | + self.loss_type = tf.compat.v1.losses.mean_squared_error 1151 | 1152 | self.output_bias_initializer = output_bias_initializer 1153 | 1154 | diff --git a/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py b/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py 1155 | index 4e32e91..9bce7dc 100644 1156 | --- a/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py 1157 | +++ b/rl_coach/architectures/tensorflow_components/heads/quantile_regression_q_head.py 1158 | @@ -38,8 +38,8 @@ class QuantileRegressionQHead(QHead): 1159 | self.loss_type = [] 1160 | 1161 | def _build_module(self, input_layer): 1162 | - self.actions = tf.placeholder(tf.int32, [None, 2], name="actions") 1163 | - self.quantile_midpoints = tf.placeholder(tf.float32, [None, self.num_atoms], name="quantile_midpoints") 1164 | + self.actions = tf.compat.v1.placeholder(tf.int32, [None, 2], name="actions") 1165 | + self.quantile_midpoints = tf.compat.v1.placeholder(tf.float32, [None, self.num_atoms], name="quantile_midpoints") 1166 | self.input = [self.actions, self.quantile_midpoints] 1167 | 1168 | # the output of the head is the N unordered quantile locations {theta_1, ..., theta_N} 1169 | @@ -48,7 +48,7 @@ class QuantileRegressionQHead(QHead): 1170 | quantiles_locations = tf.reshape(quantiles_locations, (tf.shape(quantiles_locations)[0], self.num_actions, self.num_atoms)) 1171 | self.output = quantiles_locations 1172 | 1173 | - self.quantiles = tf.placeholder(tf.float32, shape=(None, self.num_atoms), name="quantiles") 1174 | + self.quantiles = tf.compat.v1.placeholder(tf.float32, shape=(None, self.num_atoms), name="quantiles") 1175 | self.target = self.quantiles 1176 | 1177 | # only the quantiles of the taken action are taken into account 1178 | @@ -73,7 +73,7 @@ class QuantileRegressionQHead(QHead): 1179 | # Quantile regression loss (the probability for each quantile is 1/num_quantiles) 1180 | quantile_regression_loss = tf.reduce_sum(quantile_huber_loss) / float(self.num_atoms) 1181 | self.loss = quantile_regression_loss 1182 | - tf.losses.add_loss(self.loss) 1183 | + tf.compat.v1.losses.add_loss(self.loss) 1184 | 1185 | self.q_values = tf.tensordot(tf.cast(self.output, tf.float64), self.quantile_probabilities, 1) 1186 | 1187 
| diff --git a/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py b/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py 1188 | index f7f0ba4..b138fc7 100644 1189 | --- a/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py 1190 | +++ b/rl_coach/architectures/tensorflow_components/heads/rainbow_q_head.py 1191 | @@ -37,13 +37,13 @@ class RainbowQHead(QHead): 1192 | 1193 | def _build_module(self, input_layer): 1194 | # state value tower - V 1195 | - with tf.variable_scope("state_value"): 1196 | + with tf.compat.v1.variable_scope("state_value"): 1197 | state_value = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') 1198 | state_value = self.dense_layer(self.num_atoms)(state_value, name='fc2') 1199 | state_value = tf.expand_dims(state_value, axis=1) 1200 | 1201 | # action advantage tower - A 1202 | - with tf.variable_scope("action_advantage"): 1203 | + with tf.compat.v1.variable_scope("action_advantage"): 1204 | action_advantage = self.dense_layer(512)(input_layer, activation=self.activation_function, name='fc1') 1205 | action_advantage = self.dense_layer(self.num_actions * self.num_atoms)(action_advantage, name='fc2') 1206 | action_advantage = tf.reshape(action_advantage, (tf.shape(input_layer)[0], self.num_actions, 1207 | @@ -58,11 +58,11 @@ class RainbowQHead(QHead): 1208 | self.output = tf.nn.softmax(values_distribution) 1209 | 1210 | # calculate cross entropy loss 1211 | - self.distributions = tf.placeholder(tf.float32, shape=(None, self.num_actions, self.num_atoms), 1212 | + self.distributions = tf.compat.v1.placeholder(tf.float32, shape=(None, self.num_actions, self.num_atoms), 1213 | name="distributions") 1214 | self.target = self.distributions 1215 | - self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.target, logits=values_distribution) 1216 | - tf.losses.add_loss(self.loss) 1217 | + self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(self.target), logits=values_distribution) 1218 | + tf.compat.v1.losses.add_loss(self.loss) 1219 | 1220 | self.q_values = tf.tensordot(tf.cast(self.output, tf.float64), self.z_values, 1) 1221 | 1222 | diff --git a/rl_coach/architectures/tensorflow_components/heads/sac_head.py b/rl_coach/architectures/tensorflow_components/heads/sac_head.py 1223 | index aad9bfc..dfd58c3 100644 1224 | --- a/rl_coach/architectures/tensorflow_components/heads/sac_head.py 1225 | +++ b/rl_coach/architectures/tensorflow_components/heads/sac_head.py 1226 | @@ -15,6 +15,7 @@ 1227 | # 1228 | 1229 | import tensorflow as tf 1230 | +import tensorflow_probability as tfp 1231 | 1232 | from rl_coach.architectures.tensorflow_components.layers import Dense 1233 | from rl_coach.architectures.tensorflow_components.heads.head import Head 1234 | @@ -39,7 +40,7 @@ class SACPolicyHead(Head): 1235 | self.squash = squash # squashing using tanh 1236 | 1237 | def _build_module(self, input_layer): 1238 | - self.given_raw_actions = tf.placeholder(tf.float32, [None, self.num_actions], name="actions") 1239 | + self.given_raw_actions = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], name="actions") 1240 | self.input = [self.given_raw_actions] 1241 | self.output = [] 1242 | 1243 | @@ -55,7 +56,7 @@ class SACPolicyHead(Head): 1244 | ''' 1245 | if not self.squash: 1246 | return 0 1247 | - return tf.reduce_sum(tf.log(1 - tf.tanh(actions) ** 2 + eps), axis=1) 1248 | + return tf.reduce_sum(tf.math.log(1 - tf.tanh(actions) ** 2 + eps), axis=1) 1249 | 1250 | def 
_build_continuous_net(self, input_layer, action_space): 1251 | num_actions = action_space.shape[0] 1252 | @@ -70,8 +71,7 @@ class SACPolicyHead(Head): 1253 | 1254 | # define the distributions for the policy 1255 | # Tensorflow's multivariate normal distribution supports reparameterization 1256 | - tfd = tf.contrib.distributions 1257 | - self.policy_distribution = tfd.MultivariateNormalDiag(loc=self.policy_mean, 1258 | + self.policy_distribution = tfp.distributions.MultivariateNormalDiag(loc=self.policy_mean, 1259 | scale_diag=tf.exp(self.policy_log_std)) 1260 | 1261 | # define network outputs 1262 | diff --git a/rl_coach/architectures/tensorflow_components/heads/sac_q_head.py b/rl_coach/architectures/tensorflow_components/heads/sac_q_head.py 1263 | index dbac165..d6b4e9d 100644 1264 | --- a/rl_coach/architectures/tensorflow_components/heads/sac_q_head.py 1265 | +++ b/rl_coach/architectures/tensorflow_components/heads/sac_q_head.py 1266 | @@ -48,15 +48,15 @@ class SACQHead(Head): 1267 | # state is the observation fed through the input_layer, action is fed through placeholder to the header 1268 | # each is calculating q value : q1(s,a) and q2(s,a) 1269 | # the output of the head is min(q1,q2) 1270 | - self.actions = tf.placeholder(tf.float32, [None, self.num_actions], name="actions") 1271 | - self.target = tf.placeholder(tf.float32, [None, 1], name="q_targets") 1272 | + self.actions = tf.compat.v1.placeholder(tf.float32, [None, self.num_actions], name="actions") 1273 | + self.target = tf.compat.v1.placeholder(tf.float32, [None, 1], name="q_targets") 1274 | self.input = [self.actions] 1275 | self.output = [] 1276 | # Note (1) : in the author's implementation of sac (in rllab) they summarize the embedding of observation and 1277 | # action (broadcasting the bias) in the first layer of the network. 
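As a rough illustration of the embedding summation described in the note above (a hedged sketch with an arbitrary layer size, not the patched rl_coach code), the first hidden layer of each Q head can be formed by embedding the observation and the action separately and adding the results:

import tensorflow as tf
tf.compat.v1.disable_eager_execution()

def q_head_first_layer(observation, actions, units=256):
    # Embed state and action separately, then sum, so the action enters the network
    # in the first layer (the dense-layer bias broadcasts over the summed embedding).
    obs_emb = tf.compat.v1.layers.dense(observation, units, activation=tf.nn.relu, name='obs_emb')
    act_emb = tf.compat.v1.layers.dense(actions, units, activation=tf.nn.relu, name='act_emb')
    return obs_emb + act_emb
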
1278 | 1279 | # build q1 network head 1280 | - with tf.variable_scope("q1_head"): 1281 | + with tf.compat.v1.variable_scope("q1_head"): 1282 | layer_size = self.network_layers_sizes[0] 1283 | qi_obs_emb = self.dense_layer(layer_size)(input_layer, activation=self.activation_function) 1284 | qi_act_emb = self.dense_layer(layer_size)(self.actions, activation=self.activation_function) 1285 | @@ -68,7 +68,7 @@ class SACQHead(Head): 1286 | bias_initializer=self.output_bias_initializer) 1287 | 1288 | # build q2 network head 1289 | - with tf.variable_scope("q2_head"): 1290 | + with tf.compat.v1.variable_scope("q2_head"): 1291 | layer_size = self.network_layers_sizes[0] 1292 | qi_obs_emb = self.dense_layer(layer_size)(input_layer, activation=self.activation_function) 1293 | qi_act_emb = self.dense_layer(layer_size)(self.actions, activation=self.activation_function) 1294 | @@ -93,7 +93,7 @@ class SACQHead(Head): 1295 | self.q2_loss = 0.5*tf.reduce_mean(tf.square(self.q2_output - self.target)) 1296 | # eventually both losses are depends on different parameters so we can sum them up 1297 | self.loss = self.q1_loss+self.q2_loss 1298 | - tf.losses.add_loss(self.loss) 1299 | + tf.compat.v1.losses.add_loss(self.loss) 1300 | 1301 | def __str__(self): 1302 | result = [ 1303 | diff --git a/rl_coach/architectures/tensorflow_components/heads/td3_v_head.py b/rl_coach/architectures/tensorflow_components/heads/td3_v_head.py 1304 | index 1457e32..8420f56 100644 1305 | --- a/rl_coach/architectures/tensorflow_components/heads/td3_v_head.py 1306 | +++ b/rl_coach/architectures/tensorflow_components/heads/td3_v_head.py 1307 | @@ -40,7 +40,7 @@ class TD3VHead(Head): 1308 | def _build_module(self, input_layer): 1309 | # Standard V Network 1310 | q_outputs = [] 1311 | - self.target = tf.placeholder(tf.float32, shape=(None, 1), name="q_networks_min_placeholder") 1312 | + self.target = tf.compat.v1.placeholder(tf.float32, shape=(None, 1), name="q_networks_min_placeholder") 1313 | 1314 | for i in range(input_layer.shape[0]): # assuming that the actual size is 2, as there are two critic networks 1315 | if self.initializer == 'normalized_columns': 1316 | @@ -57,7 +57,7 @@ class TD3VHead(Head): 1317 | self.output.append(tf.reduce_min(q_outputs, axis=0)) 1318 | self.output.append(tf.reduce_mean(self.output[0])) 1319 | self.loss = sum(self.loss) 1320 | - tf.losses.add_loss(self.loss) 1321 | + tf.compat.v1.losses.add_loss(self.loss) 1322 | 1323 | def __str__(self): 1324 | result = [ 1325 | diff --git a/rl_coach/architectures/tensorflow_components/heads/v_head.py b/rl_coach/architectures/tensorflow_components/heads/v_head.py 1326 | index 16ff185..1f0d00a 100644 1327 | --- a/rl_coach/architectures/tensorflow_components/heads/v_head.py 1328 | +++ b/rl_coach/architectures/tensorflow_components/heads/v_head.py 1329 | @@ -33,9 +33,9 @@ class VHead(Head): 1330 | self.return_type = VStateValue 1331 | 1332 | if agent_parameters.network_wrappers[self.network_name.split('/')[0]].replace_mse_with_huber_loss: 1333 | - self.loss_type = tf.losses.huber_loss 1334 | + self.loss_type = tf.compat.v1.losses.huber_loss 1335 | else: 1336 | - self.loss_type = tf.losses.mean_squared_error 1337 | + self.loss_type = tf.compat.v1.losses.mean_squared_error 1338 | 1339 | self.initializer = initializer 1340 | self.output_bias_initializer = output_bias_initializer 1341 | diff --git a/rl_coach/architectures/tensorflow_components/layers.py b/rl_coach/architectures/tensorflow_components/layers.py 1342 | index 91c0c30..6fb60a8 100644 1343 | --- 
a/rl_coach/architectures/tensorflow_components/layers.py 1344 | +++ b/rl_coach/architectures/tensorflow_components/layers.py 1345 | @@ -22,7 +22,7 @@ import tensorflow as tf 1346 | from rl_coach.architectures import layers 1347 | from rl_coach.architectures.tensorflow_components import utils 1348 | 1349 | - 1350 | +tf.compat.v1.disable_resource_variables() 1351 | def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dropout_rate, is_training, name): 1352 | layers = [input_layer] 1353 | 1354 | @@ -32,7 +32,7 @@ def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dr 1355 | # batchnorm 1356 | if batchnorm: 1357 | layers.append( 1358 | - tf.layers.batch_normalization(layers[-1], name="{}_batchnorm".format(name), training=is_training) 1359 | + tf.compat.v1.layers.batch_normalization(layers[-1], name="{}_batchnorm".format(name), training=is_training) 1360 | ) 1361 | 1362 | # activation 1363 | @@ -46,7 +46,7 @@ def batchnorm_activation_dropout(input_layer, batchnorm, activation_function, dr 1364 | # dropout 1365 | if dropout_rate > 0: 1366 | layers.append( 1367 | - tf.layers.dropout(layers[-1], dropout_rate, name="{}_dropout".format(name), training=is_training) 1368 | + tf.compat.v1.layers.dropout(layers[-1], dropout_rate, name="{}_dropout".format(name), training=is_training) 1369 | ) 1370 | 1371 | # remove the input layer from the layers list 1372 | @@ -116,7 +116,7 @@ class Conv2d(layers.Conv2d): 1373 | :param name: layer name 1374 | :return: conv2d layer 1375 | """ 1376 | - return tf.layers.conv2d(input_layer, filters=self.num_filters, kernel_size=self.kernel_size, 1377 | + return tf.compat.v1.layers.conv2d(input_layer, filters=self.num_filters, kernel_size=self.kernel_size, 1378 | strides=self.strides, data_format='channels_last', name=name) 1379 | 1380 | @staticmethod 1381 | @@ -177,8 +177,8 @@ class Dense(layers.Dense): 1382 | :return: dense layer 1383 | """ 1384 | if bias_initializer is None: 1385 | - bias_initializer = tf.zeros_initializer() 1386 | - return tf.layers.dense(input_layer, self.units, name=name, kernel_initializer=kernel_initializer, 1387 | + bias_initializer = tf.compat.v1.zeros_initializer() 1388 | + return tf.compat.v1.layers.dense(input_layer, self.units, name=name, kernel_initializer=kernel_initializer, 1389 | activation=activation, bias_initializer=bias_initializer) 1390 | 1391 | @staticmethod 1392 | @@ -222,8 +222,8 @@ class NoisyNetDense(layers.NoisyNetDense): 1393 | def _factorized_noise(inputs, outputs): 1394 | # TODO: use factorized noise only for compute intensive algos (e.g. DQN). 1395 | # lighter algos (e.g. 
DQN) should not use it 1396 | - noise1 = _f(tf.random_normal((inputs, 1))) 1397 | - noise2 = _f(tf.random_normal((1, outputs))) 1398 | + noise1 = _f(tf.random.normal((inputs, 1))) 1399 | + noise2 = _f(tf.random.normal((1, outputs))) 1400 | return tf.matmul(noise1, noise2) 1401 | 1402 | num_inputs = input_layer.get_shape()[-1].value 1403 | @@ -233,22 +233,22 @@ class NoisyNetDense(layers.NoisyNetDense): 1404 | activation = activation if activation is not None else (lambda x: x) 1405 | 1406 | if kernel_initializer is None: 1407 | - kernel_mean_initializer = tf.random_uniform_initializer(-stddev, stddev) 1408 | - kernel_stddev_initializer = tf.random_uniform_initializer(-stddev * self.sigma0, stddev * self.sigma0) 1409 | + kernel_mean_initializer = tf.compat.v1.random_uniform_initializer(-stddev, stddev) 1410 | + kernel_stddev_initializer = tf.compat.v1.random_uniform_initializer(-stddev * self.sigma0, stddev * self.sigma0) 1411 | else: 1412 | kernel_mean_initializer = kernel_stddev_initializer = kernel_initializer 1413 | if bias_initializer is None: 1414 | - bias_initializer = tf.zeros_initializer() 1415 | - with tf.variable_scope(None, default_name=name): 1416 | - weight_mean = tf.get_variable('weight_mean', shape=(num_inputs, num_outputs), 1417 | + bias_initializer = tf.compat.v1.zeros_initializer() 1418 | + with tf.compat.v1.variable_scope(None, default_name=name): 1419 | + weight_mean = tf.compat.v1.get_variable('weight_mean', shape=(num_inputs, num_outputs), 1420 | initializer=kernel_mean_initializer) 1421 | - bias_mean = tf.get_variable('bias_mean', shape=(num_outputs,), initializer=bias_initializer) 1422 | + bias_mean = tf.compat.v1.get_variable('bias_mean', shape=(num_outputs,), initializer=bias_initializer) 1423 | 1424 | - weight_stddev = tf.get_variable('weight_stddev', shape=(num_inputs, num_outputs), 1425 | + weight_stddev = tf.compat.v1.get_variable('weight_stddev', shape=(num_inputs, num_outputs), 1426 | initializer=kernel_stddev_initializer) 1427 | - bias_stddev = tf.get_variable('bias_stddev', shape=(num_outputs,), 1428 | + bias_stddev = tf.compat.v1.get_variable('bias_stddev', shape=(num_outputs,), 1429 | initializer=kernel_stddev_initializer) 1430 | - bias_noise = _f(tf.random_normal((num_outputs,))) 1431 | + bias_noise = _f(tf.random.normal((num_outputs,))) 1432 | weight_noise = _factorized_noise(num_inputs, num_outputs) 1433 | 1434 | bias = bias_mean + bias_stddev * bias_noise 1435 | diff --git a/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py 1436 | index 6ca9cd7..f4005d9 100644 1437 | --- a/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py 1438 | +++ b/rl_coach/architectures/tensorflow_components/middlewares/lstm_middleware.py 1439 | @@ -57,17 +57,17 @@ class LSTMMiddleware(Middleware): 1440 | )) 1441 | 1442 | # add the LSTM layer 1443 | - lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.number_of_lstm_cells, state_is_tuple=True) 1444 | + lstm_cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(self.number_of_lstm_cells, state_is_tuple=True) 1445 | self.c_init = np.zeros((1, lstm_cell.state_size.c), np.float32) 1446 | self.h_init = np.zeros((1, lstm_cell.state_size.h), np.float32) 1447 | self.state_init = [self.c_init, self.h_init] 1448 | - self.c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c]) 1449 | - self.h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h]) 1450 | + self.c_in = tf.compat.v1.placeholder(tf.float32, [1, 
lstm_cell.state_size.c]) 1451 | + self.h_in = tf.compat.v1.placeholder(tf.float32, [1, lstm_cell.state_size.h]) 1452 | self.state_in = (self.c_in, self.h_in) 1453 | rnn_in = tf.expand_dims(self.layers[-1], [0]) 1454 | step_size = tf.shape(self.layers[-1])[:1] 1455 | - state_in = tf.nn.rnn_cell.LSTMStateTuple(self.c_in, self.h_in) 1456 | - lstm_outputs, lstm_state = tf.nn.dynamic_rnn( 1457 | + state_in = tf.compat.v1.nn.rnn_cell.LSTMStateTuple(self.c_in, self.h_in) 1458 | + lstm_outputs, lstm_state = tf.compat.v1.nn.dynamic_rnn( 1459 | lstm_cell, rnn_in, initial_state=state_in, sequence_length=step_size, time_major=False) 1460 | lstm_c, lstm_h = lstm_state 1461 | self.state_out = (lstm_c[:1, :], lstm_h[:1, :]) 1462 | diff --git a/rl_coach/architectures/tensorflow_components/middlewares/middleware.py b/rl_coach/architectures/tensorflow_components/middlewares/middleware.py 1463 | index 64c578f..b52d262 100644 1464 | --- a/rl_coach/architectures/tensorflow_components/middlewares/middleware.py 1465 | +++ b/rl_coach/architectures/tensorflow_components/middlewares/middleware.py 1466 | @@ -71,7 +71,7 @@ class Middleware(object): 1467 | :param input_layer: the input to the graph 1468 | :return: the input placeholder and the output of the last layer 1469 | """ 1470 | - with tf.variable_scope(self.get_name()): 1471 | + with tf.compat.v1.variable_scope(self.get_name()): 1472 | self.input = input_layer 1473 | self._build_module() 1474 | 1475 | diff --git a/rl_coach/architectures/tensorflow_components/savers.py b/rl_coach/architectures/tensorflow_components/savers.py 1476 | index 531c523..ae92826 100644 1477 | --- a/rl_coach/architectures/tensorflow_components/savers.py 1478 | +++ b/rl_coach/architectures/tensorflow_components/savers.py 1479 | @@ -28,22 +28,22 @@ class GlobalVariableSaver(Saver): 1480 | self._names = [name] 1481 | # if graph is finalized, savers must have already already been added. This happens 1482 | # in the case of a MonitoredSession 1483 | - self._variables = tf.global_variables() 1484 | + self._variables = tf.compat.v1.trainable_variables() 1485 | 1486 | # target network is never saved or restored directly from checkpoint, so we are removing all its variables from the list 1487 | # the target network would be synched back from the online network in graph_manager.improve(...), at the beginning of the run flow. 1488 | - self._variables = [v for v in self._variables if "/target" not in v.name] 1489 | + self._variables = [v for v in self._variables if ('/target' not in v.name and name.split('/')[0]+'/' in v.name)] 1490 | 1491 | # Using a placeholder to update the variable during restore to avoid memory leak. 
1492 | # Ref: https://github.com/tensorflow/tensorflow/issues/4151 1493 | self._variable_placeholders = [] 1494 | self._variable_update_ops = [] 1495 | for v in self._variables: 1496 | - variable_placeholder = tf.placeholder(v.dtype, shape=v.get_shape()) 1497 | + variable_placeholder = tf.compat.v1.placeholder(v.dtype, shape=v.get_shape()) 1498 | self._variable_placeholders.append(variable_placeholder) 1499 | self._variable_update_ops.append(v.assign(variable_placeholder)) 1500 | 1501 | - self._saver = tf.train.Saver(self._variables, max_to_keep=None) 1502 | + self._saver = tf.compat.v1.train.Saver(self._variables, max_to_keep=None) 1503 | 1504 | @property 1505 | def path(self): 1506 | @@ -118,7 +118,7 @@ class GlobalVariableSaver(Saver): 1507 | # We don't use saver.restore() because checkpoint is loaded to online 1508 | # network, but if the checkpoint is from the global network, a namespace 1509 | # mismatch exists and variable name must be modified before loading. 1510 | - reader = tf.contrib.framework.load_checkpoint(restore_path) 1511 | + reader = tf.train.load_checkpoint(restore_path) 1512 | for var_name, _ in reader.get_variable_to_shape_map().items(): 1513 | yield var_name, reader.get_tensor(var_name) 1514 | 1515 | diff --git a/rl_coach/architectures/tensorflow_components/shared_variables.py b/rl_coach/architectures/tensorflow_components/shared_variables.py 1516 | index fe805af..5278b70 100644 1517 | --- a/rl_coach/architectures/tensorflow_components/shared_variables.py 1518 | +++ b/rl_coach/architectures/tensorflow_components/shared_variables.py 1519 | @@ -21,7 +21,7 @@ import tensorflow as tf 1520 | 1521 | from rl_coach.utilities.shared_running_stats import SharedRunningStats 1522 | 1523 | - 1524 | +tf.compat.v1.disable_resource_variables() 1525 | class TFSharedRunningStats(SharedRunningStats): 1526 | def __init__(self, replicated_device=None, epsilon=1e-2, name="", create_ops=True, pubsub_params=None): 1527 | super().__init__(name=name, pubsub_params=pubsub_params) 1528 | @@ -42,39 +42,39 @@ class TFSharedRunningStats(SharedRunningStats): 1529 | """ 1530 | 1531 | self.clip_values = clip_values 1532 | - with tf.variable_scope(self.name): 1533 | - self._sum = tf.get_variable( 1534 | + with tf.compat.v1.variable_scope(self.name): 1535 | + self._sum = tf.compat.v1.get_variable( 1536 | dtype=tf.float64, 1537 | - initializer=tf.constant_initializer(0.0), 1538 | + initializer=tf.compat.v1.constant_initializer(0.0), 1539 | name="running_sum", trainable=False, shape=shape, validate_shape=False, 1540 | - collections=[tf.GraphKeys.GLOBAL_VARIABLES]) 1541 | - self._sum_squares = tf.get_variable( 1542 | + collections=[tf.compat.v1.GraphKeys.GLOBAL_VARIABLES]) 1543 | + self._sum_squares = tf.compat.v1.get_variable( 1544 | dtype=tf.float64, 1545 | - initializer=tf.constant_initializer(self.epsilon), 1546 | + initializer=tf.compat.v1.constant_initializer(self.epsilon), 1547 | name="running_sum_squares", trainable=False, shape=shape, validate_shape=False, 1548 | - collections=[tf.GraphKeys.GLOBAL_VARIABLES]) 1549 | - self._count = tf.get_variable( 1550 | + collections=[tf.compat.v1.GraphKeys.GLOBAL_VARIABLES]) 1551 | + self._count = tf.compat.v1.get_variable( 1552 | dtype=tf.float64, 1553 | shape=(), 1554 | - initializer=tf.constant_initializer(self.epsilon), 1555 | - name="count", trainable=False, collections=[tf.GraphKeys.GLOBAL_VARIABLES]) 1556 | + initializer=tf.compat.v1.constant_initializer(self.epsilon), 1557 | + name="count", trainable=False, 
collections=[tf.compat.v1.GraphKeys.GLOBAL_VARIABLES]) 1558 | 1559 | self._shape = None 1560 | - self._mean = tf.div(self._sum, self._count, name="mean") 1561 | + self._mean = tf.compat.v1.div(self._sum, self._count, name="mean") 1562 | self._std = tf.sqrt(tf.maximum((self._sum_squares - self._count * tf.square(self._mean)) 1563 | / tf.maximum(self._count-1, 1), self.epsilon), name="stdev") 1564 | self.tf_mean = tf.cast(self._mean, 'float32') 1565 | self.tf_std = tf.cast(self._std, 'float32') 1566 | 1567 | - self.new_sum = tf.placeholder(dtype=tf.float64, name='sum') 1568 | - self.new_sum_squares = tf.placeholder(dtype=tf.float64, name='var') 1569 | - self.newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count') 1570 | + self.new_sum = tf.compat.v1.placeholder(dtype=tf.float64, name='sum') 1571 | + self.new_sum_squares = tf.compat.v1.placeholder(dtype=tf.float64, name='var') 1572 | + self.newcount = tf.compat.v1.placeholder(shape=[], dtype=tf.float64, name='count') 1573 | 1574 | - self._inc_sum = tf.assign_add(self._sum, self.new_sum, use_locking=True) 1575 | - self._inc_sum_squares = tf.assign_add(self._sum_squares, self.new_sum_squares, use_locking=True) 1576 | - self._inc_count = tf.assign_add(self._count, self.newcount, use_locking=True) 1577 | + self._inc_sum = tf.compat.v1.assign_add(self._sum, self.new_sum, use_locking=True) 1578 | + self._inc_sum_squares = tf.compat.v1.assign_add(self._sum_squares, self.new_sum_squares, use_locking=True) 1579 | + self._inc_count = tf.compat.v1.assign_add(self._count, self.newcount, use_locking=True) 1580 | 1581 | - self.raw_obs = tf.placeholder(dtype=tf.float64, name='raw_obs') 1582 | + self.raw_obs = tf.compat.v1.placeholder(dtype=tf.float64, name='raw_obs') 1583 | self.normalized_obs = (self.raw_obs - self._mean) / self._std 1584 | if self.clip_values is not None: 1585 | self.clipped_obs = tf.clip_by_value(self.normalized_obs, self.clip_values[0], self.clip_values[1]) 1586 | diff --git a/rl_coach/core_types.py b/rl_coach/core_types.py 1587 | index c173318..58fd0bc 100644 1588 | --- a/rl_coach/core_types.py 1589 | +++ b/rl_coach/core_types.py 1590 | @@ -182,6 +182,7 @@ class RunPhase(Enum): 1591 | TRAIN = "Training" 1592 | TEST = "Testing" 1593 | UNDEFINED = "Undefined" 1594 | + WAITING = "Waiting" 1595 | 1596 | 1597 | # transitions 1598 | diff --git a/rl_coach/data_stores/s3_data_store.py b/rl_coach/data_stores/s3_data_store.py 1599 | index 959422a..42737dd 100644 1600 | --- a/rl_coach/data_stores/s3_data_store.py 1601 | +++ b/rl_coach/data_stores/s3_data_store.py 1602 | @@ -17,7 +17,7 @@ 1603 | 1604 | from rl_coach.data_stores.data_store import DataStore, DataStoreParameters 1605 | from minio import Minio 1606 | -from minio.error import ResponseError 1607 | +from minio.error import InvalidResponseError 1608 | from configparser import ConfigParser, Error 1609 | from rl_coach.checkpoint import CheckpointStateFile 1610 | from rl_coach.data_stores.data_store import SyncFiles 1611 | @@ -133,7 +133,7 @@ class S3DataStore(DataStore): 1612 | for filename in os.listdir(os.path.join(self.params.expt_dir, 'gifs')): 1613 | self.mc.fput_object(self.params.bucket_name, filename, os.path.join(self.params.expt_dir, 'gifs', filename)) 1614 | 1615 | - except ResponseError as e: 1616 | + except InvalidResponseError as e: 1617 | print("Got exception: %s\n while saving to S3", e) 1618 | 1619 | def load_from_store(self): 1620 | @@ -189,7 +189,7 @@ class S3DataStore(DataStore): 1621 | if not os.path.exists(filename): 1622 | 
self.mc.fget_object(obj.bucket_name, obj.object_name, filename) 1623 | 1624 | - except ResponseError as e: 1625 | + except InvalidResponseError as e: 1626 | print("Got exception: %s\n while loading from S3", e) 1627 | 1628 | def setup_checkpoint_dir(self, crd=None): 1629 | diff --git a/rl_coach/filters/observation/observation_normalization_filter.py b/rl_coach/filters/observation/observation_normalization_filter.py 1630 | index 791b345..db9e104 100644 1631 | --- a/rl_coach/filters/observation/observation_normalization_filter.py 1632 | +++ b/rl_coach/filters/observation/observation_normalization_filter.py 1633 | @@ -87,4 +87,3 @@ class ObservationNormalizationFilter(ObservationFilter): 1634 | 1635 | def restore_state_from_checkpoint(self, checkpoint_dir: str, checkpoint_prefix: str): 1636 | self.running_observation_stats.restore_state_from_checkpoint(checkpoint_dir, checkpoint_prefix) 1637 | - 1638 | \ No newline at end of file 1639 | diff --git a/rl_coach/graph_managers/graph_manager.py b/rl_coach/graph_managers/graph_manager.py 1640 | index 60afcee..10ae965 100644 1641 | --- a/rl_coach/graph_managers/graph_manager.py 1642 | +++ b/rl_coach/graph_managers/graph_manager.py 1643 | @@ -173,7 +173,7 @@ class GraphManager(object): 1644 | @staticmethod 1645 | def _create_worker_or_parameters_server_tf(task_parameters: DistributedTaskParameters): 1646 | import tensorflow as tf 1647 | - config = tf.ConfigProto() 1648 | + config = tf.compat.v1.ConfigProto() 1649 | config.allow_soft_placement = True # allow placing ops on cpu if they are not fit for gpu 1650 | config.gpu_options.allow_growth = True # allow the gpu memory allocated for the worker to grow if needed 1651 | config.gpu_options.per_process_gpu_memory_fraction = 0.2 1652 | @@ -212,7 +212,7 @@ class GraphManager(object): 1653 | 1654 | def _create_session_tf(self, task_parameters: TaskParameters): 1655 | import tensorflow as tf 1656 | - config = tf.ConfigProto() 1657 | + config = tf.compat.v1.ConfigProto() 1658 | config.allow_soft_placement = True # allow placing ops on cpu if they are not fit for gpu 1659 | config.gpu_options.allow_growth = True # allow the gpu memory allocated for the worker to grow if needed 1660 | # config.gpu_options.per_process_gpu_memory_fraction = 0.2 1661 | @@ -241,7 +241,7 @@ class GraphManager(object): 1662 | self.set_session(self.sess) 1663 | else: 1664 | # regular session 1665 | - self.sess = tf.Session(config=config) 1666 | + self.sess = tf.compat.v1.Session(config=config) 1667 | # set the session for all the modules 1668 | self.set_session(self.sess) 1669 | 1670 | @@ -278,7 +278,7 @@ class GraphManager(object): 1671 | import tensorflow as tf 1672 | 1673 | # write graph 1674 | - tf.train.write_graph(tf.get_default_graph(), 1675 | + tf.io.write_graph(tf.compat.v1.get_default_graph(), 1676 | logdir=self.task_parameters.checkpoint_save_dir, 1677 | name='graphdef.pb', 1678 | as_text=False) 1679 | diff --git a/rl_coach/presets/Acrobot_DDQN_BCQ_BatchRL.py b/rl_coach/presets/Acrobot_DDQN_BCQ_BatchRL.py 1680 | index cda8a45..43bd5a9 100644 1681 | --- a/rl_coach/presets/Acrobot_DDQN_BCQ_BatchRL.py 1682 | +++ b/rl_coach/presets/Acrobot_DDQN_BCQ_BatchRL.py 1683 | @@ -34,7 +34,7 @@ schedule_params.heatup_steps = EnvironmentSteps(DATASET_SIZE) 1684 | agent_params = DDQNBCQAgentParameters() 1685 | agent_params.network_wrappers['main'].batch_size = 128 1686 | # TODO cross-DL framework abstraction for a constant initializer? 
1687 | -agent_params.network_wrappers['main'].heads_parameters = [QHeadParameters(output_bias_initializer=tf.constant_initializer(-100))] 1688 | +agent_params.network_wrappers['main'].heads_parameters = [QHeadParameters(output_bias_initializer=tf.compat.v1.constant_initializer(-100))] 1689 | 1690 | agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps( 1691 | 100) 1692 | @@ -77,7 +77,7 @@ experience_generating_agent_params.network_wrappers['main'].learning_rate = 0.00 1693 | experience_generating_agent_params.network_wrappers['main'].batch_size = 128 1694 | experience_generating_agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False 1695 | experience_generating_agent_params.network_wrappers['main'].heads_parameters = \ 1696 | -[QHeadParameters(output_bias_initializer=tf.constant_initializer(-100))] 1697 | +[QHeadParameters(output_bias_initializer=tf.compat.v1.constant_initializer(-100))] 1698 | 1699 | # ER size 1700 | experience_generating_agent_params.memory = EpisodicExperienceReplayParameters() 1701 | diff --git a/rl_coach/tests/agents/test_agent_external_communication.py b/rl_coach/tests/agents/test_agent_external_communication.py 1702 | index 77f0a89..aa6a78b 100644 1703 | --- a/rl_coach/tests/agents/test_agent_external_communication.py 1704 | +++ b/rl_coach/tests/agents/test_agent_external_communication.py 1705 | @@ -12,7 +12,7 @@ logging.set_verbosity(logging.INFO) 1706 | 1707 | @pytest.mark.unit_test 1708 | def test_get_QActionStateValue_predictions(): 1709 | - tf.reset_default_graph() 1710 | + tf.compat.v1.reset_default_graph() 1711 | from rl_coach.presets.CartPole_DQN import graph_manager as cartpole_dqn_graph_manager 1712 | assert cartpole_dqn_graph_manager 1713 | cartpole_dqn_graph_manager.create_graph(task_parameters= 1714 | diff --git a/rl_coach/tests/architectures/tensorflow_components/embedders/test_identity_embedder.py b/rl_coach/tests/architectures/tensorflow_components/embedders/test_identity_embedder.py 1715 | index 23ca834..337e549 100644 1716 | --- a/rl_coach/tests/architectures/tensorflow_components/embedders/test_identity_embedder.py 1717 | +++ b/rl_coach/tests/architectures/tensorflow_components/embedders/test_identity_embedder.py 1718 | @@ -15,7 +15,7 @@ logging.set_verbosity(logging.INFO) 1719 | 1720 | @pytest.fixture 1721 | def reset(): 1722 | - tf.reset_default_graph() 1723 | + tf.compat.v1.reset_default_graph() 1724 | 1725 | 1726 | @pytest.mark.unit_test 1727 | @@ -23,17 +23,17 @@ def test_embedder(reset): 1728 | embedder = VectorEmbedder(np.array([10, 10]), name="test", scheme=EmbedderScheme.Empty) 1729 | 1730 | # make sure the ops where not created yet 1731 | - assert len(tf.get_default_graph().get_operations()) == 0 1732 | + assert len(tf.compat.v1.get_default_graph().get_operations()) == 0 1733 | 1734 | # call the embedder 1735 | input_ph, output_ph = embedder() 1736 | 1737 | # make sure that now the ops were created 1738 | - assert len(tf.get_default_graph().get_operations()) > 0 1739 | + assert len(tf.compat.v1.get_default_graph().get_operations()) > 0 1740 | 1741 | # try feeding a batch of one example # TODO: consider auto converting to batch 1742 | input = np.random.rand(1, 10, 10) 1743 | - sess = tf.Session() 1744 | + sess = tf.compat.v1.Session() 1745 | output = sess.run(embedder.output, {embedder.input: input}) 1746 | assert output.shape == (1, 100) # should have flattened the input 1747 | 1748 | diff --git a/rl_coach/tests/architectures/tensorflow_components/embedders/test_image_embedder.py 
b/rl_coach/tests/architectures/tensorflow_components/embedders/test_image_embedder.py 1749 | index 65076d1..efc7584 100644 1750 | --- a/rl_coach/tests/architectures/tensorflow_components/embedders/test_image_embedder.py 1751 | +++ b/rl_coach/tests/architectures/tensorflow_components/embedders/test_image_embedder.py 1752 | @@ -12,7 +12,7 @@ logging.set_verbosity(logging.INFO) 1753 | 1754 | @pytest.fixture 1755 | def reset(): 1756 | - tf.reset_default_graph() 1757 | + tf.compat.v1.reset_default_graph() 1758 | 1759 | 1760 | @pytest.mark.unit_test 1761 | @@ -26,24 +26,24 @@ def test_embedder(reset): 1762 | embedder = ImageEmbedder(np.array([10, 100, 100, 100]), name="test") 1763 | 1764 | 1765 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1766 | - pre_ops = len(tf.get_default_graph().get_operations()) 1767 | + is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1768 | + pre_ops = len(tf.compat.v1.get_default_graph().get_operations()) 1769 | # creating a simple image embedder 1770 | embedder = ImageEmbedder(np.array([100, 100, 10]), name="test", is_training=is_training) 1771 | 1772 | # make sure the only the is_training op is creates 1773 | - assert len(tf.get_default_graph().get_operations()) == pre_ops 1774 | + assert len(tf.compat.v1.get_default_graph().get_operations()) == pre_ops 1775 | 1776 | # call the embedder 1777 | input_ph, output_ph = embedder() 1778 | 1779 | # make sure that now the ops were created 1780 | - assert len(tf.get_default_graph().get_operations()) > pre_ops 1781 | + assert len(tf.compat.v1.get_default_graph().get_operations()) > pre_ops 1782 | 1783 | # try feeding a batch of one example 1784 | input = np.random.rand(1, 100, 100, 10) 1785 | - sess = tf.Session() 1786 | - sess.run(tf.global_variables_initializer()) 1787 | + sess = tf.compat.v1.Session() 1788 | + sess.run(tf.compat.v1.global_variables_initializer()) 1789 | output = sess.run(embedder.output, {embedder.input: input}) 1790 | assert output.shape == (1, 5184) 1791 | 1792 | @@ -58,7 +58,7 @@ def test_embedder(reset): 1793 | @pytest.mark.unit_test 1794 | def test_complex_embedder(reset): 1795 | # creating a deep vector embedder 1796 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1797 | + is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1798 | embedder = ImageEmbedder(np.array([100, 100, 10]), name="test", scheme=EmbedderScheme.Deep, 1799 | is_training=is_training) 1800 | 1801 | @@ -67,8 +67,8 @@ def test_complex_embedder(reset): 1802 | 1803 | # try feeding a batch of one example 1804 | input = np.random.rand(1, 100, 100, 10) 1805 | - sess = tf.Session() 1806 | - sess.run(tf.global_variables_initializer()) 1807 | + sess = tf.compat.v1.Session() 1808 | + sess.run(tf.compat.v1.global_variables_initializer()) 1809 | output = sess.run(embedder.output, {embedder.input: input}) 1810 | assert output.shape == (1, 256) # should have flattened the input 1811 | 1812 | @@ -76,7 +76,7 @@ def test_complex_embedder(reset): 1813 | @pytest.mark.unit_test 1814 | def test_activation_function(reset): 1815 | # creating a deep image embedder with relu 1816 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1817 | + is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1818 | embedder = ImageEmbedder(np.array([100, 100, 10]), 
name="relu", scheme=EmbedderScheme.Deep, 1819 | activation_function=tf.nn.relu, is_training=is_training) 1820 | 1821 | @@ -85,8 +85,8 @@ def test_activation_function(reset): 1822 | 1823 | # try feeding a batch of one example 1824 | input = np.random.rand(1, 100, 100, 10) 1825 | - sess = tf.Session() 1826 | - sess.run(tf.global_variables_initializer()) 1827 | + sess = tf.compat.v1.Session() 1828 | + sess.run(tf.compat.v1.global_variables_initializer()) 1829 | output = sess.run(embedder.output, {embedder.input: input}) 1830 | assert np.all(output >= 0) # should have flattened the input 1831 | 1832 | @@ -99,7 +99,7 @@ def test_activation_function(reset): 1833 | 1834 | # try feeding a batch of one example 1835 | input = np.random.rand(1, 100, 100, 10) 1836 | - sess = tf.Session() 1837 | - sess.run(tf.global_variables_initializer()) 1838 | + sess = tf.compat.v1.Session() 1839 | + sess.run(tf.compat.v1.global_variables_initializer()) 1840 | output = sess.run(embedder_tanh.output, {embedder_tanh.input: input}) 1841 | assert np.all(output >= -1) and np.all(output <= 1) 1842 | diff --git a/rl_coach/tests/architectures/tensorflow_components/embedders/test_vector_embedder.py b/rl_coach/tests/architectures/tensorflow_components/embedders/test_vector_embedder.py 1843 | index 73482f9..400a738 100644 1844 | --- a/rl_coach/tests/architectures/tensorflow_components/embedders/test_vector_embedder.py 1845 | +++ b/rl_coach/tests/architectures/tensorflow_components/embedders/test_vector_embedder.py 1846 | @@ -12,7 +12,7 @@ logging.set_verbosity(logging.INFO) 1847 | 1848 | @pytest.fixture 1849 | def reset(): 1850 | - tf.reset_default_graph() 1851 | + tf.compat.v1.reset_default_graph() 1852 | 1853 | 1854 | @pytest.mark.unit_test 1855 | @@ -22,24 +22,24 @@ def test_embedder(reset): 1856 | embedder = VectorEmbedder(np.array([10, 10]), name="test") 1857 | 1858 | # creating a simple vector embedder 1859 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1860 | - pre_ops = len(tf.get_default_graph().get_operations()) 1861 | + is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1862 | + pre_ops = len(tf.compat.v1.get_default_graph().get_operations()) 1863 | 1864 | embedder = VectorEmbedder(np.array([10]), name="test", is_training=is_training) 1865 | 1866 | # make sure the ops where not created yet 1867 | - assert len(tf.get_default_graph().get_operations()) == pre_ops 1868 | + assert len(tf.compat.v1.get_default_graph().get_operations()) == pre_ops 1869 | 1870 | # call the embedder 1871 | input_ph, output_ph = embedder() 1872 | 1873 | # make sure that now the ops were created 1874 | - assert len(tf.get_default_graph().get_operations()) > pre_ops 1875 | + assert len(tf.compat.v1.get_default_graph().get_operations()) > pre_ops 1876 | 1877 | # try feeding a batch of one example 1878 | input = np.random.rand(1, 10) 1879 | - sess = tf.Session() 1880 | - sess.run(tf.global_variables_initializer()) 1881 | + sess = tf.compat.v1.Session() 1882 | + sess.run(tf.compat.v1.global_variables_initializer()) 1883 | output = sess.run(embedder.output, {embedder.input: input}) 1884 | assert output.shape == (1, 256) 1885 | 1886 | @@ -54,7 +54,7 @@ def test_embedder(reset): 1887 | @pytest.mark.unit_test 1888 | def test_complex_embedder(reset): 1889 | # creating a deep vector embedder 1890 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1891 | + is_training = tf.Variable(False, trainable=False, 
collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1892 | embedder = VectorEmbedder(np.array([10]), name="test", scheme=EmbedderScheme.Deep, is_training=is_training) 1893 | 1894 | # call the embedder 1895 | @@ -62,8 +62,8 @@ def test_complex_embedder(reset): 1896 | 1897 | # try feeding a batch of one example 1898 | input = np.random.rand(1, 10) 1899 | - sess = tf.Session() 1900 | - sess.run(tf.global_variables_initializer()) 1901 | + sess = tf.compat.v1.Session() 1902 | + sess.run(tf.compat.v1.global_variables_initializer()) 1903 | output = sess.run(embedder.output, {embedder.input: input}) 1904 | assert output.shape == (1, 128) # should have flattened the input 1905 | 1906 | @@ -71,7 +71,7 @@ def test_complex_embedder(reset): 1907 | @pytest.mark.unit_test 1908 | def test_activation_function(reset): 1909 | # creating a deep vector embedder with relu 1910 | - is_training = tf.Variable(False, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) 1911 | + is_training = tf.Variable(False, trainable=False, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) 1912 | embedder = VectorEmbedder(np.array([10]), name="relu", scheme=EmbedderScheme.Deep, 1913 | activation_function=tf.nn.relu, is_training=is_training) 1914 | 1915 | @@ -80,8 +80,8 @@ def test_activation_function(reset): 1916 | 1917 | # try feeding a batch of one example 1918 | input = np.random.rand(1, 10) 1919 | - sess = tf.Session() 1920 | - sess.run(tf.global_variables_initializer()) 1921 | + sess = tf.compat.v1.Session() 1922 | + sess.run(tf.compat.v1.global_variables_initializer()) 1923 | output = sess.run(embedder.output, {embedder.input: input}) 1924 | assert np.all(output >= 0) # should have flattened the input 1925 | 1926 | @@ -94,7 +94,7 @@ def test_activation_function(reset): 1927 | 1928 | # try feeding a batch of one example 1929 | input = np.random.rand(1, 10) 1930 | - sess = tf.Session() 1931 | - sess.run(tf.global_variables_initializer()) 1932 | + sess = tf.compat.v1.Session() 1933 | + sess.run(tf.compat.v1.global_variables_initializer()) 1934 | output = sess.run(embedder_tanh.output, {embedder_tanh.input: input}) 1935 | assert np.all(output >= -1) and np.all(output <= 1) 1936 | diff --git a/rl_coach/tests/graph_managers/test_basic_rl_graph_manager.py b/rl_coach/tests/graph_managers/test_basic_rl_graph_manager.py 1937 | index 4e30312..c10a99f 100644 1938 | --- a/rl_coach/tests/graph_managers/test_basic_rl_graph_manager.py 1939 | +++ b/rl_coach/tests/graph_managers/test_basic_rl_graph_manager.py 1940 | @@ -14,7 +14,7 @@ logging.set_verbosity(logging.INFO) 1941 | 1942 | @pytest.mark.unit_test 1943 | def test_basic_rl_graph_manager_with_pong_a3c(): 1944 | - tf.reset_default_graph() 1945 | + tf.compat.v1.reset_default_graph() 1946 | from rl_coach.presets.Atari_A3C import graph_manager 1947 | assert graph_manager 1948 | graph_manager.env_params.level = "PongDeterministic-v4" 1949 | @@ -25,7 +25,7 @@ def test_basic_rl_graph_manager_with_pong_a3c(): 1950 | 1951 | @pytest.mark.unit_test 1952 | def test_basic_rl_graph_manager_with_pong_nec(): 1953 | - tf.reset_default_graph() 1954 | + tf.compat.v1.reset_default_graph() 1955 | from rl_coach.presets.Atari_NEC import graph_manager 1956 | assert graph_manager 1957 | graph_manager.env_params.level = "PongDeterministic-v4" 1958 | @@ -36,7 +36,7 @@ def test_basic_rl_graph_manager_with_pong_nec(): 1959 | 1960 | @pytest.mark.unit_test 1961 | def test_basic_rl_graph_manager_with_cartpole_dqn(): 1962 | - tf.reset_default_graph() 1963 | + tf.compat.v1.reset_default_graph() 1964 | 
from rl_coach.presets.CartPole_DQN import graph_manager 1965 | assert graph_manager 1966 | graph_manager.create_graph(task_parameters=TaskParameters(framework_type=Frameworks.tensorflow, 1967 | @@ -46,7 +46,7 @@ def test_basic_rl_graph_manager_with_cartpole_dqn(): 1968 | # Test for identifying memory leak in restore_checkpoint 1969 | @pytest.mark.unit_test 1970 | def test_basic_rl_graph_manager_with_cartpole_dqn_and_repeated_checkpoint_restore(): 1971 | - tf.reset_default_graph() 1972 | + tf.compat.v1.reset_default_graph() 1973 | from rl_coach.presets.CartPole_DQN import graph_manager 1974 | assert graph_manager 1975 | graph_manager.create_graph(task_parameters=TaskParameters(framework_type=Frameworks.tensorflow, 1976 | diff --git a/rl_coach/tests/memories/test_differential_neural_dictionary.py b/rl_coach/tests/memories/test_differential_neural_dictionary.py 1977 | index 461b4e5..eb1a0be 100644 1978 | --- a/rl_coach/tests/memories/test_differential_neural_dictionary.py 1979 | +++ b/rl_coach/tests/memories/test_differential_neural_dictionary.py 1980 | @@ -43,8 +43,8 @@ def test_random_sample_from_dnd(dnd: QDND): 1981 | # calculate_normalization_factor 1982 | sampled_embeddings = dnd.sample_embeddings(NUM_SAMPLED_EMBEDDINGS) 1983 | coefficient = 1/(NUM_SAMPLED_EMBEDDINGS * (NUM_SAMPLED_EMBEDDINGS - 1.0)) 1984 | - tf_current_embedding = tf.placeholder(tf.float32, shape=(EMBEDDING_SIZE), name='current_embedding') 1985 | - tf_other_embeddings = tf.placeholder(tf.float32, shape=(NUM_SAMPLED_EMBEDDINGS - 1, EMBEDDING_SIZE), name='other_embeddings') 1986 | + tf_current_embedding = tf.compat.v1.placeholder(tf.float32, shape=(EMBEDDING_SIZE), name='current_embedding') 1987 | + tf_other_embeddings = tf.compat.v1.placeholder(tf.float32, shape=(NUM_SAMPLED_EMBEDDINGS - 1, EMBEDDING_SIZE), name='other_embeddings') 1988 | 1989 | sub = tf_current_embedding - tf_other_embeddings 1990 | square = tf.square(sub) 1991 | @@ -55,7 +55,7 @@ def test_random_sample_from_dnd(dnd: QDND): 1992 | ########################### 1993 | # more efficient method 1994 | ########################### 1995 | - sampled_embeddings_expanded = tf.placeholder( 1996 | + sampled_embeddings_expanded = tf.compat.v1.placeholder( 1997 | tf.float32, shape=(1, NUM_SAMPLED_EMBEDDINGS, EMBEDDING_SIZE), name='sampled_embeddings_expanded') 1998 | sampled_embeddings_tiled = tf.tile(sampled_embeddings_expanded, (sampled_embeddings_expanded.shape[1], 1, 1)) 1999 | sampled_embeddings_transposed = tf.transpose(sampled_embeddings_tiled, (1, 0, 2)) 2000 | @@ -63,11 +63,11 @@ def test_random_sample_from_dnd(dnd: QDND): 2001 | square2 = tf.square(sub2) 2002 | result2 = tf.reduce_sum(square2) 2003 | 2004 | - config = tf.ConfigProto() 2005 | + config = tf.compat.v1.ConfigProto() 2006 | config.allow_soft_placement = True # allow placing ops on cpu if they are not fit for gpu 2007 | config.gpu_options.allow_growth = True # allow the gpu memory allocated for the worker to grow if needed 2008 | 2009 | - sess = tf.Session(config=config) 2010 | + sess = tf.compat.v1.Session(config=config) 2011 | 2012 | sum1 = 0 2013 | start = time.time() 2014 | diff --git a/rl_coach/tests/test_global_variable_saver.py b/rl_coach/tests/test_global_variable_saver.py 2015 | index 19da034..47e3f23 100644 2016 | --- a/rl_coach/tests/test_global_variable_saver.py 2017 | +++ b/rl_coach/tests/test_global_variable_saver.py 2018 | @@ -19,7 +19,7 @@ def name(): 2019 | 2020 | @pytest.fixture 2021 | def variable(shape, name): 2022 | - tf.reset_default_graph() 2023 | + 
tf.compat.v1.reset_default_graph() 2024 | return tf.Variable(tf.zeros(shape), name=name) 2025 | 2026 | 2027 | @@ -36,8 +36,8 @@ def assert_arrays_ones_shape(arrays, shape, name): 2028 | 2029 | @pytest.mark.unit_test 2030 | def test_global_variable_saver_to_arrays(variable, name, shape): 2031 | - with tf.Session() as session: 2032 | - session.run(tf.global_variables_initializer()) 2033 | + with tf.compat.v1.Session() as session: 2034 | + session.run(tf.compat.v1.global_variables_initializer()) 2035 | session.run(variable.assign(tf.ones(shape))) 2036 | 2037 | saver = GlobalVariableSaver("name") 2038 | @@ -47,8 +47,8 @@ def test_global_variable_saver_to_arrays(variable, name, shape): 2039 | 2040 | @pytest.mark.unit_test 2041 | def test_global_variable_saver_from_arrays(variable, name, shape): 2042 | - with tf.Session() as session: 2043 | - session.run(tf.global_variables_initializer()) 2044 | + with tf.compat.v1.Session() as session: 2045 | + session.run(tf.compat.v1.global_variables_initializer()) 2046 | 2047 | saver = GlobalVariableSaver("name") 2048 | saver.from_arrays(session, {name: np.ones(shape)}) 2049 | @@ -58,8 +58,8 @@ def test_global_variable_saver_from_arrays(variable, name, shape): 2050 | 2051 | @pytest.mark.unit_test 2052 | def test_global_variable_saver_to_string(variable, name, shape): 2053 | - with tf.Session() as session: 2054 | - session.run(tf.global_variables_initializer()) 2055 | + with tf.compat.v1.Session() as session: 2056 | + session.run(tf.compat.v1.global_variables_initializer()) 2057 | session.run(variable.assign(tf.ones(shape))) 2058 | 2059 | saver = GlobalVariableSaver("name") 2060 | @@ -70,8 +70,8 @@ def test_global_variable_saver_to_string(variable, name, shape): 2061 | 2062 | @pytest.mark.unit_test 2063 | def test_global_variable_saver_from_string(variable, name, shape): 2064 | - with tf.Session() as session: 2065 | - session.run(tf.global_variables_initializer()) 2066 | + with tf.compat.v1.Session() as session: 2067 | + session.run(tf.compat.v1.global_variables_initializer()) 2068 | 2069 | saver = GlobalVariableSaver("name") 2070 | saver.from_string(session, pickle.dumps({name: np.ones(shape)}, protocol=-1)) 2071 | -------------------------------------------------------------------------------- /files/start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ $1 == 'train' ] 4 | then 5 | # Remove all nvidia gl libraries if they exists to run training in SageMaker. 6 | rm -rf /usr/local/nvidia/lib/libGL* 7 | rm -rf /usr/local/nvidia/lib/libEGL* 8 | rm -rf /usr/local/nvidia/lib/libOpenGL* 9 | rm -rf /usr/local/nvidia/lib64/libGL* 10 | rm -rf /usr/local/nvidia/lib64/libEGL* 11 | rm -rf /usr/local/nvidia/lib64/libOpenGL* 12 | 13 | CURRENT_HOST=$(jq .current_host /opt/ml/input/config/resourceconfig.json) 14 | 15 | sed -ie "s/PLACEHOLDER_HOSTNAME/$CURRENT_HOST/g" /changehostname.c 16 | 17 | gcc -o /changehostname.o -c -fPIC -Wall /changehostname.c 18 | gcc -o /libchangehostname.so -shared -export-dynamic /changehostname.o -ldl 19 | redis-server /etc/redis/redis.conf & 20 | LD_PRELOAD=/libchangehostname.so xvfb-run --auto-servernum -s "-screen 0 1024x768x16" train 21 | elif [ $1 == 'serve' ] 22 | then 23 | serve 24 | fi 25 | -------------------------------------------------------------------------------- /push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | trap ctrl_c INT 3 | 4 | function ctrl_c() { 5 | echo "Requested to stop." 
6 | exit 1 7 | } 8 | 9 | PREFIX="local" 10 | VERSION=$(cat VERSION) 11 | 12 | ARCH="cpu gpu cpu-intel" 13 | 14 | while getopts "p:a:" opt; do 15 | case $opt in 16 | p) PREFIX="$OPTARG" 17 | ;; 18 | a) ARCH="$OPTARG" 19 | ;; 20 | \?) echo "Invalid option -$OPTARG" >&2 21 | exit 1 22 | ;; 23 | esac 24 | done 25 | 26 | echo "Pushing docker images for [$ARCH]" 27 | 28 | for A in $ARCH; do 29 | echo "Pushing $PREFIX/deepracer-sagemaker:$VERSION-$A" 30 | docker push $PREFIX/deepracer-sagemaker:$VERSION-$A 31 | done 32 | --------------------------------------------------------------------------------
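Example invocation of push.sh (a minimal sketch; "my-registry" and the architecture list are illustrative assumptions, not values taken from this repository):

    # Pushes my-registry/deepracer-sagemaker:<VERSION>-cpu and my-registry/deepracer-sagemaker:<VERSION>-gpu,
    # where <VERSION> is read from the VERSION file by the script and -a overrides the default
    # architecture list ("cpu gpu cpu-intel"). The matching images are assumed to exist locally.
    ./push.sh -p my-registry -a "cpu gpu"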