├── .gitignore ├── README.rst ├── environment.yml ├── examples ├── .rayproject │ ├── cluster.yaml │ ├── project.yaml │ └── requirements.txt ├── README.rst ├── accelerate_pandas.ipynb ├── map_reduce.ipynb ├── news_recommendation_model.ipynb ├── news_recommendation_serving.ipynb ├── newsreader_1.png ├── newsreader_2.png ├── newsreader_3.png ├── newsreader_4.png ├── newsreader_5.png └── sharded_parameter_server.ipynb ├── exercises ├── colab01-03.ipynb ├── colab04-05.ipynb ├── colab06-07.ipynb ├── exercise01-Introduction.ipynb ├── exercise02-Task_Dependencies.ipynb ├── exercise03-Nested_Remote_Functions.ipynb ├── exercise04-Actors.ipynb ├── exercise05-Actor_Handles.ipynb ├── exercise06-Wait.ipynb ├── exercise07-Ordered_Wait.ipynb ├── exercise08-Serialization.ipynb ├── exercise09-GPUs.ipynb ├── exercise10-Custom_Resources.ipynb ├── exercise11-TensorFlow.ipynb └── exercise12-TreeReduce.ipynb ├── postBuild ├── requirements.in ├── requirements.txt ├── rllib_exercises ├── client.png ├── dqn.png ├── learning.png ├── log.png ├── ppo.png ├── rllib_colab.ipynb ├── rllib_exercise01_mdp.ipynb ├── rllib_exercise02_ppo.ipynb ├── rllib_exercise03_custom_env.ipynb ├── rllib_exercise04_serving.ipynb ├── serving │ ├── data_large.gz │ ├── data_small.gz │ ├── do_rollouts.py │ ├── javascript-pong │ │ └── static │ │ │ ├── game.js │ │ │ ├── images │ │ │ ├── press1.png │ │ │ ├── press2.png │ │ │ └── winner.png │ │ │ ├── index.html │ │ │ ├── pong.css │ │ │ ├── pong.js │ │ │ └── sounds │ │ │ ├── goal.wav │ │ │ ├── ping.wav │ │ │ ├── pong.wav │ │ │ └── wall.wav │ ├── pong_py │ │ ├── pong_py.egg-info │ │ │ ├── PKG-INFO │ │ │ ├── SOURCES.txt │ │ │ ├── dependency_links.txt │ │ │ └── top_level.txt │ │ ├── pong_py │ │ │ ├── __init__.py │ │ │ ├── ball.py │ │ │ ├── helper.py │ │ │ ├── paddle.py │ │ │ └── pongjsenv.py │ │ └── setup.py │ ├── pong_web_server.py │ └── simple_policy_server.py ├── test_exercises.py └── web.png ├── solutions ├── colab01-03_solution.ipynb ├── colab04-05_solution.ipynb └── colab06-07_solution.ipynb ├── tune_exercises ├── README.rst ├── _old_tutorial │ ├── Tutorial-Answers.ipynb │ ├── Tutorial.ipynb │ ├── cnn.png │ ├── helper.py │ ├── input.html │ ├── input_final.html │ ├── mnist.png │ ├── model.py │ └── tune.png ├── exercise_1_basics.ipynb ├── exercise_2_optimize.ipynb ├── exercise_3_pbt.ipynb ├── helper.py ├── pbt.png ├── tune-arch-simple.png └── tune.png ├── tutorialextension.py └── utilities ├── javascript-pong └── static │ ├── game.js │ ├── images │ ├── press1.png │ ├── press2.png │ └── winner.png │ ├── index.html │ ├── pong.css │ ├── pong.js │ └── sounds │ ├── goal.wav │ ├── ping.wav │ ├── pong.wav │ └── wall.wav ├── patch.py └── pong_py ├── pong_py ├── __init__.py ├── ball.py ├── helper.py ├── paddle.py └── pongjsenv.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | *.h5 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Ray Tutorial 2 | ============ 3 | 4 | **NOTE**: These tutorials have been **deprecated**. A portion of their modules has 5 | been incorporated into the new Anyscale Academy tutorials at https://github.com/anyscale/academy. 
6 | 7 | 8 | Try Ray on Google Colab 9 | ----------------------- 10 | 11 | Try the Ray tutorials online using Google Colab: 12 | 13 | - `Remote Functions`_ 14 | - `Remote Actors`_ 15 | - `In-Order Task Processing`_ 16 | - `Reinforcement Learning with RLlib`_ 17 | 18 | .. _`Remote Functions`: https://colab.research.google.com/github/ray-project/tutorial/blob/master/exercises/colab01-03.ipynb 19 | .. _`Remote Actors`: https://colab.research.google.com/github/ray-project/tutorial/blob/master/exercises/colab04-05.ipynb 20 | .. _`In-Order Task Processing`: https://colab.research.google.com/github/ray-project/tutorial/blob/master/exercises/colab06-07.ipynb 21 | .. _`Reinforcement Learning with RLlib`: https://colab.research.google.com/github/ray-project/tutorial/blob/master/rllib_exercises/rllib_colab.ipynb 22 | 23 | Try Tune on Google Colab 24 | ------------------------ 25 | 26 | Tuning hyperparameters is often the most expensive part of the machine learning workflow. `Ray Tune `_ is built to address this, providing an efficient and scalable solution for this pain point. 27 | 28 | `Exercise 1 `_ covers the basics of using Tune - creating your first training function and using Tune. This tutorial uses Keras. 29 | 30 | .. raw:: html 31 | 32 | 33 | Tune Tutorial 34 | 35 | 36 | `Exercise 2 `_ covers Search algorithms and Trial Schedulers. This tutorial uses PyTorch. 37 | 38 | .. raw:: html 39 | 40 | 41 | Tune Tutorial 42 | 43 | 44 | `Exercise 3 `_ covers using Population-Based Training (PBT) and uses the advanced Trainable API with save and restore functions and checkpointing. 45 | 46 | .. raw:: html 47 | 48 | 49 | Tune Tutorial 50 | 51 | 52 | Try Ray on Binder 53 | ----------------- 54 | 55 | Try the Ray tutorials online on `Binder`_. Note that Binder will use very small 56 | machines, so the degree of parallelism will be limited. 57 | 58 | .. _`Binder`: https://mybinder.org/v2/gh/ray-project/tutorial/master?urlpath=lab 59 | 60 | Local Setup 61 | ----------- 62 | 63 | 1. Make sure you have Python installed (we recommend using the `Anaconda Python 64 | distribution`_). Ray works with both Python 2 and Python 3. If you are unsure 65 | which to use, then use Python 3. 66 | 67 | **If not using conda**, continue to step 2. 68 | 69 | **If using conda**, you can then run the following commands and skip the next 4 steps: 70 | 71 | .. code-block:: bash 72 | 73 | git clone https://github.com/ray-project/tutorial 74 | cd tutorial 75 | conda env create -f environment.yml 76 | conda activate ray-tutorial 77 | 78 | 79 | 2. **Install Jupyter** with ``pip install jupyter``. Verify that you can start 80 | Jupyter lab with the command ``jupyter-lab`` or ``jupyter-notebook``. 81 | 82 | 3. **Install Ray** by running ``pip install -U ray``. Verify that you can run 83 | 84 | .. code-block:: python 85 | 86 | import ray 87 | ray.init() 88 | 89 | in a Python interpreter. 90 | 91 | 4. Clone the tutorial repository with 92 | 93 | .. code-block:: bash 94 | 95 | git clone https://github.com/ray-project/tutorial.git 96 | 97 | 5. Install the additional dependencies. 98 | 99 | Either install them from the given requirements.txt 100 | 101 | .. code-block:: bash 102 | pip install -r requirements.txt 103 | 104 | Or install them manually 105 | 106 | ..
code-block:: bash 107 | 108 | pip install modin 109 | pip install tensorflow 110 | pip install gym 111 | pip install scipy 112 | pip install opencv-python 113 | pip install bokeh 114 | pip install ipywidgets==6.0.0 115 | pip install keras 116 | 117 | Verify that you can run ``import tensorflow`` and ``import gym`` in a Python 118 | interpreter. 119 | 120 | **Note:** If you have trouble installing these Python modules, note that 121 | almost all of the exercises can be done without them. 122 | 123 | 6. If you want to run the pong exercise (in `rllib_exercises/rllib_exercise04_serving.ipynb`), 124 | you will need to run `pip install utilities/pong_py`. 125 | 126 | Exercises 127 | --------- 128 | 129 | Each file ``exercises/exercise*.ipynb`` is a separate exercise. They can be 130 | opened in Jupyter lab by running the following commands. 131 | 132 | .. code-block:: bash 133 | 134 | cd tutorial/exercises 135 | jupyter-lab 136 | 137 | If you don't have `jupyter-lab`, try `jupyter-notebook`. If it asks for a password, just hit enter. 138 | 139 | Instructions are written in each file. To do each exercise, first run all of 140 | the cells in Jupyter lab. Then modify the cells that need to be changed so 141 | that no exceptions are raised. Throughout these 142 | exercises, you may find the `Ray documentation`_ helpful. 143 | 144 | **Exercise 1:** Define a remote function, and execute multiple remote functions 145 | in parallel. 146 | 147 | **Exercise 2:** Execute remote functions in parallel with some dependencies. 148 | 149 | **Exercise 3:** Call remote functions from within remote functions. 150 | 151 | **Exercise 4:** Use actors to share state between tasks. See the documentation 152 | on `using actors`_. 153 | 154 | **Exercise 5:** Pass actor handles to tasks so that multiple tasks can invoke 155 | methods on the same actor. 156 | 157 | **Exercise 6:** Use ``ray.wait`` to ignore stragglers. See the 158 | `documentation for wait`_. 159 | 160 | **Exercise 7:** Use ``ray.wait`` to process tasks in the order that they finish. 161 | See the `documentation for wait`_. 162 | 163 | **Exercise 8:** Use ``ray.put`` to avoid serializing and copying the same 164 | object into shared memory multiple times. 165 | 166 | **Exercise 9:** Specify that an actor requires some GPUs. For a complete 167 | example that does something similar, you may want to see the `ResNet example`_. 168 | 169 | **Exercise 10:** Specify that a remote function requires certain custom 170 | resources. See the documentation on `custom resources`_. 171 | 172 | **Exercise 11:** Extract neural network weights from an actor on one process, 173 | and set them in another actor. You may want to read the documentation on 174 | `using Ray with TensorFlow`_. 175 | 176 | **Exercise 12:** Pass object IDs into tasks to construct dependencies between 177 | tasks and perform a tree reduce. 178 | 179 | .. _`Anaconda Python distribution`: https://www.continuum.io/downloads 180 | .. _`Ray documentation`: https://ray.readthedocs.io/en/latest/?badge=latest 181 | .. _`documentation for wait`: https://ray.readthedocs.io/en/latest/api.html#ray.wait 182 | .. _`using actors`: https://ray.readthedocs.io/en/latest/actors.html 183 | .. _`using Ray with TensorFlow`: https://ray.readthedocs.io/en/latest/using-ray-with-tensorflow.html 184 | .. _`ResNet example`: https://ray.readthedocs.io/en/latest/example-resnet.html 185 | ..
_`custom resources`: https://ray.readthedocs.io/en/latest/resources.html#custom-resources 186 | 187 | 188 | More In-Depth Examples 189 | ---------------------- 190 | 191 | **Sharded Parameter Server:** This exercise involves implementing a parameter 192 | server as a Ray actor, implementing a simple asynchronous distributed training 193 | algorithm, and sharding the parameter server to improve throughput. 194 | 195 | **Speed Up Pandas:** This exercise involves using `Modin`_ to speed up your 196 | pandas workloads. 197 | 198 | **MapReduce:** This exercise shows how to implement a toy version of the 199 | MapReduce system on top of Ray. 200 | 201 | .. _`Modin`: https://modin.readthedocs.io/en/latest/ 202 | 203 | RL Exercises 204 | ------------ 205 | 206 | The exercises in ``rllib_exercises/rllib_exercise*.ipynb`` should be done in order. 207 | They can be opened in Jupyter lab by running the following commands. 208 | 209 | .. code-block:: bash 210 | 211 | cd tutorial/rllib_exercises 212 | jupyter-lab 213 | 214 | **Exercise 1:** Introduction to Markov Decision Processes. 215 | 216 | **Exercise 2:** Derivative-free optimization. 217 | 218 | **Exercise 3:** Introduction to proximal policy optimization (PPO). 219 | 220 | **Exercise 4:** Introduction to asynchronous advantage actor-critic (A3C). 221 | 222 | **Exercise 5:** Train a policy to play pong using RLlib. Deploy it using actors, 223 | and play against the trained policy. 224 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: ray-tutorial 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.6 6 | - bokeh 7 | - ipywidgets=6.0.0 8 | - tensorflow==1.14.0 9 | - psutil 10 | - nbserverproxy 11 | - flask 12 | - flask-cors 13 | - pip: 14 | - ray[rllib]==0.7.4 15 | - keras 16 | - modin 17 | - matplotlib 18 | - bs4 19 | - lxml 20 | - scikit-learn 21 | - setproctitle 22 | - spacy 23 | - wikipedia 24 | - atoma 25 | -------------------------------------------------------------------------------- /examples/.rayproject/cluster.yaml: -------------------------------------------------------------------------------- 1 | # This file is generated by `ray project create`. 2 | 3 | # A unique identifier for the head node and workers of this cluster. 4 | cluster_name: ray-tutorial-examples 5 | 6 | # The maximum number of worker nodes to launch in addition to the head 7 | # node. This takes precedence over min_workers. min_workers defaults to 0. 8 | max_workers: 1 9 | 10 | # Cloud-provider specific configuration. 11 | provider: 12 | type: aws 13 | region: us-west-2 14 | availability_zone: us-west-2a 15 | 16 | # How Ray will authenticate with newly launched nodes. 17 | auth: 18 | ssh_user: ubuntu 19 | -------------------------------------------------------------------------------- /examples/.rayproject/project.yaml: -------------------------------------------------------------------------------- 1 | # This file is generated by `ray project create`. 
2 | 3 | name: ray-tutorial-examples 4 | 5 | description: "The Ray tutorial examples" 6 | tags: ["ray-tutorial", "examples", "newsreader", "map-reduce", "modin"] 7 | 8 | cluster: .rayproject/cluster.yaml 9 | 10 | environment: 11 | requirements: .rayproject/requirements.txt 12 | 13 | commands: 14 | - name: tutorial 15 | help: "Start a jupyter notebook with the tutorials" 16 | command: jupyter notebook --port 8889 17 | config: 18 | port_forward: 8889 19 | 20 | # Save the logs from the last run 21 | output_files: [ 22 | "/tmp/ray/session_latest/logs", 23 | ] 24 | -------------------------------------------------------------------------------- /examples/.rayproject/requirements.txt: -------------------------------------------------------------------------------- 1 | ray[debug]==0.7.3 2 | modin[ray] 3 | atoma 4 | -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | Running Ray Tutorial Examples with Projects 2 | =========================================== 3 | 4 | You can run the tutorial examples with Ray projects. To do so, 5 | you need to run the following commands (the second one in this 6 | directory): 7 | 8 | .. code-block:: bash 9 | 10 | pip install any 11 | any project create 12 | 13 | 14 | Starting the Tutorial 15 | --------------------- 16 | 17 | You can start the tutorial with 18 | 19 | .. code-block:: bash 20 | 21 | any session start -y tutorial 22 | 23 | 24 | After the session is started, it will print a URL like 25 | 26 | .. code-block:: text 27 | 28 | Copy/paste this URL into your browser when you connect for the first time, 29 | to login with a token: 30 | http://localhost:8889/?token=0a30613eb5e22c2e30ab76811c2a23551c1535c3a10ba971&token=0a30613eb5e22c2e30ab76811c2a23551c1535c3a10ba971 31 | 32 | 33 | that you can use to access the notebook. 34 | 35 | 36 | Stopping the Tutorial 37 | --------------------- 38 | 39 | The tutorial session can be stopped with 40 | 41 | .. code-block:: bash 42 | 43 | any session stop 44 | -------------------------------------------------------------------------------- /examples/accelerate_pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Modin (Pandas on Ray)\n", 8 | "\n", 9 | "**GOAL:** Learn to increase the speed of Pandas workflows by changing a single line of code.\n", 10 | "\n", 11 | "[Modin](https://modin.readthedocs.io/en/latest/?badge=latest) (Pandas on Ray) is a project aimed at speeding up Pandas using Ray.\n", 12 | "\n", 13 | "### Using Modin\n", 14 | "\n", 15 | "To use Modin, only a single line of code must be changed.\n", 16 | "\n", 17 | "Simply change:\n", 18 | "```python\n", 19 | "import pandas as pd\n", 20 | "```\n", 21 | "to\n", 22 | "```python\n", 23 | "import modin.pandas as pd\n", 24 | "```\n", 25 | "\n", 26 | "Changing this line of code will allow you to use all of the cores in your machine to do computation on your data. One of the major performance bottlenecks of Pandas is that it only uses a single core for any given computation. **Modin** exposes an API that is identical to Pandas, allowing you to continue interacting with your data as you would with Pandas. 
**There are no additional commands required to use Modin locally.** Partitioning, scheduling, data transfer, and other related concerns are all handled by **Modin** and **Ray** under the hood.\n", 27 | "\n", 28 | "### Concept for Exercise: DataFrame Constructor\n", 29 | "\n", 30 | "Often when playing around in Pandas, it is useful to create a DataFrame with the constructor. That is where we will start.\n", 31 | "\n", 32 | "```python\n", 33 | "import numpy as np\n", 34 | "import pandas as pd\n", 35 | "\n", 36 | "frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\n", 37 | "df = pd.DataFrame(frame_data)\n", 38 | "```\n", 39 | "\n", 40 | "The above code creates a Pandas DataFrame full of random integers with 1024 rows and 32 columns." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from __future__ import absolute_import\n", 50 | "from __future__ import division\n", 51 | "from __future__ import print_function\n", 52 | "\n", 53 | "import numpy as np\n", 54 | "import pandas\n", 55 | "import subprocess\n", 56 | "import sys" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "**EXERCISE:** Modify the code below to make the dataframe a `modin.pandas` DataFrame (remember the line of code to change)." 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# Implement your answer here. You are also free to play with the size\n", 73 | "# and shape of the DataFrame, but beware of exceeding your memory!\n", 74 | "\n", 75 | "import pandas as pd\n", 76 | "\n", 77 | "frame_data = np.random.randint(0, 100, size=(2**10, 2**5))\n", 78 | "df = pd.DataFrame(frame_data)\n", 79 | "\n", 80 | "# ***** Do not change the code below! It verifies that \n", 81 | "# ***** the exercise has been done correctly. *****\n", 82 | "\n", 83 | "try:\n", 84 | "    assert df is not None\n", 85 | "    assert frame_data is not None\n", 86 | "    assert isinstance(frame_data, np.ndarray)\n", 87 | "except:\n", 88 | "    raise AssertionError('Don\\'t change too much of the original code!')\n", 89 | "assert 'modin.pandas' in sys.modules, 'Not quite correct. Remember the single line of code change (See above)'\n", 90 | "assert hasattr(df, '_query_compiler'), 'Make sure that df is a modin.pandas DataFrame.'\n", 91 | "\n", 92 | "print(\"Success! You only need to change one line of code!\")" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "Now that we have created a toy example for playing around with the DataFrame, let's print it out in different ways.\n", 100 | "\n", 101 | "### Concept for Exercise: Data Interaction and Printing\n", 102 | "\n", 103 | "When interacting with data, it is very important to look at different parts of the data (e.g. `df.head()`). Here we will show that you can print the `modin.pandas` DataFrame in the same ways you would with Pandas."
104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# Print the first 10 rows.\n", 113 | "df.head(10)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "# Print the DataFrame.\n", 123 | "df" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "# Free cell for custom interaction (Play around here!)\n" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "`modin.pandas` is using all of the cores in your machine to perform these operations much faster than Pandas!" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "### Concept for Exercise: Identical API\n", 147 | "\n", 148 | "As previously mentioned, `modin.pandas` has an identical API to Pandas. In this section, we will go over some examples of how you can use `modin.pandas` to interact with your data in the same way you would with Pandas.\n", 149 | "\n", 150 | "**Note: `modin.pandas` does not yet have 100% of the Pandas API fully implemented or optimized. Some parameters are not implemented for some methods and some of the more obscure methods are not yet implemented. We are continuing to work toward 100% API coverage.**\n", 151 | "\n", 152 | "For a full list of implemented methods, visit the [Modin documentation](https://modin.readthedocs.io/en/latest/pandas_supported.html)." 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "df.describe()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "# Transpose the data\n", 171 | "df.T" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# Create a new column, just as in Pandas\n", 181 | "df['New Column'] = np.nan" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "df" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "df.columns" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "# Delete the first column\n", 209 | "del df[df.columns[0]]" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "df.columns" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "# Some operations are not yet optimized, but they are implemented.\n", 228 | "df.fillna(value=0, axis=0, limit=100)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "# Some operations are not yet implemented; running them will raise an error.\n", 238 | "df.kurtosis()" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": 
[], 246 | "source": [ 247 | "# Free cell for custom interaction (Play around here!).\n" 248 | ] 249 | } 250 | ], 251 | "metadata": { 252 | "kernelspec": { 253 | "display_name": "Python 3", 254 | "language": "python", 255 | "name": "python3" 256 | }, 257 | "language_info": { 258 | "codemirror_mode": { 259 | "name": "ipython", 260 | "version": 3 261 | }, 262 | "file_extension": ".py", 263 | "mimetype": "text/x-python", 264 | "name": "python", 265 | "nbconvert_exporter": "python", 266 | "pygments_lexer": "ipython3", 267 | "version": "3.6.4" 268 | } 269 | }, 270 | "nbformat": 4, 271 | "nbformat_minor": 2 272 | } 273 | -------------------------------------------------------------------------------- /examples/news_recommendation_serving.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# News recommendation serving\n", 8 | "We will now develop our news recommendation training example into a simple news reader application. We will use an existing web frontend (https://github.com/saqueib/qreader) and develop a Flask-based backend for it, which uses Ray actors to serve news and our news recommendation model.\n", 9 | "\n", 10 | "### Implementing the backend with a Ray actor\n", 11 | "\n", 12 | "" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from __future__ import absolute_import\n", 22 | "from __future__ import division\n", 23 | "from __future__ import print_function\n", 24 | "\n", 25 | "import atoma\n", 26 | "from flask import Flask, jsonify, request\n", 27 | "from flask_cors import CORS\n", 28 | "import os\n", 29 | "import requests\n", 30 | "import ray" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "ray.init(num_cpus=2)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "We define an actor `NewsServer`, which is responsible for parsing an RSS feed and extracting the news items so they can be sent to the frontend. It also has a method `like_item`, which is called whenever the user \"likes\" an article. Note that this is a toy example, but in a more realistic application, we could have a number of these actors, for example one for each user, to distribute the load."
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "@ray.remote\n", 56 | "class NewsServer(object):\n", 57 | "\n", 58 | "    def __init__(self):\n", 59 | "        pass\n", 60 | "\n", 61 | "    def retrieve_feed(self, url):\n", 62 | "        response = requests.get(url)\n", 63 | "        # Sometimes there are parsing errors \n", 64 | "        feed = atoma.parse_rss_bytes(response.content)\n", 65 | "        items = []\n", 66 | "        for item in feed.items:\n", 67 | "            color = \"#FFFFFF\" # white\n", 68 | "            items.append({\"title\": item.title,\n", 69 | "                          \"link\": item.link,\n", 70 | "                          \"description\": item.description,\n", 71 | "                          \"description_text\": item.description,\n", 72 | "                          \"pubDate\": str(item.pub_date),\n", 73 | "                          \"color\": color})\n", 74 | "\n", 75 | "        return {\"channel\": {\"title\": feed.title,\n", 76 | "                            \"link\": feed.link,\n", 77 | "                            \"url\": feed.link},\n", 78 | "                \"items\": items}\n", 79 | "\n", 80 | "    def like_item(self, url, is_faved):\n", 81 | "        if is_faved:\n", 82 | "            print(\"url {} has been favorited\".format(url))\n", 83 | "        else:\n", 84 | "            print(\"url {} has been defavorited\".format(url))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "### Deploying the backend\n", 92 | "\n", 93 | "The following cell will set up a Flask web server that listens to commands from the frontend and dispatches them to the `NewsServer` actor." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "app = Flask(\"newsreader\")\n", 103 | "CORS(app)\n", 104 | "\n", 105 | "@app.route(\"/api\", methods=[\"POST\"])\n", 106 | "def dispatcher():\n", 107 | "    req = request.get_json()\n", 108 | "    method_name = req[\"method_name\"]\n", 109 | "    method_args = req[\"method_args\"]\n", 110 | "    if hasattr(dispatcher.server, method_name):\n", 111 | "        method = getattr(dispatcher.server, method_name)\n", 112 | "        # Doing a blocking ray.get right after submitting the task\n", 113 | "        # might be bad for performance if the task is expensive.\n", 114 | "        result = ray.get(method.remote(*method_args))\n", 115 | "        return jsonify(result)\n", 116 | "    else:\n", 117 | "        return jsonify(\n", 118 | "            {\"error\": \"method_name '\" + method_name + \"' not found\"})\n", 119 | "\n", 120 | "dispatcher.server = NewsServer.remote()\n", 121 | "app.run(host=\"0.0.0.0\")" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "To try out the backend, go to http://localhost:9000/. You can then click on the \"Add Channel\" button and enter the URL of a newsfeed, for example `http://news.ycombinator.com/rss`. Click on one of the star icons and observe how the information is propagated to the Ray actor (it will be printed in the above cell)." 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### Serving the model\n", 136 | "\n", 137 | "\n", 138 | "\n", 139 | "We can now integrate the model we have trained in the `news_recommendation_model` example. **First you need to click Kernel/Restart & Clear Output** to prepare restarting the Flask server. 
Then we import the needed modules and start Ray:" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "from __future__ import absolute_import\n", 149 | "from __future__ import division\n", 150 | "from __future__ import print_function\n", 151 | "\n", 152 | "import atoma\n", 153 | "from flask import Flask, jsonify, request\n", 154 | "from flask_cors import CORS\n", 155 | "import os\n", 156 | "import requests\n", 157 | "import ray" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "ray.init(num_cpus=2)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "If you have already completed that example, put the name of the file containing the best model below and evaluate the following cell:" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "best_result_path = \"\"\n", 183 | "import pickle\n", 184 | "with open(best_result_path, \"rb\") as f:\n", 185 | "    pipeline = pickle.load(f)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "**EXERCISE**: Copy the `NewsServer` code from above into the cell below and change it so that `color` will be\n", 193 | "\n", 194 | "```python\n", 195 | "color = \"#FFD700\" # gold-ish yellow\n", 196 | "```\n", 197 | "\n", 198 | "if `pipeline.predict([item.title])` is `True` and\n", 199 | "\n", 200 | "```python\n", 201 | "color = \"#FFFFFF\" # white\n", 202 | "```\n", 203 | "\n", 204 | "otherwise." 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [ 213 | "# The new code for the NewsServer goes here" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "Now we can restart the Flask server:" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "app = Flask(\"newsreader\")\n", 230 | "CORS(app)\n", 231 | "\n", 232 | "@app.route(\"/api\", methods=[\"POST\"])\n", 233 | "def dispatcher():\n", 234 | "    req = request.get_json()\n", 235 | "    method_name = req[\"method_name\"]\n", 236 | "    method_args = req[\"method_args\"]\n", 237 | "    if hasattr(dispatcher.server, method_name):\n", 238 | "        method = getattr(dispatcher.server, method_name)\n", 239 | "        # Doing a blocking ray.get right after submitting the task\n", 240 | "        # might be bad for performance if the task is expensive.\n", 241 | "        result = ray.get(method.remote(*method_args))\n", 242 | "        return jsonify(result)\n", 243 | "    else:\n", 244 | "        return jsonify(\n", 245 | "            {\"error\": \"method_name '\" + method_name + \"' not found\"})\n", 246 | "\n", 247 | "dispatcher.server = NewsServer.remote()\n", 248 | "app.run(host=\"0.0.0.0\")" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "To try out the backend with model serving integrated, go to http://localhost:9000/. You can again click on the \"Add Channel\" button and enter the URL of a newsfeed, for example http://news.ycombinator.com/rss. It will show recommended articles in yellow."
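For orientation, the changed portion of `retrieve_feed` might look roughly like the following minimal sketch; here `pipeline` is the model unpickled earlier and `item` is the current feed entry inside the loop, and the exact placement within your copy of the class is up to you:

```python
# Sketch only: `pipeline` and `item` come from the surrounding notebook cells.
prediction = pipeline.predict([item.title])[0]
if prediction:
    color = "#FFD700"  # gold-ish yellow for recommended articles
else:
    color = "#FFFFFF"  # white
```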
256 | ] 257 | } 258 | ], 259 | "metadata": { 260 | "kernelspec": { 261 | "display_name": "Python 3", 262 | "language": "python", 263 | "name": "python3" 264 | }, 265 | "language_info": { 266 | "codemirror_mode": { 267 | "name": "ipython", 268 | "version": 3 269 | }, 270 | "file_extension": ".py", 271 | "mimetype": "text/x-python", 272 | "name": "python", 273 | "nbconvert_exporter": "python", 274 | "pygments_lexer": "ipython3", 275 | "version": "3.6.0" 276 | } 277 | }, 278 | "nbformat": 4, 279 | "nbformat_minor": 2 280 | } 281 | -------------------------------------------------------------------------------- /examples/newsreader_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/examples/newsreader_1.png -------------------------------------------------------------------------------- /examples/newsreader_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/examples/newsreader_2.png -------------------------------------------------------------------------------- /examples/newsreader_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/examples/newsreader_3.png -------------------------------------------------------------------------------- /examples/newsreader_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/examples/newsreader_4.png -------------------------------------------------------------------------------- /examples/newsreader_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/examples/newsreader_5.png -------------------------------------------------------------------------------- /exercises/exercise01-Introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 1 - Simple Data Parallel Example\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to show how to run simple tasks in parallel.\n", 10 | "\n", 11 | "This script is too slow, and the computation is embarrassingly parallel. In this exercise, you will use Ray to execute the functions in parallel to speed it up.\n", 12 | "\n", 13 | "### Concept for this Exercise - Remote Functions\n", 14 | "\n", 15 | "The standard way to turn a Python function into a remote function is to add the `@ray.remote` decorator. Here is an example.\n", 16 | "\n", 17 | "```python\n", 18 | "# A regular Python function.\n", 19 | "def regular_function():\n", 20 | " return 1\n", 21 | "\n", 22 | "# A Ray remote function.\n", 23 | "@ray.remote\n", 24 | "def remote_function():\n", 25 | " return 1\n", 26 | "```\n", 27 | "\n", 28 | "The differences are the following:\n", 29 | "\n", 30 | "1. **Invocation:** The regular version is called with `regular_function()`, whereas the remote version is called with `remote_function.remote()`.\n", 31 | "2. 
**Return values:** `regular_function` immediately executes and returns `1`, whereas `remote_function` immediately returns an object ID (a future) and then creates a task that will be executed on a worker process. The result can be obtained with `ray.get`.\n", 32 | " ```python\n", 33 | " >>> regular_function()\n", 34 | " 1\n", 35 | " \n", 36 | " >>> remote_function.remote()\n", 37 | " ObjectID(1c80d6937802cd7786ad25e50caf2f023c95e350)\n", 38 | " \n", 39 | " >>> ray.get(remote_function.remote())\n", 40 | " 1\n", 41 | " ```\n", 42 | "3. **Parallelism:** Invocations of `regular_function` happen **serially**, for example\n", 43 | " ```python\n", 44 | " # These happen serially.\n", 45 | " for _ in range(4):\n", 46 | " regular_function()\n", 47 | " ```\n", 48 | " whereas invocations of `remote_function` happen in **parallel**, for example\n", 49 | " ```python\n", 50 | " # These happen in parallel.\n", 51 | " for _ in range(4):\n", 52 | " remote_function.remote()\n", 53 | " ```" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "from __future__ import absolute_import\n", 63 | "from __future__ import division\n", 64 | "from __future__ import print_function\n", 65 | "\n", 66 | "import ray\n", 67 | "import time" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Start Ray. By default, Ray does not schedule more tasks concurrently than there are CPUs. This example requires four tasks to run concurrently, so we tell Ray that there are four CPUs. Usually this is not done and Ray computes the number of CPUs using `psutil.cpu_count()`. The argument `ignore_reinit_error=True` just ignores errors if the cell is run multiple times.\n", 75 | "\n", 76 | "The call to `ray.init` starts a number of processes." 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "ray.init(num_cpus=4, ignore_reinit_error=True, include_webui=False)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "**EXERCISE:** The function below is slow. Turn it into a remote function using the `@ray.remote` decorator." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "# This function is a proxy for a more interesting and computationally\n", 102 | "# intensive function.\n", 103 | "def slow_function(i):\n", 104 | " time.sleep(1)\n", 105 | " return i" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "**EXERCISE:** The loop below takes too long. The four function calls could be executed in parallel. Instead of four seconds, it should only take one second. Once `slow_function` has been made a remote function, execute these four tasks in parallel by calling `slow_function.remote()`. Then obtain the results by calling `ray.get` on a list of the resulting object IDs." 
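As a reference, the general submit-then-fetch pattern looks like the following minimal sketch (it uses a hypothetical remote function `g`, not the exercise's `slow_function`, and assumes `ray.init()` has already been called):

```python
import ray

@ray.remote
def g(i):
    return i

# Launch four tasks; each .remote() call returns an object ID immediately.
object_ids = [g.remote(i) for i in range(4)]

# Block until all four tasks have finished and fetch their results as a list.
values = ray.get(object_ids)  # == [0, 1, 2, 3]
```

The same two-step shape (submit everything first, then call `ray.get` once on the whole list) is what makes the loop below run in parallel.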
113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# Sleep a little to improve the accuracy of the timing measurements below.\n", 122 | "# We do this because workers may still be starting up in the background.\n", 123 | "time.sleep(2.0)\n", 124 | "start_time = time.time()\n", 125 | "\n", 126 | "results = [slow_function(i) for i in range(4)]\n", 127 | "\n", 128 | "end_time = time.time()\n", 129 | "duration = end_time - start_time\n", 130 | "\n", 131 | "print('The results are {}. This took {} seconds. Run the next cell to see '\n", 132 | " 'if the exercise was done correctly.'.format(results, duration))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "**VERIFY:** Run some checks to verify that the changes you made to the code were correct. Some of the checks should fail when you initially run the cells. After completing the exercises, the checks should pass." 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "assert results == [0, 1, 2, 3], 'Did you remember to call ray.get?'\n", 149 | "assert duration < 1.1, ('The loop took {} seconds. This is too slow.'\n", 150 | " .format(duration))\n", 151 | "assert duration > 1, ('The loop took {} seconds. This is too fast.'\n", 152 | " .format(duration))\n", 153 | "\n", 154 | "print('Success! The example took {} seconds.'.format(duration))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "**EXERCISE:** Use the UI to view the task timeline and to verify that the four tasks were executed in parallel. You can do this as follows.\n", 162 | "\n", 163 | "1. Run the following cell to generate a JSON file containing the profiling data.\n", 164 | "2. Download the timeline file by right clicking on `timeline01.json` in the navigator to the left and choosing **\"Download\"**.\n", 165 | "3. Open [chrome://tracing/](chrome://tracing/) in the Chrome web browser, click on the **\"Load\"** button and load the downloaded JSON file.\n", 166 | "\n", 167 | "To navigate within the timeline, do the following.\n", 168 | "- Move around by clicking and dragging.\n", 169 | "- Zoom in and out by holding **alt** and scrolling.\n", 170 | "\n", 171 | "**NOTE:** The timeline visualization will only work in **Chrome**." 
172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "ray.timeline(filename=\"timeline01.json\")" 181 | ] 182 | } 183 | ], 184 | "metadata": { 185 | "kernelspec": { 186 | "display_name": "Python 3", 187 | "language": "python", 188 | "name": "python3" 189 | }, 190 | "language_info": { 191 | "codemirror_mode": { 192 | "name": "ipython", 193 | "version": 3 194 | }, 195 | "file_extension": ".py", 196 | "mimetype": "text/x-python", 197 | "name": "python", 198 | "nbconvert_exporter": "python", 199 | "pygments_lexer": "ipython3", 200 | "version": "3.6.7" 201 | }, 202 | "toc": { 203 | "base_numbering": 1, 204 | "nav_menu": {}, 205 | "number_sections": false, 206 | "sideBar": true, 207 | "skip_h1_title": false, 208 | "title_cell": "Table of Contents", 209 | "title_sidebar": "Contents", 210 | "toc_cell": false, 211 | "toc_position": { 212 | "height": "calc(100% - 180px)", 213 | "left": "10px", 214 | "top": "150px", 215 | "width": "382.391px" 216 | }, 217 | "toc_section_display": true, 218 | "toc_window_display": true 219 | } 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 2 223 | } 224 | -------------------------------------------------------------------------------- /exercises/exercise04-Actors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 4 - Introducing Actors\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to show how to create an actor and how to call actor methods.\n", 10 | "\n", 11 | "See the documentation on actors at http://ray.readthedocs.io/en/latest/actors.html.\n", 12 | "\n", 13 | "Sometimes you need a \"worker\" process to have \"state\". For example, that state might be a neural network, a simulator environment, a counter, or something else entirely. However, remote functions are side-effect free. That is, they operate on inputs and produce outputs, but they don't change the state of the worker they execute on.\n", 14 | "\n", 15 | "Actors are different. When we instantiate an actor, a brand new worker is created, and all methods that are called on that actor are executed on the newly created worker.\n", 16 | "\n", 17 | "This means that with a single actor, no parallelism can be achieved because calls to the actor's methods will be executed one at a time. However, multiple actors can be created and methods can be executed on them in parallel.\n", 18 | "\n", 19 | "### Concepts for this Exercise - Actors\n", 20 | "\n", 21 | "To create an actor, decorate a Python class with the `@ray.remote` decorator.\n", 22 | "\n", 23 | "```python\n", 24 | "@ray.remote\n", 25 | "class Example(object):\n", 26 | "    def __init__(self, x):\n", 27 | "        self.x = x\n", 28 | "    \n", 29 | "    def set(self, x):\n", 30 | "        self.x = x\n", 31 | "    \n", 32 | "    def get(self):\n", 33 | "        return self.x\n", 34 | "```\n", 35 | "\n", 36 | "Like regular Python classes, **actors encapsulate state that is shared across actor method invocations**.\n", 37 | "\n", 38 | "Actor classes differ from regular Python classes in the following ways.\n", 39 | "1. **Instantiation:** A regular class would be instantiated via `e = Example(1)`. Actors are instantiated via\n", 40 | "    ```python\n", 41 | "    e = Example.remote(1)\n", 42 | "    ```\n", 43 | "    When an actor is instantiated, a **new worker process** is created by a local scheduler somewhere in the cluster.\n", 44 | "2. 
**Method Invocation:** Methods of a regular class would be invoked via `e.set(2)` or `e.get()`. Actor methods are invoked differently.\n", 45 | "    ```python\n", 46 | "    >>> e.set.remote(2)\n", 47 | "    ObjectID(d966aa9b6486331dc2257522734a69ff603e5a1c)\n", 48 | "    \n", 49 | "    >>> e.get.remote()\n", 50 | "    ObjectID(7c432c085864ed4c7c18cf112377a608676afbc3)\n", 51 | "    ```\n", 52 | "3. **Return Values:** Actor methods are non-blocking. They immediately return an object ID and **they create a task which is scheduled on the actor worker**. The result can be retrieved with `ray.get`.\n", 53 | "    ```python\n", 54 | "    >>> ray.get(e.set.remote(2))\n", 55 | "    None\n", 56 | "    \n", 57 | "    >>> ray.get(e.get.remote())\n", 58 | "    2\n", 59 | "    ```" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "from __future__ import absolute_import\n", 69 | "from __future__ import division\n", 70 | "from __future__ import print_function\n", 71 | "\n", 72 | "import numpy as np\n", 73 | "import ray\n", 74 | "import time" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "ray.init(num_cpus=4, include_webui=False, ignore_reinit_error=True)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "**EXERCISE:** Change the `Foo` class to be an actor class by using the `@ray.remote` decorator." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "class Foo(object):\n", 100 | "    def __init__(self):\n", 101 | "        self.counter = 0\n", 102 | "\n", 103 | "    def reset(self):\n", 104 | "        self.counter = 0\n", 105 | "\n", 106 | "    def increment(self):\n", 107 | "        time.sleep(0.5)\n", 108 | "        self.counter += 1\n", 109 | "        return self.counter\n", 110 | "\n", 111 | "assert hasattr(Foo, 'remote'), 'You need to turn \"Foo\" into an actor with @ray.remote.'" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "**EXERCISE:** Change the instantiations below to create two actors by calling `Foo.remote()`." 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "# Create two Foo objects.\n", 128 | "f1 = Foo()\n", 129 | "f2 = Foo()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "**EXERCISE:** Parallelize the code below. The two actors can execute methods in parallel (though each actor can only execute one method at a time)." 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "# Sleep a little to improve the accuracy of the timing measurements below.\n", 146 | "time.sleep(2.0)\n", 147 | "start_time = time.time()\n", 148 | "\n", 149 | "# Reset the actor state so that we can run this cell multiple times without\n", 150 | "# changing the results.\n", 151 | "f1.reset()\n", 152 | "f2.reset()\n", 153 | "\n", 154 | "# We want to parallelize this code. However, it is not straightforward to\n", 155 | "# make \"increment\" a remote function, because state is shared (the value of\n", 156 | "# \"self.counter\") between subsequent calls to \"increment\". 
In this case, it\n", 157 | "# makes sense to use actors.\n", 158 | "results = []\n", 159 | "for _ in range(5):\n", 160 | " results.append(f1.increment())\n", 161 | " results.append(f2.increment())\n", 162 | "\n", 163 | "end_time = time.time()\n", 164 | "duration = end_time - start_time\n", 165 | "\n", 166 | "assert not any([isinstance(result, ray.ObjectID) for result in results]), 'Looks like \"results\" is {}. You may have forgotten to call ray.get.'.format(results)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "**VERIFY:** Run some checks to verify that the changes you made to the code were correct. Some of the checks should fail when you initially run the cells. After completing the exercises, the checks should pass." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "assert results == [1, 1, 2, 2, 3, 3, 4, 4, 5, 5]\n", 183 | "\n", 184 | "assert duration < 3, ('The experiments ran in {} seconds. This is too '\n", 185 | " 'slow.'.format(duration))\n", 186 | "assert duration > 2.5, ('The experiments ran in {} seconds. This is too '\n", 187 | " 'fast.'.format(duration))\n", 188 | "\n", 189 | "print('Success! The example took {} seconds.'.format(duration))" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [] 198 | } 199 | ], 200 | "metadata": { 201 | "kernelspec": { 202 | "display_name": "Python 3", 203 | "language": "python", 204 | "name": "python3" 205 | }, 206 | "language_info": { 207 | "codemirror_mode": { 208 | "name": "ipython", 209 | "version": 3 210 | }, 211 | "file_extension": ".py", 212 | "mimetype": "text/x-python", 213 | "name": "python", 214 | "nbconvert_exporter": "python", 215 | "pygments_lexer": "ipython3", 216 | "version": "3.6.1" 217 | }, 218 | "toc": { 219 | "base_numbering": 1, 220 | "nav_menu": {}, 221 | "number_sections": false, 222 | "sideBar": true, 223 | "skip_h1_title": false, 224 | "title_cell": "Table of Contents", 225 | "title_sidebar": "Contents", 226 | "toc_cell": false, 227 | "toc_position": { 228 | "height": "calc(100% - 180px)", 229 | "left": "10px", 230 | "top": "150px", 231 | "width": "382.391px" 232 | }, 233 | "toc_section_display": true, 234 | "toc_window_display": true 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 2 239 | } 240 | -------------------------------------------------------------------------------- /exercises/exercise05-Actor_Handles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 5 - Actor Handles\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to show how to pass around actor handles.\n", 10 | "\n", 11 | "Suppose we wish to have multiple tasks invoke methods on the same actor. For example, we may have a single actor that records logging information from a number of tasks. 
We can achieve this by passing a handle to the actor as an argument into the relevant tasks.\n", 12 | "\n", 13 | "### Concepts for this Exercise - Actor Handles\n", 14 | "\n", 15 | "First of all, suppose we've created an actor as follows.\n", 16 | "\n", 17 | "```python\n", 18 | "@ray.remote\n", 19 | "class Actor(object):\n", 20 | " def method(self):\n", 21 | " pass\n", 22 | "\n", 23 | "# Create the actor\n", 24 | "actor = Actor.remote()\n", 25 | "```\n", 26 | "\n", 27 | "Then we can define a remote function (or another actor) that takes an actor handle as an argument.\n", 28 | "\n", 29 | "```python\n", 30 | "@ray.remote\n", 31 | "def f(actor):\n", 32 | " # We can invoke methods on the actor.\n", 33 | " x_id = actor.method.remote()\n", 34 | " # We can block and get the results.\n", 35 | " return ray.get(x_id)\n", 36 | "```\n", 37 | "\n", 38 | "Then we can invoke the remote function a few times and pass in the actor handle.\n", 39 | "\n", 40 | "```python\n", 41 | "# Each of the three tasks created below will invoke methods on the same actor.\n", 42 | "f.remote(actor)\n", 43 | "f.remote(actor)\n", 44 | "f.remote(actor)\n", 45 | "```" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "from __future__ import absolute_import\n", 55 | "from __future__ import division\n", 56 | "from __future__ import print_function\n", 57 | "\n", 58 | "from collections import defaultdict\n", 59 | "import ray\n", 60 | "import time" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "ray.init(num_cpus=4, include_webui=False, ignore_reinit_error=True)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "In this exercise, we're going to write some code that runs several \"experiments\" in parallel and has each experiment log its results to an actor. The driver script can then periodically pull the results from the logging actor.\n", 77 | "\n", 78 | "**EXERCISE:** Turn this `LoggingActor` class into an actor class." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "class LoggingActor(object):\n", 88 | " def __init__(self):\n", 89 | " self.logs = defaultdict(lambda: [])\n", 90 | " \n", 91 | " def log(self, index, message):\n", 92 | " self.logs[index].append(message)\n", 93 | " \n", 94 | " def get_logs(self):\n", 95 | " return dict(self.logs)\n", 96 | "\n", 97 | "\n", 98 | "assert hasattr(LoggingActor, 'remote'), ('You need to turn LoggingActor into an '\n", 99 | " 'actor (by using the ray.remote keyword).')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "**EXERCISE:** Instantiate the actor." 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "logging_actor = LoggingActor()\n", 116 | "\n", 117 | "# Some checks to make sure this was done correctly.\n", 118 | "assert hasattr(logging_actor, 'get_logs')" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Now we define a remote function that runs and pushes its logs to the `LoggingActor`.\n", 126 | "\n", 127 | "**EXERCISE:** Modify this function so that it invokes methods correctly on `logging_actor` (you need to change the way you call the `log` method)." 
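As a reminder of the calling convention (a generic sketch with a hypothetical actor handle `handle`, not the exercise solution): methods on an actor handle are always invoked with `.remote`, which returns an object ID immediately, and for fire-and-forget calls such as logging the ID can simply be dropped.

```python
# Generic sketch; `handle` is a hypothetical handle to some actor class.
oid = handle.some_method.remote('an argument')  # returns an object ID immediately
result = ray.get(oid)                           # block only if the result is needed
handle.some_method.remote('another argument')   # fire-and-forget: ignore the ID
```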
128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "@ray.remote\n", 137 | "def run_experiment(experiment_index, logging_actor):\n", 138 | "    for i in range(60):\n", 139 | "        time.sleep(1)\n", 140 | "        # Push a logging message to the actor.\n", 141 | "        logging_actor.log(experiment_index, 'On iteration {}'.format(i))" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "Now we create several tasks that use the logging actor." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "experiment_ids = [run_experiment.remote(i, logging_actor) for i in range(3)]" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "While the experiments are running in the background, the driver process (that is, this Jupyter notebook) can query the actor to read the logs.\n", 165 | "\n", 166 | "**EXERCISE:** Modify the code below to dispatch methods to the `LoggingActor`." 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "logs = logging_actor.get_logs()\n", 176 | "\n", 177 | "assert isinstance(logs, dict), (\"Make sure that you dispatch tasks to the \"\n", 178 | "                                \"actor using the .remote keyword and get the results using ray.get.\")\n", 179 | "logs" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "**EXERCISE:** Try running the above cell multiple times and see how the results change (while the experiments are still running in the background). You can also try running more of the experiment tasks and see what happens." 187 | ] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3", 193 | "language": "python", 194 | "name": "python3" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.6.1" 207 | }, 208 | "toc": { 209 | "base_numbering": 1, 210 | "nav_menu": {}, 211 | "number_sections": false, 212 | "sideBar": true, 213 | "skip_h1_title": false, 214 | "title_cell": "Table of Contents", 215 | "title_sidebar": "Contents", 216 | "toc_cell": false, 217 | "toc_position": {}, 218 | "toc_section_display": true, 219 | "toc_window_display": true 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 2 224 | } 225 | -------------------------------------------------------------------------------- /exercises/exercise06-Wait.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 6 - Handling Slow Tasks\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to show how to use `ray.wait` to avoid waiting for slow tasks.\n", 10 | "\n", 11 | "See the documentation for `ray.wait` at https://ray.readthedocs.io/en/latest/package-ref.html?highlight=ray.wait#ray.wait.\n", 12 | "\n", 13 | "### Concepts for this Exercise - ray.wait\n", 14 | "\n", 15 | "After launching a number of tasks, you may want to know which ones have finished executing. This can be done with `ray.wait`. 
The function works as follows.\n", 16 | "\n", 17 | "```python\n", 18 | "ready_ids, remaining_ids = ray.wait(object_ids, num_returns=1, timeout=None)\n", 19 | "```\n", 20 | "\n", 21 | "**Arguments:**\n", 22 | "- `object_ids`: This is a list of object IDs.\n", 23 | "- `num_returns`: This is the maximum number of object IDs to wait for. The default value is `1`.\n", 24 | "- `timeout`: This is the maximum amount of time in milliseconds to wait for. So `ray.wait` will block until either `num_returns` objects are ready or until `timeout` milliseconds have passed.\n", 25 | "\n", 26 | "**Return values:**\n", 27 | "- `ready_ids`: This is a list of object IDs that are available in the object store.\n", 28 | "- `remaining_ids`: This is a list of the IDs that were in `object_ids` but are not in `ready_ids`, so the IDs in `ready_ids` and `remaining_ids` together make up all the IDs in `object_ids`." 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "from __future__ import absolute_import\n", 38 | "from __future__ import division\n", 39 | "from __future__ import print_function\n", 40 | "\n", 41 | "import numpy as np\n", 42 | "import ray\n", 43 | "import time" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "ray.init(num_cpus=6, include_webui=False, ignore_reinit_error=True)\n", 53 | "\n", 54 | "# Sleep a little to improve the accuracy of the timing measurements used below,\n", 55 | "# because some workers may still be starting up in the background.\n", 56 | "time.sleep(2.0)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "Define a remote function that takes a variable amount of time to run." 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "@ray.remote\n", 73 | "def f(i):\n", 74 | " np.random.seed(5 + i)\n", 75 | " x = np.random.uniform(0, 4)\n", 76 | " time.sleep(x)\n", 77 | " return i, time.time()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "**EXERCISE:** Using `ray.wait`, change the code below so that `initial_results` consists of the outputs of the first three tasks to complete instead of the first three tasks that were submitted." 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "start_time = time.time()\n", 94 | "\n", 95 | "# This launches 6 tasks, each of which takes a random amount of time to\n", 96 | "# complete.\n", 97 | "result_ids = [f.remote(i) for i in range(6)]\n", 98 | "# Get one batch of tasks. Instead of waiting for a fixed subset of tasks, we\n", 99 | "# should use the first 3 tasks that finish.\n", 100 | "initial_results = ray.get(result_ids[:3])\n", 101 | "\n", 102 | "end_time = time.time()\n", 103 | "duration = end_time - start_time" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "**EXERCISE:** Change the code below so that `remaining_results` consists of the outputs of the last three tasks to complete."
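For orientation, here is a minimal sketch of the `ray.wait` pattern these two exercises are built around. This is illustrative only, not the solution cell; it assumes the `result_ids` list defined above.

```python
# Sketch: split the six object IDs into the first three that finished and
# the IDs of the tasks that are still running.
ready_ids, not_ready_ids = ray.wait(result_ids, num_returns=3)

# Fetch only the finished results; the slower tasks keep running.
first_three_results = ray.get(ready_ids)
```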
111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "# Wait for the remaining tasks to complete.\n", 120 | "remaining_results = ray.get(result_ids[3:])" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "**VERIFY:** Run some checks to verify that the changes you made to the code were correct. Some of the checks should fail when you initially run the cells. After completing the exercises, the checks should pass." 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "assert len(initial_results) == 3\n", 137 | "assert len(remaining_results) == 3\n", 138 | "\n", 139 | "initial_indices = [result[0] for result in initial_results]\n", 140 | "initial_times = [result[1] for result in initial_results]\n", 141 | "remaining_indices = [result[0] for result in remaining_results]\n", 142 | "remaining_times = [result[1] for result in remaining_results]\n", 143 | "\n", 144 | "assert set(initial_indices + remaining_indices) == set(range(6))\n", 145 | "\n", 146 | "assert duration < 1.5, ('The initial batch of three tasks was retrieved in '\n", 147 | " '{} seconds. This is too slow.'.format(duration))\n", 148 | "\n", 149 | "assert duration > 0.8, ('The initial batch of three tasks was retrieved in '\n", 150 | " '{} seconds. This is too fast.'.format(duration))\n", 151 | "\n", 152 | "# Make sure the initial results actually completed first.\n", 153 | "assert max(initial_times) < min(remaining_times)\n", 154 | "\n", 155 | "print('Success! The example took {} seconds.'.format(duration))" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 3", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython3", 175 | "version": "3.6.7" 176 | }, 177 | "toc": { 178 | "base_numbering": 1, 179 | "nav_menu": {}, 180 | "number_sections": false, 181 | "sideBar": true, 182 | "skip_h1_title": false, 183 | "title_cell": "Table of Contents", 184 | "title_sidebar": "Contents", 185 | "toc_cell": false, 186 | "toc_position": {}, 187 | "toc_section_display": true, 188 | "toc_window_display": true 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 2 193 | } 194 | -------------------------------------------------------------------------------- /exercises/exercise07-Ordered_Wait.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 7 - Process Tasks in Order of Completion\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to show how to use `ray.wait` to process tasks in the order that they finish.\n", 10 | "\n", 11 | "See the documentation for ray.wait at https://ray.readthedocs.io/en/latest/package-ref.html?highlight=ray.wait#ray.wait.\n", 12 | "\n", 13 | "## Concepts for this exercise - `ray.wait`\n", 14 | "\n", 15 | "After launching a number of tasks, you may want to process the results in the order that the tasks finish. To do so, we build off of exercise 6 and use `ray.wait` to fetch each result as soon as it becomes available.
\n", 16 | "\n", 17 | "We are able to use `ray.wait` because the two lists returned by **`ray.wait` maintains the ordering of the input list**. That is, if `f` is a remote function, the code \n", 18 | "```python\n", 19 | " results = ray.wait([f.remote(i) for i in range(100)], num_returns=10)\n", 20 | "```\n", 21 | "will return `(ready_list, remain_list)` and the `ObjectID`s of in those lists will be ordered by the argument passed to `f` above." 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from __future__ import absolute_import\n", 31 | "from __future__ import division\n", 32 | "from __future__ import print_function\n", 33 | "\n", 34 | "import numpy as np\n", 35 | "import ray\n", 36 | "import time" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "ray.init(num_cpus=5, include_webui=False, ignore_reinit_error=True)\n", 46 | "\n", 47 | "# Sleep a little to improve the accuracy of the timing measurements used below,\n", 48 | "# because some workers may still be starting up in the background.\n", 49 | "time.sleep(2.0)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "@ray.remote\n", 59 | "def f():\n", 60 | " time.sleep(np.random.uniform(0, 5))\n", 61 | " return time.time()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "**EXERCISE:** Change the code below to use `ray.wait` to get the results of the tasks in the order that they complete.\n", 69 | "\n", 70 | "**NOTE:** It would be a simple modification to maintain a pool of 10 experiments and to start a new experiment whenever one finishes." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "start_time = time.time()\n", 80 | "\n", 81 | "remaining_result_ids = [f.remote() for _ in range(10)]\n", 82 | "\n", 83 | "# Get the results.\n", 84 | "results = []\n", 85 | "while len(remaining_result_ids) > 0:\n", 86 | " # EXERCISE: Instead of simply waiting for the first result from\n", 87 | " # remaining_result_ids, use ray.wait to get the first one to finish.\n", 88 | " result_id = remaining_result_ids[0]\n", 89 | " remaining_result_ids = remaining_result_ids[1:]\n", 90 | " result = ray.get(result_id)\n", 91 | " results.append(result)\n", 92 | " print('Processing result which finished after {} seconds.'\n", 93 | " .format(result - start_time))\n", 94 | "\n", 95 | "end_time = time.time()\n", 96 | "duration = end_time - start_time" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "**VERIFY:** Run some checks to verify that the changes you made to the code were correct. Some of the checks should fail when you initially run the cells. After completing the exercises, the checks should pass." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "assert results == sorted(results), ('The results were not processed in the '\n", 113 | " 'order that they finished.')\n", 114 | "\n", 115 | "print('Success! 
The example took {} seconds.'.format(duration))" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [] 124 | } 125 | ], 126 | "metadata": { 127 | "kernelspec": { 128 | "display_name": "Python 3", 129 | "language": "python", 130 | "name": "python3" 131 | }, 132 | "language_info": { 133 | "codemirror_mode": { 134 | "name": "ipython", 135 | "version": 3 136 | }, 137 | "file_extension": ".py", 138 | "mimetype": "text/x-python", 139 | "name": "python", 140 | "nbconvert_exporter": "python", 141 | "pygments_lexer": "ipython3", 142 | "version": "3.6.7" 143 | }, 144 | "toc": { 145 | "base_numbering": 1, 146 | "nav_menu": {}, 147 | "number_sections": false, 148 | "sideBar": true, 149 | "skip_h1_title": false, 150 | "title_cell": "Table of Contents", 151 | "title_sidebar": "Contents", 152 | "toc_cell": false, 153 | "toc_position": {}, 154 | "toc_section_display": true, 155 | "toc_window_display": true 156 | } 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 2 160 | } 161 | -------------------------------------------------------------------------------- /exercises/exercise08-Serialization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 8 - Speed up Serialization\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to illustrate how to speed up serialization by using `ray.put`.\n", 10 | "\n", 11 | "### Concepts for this Exercise - ray.put\n", 12 | "\n", 13 | "Object IDs can be created in multiple ways.\n", 14 | "- They are returned by remote function calls.\n", 15 | "- They are returned by actor method calls.\n", 16 | "- They are returned by `ray.put`.\n", 17 | "\n", 18 | "When an object is passed to `ray.put`, the object is serialized using the Apache Arrow format (see https://arrow.apache.org/ for more information about Arrow) and copied into a shared memory object store. This object will then be available to other workers on the same machine via shared memory. If it is needed by workers on another machine, it will be shipped under the hood.\n", 19 | "\n", 20 | "**When objects are passed into a remote function, Ray puts them in the object store under the hood.** That is, if `f` is a remote function, the code\n", 21 | "\n", 22 | "```python\n", 23 | "x = np.zeros(1000)\n", 24 | "f.remote(x)\n", 25 | "```\n", 26 | "\n", 27 | "is essentially transformed under the hood to\n", 28 | "\n", 29 | "```python\n", 30 | "x = np.zeros(1000)\n", 31 | "x_id = ray.put(x)\n", 32 | "f.remote(x_id)\n", 33 | "```\n", 34 | "\n", 35 | "The call to `ray.put` copies the numpy array into the shared-memory object store, from where it can be read by all of the worker processes (without additional copying). However, if you do something like\n", 36 | "\n", 37 | "```python\n", 38 | "for i in range(10):\n", 39 | " f.remote(x)\n", 40 | "```\n", 41 | "\n", 42 | "then 10 copies of the array will be placed into the object store. This takes up more memory in the object store than is necessary, and it also takes time to copy the array into the object store over and over. 
This can be made more efficient by placing the array in the object store only once as follows.\n", 43 | "\n", 44 | "```python\n", 45 | "x_id = ray.put(x)\n", 46 | "for i in range(10):\n", 47 | " f.remote(x_id)\n", 48 | "```\n", 49 | "\n", 50 | "In this exercise, you will speed up the code below and reduce the memory footprint by calling `ray.put` on the neural net weights before passing them into the remote functions.\n", 51 | "\n", 52 | "**WARNING:** This exercise requires a lot of memory to run. If this notebook is running within a Docker container, then the docker container must be started with a large shared-memory file system. This can be done by starting the docker container with the `--shm-size` flag." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "from __future__ import absolute_import\n", 62 | "from __future__ import division\n", 63 | "from __future__ import print_function\n", 64 | "\n", 65 | "import pickle\n", 66 | "import numpy as np\n", 67 | "import ray\n", 68 | "import time" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "ray.init(num_cpus=4, include_webui=False, ignore_reinit_error=True)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "Define some neural net weights which will be passed into a number of tasks." 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "neural_net_weights = {'variable{}'.format(i): np.random.normal(size=2**18)\n", 94 | " for i in range(50)}" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "**EXERCISE:** Compare the time required to serialize the neural net weights and copy them into the object store using Ray versus the time required to pickle and unpickle the weights. The big win should be with the time required for *deserialization*.\n", 102 | "\n", 103 | "Note that when you call `ray.put`, in addition to serializing the object, we are copying it into shared memory where it can be efficiently accessed by other workers on the same machine.\n", 104 | "\n", 105 | "**NOTE:** You don't actually have to do anything here other than run the cell below and read the output.\n", 106 | "\n", 107 | "**NOTE:** Sometimes `ray.put` can be faster than `pickle.dumps`. This is because `ray.put` leverages multiple threads when serializing large objects. Note that this is not possible with `pickle`." 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "print('Ray - serializing')\n", 117 | "%time x_id = ray.put(neural_net_weights)\n", 118 | "print('\\nRay - deserializing')\n", 119 | "%time x_val = ray.get(x_id)\n", 120 | "\n", 121 | "print('\\npickle - serializing')\n", 122 | "%time serialized = pickle.dumps(neural_net_weights)\n", 123 | "print('\\npickle - deserializing')\n", 124 | "%time deserialized = pickle.loads(serialized)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "Define a remote function which uses the neural net weights." 
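Before the exercise cells, a small timing sketch of the put-once pattern described above may be useful. This is illustrative and separate from the notebook's cells; `g` is a hypothetical remote function, not part of the exercise.

```python
import time
import numpy as np
# Assumes ray has been initialized, as in the cells above.

@ray.remote
def g(array):
    return array.sum()

big_array = np.zeros(10**7)

start = time.time()
# Passing the array by value serializes it into the object store once per task.
ray.get([g.remote(big_array) for _ in range(10)])
print('By value: {:.3f} seconds.'.format(time.time() - start))

start = time.time()
# Putting the array once and passing the ID shares a single copy in the store.
big_array_id = ray.put(big_array)
ray.get([g.remote(big_array_id) for _ in range(10)])
print('By object ID: {:.3f} seconds.'.format(time.time() - start))
```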
132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "@ray.remote\n", 141 | "def use_weights(weights, i):\n", 142 | " return i" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "**EXERCISE:** In the code below, use `ray.put` to avoid copying the neural net weights to the object store multiple times." 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "# Sleep a little to improve the accuracy of the timing measurements below.\n", 159 | "time.sleep(2.0)\n", 160 | "start_time = time.time()\n", 161 | "\n", 162 | "results = ray.get([use_weights.remote(neural_net_weights, i)\n", 163 | " for i in range(20)])\n", 164 | "\n", 165 | "end_time = time.time()\n", 166 | "duration = end_time - start_time" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "**VERIFY:** Run some checks to verify that the changes you made to the code were correct. Some of the checks should fail when you initially run the cells. After completing the exercises, the checks should pass." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "assert results == list(range(20))\n", 183 | "assert duration < 1, ('The experiments ran in {} seconds. This is too '\n", 184 | " 'slow.'.format(duration))\n", 185 | "\n", 186 | "print('Success! The example took {} seconds.'.format(duration))" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.6.1" 214 | }, 215 | "toc": { 216 | "base_numbering": 1, 217 | "nav_menu": {}, 218 | "number_sections": false, 219 | "sideBar": true, 220 | "skip_h1_title": false, 221 | "title_cell": "Table of Contents", 222 | "title_sidebar": "Contents", 223 | "toc_cell": false, 224 | "toc_position": { 225 | "height": "calc(100% - 180px)", 226 | "left": "10px", 227 | "top": "150px", 228 | "width": "382.391px" 229 | }, 230 | "toc_section_display": true, 231 | "toc_window_display": true 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 2 236 | } 237 | -------------------------------------------------------------------------------- /exercises/exercise09-GPUs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 9 - Using the GPU API\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to show how to use GPUs with remote functions and actors.\n", 10 | "\n", 11 | "**NOTE:** These exercises are designed to run on a machine without GPUs.\n", 12 | "\n", 13 | "See the documentation on using Ray with GPUs http://ray.readthedocs.io/en/latest/using-ray-with-gpus.html.\n", 14 | "\n", 15 | "### Concepts for this Exercise - Using Ray with GPUs\n", 16 | "\n", 17 
| "We can indicate that a remote function or an actor requires some GPUs using the `num_gpus` keyword.\n", 18 | "\n", 19 | "```python\n", 20 | "@ray.remote(num_gpus=1)\n", 21 | "def f():\n", 22 | " # The command ray.get_gpu_ids() returns a list of the indices\n", 23 | " # of the GPUs that this task can use (e.g., [0] or [1]).\n", 24 | " ray.get_gpu_ids()\n", 25 | "\n", 26 | "@ray.remote(num_gpus=2)\n", 27 | "class Foo(object):\n", 28 | " def __init__(self):\n", 29 | " # The command ray.get_gpu_ids() returns a list of the\n", 30 | " # indices of the GPUs that this actor can use\n", 31 | " # (e.g., [0, 1] or [3, 5]).\n", 32 | " ray.get_gpu_ids()\n", 33 | "```\n", 34 | "\n", 35 | "Then inside of the actor constructor and methods, we can get the IDs of the GPUs allocated for that actor with `ray.get_gpu_ids()`." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from __future__ import absolute_import\n", 45 | "from __future__ import division\n", 46 | "from __future__ import print_function\n", 47 | "\n", 48 | "import numpy as np\n", 49 | "import ray\n", 50 | "import time" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "Start Ray, note that we pass in `num_gpus=4`. Ray will assume this machine has 4 GPUs (even if it does not). When a task or actor requests a GPU, it will be assigned a GPU ID from the set `[0, 1, 2, 3]`. It is then the responsibility of the task or actor to make sure that it only uses that specific GPU (e.g., by setting the `CUDA_VISIBLE_DEVICES` environment variable)." 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "ray.init(num_cpus=4, num_gpus=2, include_webui=False, ignore_reinit_error=True)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "**EXERCISE:** Change the remote function below to require one GPU.\n", 74 | "\n", 75 | "**NOTE:** This change does not make the remote function actually **use** the GPU, it simply **reserves** the GPU for use by the remote function. To actually use the GPU, the remote function would use a neural net library like TensorFlow or PyTorch after setting the `CUDA_VISIBLE_DEVICES` environment variable properly. This can be done as follows.\n", 76 | "\n", 77 | "```python\n", 78 | "import os\n", 79 | "os.environ['CUDA_VISIBLE_DEVICES'] = ','.join([str(i) for i in ray.get_gpu_ids()])\n", 80 | "```" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "@ray.remote\n", 90 | "def f():\n", 91 | " time.sleep(0.5)\n", 92 | " return ray.get_gpu_ids()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "**VERIFY:** This code checks that each task was assigned one GPU and that not more than two tasks are run at the same time (because we told Ray there are only two GPUs)." 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "start_time = time.time()\n", 109 | "\n", 110 | "gpu_ids = ray.get([f.remote() for _ in range(3)])\n", 111 | "\n", 112 | "end_time = time.time()\n", 113 | "\n", 114 | "for i in range(len(gpu_ids)):\n", 115 | " assert len(gpu_ids[i]) == 1\n", 116 | "\n", 117 | "assert end_time - start_time > 1\n", 118 | "\n", 119 | "print('Sucess! 
The test passed.')" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "**EXERCISE:** The code below defines an actor. Make it require one GPU." 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "@ray.remote\n", 136 | "class Actor(object):\n", 137 | " def __init__(self):\n", 138 | " pass\n", 139 | "\n", 140 | " def get_gpu_ids(self):\n", 141 | " return ray.get_gpu_ids()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "**VERIFY:** This code checks that the actor was assigned a GPU." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "actor = Actor.remote()\n", 158 | "\n", 159 | "gpu_ids = ray.get(actor.get_gpu_ids.remote())\n", 160 | "\n", 161 | "assert len(gpu_ids) == 1\n", 162 | "\n", 163 | "print('Sucess! The test passed.')" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [] 172 | } 173 | ], 174 | "metadata": { 175 | "kernelspec": { 176 | "display_name": "Python 3", 177 | "language": "python", 178 | "name": "python3" 179 | }, 180 | "language_info": { 181 | "codemirror_mode": { 182 | "name": "ipython", 183 | "version": 3 184 | }, 185 | "file_extension": ".py", 186 | "mimetype": "text/x-python", 187 | "name": "python", 188 | "nbconvert_exporter": "python", 189 | "pygments_lexer": "ipython3", 190 | "version": "3.6.1" 191 | }, 192 | "toc": { 193 | "base_numbering": 1, 194 | "nav_menu": {}, 195 | "number_sections": false, 196 | "sideBar": true, 197 | "skip_h1_title": false, 198 | "title_cell": "Table of Contents", 199 | "title_sidebar": "Contents", 200 | "toc_cell": false, 201 | "toc_position": {}, 202 | "toc_section_display": true, 203 | "toc_window_display": true 204 | } 205 | }, 206 | "nbformat": 4, 207 | "nbformat_minor": 2 208 | } 209 | -------------------------------------------------------------------------------- /exercises/exercise10-Custom_Resources.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 10 - Custom Resources\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to show how to use custom resources\n", 10 | "\n", 11 | "See the documentation on using Ray with custom resources http://ray.readthedocs.io/en/latest/resources.html#custom-resources.\n", 12 | "\n", 13 | "### Concepts for this Exercise - Using Custom Resources\n", 14 | "\n", 15 | "We've discussed how to specify a task's CPU and GPU requirements, but there are many other kinds of resources. For example, a task may require a dataset, which only lives on a few machines, or it may need to be scheduled on a machine with extra memory. These kinds of requirements can be expressed through the use of custom resources.\n", 16 | "\n", 17 | "Custom resources are most useful in the multi-machine setting. 
However, this exercise illustrates their usage in the single-machine setting.\n", 18 | "\n", 19 | "Ray can be started with a dictionary of custom resources (mapping resource name to resource quantity) as follows.\n", 20 | "\n", 21 | "```python\n", 22 | "ray.init(resources={'CustomResource1': 1, 'CustomResource2': 4})\n", 23 | "```\n", 24 | "\n", 25 | "The resource requirements of a remote function or actor can be specified in a similar way.\n", 26 | "\n", 27 | "```python\n", 28 | "@ray.remote(resources={'CustomResource2': 1})\n", 29 | "def f():\n", 30 | " return 1\n", 31 | "```\n", 32 | "\n", 33 | "Even if there are many CPUs on the machine, only 4 copies of `f` can be executed concurrently.\n", 34 | "\n", 35 | "Custom resources give applications a great deal of flexibility. For example, if you wish to control precisely which machine a task gets scheduled on, you can simply start each machine with a different custom resource (e.g., start machine `n` with resource `Custom_n`, and then tasks that should be scheduled on machine `n` can require resource `Custom_n`). However, this usage has drawbacks because it makes the code less portable and less resilient to machine failures." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from __future__ import absolute_import\n", 45 | "from __future__ import division\n", 46 | "from __future__ import print_function\n", 47 | "\n", 48 | "import ray\n", 49 | "import time" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "In this exercise, we will start Ray using custom resources." 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "ray.init(num_cpus=8, resources={'Custom1': 4}, include_webui=False, ignore_reinit_error=True)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "**EXERCISE:** Modify the resource requirements of the remote functions below so that the following hold.\n", 73 | "- The number of concurrently executing tasks is at most 8 (note that there are 8 CPUs).\n", 74 | "- No more than 4 copies of `g` can execute concurrently (note that there are only 4 \"Custom1\" resources).\n", 75 | "- If 4 `g` tasks are executing, then an additional 4 `f` tasks can execute.\n", 76 | "\n", 77 | "You should only need to use the `Custom1` resource." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "@ray.remote\n", 87 | "def f():\n", 88 | " time.sleep(0.1)\n", 89 | "\n", 90 | "@ray.remote\n", 91 | "def g():\n", 92 | " time.sleep(0.1)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "If you did the above exercise correctly, the next cell should execute without raising an exception."
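As an aside before the checks, a self-contained sketch of the scheduling behavior described in the concepts section. It assumes a fresh Ray instance, so it is not meant to run inside this notebook, and the resource name `Database` is invented for illustration.

```python
import time
import ray

# Two units of a made-up 'Database' resource are available on this node.
ray.init(num_cpus=8, resources={'Database': 2})

@ray.remote(resources={'Database': 1})
def query():
    # At most two of these tasks run at once, regardless of free CPUs,
    # because each holds one of the two 'Database' units while it runs.
    time.sleep(0.1)

ray.get([query.remote() for _ in range(4)])  # ~0.2 seconds: two rounds of two
```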
100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "start = time.time()\n", 109 | "ray.get([f.remote() for _ in range(8)])\n", 110 | "duration = time.time() - start \n", 111 | "assert duration >= 0.1 and duration < 0.19, '8 f tasks should be able to execute concurrently.'\n", 112 | "\n", 113 | "start = time.time()\n", 114 | "ray.get([f.remote() for _ in range(9)])\n", 115 | "duration = time.time() - start \n", 116 | "assert duration >= 0.2 and duration < 0.29, '9 f tasks should not be able to execute concurrently.'\n", 117 | "\n", 118 | "start = time.time()\n", 119 | "ray.get([g.remote() for _ in range(4)])\n", 120 | "duration = time.time() - start \n", 121 | "assert duration >= 0.1 and duration < 0.19, '4 g tasks should be able to execute concurrently.'\n", 122 | "\n", 123 | "start = time.time()\n", 124 | "ray.get([g.remote() for _ in range(5)])\n", 125 | "duration = time.time() - start \n", 126 | "assert duration >= 0.2 and duration < 0.29, '5 g tasks should not be able to execute concurrently.'\n", 127 | "\n", 128 | "start = time.time()\n", 129 | "ray.get([f.remote() for _ in range(4)] + [g.remote() for _ in range(4)])\n", 130 | "duration = time.time() - start \n", 131 | "assert duration >= 0.1 and duration < 0.19, '4 f and 4 g tasks should be able to execute concurrently.'\n", 132 | "\n", 133 | "start = time.time()\n", 134 | "ray.get([f.remote() for _ in range(5)] + [g.remote() for _ in range(4)])\n", 135 | "duration = time.time() - start \n", 136 | "assert duration >= 0.2 and duration < 0.29, '5 f and 4 g tasks should not be able to execute concurrently.'\n", 137 | "\n", 138 | "print('Success!')" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 3 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython3", 165 | "version": "3.6.1" 166 | }, 167 | "toc": { 168 | "base_numbering": 1, 169 | "nav_menu": {}, 170 | "number_sections": false, 171 | "sideBar": true, 172 | "skip_h1_title": false, 173 | "title_cell": "Table of Contents", 174 | "title_sidebar": "Contents", 175 | "toc_cell": false, 176 | "toc_position": {}, 177 | "toc_section_display": true, 178 | "toc_window_display": true 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 2 183 | } 184 | -------------------------------------------------------------------------------- /exercises/exercise11-TensorFlow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 11 - Pass Neural Net Weights Between Processes\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to show how to send neural network weights between workers and the driver.\n", 10 | "\n", 11 | "For more details on using Ray with TensorFlow, see the documentation at http://ray.readthedocs.io/en/latest/using-ray-with-tensorflow.html.\n", 12 | "\n", 13 | "### Concepts for this Exercise - Getting and Setting Neural Net Weights\n", 14 | "\n", 15 | "Since pickling and unpickling a
TensorFlow graph can be inefficient or may not work at all, it is most efficient to ship the weights between processes as a dictionary of numpy arrays (or as a flattened numpy array).\n", 16 | "\n", 17 | "We provide the helper class `ray.experimental.TensorFlowVariables` to help with getting and setting weights. Similar techniques should work with other neural net libraries.\n", 18 | "\n", 19 | "Consider the following neural net definition.\n", 20 | "\n", 21 | "```python\n", 22 | "import tensorflow as tf\n", 23 | "\n", 24 | "x_data = tf.placeholder(tf.float32, shape=[100])\n", 25 | "y_data = tf.placeholder(tf.float32, shape=[100])\n", 26 | "\n", 27 | "w = tf.Variable(tf.random_uniform([1], -1.0, 1.0))\n", 28 | "b = tf.Variable(tf.zeros([1]))\n", 29 | "y = w * x_data + b\n", 30 | "\n", 31 | "loss = tf.reduce_mean(tf.square(y - y_data))\n", 32 | "optimizer = tf.train.GradientDescentOptimizer(0.5)\n", 33 | "grads = optimizer.compute_gradients(loss)\n", 34 | "train = optimizer.apply_gradients(grads)\n", 35 | "\n", 36 | "init = tf.global_variables_initializer()\n", 37 | "sess = tf.Session()\n", 38 | "sess.run(init)\n", 39 | "```\n", 40 | "\n", 41 | "Then we can use the helper class as follows.\n", 42 | "\n", 43 | "```python\n", 44 | "variables = ray.experimental.TensorFlowVariables(loss, sess)\n", 45 | "# Here 'weights' is a dictionary mapping variable names to the associated\n", 46 | "# weights as a numpy array.\n", 47 | "weights = variables.get_weights()\n", 48 | "variables.set_weights(weights)\n", 49 | "```\n", 50 | "\n", 51 | "Note that there are analogous methods `variables.get_flat` and `variables.set_flat`, which concatenate the weights as a single array instead of a dictionary.\n", 52 | "\n", 53 | "```python\n", 54 | "# Here 'weights' is a numpy array of all of the neural net weights\n", 55 | "# concatenated together.\n", 56 | "weights = variables.get_flat()\n", 57 | "variables.set_flat(weights)\n", 58 | "```\n", 59 | "\n", 60 | "In this exercise, we will use an actor containing a neural network and implement methods to extract and set the neural net weights.\n", 61 | "\n", 62 | "**WARNING:** This exercise is more complex than previous exercises." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "from __future__ import absolute_import\n", 72 | "from __future__ import division\n", 73 | "from __future__ import print_function\n", 74 | "\n", 75 | "import numpy as np\n", 76 | "import ray\n", 77 | "import tensorflow as tf\n", 78 | "import time" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "ray.init(num_cpus=4, include_webui=False, ignore_reinit_error=True)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "The code below defines a class containing a simple neural network.\n", 95 | "\n", 96 | "**EXERCISE:** Implement the `set_weights` and `get_weights` methods. This should be done using the `ray.experimental.TensorFlowVariables` helper class."
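For reference, one way the two method bodies could look, using the helper's `get_weights`/`set_weights` calls shown above. This is a sketch of possible bodies for the class defined in the next cell; substituting `get_flat`/`set_flat` would work the same way and simplifies the averaging step later.

```python
# Sketch of possible method bodies (to be placed inside SimpleModel below).
def set_weights(self, weights):
    # Assign the given weights (dict of variable name -> numpy array).
    self.variables.set_weights(weights)

def get_weights(self):
    # Return the current weights (dict of variable name -> numpy array).
    return self.variables.get_weights()
```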
97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "@ray.remote\n", 106 | "class SimpleModel(object):\n", 107 | " def __init__(self):\n", 108 | " x_data = tf.placeholder(tf.float32, shape=[100])\n", 109 | " y_data = tf.placeholder(tf.float32, shape=[100])\n", 110 | "\n", 111 | " w = tf.Variable(tf.random_uniform([1], -1.0, 1.0))\n", 112 | " b = tf.Variable(tf.zeros([1]))\n", 113 | " y = w * x_data + b\n", 114 | "\n", 115 | " self.loss = tf.reduce_mean(tf.square(y - y_data))\n", 116 | " optimizer = tf.train.GradientDescentOptimizer(0.5)\n", 117 | " grads = optimizer.compute_gradients(self.loss)\n", 118 | " self.train = optimizer.apply_gradients(grads)\n", 119 | "\n", 120 | " init = tf.global_variables_initializer()\n", 121 | " self.sess = tf.Session()\n", 122 | "\n", 123 | " # Here we create the TensorFlowVariables object to assist with getting\n", 124 | " # and setting weights.\n", 125 | " self.variables = ray.experimental.TensorFlowVariables(self.loss, self.sess)\n", 126 | "\n", 127 | " self.sess.run(init)\n", 128 | "\n", 129 | " def set_weights(self, weights):\n", 130 | " \"\"\"Set the neural net weights.\n", 131 | " \n", 132 | " This method should assign the given weights to the neural net.\n", 133 | " \n", 134 | " Args:\n", 135 | " weights: Either a dict mapping strings (the variable names) to numpy\n", 136 | " arrays or a single flattened numpy array containing all of the\n", 137 | " concatenated weights.\n", 138 | " \"\"\"\n", 139 | " # EXERCISE: You will want to use self.variables here.\n", 140 | " raise NotImplementedError\n", 141 | "\n", 142 | " def get_weights(self):\n", 143 | " \"\"\"Get the neural net weights.\n", 144 | " \n", 145 | " This method should return the current neural net weights.\n", 146 | " \n", 147 | " Returns:\n", 148 | " Either a dict mapping strings (the variable names) to numpy arrays or\n", 149 | " a single flattened numpy array containing all of the concatenated\n", 150 | " weights.\n", 151 | " \"\"\"\n", 152 | " # EXERCISE: You will want to use self.variables here.\n", 153 | " raise NotImplementedError" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "Create a few actors." 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "actors = [SimpleModel.remote() for _ in range(4)]" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "**EXERCISE:** Get the neural net weights from all of the actors." 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "raise Exception('Implement this.')" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "**EXERCISE:** Average all of the neural net weights.\n", 193 | "\n", 194 | "**NOTE:** This will be easier to do if you chose to use `get_flat`/`set_flat` instead of `get_weights`/`set_weights` in the implementation of `SimpleModel.set_weights` and `SimpleModel.get_weights` above."
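If the flat representation is used, the three steps that follow collapse to a few lines. A sketch, assuming the methods above were implemented with `get_flat`/`set_flat` so that each actor returns a single numpy array:

```python
# Sketch: fetch all weights in parallel, average them, and broadcast the mean.
flat_weights = ray.get([actor.get_weights.remote() for actor in actors])
mean_weights = np.mean(flat_weights, axis=0)  # elementwise average
ray.get([actor.set_weights.remote(mean_weights) for actor in actors])
```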
195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "raise Exception('Implement this.')" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "**EXERCISE:** Set the average weights on the actors." 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "raise Exception('Implement this.')" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "**VERIFY:** Check that all of the actors have the same weights." 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "weights = ray.get([actor.get_weights.remote() for actor in actors])\n", 236 | "\n", 237 | "for i in range(len(weights)):\n", 238 | " np.testing.assert_equal(weights[i], weights[0])\n", 239 | "\n", 240 | "print('Success! The test passed.')" 241 | ] 242 | } 243 | ], 244 | "metadata": { 245 | "kernelspec": { 246 | "display_name": "Python 3", 247 | "language": "python", 248 | "name": "python3" 249 | }, 250 | "language_info": { 251 | "codemirror_mode": { 252 | "name": "ipython", 253 | "version": 3 254 | }, 255 | "file_extension": ".py", 256 | "mimetype": "text/x-python", 257 | "name": "python", 258 | "nbconvert_exporter": "python", 259 | "pygments_lexer": "ipython3", 260 | "version": "3.6.1" 261 | }, 262 | "toc": { 263 | "base_numbering": 1, 264 | "nav_menu": {}, 265 | "number_sections": false, 266 | "sideBar": true, 267 | "skip_h1_title": false, 268 | "title_cell": "Table of Contents", 269 | "title_sidebar": "Contents", 270 | "toc_cell": false, 271 | "toc_position": {}, 272 | "toc_section_display": true, 273 | "toc_window_display": true 274 | } 275 | }, 276 | "nbformat": 4, 277 | "nbformat_minor": 2 278 | } 279 | -------------------------------------------------------------------------------- /exercises/exercise12-TreeReduce.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 12 - Tree Reduce\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to show how to implement a tree reduce in Ray by passing object IDs into remote functions to encode dependencies between tasks.\n", 10 | "\n", 11 | "In this exercise, you will use Ray to implement parallel data generation and a parallel tree reduction." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from __future__ import absolute_import\n", 21 | "from __future__ import division\n", 22 | "from __future__ import print_function\n", 23 | "\n", 24 | "import numpy as np\n", 25 | "import ray\n", 26 | "import time" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "ray.init(num_cpus=8, include_webui=False, ignore_reinit_error=True)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "**EXERCISE:** These functions will need to be turned into remote functions so that the tree of tasks can be executed in parallel." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# This is a proxy for a function which generates some data.\n", 52 | "def create_data(i):\n", 53 | " time.sleep(0.3)\n", 54 | " return i * np.ones(10000)\n", 55 | "\n", 56 | "# This is a proxy for an expensive aggregation step (which is also\n", 57 | "# commutative and associative so it can be used in a tree-reduce).\n", 58 | "def aggregate_data(x, y):\n", 59 | " time.sleep(0.3)\n", 60 | " return x * y" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "**EXERCISE:** Make the data creation tasks run in parallel. Also aggregate the vectors in parallel. Note that the `aggregate_data` function must be called 7 times. These calls cannot all run in parallel because some depend on the outputs of others. However, it is possible to first run 4 in parallel, then 2 in parallel, and then 1." 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "# Sleep a little to improve the accuracy of the timing measurements below.\n", 77 | "time.sleep(1.0)\n", 78 | "start_time = time.time()\n", 79 | "\n", 80 | "# EXERCISE: Here we generate some data. Do this part in parallel.\n", 81 | "vectors = [create_data(i + 1) for i in range(8)]\n", 82 | "\n", 83 | "# Here we aggregate all of the data by repeatedly calling aggregate_data. This\n", 84 | "# can be sped up using Ray.\n", 85 | "#\n", 86 | "# NOTE: A direct translation of the code below to use Ray will not result in\n", 87 | "# a speedup because each function call uses the output of the previous function\n", 88 | "# call so the function calls must be executed serially.\n", 89 | "#\n", 90 | "# EXERCISE: Speed up the aggregation below by using Ray. Note that this will\n", 91 | "# require restructuring the code to expose more parallelism. First run 4 tasks\n", 92 | "# aggregating the 8 values in pairs. Then run 2 tasks aggregating the resulting\n", 93 | "# 4 intermediate values in pairs. Then run 1 task aggregating the two resulting\n", 94 | "# values.
Lastly, you will need to call ray.get to retrieve the final result.\n", 95 | "#\n", 96 | "# Exposing more parallelism means aggregating the vectors in a DIFFERENT ORDER.\n", 97 | "# This can be done because we are simply multiplying the data and the order in\n", 98 | "# which the values are multiplied doesn't matter (it's commutative and associative).\n", 99 | "result = aggregate_data(vectors[0], vectors[1])\n", 100 | "result = aggregate_data(result, vectors[2])\n", 101 | "result = aggregate_data(result, vectors[3])\n", 102 | "result = aggregate_data(result, vectors[4])\n", 103 | "result = aggregate_data(result, vectors[5])\n", 104 | "result = aggregate_data(result, vectors[6])\n", 105 | "result = aggregate_data(result, vectors[7])\n", 106 | "\n", 107 | "# NOTE: For clarity, the aggregation above is written out as 7 separate function\n", 108 | "# calls, but this can be done more easily in a while loop via\n", 109 | "#\n", 110 | "# while len(vectors) > 1:\n", 111 | "#     vectors = [aggregate_data(vectors[0], vectors[1])] + vectors[2:]\n", 112 | "# result = vectors[0]\n", 113 | "#\n", 114 | "# When expressed this way, the change from serial aggregation to tree-structured\n", 115 | "# aggregation can be made simply by appending the result of aggregate_data to the\n", 116 | "# end of the vectors list as opposed to the beginning.\n", 117 | "#\n", 118 | "# EXERCISE: Think about why this is true.\n", 119 | "\n", 120 | "end_time = time.time()\n", 121 | "duration = end_time - start_time" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "**VERIFY:** Run some checks to verify that the changes you made to the code were correct. Some of the checks should fail when you initially run the cells. After completing the exercises, the checks should pass." 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "assert np.all(result == 40320 * np.ones(10000)), ('Did you remember to '\n", 138 | " 'call ray.get?')\n", 139 | "assert duration < 0.3 + 0.9 + 0.3, ('FAILURE: The data generation and '\n", 140 | " 'aggregation took {} seconds. This is '\n", 141 | " 'too slow'.format(duration))\n", 142 | "assert duration > 0.3 + 0.9, ('FAILURE: The data generation and '\n", 143 | " 'aggregation took {} seconds. This is '\n", 144 | " 'too fast'.format(duration))\n", 145 | "\n", 146 | "print('Success! The example took {} seconds.'.format(duration))" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "**EXERCISE:** Use the UI to view the task timeline and to verify that the vectors were aggregated with a tree of tasks.\n", 154 | "\n", 155 | "You should be able to see the 8 `create_data` tasks running in parallel followed by 4 `aggregate_data` tasks running in parallel followed by 2 more `aggregate_data` tasks followed by 1 more `aggregate_data` task.\n", 156 | "\n", 157 | "In the timeline, click on **View Options** and select **Flow Events** to visualize task dependencies."
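For reference, the tree-structured loop described in the comments above might be sketched as follows. It assumes `create_data` and `aggregate_data` have been decorated with `@ray.remote`; this is one possible shape, not the notebook's verified solution.

```python
# Sketch: launch the data-generation tasks in parallel, then repeatedly
# aggregate pairs, appending each pairwise result to the END of the list so
# that independent aggregations can run concurrently (4, then 2, then 1).
vectors = [create_data.remote(i + 1) for i in range(8)]

while len(vectors) > 1:
    vectors = vectors[2:] + [aggregate_data.remote(vectors[0], vectors[1])]

result = ray.get(vectors[0])
```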
158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "import ray.experimental.ui as ui\n", 167 | "ui.task_timeline()" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python 3", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.6.1" 188 | }, 189 | "toc": { 190 | "base_numbering": 1, 191 | "nav_menu": {}, 192 | "number_sections": false, 193 | "sideBar": true, 194 | "skip_h1_title": false, 195 | "title_cell": "Table of Contents", 196 | "title_sidebar": "Contents", 197 | "toc_cell": false, 198 | "toc_position": {}, 199 | "toc_section_display": true, 200 | "toc_window_display": true 201 | } 202 | }, 203 | "nbformat": 4, 204 | "nbformat_minor": 2 205 | } 206 | -------------------------------------------------------------------------------- /postBuild: -------------------------------------------------------------------------------- 1 | git clone -b learning https://github.com/ray-project/qreader 2 | cd qreader 3 | npm install; npm run build 4 | cd dist 5 | sed -i 's-/static/-static/-g' index.html 6 | cd .. 7 | cd .. 8 | # enable nbserverproxy 9 | jupyter serverextension enable --sys-prefix nbserverproxy 10 | # install the tutorial extension to start processes 11 | mv tutorialextension.py ${NB_PYTHON_PREFIX}/lib/python*/site-packages/ 12 | # enable tutorial extension 13 | jupyter serverextension enable --sys-prefix tutorialextension 14 | cd ~/rllib_exercises/serving/pong_py 15 | pip install -e . 
16 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | modin 2 | tensorflow 3 | gym 4 | scipy 5 | opencv-python 6 | bokeh 7 | ipywidgets==6.0.0 8 | keras 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile 6 | # 7 | absl-py==0.9.0 # via tensorboard, tensorflow 8 | astunparse==1.6.3 # via tensorflow 9 | attrs==19.3.0 # via jsonschema 10 | backcall==0.2.0 # via ipython 11 | bleach==3.1.5 # via nbconvert 12 | bokeh==2.1.1 # via -r requirements.in 13 | cachetools==4.1.1 # via google-auth 14 | certifi==2020.6.20 # via requests 15 | chardet==3.0.4 # via requests 16 | cloudpickle==1.3.0 # via gym 17 | decorator==4.4.2 # via ipython, traitlets 18 | defusedxml==0.6.0 # via nbconvert 19 | entrypoints==0.3 # via nbconvert 20 | future==0.18.2 # via pyglet 21 | gast==0.3.3 # via tensorflow 22 | google-auth-oauthlib==0.4.1 # via tensorboard 23 | google-auth==1.19.2 # via google-auth-oauthlib, tensorboard 24 | google-pasta==0.2.0 # via tensorflow 25 | grpcio==1.30.0 # via tensorboard, tensorflow 26 | gym==0.17.2 # via -r requirements.in 27 | h5py==2.10.0 # via keras, tensorflow 28 | idna==2.10 # via requests 29 | ipykernel==5.3.3 # via ipywidgets, notebook 30 | ipython-genutils==0.2.0 # via nbformat, notebook, traitlets 31 | ipython==7.16.1 # via ipykernel, ipywidgets 32 | ipywidgets==6.0.0 # via -r requirements.in 33 | jedi==0.17.2 # via ipython 34 | jinja2==2.11.2 # via bokeh, nbconvert, notebook 35 | jsonschema==3.2.0 # via nbformat 36 | jupyter-client==6.1.6 # via ipykernel, notebook 37 | jupyter-core==4.6.3 # via jupyter-client, nbconvert, nbformat, notebook 38 | keras-preprocessing==1.1.2 # via tensorflow 39 | keras==2.4.3 # via -r requirements.in 40 | markdown==3.2.2 # via tensorboard 41 | markupsafe==1.1.1 # via jinja2 42 | mistune==0.8.4 # via nbconvert 43 | modin==0.7.4 # via -r requirements.in 44 | nbconvert==5.6.1 # via notebook 45 | nbformat==5.0.7 # via ipywidgets, nbconvert, notebook 46 | notebook==6.0.3 # via widgetsnbextension 47 | numpy==1.18.5 # via bokeh, gym, h5py, keras, keras-preprocessing, opencv-python, opt-einsum, pandas, scipy, tensorboard, tensorflow 48 | oauthlib==3.1.0 # via requests-oauthlib 49 | opencv-python==4.3.0.36 # via -r requirements.in 50 | opt-einsum==3.3.0 # via tensorflow 51 | packaging==20.4 # via bleach, bokeh, modin 52 | pandas==1.0.5 # via modin 53 | pandocfilters==1.4.2 # via nbconvert 54 | parso==0.7.0 # via jedi 55 | pexpect==4.8.0 # via ipython 56 | pickleshare==0.7.5 # via ipython 57 | pillow==7.2.0 # via bokeh 58 | prometheus-client==0.8.0 # via notebook 59 | prompt-toolkit==3.0.5 # via ipython 60 | protobuf==3.12.2 # via tensorboard, tensorflow 61 | ptyprocess==0.6.0 # via pexpect, terminado 62 | pyasn1-modules==0.2.8 # via google-auth 63 | pyasn1==0.4.8 # via pyasn1-modules, rsa 64 | pyglet==1.5.0 # via gym 65 | pygments==2.6.1 # via ipython, nbconvert 66 | pyparsing==2.4.7 # via packaging 67 | pyrsistent==0.16.0 # via jsonschema 68 | python-dateutil==2.8.1 # via bokeh, jupyter-client, pandas 69 | pytz==2020.1 # via pandas 70 | pyyaml==5.3.1 # via bokeh, keras 71 | pyzmq==19.0.1 # via jupyter-client, notebook 72 | requests-oauthlib==1.3.0 # via google-auth-oauthlib 73 | requests==2.24.0 # 
via requests-oauthlib, tensorboard 74 | rsa==4.6 # via google-auth 75 | scipy==1.4.1 # via -r requirements.in, gym, keras 76 | send2trash==1.5.0 # via notebook 77 | six==1.15.0 # via absl-py, astunparse, bleach, google-auth, google-pasta, grpcio, h5py, jsonschema, keras-preprocessing, packaging, protobuf, pyrsistent, python-dateutil, tensorboard, tensorflow, traitlets 78 | tensorboard-plugin-wit==1.7.0 # via tensorboard 79 | tensorboard==2.2.2 # via tensorflow 80 | tensorflow-estimator==2.2.0 # via tensorflow 81 | tensorflow==2.2.1 # via -r requirements.in 82 | termcolor==1.1.0 # via tensorflow 83 | terminado==0.8.3 # via notebook 84 | testpath==0.4.4 # via nbconvert 85 | tornado==6.0.4 # via bokeh, ipykernel, jupyter-client, notebook, terminado 86 | traitlets==4.3.3 # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook 87 | typing-extensions==3.7.4.2 # via bokeh 88 | urllib3==1.25.9 # via requests 89 | wcwidth==0.2.5 # via prompt-toolkit 90 | webencodings==0.5.1 # via bleach 91 | werkzeug==1.0.1 # via tensorboard 92 | wheel==0.34.2 # via astunparse, tensorboard, tensorflow 93 | widgetsnbextension==2.0.1 # via ipywidgets 94 | wrapt==1.12.1 # via tensorflow 95 | 96 | # The following packages are considered to be unsafe in a requirements file: 97 | # setuptools 98 | -------------------------------------------------------------------------------- /rllib_exercises/client.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/client.png -------------------------------------------------------------------------------- /rllib_exercises/dqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/dqn.png -------------------------------------------------------------------------------- /rllib_exercises/learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/learning.png -------------------------------------------------------------------------------- /rllib_exercises/log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/log.png -------------------------------------------------------------------------------- /rllib_exercises/ppo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/ppo.png -------------------------------------------------------------------------------- /rllib_exercises/rllib_exercise01_mdp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "hideCode": false, 7 | "hidePrompt": false 8 | }, 9 | "source": [ 10 | "# RL Exercise 1 - Markov Decision Processes\n", 11 | "\n", 12 | "**GOAL:** The goal of the exercise is to introduce the Markov Decision Process abstraction and to show how to use Markov Decision Processes in Python.\n", 13 | "\n", 14 | "**The key abstraction in reinforcement learning is the Markov decision 
process (MDP).** An MDP models sequential interactions with an external environment. It consists of the following:\n", 15 | "- a **state space**\n", 16 | "- a set of **actions**\n", 17 | "- a **transition function** which describes the probability of being in a state $s'$ at time $t+1$ given that the MDP was in state $s$ at time $t$ and action $a$ was taken\n", 18 | "- a **reward function**, which determines the reward received at time $t$\n", 19 | "- a **discount factor** $\\gamma$\n", 20 | "\n", 21 | "More details are available [here](https://en.wikipedia.org/wiki/Markov_decision_process).\n", 22 | "\n", 23 | "**NOTE:** Reinforcement learning algorithms are often applied to problems that don't strictly fit into the MDP framework. In particular, situations in which the state of the environment is not fully observed violate the MDP assumption. Nevertheless, RL algorithms are often applied in such settings anyway and can still perform well.\n", 24 | "\n", 25 | "## Policies\n", 26 | "\n", 27 | "A **policy** is a function that takes in a **state** and returns an **action**. A policy may be stochastic (i.e., it may sample from a probability distribution) or deterministic.\n", 28 | "\n", 29 | "The **goal of reinforcement learning** is to learn a **policy** that maximizes the expected cumulative reward in an MDP. That is, we wish to find a policy $\\pi$ which solves the following optimization problem\n", 30 | "\n", 31 | "\\begin{equation}\n", 32 | "\\arg\\max_{\\pi} \\mathbb{E}\\left[\\sum_{t=1}^T \\gamma^t R_t(\\pi)\\right],\n", 33 | "\\end{equation}\n", 34 | "\n", 35 | "where $T$ is the number of steps taken in the MDP (this is a random variable and may depend on $\\pi$), $R_t$ is the reward received at time $t$ (also a random variable which depends on $\\pi$), and the expectation is over the randomness in both the environment and the policy.\n", 36 | "\n", 37 | "A number of algorithms are available for solving reinforcement learning problems. Several of the most widely known are [value iteration](https://en.wikipedia.org/wiki/Markov_decision_process#Value_iteration), [policy iteration](https://en.wikipedia.org/wiki/Markov_decision_process#Policy_iteration), and [Q-learning](https://en.wikipedia.org/wiki/Q-learning).\n", 38 | "\n", 39 | "## RL in Python\n", 40 | "\n", 41 | "The `gym` Python module provides MDP interfaces to a variety of simulators. For example, the CartPole environment interfaces with a simple simulator of the physics of balancing a pole on a cart. The CartPole problem is described at https://gym.openai.com/envs/CartPole-v0. This example fits into the MDP framework as follows.\n", 42 | "- The **state** consists of the position and velocity of the cart as well as the angle and angular velocity of the pole that is balancing on the cart.\n", 43 | "- The **actions** are to push the cart to the left or to the right.\n", 44 | "- The **transition function** is deterministic and is determined by simulating physical laws.\n", 45 | "- The **reward function** is a constant 1 as long as the pole is upright, and 0 once the pole has fallen over. Therefore, maximizing the reward means balancing the pole for as long as possible.\n", 46 | "- The **discount factor** in this case can be taken to be 1.\n", 47 | "\n", 48 | "More information about the `gym` Python module is available at https://gym.openai.com/."
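To make the abstraction concrete, here is a minimal sketch of a two-state MDP that exposes the same `reset`/`step` interface that `gym` environments use. The `TwoStateMDP` class and its transition probabilities are invented for illustration; they are not part of `gym` or of this exercise.

```python
import numpy as np

class TwoStateMDP:
    """A toy MDP with states {0, 1}, actions {0, 1}, and stochastic transitions."""

    def __init__(self):
        # transitions[state][action] = probability of landing in state 1.
        self.transitions = {0: {0: 0.1, 1: 0.8}, 1: {0: 0.5, 1: 0.9}}
        self.state = 0

    def reset(self):
        self.state = 0
        return self.state

    def step(self, action):
        # Sample the next state from the transition function.
        self.state = int(np.random.random() < self.transitions[self.state][action])
        reward = float(self.state == 1)  # reward 1 for being in state 1, else 0
        done = False  # this toy MDP never terminates on its own
        return self.state, reward, done, {}

env = TwoStateMDP()
state = env.reset()
state, reward, done, info = env.step(1)
```

Here the transition function is written out explicitly as a table of probabilities; in the `gym` environments below it is implicit in the simulator.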
49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "from __future__ import absolute_import\n", 58 | "from __future__ import division\n", 59 | "from __future__ import print_function\n", 60 | "\n", 61 | "import gym\n", 62 | "import numpy as np" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "The code below illustrates how to create and manipulate MDPs in Python. An MDP can be created by calling `gym.make`. Gym environments are identified by names like `CartPole-v0`. A **catalog of built-in environments** can be found at https://gym.openai.com/envs." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "hideCode": false, 77 | "hidePrompt": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "env = gym.make('CartPole-v0')\n", 82 | "print('Created env:', env)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Reset the state of the MDP by calling `env.reset()`. This call returns the initial state of the MDP." 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "hideCode": false, 97 | "hidePrompt": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "state = env.reset()\n", 102 | "print('The starting state is:', state)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "The `env.step` method takes an action (in the case of the CartPole environment, the appropriate actions are 0 or 1, for moving left or right). It returns a tuple of four things:\n", 110 | "1. the new state of the environment\n", 111 | "2. a reward\n", 112 | "3. a boolean indicating whether the simulation has finished\n", 113 | "4. a dictionary of miscellaneous extra information" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "hideCode": false, 121 | "hidePrompt": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# Simulate taking an action in the environment. Appropriate actions for\n", 126 | "# the CartPole environment are 0 and 1 (for moving left and right).\n", 127 | "action = 0\n", 128 | "state, reward, done, info = env.step(action)\n", 129 | "print(state, reward, done, info)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "A **rollout** is a simulation of a policy in an environment. It alternates between choosing actions (using some policy) and taking those actions in the environment.\n", 137 | "\n", 138 | "The code below performs a rollout in a given environment. It takes **random actions** until the simulation has finished and returns the cumulative reward."
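The `random_rollout` function defined below returns the reward of a single rollout. Because both the episode length and the rewards are random, one rollout is only a noisy sample of the policy's expected return; averaging over many rollouts gives a Monte Carlo estimate whose standard error shrinks like $1/\sqrt{n}$. A small sketch of that idea (the `estimate_return` helper is illustrative, not part of the exercise):

```python
import gym
import numpy as np

def estimate_return(env, policy, num_rollouts=100):
    """Monte Carlo estimate of a policy's expected (undiscounted) return."""
    returns = []
    for _ in range(num_rollouts):
        state = env.reset()
        done, total = False, 0.0
        while not done:
            state, reward, done, _ = env.step(policy(state))
            total += reward
        returns.append(total)
    # Report the mean and its standard error (shrinks like 1/sqrt(num_rollouts)).
    return np.mean(returns), np.std(returns) / np.sqrt(num_rollouts)

env = gym.make('CartPole-v0')
mean, stderr = estimate_return(env, lambda state: np.random.choice([0, 1]))
print(mean, stderr)
```

The same averaging pattern appears at the end of this notebook, where the two sample policies are each evaluated over 100 rollouts.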
139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "def random_rollout(env):\n", 148 | " state = env.reset()\n", 149 | " \n", 150 | " done = False\n", 151 | " cumulative_reward = 0\n", 152 | "\n", 153 | " # Keep looping as long as the simulation has not finished.\n", 154 | " while not done:\n", 155 | " # Choose a random action (either 0 or 1).\n", 156 | " action = np.random.choice([0, 1])\n", 157 | " \n", 158 | " # Take the action in the environment.\n", 159 | " state, reward, done, _ = env.step(action)\n", 160 | " \n", 161 | " # Update the cumulative reward.\n", 162 | " cumulative_reward += reward\n", 163 | " \n", 164 | " # Return the cumulative reward.\n", 165 | " return cumulative_reward\n", 166 | " \n", 167 | "reward = random_rollout(env)\n", 168 | "print(reward)\n", 169 | "reward = random_rollout(env)\n", 170 | "print(reward)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "**EXERCISE:** Finish implementing the `rollout_policy` function below, which should take an environment *and* a policy. The *policy* is a function that takes in a *state* and returns an *action*. The main difference is that instead of choosing a **random action**, the action should be chosen **with the policy** (as a function of the state)." 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "def rollout_policy(env, policy):\n", 187 | " state = env.reset()\n", 188 | " \n", 189 | " done = False\n", 190 | " cumulative_reward = 0\n", 191 | "\n", 192 | " # EXERCISE: Fill out this function by copying the 'random_rollout' function\n", 193 | " # and then modifying it to choose the action using the policy.\n", 194 | " raise NotImplementedError\n", 195 | "\n", 196 | " # Return the cumulative reward.\n", 197 | " return cumulative_reward\n", 198 | "\n", 199 | "def sample_policy1(state):\n", 200 | " return 0 if state[0] < 0 else 1\n", 201 | "\n", 202 | "def sample_policy2(state):\n", 203 | " return 1 if state[0] < 0 else 0\n", 204 | "\n", 205 | "reward1 = np.mean([rollout_policy(env, sample_policy1) for _ in range(100)])\n", 206 | "reward2 = np.mean([rollout_policy(env, sample_policy2) for _ in range(100)])\n", 207 | "\n", 208 | "print('The first sample policy got an average reward of {}.'.format(reward1))\n", 209 | "print('The second sample policy got an average reward of {}.'.format(reward2))\n", 210 | "\n", 211 | "assert 5 < reward1 < 15, ('Make sure that rollout_policy computes the action '\n", 212 | " 'by applying the policy to the state.')\n", 213 | "assert 25 < reward2 < 35, ('Make sure that rollout_policy computes the action '\n", 214 | " 'by applying the policy to the state.')" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [] 223 | } 224 | ], 225 | "metadata": { 226 | "hide_code_all_hidden": false, 227 | "kernelspec": { 228 | "display_name": "Python 3", 229 | "language": "python", 230 | "name": "python3" 231 | }, 232 | "language_info": { 233 | "codemirror_mode": { 234 | "name": "ipython", 235 | "version": 3 236 | }, 237 | "file_extension": ".py", 238 | "mimetype": "text/x-python", 239 | "name": "python", 240 | "nbconvert_exporter": "python", 241 | "pygments_lexer": "ipython3", 242 | "version": "3.7.3" 243 | } 244 | }, 245 | "nbformat": 4, 246 | "nbformat_minor": 2 247 | } 248 
| -------------------------------------------------------------------------------- /rllib_exercises/rllib_exercise02_ppo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# RL Exercise 2 - Proximal Policy Optimization\n", 8 | "\n", 9 | "**GOAL:** The goal of this exercise is to demonstrate how to use the proximal policy optimization (PPO) algorithm.\n", 10 | "\n", 11 | "To understand how to use **RLlib**, see the documentation at http://rllib.io.\n", 12 | "\n", 13 | "PPO is described in detail in https://arxiv.org/abs/1707.06347. It is a variant of Trust Region Policy Optimization (TRPO) described in https://arxiv.org/abs/1502.05477\n", 14 | "\n", 15 | "PPO works in two phases. In one phase, a large number of rollouts are performed (in parallel). The rollouts are then aggregated on the driver and a surrogate optimization objective is defined based on those rollouts. We then use SGD to find the policy that maximizes that objective with a penalty term for diverging too much from the current policy.\n", 16 | "\n", 17 | "![ppo](https://raw.githubusercontent.com/ucbrise/risecamp/risecamp2018/ray/tutorial/rllib_exercises/ppo.png)\n", 18 | "\n", 19 | "**NOTE:** The SGD optimization step is best performed in a data-parallel manner over multiple GPUs. This is exposed through the `num_gpus` field of the `config` dictionary (for this to work, you must be using a machine that has GPUs)." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# Be sure to install the latest version of RLlib.\n", 29 | "! pip install -U ray[rllib]" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "from __future__ import absolute_import\n", 39 | "from __future__ import division\n", 40 | "from __future__ import print_function\n", 41 | "\n", 42 | "import gym\n", 43 | "import ray\n", 44 | "from ray.rllib.agents.ppo import PPOTrainer, DEFAULT_CONFIG\n", 45 | "from ray.tune.logger import pretty_print" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "Start up Ray. This must be done before we instantiate any RL agents." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "ray.init(num_cpus=3, ignore_reinit_error=True, log_to_driver=False)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Instantiate a PPOTrainer object. We pass in a config object that specifies how the network and training procedure should be configured. Some of the parameters are the following.\n", 69 | "\n", 70 | "- `num_workers` is the number of actors that the agent will create. This determines the degree of parallelism that will be used.\n", 71 | "- `num_sgd_iter` is the number of epochs of SGD (passes through the data) that will be used to optimize the PPO surrogate objective at each iteration of PPO.\n", 72 | "- `sgd_minibatch_size` is the SGD batch size that will be used to optimize the PPO surrogate objective.\n", 73 | "- `model` contains a dictionary of parameters describing the neural net used to parameterize the policy. The `fcnet_hiddens` parameter is a list of the sizes of the hidden layers." 
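To get a rough sense of scale for the `fcnet_hiddens` setting, the sketch below counts the parameters of a plain fully connected network. This is a back-of-the-envelope illustration only; RLlib's actual model also includes a value-function branch and other details.

```python
def mlp_param_count(input_size, hidden_sizes, output_size):
    """Count the weights and biases in a plain fully connected network."""
    sizes = [input_size] + list(hidden_sizes) + [output_size]
    return sum(sizes[i] * sizes[i + 1] + sizes[i + 1] for i in range(len(sizes) - 1))

# CartPole-v0 has a 4-dimensional observation and 2 discrete actions.
print(mlp_param_count(4, [100, 100], 2))  # 10802 parameters
print(mlp_param_count(4, [16, 16], 2))    # 386 parameters -- far smaller
```

This is part of why a later exercise asks you to shrink the network: a `[100, 100]` policy has far more capacity than CartPole needs.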
74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "config = DEFAULT_CONFIG.copy()\n", 83 | "config['num_workers'] = 1\n", 84 | "config['num_sgd_iter'] = 30\n", 85 | "config['sgd_minibatch_size'] = 128\n", 86 | "config['model']['fcnet_hiddens'] = [100, 100]\n", 87 | "config['num_cpus_per_worker'] = 0 # This avoids running out of resources in the notebook environment when this cell is re-executed\n", 88 | "\n", 89 | "agent = PPOTrainer(config, 'CartPole-v0')" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "Train the policy on the `CartPole-v0` environment for 2 steps. The CartPole problem is described at https://gym.openai.com/envs/CartPole-v0.\n", 97 | "\n", 98 | "**EXERCISE:** Inspect how well the policy is doing by looking for the lines that say something like\n", 99 | "\n", 100 | "```\n", 101 | "episode_len_mean: 22.262569832402235\n", 102 | "episode_reward_mean: 22.262569832402235\n", 103 | "```\n", 104 | "\n", 105 | "This indicates how much reward the policy is receiving and how many time steps of the environment the policy ran. The maximum possible reward for this problem is 200. The reward and trajectory length are very close because the agent receives a reward of one for every time step that it survives (however, that is specific to this environment)." 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "for i in range(2):\n", 115 | " result = agent.train()\n", 116 | " print(pretty_print(result))" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "**EXERCISE:** The current network and training configuration are too large and heavy-duty for a simple problem like CartPole. Modify the configuration to use a smaller network and to speed up the optimization of the surrogate objective (fewer SGD iterations and a larger batch size should help)." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "config = DEFAULT_CONFIG.copy()\n", 133 | "config['num_workers'] = 3\n", 134 | "config['num_sgd_iter'] = 30\n", 135 | "config['sgd_minibatch_size'] = 128\n", 136 | "config['model']['fcnet_hiddens'] = [100, 100]\n", 137 | "config['num_cpus_per_worker'] = 0\n", 138 | "\n", 139 | "agent = PPOTrainer(config, 'CartPole-v0')" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "**EXERCISE:** Train the agent and try to get a reward of 200. If it's training too slowly you may need to modify the config above to use fewer hidden units, a larger `sgd_minibatch_size`, a smaller `num_sgd_iter`, or a larger `num_workers`.\n", 147 | "\n", 148 | "This should take around 20 or 30 training iterations." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "for i in range(2):\n", 158 | " result = agent.train()\n", 159 | " print(pretty_print(result))" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "Checkpoint the current model. The call to `agent.save()` returns the path to the checkpointed model and can be used later to restore the model." 
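One way to structure the training exercise above is to loop until the mean reward crosses the target, checkpointing along the way. A sketch, assuming `agent` is the `PPOTrainer` configured above (the iteration cap and checkpoint frequency are arbitrary choices):

```python
checkpoint_path = None
for i in range(50):  # iteration cap so the loop always terminates
    result = agent.train()
    print(i, result['episode_reward_mean'])
    if i % 5 == 0:
        checkpoint_path = agent.save()  # periodic checkpoint
    if result['episode_reward_mean'] >= 200:  # maximum possible for CartPole-v0
        checkpoint_path = agent.save()
        break
```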
167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "checkpoint_path = agent.save()\n", 176 | "print(checkpoint_path)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "Now let's use the trained policy to make predictions.\n", 184 | "\n", 185 | "**NOTE:** Here we are loading the trained policy in the same process, but in practice, this would often be done in a different process (probably on a different machine)." 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "trained_config = config.copy()\n", 195 | "\n", 196 | "test_agent = PPOTrainer(trained_config, 'CartPole-v0')\n", 197 | "test_agent.restore(checkpoint_path)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "Now use the trained policy to act in an environment. The key line is the call to `test_agent.compute_action(state)` which uses the trained policy to choose an action.\n", 205 | "\n", 206 | "**EXERCISE:** Verify that the reward received roughly matches up with the reward printed in the training logs." 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "env = gym.make('CartPole-v0')\n", 216 | "state = env.reset()\n", 217 | "done = False\n", 218 | "cumulative_reward = 0\n", 219 | "\n", 220 | "while not done:\n", 221 | " action = test_agent.compute_action(state)\n", 222 | " state, reward, done, _ = env.step(action)\n", 223 | " cumulative_reward += reward\n", 224 | "\n", 225 | "print(cumulative_reward)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "## Visualize results with TensorBoard\n", 233 | "\n", 234 | "**EXERCISE**: Finally, you can visualize your training results using TensorBoard. To do this, open a new terminal in Jupyter lab using the \"+\" button, and run:\n", 235 | " \n", 236 | "`$ tensorboard --logdir=~/ray_results --host=0.0.0.0`\n", 237 | "\n", 238 | "Then open your browser to the address printed (or change the current URL to go to port 6006). Check the \"episode_reward_mean\" learning curve of the PPO agent. Toggle the horizontal axis between the \"STEPS\" and \"RELATIVE\" views to compare sample efficiency (number of timesteps) against wall-clock time.\n", 239 | "\n", 240 | "Note that TensorBoard will not work in Binder."
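If TensorBoard is unavailable (for example on Binder, as noted above), a lightweight alternative is to record `episode_reward_mean` from each `agent.train()` call yourself and plot it directly. A sketch (the number of iterations is arbitrary):

```python
import matplotlib.pyplot as plt

rewards = []
for _ in range(10):
    result = agent.train()
    rewards.append(result['episode_reward_mean'])

plt.plot(rewards)
plt.xlabel('training iteration')
plt.ylabel('episode_reward_mean')
plt.title('PPO on CartPole-v0')
plt.show()
```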
241 | ] 242 | } 243 | ], 244 | "metadata": { 245 | "kernelspec": { 246 | "display_name": "Python 3", 247 | "language": "python", 248 | "name": "python3" 249 | }, 250 | "language_info": { 251 | "codemirror_mode": { 252 | "name": "ipython", 253 | "version": 3 254 | }, 255 | "file_extension": ".py", 256 | "mimetype": "text/x-python", 257 | "name": "python", 258 | "nbconvert_exporter": "python", 259 | "pygments_lexer": "ipython3", 260 | "version": "3.6.7" 261 | } 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 2 265 | } 266 | -------------------------------------------------------------------------------- /rllib_exercises/serving/data_large.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/serving/data_large.gz -------------------------------------------------------------------------------- /rllib_exercises/serving/data_small.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/serving/data_small.gz -------------------------------------------------------------------------------- /rllib_exercises/serving/do_rollouts.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import json 6 | import argparse 7 | import gym 8 | 9 | from ray.rllib.utils.policy_client import PolicyClient 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument( 13 | "--no-train", action="store_true", help="Whether to disable training.") 14 | parser.add_argument( 15 | "--off-policy", 16 | action="store_true", 17 | help="Whether to take random instead of on-policy actions.") 18 | 19 | 20 | if __name__ == "__main__": 21 | args = parser.parse_args() 22 | import pong_py 23 | env = pong_py.PongJSEnv() 24 | client = PolicyClient("http://localhost:8900") 25 | 26 | eid = client.start_episode(training_enabled=not args.no_train) 27 | obs = env.reset() 28 | rewards = 0 29 | episode = [] 30 | f = open("out.txt", "w") 31 | 32 | while True: 33 | if args.off_policy: 34 | action = env.action_space.sample() 35 | client.log_action(eid, obs, action) 36 | else: 37 | action = client.get_action(eid, obs) 38 | next_obs, reward, done, info = env.step(action) 39 | episode.append({ 40 | "obs": obs.tolist(), 41 | "action": float(action), 42 | "reward": reward, 43 | }) 44 | obs = next_obs 45 | rewards += reward 46 | client.log_returns(eid, reward, info=info) 47 | if done: 48 | print("Total reward:", rewards) 49 | f.write(json.dumps(episode)) 50 | f.write("\n") 51 | f.flush() 52 | rewards = 0 53 | client.end_episode(eid, obs) 54 | obs = env.reset() 55 | eid = client.start_episode(training_enabled=not args.no_train) 56 | -------------------------------------------------------------------------------- /rllib_exercises/serving/javascript-pong/static/images/press1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/serving/javascript-pong/static/images/press1.png -------------------------------------------------------------------------------- /rllib_exercises/serving/javascript-pong/static/images/press2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/serving/javascript-pong/static/images/press2.png -------------------------------------------------------------------------------- /rllib_exercises/serving/javascript-pong/static/images/winner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/serving/javascript-pong/static/images/winner.png -------------------------------------------------------------------------------- /rllib_exercises/serving/javascript-pong/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Pong! 5 | 6 | 7 | 8 | 9 | 10 | 11 | 49 | 50 | 51 |
52 | Sorry, this example cannot be run because your browser does not support the <canvas> element 53 |
54 |
55 | 56 | 57 | 58 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /rllib_exercises/serving/javascript-pong/static/pong.css: -------------------------------------------------------------------------------- 1 | body { background-color: black; color: #AAA; font-size: 12pt; padding: 1em; } 2 | 3 | #unsupported { border: 1px solid yellow; color: black; background-color: #FFFFAD; padding: 2em; margin: 1em; display: inline-block; } 4 | 5 | #sidebar { width: 18em; height: 40em; float: left; font-size: 0.825em; background-color: #333; border: 1px solid white; padding: 1em; } 6 | #sidebar h2 { color: white; text-align: center; margin: 0; } 7 | #sidebar .parts { padding-left: 1em; list-style-type: none; margin-bottom: 2em; text-align: right; } 8 | #sidebar .parts li a { color: white; text-decoration: none; } 9 | #sidebar .parts li a:visited { color: white; } 10 | #sidebar .parts li a:hover { color: white; text-decoration: underline; } 11 | #sidebar .parts li a.selected { color: #F08010; } 12 | #sidebar .parts li a i { color: #AAA; } 13 | #sidebar .parts li a.selected i { color: #F08010; } 14 | #sidebar .settings { line-height: 1.2em; height: 1.2em; text-align: right; } 15 | #sidebar .settings.size { } 16 | #sidebar .settings.speed { margin-bottom: 1em; } 17 | #sidebar .settings label { vertical-align: middle; } 18 | #sidebar .settings input { vertical-align: middle; } 19 | #sidebar .settings select { vertical-align: middle; } 20 | #sidebar .description { margin-bottom: 2em; } 21 | #sidebar .description b { font-weight: normal; color: #FFF; } 22 | 23 | 24 | @media screen and (min-width: 0px) { 25 | #sidebar { display: none; } 26 | #game { display: block; width: 480px; height: 360px; margin: 0 auto; } 27 | } 28 | 29 | @media screen and (min-width: 800px) { 30 | #game { width: 640px; height: 480px; } 31 | } 32 | 33 | @media screen and (min-width: 1000px) { 34 | #sidebar { display: block; } 35 | #game { margin-left: 18em; } 36 | } 37 | 38 | @media screen and (min-width: 1200px) { 39 | #game { width: 800px; height: 600px; } 40 | } 41 | 42 | @media screen and (min-width: 1600px) { 43 | #game { width: 1024px; height: 768px; } 44 | } 45 | -------------------------------------------------------------------------------- /rllib_exercises/serving/javascript-pong/static/sounds/goal.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/serving/javascript-pong/static/sounds/goal.wav -------------------------------------------------------------------------------- /rllib_exercises/serving/javascript-pong/static/sounds/ping.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/serving/javascript-pong/static/sounds/ping.wav -------------------------------------------------------------------------------- /rllib_exercises/serving/javascript-pong/static/sounds/pong.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/serving/javascript-pong/static/sounds/pong.wav -------------------------------------------------------------------------------- /rllib_exercises/serving/javascript-pong/static/sounds/wall.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/serving/javascript-pong/static/sounds/wall.wav -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_py/pong_py.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: pong-py 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_py/pong_py.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | setup.py 2 | pong_py/__init__.py 3 | pong_py/ball.py 4 | pong_py/helper.py 5 | pong_py/paddle.py 6 | pong_py/pongjsenv.py 7 | pong_py.egg-info/PKG-INFO 8 | pong_py.egg-info/SOURCES.txt 9 | pong_py.egg-info/dependency_links.txt 10 | pong_py.egg-info/top_level.txt -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_py/pong_py.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_py/pong_py.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | pong_py 2 | -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_py/pong_py/__init__.py: -------------------------------------------------------------------------------- 1 | from pong_py.pongjsenv import PongJSEnv 2 | -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_py/pong_py/ball.py: -------------------------------------------------------------------------------- 1 | import pong_py.helper as helper 2 | import random 3 | 4 | class Ball(): 5 | def __init__(self, pong): 6 | self.radius = 5 7 | self.dt = pong.dt 8 | self.minX = self.radius; 9 | self.maxX = pong.width - self.radius 10 | self.minY = pong.wall_width + self.radius 11 | self.maxY = pong.height - pong.wall_width - self.radius 12 | self.speed = (self.maxX - self.minX) / 4; 13 | self.accel = 8; 14 | self.dx = 0 15 | self.dy = 0 16 | 17 | def set_position(self, x, y): 18 | self.x_prev = x if not hasattr(self, "x") else self.x 19 | self.y_prev = y if not hasattr(self, "y") else self.y 20 | 21 | self.x = x 22 | self.y = y 23 | self.left = self.x - self.radius 24 | self.top = self.y - self.radius 25 | self.right = self.x + self.radius 26 | self.bottom = self.y + self.radius 27 | 28 | def set_direction(self, dx, dy): 29 | self.dx = dx 30 | self.dy = dy 31 | 32 | def update(self, left_pad, right_pad): 33 | 34 | pos = helper.accelerate(self.x, self.y, 35 | self.dx, self.dy, 36 | self.accel, self.dt); 37 | 38 | if ((pos.dy > 0) and (pos.y > self.maxY)): 39 | pos.y = self.maxY 40 | pos.dy = -pos.dy 41 | elif ((pos.dy < 0) and (pos.y < self.minY)): 42 | pos.y = self.minY 43 | pos.dy = -pos.dy 44 | 45 | paddle = left_pad if (pos.dx < 0) else right_pad; 46 | pt = helper.ballIntercept(self, paddle, pos.nx, pos.ny); 47 | 48 | if pt: 49 | if pt.d == 'left' or pt.d == 'right': 50 | pos.x = pt.x 51 | pos.dx = 
-pos.dx 52 | elif pt.d == 'top' or pt.d == 'bottom': 53 | pos.y = pt.y 54 | pos.dy = -pos.dy 55 | 56 | if paddle.up: 57 | pos.dy = pos.dy * (0.5 if pos.dy < 0 else 1.5) 58 | elif paddle.down: 59 | pos.dy = pos.dy * (0.5 if pos.dy > 0 else 1.5) 60 | 61 | self.set_position(pos.x, pos.y) 62 | self.set_direction(pos.dx, pos.dy) 63 | 64 | def reset(self, playerNo): 65 | self.set_position((self.maxX + self.minX) / 2, random.uniform(self.minY, self.maxY)) 66 | self.set_direction(self.speed if playerNo == 1 else -self.speed, self.speed) 67 | -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_py/pong_py/helper.py: -------------------------------------------------------------------------------- 1 | # Geometry helpers for the Pong physics: mutable containers plus the 2 | # segment-intersection routines used for ball/paddle collision detection. 3 | 4 | class Position(): 5 | def __init__(self, nx, ny, x, y, dx, dy): 6 | self.nx = nx 7 | self.ny = ny 8 | self.x = x 9 | self.y = y 10 | self.dx = dx 11 | self.dy = dy 12 | 13 | class Intercept(): 14 | def __init__(self, x, y, d): 15 | self.x = x 16 | self.y = y 17 | self.d = d 18 | 19 | class Rectangle(): 20 | def __init__(self, left, right, top, bottom): 21 | self.left = left 22 | self.right = right 23 | self.top = top 24 | self.bottom = bottom 25 | 26 | def accelerate(x, y, dx, dy, accel, dt): 27 | # Advance position and speed by one time step under constant acceleration. 28 | x2 = x + (dt * dx) + (accel * dt * dt * 0.5) 29 | y2 = y + (dt * dy) + (accel * dt * dt * 0.5) 30 | dx2 = dx + (accel * dt) * (1 if dx > 0 else -1) 31 | dy2 = dy + (accel * dt) * (1 if dy > 0 else -1) 32 | return Position((x2-x), (y2-y), x2, y2, dx2, dy2) 33 | 34 | 35 | def intercept(x1, y1, x2, y2, x3, y3, x4, y4, d): 36 | # Intersection of segments (x1,y1)-(x2,y2) and (x3,y3)-(x4,y4), if it exists. 37 | denom = ((y4-y3) * (x2-x1)) - ((x4-x3) * (y2-y1)) 38 | if (denom != 0): 39 | ua = (((x4-x3) * (y1-y3)) - ((y4-y3) * (x1-x3))) / denom 40 | if ((ua >= 0) and (ua <= 1)): 41 | ub = (((x2-x1) * (y1-y3)) - ((y2-y1) * (x1-x3))) / denom 42 | if ((ub >= 0) and (ub <= 1)): 43 | x = x1 + (ua * (x2-x1)) 44 | y = y1 + (ua * (y2-y1)) 45 | return Intercept(x, y, d) 46 | 47 | 48 | def ballIntercept(ball, rect, nx, ny): 49 | # Find which edge of the rectangle the moving ball crosses first, if any. 50 | pt = None 51 | if (nx < 0): 52 | pt = intercept(ball.x, ball.y, ball.x + nx, ball.y + ny, 53 | rect.right + ball.radius, 54 | rect.top - ball.radius, 55 | rect.right + ball.radius, 56 | rect.bottom + ball.radius, 57 | "right") 58 | elif (nx > 0): 59 | pt = intercept(ball.x, ball.y, ball.x + nx, ball.y + ny, 60 | rect.left - ball.radius, 61 | rect.top - ball.radius, 62 | rect.left - ball.radius, 63 | rect.bottom + ball.radius, 64 | "left") 65 | 66 | if (not pt): 67 | if (ny < 0): 68 | pt = intercept(ball.x, ball.y, ball.x + nx, ball.y + ny, 69 | rect.left - ball.radius, 70 | rect.bottom + ball.radius, 71 | rect.right + ball.radius, 72 | rect.bottom + ball.radius, 73 | "bottom") 74 | elif (ny > 0): 75 | pt = intercept(ball.x, ball.y, ball.x + nx, ball.y + ny, 76 | rect.left - ball.radius, 77 | rect.top - ball.radius, 78 | rect.right + ball.radius, 79 | rect.top - ball.radius, 80 | "top") 81 | return pt -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_py/pong_py/paddle.py: -------------------------------------------------------------------------------- 1 | import random 2 | import pong_py.helper as helper 3 | from pong_py.helper import Rectangle 4 | 5 | class Paddle(): 6 | STOP = 0 7 | DOWN = 1 8 | UP = 2 9 | 
10 | 11 | def __init__(self, rhs, pong): 12 | self.pid = rhs 13 | self.width = 12 14 | self.height = 60 15 | self.dt = pong.dt 16 | self.minY = pong.wall_width 17 | self.maxY = pong.height - pong.wall_width - self.height 18 | self.speed = (self.maxY - self.minY) / 2 19 | self.ai_reaction = 0.1 20 | self.ai_error = 120 21 | self.pong = pong 22 | self.set_direction(0) 23 | self.set_position(pong.width - self.width if rhs else 0, 24 | self.minY + (self.maxY - self.minY) / 2) 25 | self.prediction = None 26 | self.ai_prev_action = 0 27 | 28 | def set_position(self, x, y): 29 | self.x = x 30 | self.y = y 31 | self.left = self.x 32 | self.right = self.left + self.width 33 | self.top = self.y 34 | self.bottom = self.y + self.height 35 | 36 | def set_direction(self, dy): 37 | # Needed for spin calculation 38 | self.up = -dy if dy < 0 else 0 39 | self.down = dy if dy > 0 else 0 40 | 41 | def step(self, action): 42 | if action == self.STOP: 43 | self.stopMovingDown() 44 | self.stopMovingUp() 45 | elif action == self.DOWN: 46 | self.moveDown() 47 | elif action == self.UP: 48 | self.moveUp() 49 | amt = self.down - self.up 50 | if amt != 0: 51 | y = self.y + (amt * self.dt * self.speed) 52 | if y < self.minY: 53 | y = self.minY 54 | elif y > self.maxY: 55 | y = self.maxY 56 | self.set_position(self.x, y) 57 | 58 | def predict(self, ball, dt): 59 | # Only re-predict if the ball changed direction, or it's been some time since the last prediction. 60 | if (self.prediction and ((self.prediction.dx * ball.dx) > 0) and 61 | ((self.prediction.dy * ball.dy) > 0) and 62 | (self.prediction.since < self.ai_reaction)): 63 | self.prediction.since += dt 64 | return 65 | 66 | rect = Rectangle(self.left, self.right, -10000, 10000) 67 | pt = helper.ballIntercept(ball, rect, ball.dx * 10, ball.dy * 10) 68 | 69 | if (pt): 70 | t = self.minY + ball.radius 71 | b = self.maxY + self.height - ball.radius 72 | 73 | while ((pt.y < t) or (pt.y > b)): 74 | if (pt.y < t): 75 | pt.y = t + (t - pt.y) 76 | elif (pt.y > b): 77 | pt.y = t + (b - t) - (pt.y - b) 78 | self.prediction = pt 79 | else: 80 | self.prediction = None 81 | 82 | if self.prediction: 83 | self.prediction.since = 0 84 | self.prediction.dx = ball.dx 85 | self.prediction.dy = ball.dy 86 | self.prediction.radius = ball.radius 87 | self.prediction.exactX = self.prediction.x 88 | self.prediction.exactY = self.prediction.y 89 | closeness = (ball.x - self.right if ball.dx < 0 else self.left - ball.x) / self.pong.width 90 | error = self.ai_error * closeness 91 | self.prediction.y = self.prediction.y + random.uniform(-error, error) 92 | 93 | def ai_step(self, ball): 94 | 95 | if (((ball.x < self.left) and (ball.dx < 0)) or 96 | ((ball.x > self.right) and (ball.dx > 0))): 97 | self.stopMovingUp() 98 | self.stopMovingDown() 99 | return 100 | 101 | self.predict(ball, self.dt) 102 | action = self.ai_prev_action 103 | 104 | if (self.prediction): 105 | # print('prediction') 106 | if (self.prediction.y < (self.top + self.height/2 - 5)): 107 | action = self.UP 108 | # print("moved up") 109 | elif (self.prediction.y > (self.bottom - self.height/2 + 5)): 110 | action = self.DOWN 111 | # print("moved down") 112 | 113 | else: 114 | action = self.STOP 115 | # print("nothing") 116 | self.ai_prev_action = action 117 | return self.step(action) 118 | 119 | def moveUp(self): 120 | self.down = 0 121 | self.up = 1 122 | 123 | def moveDown(self): 124 | self.down = 1 125 | self.up = 0 126 | 127 | def stopMovingDown(self): 128 | self.down = 0 129 | 130 | def stopMovingUp(self): 131 | self.up = 
0 132 | -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_py/pong_py/pongjsenv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | import gym.spaces 5 | 6 | from pong_py.ball import Ball 7 | from pong_py.paddle import Paddle 8 | 9 | 10 | def transform_state(state): 11 | return state / 500 12 | 13 | 14 | class PongJS(object): 15 | # MDP-style simulation of the JavaScript Pong game: the left paddle is controlled by the agent, the right paddle by the built-in AI. 16 | def __init__(self): 17 | self.width = 640 18 | self.height = 480 19 | self.wall_width = 12 20 | self.dt = 0.05 # seconds 21 | #self.dt = 0.01 # seconds 22 | self.left_pad = Paddle(0, self) 23 | self.right_pad = Paddle(1, self) 24 | self.ball = Ball(self) 25 | 26 | def step(self, action): 27 | # do logic for self 28 | self.left_pad.step(action) 29 | self.right_pad.ai_step(self.ball) 30 | 31 | self.ball.update(self.left_pad, self.right_pad) 32 | term, reward = self.terminate() 33 | if term: 34 | self.reset(0 if reward == 1 else 1) 35 | state = self.get_state() 36 | return state, reward, term 37 | 38 | def init(self): 39 | self.reset(0) 40 | 41 | def terminate(self): 42 | if self.ball.left > self.width: 43 | return True, 1 44 | elif self.ball.right < 0: 45 | return True, -1 46 | else: 47 | return False, 0 48 | 49 | def get_state(self): 50 | return np.array([self.left_pad.y, 0, 51 | self.ball.x, self.ball.y, 52 | self.ball.dx, self.ball.dy, 53 | self.ball.x_prev, self.ball.y_prev]) 54 | 55 | def reset(self, player): 56 | self.ball.reset(player) 57 | 58 | 59 | class PongJSEnv(gym.Env): 60 | def __init__(self): 61 | self.env = PongJS() 62 | self.action_space = gym.spaces.Discrete(3) 63 | self.observation_space = gym.spaces.box.Box(low=0, high=1, shape=(8,)) 64 | 65 | @property 66 | def right_pad(self): 67 | return self.env.right_pad 68 | 69 | @property 70 | def left_pad(self): 71 | return self.env.left_pad 72 | 73 | def reset(self): 74 | self.env.init() 75 | return transform_state(self.env.get_state()) 76 | 77 | def step(self, action): 78 | state, reward, done = self.env.step(action) 79 | return transform_state(state), 1, done, {} 80 | #return state, reward, done, {} 81 | -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_py/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages, Distribution 2 | 3 | setup(name='pong_py', 4 | packages=find_packages()) 5 | -------------------------------------------------------------------------------- /rllib_exercises/serving/pong_web_server.py: -------------------------------------------------------------------------------- 1 | import cgi 2 | from http.server import BaseHTTPRequestHandler, HTTPServer 3 | import json 4 | import requests 5 | import socketserver 6 | import subprocess 7 | import threading 8 | 9 | from ray.rllib.utils.policy_client import PolicyClient 10 | 11 | 12 | # Check that the required port isn't already in use. 13 | try: 14 | requests.get('http://localhost:3000') 15 | except: 16 | pass 17 | else: 18 | raise Exception('The port 3000 is still in use (perhaps from a previous run of this notebook). 
' 19 | 'You will need to kill that process before proceeding, e.g., by running ' 20 | '"subprocess.call([\'ray\', \'stop\'])" in a new cell and restarting this notebook.') 21 | 22 | 23 | client = PolicyClient("http://localhost:8900") 24 | 25 | 26 | def make_handler_class(agent): 27 | """This function is used to define a custom handler using the policy.""" 28 | 29 | class PolicyHandler(BaseHTTPRequestHandler): 30 | def __init__(self, *args, **kwargs): 31 | BaseHTTPRequestHandler.__init__(self, *args, **kwargs) 32 | 33 | def end_headers(self): 34 | self.send_header('Access-Control-Allow-Origin', '*') 35 | self.send_header('Access-Control-Allow-Methods', '*') 36 | self.send_header('Access-Control-Allow-Headers', 'Content-Type') 37 | BaseHTTPRequestHandler.end_headers(self) 38 | 39 | def do_OPTIONS(self): 40 | self.send_response(200, 'ok') 41 | self.end_headers() 42 | 43 | def do_POST(self): 44 | """This method receives the state of the game and returns an action.""" 45 | length = int(self.headers.get_all('content-length')[0]) 46 | post_body = cgi.parse_qs(self.rfile.read(length), keep_blank_values=1) 47 | print("Processing request", post_body) 48 | req = json.loads(list(post_body.keys())[0].decode("utf-8")) 49 | if "command" in req: 50 | if req["command"] == "start_episode": 51 | resp = client.start_episode(training_enabled=False) 52 | elif req["command"] == "end_episode": 53 | resp = client.end_episode(req["episode_id"], [0] * 8) 54 | elif req["command"] == "log_returns": 55 | if req["playerNo"] == 0: 56 | client.log_returns(req["episode_id"], req["reward"]) 57 | resp = "OK" 58 | else: 59 | raise ValueError("Unknown command") 60 | else: 61 | action = client.get_action(req["episode_id"], req["observation"]) 62 | resp = {"output": int(action)} 63 | 64 | self.send_response(200) 65 | self.send_header('Content-type', 'json') 66 | self.end_headers() 67 | 68 | self.wfile.write(json.dumps(resp).encode('ascii')) 69 | 70 | return PolicyHandler 71 | 72 | 73 | if __name__ == "__main__": 74 | handler = make_handler_class(None) 75 | httpd = HTTPServer(('', 3000), handler) 76 | print("Starting web server on port 3000.") 77 | httpd.serve_forever() 78 | -------------------------------------------------------------------------------- /rllib_exercises/serving/simple_policy_server.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import os 7 | 8 | from gym import spaces 9 | import numpy as np 10 | 11 | import ray 12 | from ray.rllib.agents.dqn import DQNAgent 13 | from ray.rllib.agents.pg import PGAgent 14 | from ray.rllib.env.serving_env import ServingEnv 15 | from ray.rllib.utils.policy_server import PolicyServer 16 | from ray.tune.logger import pretty_print 17 | from ray.tune.registry import register_env 18 | 19 | SERVER_ADDRESS = "localhost" 20 | SERVER_PORT = 8900 21 | 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument("--action-size", type=int, required=True) 24 | parser.add_argument("--observation-size", type=int, required=True) 25 | parser.add_argument("--checkpoint-file", type=str, required=True) 26 | parser.add_argument("--run", type=str, required=True) 27 | 28 | 29 | class SimpleServing(ServingEnv): 30 | def __init__(self, config): 31 | ServingEnv.__init__( 32 | self, spaces.Discrete(config["action_size"]), 33 | spaces.Box( 34 | low=-10, high=10, 35 | shape=(config["observation_size"],), 36 | 
dtype=np.float32)) 37 | 38 | def run(self): 39 | print("Starting policy server at {}:{}".format(SERVER_ADDRESS, 40 | SERVER_PORT)) 41 | server = PolicyServer(self, SERVER_ADDRESS, SERVER_PORT) 42 | server.serve_forever() 43 | 44 | 45 | if __name__ == "__main__": 46 | args = parser.parse_args() 47 | ray.init() 48 | register_env("srv", lambda config: SimpleServing(config)) 49 | 50 | if args.run == "DQN": 51 | agent = DQNAgent( 52 | env="srv", 53 | config={ 54 | # Use a single process to avoid needing a load balancer 55 | "num_workers": 0, 56 | # Configure the agent to run short iterations for debugging 57 | "exploration_fraction": 0.01, 58 | "learning_starts": 100, 59 | "timesteps_per_iteration": 200, 60 | "env_config": { 61 | "observation_size": args.observation_size, 62 | "action_size": args.action_size, 63 | }, 64 | }) 65 | elif args.run == "PG": 66 | agent = PGAgent( 67 | env="srv", 68 | config={ 69 | "num_workers": 0, 70 | "env_config": { 71 | "observation_size": args.observation_size, 72 | "action_size": args.action_size, 73 | }, 74 | }) 75 | 76 | # Attempt to restore from checkpoint if possible. 77 | if os.path.exists(args.checkpoint_file): 78 | checkpoint_file = open(args.checkpoint_file).read() 79 | print("Restoring from checkpoint path", checkpoint_file) 80 | agent.restore(checkpoint_file) 81 | 82 | # Serving and training loop 83 | while True: 84 | print(pretty_print(agent.train())) 85 | checkpoint_file = agent.save() 86 | print("Last checkpoint", checkpoint_file) 87 | with open(args.checkpoint_file, "w") as f: 88 | f.write(checkpoint_file) 89 | -------------------------------------------------------------------------------- /rllib_exercises/test_exercises.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | 6 | def test_chain_env_spaces(chain_env_cls): 7 | test_env = chain_env_cls(dict(n=6)) 8 | print("Testing if spaces have been set up correctly...") 9 | assert test_env.action_space is not None, "Action Space not implemented!" 10 | assert test_env.observation_space is not None, "Observation Space not implemented!" 11 | assert not test_env.action_space.contains(2), "Action Space is only [0, 1]" 12 | assert test_env.action_space.contains( 13 | 1), "Action Space does not contain 1." 14 | assert not test_env.observation_space.contains( 15 | 6), "Observation Space is only [0..5]" 16 | assert test_env.observation_space.contains( 17 | 5), "Observation Space is only [0..5]" 18 | print("Success! You've set up the spaces correctly.") 19 | 20 | 21 | def test_chain_env_reward(chain_env_cls): 22 | test_env = chain_env_cls(dict(n=6)) 23 | print("Testing if reward has been set up correctly...") 24 | test_env.reset() 25 | assert test_env.step(1)[1] == test_env.small_reward 26 | assert test_env.state == 0 27 | assert test_env.step(0)[1] == 0 28 | assert test_env.state == 1 29 | test_env.reset() 30 | total_reward = 0 31 | for i in range(test_env.n - 1): 32 | total_reward += test_env.step(0)[1] 33 | assert total_reward == 0, "Expected {} reward; got {}".format( 34 | 0, total_reward) 35 | for i in range(3): 36 | assert test_env.step(0)[1] == test_env.large_reward 37 | assert test_env.step(1)[1] == test_env.small_reward 38 | print("Success! 
You've set up the rewards correctly.") 39 | 40 | 41 | def test_chain_env_behavior(chain_env_cls): 42 | test_env = chain_env_cls(dict(n=6)) 43 | print("Testing if behavior has been changed...") 44 | test_env.reset() 45 | assert test_env.state == 0 46 | test_env.step(1) 47 | assert test_env.state == 0 48 | test_env.step(0) 49 | assert test_env.state == 1 50 | test_env.reset() 51 | assert test_env.state == 0 52 | for i in range(1, test_env.n): 53 | test_env.step(0) 54 | assert test_env.state == i 55 | test_env.step(0) 56 | assert test_env.state == test_env.n - 1 57 | test_env.step(1) 58 | assert test_env.state == 0 59 | print("Success! Behavior of environment is correct.") 60 | -------------------------------------------------------------------------------- /rllib_exercises/web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/rllib_exercises/web.png -------------------------------------------------------------------------------- /tune_exercises/README.rst: -------------------------------------------------------------------------------- 1 | Tune Tutorial 2 | ------------- 3 | 4 | .. raw:: html 5 | 6 | 7 | 8 | 9 | Tuning hyperparameters is often the most expensive part of the machine learning workflow. Tune is built to address this, demonstrating an efficient and scalable solution for this pain point. 10 | 11 | **Code**: https://github.com/ray-project/ray/tree/master/python/ray/tune 12 | 13 | **Examples**: https://github.com/ray-project/ray/tree/master/python/ray/tune/examples 14 | 15 | **Documentation**: http://ray.readthedocs.io/en/latest/tune.html 16 | 17 | **Mailing List**: https://groups.google.com/forum/#!forum/ray-dev 18 | 19 | 20 | Notebooks 21 | --------- 22 | 23 | `Exercise 1 `_ covers the basics of using Tune - creating your first training function and using Tune. This tutorial uses Keras. 24 | 25 | .. raw:: html 26 | 27 | 28 | Tune Tutorial 29 | 30 | 31 | `Exercise 2 `_ covers Search algorithms and Trial Schedulers. This tutorial uses PyTorch. 32 | 33 | .. raw:: html 34 | 35 | 36 | Tune Tutorial 37 | 38 | 39 | `Exercise 3 `_ covers Population-Based Training and uses the advanced Trainable API with save and restore functions and checkpointing. 40 | 41 | .. raw:: html 42 | 43 | 44 | Tune Tutorial 45 | 46 | 47 | 48 | Please open an issue if you have any questions or run into problems. All suggestions and contributions are welcome! 
49 | -------------------------------------------------------------------------------- /tune_exercises/_old_tutorial/cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/tune_exercises/_old_tutorial/cnn.png -------------------------------------------------------------------------------- /tune_exercises/_old_tutorial/helper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import scipy.ndimage as ndimage 4 | import itertools 5 | import logging 6 | import sys 7 | import keras 8 | from keras.datasets import mnist 9 | from keras.preprocessing.image import ImageDataGenerator 10 | from keras import backend as K 11 | 12 | 13 | def limit_threads(num_threads): 14 | K.set_session( 15 | K.tf.Session( 16 | config=K.tf.ConfigProto( 17 | intra_op_parallelism_threads=num_threads, 18 | inter_op_parallelism_threads=num_threads))) 19 | 20 | 21 | def get_best_trial(trial_list, metric): 22 | """Retrieve the best trial.""" 23 | return max(trial_list, key=lambda trial: trial.last_result.get(metric, 0)) 24 | 25 | 26 | def get_sorted_trials(trial_list, metric): 27 | return sorted( 28 | trial_list, 29 | key=lambda trial: trial.last_result.get(metric, 0), 30 | reverse=True) 31 | 32 | 33 | def get_best_result(trial_list, metric): 34 | """Retrieve the last result from the best trial.""" 35 | return {metric: get_best_trial(trial_list, metric).last_result[metric]} 36 | 37 | 38 | def get_best_model(model_creator, trial_list, metric): 39 | """Restore a model from the best trial.""" 40 | sorted_trials = get_sorted_trials(trial_list, metric) 41 | for best_trial in sorted_trials: 42 | try: 43 | print("Creating model...") 44 | model = model_creator(**best_trial.config) 45 | weights = os.path.join(best_trial.logdir, 46 | best_trial.last_result["checkpoint"]) 47 | print("Loading from", weights) 48 | model.load_weights(weights) 49 | break 50 | except Exception as e: 51 | print(e) 52 | print("Loading failed. Trying next model") 53 | return model 54 | 55 | 56 | class TuneCallback(keras.callbacks.Callback): 57 | """Custom Callback for Tune.""" 58 | 59 | def __init__(self, reporter): 60 | super(TuneCallback, self).__init__() 61 | self.reporter = reporter 62 | self.top_acc = -1 63 | self.last_results = [] 64 | 65 | def on_batch_end(self, batch, logs={}): 66 | """Checkpoints the best weights and reports a smoothed accuracy.""" 67 | curr_acc = logs["acc"] 68 | if curr_acc > self.top_acc: 69 | self.top_acc = curr_acc 70 | self.model.save_weights("weights_tune_tmp.h5") 71 | os.rename("weights_tune_tmp.h5", "weights_tune.h5") 72 | # Keep a sliding window of the five most recent batch accuracies. 73 | if len(self.last_results) >= 5: 74 | self.last_results = self.last_results[1:] 75 | self.last_results += [logs["acc"]] 76 | 77 | self.reporter( 78 | mean_accuracy=np.mean(self.last_results), 79 | checkpoint="weights_tune.h5") 80 | 81 | 82 | class GoodError(Exception): 83 | pass 84 | 85 | 86 | def test_reporter(train_mnist_tune): 87 | def mock_reporter(**kwargs): 88 | assert "mean_accuracy" in kwargs, "Did not report proper metric" 89 | assert "checkpoint" in kwargs, "Accidentally removed `checkpoint`?" 
90 | raise GoodError("This works.") 91 | 92 | try: 93 | train_mnist_tune({}, mock_reporter) 94 | except TypeError as e: 95 | print("Forgot to modify function signature?") 96 | raise e 97 | except GoodError: 98 | print("Works!") 99 | return 1 100 | raise Exception("Didn't call reporter...") 101 | 102 | 103 | def prepare_data(data): 104 | try: 105 | new_data = np.array(data).reshape((1, 28, 28, 1)).astype(np.float32) 106 | except ValueError as e: 107 | print("Try running this notebook in `jupyter notebook`.") 108 | raise e 109 | return ndimage.gaussian_filter(new_data, sigma=(0.5)) 110 | -------------------------------------------------------------------------------- /tune_exercises/_old_tutorial/mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/tune_exercises/_old_tutorial/mnist.png -------------------------------------------------------------------------------- /tune_exercises/_old_tutorial/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import itertools 3 | 4 | import keras 5 | from keras.datasets import mnist 6 | from keras.preprocessing.image import ImageDataGenerator 7 | from keras import backend as K 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Flatten 10 | from keras.layers import Conv2D, MaxPooling2D 11 | 12 | 13 | 14 | def load_data(generator=False, data_size=10000, num_batches=600): 15 | num_classes = 10 16 | 17 | # input image dimensions 18 | img_rows, img_cols = 28, 28 19 | 20 | # the data, split between train and test sets 21 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 22 | 23 | if K.image_data_format() == 'channels_first': 24 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 25 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 26 | input_shape = (1, img_rows, img_cols) 27 | else: 28 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 29 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 30 | input_shape = (img_rows, img_cols, 1) 31 | 32 | x_train = x_train.astype('float32') 33 | x_test = x_test.astype('float32') 34 | x_train /= 255 35 | x_test /= 255 36 | 37 | def shuffled(x, y): 38 | idx = np.r_[:x.shape[0]] 39 | np.random.shuffle(idx) 40 | return x[idx], y[idx] 41 | 42 | x_train, y_train = shuffled(x_train, y_train) 43 | x_train = x_train[:data_size] 44 | y_train = y_train[:data_size] 45 | x_test, y_test = shuffled(x_test, y_test) 46 | 47 | # convert class vectors to binary class matrices 48 | y_train = keras.utils.to_categorical(y_train, num_classes) 49 | y_test = keras.utils.to_categorical(y_test, num_classes) 50 | if generator: 51 | datagen = ImageDataGenerator() 52 | return itertools.islice(datagen.flow(x_train, y_train), num_batches) 53 | return x_train, y_train, x_test, y_test 54 | 55 | 56 | def make_model(lr=0.01, layer_size=128): 57 | """Create a Convolutional Neural Network using Keras.""" 58 | num_classes = 10 59 | 60 | model = Sequential() 61 | # Two convolutional layers, max-pooling, and dropout, then dense layers. 62 | model.add( 63 | Conv2D( 64 | 32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 65 | 1))) 66 | model.add(Conv2D(64, (3, 3), activation='relu')) 67 | model.add(MaxPooling2D(pool_size=(2, 2))) 68 | model.add(Dropout(0.25)) 69 | model.add(Flatten()) 70 | model.add(Dense(layer_size, activation='relu')) 71 | 
model.add(Dropout(0.5)) 72 | model.add(Dense(num_classes, activation='softmax')) 73 | 74 | model.compile( 75 | loss=keras.losses.categorical_crossentropy, 76 | optimizer=keras.optimizers.rmsprop(lr=lr, decay=1e-6), 77 | # keras.optimizers.SGD( 78 | # lr=lr, momentum=momentum), 79 | metrics=['accuracy']) 80 | return model 81 | 82 | 83 | def evaluate(model, validation=True): 84 | x_train, y_train, x_test, y_test = load_data(generator=False) 85 | data = x_test if validation else x_train 86 | labels = y_test if validation else y_train 87 | 88 | res = model.evaluate(data, labels) 89 | return dict(zip(model.metrics_names, res)) 90 | -------------------------------------------------------------------------------- /tune_exercises/_old_tutorial/tune.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/tune_exercises/_old_tutorial/tune.png -------------------------------------------------------------------------------- /tune_exercises/exercise_3_pbt.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Tutorial: Population-Based Training" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### In this tutorial, we'll show you how to leverage Population-based Training.\n", 15 | "\n", 16 | "\"PBT\"\n", 17 | "\n", 18 | "Tune is a scalable framework for model training and hyperparameter search with a focus on deep learning and deep reinforcement learning.\n", 19 | "\n", 20 | "* **Code**: https://github.com/ray-project/ray/tree/master/python/ray/tune \n", 21 | "* **Examples**: https://github.com/ray-project/ray/tree/master/python/ray/tune/examples\n", 22 | "* **Documentation**: http://ray.readthedocs.io/en/latest/tune.html\n", 23 | "* **Mailing List** https://groups.google.com/forum/#!forum/ray-dev" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "## If you are running on Google Colab, uncomment below to install the necessary dependencies \n", 33 | "## before beginning the exercise.\n", 34 | "\n", 35 | "# print(\"Setting up colab environment\")\n", 36 | "# !pip uninstall -y -q pyarrow\n", 37 | "# !pip install -q https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.8.0.dev5-cp36-cp36m-manylinux1_x86_64.whl\n", 38 | "# !pip install -q ray[debug]\n", 39 | "\n", 40 | "# # A hack to force the runtime to restart, needed to include the above dependencies.\n", 41 | "# print(\"Done installing! 
Restarting via forced crash (this is not an issue).\")\n", 42 | "# import os\n", 43 | "# os._exit(0)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "import tensorflow as tf\n", 53 | "try:\n", 54 | " tf.get_logger().setLevel('INFO')\n", 55 | "except Exception as exc:\n", 56 | " print(exc)\n", 57 | "import warnings\n", 58 | "warnings.simplefilter(\"ignore\")\n", 59 | "\n", 60 | "import os\n", 61 | "import numpy as np\n", 62 | "import torch\n", 63 | "import torch.optim as optim\n", 64 | "from torchvision import datasets\n", 65 | "from ray.tune.examples.mnist_pytorch import train, test, ConvNet, get_data_loaders\n", 66 | "\n", 67 | "import ray\n", 68 | "from ray import tune\n", 69 | "from ray.tune import track\n", 70 | "from ray.tune.schedulers import PopulationBasedTraining\n", 71 | "from ray.tune.utils import validate_save_restore\n", 72 | "\n", 73 | "%matplotlib inline\n", 74 | "import matplotlib.style as style\n", 75 | "import matplotlib.pyplot as plt\n", 76 | "style.use(\"ggplot\")\n", 77 | "\n", 78 | "datasets.MNIST(\"~/data\", train=True, download=True)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "# Setup Trainable\n" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "To utilize the PopulationBasedTraining Scheduler, we will have to use Tune's more extensive Class-based API. \n", 93 | "\n", 94 | "This API will allow Tune to take intermediate actions such as checkpointing and changing the hyperparameters in the middle of training.\n", 95 | "\n", 96 | "``train()`` wraps ``_train()``.\n", 97 | "\n", 98 | "A call to ``train()`` on a trainable will execute one logical iteration of training. As a rule of thumb, the execution time of one train call should be large enough to avoid overheads (i.e. more than a few seconds), but short enough to report progress periodically (i.e. 
at most a few minutes).\n", 99 | "\n", 100 | "### Instructions:\n", 101 | "\n", 102 | "Add training code under ``_train`` as follows:\n", 103 | "\n", 104 | "```python\n", 105 | "    def _train(self):\n", 106 | "        train(self.model, self.optimizer, self.train_loader, device=self.device)\n", 107 | "        acc = test(self.model, self.test_loader, self.device)\n", 108 | "        return {\"mean_accuracy\": acc}\n", 109 | "```" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "class PytorchTrainable(tune.Trainable):\n", 119 | "    def _setup(self, config):\n", 120 | "        self.device = torch.device(\"cpu\")\n", 121 | "        self.train_loader, self.test_loader = get_data_loaders()\n", 122 | "        self.model = ConvNet().to(self.device)\n", 123 | "        self.optimizer = optim.SGD(\n", 124 | "            self.model.parameters(),\n", 125 | "            lr=config.get(\"lr\", 0.01),\n", 126 | "            momentum=config.get(\"momentum\", 0.9))\n", 127 | "\n", 128 | "    def _train(self):\n", 129 | "        # TODO: Add training code here.\n", 130 | "        return {\"mean_accuracy\": acc}\n", 131 | "\n", 132 | "    def _save(self, checkpoint_dir):\n", 133 | "        checkpoint_path = os.path.join(checkpoint_dir, \"model.pth\")\n", 134 | "        torch.save(self.model.state_dict(), checkpoint_path)\n", 135 | "        return checkpoint_path\n", 136 | "\n", 137 | "    def _restore(self, checkpoint_path):\n", 138 | "        self.model.load_state_dict(torch.load(checkpoint_path))\n", 139 | "    \n", 140 | "    def reset_config(self, new_config):\n", 141 | "        del self.optimizer\n", 142 | "        self.optimizer = optim.SGD(\n", 143 | "            self.model.parameters(),\n", 144 | "            lr=new_config.get(\"lr\", 0.01),\n", 145 | "            momentum=new_config.get(\"momentum\", 0.9))\n", 146 | "        return True\n", 147 | "\n", 148 | "\n", 149 | "ray.shutdown()  # Restart Ray defensively in case the ray connection is lost. \n", 150 | "ray.init(log_to_driver=False)\n", 151 | "\n", 152 | "validate_save_restore(PytorchTrainable)\n", 153 | "validate_save_restore(PytorchTrainable, use_object_store=True)\n", 154 | "print(\"Success!\")" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "# Use population-based training with 4 samples" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "PBT uses information from the rest of the population to refine the hyperparameters and direct computational resources to models which show promise. \n", 169 | "\n", 170 | "In PBT, a worker might copy the model parameters from a better performing worker. It can also explore new hyperparameters by changing the current values randomly (``hyperparam_mutations``).\n", 171 | "\n", 172 | "\n", 173 | "\n", 174 | "As the training of the population of neural networks progresses, this process of exploiting and exploring is performed periodically, ensuring that all the workers in the population have a good base level of performance and also that new hyperparameters are consistently explored. This means that PBT can quickly exploit good hyperparameters, can dedicate more training time to promising models and, crucially, can adapt the hyperparameter values throughout training, leading to automatic learning of the best configurations."
175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "scheduler = PopulationBasedTraining(\n", 184 | " time_attr=\"training_iteration\",\n", 185 | " metric=\"mean_accuracy\",\n", 186 | " mode=\"max\",\n", 187 | " perturbation_interval=5,\n", 188 | " hyperparam_mutations={\n", 189 | " # distribution for resampling\n", 190 | " \"lr\": lambda: np.random.uniform(0.0001, 1),\n", 191 | " # allow perturbations within this set of categorical values\n", 192 | " \"momentum\": [0.8, 0.9, 0.99],\n", 193 | " }\n", 194 | ")" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "scrolled": false 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "ray.shutdown() # Restart Ray defensively in case the ray connection is lost. \n", 206 | "ray.init(log_to_driver=False)\n", 207 | "\n", 208 | "\n", 209 | "analysis = tune.run(\n", 210 | " PytorchTrainble,\n", 211 | " name=\"pbt_test\",\n", 212 | " scheduler=scheduler,\n", 213 | " reuse_actors=True,\n", 214 | " verbose=1,\n", 215 | " stop={\n", 216 | " \"training_iteration\": 100,\n", 217 | " },\n", 218 | " num_samples=4,\n", 219 | " \n", 220 | " # PBT starts by training many neural networks in parallel with random hyperparameters. \n", 221 | " config={\n", 222 | " \"lr\": tune.uniform(0.001, 1),\n", 223 | " \"momentum\": tune.uniform(0.001, 1),\n", 224 | " })\n" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "# You can use this to visualize all mutations of Population-based Training.\n", 234 | "! cat ~/ray_results/pbt_test/pbt_global.txt" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "# Visualizing the results of Population-based Training" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "# Plot by wall-clock time\n", 251 | "\n", 252 | "dfs = analysis.fetch_trial_dataframes()\n", 253 | "# This plots everything on the same plot\n", 254 | "ax = None\n", 255 | "for d in dfs.values():\n", 256 | " ax = d.plot(\"training_iteration\", \"mean_accuracy\", ax=ax, legend=False)\n", 257 | "\n", 258 | "plt.xlabel(\"epoch\"); plt.ylabel(\"Test Accuracy\"); " 259 | ] 260 | } 261 | ], 262 | "metadata": { 263 | "kernelspec": { 264 | "display_name": "Python 3", 265 | "language": "python", 266 | "name": "python3" 267 | }, 268 | "language_info": { 269 | "codemirror_mode": { 270 | "name": "ipython", 271 | "version": 3 272 | }, 273 | "file_extension": ".py", 274 | "mimetype": "text/x-python", 275 | "name": "python", 276 | "nbconvert_exporter": "python", 277 | "pygments_lexer": "ipython3", 278 | "version": "3.7.3" 279 | } 280 | }, 281 | "nbformat": 4, 282 | "nbformat_minor": 2 283 | } -------------------------------------------------------------------------------- /tune_exercises/helper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import scipy.ndimage as ndimage 4 | import itertools 5 | import logging 6 | import sys 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torchvision import datasets, transforms 12 | 13 | 14 | EPOCH_SIZE = 512 15 | TEST_SIZE = 256 16 | 17 | 18 | def limit_threads(num_threads): 19 | from keras import backend as K 20 | 21 | 
K.set_session( 22 | K.tf.Session( 23 | config=K.tf.ConfigProto( 24 | intra_op_parallelism_threads=num_threads, 25 | inter_op_parallelism_threads=num_threads))) 26 | 27 | 28 | class GoodError(Exception): 29 | pass 30 | 31 | 32 | def test_reporter(train_mnist_tune): 33 | def mock_reporter(**kwargs): 34 | assert "mean_accuracy" in kwargs, "Did not report proper metric" 35 | assert "checkpoint" in kwargs, "Accidentally removed `checkpoint`?" 36 | raise GoodError("This works.") 37 | 38 | try: 39 | train_mnist_tune({}, mock_reporter) 40 | except TypeError as e: 41 | print("Forgot to modify function signature?") 42 | raise e 43 | except GoodError: 44 | print("Works!") 45 | return 1 46 | raise Exception("Didn't call reporter...") 47 | 48 | 49 | def prepare_data(data): 50 | try: 51 | new_data = np.array(data).reshape((1, 28, 28, 1)).astype(np.float32) 52 | except ValueError as e: 53 | print("Try running this notebook in `jupyter notebook`.") 54 | raise e 55 | return ndimage.gaussian_filter(new_data, sigma=(0.5)) 56 | 57 | 58 | class ConvNet(nn.Module): 59 | def __init__(self, config): 60 | super(ConvNet, self).__init__() 61 | self.conv1 = nn.Conv2d(1, 3, kernel_size=3) 62 | self.fc = nn.Linear(192, 10) 63 | 64 | def forward(self, x): 65 | x = F.relu(F.max_pool2d(self.conv1(x), 3)) 66 | x = x.view(-1, 192) 67 | x = self.fc(x) 68 | return F.log_softmax(x, dim=1) 69 | 70 | 71 | def train(model, optimizer, train_loader): 72 | model.train() 73 | for batch_idx, (data, target) in enumerate(train_loader): 74 | if batch_idx * len(data) > EPOCH_SIZE: 75 | return 76 | optimizer.zero_grad() 77 | output = model(data) 78 | loss = F.nll_loss(output, target) 79 | loss.backward() 80 | optimizer.step() 81 | 82 | 83 | def test(model, data_loader): 84 | model.eval() 85 | correct = 0 86 | total = 0 87 | with torch.no_grad(): 88 | for batch_idx, (data, target) in enumerate(data_loader): 89 | if batch_idx * len(data) > TEST_SIZE: 90 | break 91 | outputs = model(data) 92 | _, predicted = torch.max(outputs.data, 1) 93 | total += target.size(0) 94 | correct += (predicted == target).sum().item() 95 | return correct / total 96 | 97 | 98 | def get_data_loaders(datapath="~/data"): 99 | mnist_transforms = transforms.Compose([transforms.ToTensor()]) 100 | train_loader = torch.utils.data.DataLoader( 101 | datasets.MNIST( 102 | datapath, train=True, transform=mnist_transforms), 103 | batch_size=64, 104 | shuffle=True 105 | ) 106 | test_loader = torch.utils.data.DataLoader( 107 | datasets.MNIST( 108 | datapath, train=False, transform=mnist_transforms), 109 | batch_size=64, 110 | shuffle=True 111 | ) 112 | return train_loader, test_loader -------------------------------------------------------------------------------- /tune_exercises/pbt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/tune_exercises/pbt.png -------------------------------------------------------------------------------- /tune_exercises/tune-arch-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/tune_exercises/tune-arch-simple.png -------------------------------------------------------------------------------- /tune_exercises/tune.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/tune_exercises/tune.png -------------------------------------------------------------------------------- /tutorialextension.py: -------------------------------------------------------------------------------- 1 | import os 2 | from subprocess import Popen 3 | 4 | def load_jupyter_server_extension(nbapp): 5 | cwd = os.getcwd() 6 | # The following will replace "localhost" with the right binder url 7 | Popen("python utilities/patch.py examples/news_recommendation_serving.ipynb 9000", shell=True) 8 | Popen("python utilities/patch.py exercises/exercise01-Introduction.ipynb 8000", shell=True) 9 | Popen("python utilities/patch.py exercises/exercise02-Task_Dependencies.ipynb 8000", shell=True) 10 | os.chdir(os.path.join(cwd, "qreader", "dist")) 11 | Popen("python ../../utilities/patch.py static/js/app.*.js 5000", shell=True) 12 | 13 | Popen(["python", "-m", "http.server", "9000"]) 14 | os.chdir("/tmp") 15 | Popen(["python", "-m", "http.server"]) 16 | -------------------------------------------------------------------------------- /utilities/javascript-pong/static/images/press1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/utilities/javascript-pong/static/images/press1.png -------------------------------------------------------------------------------- /utilities/javascript-pong/static/images/press2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/utilities/javascript-pong/static/images/press2.png -------------------------------------------------------------------------------- /utilities/javascript-pong/static/images/winner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/utilities/javascript-pong/static/images/winner.png -------------------------------------------------------------------------------- /utilities/javascript-pong/static/index.html: -------------------------------------------------------------------------------- [The HTML markup of this file was stripped during extraction; only its visible text survives. The page is titled "Pong!" and, where the game canvas would render, shows the fallback "Sorry, this example cannot be run because your browser does not support the <canvas> element". The markup presumably pulls in pong.css and the game.js/pong.js scripts that sit alongside it in static/.] -------------------------------------------------------------------------------- /utilities/javascript-pong/static/pong.css: -------------------------------------------------------------------------------- 1 | body { background-color: black; color: #AAA; font-size: 12pt; padding: 1em; } 2 | 3 | #unsupported { border: 1px solid yellow; color: black; background-color: #FFFFAD; padding: 2em; margin: 1em; display: inline-block; } 4 | 5 | #sidebar { width: 18em; height: 40em; float: left; font-size: 0.825em; background-color: #333; border: 1px solid white; padding: 1em; } 6 | #sidebar h2 { color: white; text-align: center; margin: 0; } 7 | #sidebar .parts { padding-left: 1em; list-style-type: none; margin-bottom: 2em; text-align: right; } 8 | #sidebar .parts li a { color: white; text-decoration: none; } 9 | #sidebar .parts li a:visited { color: white; } 10 | #sidebar .parts li a:hover { color: white; text-decoration: underline; } 11 | #sidebar .parts li a.selected { color: #F08010; } 12 | #sidebar .parts li a i { color: #AAA; } 13 | #sidebar .parts li a.selected i { color: #F08010; } 14 | #sidebar .settings { line-height: 1.2em; height: 1.2em; text-align: right; } 15 | #sidebar .settings.size { } 16 | #sidebar .settings.speed { margin-bottom: 1em; } 17 | #sidebar .settings label { vertical-align: middle; } 18 | #sidebar .settings input { vertical-align: middle; } 19 | #sidebar .settings select { vertical-align: middle; } 20 | #sidebar .description { margin-bottom: 2em; } 21 | #sidebar .description b { font-weight: normal; color: #FFF; } 22 | 23 | 24 | @media screen and (min-width: 0px) { 25 | #sidebar { display: none; } 26 | #game { display: block; width: 480px; height: 360px; margin: 0 auto; } 27 | } 28 | 29 | @media screen and (min-width: 800px) { 30 | #game { width: 640px; height: 480px; } 31 | } 32 | 33 | @media screen and (min-width: 1000px) { 34 | #sidebar { display: block; } 35 | #game { margin-left: 18em; } 36 | } 37 | 38 | @media screen and (min-width: 1200px) { 39 | #game { width: 800px; height: 600px; } 40 | } 41 | 42 | @media screen and (min-width: 1600px) { 43 | #game { width: 1024px; height: 768px; } 44 | } 45 | -------------------------------------------------------------------------------- /utilities/javascript-pong/static/sounds/goal.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/utilities/javascript-pong/static/sounds/goal.wav -------------------------------------------------------------------------------- /utilities/javascript-pong/static/sounds/ping.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/utilities/javascript-pong/static/sounds/ping.wav -------------------------------------------------------------------------------- /utilities/javascript-pong/static/sounds/pong.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/utilities/javascript-pong/static/sounds/pong.wav -------------------------------------------------------------------------------- /utilities/javascript-pong/static/sounds/wall.wav: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/ray-project/tutorial/08f4f01fc3e918c997c971f7b2421551f054c851/utilities/javascript-pong/static/sounds/wall.wav -------------------------------------------------------------------------------- /utilities/patch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | port = sys.argv[2] 5 | 6 | url = "https://hub.mybinder.org" + os.environ["JUPYTERHUB_SERVICE_PREFIX"] + "proxy/" + port + "/" 7 | 8 | with open(sys.argv[1], "r") as f: 9 | content = f.read() 10 | content = content.replace("http://localhost:" + port + "/", url) 11 | 12 | print("content", content) 13 | 14 | with open(sys.argv[1], "w") as f: 15 | f.write(content) 16 | -------------------------------------------------------------------------------- /utilities/pong_py/pong_py/__init__.py: -------------------------------------------------------------------------------- 1 | from pong_py.pongjsenv import PongJSEnv 2 | -------------------------------------------------------------------------------- /utilities/pong_py/pong_py/ball.py: -------------------------------------------------------------------------------- 1 | import pong_py.helper as helper 2 | import random 3 | 4 | class Ball(): 5 | def __init__(self, pong): 6 | self.radius = 5 7 | self.dt = pong.dt 8 | self.minX = self.radius 9 | self.maxX = pong.width - self.radius 10 | self.minY = pong.wall_width + self.radius 11 | self.maxY = pong.height - pong.wall_width - self.radius 12 | self.speed = (self.maxX - self.minX) / 4 13 | self.accel = 8 14 | self.dx = 0 15 | self.dy = 0 16 | 17 | def set_position(self, x, y): 18 | self.x_prev = x if not hasattr(self, "x") else self.x 19 | self.y_prev = y if not hasattr(self, "y") else self.y 20 | 21 | self.x = x 22 | self.y = y 23 | self.left = self.x - self.radius 24 | self.top = self.y - self.radius 25 | self.right = self.x + self.radius 26 | self.bottom = self.y + self.radius 27 | 28 | def set_direction(self, dx, dy): 29 | self.dx = dx 30 | self.dy = dy 31 | 32 | def update(self, left_pad, right_pad): 33 | 34 | pos = helper.accelerate(self.x, self.y, 35 | self.dx, self.dy, 36 | self.accel, self.dt) 37 | 38 | if ((pos.dy > 0) and (pos.y > self.maxY)): 39 | pos.y = self.maxY 40 | pos.dy = -pos.dy 41 | elif ((pos.dy < 0) and (pos.y < self.minY)): 42 | pos.y = self.minY 43 | pos.dy = -pos.dy 44 | 45 | paddle = left_pad if (pos.dx < 0) else right_pad 46 | pt = helper.ballIntercept(self, paddle, pos.nx, pos.ny) 47 | 48 | if pt: 49 | if pt.d == 'left' or pt.d == 'right': 50 | pos.x = pt.x 51 | pos.dx = -pos.dx 52 | elif pt.d == 'top' or pt.d == 'bottom': 53 | pos.y = pt.y 54 | pos.dy = -pos.dy 55 | 56 | if paddle.up: 57 | pos.dy = pos.dy * (0.5 if pos.dy < 0 else 1.5) 58 | elif paddle.down: 59 | pos.dy = pos.dy * (0.5 if pos.dy > 0 else 1.5) 60 | 61 | self.set_position(pos.x, pos.y) 62 | self.set_direction(pos.dx, pos.dy) 63 | 64 | def reset(self, playerNo): 65 | self.set_position((self.maxX + self.minX) / 2, random.uniform(self.minY, self.maxY)) 66 | self.set_direction(self.speed if playerNo == 1 else -self.speed, self.speed) 67 | 
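The ball physics above can be exercised standalone, which is handy when debugging the environment. A quick sketch (the ``SimpleNamespace`` stand-in below is illustrative, not part of the package; it just mirrors the geometry and timestep that ``PongJS`` in ``pongjsenv.py`` uses):

.. code-block:: python

    from types import SimpleNamespace
    from pong_py.ball import Ball
    from pong_py.paddle import Paddle

    # Container with the same geometry/timestep as PongJS.
    pong = SimpleNamespace(width=640, height=480, wall_width=12, dt=0.05)
    left, right = Paddle(0, pong), Paddle(1, pong)
    ball = Ball(pong)
    ball.reset(0)                # serve moving toward the left paddle
    for _ in range(200):         # 200 steps * dt = 10 simulated seconds
        ball.update(left, right)
    print(ball.x, ball.y, ball.dx, ball.dy)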
["left", "right", "top", "bottom"]) 6 | 7 | class Position(): 8 | def __init__(self, nx, ny, x, y, dx, dy): 9 | self.nx = nx 10 | self.ny = ny 11 | self.x = x 12 | self.y = y 13 | self.dx = dx 14 | self.dy = dy 15 | 16 | class Intercept(): 17 | def __init__(self, x, y, d): 18 | self.x = x 19 | self.y = y 20 | self.d = d 21 | 22 | class Rectangle(): 23 | def __init__(self, left, right, top, bottom): 24 | self.left = left 25 | self.right = right 26 | self.top = top 27 | self.bottom = bottom 28 | 29 | def accelerate(x, y, dx, dy, accel, dt): 30 | x2 = x + (dt * dx) + (accel * dt * dt * 0.5); 31 | y2 = y + (dt * dy) + (accel * dt * dt * 0.5); 32 | dx2 = dx + (accel * dt) * (1 if dx > 0 else -1); 33 | dy2 = dy + (accel * dt) * (1 if dy > 0 else -1); 34 | return Position((x2-x), (y2-y), x2, y2, dx2, dy2 ) 35 | 36 | 37 | def intercept(x1, y1, x2, y2, x3, y3, x4, y4, d): 38 | denom = ((y4-y3) * (x2-x1)) - ((x4-x3) * (y2-y1)) 39 | if (denom != 0): 40 | ua = (((x4-x3) * (y1-y3)) - ((y4-y3) * (x1-x3))) / denom 41 | if ((ua >= 0) and (ua <= 1)): 42 | ub = (((x2-x1) * (y1-y3)) - ((y2-y1) * (x1-x3))) / denom 43 | if ((ub >= 0) and (ub <= 1)): 44 | x = x1 + (ua * (x2-x1)) 45 | y = y1 + (ua * (y2-y1)) 46 | return Intercept(x, y, d) 47 | 48 | 49 | def ballIntercept(ball, rect, nx, ny): 50 | pt = None 51 | if (nx < 0): 52 | pt = intercept(ball.x, ball.y, ball.x + nx, ball.y + ny, 53 | rect.right + ball.radius, 54 | rect.top - ball.radius, 55 | rect.right + ball.radius, 56 | rect.bottom + ball.radius, 57 | "right"); 58 | elif (nx > 0): 59 | pt = intercept(ball.x, ball.y, ball.x + nx, ball.y + ny, 60 | rect.left - ball.radius, 61 | rect.top - ball.radius, 62 | rect.left - ball.radius, 63 | rect.bottom + ball.radius, 64 | "left") 65 | 66 | if (not pt): 67 | if (ny < 0): 68 | pt = intercept(ball.x, ball.y, ball.x + nx, ball.y + ny, 69 | rect.left - ball.radius, 70 | rect.bottom + ball.radius, 71 | rect.right + ball.radius, 72 | rect.bottom + ball.radius, 73 | "bottom"); 74 | elif (ny > 0): 75 | pt = intercept(ball.x, ball.y, ball.x + nx, ball.y + ny, 76 | rect.left - ball.radius, 77 | rect.top - ball.radius, 78 | rect.right + ball.radius, 79 | rect.top - ball.radius, 80 | "top"); 81 | return pt -------------------------------------------------------------------------------- /utilities/pong_py/pong_py/paddle.py: -------------------------------------------------------------------------------- 1 | import random 2 | import pong_py.helper as helper 3 | from pong_py.helper import Rectangle 4 | 5 | class Paddle(): 6 | STOP = 0 7 | DOWN = 1 8 | UP = 2 9 | 10 | 11 | def __init__(self, rhs, pong): 12 | self.pid = rhs 13 | self.width = 12 14 | self.height = 60 15 | self.dt = pong.dt 16 | self.minY = pong.wall_width 17 | self.maxY = pong.height - pong.wall_width - self.height 18 | self.speed = (self.maxY - self.minY) / 2 19 | self.ai_reaction = 0.1 20 | self.ai_error = 120 21 | self.pong = pong 22 | self.set_direction(0) 23 | self.set_position(pong.width - self.width if rhs else 0, 24 | self.minY + (self.maxY - self.minY) / 2) 25 | self.prediction = None 26 | self.ai_prev_action = 0 27 | 28 | def set_position(self, x, y): 29 | self.x = x 30 | self.y = y 31 | self.left = self.x 32 | self.right = self.left + self.width 33 | self.top = self.y 34 | self.bottom = self.y + self.height 35 | 36 | def set_direction(self, dy): 37 | # Needed for spin calculation 38 | self.up = -dy if dy < 0 else 0 39 | self.down = dy if dy > 0 else 0 40 | 41 | def step(self, action): 42 | if action == self.STOP: 43 | self.stopMovingDown() 44 | 
 -------------------------------------------------------------------------------- /utilities/pong_py/pong_py/paddle.py: -------------------------------------------------------------------------------- 1 | import random 2 | import pong_py.helper as helper 3 | from pong_py.helper import Rectangle 4 | 5 | class Paddle(): 6 | STOP = 0 7 | DOWN = 1 8 | UP = 2 9 | 10 | 11 | def __init__(self, rhs, pong): 12 | self.pid = rhs 13 | self.width = 12 14 | self.height = 60 15 | self.dt = pong.dt 16 | self.minY = pong.wall_width 17 | self.maxY = pong.height - pong.wall_width - self.height 18 | self.speed = (self.maxY - self.minY) / 2 19 | self.ai_reaction = 0.1 20 | self.ai_error = 120 21 | self.pong = pong 22 | self.set_direction(0) 23 | self.set_position(pong.width - self.width if rhs else 0, 24 | self.minY + (self.maxY - self.minY) / 2) 25 | self.prediction = None 26 | self.ai_prev_action = 0 27 | 28 | def set_position(self, x, y): 29 | self.x = x 30 | self.y = y 31 | self.left = self.x 32 | self.right = self.left + self.width 33 | self.top = self.y 34 | self.bottom = self.y + self.height 35 | 36 | def set_direction(self, dy): 37 | # Needed for spin calculation 38 | self.up = -dy if dy < 0 else 0 39 | self.down = dy if dy > 0 else 0 40 | 41 | def step(self, action): 42 | if action == self.STOP: 43 | self.stopMovingDown() 44 | self.stopMovingUp() 45 | elif action == self.DOWN: 46 | self.moveDown() 47 | elif action == self.UP: 48 | self.moveUp() 49 | amt = self.down - self.up 50 | if amt != 0: 51 | y = self.y + (amt * self.dt * self.speed) 52 | if y < self.minY: 53 | y = self.minY 54 | elif y > self.maxY: 55 | y = self.maxY 56 | self.set_position(self.x, y) 57 | 58 | def predict(self, ball, dt): 59 | # Only re-predict if the ball changed direction or it's been a while since the last prediction. 60 | if (self.prediction and ((self.prediction.dx * ball.dx) > 0) and 61 | ((self.prediction.dy * ball.dy) > 0) and 62 | (self.prediction.since < self.ai_reaction)): 63 | self.prediction.since += dt 64 | return 65 | 66 | rect = Rectangle(self.left, self.right, -10000, 10000) 67 | pt = helper.ballIntercept(ball, rect, ball.dx * 10, ball.dy * 10) 68 | 69 | if (pt): 70 | t = self.minY + ball.radius 71 | b = self.maxY + self.height - ball.radius 72 | 73 | while ((pt.y < t) or (pt.y > b)): 74 | if (pt.y < t): 75 | pt.y = t + (t - pt.y) 76 | elif (pt.y > b): 77 | pt.y = t + (b - t) - (pt.y - b) 78 | self.prediction = pt 79 | else: 80 | self.prediction = None 81 | 82 | if self.prediction: 83 | self.prediction.since = 0 84 | self.prediction.dx = ball.dx 85 | self.prediction.dy = ball.dy 86 | self.prediction.radius = ball.radius 87 | self.prediction.exactX = self.prediction.x 88 | self.prediction.exactY = self.prediction.y 89 | closeness = (ball.x - self.right if ball.dx < 0 else self.left - ball.x) / self.pong.width 90 | error = self.ai_error * closeness 91 | self.prediction.y = self.prediction.y + random.uniform(-error, error) 92 | 93 | def ai_step(self, ball): 94 | 95 | if (((ball.x < self.left) and (ball.dx < 0)) or 96 | ((ball.x > self.right) and (ball.dx > 0))): 97 | self.stopMovingUp() 98 | self.stopMovingDown() 99 | return 100 | 101 | self.predict(ball, self.dt) 102 | action = self.ai_prev_action 103 | 104 | if (self.prediction): 105 | # print('prediction') 106 | if (self.prediction.y < (self.top + self.height/2 - 5)): 107 | action = self.UP 108 | # print("moved up") 109 | elif (self.prediction.y > (self.bottom - self.height/2 + 5)): 110 | action = self.DOWN 111 | # print("moved down") 112 | 113 | else: 114 | action = self.STOP 115 | # print("nothing") 116 | self.ai_prev_action = action 117 | return self.step(action) 118 | 119 | def moveUp(self): 120 | self.down = 0 121 | self.up = 1 122 | 123 | def moveDown(self): 124 | self.down = 1 125 | self.up = 0 126 | 127 | def stopMovingDown(self): 128 | self.down = 0 129 | 130 | def stopMovingUp(self): 131 | self.up = 0 132 | -------------------------------------------------------------------------------- /utilities/pong_py/pong_py/pongjsenv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | import gym.spaces 5 | 6 | from pong_py.ball import Ball 7 | from pong_py.paddle import Paddle 8 | 9 | 10 | def transform_state(state): 11 | return state / 500 12 | 13 | 14 | class PongJS(object): 15 | # MDP to 16 | def __init__(self): 17 | self.width = 640 18 | self.height = 480 19 | self.wall_width = 12 20 | self.dt = 0.05  # seconds 21 | #self.dt = 0.01  # seconds 22 | self.left_pad = Paddle(0, self) 23 | self.right_pad = Paddle(1, self) 24 | self.ball = Ball(self) 25 | 26 | def step(self, action): 27 | # Advance the player paddle, the AI paddle, then the ball. 28 | self.left_pad.step(action) 29 | self.right_pad.ai_step(self.ball) 30 | 31 | self.ball.update(self.left_pad, self.right_pad) 32 | term, reward = self.terminate() 33 | if term: 34 | 
self.reset(0 if reward == 1 else 1) 35 | state = self.get_state() 36 | return state, reward, term 37 | 38 | def init(self): 39 | self.reset(0) 40 | 41 | def terminate(self): 42 | if self.ball.left > self.width: 43 | return True, 1 44 | elif self.ball.right < 0: 45 | return True, -1 46 | else: 47 | return False, 0 48 | 49 | def get_state(self): 50 | return np.array([self.left_pad.y, 0, 51 | self.ball.x, self.ball.y, 52 | self.ball.dx, self.ball.dy, 53 | self.ball.x_prev, self.ball.y_prev]) 54 | 55 | def reset(self, player): 56 | self.ball.reset(player) 57 | 58 | 59 | class PongJSEnv(gym.Env): 60 | def __init__(self): 61 | self.env = PongJS() 62 | self.action_space = gym.spaces.Discrete(3) 63 | self.observation_space = gym.spaces.box.Box(low=0, high=1, shape=(8,)) 64 | 65 | @property 66 | def right_pad(self): 67 | return self.env.right_pad 68 | 69 | @property 70 | def left_pad(self): 71 | return self.env.left_pad 72 | 73 | def reset(self): 74 | self.env.init() 75 | return transform_state(self.env.get_state()) 76 | 77 | def step(self, action): 78 | state, reward, done = self.env.step(action) 79 | return transform_state(state), 1, done, {} 80 | #return state, reward, done, {} 81 | -------------------------------------------------------------------------------- /utilities/pong_py/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages, Distribution 2 | 3 | setup(name='pong_py', 4 | packages=find_packages()) 5 | --------------------------------------------------------------------------------
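Putting it together, ``PongJSEnv`` behaves like any other Gym environment. A minimal random rollout (a sketch; assumes the package has been installed, e.g. with ``pip install -e utilities/pong_py``):

.. code-block:: python

    from pong_py import PongJSEnv

    env = PongJSEnv()
    obs = env.reset()
    done, steps = False, 0
    while not done:
        # Random left-paddle actions: 0 = stop, 1 = down, 2 = up.
        obs, reward, done, info = env.step(env.action_space.sample())
        steps += 1
    print("episode length:", steps)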