├── xvfb.pid ├── stop-docker ├── Workshop ├── Images │ ├── AI.jpg │ ├── Agent.png │ ├── CartPole-v1.gif │ ├── Discussion.jpg │ ├── Taxi_matrix.png │ └── Taxi_matrix_initial.png ├── freeze_weights │ ├── CartPole-v0 │ │ ├── local_network.h5 │ │ └── target_network.h5 │ └── CartPole-v1 │ │ ├── local_network.h5 │ │ └── target_network.h5 ├── Workshop_Q_table.ipynb ├── Workshop_Q_table_solution.ipynb └── Workshop_DQN.ipynb ├── .idea ├── libraries │ └── R_User_Library.xml ├── vcs.xml ├── misc.xml ├── modules.xml ├── Reinforcement-Learning-Workshop.iml └── workspace.xml ├── start-docker ├── xvfb ├── AMI-user-data ├── README.md ├── .gitignore ├── Workshop_facilitation └── generate-groups.py └── LICENSE /xvfb.pid: -------------------------------------------------------------------------------- 1 | 60 2 | -------------------------------------------------------------------------------- /stop-docker: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker stop $(docker ps -q) 3 | 4 | -------------------------------------------------------------------------------- /Workshop/Images/AI.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiansd/Reinforcement-Learning-Workshop/HEAD/Workshop/Images/AI.jpg -------------------------------------------------------------------------------- /Workshop/Images/Agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiansd/Reinforcement-Learning-Workshop/HEAD/Workshop/Images/Agent.png -------------------------------------------------------------------------------- /Workshop/Images/CartPole-v1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiansd/Reinforcement-Learning-Workshop/HEAD/Workshop/Images/CartPole-v1.gif 
-------------------------------------------------------------------------------- /Workshop/Images/Discussion.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiansd/Reinforcement-Learning-Workshop/HEAD/Workshop/Images/Discussion.jpg -------------------------------------------------------------------------------- /Workshop/Images/Taxi_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiansd/Reinforcement-Learning-Workshop/HEAD/Workshop/Images/Taxi_matrix.png -------------------------------------------------------------------------------- /Workshop/Images/Taxi_matrix_initial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiansd/Reinforcement-Learning-Workshop/HEAD/Workshop/Images/Taxi_matrix_initial.png -------------------------------------------------------------------------------- /.idea/libraries/R_User_Library.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Workshop/freeze_weights/CartPole-v0/local_network.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiansd/Reinforcement-Learning-Workshop/HEAD/Workshop/freeze_weights/CartPole-v0/local_network.h5 -------------------------------------------------------------------------------- /Workshop/freeze_weights/CartPole-v0/target_network.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiansd/Reinforcement-Learning-Workshop/HEAD/Workshop/freeze_weights/CartPole-v0/target_network.h5 -------------------------------------------------------------------------------- /Workshop/freeze_weights/CartPole-v1/local_network.h5: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiansd/Reinforcement-Learning-Workshop/HEAD/Workshop/freeze_weights/CartPole-v1/local_network.h5 -------------------------------------------------------------------------------- /Workshop/freeze_weights/CartPole-v1/target_network.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabiansd/Reinforcement-Learning-Workshop/HEAD/Workshop/freeze_weights/CartPole-v1/target_network.h5 -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /start-docker: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | TMP=$(sudo docker run --rm -d -v /home/ubuntu/RL_folder:/notebooks -p 8888:8888 justheuristic/practical_rl /run_jupyter.sh) 3 | IP=$(curl http://checkip.amazonaws.com) 4 | # The script sleeps to ensure the docker image is finished with the startup such that the docker log is generated 5 | sleep 120 6 | docker logs $TMP 2>&1 | grep -P " .*\K:8888\/\?token=[a-zA-Z0-9]*$" | sudo sed "s/^\(.*\)token=\(.*\)$/http:\/\/$IP:8888\/\?token=\2/" > /home/ubuntu/RL_folder/LINK/"$IP".txt 7 | sleep 5 8 | aws s3 sync /home/ubuntu/RL_folder/LINK s3://rl-ws-eirik-fabian/RL-WS-links 9 | 
-------------------------------------------------------------------------------- /.idea/Reinforcement-Learning-Workshop.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 14 | -------------------------------------------------------------------------------- /xvfb: -------------------------------------------------------------------------------- 1 | #taken from https://gist.github.com/jterrace/2911875 2 | XVFB=/usr/bin/Xvfb 3 | XVFBARGS=":1 -screen 0 1024x768x24 -ac +extension GLX +render -noreset" 4 | PIDFILE=./xvfb.pid 5 | case "$1" in 6 | start) 7 | echo -n "Starting virtual X frame buffer: Xvfb" 8 | start-stop-daemon --start --quiet --pidfile $PIDFILE --make-pidfile --background --exec $XVFB -- $XVFBARGS 9 | echo "." 10 | ;; 11 | stop) 12 | echo -n "Stopping virtual X frame buffer: Xvfb" 13 | start-stop-daemon --stop --quiet --pidfile $PIDFILE 14 | echo "." 15 | ;; 16 | restart) 17 | $0 stop 18 | $0 start 19 | ;; 20 | *) 21 | echo "Usage: /etc/init.d/xvfb {start|stop|restart}" 22 | exit 1 23 | esac 24 | 25 | exit 0 26 | -------------------------------------------------------------------------------- /AMI-user-data: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="//" 2 | MIME-Version: 1.0 3 | 4 | --// 5 | Content-Type: text/cloud-config; charset="us-ascii" 6 | MIME-Version: 1.0 7 | Content-Transfer-Encoding: 7bit 8 | Content-Disposition: attachment; filename="cloud-config.txt" 9 | 10 | #cloud-config 11 | cloud_final_modules: 12 | - [scripts-user, always] 13 | 14 | --// 15 | Content-Type: text/x-shellscript; charset="us-ascii" 16 | MIME-Version: 1.0 17 | Content-Transfer-Encoding: 7bit 18 | Content-Disposition: attachment; filename="userdata.txt" 19 | 20 | #!/bin/bash 21 | cd /home/ubuntu/RL_folder/ 22 | git clone https://github.com/fabiansd/Reinforcement-Learning-Workshop.git 23 | cd 
/home/ubuntu/RL_folder/Reinforcement-Learning-Workshop 24 | sudo bash /home/ubuntu/RL_folder/Reinforcement-Learning-Workshop/start-docker 25 | --// 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CC BY-NC-SA 4.0][cc-by-nc-sa-shield]][cc-by-nc-sa] 2 | 3 | [cc-by-nc-sa]: https://creativecommons.org/licenses/by-nc-sa/4.0/ 4 | [cc-by-nc-sa-shield]: https://mirrors.creativecommons.org/presskit/buttons/80x15/svg/by-nc-sa.svg 5 | 6 | # Reinforcement-Learning-Workshop 7 | Necessary files and notebooks to host an RL workshop on AWS 8 | 9 | ## AWS 10 | 11 | ### Setup 12 | 13 | An AWS Image is used to create a Docker container with the reinforcement learning environment, and automatically launch a Jupyter Notebook on port 8888. To access this notebook, a link is generated with the corresponding IP, port and token. 14 | 15 | ### Steps to launch instances for the WS 16 | 17 | 1. Select the number of instances to start 18 | 2. Paste in the user data 19 | 3. Remember to attach the correct network security group (which opens up port 8888) and IAM role (to give write access to S3) 20 | 21 | ### Visualization 22 | Visualizations in the DQN-notebooks are only supported for Linux and OSX, in addition to headless Linux servers, e.g. an AWS EC2 Linux instance. 
23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- 
/Workshop_facilitation/generate-groups.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import boto3 3 | from botocore.exceptions import ClientError 4 | from pathlib import Path 5 | import os 6 | import json 7 | import argparse 8 | import shutil 9 | import pandas as pd 10 | 11 | TMP_FOLDER = 'LINK_TEMP' 12 | IP_DICT_JSON_NAME = 'active_ips.json' 13 | GROUP_DICT_NAME = 'groups.json' 14 | GROUP_EXCEL_NAME = 'Groups.xlsx' 15 | AMI_ID = 'ami-060e29600c7769bc0' 16 | 17 | 18 | def bucket_exists(bucket_name): 19 | """Determine whether bucket_name exists and the user has permission to access it 20 | 21 | :param bucket_name: string 22 | :return: True if the referenced bucket_name exists, otherwise False 23 | """ 24 | 25 | s3 = boto3.client('s3') 26 | try: 27 | response = s3.head_bucket(Bucket=bucket_name) 28 | except ClientError as e: 29 | logging.debug(e) 30 | return False 31 | return True 32 | 33 | 34 | def write_json(d, dest): 35 | with open(dest, 'w') as cred: 36 | json.dump(d, cred) 37 | 38 | 39 | def load_json(dest): 40 | with open(dest, 'r') as cred: 41 | return json.load(cred) 42 | 43 | 44 | def list_ec2(): 45 | ec2 = boto3.client('ec2') 46 | response = ec2.describe_instances() 47 | 48 | running_ws_ips = {} 49 | 50 | for reservation in response["Reservations"]: 51 | for instance in reservation["Instances"]: 52 | 53 | if instance['ImageId'] == AMI_ID \ 54 | and instance['State']['Name'] == 'running': 55 | 56 | running_ws_ips[instance['InstanceId']] = \ 57 | instance['NetworkInterfaces'][0]['Association']['PublicIp'] 58 | 59 | print(f'\nFound the running ec2 instances:') 60 | print(f'{running_ws_ips}\n') 61 | 62 | return running_ws_ips 63 | 64 | 65 | def download_files(bucket_name, LINK_FOLDER): 66 | 67 | ip_dict = list_ec2() 68 | 69 | if Path(TMP_FOLDER).exists(): 70 | shutil.rmtree(TMP_FOLDER) 71 | os.mkdir(TMP_FOLDER) 72 | 73 | 74 | s3 = boto3.resource('s3') 75 | rl_bucket = s3.Bucket(bucket_name) 76 | 77 | 
# Downloads only files with an ip as name in the bucket 78 | for element in rl_bucket.objects.all(): 79 | 80 | element_list = element.key.split('/') 81 | 82 | if element_list[0] == LINK_FOLDER and os.path.splitext( 83 | element_list[-1])[-1] == '.txt': 84 | p_ip, _ = os.path.splitext(element_list[-1]) 85 | 86 | if p_ip in ip_dict.values(): 87 | 88 | s3.meta.client.download_file(bucket_name, 89 | element.key, 90 | str(Path(TMP_FOLDER).joinpath( 91 | element_list[-1]))) 92 | 93 | # Save the ip dict to track last active ec2 instances 94 | write_json(ip_dict, IP_DICT_JSON_NAME) 95 | 96 | 97 | def allocate_new_groups(): 98 | 99 | ip_active_dict = load_json(IP_DICT_JSON_NAME) 100 | 101 | if Path(GROUP_DICT_NAME).exists(): 102 | os.remove(Path(GROUP_DICT_NAME)) 103 | 104 | group_dict = {} 105 | 106 | group_number = 1 107 | while True: 108 | 109 | popped_active_ip = ip_active_dict.pop( 110 | next(iter(ip_active_dict))) 111 | link_temp = '' 112 | try: 113 | with open(Path(TMP_FOLDER).joinpath( 114 | popped_active_ip + '.txt'), 'r') as link_file: 115 | link_temp = link_file.read().replace('\n', '') 116 | except IOError: 117 | link_temp = 'NO LINK FOUND' 118 | 119 | group_dict[group_number] = { 120 | 'ip': popped_active_ip, 121 | 'link': link_temp 122 | } 123 | 124 | if len(ip_active_dict) == 0: 125 | break 126 | 127 | group_number += 1 128 | 129 | print('\nNew groups allocated to the running instances') 130 | print(group_dict) 131 | print('\n') 132 | 133 | write_json(group_dict, GROUP_DICT_NAME) 134 | 135 | 136 | def update_and_allocate_instances(ip_active_dict): 137 | 138 | # ip_active_dict = load_json(IP_DICT_JSON_NAME) 139 | 140 | try: 141 | group_dict = load_json(Path(GROUP_DICT_NAME)) 142 | except IOError: 143 | allocate_new_groups() 144 | return 145 | 146 | # If any groups no longer has a running EC2 among the active list, 147 | # remove the ip and link 148 | inactive_list = [] 149 | for group_n, info_d in group_dict.items(): 150 | if str(info_d['ip']) not in 
ip_active_dict.values(): 151 | inactive_list.append(group_n) 152 | 153 | for n in inactive_list: 154 | group_dict[n] = {'ip': None, 'link': None} 155 | 156 | # This is untested 157 | 158 | # Here we round up the used ips og the active ips to find out what 159 | # active instances are vacant 160 | non_vacant_instances = [] 161 | for group_n, info_d in group_dict.items(): 162 | if not group_dict[group_n] == {'ip': None, 'link': None}: 163 | non_vacant_instances.append(group_dict[group_n]['ip']) 164 | 165 | 166 | def test_connection(BUCKET_NAME): 167 | 168 | # Set up logging 169 | logging.basicConfig(level=logging.DEBUG, 170 | format='%(levelname)s: %(asctime)s: %(message)s') 171 | 172 | # Check if the bucket exists 173 | if bucket_exists(BUCKET_NAME): 174 | logging.info(f'{BUCKET_NAME} exists and you have permission to access it.') 175 | else: 176 | logging.info(f'{BUCKET_NAME} does not exist or ' 177 | f'you do not have permission to access it.') 178 | 179 | 180 | def generate_excel(): 181 | 182 | group_dict = load_json(GROUP_DICT_NAME) 183 | 184 | group_df = pd.DataFrame.from_dict(group_dict, orient='index') 185 | 186 | group_df.to_excel('Groups.xlsx') 187 | 188 | print('Excel generated and saved') 189 | 190 | 191 | def clean_up(): 192 | if Path(TMP_FOLDER).exists(): 193 | shutil.rmtree(TMP_FOLDER) 194 | if Path(IP_DICT_JSON_NAME).exists(): 195 | os.remove(IP_DICT_JSON_NAME) 196 | if Path(GROUP_DICT_NAME).exists(): 197 | os.remove(GROUP_DICT_NAME) 198 | if Path(GROUP_EXCEL_NAME).exists(): 199 | os.remove(GROUP_EXCEL_NAME) 200 | 201 | 202 | if __name__ == '__main__': 203 | 204 | # Assign this value before running the program 205 | BUCKET_NAME = 'rl-workshop-bucket' 206 | LINK_FOLDER = 'RL-WS-links' 207 | 208 | # input arguments 209 | parser = argparse.ArgumentParser(description= 'Allocate running EC2 instances of RL-WS-image into groups') 210 | parser.add_argument('-i', '--init', type=bool, 211 | help='Set this to yes or no wheter you are creating the groups for the 
first time or updating') 212 | parser.add_argument('-c', '--clean', type=bool, 213 | help='Enable this if you want to clean the entire group setting') 214 | parser.add_argument('-t', '--test', type=bool, 215 | help='Test the connection to the s3 bucket') 216 | args = vars(parser.parse_args()) 217 | 218 | if args['clean']: 219 | clean_up() 220 | print('CLEANED UP') 221 | 222 | if args['test']: 223 | test_connection(BUCKET_NAME) 224 | 225 | if args['init']: 226 | download_files(BUCKET_NAME, LINK_FOLDER) 227 | allocate_new_groups() 228 | generate_excel() 229 | 230 | # ip_org_dict = {'i-0345dc555876e6985': '54.174.112.245', 'i-0116da57317bfa93c': '3.87.192.207', 'i-0a2eb4c7cc444b410': '52.23.168.91'} 231 | # ip_new_dict = {'i-0345dc555876e6985': '54.174.112.245', 'i-0a2eb4c7cc444b410': '52.23.168.91'} 232 | # ip_replaced_dict = {'i-0116da57317bfa93c': '3.87.192.207', 'i-0116daas317bfa93c': '9.87.192.207', 'i-0a2eb4c7cc444b410': '52.23.168.91', 'i-0116da57317bfa93c': '3.87.192.207'} 233 | # elif args['update']: 234 | # download_files(BUCKET_NAME, LINK_FOLDER) 235 | # update_and_allocate_instances() 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 72 | 73 | 74 | 75 | save_ip_dict 76 | list_ec2 77 | shutil 78 | 79 | 80 | 81 | 83 | 84 | 92 | 93 | 94 | 103 | 104 | 105 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 |