├── Selenium_Automation ├── AWS_credentials.txt └── Selenium_DeepRacer.ipynb ├── Compute_Speed_And_Actions └── Spain_racing_line.npy ├── README.md └── Reward_Function └── reward_function.py /Selenium_Automation/AWS_credentials.txt: -------------------------------------------------------------------------------- 1 | AWS_ID 2 | USERNAME 3 | PASSWORD -------------------------------------------------------------------------------- /Compute_Speed_And_Actions/Spain_racing_line.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dgnzlz/Capstone_AWS_DeepRacer/HEAD/Compute_Speed_And_Actions/Spain_racing_line.npy -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Capstone Project for the Master of Science in Business Analytics at ESADE Business School 2 | 3 | **This repository contains the code that was used for the article "An Advanced Guide to AWS DeepRacer - Autonomous Formula 1 Racing using Reinforcement Learning". Feel free to check it out [here](https://towardsdatascience.com/an-advanced-guide-to-aws-deepracer-2b462c37eea).** 4 | 5 | 6 | - The folder *Compute_Speed_And_Actions* contains a jupyter notebook, which takes the optimal racing line from [this](https://github.com/cdthompson/deepracer-k1999-race-lines) repo and computes the optimal speed. Additionally, it computes a custom action space with K-Means clustering. The folder also contains the K1999 racing line notebook from cdthompson, which I altered to be able to only use the inner 80% of the track. 
7 | - The folder *Reward_Function* contains a .py file with the reward function that our team used to get to 12th place out of 1291 participants in the time trial category of the F1 event in May 2020 8 | - The folder *Selenium_Automation* contains a jupyter notebook, which allows you to submit a model to a race multiple times without using the AWS CLI. As a bonus, you can also automatically conduct experiments with hyperparameters. This can be used to conduct multiple experiments overnight without having to manually set them up every couple of hours 9 | 10 | ## GitHub Repositories that were used 11 | - To calculate the optimal racing line: https://github.com/cdthompson/deepracer-k1999-race-lines 12 | - To analyze the logs: https://github.com/aws-deepracer-community/deepracer-analysis 13 | - To retrieve the track data: https://github.com/aws-deepracer-community/deepracer-simapp/tree/master/bundle/deepracer_simulation_environment/share/deepracer_simulation_environment/routes 14 | 15 | ## License 16 | Feel free to use, distribute, and alter the code as you like. 17 | 18 | This is a finished university project. Therefore, we will not be maintaining the code anymore. 
19 | -------------------------------------------------------------------------------- /Selenium_Automation/Selenium_DeepRacer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from selenium import webdriver\n", 10 | "from selenium.webdriver.common.keys import Keys\n", 11 | "from selenium.webdriver.support.ui import Select\n", 12 | "\n", 13 | "import chromedriver_binary # Adds chromedriver binary to path\n", 14 | "\n", 15 | "import time,os" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import itertools\n", 25 | "import random\n", 26 | "import datetime" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# HELPER FUNCTIONS" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# Quits the browser\n", 43 | "def quitBrowser():\n", 44 | " try:\n", 45 | " browser.quit()\n", 46 | " except:\n", 47 | " pass\n", 48 | "\n", 49 | "\n", 50 | "# Returns a new browser\n", 51 | "def newBrowser():\n", 52 | "\n", 53 | " browserProfile = webdriver.ChromeOptions()\n", 54 | " browserProfile.add_experimental_option(\n", 55 | " 'prefs', {'intl.accept_languages': 'en,en_US'})\n", 56 | "\n", 57 | " browser = webdriver.Chrome(options=browserProfile)\n", 58 | " \n", 59 | " # In case chromedriver_binary does not work, use this:\n", 60 | " # path = XXX\n", 61 | " # browser = webdriver.Chrome(path, options=browserProfile)\n", 62 | "\n", 63 | " # If an element is not found, browser will try again every 0.5s until 3 seconds\n", 64 | " browser.implicitly_wait(3)\n", 65 | "\n", 66 | " return browser\n", 67 | "\n", 68 | "\n", 69 | "# Logs in to AWS using the credentials in AWS_credentials.txt\n", 70 
| "def awsLogin():\n", 71 | "\n", 72 | " with open(\"AWS_credentials.txt\", 'r') as f:\n", 73 | " [aws_id, username, password] = f.read().splitlines()\n", 74 | "\n", 75 | " # Build AWS Console URL with aws_id\n", 76 | " aws_id = str(aws_id)\n", 77 | " url = \"https://%s.signin.aws.amazon.com/console\" % aws_id\n", 78 | "\n", 79 | " # Open browser with the starting URL\n", 80 | " browser.get(url)\n", 81 | " browser.refresh()\n", 82 | " time.sleep(3)\n", 83 | "\n", 84 | " usernameInput = browser.find_elements_by_css_selector('form input')[1]\n", 85 | " passwordInput = browser.find_elements_by_css_selector('form input')[2]\n", 86 | "\n", 87 | " usernameInput.send_keys(username)\n", 88 | " passwordInput.send_keys(password)\n", 89 | " passwordInput.send_keys(Keys.ENTER)\n", 90 | " time.sleep(2.5)\n", 91 | "\n", 92 | " print(\n", 93 | " f\"Successfully logged in to AWS account number {aws_id} with username {username}\")\n", 94 | "\n", 95 | "\n", 96 | "# General function to create a new model (used by cloneModel() and newModel())\n", 97 | "# Note: rfchanges is not yet implemented (make changes to parameters in the reward function)\n", 98 | "def createModel(modelname, description, track, hyperparams, maxtime,\n", 99 | " car=None, rewardfunction=None, rfchanges=None, testmode=False):\n", 100 | "\n", 101 | " #### PAGE 1 ####\n", 102 | "\n", 103 | " # Fill in the model name\n", 104 | " time.sleep(0.5)\n", 105 | " #modelnameInput = browser.find_element_by_id('awsui-input-1')\n", 106 | " # awsui-input awsui-input-type-text\n", 107 | " modelnameInput = browser.find_element_by_css_selector(\n", 108 | " 'input[class=\"awsui-input awsui-input-type-text\"]')\n", 109 | " modelnameInput.clear()\n", 110 | " modelnameInput.send_keys(modelname)\n", 111 | "\n", 112 | " # Fill in the training job description\n", 113 | " time.sleep(0.5)\n", 114 | " descriptionInput = browser.find_element_by_css_selector(\n", 115 | " 'textarea[class=\"awsui-textarea\"]')\n", 116 | " 
descriptionInput.clear()\n", 117 | " descriptionInput.send_keys(description)\n", 118 | "\n", 119 | " # Select the track\n", 120 | " time.sleep(0.5)\n", 121 | " trackValue = \"arn:aws:deepracer:us-east-1::track/%s_track\" % track\n", 122 | " trackButton = browser.find_element_by_css_selector(\n", 123 | " 'input[type=\"radio\"][value=\"%s\"]' % trackValue)\n", 124 | " trackButton.click()\n", 125 | "\n", 126 | " # Click next (check page 3, part 3 for more robust xpath approach)\n", 127 | " time.sleep(0.5)\n", 128 | " nextButton = browser.find_elements_by_css_selector(\n", 129 | " 'button[type=\"submit\"] span[awsui-button-region=\"text\"]')[3]\n", 130 | " nextButton.click()\n", 131 | "\n", 132 | " #### PAGE 2 ####\n", 133 | "\n", 134 | " # Click time trial (race type)\n", 135 | " time.sleep(0.5)\n", 136 | " raceType = \"TIME_TRIAL\" # add to function parameters if needed\n", 137 | " raceTypeButton = browser.find_element_by_css_selector(\n", 138 | " 'input[type=\"radio\"][value=\"%s\"]' % raceType)\n", 139 | " raceTypeButton.click()\n", 140 | "\n", 141 | " # Only select car if car argument is passed, else skip this part\n", 142 | " if car != None:\n", 143 | "\n", 144 | " time.sleep(0.5)\n", 145 | "\n", 146 | " # Expand car list\n", 147 | " allCarsList = browser.find_element_by_css_selector(\n", 148 | " 'span[class=\"awsui-select-trigger-textbox\"]')\n", 149 | " allCarsList.click()\n", 150 | "\n", 151 | " # Select desired car\n", 152 | " # number of retries with 1 sec wait inbetween (expanding car list takes time)\n", 153 | " retry = 10\n", 154 | " while retry > 0:\n", 155 | " time.sleep(1)\n", 156 | " try:\n", 157 | " carButton = browser.find_element_by_css_selector(\n", 158 | " 'div[title=\"%s\"]' % car)\n", 159 | " except:\n", 160 | " retry -= 1 # this is executed when there was an error\n", 161 | " else:\n", 162 | " retry = 0 # this is executed when there was no error\n", 163 | " carButton.click()\n", 164 | "\n", 165 | " # Click next (check page 3, part 3 for more 
robust xpath approach)\n", 166 | " time.sleep(0.5)\n", 167 | " nextButton2 = browser.find_elements_by_css_selector(\n", 168 | " 'button[type=\"submit\"] span[awsui-button-region=\"text\"]')[1]\n", 169 | " nextButton2.click()\n", 170 | "\n", 171 | " #### PAGE 3, PART 1 REWARD FUNCTION ####\n", 172 | "\n", 173 | " # Only fill out reward function if argument is passed, else leave pre-filled reward function\n", 174 | " if rewardfunction != None:\n", 175 | "\n", 176 | " time.sleep(0.5)\n", 177 | "\n", 178 | " # Click into code editor for reward function\n", 179 | " codeEditor = browser.find_elements_by_css_selector(\n", 180 | " 'span[class=\"ace_keyword\"]')[0]\n", 181 | " actionChain1 = webdriver.ActionChains(browser)\n", 182 | " actionChain1.move_to_element(codeEditor)\n", 183 | " actionChain1.click()\n", 184 | " actionChain1.perform()\n", 185 | "\n", 186 | " # Select all code and delete\n", 187 | " actionChain2 = webdriver.ActionChains(browser)\n", 188 | " # use Keys.CONTROL for a Windows system\n", 189 | " actionChain2.key_down(Keys.COMMAND)\n", 190 | " actionChain2.send_keys('a')\n", 191 | " actionChain2.key_up(Keys.COMMAND)\n", 192 | " actionChain2.send_keys(Keys.DELETE)\n", 193 | " actionChain2.perform()\n", 194 | "\n", 195 | " # Insert reward function\n", 196 | " # Note: COMMAND+C / COMMAND+V does not work with Selenium as Chrome blocks it because of security reasons\n", 197 | " # NOT YET IMPLEMENTED: WOULD BE EASIER TO INSERT reward_function.txt to S3 with Boto3\n", 198 | " # reward_function_to_console(rewardfunction)\n", 199 | "\n", 200 | " #### PAGE 3, PART 2 HYPERPARAMETERS ####\n", 201 | "\n", 202 | " # Expand hyperparameter menu\n", 203 | " time.sleep(0.5)\n", 204 | " hyperparamsExpand = browser.find_element_by_css_selector(\n", 205 | " 'awsui-expandable-section[class=\"algorithm-settings\"]')\n", 206 | " hyperparamsExpand.click()\n", 207 | "\n", 208 | " # Select desired batch size\n", 209 | " time.sleep(0.5)\n", 210 | " batchsizeButton = 
browser.find_element_by_css_selector(\n", 211 | " 'input[type=\"radio\"][value=\"%i\"]' % hyperparams[\"batchsize\"])\n", 212 | " batchsizeButton.click()\n", 213 | "\n", 214 | " # Enter desired number of epochs\n", 215 | " time.sleep(0.5)\n", 216 | " epochsField = browser.find_element_by_css_selector(\n", 217 | " 'input[name=\"request.TrainingConfig.Hyperparameters.num_epochs\"]')\n", 218 | " epochsField.clear()\n", 219 | " epochsField.send_keys(hyperparams[\"epochs\"])\n", 220 | "\n", 221 | " # Enter desired learning rate\n", 222 | " time.sleep(0.5)\n", 223 | " learningrateField = browser.find_element_by_css_selector(\n", 224 | " 'input[name=\"request.TrainingConfig.Hyperparameters.lr\"]')\n", 225 | " learningrateField.clear()\n", 226 | " learningrateField.send_keys(str(hyperparams[\"learningrate\"]))\n", 227 | "\n", 228 | " # Enter desired entropy\n", 229 | " time.sleep(0.5)\n", 230 | " entropyField = browser.find_element_by_css_selector(\n", 231 | " 'input[name=\"request.TrainingConfig.Hyperparameters.beta_entropy\"]')\n", 232 | " entropyField.clear()\n", 233 | " entropyField.send_keys(str(hyperparams[\"entropy\"]))\n", 234 | "\n", 235 | " # Enter desired discount factor\n", 236 | " time.sleep(0.5)\n", 237 | " discountField = browser.find_element_by_css_selector(\n", 238 | " 'input[name=\"request.TrainingConfig.Hyperparameters.discount_factor\"]')\n", 239 | " discountField.clear()\n", 240 | " discountField.send_keys(str(hyperparams[\"discount\"]))\n", 241 | "\n", 242 | " # Enter desired episodes between updates\n", 243 | " time.sleep(0.5)\n", 244 | " episodesUpdateField = browser.find_element_by_css_selector(\n", 245 | " 'input[name=\"request.TrainingConfig.Hyperparameters.num_episodes_between_training\"]')\n", 246 | " episodesUpdateField.clear()\n", 247 | " episodesUpdateField.send_keys(str(hyperparams[\"episodesUpdate\"]))\n", 248 | "\n", 249 | " #### PAGE 3, PART 3 STOP CONDITION ####\n", 250 | "\n", 251 | " # Enter desired maximum training time in 
minutes\n", 252 | " time.sleep(0.5)\n", 253 | " episodesUpdateField = browser.find_element_by_css_selector(\n", 254 | " 'input[name=\"request.TrainingConfig.TerminationConditions.MaxTimeInMinutes\"]')\n", 255 | " episodesUpdateField.clear()\n", 256 | " episodesUpdateField.send_keys(str(maxtime))\n", 257 | "\n", 258 | " # Press \"Create model\", but only if testmode is False\n", 259 | " if testmode == False:\n", 260 | " time.sleep(0.5)\n", 261 | " createModelButton = browser.find_element_by_xpath(\n", 262 | " '//button[@type=\"submit\"]/*[text()=\"Create model\"]')\n", 263 | " createModelButton.text\n", 264 | " createModelButton.click()\n", 265 | " time.sleep(15)\n", 266 | " # Print success state\n", 267 | " print(\n", 268 | " f\"Successfully created model {modelname} with hyperparams {hyperparams}\")\n", 269 | " else:\n", 270 | " print(\n", 271 | " f\"Prepared model with name {modelname}, but did not yet create it\")\n", 272 | "\n", 273 | "\n", 274 | "# Creates new model as clone from other model\n", 275 | "def cloneModel(clonefrom, modelname, description, track, hyperparams,\n", 276 | " maxtime, rewardfunction=None, rfchanges=None, testmode=False):\n", 277 | "\n", 278 | " browser.get(\n", 279 | " \"https://console.aws.amazon.com/deepracer/home?region=us-east-1#model/%s\" % clonefrom)\n", 280 | " browser.refresh()\n", 281 | " time.sleep(3)\n", 282 | "\n", 283 | " # Click on clone button\n", 284 | " time.sleep(0.5)\n", 285 | " cloneButton = browser.find_element_by_xpath(\n", 286 | " '//*[@id=\"PLCHLDR_model_detail_clone_button\"]')\n", 287 | " cloneButton.click()\n", 288 | "\n", 289 | " # Create model as clone\n", 290 | " time.sleep(0.5)\n", 291 | " createModel(modelname=modelname,\n", 292 | " description=description,\n", 293 | " track=track,\n", 294 | " hyperparams=hyperparams,\n", 295 | " maxtime=maxtime,\n", 296 | " rewardfunction=rewardfunction,\n", 297 | " rfchanges=rfchanges,\n", 298 | " testmode=testmode)\n", 299 | "\n", 300 | "\n", 301 | "# Counts the number 
of models that are currently training\n", 302 | "def count_models_training():\n", 303 | "\n", 304 | " browser.get(\n", 305 | " \"https://console.aws.amazon.com/deepracer/home?region=us-east-1#models\")\n", 306 | " browser.refresh()\n", 307 | " time.sleep(3)\n", 308 | "\n", 309 | " # Count number of models that are being created\n", 310 | " count_created = len(browser.find_elements_by_xpath(\n", 311 | " '//span/*[text()=\"Created\"]'))\n", 312 | " # Count number of models that are training\n", 313 | " count_training = len(browser.find_elements_by_xpath(\n", 314 | " '//span/*[text()=\"Training...\"]'))\n", 315 | " # Count number of models that are being stopped\n", 316 | " count_stopping = len(browser.find_elements_by_xpath(\n", 317 | " '//span/*[text()=\"Stopping...\"]'))\n", 318 | "\n", 319 | " return count_created + count_training + count_stopping\n", 320 | "\n", 321 | "\n", 322 | "# Clones model and performs multiple experiments with hyperparameters\n", 323 | "def clone_hyperparams_experiment(clone_from_model, hyperparams_experiment, training_slots=2,\n", 324 | " number_of_experiments=2, start_naming_with=\"a\", track=\"reInvent2019\",\n", 325 | " maxtime_per_training=180):\n", 326 | "\n", 327 | " # Calculate approximate number of hours that this function will run\n", 328 | " total_hours = number_of_experiments * \\\n", 329 | " (maxtime_per_training/60) / training_slots\n", 330 | " print(\n", 331 | " f\"Starting {number_of_experiments} experiments. This will take approx {total_hours} hours.\")\n", 332 | "\n", 333 | " # Create all combinations of hyperparameters. 
Result is list of dictionaries\n", 334 | " keys, values = zip(*hyperparams_experiment.items())\n", 335 | " hp_exp_all = [dict(zip(keys, v)) for v in itertools.product(*values)]\n", 336 | "\n", 337 | " # Generate random indexes\n", 338 | " hp_exp_all_indexes = random.sample(\n", 339 | " range(len(hp_exp_all)), number_of_experiments)\n", 340 | "\n", 341 | " # Select only the dictionaries that were randomly selected\n", 342 | " hp_exp = [hp_exp_all[i] for i in hp_exp_all_indexes]\n", 343 | "\n", 344 | " # Transform e.g. \"a\" to integer 97\n", 345 | " start_naming_with_int = ord(start_naming_with)\n", 346 | "\n", 347 | " for hp_exp_i in hp_exp:\n", 348 | "\n", 349 | " # Wait until training slot is free\n", 350 | " slot_free = False\n", 351 | " while slot_free == False:\n", 352 | " if count_models_training() < training_slots:\n", 353 | " slot_free = True\n", 354 | " else:\n", 355 | " # wait 5 minutes before checking for free slot again\n", 356 | " time.sleep(5*60)\n", 357 | "\n", 358 | " model_creation_successful = False\n", 359 | " while model_creation_successful == False:\n", 360 | " try:\n", 361 | " # Start training of model with hyperparams hp_exp_i\n", 362 | " cloneModel(clonefrom=clone_from_model,\n", 363 | " modelname=clone_from_model+\"-clone-\" +\n", 364 | " chr(start_naming_with_int),\n", 365 | " description=str(hp_exp_i)[1:-1],\n", 366 | " track=track,\n", 367 | " hyperparams=hp_exp_i,\n", 368 | " maxtime=maxtime_per_training,\n", 369 | " rfchanges=None,\n", 370 | " testmode=False)\n", 371 | " except:\n", 372 | " # If model creation failed, print message and wait for 5 minutes\n", 373 | " current_version = chr(start_naming_with_int)\n", 374 | " now = datetime.datetime.now()\n", 375 | " now_h = now.hour\n", 376 | " now_m = now.minute\n", 377 | " print(\n", 378 | " f\"{now_h}:{now_m} Model creation of {current_version} failed. 
Trying again in 5 minutes\")\n", 379 | " # Try logging in to AWS again, in case user was logged out\n", 380 | " try:\n", 381 | " awsLogin()\n", 382 | " except:\n", 383 | " pass\n", 384 | " time.sleep(5*60)\n", 385 | " else:\n", 386 | " time.sleep(5)\n", 387 | " # If not automatically transferred to new model url, print error message\n", 388 | " modelname_check = clone_from_model + \\\n", 389 | " \"-clone-\" + chr(start_naming_with_int)\n", 390 | " if browser.current_url != (\"https://console.aws.amazon.com/deepracer/home?region=us-east-1#model/%s\" % modelname_check):\n", 391 | " print(f\"Model creation of {modelname_check} may have failed.\", end=' ')\n", 392 | " print(\"Check in the console if model was created and create it manually if needed\")\n", 393 | "\n", 394 | " # If model creation was successful, escape the while loop\n", 395 | " model_creation_successful = True\n", 396 | " # Increasing naming number\n", 397 | " start_naming_with_int += 1\n", 398 | " \n", 399 | " \n", 400 | "def submit_to_spain(modelname):\n", 401 | "\n", 402 | " browser.get(\n", 403 | " \"https://console.aws.amazon.com/deepracer/home?region=us-east-1#model/%s\" % modelname)\n", 404 | " browser.refresh()\n", 405 | " time.sleep(5)\n", 406 | " \n", 407 | " submitToRaceButton = browser.find_element_by_xpath(\n", 408 | " '//button[@type=\"submit\"]/*[text()=\"Submit to virtual race\"]')\n", 409 | " submitToRaceButton.click()\n", 410 | " \n", 411 | " submitModelButton = browser.find_element_by_xpath(\n", 412 | " '//button[@type=\"submit\"]/*[text()=\"Submit model\"]')\n", 413 | " submitModelButton.click()\n", 414 | " \n", 415 | " # Sometimes, pressing the submit button will not trigger a submit\n", 416 | " # Therefore, just retry 5 times\n", 417 | " re_press_submit = 5\n", 418 | " while re_press_submit > 0:\n", 419 | " try:\n", 420 | " submitModelButton.click()\n", 421 | " re_press_submit -= 1\n", 422 | " time.sleep(2)\n", 423 | " except:\n", 424 | " # If click failed, means that submit was 
successful and we got re-routed to Event starting screen\n", 425 | " re_press_submit = 0\n", 426 | "\n", 427 | " time.sleep(15)\n", 428 | "\n", 429 | " print(f\"{datetime.datetime.now()} Submitted model {modelname} to F1 Race\")\n", 430 | " \n", 431 | " \n", 432 | "def submit_to_spain_multiple(modelname, repeat_hours=9):\n", 433 | "\n", 434 | " # Calculate when to stop\n", 435 | " datetime_stop = datetime.datetime.now() + datetime.timedelta(hours=repeat_hours)\n", 436 | "\n", 437 | " # Count number of submits\n", 438 | " count_submits = 0\n", 439 | " count_fails = 0\n", 440 | "\n", 441 | " # Repeat loop until time is up\n", 442 | " while datetime.datetime.now() < datetime_stop:\n", 443 | " try:\n", 444 | " # Submit model to summit\n", 445 | " submit_to_spain(modelname=modelname)\n", 446 | " # Wait for 10 minutes before attempting submit again\n", 447 | " time.sleep(10*60)\n", 448 | " except:\n", 449 | " # If failed to submit, wait for 2 minutes and try again\n", 450 | " count_fails += 1\n", 451 | " time.sleep(2*60)\n", 452 | " # If failed 5 times, try to log back in\n", 453 | " if count_fails >= 10:\n", 454 | " awsLogin()\n", 455 | " else:\n", 456 | " # If there was no error, increase counter by 1\n", 457 | " count_submits += 1\n", 458 | " count_fails = 0\n", 459 | "\n", 460 | " # Print final submit count\n", 461 | " print(f\"Submitted number of models to the race: {count_submits}\")" 462 | ] 463 | }, 464 | { 465 | "cell_type": "markdown", 466 | "metadata": {}, 467 | "source": [ 468 | "# EXECUTE CODE" 469 | ] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": {}, 474 | "source": [ 475 | "### Log in to AWS\n", 476 | "- This cell should always be executed one time in the beginning\n", 477 | "- Important: The directory of this notebook should have a AWS_credentials.txt file, which has 3 lines: AWS_id, username, and password. 
This will not work with an AWS root account" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": {}, 484 | "outputs": [], 485 | "source": [ 486 | "# Quits past browser instance\n", 487 | "quitBrowser()\n", 488 | "\n", 489 | "# Creates new browser instance\n", 490 | "browser = newBrowser()\n", 491 | "\n", 492 | "# Log in to AWS using AWS_credentials.txt\n", 493 | "awsLogin()" 494 | ] 495 | }, 496 | { 497 | "cell_type": "markdown", 498 | "metadata": {}, 499 | "source": [ 500 | "### Submit model to race multiple times" 501 | ] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "metadata": {}, 506 | "source": [ 507 | "#### F1 Event (Spain track)" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "# Submit the model to the summit race for multiple hours\n", 517 | "submit_to_spain_multiple(modelname=\"MODELNAME\", repeat_hours=12)" 518 | ] 519 | }, 520 | { 521 | "cell_type": "markdown", 522 | "metadata": {}, 523 | "source": [ 524 | "### Clone model and run experiments\n", 525 | "This cell runs experiments on hyperparameters. Be aware that this could take several hours to execute. Best to let it run overnight.\n", 526 | "1. Define hyperparameters that should be tested.\n", 527 | "2. clone_from_model: Name of the model that should be cloned\n", 528 | "3. training_slots: Maximum number of parallel training sessions that AWS allows (usually 4, but only 2 in May 2020)\n", 529 | "4. number_of_experiments: As many combinations of hyperparameters are possible, only a handful should be randomly selected and tested\n", 530 | "5. start_naming_with: Defines with which character the clones should start being named. Be careful to start with a character that does not exist yet, otherwise AWS will give an error when trying to create the model\n", 531 | "6. track: Track name (e.g. \"reInvent2019\" or \"Spain\")\n", 532 | "7. 
maxtime_per_training: Define the number of minutes that each clone should be trained" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": null, 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [ 541 | "# All available hyperparameters\n", 542 | "# \"batchsize\": [32,64,128,256,512], \n", 543 | "# \"epochs\": [3 to 10], \n", 544 | "# \"learningrate\": [0.00000001 to 0.001], \n", 545 | "# \"entropy\": [0 to 1], \n", 546 | "# \"discount\": [0 to 1], \n", 547 | "# \"episodesUpdate\": [5 to 100]\n", 548 | "\n", 549 | "experiment = {\"batchsize\":[64,256], \n", 550 | " \"epochs\":[5,10], \n", 551 | " \"learningrate\":[0.0001,0.0002,0.0003], \n", 552 | " \"entropy\":[0.005,0.01], \n", 553 | " \"discount\":[0.9993], \n", 554 | " \"episodesUpdate\":[20]}\n", 555 | "\n", 556 | "clone_hyperparams_experiment(clone_from_model=\"Cup2019-v14-clone-e\", \n", 557 | " hyperparams_experiment=experiment,\n", 558 | " training_slots=2, \n", 559 | " number_of_experiments=24, \n", 560 | " start_naming_with=\"a\", \n", 561 | " track=\"reInvent2019\", \n", 562 | " maxtime_per_training=180)" 563 | ] 564 | } 565 | ], 566 | "metadata": { 567 | "kernelspec": { 568 | "display_name": "Python 3", 569 | "language": "python", 570 | "name": "python3" 571 | }, 572 | "language_info": { 573 | "codemirror_mode": { 574 | "name": "ipython", 575 | "version": 3 576 | }, 577 | "file_extension": ".py", 578 | "mimetype": "text/x-python", 579 | "name": "python", 580 | "nbconvert_exporter": "python", 581 | "pygments_lexer": "ipython3", 582 | "version": "3.7.7" 583 | } 584 | }, 585 | "nbformat": 4, 586 | "nbformat_minor": 4 587 | } 588 | -------------------------------------------------------------------------------- /Reward_Function/reward_function.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | class Reward: 5 | def __init__(self, verbose=False): 6 | self.first_racingpoint_index = None 7 | self.verbose = verbose 8 
| 9 | def reward_function(self, params): 10 | 11 | # Import package (needed for heading) 12 | import math 13 | 14 | ################## HELPER FUNCTIONS ################### 15 | 16 | def dist_2_points(x1, x2, y1, y2): 17 | return abs(abs(x1-x2)**2 + abs(y1-y2)**2)**0.5 18 | 19 | def closest_2_racing_points_index(racing_coords, car_coords): 20 | 21 | # Calculate all distances to racing points 22 | distances = [] 23 | for i in range(len(racing_coords)): 24 | distance = dist_2_points(x1=racing_coords[i][0], x2=car_coords[0], 25 | y1=racing_coords[i][1], y2=car_coords[1]) 26 | distances.append(distance) 27 | 28 | # Get index of the closest racing point 29 | closest_index = distances.index(min(distances)) 30 | 31 | # Get index of the second closest racing point 32 | distances_no_closest = distances.copy() 33 | distances_no_closest[closest_index] = 999 34 | second_closest_index = distances_no_closest.index( 35 | min(distances_no_closest)) 36 | 37 | return [closest_index, second_closest_index] 38 | 39 | def dist_to_racing_line(closest_coords, second_closest_coords, car_coords): 40 | 41 | # Calculate the distances between 2 closest racing points 42 | a = abs(dist_2_points(x1=closest_coords[0], 43 | x2=second_closest_coords[0], 44 | y1=closest_coords[1], 45 | y2=second_closest_coords[1])) 46 | 47 | # Distances between car and closest and second closest racing point 48 | b = abs(dist_2_points(x1=car_coords[0], 49 | x2=closest_coords[0], 50 | y1=car_coords[1], 51 | y2=closest_coords[1])) 52 | c = abs(dist_2_points(x1=car_coords[0], 53 | x2=second_closest_coords[0], 54 | y1=car_coords[1], 55 | y2=second_closest_coords[1])) 56 | 57 | # Calculate distance between car and racing line (goes through 2 closest racing points) 58 | # try-except in case a=0 (rare bug in DeepRacer) 59 | try: 60 | distance = abs(-(a**4) + 2*(a**2)*(b**2) + 2*(a**2)*(c**2) - 61 | (b**4) + 2*(b**2)*(c**2) - (c**4))**0.5 / (2*a) 62 | except: 63 | distance = b 64 | 65 | return distance 66 | 67 | # Calculate 
which one of the closest racing points is the next one and which one the previous one 68 | def next_prev_racing_point(closest_coords, second_closest_coords, car_coords, heading): 69 | 70 | # Virtually set the car more into the heading direction 71 | heading_vector = [math.cos(math.radians( 72 | heading)), math.sin(math.radians(heading))] 73 | new_car_coords = [car_coords[0]+heading_vector[0], 74 | car_coords[1]+heading_vector[1]] 75 | 76 | # Calculate distance from new car coords to 2 closest racing points 77 | distance_closest_coords_new = dist_2_points(x1=new_car_coords[0], 78 | x2=closest_coords[0], 79 | y1=new_car_coords[1], 80 | y2=closest_coords[1]) 81 | distance_second_closest_coords_new = dist_2_points(x1=new_car_coords[0], 82 | x2=second_closest_coords[0], 83 | y1=new_car_coords[1], 84 | y2=second_closest_coords[1]) 85 | 86 | if distance_closest_coords_new <= distance_second_closest_coords_new: 87 | next_point_coords = closest_coords 88 | prev_point_coords = second_closest_coords 89 | else: 90 | next_point_coords = second_closest_coords 91 | prev_point_coords = closest_coords 92 | 93 | return [next_point_coords, prev_point_coords] 94 | 95 | def racing_direction_diff(closest_coords, second_closest_coords, car_coords, heading): 96 | 97 | # Calculate the direction of the center line based on the closest waypoints 98 | next_point, prev_point = next_prev_racing_point(closest_coords, 99 | second_closest_coords, 100 | car_coords, 101 | heading) 102 | 103 | # Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians 104 | track_direction = math.atan2( 105 | next_point[1] - prev_point[1], next_point[0] - prev_point[0]) 106 | 107 | # Convert to degree 108 | track_direction = math.degrees(track_direction) 109 | 110 | # Calculate the difference between the track direction and the heading direction of the car 111 | direction_diff = abs(track_direction - heading) 112 | if direction_diff > 180: 113 | direction_diff = 360 - direction_diff 114 
| 115 | return direction_diff 116 | 117 | # Gives back indexes that lie between start and end index of a cyclical list 118 | # (start index is included, end index is not) 119 | def indexes_cyclical(start, end, array_len): 120 | 121 | if end < start: 122 | end += array_len 123 | 124 | return [index % array_len for index in range(start, end)] 125 | 126 | # Calculate how long car would take for entire lap, if it continued like it did until now 127 | def projected_time(first_index, closest_index, step_count, times_list): 128 | 129 | # Calculate how much time has passed since start 130 | current_actual_time = (step_count-1) / 15 131 | 132 | # Calculate which indexes were already passed 133 | indexes_traveled = indexes_cyclical(first_index, closest_index, len(times_list)) 134 | 135 | # Calculate how much time should have passed if car would have followed optimals 136 | current_expected_time = sum([times_list[i] for i in indexes_traveled]) 137 | 138 | # Calculate how long one entire lap takes if car follows optimals 139 | total_expected_time = sum(times_list) 140 | 141 | # Calculate how long car would take for entire lap, if it continued like it did until now 142 | try: 143 | projected_time = (current_actual_time/current_expected_time) * total_expected_time 144 | except: 145 | projected_time = 9999 146 | 147 | return projected_time 148 | 149 | #################### RACING LINE ###################### 150 | 151 | # Optimal racing line for the Spain track 152 | # Each row: [x,y,speed,timeFromPreviousPoint] 153 | racing_track = [[0.34775, -2.173, 4.0, 0.07904], 154 | [0.03162, -2.17293, 4.0, 0.07903], 155 | [-0.28452, -2.17311, 4.0, 0.07904], 156 | [-0.60066, -2.17318, 4.0, 0.07903], 157 | [-0.91682, -2.17293, 4.0, 0.07904], 158 | [-1.23295, -2.17295, 4.0, 0.07903], 159 | [-1.54907, -2.17315, 4.0, 0.07903], 160 | [-1.86524, -2.17319, 4.0, 0.07904], 161 | [-2.18141, -2.17303, 4.0, 0.07904], 162 | [-2.49703, -2.17286, 4.0, 0.07891], 163 | [-2.81231, -2.17287, 3.67826, 0.08571], 
164 | [-3.12023, -2.17178, 3.36387, 0.09154], 165 | [-3.40832, -2.16639, 3.11746, 0.09243], 166 | [-3.67315, -2.15481, 2.91304, 0.091], 167 | [-3.91587, -2.13615, 2.74019, 0.08884], 168 | [-4.13906, -2.11004, 2.59065, 0.08674], 169 | [-4.34538, -2.07631, 2.46067, 0.08496], 170 | [-4.53716, -2.03485, 2.34319, 0.08374], 171 | [-4.71631, -1.98551, 2.23387, 0.08318], 172 | [-4.88433, -1.92804, 2.13488, 0.08318], 173 | [-5.04235, -1.86205, 2.13488, 0.08021], 174 | [-5.19122, -1.78697, 2.13488, 0.0781], 175 | [-5.33151, -1.70198, 2.13488, 0.07683], 176 | [-5.46347, -1.60589, 2.13488, 0.07646], 177 | [-5.587, -1.49694, 2.50447, 0.06576], 178 | [-5.70146, -1.37258, 3.07226, 0.05501], 179 | [-5.80898, -1.23583, 4.0, 0.04349], 180 | [-5.91154, -1.09012, 4.0, 0.04455], 181 | [-6.01148, -0.9397, 4.0, 0.04515], 182 | [-6.0995, -0.80946, 4.0, 0.0393], 183 | [-6.18728, -0.68165, 4.0, 0.03876], 184 | [-6.27492, -0.55603, 4.0, 0.03829], 185 | [-6.36252, -0.43242, 4.0, 0.03788], 186 | [-6.45016, -0.31068, 4.0, 0.0375], 187 | [-6.5379, -0.19068, 4.0, 0.03716], 188 | [-6.6258, -0.07232, 4.0, 0.03686], 189 | [-6.71393, 0.04449, 4.0, 0.03658], 190 | [-6.80234, 0.15983, 4.0, 0.03633], 191 | [-6.89109, 0.27378, 3.71387, 0.03889], 192 | [-6.98023, 0.3864, 3.03045, 0.0474], 193 | [-7.06985, 0.49777, 2.61858, 0.05459], 194 | [-7.16, 0.60792, 2.33605, 0.06093], 195 | [-7.34242, 0.82509, 2.33605, 0.12141], 196 | [-7.51721, 1.0459, 2.33605, 0.12055], 197 | [-7.67697, 1.27353, 2.33168, 0.11927], 198 | [-7.81458, 1.51025, 2.2637, 0.12095], 199 | [-7.92294, 1.75686, 2.19952, 0.12247], 200 | [-7.99486, 2.012, 2.13463, 0.12418], 201 | [-8.03258, 2.27135, 2.13463, 0.12278], 202 | [-8.03667, 2.53161, 2.13463, 0.12194], 203 | [-8.00552, 2.78942, 2.13463, 0.12165], 204 | [-7.93746, 3.04073, 2.13463, 0.12197], 205 | [-7.83093, 3.28049, 2.30799, 0.11368], 206 | [-7.68478, 3.50219, 2.41869, 0.10979], 207 | [-7.50619, 3.70355, 2.54606, 0.10571], 208 | [-7.2994, 3.88253, 2.68781, 0.10175], 209 | [-7.0684, 
4.03801, 2.85105, 0.09767], 210 | [-6.81682, 4.16957, 3.04632, 0.09319], 211 | [-6.54806, 4.27741, 3.28731, 0.08809], 212 | [-6.26528, 4.36241, 3.59377, 0.08216], 213 | [-5.97143, 4.42617, 4.0, 0.07517], 214 | [-5.6692, 4.47093, 3.70808, 0.0824], 215 | [-5.36095, 4.49949, 3.02882, 0.10221], 216 | [-5.04869, 4.51517, 2.62147, 0.11926], 217 | [-4.73409, 4.5216, 2.34382, 0.13426], 218 | [-4.41918, 4.52269, 2.13877, 0.14724], 219 | [-4.14695, 4.51532, 1.97844, 0.13765], 220 | [-3.91151, 4.49653, 1.84539, 0.12799], 221 | [-3.7048, 4.4656, 1.7342, 0.12052], 222 | [-3.5217, 4.4226, 1.63402, 0.1151], 223 | [-3.35902, 4.36803, 1.45633, 0.11782], 224 | [-3.21485, 4.30258, 1.45633, 0.10872], 225 | [-3.08807, 4.22706, 1.45633, 0.10133], 226 | [-2.97818, 4.14236, 1.45633, 0.09527], 227 | [-2.88509, 4.04944, 1.45633, 0.09031], 228 | [-2.80911, 3.94939, 1.45892, 0.08612], 229 | [-2.75307, 3.84294, 1.45892, 0.08245], 230 | [-2.71495, 3.73266, 1.51194, 0.07717], 231 | [-2.69466, 3.62029, 1.56647, 0.07289], 232 | [-2.69044, 3.5075, 1.62279, 0.06955], 233 | [-2.7009, 3.39547, 1.68465, 0.06679], 234 | [-2.72498, 3.28512, 1.75006, 0.06454], 235 | [-2.76177, 3.17716, 1.82263, 0.06258], 236 | [-2.81056, 3.0722, 1.89343, 0.06113], 237 | [-2.87069, 2.97075, 1.99628, 0.05908], 238 | [-2.94169, 2.87328, 2.11279, 0.05708], 239 | [-3.02285, 2.78011, 2.24716, 0.05499], 240 | [-3.11347, 2.69144, 2.40949, 0.05261], 241 | [-3.21279, 2.6074, 2.60909, 0.04987], 242 | [-3.32, 2.52796, 2.86347, 0.0466], 243 | [-3.43416, 2.45293, 2.6432, 0.05169], 244 | [-3.55428, 2.38197, 2.17238, 0.06422], 245 | [-3.67924, 2.31457, 1.8861, 0.07527], 246 | [-3.80787, 2.25006, 1.68759, 0.08527], 247 | [-3.93614, 2.18893, 1.53879, 0.09234], 248 | [-4.06004, 2.12529, 1.41938, 0.09813], 249 | [-4.17613, 2.05724, 1.41938, 0.09481], 250 | [-4.28186, 1.98342, 1.41938, 0.09085], 251 | [-4.37538, 1.90299, 1.41938, 0.0869], 252 | [-4.4553, 1.81542, 1.41938, 0.08353], 253 | [-4.52032, 1.72028, 1.53686, 0.07498], 254 | [-4.56869, 
1.61687, 1.59239, 0.07169], 255 | [-4.60276, 1.50645, 1.65207, 0.06995], 256 | [-4.62264, 1.38906, 1.71608, 0.06939], 257 | [-4.62773, 1.26433, 1.78799, 0.06982], 258 | [-4.61657, 1.13147, 1.86957, 0.07131], 259 | [-4.58656, 0.9891, 1.96042, 0.07422], 260 | [-4.5332, 0.83495, 2.05988, 0.07919], 261 | [-4.4483, 0.6652, 2.16004, 0.08787], 262 | [-4.31593, 0.47389, 2.19343, 0.10606], 263 | [-4.1156, 0.26497, 2.22222, 0.13025], 264 | [-3.87159, 0.0865, 2.24818, 0.13447], 265 | [-3.61395, -0.04165, 2.27402, 0.12654], 266 | [-3.36566, -0.12046, 2.29642, 0.11344], 267 | [-3.13683, -0.1602, 2.31769, 0.10021], 268 | [-2.927, -0.17127, 2.33883, 0.08984], 269 | [-2.73327, -0.16075, 2.35863, 0.08226], 270 | [-2.55287, -0.13312, 2.37286, 0.07691], 271 | [-2.38357, -0.09112, 2.38661, 0.07309], 272 | [-2.22372, -0.0364, 2.39352, 0.07059], 273 | [-2.07202, 0.03012, 2.3947, 0.06917], 274 | [-1.92748, 0.10816, 2.3947, 0.06859], 275 | [-1.78927, 0.19798, 2.64412, 0.06234], 276 | [-1.65666, 0.3006, 2.78442, 0.06022], 277 | [-1.52811, 0.41511, 2.94568, 0.05844], 278 | [-1.40281, 0.54205, 3.13542, 0.05689], 279 | [-1.28, 0.68231, 3.36467, 0.05541], 280 | [-1.15892, 0.83709, 3.6421, 0.05395], 281 | [-1.03877, 1.00792, 4.0, 0.05221], 282 | [-0.91862, 1.19676, 3.68404, 0.06076], 283 | [-0.79737, 1.40593, 2.94366, 0.08213], 284 | [-0.67367, 1.63804, 2.51931, 0.1044], 285 | [-0.54587, 1.8957, 2.23675, 0.12859], 286 | [-0.41214, 2.1805, 2.03055, 0.15495], 287 | [-0.27186, 2.45834, 1.86713, 0.1667], 288 | [-0.11843, 2.71914, 1.73627, 0.17427], 289 | [0.05252, 2.95406, 1.73627, 0.16733], 290 | [0.24282, 3.1558, 1.73627, 0.15973], 291 | [0.4523, 3.31851, 1.73627, 0.15277], 292 | [0.6794, 3.4369, 1.73627, 0.1475], 293 | [0.92148, 3.50479, 1.95437, 0.12864], 294 | [1.17407, 3.51372, 2.11404, 0.11956], 295 | [1.42993, 3.47367, 2.31717, 0.11176], 296 | [1.68478, 3.38833, 2.5896, 0.10378], 297 | [1.9352, 3.26155, 2.98246, 0.09411], 298 | [2.1787, 3.09844, 3.62855, 0.08077], 299 | [2.41403, 2.90595, 
4.0, 0.07601], 300 | [2.64178, 2.69291, 4.0, 0.07796], 301 | [2.86447, 2.46933, 4.0, 0.07889], 302 | [3.08676, 2.2447, 3.22907, 0.09787], 303 | [3.30934, 2.02005, 2.65999, 0.11889], 304 | [3.53187, 1.79508, 2.31065, 0.13695], 305 | [3.75427, 1.56983, 2.06878, 0.15301], 306 | [3.97641, 1.34721, 1.88874, 0.16651], 307 | [4.19715, 1.14479, 1.74488, 0.17164], 308 | [4.41321, 0.97574, 1.62782, 0.16853], 309 | [4.61966, 0.84634, 1.53154, 0.15909], 310 | [4.81352, 0.75553, 1.44494, 0.14816], 311 | [4.99381, 0.69892, 1.36805, 0.13812], 312 | [5.16051, 0.67175, 1.3, 0.12993], 313 | [5.31387, 0.67004, 1.3, 0.11798], 314 | [5.45403, 0.69082, 1.3, 0.10899], 315 | [5.58085, 0.73199, 1.3, 0.10257], 316 | [5.69365, 0.79243, 1.3, 0.09844], 317 | [5.79096, 0.87188, 1.37873, 0.09112], 318 | [5.87004, 0.97096, 1.47004, 0.08624], 319 | [5.93072, 1.08736, 1.57685, 0.08325], 320 | [5.97211, 1.21983, 1.71094, 0.08111], 321 | [5.99266, 1.36774, 1.88391, 0.07927], 322 | [5.99044, 1.5307, 2.12066, 0.07685], 323 | [5.96345, 1.70826, 2.46894, 0.07274], 324 | [5.91045, 1.89955, 2.27125, 0.0874], 325 | [5.83221, 2.10287, 2.02895, 0.10737], 326 | [5.7335, 2.31514, 1.85064, 0.1265], 327 | [5.63686, 2.50726, 1.70721, 0.12597], 328 | [5.55635, 2.69304, 1.59134, 0.12724], 329 | [5.49578, 2.87069, 1.4939, 0.12564], 330 | [5.45645, 3.03926, 1.40987, 0.12278], 331 | [5.4382, 3.19832, 1.33463, 0.11996], 332 | [5.44032, 3.34759, 1.33463, 0.11186], 333 | [5.46216, 3.48672, 1.33463, 0.10552], 334 | [5.50334, 3.61508, 1.33463, 0.101], 335 | [5.56401, 3.73161, 1.33463, 0.09844], 336 | [5.64513, 3.83446, 1.65679, 0.07906], 337 | [5.74889, 3.92024, 1.73875, 0.07743], 338 | [5.86816, 3.99315, 1.83096, 0.07635], 339 | [6.00303, 4.05237, 1.93405, 0.07616], 340 | [6.15417, 4.09658, 2.05482, 0.07664], 341 | [6.32298, 4.12377, 2.19828, 0.07778], 342 | [6.5118, 4.13106, 2.31507, 0.08162], 343 | [6.72455, 4.11405, 2.29787, 0.09288], 344 | [6.96924, 4.06504, 2.27981, 0.10946], 345 | [7.23605, 3.97246, 2.25811, 
0.12507], 346 | [7.43782, 3.87107, 2.2331, 0.10112], 347 | [7.60293, 3.76378, 2.20505, 0.0893], 348 | [7.74238, 3.65197, 2.20505, 0.08106], 349 | [7.86188, 3.5364, 2.20505, 0.07539], 350 | [7.96479, 3.41757, 2.20505, 0.07129], 351 | [8.05322, 3.29577, 2.20505, 0.06826], 352 | [8.12853, 3.17115, 2.38864, 0.06096], 353 | [8.19155, 3.04381, 2.59316, 0.05479], 354 | [8.2447, 2.91427, 2.85739, 0.049], 355 | [8.28972, 2.78301, 3.21281, 0.04319], 356 | [8.32812, 2.65059, 3.74461, 0.03682], 357 | [8.3613, 2.51757, 4.0, 0.03427], 358 | [8.39063, 2.38441, 4.0, 0.03409], 359 | [8.41744, 2.25135, 4.0, 0.03393], 360 | [8.44605, 2.11888, 4.0, 0.03388], 361 | [8.47641, 1.98572, 4.0, 0.03414], 362 | [8.5082, 1.85198, 4.0, 0.03437], 363 | [8.54111, 1.71779, 4.0, 0.03454], 364 | [8.57477, 1.58329, 3.88727, 0.03567], 365 | [8.60883, 1.44862, 3.55754, 0.03905], 366 | [8.64619, 1.29642, 3.29481, 0.04757], 367 | [8.68404, 1.13291, 3.10924, 0.05398], 368 | [8.72093, 0.95736, 2.94774, 0.06086], 369 | [8.7547, 0.77082, 2.8059, 0.06756], 370 | [8.78272, 0.57589, 2.67642, 0.07358], 371 | [8.80238, 0.37615, 2.55908, 0.07843], 372 | [8.81154, 0.17535, 2.45225, 0.08197], 373 | [8.80892, -0.0233, 2.35362, 0.08441], 374 | [8.79385, -0.21739, 2.26053, 0.08612], 375 | [8.76608, -0.40519, 2.17117, 0.08744], 376 | [8.72555, -0.58546, 2.0881, 0.08848], 377 | [8.67236, -0.75727, 2.0881, 0.08613], 378 | [8.60661, -0.91991, 2.0881, 0.08401], 379 | [8.52835, -1.07276, 2.0881, 0.08224], 380 | [8.43746, -1.2152, 2.0881, 0.08092], 381 | [8.33356, -1.34653, 2.27364, 0.07365], 382 | [8.2158, -1.4659, 2.37408, 0.07063], 383 | [8.08601, -1.57512, 2.48104, 0.06837], 384 | [7.94437, -1.67477, 2.60203, 0.06655], 385 | [7.79071, -1.76521, 2.73873, 0.0651], 386 | [7.62452, -1.84667, 2.8987, 0.06385], 387 | [7.44497, -1.91922, 3.0896, 0.06268], 388 | [7.25094, -1.98282, 3.32007, 0.0615], 389 | [7.04109, -2.03732, 3.60931, 0.06007], 390 | [6.81386, -2.08251, 3.98257, 0.05817], 391 | [6.56783, -2.11817, 4.0, 0.06215], 
392 | [6.30214, -2.14423, 4.0, 0.06674], 393 | [6.01745, -2.16104, 4.0, 0.0713], 394 | [5.71684, -2.16974, 4.0, 0.07518], 395 | [5.40604, -2.17253, 4.0, 0.0777], 396 | [5.08989, -2.17261, 4.0, 0.07904], 397 | [4.77373, -2.173, 4.0, 0.07904], 398 | [4.45759, -2.17311, 4.0, 0.07903], 399 | [4.14147, -2.17298, 4.0, 0.07903], 400 | [3.82532, -2.17289, 4.0, 0.07904], 401 | [3.50917, -2.17299, 4.0, 0.07904], 402 | [3.19303, -2.17314, 4.0, 0.07903], 403 | [2.87689, -2.1731, 4.0, 0.07904], 404 | [2.56075, -2.17295, 4.0, 0.07903], 405 | [2.2446, -2.17299, 4.0, 0.07904], 406 | [1.92847, -2.17312, 4.0, 0.07903], 407 | [1.61233, -2.17306, 4.0, 0.07904], 408 | [1.29618, -2.17296, 4.0, 0.07904], 409 | [0.98004, -2.17304, 4.0, 0.07903], 410 | [0.6639, -2.17313, 4.0, 0.07904]] 411 | 412 | ################## INPUT PARAMETERS ################### 413 | 414 | # Read all input parameters 415 | all_wheels_on_track = params['all_wheels_on_track'] 416 | x = params['x'] 417 | y = params['y'] 418 | distance_from_center = params['distance_from_center'] 419 | is_left_of_center = params['is_left_of_center'] 420 | heading = params['heading'] 421 | progress = params['progress'] 422 | steps = params['steps'] 423 | speed = params['speed'] 424 | steering_angle = params['steering_angle'] 425 | track_width = params['track_width'] 426 | waypoints = params['waypoints'] 427 | closest_waypoints = params['closest_waypoints'] 428 | is_offtrack = params['is_offtrack'] 429 | 430 | ############### OPTIMAL X,Y,SPEED,TIME ################ 431 | 432 | # Get closest indexes for racing line (and distances to all points on racing line) 433 | closest_index, second_closest_index = closest_2_racing_points_index( 434 | racing_track, [x, y]) 435 | 436 | # Get optimal [x, y, speed, time] for closest and second closest index 437 | optimals = racing_track[closest_index] 438 | optimals_second = racing_track[second_closest_index] 439 | 440 | # Save first racingpoint of episode for later 441 | if self.verbose == True: 442 | 
self.first_racingpoint_index = 0 # this is just for testing purposes 443 | if steps == 1: 444 | self.first_racingpoint_index = closest_index 445 | 446 | ################ REWARD AND PUNISHMENT ################ 447 | 448 | ## Define the default reward ## 449 | reward = 1 450 | 451 | ## Reward if car goes close to optimal racing line ## 452 | DISTANCE_MULTIPLE = 1 453 | dist = dist_to_racing_line(optimals[0:2], optimals_second[0:2], [x, y]) 454 | distance_reward = max(1e-3, 1 - (dist/(track_width*0.5))) 455 | reward += distance_reward * DISTANCE_MULTIPLE 456 | 457 | ## Reward if speed is close to optimal speed ## 458 | SPEED_DIFF_NO_REWARD = 1 459 | SPEED_MULTIPLE = 2 460 | speed_diff = abs(optimals[2]-speed) 461 | if speed_diff <= SPEED_DIFF_NO_REWARD: 462 | # we use quadratic punishment (not linear) bc we're not as confident with the optimal speed 463 | # so, we do not punish small deviations from optimal speed 464 | speed_reward = (1 - (speed_diff/(SPEED_DIFF_NO_REWARD))**2)**2 465 | else: 466 | speed_reward = 0 467 | reward += speed_reward * SPEED_MULTIPLE 468 | 469 | # Reward if less steps 470 | REWARD_PER_STEP_FOR_FASTEST_TIME = 1 471 | STANDARD_TIME = 37 472 | FASTEST_TIME = 27 473 | times_list = [row[3] for row in racing_track] 474 | projected_time = projected_time(self.first_racingpoint_index, closest_index, steps, times_list) 475 | try: 476 | steps_prediction = projected_time * 15 + 1 477 | reward_prediction = max(1e-3, (-REWARD_PER_STEP_FOR_FASTEST_TIME*(FASTEST_TIME) / 478 | (STANDARD_TIME-FASTEST_TIME))*(steps_prediction-(STANDARD_TIME*15+1))) 479 | steps_reward = min(REWARD_PER_STEP_FOR_FASTEST_TIME, reward_prediction / steps_prediction) 480 | except: 481 | steps_reward = 0 482 | reward += steps_reward 483 | 484 | # Zero reward if obviously wrong direction (e.g. 
spin) 485 | direction_diff = racing_direction_diff( 486 | optimals[0:2], optimals_second[0:2], [x, y], heading) 487 | if direction_diff > 30: 488 | reward = 1e-3 489 | 490 | # Zero reward of obviously too slow 491 | speed_diff_zero = optimals[2]-speed 492 | if speed_diff_zero > 0.5: 493 | reward = 1e-3 494 | 495 | ## Incentive for finishing the lap in less steps ## 496 | REWARD_FOR_FASTEST_TIME = 1500 # should be adapted to track length and other rewards 497 | STANDARD_TIME = 37 # seconds (time that is easily done by model) 498 | FASTEST_TIME = 27 # seconds (best time of 1st place on the track) 499 | if progress == 100: 500 | finish_reward = max(1e-3, (-REWARD_FOR_FASTEST_TIME / 501 | (15*(STANDARD_TIME-FASTEST_TIME)))*(steps-STANDARD_TIME*15)) 502 | else: 503 | finish_reward = 0 504 | reward += finish_reward 505 | 506 | ## Zero reward if off track ## 507 | if all_wheels_on_track == False: 508 | reward = 1e-3 509 | 510 | ####################### VERBOSE ####################### 511 | 512 | if self.verbose == True: 513 | print("Closest index: %i" % closest_index) 514 | print("Distance to racing line: %f" % dist) 515 | print("=== Distance reward (w/out multiple): %f ===" % (distance_reward)) 516 | print("Optimal speed: %f" % optimals[2]) 517 | print("Speed difference: %f" % speed_diff) 518 | print("=== Speed reward (w/out multiple): %f ===" % speed_reward) 519 | print("Direction difference: %f" % direction_diff) 520 | print("Predicted time: %f" % projected_time) 521 | print("=== Steps reward: %f ===" % steps_reward) 522 | print("=== Finish reward: %f ===" % finish_reward) 523 | 524 | #################### RETURN REWARD #################### 525 | 526 | # Always return a float value 527 | return float(reward) 528 | 529 | 530 | reward_object = Reward() # add parameter verbose=True to get noisy output for testing 531 | 532 | 533 | def reward_function(params): 534 | return reward_object.reward_function(params) 535 | 
--------------------------------------------------------------------------------