├── ActorNetwork.py ├── CriticNetwork.py ├── OU.py ├── README.md ├── ReplayBuffer.py ├── actormodel.h5 ├── actormodel.json ├── autostart.sh ├── criticmodel.h5 ├── criticmodel.json ├── ddpg.py ├── fast.gif ├── gym_torcs.py └── snakeoil3_gym.py /ActorNetwork.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from keras.initializations import normal, identity 4 | from keras.models import model_from_json 5 | from keras.models import Sequential, Model 6 | from keras.engine.training import collect_trainable_weights 7 | from keras.layers import Dense, Flatten, Input, merge, Lambda 8 | from keras.optimizers import Adam 9 | import tensorflow as tf 10 | import keras.backend as K 11 | 12 | HIDDEN1_UNITS = 300 13 | HIDDEN2_UNITS = 600 14 | 15 | class ActorNetwork(object): 16 | def __init__(self, sess, state_size, action_size, BATCH_SIZE, TAU, LEARNING_RATE): 17 | self.sess = sess 18 | self.BATCH_SIZE = BATCH_SIZE 19 | self.TAU = TAU 20 | self.LEARNING_RATE = LEARNING_RATE 21 | 22 | K.set_session(sess) 23 | 24 | #Now create the model 25 | self.model , self.weights, self.state = self.create_actor_network(state_size, action_size) 26 | self.target_model, self.target_weights, self.target_state = self.create_actor_network(state_size, action_size) 27 | self.action_gradient = tf.placeholder(tf.float32,[None, action_size]) 28 | self.params_grad = tf.gradients(self.model.output, self.weights, -self.action_gradient) 29 | grads = zip(self.params_grad, self.weights) 30 | self.optimize = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(grads) 31 | self.sess.run(tf.initialize_all_variables()) 32 | 33 | def train(self, states, action_grads): 34 | self.sess.run(self.optimize, feed_dict={ 35 | self.state: states, 36 | self.action_gradient: action_grads 37 | }) 38 | 39 | def target_train(self): 40 | actor_weights = self.model.get_weights() 41 | actor_target_weights = self.target_model.get_weights() 42 | for i in xrange(len(actor_weights)): 43 | actor_target_weights[i] = self.TAU * actor_weights[i] + (1 - self.TAU)* actor_target_weights[i] 44 | self.target_model.set_weights(actor_target_weights) 45 | 46 | def create_actor_network(self, state_size,action_dim): 47 | print("Now we build the model") 48 | S = Input(shape=[state_size]) 49 | h0 = Dense(HIDDEN1_UNITS, activation='relu')(S) 50 | h1 = Dense(HIDDEN2_UNITS, activation='relu')(h0) 51 | Steering = Dense(1,activation='tanh',init=lambda shape, name: normal(shape, scale=1e-4, name=name))(h1) 52 | Acceleration = Dense(1,activation='sigmoid',init=lambda shape, name: normal(shape, scale=1e-4, name=name))(h1) 53 | Brake = Dense(1,activation='sigmoid',init=lambda shape, name: normal(shape, scale=1e-4, name=name))(h1) 54 | V = merge([Steering,Acceleration,Brake],mode='concat') 55 | model = Model(input=S,output=V) 56 | return model, model.trainable_weights, S 57 | 58 | -------------------------------------------------------------------------------- /CriticNetwork.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | from keras.initializations import normal, identity 4 | from keras.models import model_from_json, load_model 5 | from keras.engine.training import collect_trainable_weights 6 | from keras.models import Sequential 7 | from keras.layers import Dense, Flatten, Input, merge, Lambda, Activation 8 | from keras.models import Sequential, Model 9 | from keras.optimizers import Adam 10 | import keras.backend as K 11 | import tensorflow as tf 12 | 13 | HIDDEN1_UNITS = 300 14 | HIDDEN2_UNITS = 600 15 | 16 | class CriticNetwork(object): 17 | def __init__(self, sess, state_size, action_size, BATCH_SIZE, TAU, LEARNING_RATE): 18 | self.sess = sess 19 | self.BATCH_SIZE = BATCH_SIZE 20 | self.TAU = TAU 21 | self.LEARNING_RATE = LEARNING_RATE 22 | self.action_size = action_size 23 | 24 | K.set_session(sess) 25 | 26 | #Now create the model 27 | self.model, self.action, self.state = self.create_critic_network(state_size, action_size) 28 | self.target_model, self.target_action, self.target_state = self.create_critic_network(state_size, action_size) 29 | self.action_grads = tf.gradients(self.model.output, self.action) #GRADIENTS for policy update 30 | self.sess.run(tf.initialize_all_variables()) 31 | 32 | def gradients(self, states, actions): 33 | return self.sess.run(self.action_grads, feed_dict={ 34 | self.state: states, 35 | self.action: actions 36 | })[0] 37 | 38 | def target_train(self): 39 | critic_weights = self.model.get_weights() 40 | critic_target_weights = self.target_model.get_weights() 41 | for i in xrange(len(critic_weights)): 42 | critic_target_weights[i] = self.TAU * critic_weights[i] + (1 - self.TAU)* critic_target_weights[i] 43 | self.target_model.set_weights(critic_target_weights) 44 | 45 | def create_critic_network(self, state_size,action_dim): 46 | print("Now we build the model") 47 | S = Input(shape=[state_size]) 48 | A = Input(shape=[action_dim],name='action2') 49 | w1 = Dense(HIDDEN1_UNITS, activation='relu')(S) 50 | a1 = Dense(HIDDEN2_UNITS, activation='linear')(A) 51 | h1 = Dense(HIDDEN2_UNITS, activation='linear')(w1) 52 | h2 = merge([h1,a1],mode='sum') 53 | h3 = Dense(HIDDEN2_UNITS, activation='relu')(h2) 54 | V = Dense(action_dim,activation='linear')(h3) 55 | model = Model(input=[S,A],output=V) 56 | adam = Adam(lr=self.LEARNING_RATE) 57 | model.compile(loss='mse', optimizer=adam) 58 | return model, A, S 59 | -------------------------------------------------------------------------------- /OU.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | class OU(object): 5 | 6 | def function(self, x, mu, theta, sigma): 7 | return theta * (mu - x) + sigma * np.random.randn(1) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Using Keras and Deep Deterministic Policy Gradient to play TORCS 2 | 3 | 300 lines of python code to demonstrate DDPG with Keras 4 | 5 | Please read the following blog for details 6 | 7 | https://yanpanlau.github.io/2016/10/11/Torcs-Keras.html 8 | 9 | ![](fast.gif) 10 | 11 | # Installation Dependencies: 12 | 13 | * Python 2.7 14 | * Keras 1.1.0 15 | * Tensorflow r0.10 16 | * [gym_torcs](https://github.com/ugo-nama-kun/gym_torcs) 17 | 18 | # How to Run? 19 | 20 | ``` 21 | git clone https://github.com/yanpanlau/DDPG-Keras-Torcs.git 22 | cd DDPG-Keras-Torcs 23 | cp *.* ~/gym_torcs 24 | cd ~/gym_torcs 25 | python ddpg.py 26 | ``` 27 | 28 | (Change the flag **train_indicator**=1 in ddpg.py if you want to train the network) 29 | -------------------------------------------------------------------------------- /ReplayBuffer.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import random 3 | 4 | class ReplayBuffer(object): 5 | 6 | def __init__(self, buffer_size): 7 | self.buffer_size = buffer_size 8 | self.num_experiences = 0 9 | self.buffer = deque() 10 | 11 | def getBatch(self, batch_size): 12 | # Randomly sample batch_size examples 13 | if self.num_experiences < batch_size: 14 | return random.sample(self.buffer, self.num_experiences) 15 | else: 16 | return random.sample(self.buffer, batch_size) 17 | 18 | def size(self): 19 | return self.buffer_size 20 | 21 | def add(self, state, action, reward, new_state, done): 22 | experience = (state, action, reward, new_state, done) 23 | if self.num_experiences < self.buffer_size: 24 | self.buffer.append(experience) 25 | self.num_experiences += 1 26 | else: 27 | self.buffer.popleft() 28 | self.buffer.append(experience) 29 | 30 | def count(self): 31 | # if buffer is full, return buffer size 32 | # otherwise, return experience counter 33 | return self.num_experiences 34 | 35 | def erase(self): 36 | self.buffer = deque() 37 | self.num_experiences = 0 38 | -------------------------------------------------------------------------------- /actormodel.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanpanlau/DDPG-Keras-Torcs/455fadee1016ef15ef08817d98ec376d7e34b500/actormodel.h5 -------------------------------------------------------------------------------- /actormodel.json: -------------------------------------------------------------------------------- 1 | "{\"class_name\": \"Model\", \"keras_version\": \"1.1.0\", \"config\": {\"layers\": [{\"class_name\": \"InputLayer\", \"config\": {\"batch_input_shape\": [null, 29], \"input_dtype\": \"float32\", \"sparse\": false, \"name\": \"input_1\"}, \"inbound_nodes\": [], \"name\": \"input_1\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_1\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"relu\", \"output_dim\": 300}, \"inbound_nodes\": [[[\"input_1\", 0, 0]]], \"name\": \"dense_1\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_2\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"relu\", \"output_dim\": 600}, \"inbound_nodes\": [[[\"dense_1\", 0, 0]]], \"name\": \"dense_2\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_3\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"tanh\", \"output_dim\": 1}, \"inbound_nodes\": [[[\"dense_2\", 0, 0]]], \"name\": \"dense_3\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_4\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"sigmoid\", \"output_dim\": 1}, \"inbound_nodes\": [[[\"dense_2\", 0, 0]]], \"name\": \"dense_4\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_5\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"sigmoid\", \"output_dim\": 1}, \"inbound_nodes\": [[[\"dense_2\", 0, 0]]], \"name\": \"dense_5\"}, {\"class_name\": \"Merge\", \"config\": {\"name\": \"merge_1\", \"concat_axis\": -1, \"mode_type\": \"raw\", \"dot_axes\": -1, \"mode\": \"concat\", \"output_shape\": null, \"output_shape_type\": \"raw\"}, \"inbound_nodes\": [[[\"dense_3\", 0, 0], [\"dense_4\", 0, 0], [\"dense_5\", 0, 0]]], \"name\": \"merge_1\"}], \"input_layers\": [[\"input_1\", 0, 0]], \"output_layers\": [[\"merge_1\", 0, 0]], \"name\": \"model_1\"}}" -------------------------------------------------------------------------------- /autostart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | xte 'key Return' 3 | xte 'usleep 100000' 4 | xte 'key Return' 5 | xte 'usleep 100000' 6 | xte 'key Up' 7 | xte 'usleep 100000' 8 | xte 'key Up' 9 | xte 'usleep 100000' 10 | xte 'key Return' 11 | xte 'usleep 100000' 12 | xte 'key Return' 13 | -------------------------------------------------------------------------------- /criticmodel.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanpanlau/DDPG-Keras-Torcs/455fadee1016ef15ef08817d98ec376d7e34b500/criticmodel.h5 -------------------------------------------------------------------------------- /criticmodel.json: -------------------------------------------------------------------------------- 1 | "{\"class_name\": \"Model\", \"keras_version\": \"1.1.0\", \"config\": {\"layers\": [{\"class_name\": \"InputLayer\", \"config\": {\"batch_input_shape\": [null, 29], \"input_dtype\": \"float32\", \"sparse\": false, \"name\": \"input_3\"}, \"inbound_nodes\": [], \"name\": \"input_3\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_11\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"relu\", \"output_dim\": 300}, \"inbound_nodes\": [[[\"input_3\", 0, 0]]], \"name\": \"dense_11\"}, {\"class_name\": \"InputLayer\", \"config\": {\"batch_input_shape\": [null, 3], \"input_dtype\": \"float32\", \"sparse\": false, \"name\": \"action2\"}, \"inbound_nodes\": [], \"name\": \"action2\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_13\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"linear\", \"output_dim\": 600}, \"inbound_nodes\": [[[\"dense_11\", 0, 0]]], \"name\": \"dense_13\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_12\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"linear\", \"output_dim\": 600}, \"inbound_nodes\": [[[\"action2\", 0, 0]]], \"name\": \"dense_12\"}, {\"class_name\": \"Merge\", \"config\": {\"name\": \"merge_3\", \"concat_axis\": -1, \"mode_type\": \"raw\", \"dot_axes\": -1, \"mode\": \"sum\", \"output_shape\": null, \"output_shape_type\": \"raw\"}, \"inbound_nodes\": [[[\"dense_13\", 0, 0], [\"dense_12\", 0, 0]]], \"name\": \"merge_3\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_14\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"relu\", \"output_dim\": 600}, \"inbound_nodes\": [[[\"merge_3\", 0, 0]]], \"name\": \"dense_14\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_15\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"linear\", \"output_dim\": 3}, \"inbound_nodes\": [[[\"dense_14\", 0, 0]]], \"name\": \"dense_15\"}], \"input_layers\": [[\"input_3\", 0, 0], [\"action2\", 0, 0]], \"output_layers\": [[\"dense_15\", 0, 0]], \"name\": \"model_3\"}}" -------------------------------------------------------------------------------- /ddpg.py: -------------------------------------------------------------------------------- 1 | from gym_torcs import TorcsEnv 2 | import numpy as np 3 | import random 4 | import argparse 5 | from keras.models import model_from_json, Model 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Dropout, Activation, Flatten 8 | from keras.optimizers import Adam 9 | import tensorflow as tf 10 | from keras.engine.training import collect_trainable_weights 11 | import json 12 | 13 | from ReplayBuffer import ReplayBuffer 14 | from ActorNetwork import ActorNetwork 15 | from CriticNetwork import CriticNetwork 16 | from OU import OU 17 | import timeit 18 | 19 | OU = OU() #Ornstein-Uhlenbeck Process 20 | 21 | def playGame(train_indicator=0): #1 means Train, 0 means simply Run 22 | BUFFER_SIZE = 100000 23 | BATCH_SIZE = 32 24 | GAMMA = 0.99 25 | TAU = 0.001 #Target Network HyperParameters 26 | LRA = 0.0001 #Learning rate for Actor 27 | LRC = 0.001 #Lerning rate for Critic 28 | 29 | action_dim = 3 #Steering/Acceleration/Brake 30 | state_dim = 29 #of sensors input 31 | 32 | np.random.seed(1337) 33 | 34 | vision = False 35 | 36 | EXPLORE = 100000. 37 | episode_count = 2000 38 | max_steps = 100000 39 | reward = 0 40 | done = False 41 | step = 0 42 | epsilon = 1 43 | indicator = 0 44 | 45 | #Tensorflow GPU optimization 46 | config = tf.ConfigProto() 47 | config.gpu_options.allow_growth = True 48 | sess = tf.Session(config=config) 49 | from keras import backend as K 50 | K.set_session(sess) 51 | 52 | actor = ActorNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRA) 53 | critic = CriticNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRC) 54 | buff = ReplayBuffer(BUFFER_SIZE) #Create replay buffer 55 | 56 | # Generate a Torcs environment 57 | env = TorcsEnv(vision=vision, throttle=True,gear_change=False) 58 | 59 | #Now load the weight 60 | print("Now we load the weight") 61 | try: 62 | actor.model.load_weights("actormodel.h5") 63 | critic.model.load_weights("criticmodel.h5") 64 | actor.target_model.load_weights("actormodel.h5") 65 | critic.target_model.load_weights("criticmodel.h5") 66 | print("Weight load successfully") 67 | except: 68 | print("Cannot find the weight") 69 | 70 | print("TORCS Experiment Start.") 71 | for i in range(episode_count): 72 | 73 | print("Episode : " + str(i) + " Replay Buffer " + str(buff.count())) 74 | 75 | if np.mod(i, 3) == 0: 76 | ob = env.reset(relaunch=True) #relaunch TORCS every 3 episode because of the memory leak error 77 | else: 78 | ob = env.reset() 79 | 80 | s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm)) 81 | 82 | total_reward = 0. 83 | for j in range(max_steps): 84 | loss = 0 85 | epsilon -= 1.0 / EXPLORE 86 | a_t = np.zeros([1,action_dim]) 87 | noise_t = np.zeros([1,action_dim]) 88 | 89 | a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0])) 90 | noise_t[0][0] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][0], 0.0 , 0.60, 0.30) 91 | noise_t[0][1] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][1], 0.5 , 1.00, 0.10) 92 | noise_t[0][2] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][2], -0.1 , 1.00, 0.05) 93 | 94 | #The following code do the stochastic brake 95 | #if random.random() <= 0.1: 96 | # print("********Now we apply the brake***********") 97 | # noise_t[0][2] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][2], 0.2 , 1.00, 0.10) 98 | 99 | a_t[0][0] = a_t_original[0][0] + noise_t[0][0] 100 | a_t[0][1] = a_t_original[0][1] + noise_t[0][1] 101 | a_t[0][2] = a_t_original[0][2] + noise_t[0][2] 102 | 103 | ob, r_t, done, info = env.step(a_t[0]) 104 | 105 | s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm)) 106 | 107 | buff.add(s_t, a_t[0], r_t, s_t1, done) #Add replay buffer 108 | 109 | #Do the batch update 110 | batch = buff.getBatch(BATCH_SIZE) 111 | states = np.asarray([e[0] for e in batch]) 112 | actions = np.asarray([e[1] for e in batch]) 113 | rewards = np.asarray([e[2] for e in batch]) 114 | new_states = np.asarray([e[3] for e in batch]) 115 | dones = np.asarray([e[4] for e in batch]) 116 | y_t = np.asarray([e[1] for e in batch]) 117 | 118 | target_q_values = critic.target_model.predict([new_states, actor.target_model.predict(new_states)]) 119 | 120 | for k in range(len(batch)): 121 | if dones[k]: 122 | y_t[k] = rewards[k] 123 | else: 124 | y_t[k] = rewards[k] + GAMMA*target_q_values[k] 125 | 126 | if (train_indicator): 127 | loss += critic.model.train_on_batch([states,actions], y_t) 128 | a_for_grad = actor.model.predict(states) 129 | grads = critic.gradients(states, a_for_grad) 130 | actor.train(states, grads) 131 | actor.target_train() 132 | critic.target_train() 133 | 134 | total_reward += r_t 135 | s_t = s_t1 136 | 137 | print("Episode", i, "Step", step, "Action", a_t, "Reward", r_t, "Loss", loss) 138 | 139 | step += 1 140 | if done: 141 | break 142 | 143 | if np.mod(i, 3) == 0: 144 | if (train_indicator): 145 | print("Now we save model") 146 | actor.model.save_weights("actormodel.h5", overwrite=True) 147 | with open("actormodel.json", "w") as outfile: 148 | json.dump(actor.model.to_json(), outfile) 149 | 150 | critic.model.save_weights("criticmodel.h5", overwrite=True) 151 | with open("criticmodel.json", "w") as outfile: 152 | json.dump(critic.model.to_json(), outfile) 153 | 154 | print("TOTAL REWARD @ " + str(i) +"-th Episode : Reward " + str(total_reward)) 155 | print("Total Step: " + str(step)) 156 | print("") 157 | 158 | env.end() # This is for shutting down TORCS 159 | print("Finish.") 160 | 161 | if __name__ == "__main__": 162 | playGame() 163 | -------------------------------------------------------------------------------- /fast.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanpanlau/DDPG-Keras-Torcs/455fadee1016ef15ef08817d98ec376d7e34b500/fast.gif -------------------------------------------------------------------------------- /gym_torcs.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | import numpy as np 4 | # from os import path 5 | import snakeoil3_gym as snakeoil3 6 | import numpy as np 7 | import copy 8 | import collections as col 9 | import os 10 | import time 11 | 12 | 13 | class TorcsEnv: 14 | terminal_judge_start = 100 # If after 100 timestep still no progress, terminated 15 | termination_limit_progress = 5 # [km/h], episode terminates if car is running slower than this limit 16 | default_speed = 50 17 | 18 | initial_reset = True 19 | 20 | def __init__(self, vision=False, throttle=False, gear_change=False): 21 | self.vision = vision 22 | self.throttle = throttle 23 | self.gear_change = gear_change 24 | 25 | self.initial_run = True 26 | 27 | ##print("launch torcs") 28 | os.system('pkill torcs') 29 | time.sleep(0.5) 30 | if self.vision is True: 31 | os.system('torcs -nofuel -nodamage -nolaptime -vision &') 32 | else: 33 | os.system('torcs -nofuel -nolaptime &') 34 | time.sleep(0.5) 35 | os.system('sh autostart.sh') 36 | time.sleep(0.5) 37 | 38 | """ 39 | # Modify here if you use multiple tracks in the environment 40 | self.client = snakeoil3.Client(p=3101, vision=self.vision) # Open new UDP in vtorcs 41 | self.client.MAX_STEPS = np.inf 42 | 43 | client = self.client 44 | client.get_servers_input() # Get the initial input from torcs 45 | 46 | obs = client.S.d # Get the current full-observation from torcs 47 | """ 48 | if throttle is False: 49 | self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,)) 50 | else: 51 | self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,)) 52 | 53 | if vision is False: 54 | high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf]) 55 | low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf]) 56 | self.observation_space = spaces.Box(low=low, high=high) 57 | else: 58 | high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255]) 59 | low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0]) 60 | self.observation_space = spaces.Box(low=low, high=high) 61 | 62 | def step(self, u): 63 | #print("Step") 64 | # convert thisAction to the actual torcs actionstr 65 | client = self.client 66 | 67 | this_action = self.agent_to_torcs(u) 68 | 69 | # Apply Action 70 | action_torcs = client.R.d 71 | 72 | # Steering 73 | action_torcs['steer'] = this_action['steer'] # in [-1, 1] 74 | 75 | # Simple Autnmatic Throttle Control by Snakeoil 76 | if self.throttle is False: 77 | target_speed = self.default_speed 78 | if client.S.d['speedX'] < target_speed - (client.R.d['steer']*50): 79 | client.R.d['accel'] += .01 80 | else: 81 | client.R.d['accel'] -= .01 82 | 83 | if client.R.d['accel'] > 0.2: 84 | client.R.d['accel'] = 0.2 85 | 86 | if client.S.d['speedX'] < 10: 87 | client.R.d['accel'] += 1/(client.S.d['speedX']+.1) 88 | 89 | # Traction Control System 90 | if ((client.S.d['wheelSpinVel'][2]+client.S.d['wheelSpinVel'][3]) - 91 | (client.S.d['wheelSpinVel'][0]+client.S.d['wheelSpinVel'][1]) > 5): 92 | action_torcs['accel'] -= .2 93 | else: 94 | action_torcs['accel'] = this_action['accel'] 95 | action_torcs['brake'] = this_action['brake'] 96 | 97 | # Automatic Gear Change by Snakeoil 98 | if self.gear_change is True: 99 | action_torcs['gear'] = this_action['gear'] 100 | else: 101 | # Automatic Gear Change by Snakeoil is possible 102 | action_torcs['gear'] = 1 103 | if self.throttle: 104 | if client.S.d['speedX'] > 50: 105 | action_torcs['gear'] = 2 106 | if client.S.d['speedX'] > 80: 107 | action_torcs['gear'] = 3 108 | if client.S.d['speedX'] > 110: 109 | action_torcs['gear'] = 4 110 | if client.S.d['speedX'] > 140: 111 | action_torcs['gear'] = 5 112 | if client.S.d['speedX'] > 170: 113 | action_torcs['gear'] = 6 114 | # Save the privious full-obs from torcs for the reward calculation 115 | obs_pre = copy.deepcopy(client.S.d) 116 | 117 | # One-Step Dynamics Update ################################# 118 | # Apply the Agent's action into torcs 119 | client.respond_to_server() 120 | # Get the response of TORCS 121 | client.get_servers_input() 122 | 123 | # Get the current full-observation from torcs 124 | obs = client.S.d 125 | 126 | # Make an obsevation from a raw observation vector from TORCS 127 | self.observation = self.make_observaton(obs) 128 | 129 | # Reward setting Here ####################################### 130 | # direction-dependent positive reward 131 | track = np.array(obs['track']) 132 | trackPos = np.array(obs['trackPos']) 133 | sp = np.array(obs['speedX']) 134 | damage = np.array(obs['damage']) 135 | rpm = np.array(obs['rpm']) 136 | 137 | progress = sp*np.cos(obs['angle']) - np.abs(sp*np.sin(obs['angle'])) - sp * np.abs(obs['trackPos']) 138 | reward = progress 139 | 140 | # collision detection 141 | if obs['damage'] - obs_pre['damage'] > 0: 142 | reward = -1 143 | 144 | # Termination judgement ######################### 145 | episode_terminate = False 146 | #if (abs(track.any()) > 1 or abs(trackPos) > 1): # Episode is terminated if the car is out of track 147 | # reward = -200 148 | # episode_terminate = True 149 | # client.R.d['meta'] = True 150 | 151 | #if self.terminal_judge_start < self.time_step: # Episode terminates if the progress of agent is small 152 | # if progress < self.termination_limit_progress: 153 | # print("No progress") 154 | # episode_terminate = True 155 | # client.R.d['meta'] = True 156 | 157 | if np.cos(obs['angle']) < 0: # Episode is terminated if the agent runs backward 158 | episode_terminate = True 159 | client.R.d['meta'] = True 160 | 161 | 162 | if client.R.d['meta'] is True: # Send a reset signal 163 | self.initial_run = False 164 | client.respond_to_server() 165 | 166 | self.time_step += 1 167 | 168 | return self.get_obs(), reward, client.R.d['meta'], {} 169 | 170 | def reset(self, relaunch=False): 171 | #print("Reset") 172 | 173 | self.time_step = 0 174 | 175 | if self.initial_reset is not True: 176 | self.client.R.d['meta'] = True 177 | self.client.respond_to_server() 178 | 179 | ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug! 180 | if relaunch is True: 181 | self.reset_torcs() 182 | print("### TORCS is RELAUNCHED ###") 183 | 184 | # Modify here if you use multiple tracks in the environment 185 | self.client = snakeoil3.Client(p=3101, vision=self.vision) # Open new UDP in vtorcs 186 | self.client.MAX_STEPS = np.inf 187 | 188 | client = self.client 189 | client.get_servers_input() # Get the initial input from torcs 190 | 191 | obs = client.S.d # Get the current full-observation from torcs 192 | self.observation = self.make_observaton(obs) 193 | 194 | self.last_u = None 195 | 196 | self.initial_reset = False 197 | return self.get_obs() 198 | 199 | def end(self): 200 | os.system('pkill torcs') 201 | 202 | def get_obs(self): 203 | return self.observation 204 | 205 | def reset_torcs(self): 206 | #print("relaunch torcs") 207 | os.system('pkill torcs') 208 | time.sleep(0.5) 209 | if self.vision is True: 210 | os.system('torcs -nofuel -nodamage -nolaptime -vision &') 211 | else: 212 | os.system('torcs -nofuel -nolaptime &') 213 | time.sleep(0.5) 214 | os.system('sh autostart.sh') 215 | time.sleep(0.5) 216 | 217 | def agent_to_torcs(self, u): 218 | torcs_action = {'steer': u[0]} 219 | 220 | if self.throttle is True: # throttle action is enabled 221 | torcs_action.update({'accel': u[1]}) 222 | torcs_action.update({'brake': u[2]}) 223 | 224 | if self.gear_change is True: # gear change action is enabled 225 | torcs_action.update({'gear': int(u[3])}) 226 | 227 | return torcs_action 228 | 229 | 230 | def obs_vision_to_image_rgb(self, obs_image_vec): 231 | image_vec = obs_image_vec 232 | r = image_vec[0:len(image_vec):3] 233 | g = image_vec[1:len(image_vec):3] 234 | b = image_vec[2:len(image_vec):3] 235 | 236 | sz = (64, 64) 237 | r = np.array(r).reshape(sz) 238 | g = np.array(g).reshape(sz) 239 | b = np.array(b).reshape(sz) 240 | return np.array([r, g, b], dtype=np.uint8) 241 | 242 | def make_observaton(self, raw_obs): 243 | if self.vision is False: 244 | names = ['focus', 245 | 'speedX', 'speedY', 'speedZ', 'angle', 'damage', 246 | 'opponents', 247 | 'rpm', 248 | 'track', 249 | 'trackPos', 250 | 'wheelSpinVel'] 251 | Observation = col.namedtuple('Observaion', names) 252 | return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32)/200., 253 | speedX=np.array(raw_obs['speedX'], dtype=np.float32)/300.0, 254 | speedY=np.array(raw_obs['speedY'], dtype=np.float32)/300.0, 255 | speedZ=np.array(raw_obs['speedZ'], dtype=np.float32)/300.0, 256 | angle=np.array(raw_obs['angle'], dtype=np.float32)/3.1416, 257 | damage=np.array(raw_obs['damage'], dtype=np.float32), 258 | opponents=np.array(raw_obs['opponents'], dtype=np.float32)/200., 259 | rpm=np.array(raw_obs['rpm'], dtype=np.float32)/10000, 260 | track=np.array(raw_obs['track'], dtype=np.float32)/200., 261 | trackPos=np.array(raw_obs['trackPos'], dtype=np.float32)/1., 262 | wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32)) 263 | else: 264 | names = ['focus', 265 | 'speedX', 'speedY', 'speedZ', 'angle', 266 | 'opponents', 267 | 'rpm', 268 | 'track', 269 | 'trackPos', 270 | 'wheelSpinVel', 271 | 'img'] 272 | Observation = col.namedtuple('Observaion', names) 273 | 274 | # Get RGB from observation 275 | image_rgb = self.obs_vision_to_image_rgb(raw_obs[names[8]]) 276 | 277 | return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32)/200., 278 | speedX=np.array(raw_obs['speedX'], dtype=np.float32)/self.default_speed, 279 | speedY=np.array(raw_obs['speedY'], dtype=np.float32)/self.default_speed, 280 | speedZ=np.array(raw_obs['speedZ'], dtype=np.float32)/self.default_speed, 281 | opponents=np.array(raw_obs['opponents'], dtype=np.float32)/200., 282 | rpm=np.array(raw_obs['rpm'], dtype=np.float32), 283 | track=np.array(raw_obs['track'], dtype=np.float32)/200., 284 | trackPos=np.array(raw_obs['trackPos'], dtype=np.float32)/1., 285 | wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32), 286 | img=image_rgb) 287 | -------------------------------------------------------------------------------- /snakeoil3_gym.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # snakeoil.py 3 | # Chris X Edwards 4 | # Snake Oil is a Python library for interfacing with a TORCS 5 | # race car simulator which has been patched with the server 6 | # extentions used in the Simulated Car Racing competitions. 7 | # http://scr.geccocompetitions.com/ 8 | # 9 | # To use it, you must import it and create a "drive()" function. 10 | # This will take care of option handling and server connecting, etc. 11 | # To see how to write your own client do something like this which is 12 | # a complete working client: 13 | # /-----------------------------------------------\ 14 | # |#!/usr/bin/python | 15 | # |import snakeoil | 16 | # |if __name__ == "__main__": | 17 | # | C= snakeoil.Client() | 18 | # | for step in xrange(C.maxSteps,0,-1): | 19 | # | C.get_servers_input() | 20 | # | snakeoil.drive_example(C) | 21 | # | C.respond_to_server() | 22 | # | C.shutdown() | 23 | # \-----------------------------------------------/ 24 | # This should then be a full featured client. The next step is to 25 | # replace 'snakeoil.drive_example()' with your own. There is a 26 | # dictionary which holds various option values (see `default_options` 27 | # variable for all the details) but you probably only need a few 28 | # things from it. Mainly the `trackname` and `stage` are important 29 | # when developing a strategic bot. 30 | # 31 | # This dictionary also contains a ServerState object 32 | # (key=S) and a DriverAction object (key=R for response). This allows 33 | # you to get at all the information sent by the server and to easily 34 | # formulate your reply. These objects contain a member dictionary "d" 35 | # (for data dictionary) which contain key value pairs based on the 36 | # server's syntax. Therefore, you can read the following: 37 | # angle, curLapTime, damage, distFromStart, distRaced, focus, 38 | # fuel, gear, lastLapTime, opponents, racePos, rpm, 39 | # speedX, speedY, speedZ, track, trackPos, wheelSpinVel, z 40 | # The syntax specifically would be something like: 41 | # X= o[S.d['tracPos']] 42 | # And you can set the following: 43 | # accel, brake, clutch, gear, steer, focus, meta 44 | # The syntax is: 45 | # o[R.d['steer']]= X 46 | # Note that it is 'steer' and not 'steering' as described in the manual! 47 | # All values should be sensible for their type, including lists being lists. 48 | # See the SCR manual or http://xed.ch/help/torcs.html for details. 49 | # 50 | # If you just run the snakeoil.py base library itself it will implement a 51 | # serviceable client with a demonstration drive function that is 52 | # sufficient for getting around most tracks. 53 | # Try `snakeoil.py --help` to get started. 54 | 55 | # for Python3-based torcs python robot client 56 | from __future__ import division 57 | from __future__ import absolute_import 58 | import socket 59 | import sys 60 | import getopt 61 | import os 62 | import time 63 | PI= 3.14159265359 64 | 65 | data_size = 2**17 66 | 67 | # Initialize help messages 68 | ophelp= u'Options:\n' 69 | ophelp+= u' --host, -H TORCS server host. [localhost]\n' 70 | ophelp+= u' --port, -p TORCS port. [3001]\n' 71 | ophelp+= u' --id, -i ID for server. [SCR]\n' 72 | ophelp+= u' --steps, -m <#> Maximum simulation steps. 1 sec ~ 50 steps. [100000]\n' 73 | ophelp+= u' --episodes, -e <#> Maximum learning episodes. [1]\n' 74 | ophelp+= u' --track, -t Your name for this track. Used for learning. [unknown]\n' 75 | ophelp+= u' --stage, -s <#> 0=warm up, 1=qualifying, 2=race, 3=unknown. [3]\n' 76 | ophelp+= u' --debug, -d Output full telemetry.\n' 77 | ophelp+= u' --help, -h Show this help.\n' 78 | ophelp+= u' --version, -v Show current version.' 79 | usage= u'Usage: %s [ophelp [optargs]] \n' % sys.argv[0] 80 | usage= usage + ophelp 81 | version= u"20130505-2" 82 | 83 | def clip(v,lo,hi): 84 | if vhi: return hi 86 | else: return v 87 | 88 | def bargraph(x,mn,mx,w,c=u'X'): 89 | u'''Draws a simple asciiart bar graph. Very handy for 90 | visualizing what's going on with the data. 91 | x= Value from sensor, mn= minimum plottable value, 92 | mx= maximum plottable value, w= width of plot in chars, 93 | c= the character to plot with.''' 94 | if not w: return u'' # No width! 95 | if xmx: x= mx # Clip to bounds. 97 | tx= mx-mn # Total real units possible to show on graph. 98 | if tx<=0: return u'backwards' # Stupid bounds. 99 | upw= tx/float(w) # X Units per output char width. 100 | if upw<=0: return u'what?' # Don't let this happen. 101 | negpu, pospu, negnonpu, posnonpu= 0,0,0,0 102 | if mn < 0: # Then there is a negative part to graph. 103 | if x < 0: # And the plot is on the negative side. 104 | negpu= -x + min(0,mx) 105 | negnonpu= -mn + x 106 | else: # Plot is on pos. Neg side is empty. 107 | negnonpu= -mn + min(0,mx) # But still show some empty neg. 108 | if mx > 0: # There is a positive part to the graph 109 | if x > 0: # And the plot is on the positive side. 110 | pospu= x - max(0,mn) 111 | posnonpu= mx - x 112 | else: # Plot is on neg. Pos side is empty. 113 | posnonpu= mx - max(0,mn) # But still show some empty pos. 114 | nnc= int(negnonpu/upw)*u'-' 115 | npc= int(negpu/upw)*c 116 | ppc= int(pospu/upw)*c 117 | pnc= int(posnonpu/upw)*u'_' 118 | return u'[%s]' % (nnc+npc+ppc+pnc) 119 | 120 | class Client(object): 121 | def __init__(self,H=None,p=None,i=None,e=None,t=None,s=None,d=None,vision=False): 122 | # If you don't like the option defaults, change them here. 123 | self.vision = vision 124 | 125 | self.host= u'localhost' 126 | self.port= 3001 127 | self.sid= u'SCR' 128 | self.maxEpisodes=1 # "Maximum number of learning episodes to perform" 129 | self.trackname= u'unknown' 130 | self.stage= 3 # 0=Warm-up, 1=Qualifying 2=Race, 3=unknown 131 | self.debug= False 132 | self.maxSteps= 100000 # 50steps/second 133 | self.parse_the_command_line() 134 | if H: self.host= H 135 | if p: self.port= p 136 | if i: self.sid= i 137 | if e: self.maxEpisodes= e 138 | if t: self.trackname= t 139 | if s: self.stage= s 140 | if d: self.debug= d 141 | self.S= ServerState() 142 | self.R= DriverAction() 143 | self.setup_connection() 144 | 145 | def setup_connection(self): 146 | # == Set Up UDP Socket == 147 | try: 148 | self.so= socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 149 | except socket.error, emsg: 150 | print u'Error: Could not create socket...' 151 | sys.exit(-1) 152 | # == Initialize Connection To Server == 153 | self.so.settimeout(1) 154 | 155 | n_fail = 5 156 | while True: 157 | # This string establishes track sensor angles! You can customize them. 158 | #a= "-90 -75 -60 -45 -30 -20 -15 -10 -5 0 5 10 15 20 30 45 60 75 90" 159 | # xed- Going to try something a bit more aggressive... 160 | a= u"-45 -19 -12 -7 -4 -2.5 -1.7 -1 -.5 0 .5 1 1.7 2.5 4 7 12 19 45" 161 | 162 | initmsg=u'%s(init %s)' % (self.sid,a) 163 | 164 | try: 165 | self.so.sendto(initmsg.encode(), (self.host, self.port)) 166 | except socket.error, emsg: 167 | sys.exit(-1) 168 | sockdata= unicode() 169 | try: 170 | sockdata,addr= self.so.recvfrom(data_size) 171 | sockdata = sockdata.decode(u'utf-8') 172 | except socket.error, emsg: 173 | print u"Waiting for server on %d............" % self.port 174 | print u"Count Down : " + unicode(n_fail) 175 | if n_fail < 0: 176 | print u"relaunch torcs" 177 | os.system(u'pkill torcs') 178 | time.sleep(1.0) 179 | if self.vision is False: 180 | os.system(u'torcs -nofuel -nodamage -nolaptime &') 181 | else: 182 | os.system(u'torcs -nofuel -nodamage -nolaptime -vision &') 183 | 184 | time.sleep(1.0) 185 | os.system(u'sh autostart.sh') 186 | n_fail = 5 187 | n_fail -= 1 188 | 189 | identify = u'***identified***' 190 | if identify in sockdata: 191 | print u"Client connected on %d.............." % self.port 192 | break 193 | 194 | def parse_the_command_line(self): 195 | try: 196 | (opts, args) = getopt.getopt(sys.argv[1:], u'H:p:i:m:e:t:s:dhv', 197 | [u'host=',u'port=',u'id=',u'steps=', 198 | u'episodes=',u'track=',u'stage=', 199 | u'debug',u'help',u'version']) 200 | except getopt.error, why: 201 | print u'getopt error: %s\n%s' % (why, usage) 202 | sys.exit(-1) 203 | try: 204 | for opt in opts: 205 | if opt[0] == u'-h' or opt[0] == u'--help': 206 | print usage 207 | sys.exit(0) 208 | if opt[0] == u'-d' or opt[0] == u'--debug': 209 | self.debug= True 210 | if opt[0] == u'-H' or opt[0] == u'--host': 211 | self.host= opt[1] 212 | if opt[0] == u'-i' or opt[0] == u'--id': 213 | self.sid= opt[1] 214 | if opt[0] == u'-t' or opt[0] == u'--track': 215 | self.trackname= opt[1] 216 | if opt[0] == u'-s' or opt[0] == u'--stage': 217 | self.stage= int(opt[1]) 218 | if opt[0] == u'-p' or opt[0] == u'--port': 219 | self.port= int(opt[1]) 220 | if opt[0] == u'-e' or opt[0] == u'--episodes': 221 | self.maxEpisodes= int(opt[1]) 222 | if opt[0] == u'-m' or opt[0] == u'--steps': 223 | self.maxSteps= int(opt[1]) 224 | if opt[0] == u'-v' or opt[0] == u'--version': 225 | print u'%s %s' % (sys.argv[0], version) 226 | sys.exit(0) 227 | except ValueError, why: 228 | print u'Bad parameter \'%s\' for option %s: %s\n%s' % ( 229 | opt[1], opt[0], why, usage) 230 | sys.exit(-1) 231 | if len(args) > 0: 232 | print u'Superflous input? %s\n%s' % (u', '.join(args), usage) 233 | sys.exit(-1) 234 | 235 | def get_servers_input(self): 236 | u'''Server's input is stored in a ServerState object''' 237 | if not self.so: return 238 | sockdata= unicode() 239 | 240 | while True: 241 | try: 242 | # Receive server data 243 | sockdata,addr= self.so.recvfrom(data_size) 244 | sockdata = sockdata.decode(u'utf-8') 245 | except socket.error, emsg: 246 | print u'.', 247 | #print "Waiting for data on %d.............." % self.port 248 | if u'***identified***' in sockdata: 249 | print u"Client connected on %d.............." % self.port 250 | continue 251 | elif u'***shutdown***' in sockdata: 252 | print ((u"Server has stopped the race on %d. "+ 253 | u"You were in %d place.") % 254 | (self.port,self.S.d[u'racePos'])) 255 | self.shutdown() 256 | return 257 | elif u'***restart***' in sockdata: 258 | # What do I do here? 259 | print u"Server has restarted the race on %d." % self.port 260 | # I haven't actually caught the server doing this. 261 | self.shutdown() 262 | return 263 | elif not sockdata: # Empty? 264 | continue # Try again. 265 | else: 266 | self.S.parse_server_str(sockdata) 267 | if self.debug: 268 | sys.stderr.write(u"\x1b[2J\x1b[H") # Clear for steady output. 269 | print self.S 270 | break # Can now return from this function. 271 | 272 | def respond_to_server(self): 273 | if not self.so: return 274 | try: 275 | message = repr(self.R) 276 | self.so.sendto(message.encode(), (self.host, self.port)) 277 | except socket.error, emsg: 278 | print u"Error sending to server: %s Message %s" % (emsg[1],unicode(emsg[0])) 279 | sys.exit(-1) 280 | if self.debug: print self.R.fancyout() 281 | # Or use this for plain output: 282 | #if self.debug: print self.R 283 | 284 | def shutdown(self): 285 | if not self.so: return 286 | print (u"Race terminated or %d steps elapsed. Shutting down %d." 287 | % (self.maxSteps,self.port)) 288 | self.so.close() 289 | self.so = None 290 | #sys.exit() # No need for this really. 291 | 292 | class ServerState(object): 293 | u'''What the server is reporting right now.''' 294 | def __init__(self): 295 | self.servstr= unicode() 296 | self.d= dict() 297 | 298 | def parse_server_str(self, server_string): 299 | u'''Parse the server string.''' 300 | self.servstr= server_string.strip()[:-1] 301 | sslisted= self.servstr.strip().lstrip(u'(').rstrip(u')').split(u')(') 302 | for i in sslisted: 303 | w= i.split(u' ') 304 | self.d[w[0]]= destringify(w[1:]) 305 | 306 | def __repr__(self): 307 | # Comment the next line for raw output: 308 | return self.fancyout() 309 | # ------------------------------------- 310 | out= unicode() 311 | for k in sorted(self.d): 312 | strout= unicode(self.d[k]) 313 | if type(self.d[k]) is list: 314 | strlist= [unicode(i) for i in self.d[k]] 315 | strout= u', '.join(strlist) 316 | out+= u"%s: %s\n" % (k,strout) 317 | return out 318 | 319 | def fancyout(self): 320 | u'''Specialty output for useful ServerState monitoring.''' 321 | out= unicode() 322 | sensors= [ # Select the ones you want in the order you want them. 323 | #'curLapTime', 324 | #'lastLapTime', 325 | u'stucktimer', 326 | #'damage', 327 | #'focus', 328 | u'fuel', 329 | #'gear', 330 | u'distRaced', 331 | u'distFromStart', 332 | #'racePos', 333 | u'opponents', 334 | u'wheelSpinVel', 335 | u'z', 336 | u'speedZ', 337 | u'speedY', 338 | u'speedX', 339 | u'targetSpeed', 340 | u'rpm', 341 | u'skid', 342 | u'slip', 343 | u'track', 344 | u'trackPos', 345 | u'angle', 346 | ] 347 | 348 | #for k in sorted(self.d): # Use this to get all sensors. 349 | for k in sensors: 350 | if type(self.d.get(k)) is list: # Handle list type data. 351 | if k == u'track': # Nice display for track sensors. 352 | strout= unicode() 353 | # for tsensor in self.d['track']: 354 | # if tsensor >180: oc= '|' 355 | # elif tsensor > 80: oc= ';' 356 | # elif tsensor > 60: oc= ',' 357 | # elif tsensor > 39: oc= '.' 358 | # #elif tsensor > 13: oc= chr(int(tsensor)+65-13) 359 | # elif tsensor > 13: oc= chr(int(tsensor)+97-13) 360 | # elif tsensor > 3: oc= chr(int(tsensor)+48-3) 361 | # else: oc= '_' 362 | # strout+= oc 363 | # strout= ' -> '+strout[:9] +' ' + strout[9] + ' ' + strout[10:]+' <-' 364 | raw_tsens= [u'%.1f'%x for x in self.d[u'track']] 365 | strout+= u' '.join(raw_tsens[:9])+u'_'+raw_tsens[9]+u'_'+u' '.join(raw_tsens[10:]) 366 | elif k == u'opponents': # Nice display for opponent sensors. 367 | strout= unicode() 368 | for osensor in self.d[u'opponents']: 369 | if osensor >190: oc= u'_' 370 | elif osensor > 90: oc= u'.' 371 | elif osensor > 39: oc= unichr(int(osensor/2)+97-19) 372 | elif osensor > 13: oc= unichr(int(osensor)+65-13) 373 | elif osensor > 3: oc= unichr(int(osensor)+48-3) 374 | else: oc= u'?' 375 | strout+= oc 376 | strout= u' -> '+strout[:18] + u' ' + strout[18:]+u' <-' 377 | else: 378 | strlist= [unicode(i) for i in self.d[k]] 379 | strout= u', '.join(strlist) 380 | else: # Not a list type of value. 381 | if k == u'gear': # This is redundant now since it's part of RPM. 382 | gs= u'_._._._._._._._._' 383 | p= int(self.d[u'gear']) * 2 + 2 # Position 384 | l= u'%d'%self.d[u'gear'] # Label 385 | if l==u'-1': l= u'R' 386 | if l==u'0': l= u'N' 387 | strout= gs[:p]+ u'(%s)'%l + gs[p+3:] 388 | elif k == u'damage': 389 | strout= u'%6.0f %s' % (self.d[k], bargraph(self.d[k],0,10000,50,u'~')) 390 | elif k == u'fuel': 391 | strout= u'%6.0f %s' % (self.d[k], bargraph(self.d[k],0,100,50,u'f')) 392 | elif k == u'speedX': 393 | cx= u'X' 394 | if self.d[k]<0: cx= u'R' 395 | strout= u'%6.1f %s' % (self.d[k], bargraph(self.d[k],-30,300,50,cx)) 396 | elif k == u'speedY': # This gets reversed for display to make sense. 397 | strout= u'%6.1f %s' % (self.d[k], bargraph(self.d[k]*-1,-25,25,50,u'Y')) 398 | elif k == u'speedZ': 399 | strout= u'%6.1f %s' % (self.d[k], bargraph(self.d[k],-13,13,50,u'Z')) 400 | elif k == u'z': 401 | strout= u'%6.3f %s' % (self.d[k], bargraph(self.d[k],.3,.5,50,u'z')) 402 | elif k == u'trackPos': # This gets reversed for display to make sense. 403 | cx=u'<' 404 | if self.d[k]<0: cx= u'>' 405 | strout= u'%6.3f %s' % (self.d[k], bargraph(self.d[k]*-1,-1,1,50,cx)) 406 | elif k == u'stucktimer': 407 | if self.d[k]: 408 | strout= u'%3d %s' % (self.d[k], bargraph(self.d[k],0,300,50,u"'")) 409 | else: strout= u'Not stuck!' 410 | elif k == u'rpm': 411 | g= self.d[u'gear'] 412 | if g < 0: 413 | g= u'R' 414 | else: 415 | g= u'%1d'% g 416 | strout= bargraph(self.d[k],0,10000,50,g) 417 | elif k == u'angle': 418 | asyms= [ 419 | u" ! ", u".|' ", u"./' ", u"_.- ", u".-- ", u"..- ", 420 | u"--- ", u".__ ", u"-._ ", u"'-. ", u"'\. ", u"'|. ", 421 | u" | ", u" .|'", u" ./'", u" .-'", u" _.-", u" __.", 422 | u" ---", u" --.", u" -._", u" -..", u" '\.", u" '|." ] 423 | rad= self.d[k] 424 | deg= int(rad*180/PI) 425 | symno= int(.5+ (rad+PI) / (PI/12) ) 426 | symno= symno % (len(asyms)-1) 427 | strout= u'%5.2f %3d (%s)' % (rad,deg,asyms[symno]) 428 | elif k == u'skid': # A sensible interpretation of wheel spin. 429 | frontwheelradpersec= self.d[u'wheelSpinVel'][0] 430 | skid= 0 431 | if frontwheelradpersec: 432 | skid= .5555555555*self.d[u'speedX']/frontwheelradpersec - .66124 433 | strout= bargraph(skid,-.05,.4,50,u'*') 434 | elif k == u'slip': # A sensible interpretation of wheel spin. 435 | frontwheelradpersec= self.d[u'wheelSpinVel'][0] 436 | slip= 0 437 | if frontwheelradpersec: 438 | slip= ((self.d[u'wheelSpinVel'][2]+self.d[u'wheelSpinVel'][3]) - 439 | (self.d[u'wheelSpinVel'][0]+self.d[u'wheelSpinVel'][1])) 440 | strout= bargraph(slip,-5,150,50,u'@') 441 | else: 442 | strout= unicode(self.d[k]) 443 | out+= u"%s: %s\n" % (k,strout) 444 | return out 445 | 446 | class DriverAction(object): 447 | u'''What the driver is intending to do (i.e. send to the server). 448 | Composes something like this for the server: 449 | (accel 1)(brake 0)(gear 1)(steer 0)(clutch 0)(focus 0)(meta 0) or 450 | (accel 1)(brake 0)(gear 1)(steer 0)(clutch 0)(focus -90 -45 0 45 90)(meta 0)''' 451 | def __init__(self): 452 | self.actionstr= unicode() 453 | # "d" is for data dictionary. 454 | self.d= { u'accel':0.2, 455 | u'brake':0, 456 | u'clutch':0, 457 | u'gear':1, 458 | u'steer':0, 459 | u'focus':[-90,-45,0,45,90], 460 | u'meta':0 461 | } 462 | 463 | def clip_to_limits(self): 464 | u"""There pretty much is never a reason to send the server 465 | something like (steer 9483.323). This comes up all the time 466 | and it's probably just more sensible to always clip it than to 467 | worry about when to. The "clip" command is still a snakeoil 468 | utility function, but it should be used only for non standard 469 | things or non obvious limits (limit the steering to the left, 470 | for example). For normal limits, simply don't worry about it.""" 471 | self.d[u'steer']= clip(self.d[u'steer'], -1, 1) 472 | self.d[u'brake']= clip(self.d[u'brake'], 0, 1) 473 | self.d[u'accel']= clip(self.d[u'accel'], 0, 1) 474 | self.d[u'clutch']= clip(self.d[u'clutch'], 0, 1) 475 | if self.d[u'gear'] not in [-1, 0, 1, 2, 3, 4, 5, 6]: 476 | self.d[u'gear']= 0 477 | if self.d[u'meta'] not in [0,1]: 478 | self.d[u'meta']= 0 479 | if type(self.d[u'focus']) is not list or min(self.d[u'focus'])<-180 or max(self.d[u'focus'])>180: 480 | self.d[u'focus']= 0 481 | 482 | def __repr__(self): 483 | self.clip_to_limits() 484 | out= unicode() 485 | for k in self.d: 486 | out+= u'('+k+u' ' 487 | v= self.d[k] 488 | if not type(v) is list: 489 | out+= u'%.3f' % v 490 | else: 491 | out+= u' '.join([unicode(x) for x in v]) 492 | out+= u')' 493 | return out 494 | return out+u'\n' 495 | 496 | def fancyout(self): 497 | u'''Specialty output for useful monitoring of bot's effectors.''' 498 | out= unicode() 499 | od= self.d.copy() 500 | od.pop(u'gear',u'') # Not interesting. 501 | od.pop(u'meta',u'') # Not interesting. 502 | od.pop(u'focus',u'') # Not interesting. Yet. 503 | for k in sorted(od): 504 | if k == u'clutch' or k == u'brake' or k == u'accel': 505 | strout=u'' 506 | strout= u'%6.3f %s' % (od[k], bargraph(od[k],0,1,50,k[0].upper())) 507 | elif k == u'steer': # Reverse the graph to make sense. 508 | strout= u'%6.3f %s' % (od[k], bargraph(od[k]*-1,-1,1,50,u'S')) 509 | else: 510 | strout= unicode(od[k]) 511 | out+= u"%s: %s\n" % (k,strout) 512 | return out 513 | 514 | # == Misc Utility Functions 515 | def destringify(s): 516 | u'''makes a string into a value or a list of strings into a list of 517 | values (if possible)''' 518 | if not s: return s 519 | if type(s) is unicode: 520 | try: 521 | return float(s) 522 | except ValueError: 523 | print u"Could not find a value in %s" % s 524 | return s 525 | elif type(s) is list: 526 | if len(s) < 2: 527 | return destringify(s[0]) 528 | else: 529 | return [destringify(i) for i in s] 530 | 531 | def drive_example(c): 532 | u'''This is only an example. It will get around the track but the 533 | correct thing to do is write your own `drive()` function.''' 534 | S,R= c.S.d,c.R.d 535 | target_speed=1000 536 | 537 | # Steer To Corner 538 | R[u'steer']= S[u'angle']*10 / PI 539 | # Steer To Center 540 | R[u'steer']-= S[u'trackPos']*.10 541 | 542 | # Throttle Control 543 | if S[u'speedX'] < target_speed - (R[u'steer']*50): 544 | R[u'accel']+= .01 545 | else: 546 | R[u'accel']-= .01 547 | if S[u'speedX']<10: 548 | R[u'accel']+= 1/(S[u'speedX']+.1) 549 | 550 | # Traction Control System 551 | if ((S[u'wheelSpinVel'][2]+S[u'wheelSpinVel'][3]) - 552 | (S[u'wheelSpinVel'][0]+S[u'wheelSpinVel'][1]) > 5): 553 | R[u'accel']-= .2 554 | 555 | # Automatic Transmission 556 | R[u'gear']=1 557 | if S[u'speedX']>50: 558 | R[u'gear']=2 559 | if S[u'speedX']>80: 560 | R[u'gear']=3 561 | if S[u'speedX']>110: 562 | R[u'gear']=4 563 | if S[u'speedX']>140: 564 | R[u'gear']=5 565 | if S[u'speedX']>170: 566 | R[u'gear']=6 567 | return 568 | 569 | # ================ MAIN ================ 570 | if __name__ == u"__main__": 571 | C= Client(p=3101) 572 | for step in xrange(C.maxSteps,0,-1): 573 | C.get_servers_input() 574 | drive_example(C) 575 | C.respond_to_server() 576 | C.shutdown() 577 | --------------------------------------------------------------------------------