├── ActorNetwork.py
├── CriticNetwork.py
├── OU.py
├── README.md
├── ReplayBuffer.py
├── actormodel.h5
├── actormodel.json
├── autostart.sh
├── criticmodel.h5
├── criticmodel.json
├── ddpg.py
├── fast.gif
├── gym_torcs.py
└── snakeoil3_gym.py


/ActorNetwork.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import math
 3 | from keras.initializations import normal, identity
 4 | from keras.models import model_from_json
 5 | from keras.models import Sequential, Model
 6 | from keras.engine.training import collect_trainable_weights
 7 | from keras.layers import Dense, Flatten, Input, merge, Lambda
 8 | from keras.optimizers import Adam
 9 | import tensorflow as tf
10 | import keras.backend as K
11 | 
12 | HIDDEN1_UNITS = 300
13 | HIDDEN2_UNITS = 600
14 | 
class ActorNetwork(object):
    """Actor (policy) network for DDPG together with its target copy.

    Two structurally identical Keras models are built: the online actor
    (``self.model``) and the target actor (``self.target_model``).  A
    TensorFlow op (``self.optimize``) applies the policy gradient to the
    online actor; ``target_train`` blends the online weights into the target.
    """

    def __init__(self, sess, state_size, action_size, BATCH_SIZE, TAU, LEARNING_RATE):
        """Build both actor models and the policy-gradient training op.

        Args:
            sess: TensorFlow session shared with Keras.
            state_size: Number of state features fed to the network.
            action_size: Width of the action-gradient placeholder.
            BATCH_SIZE: Stored on the instance; not used by this class itself.
            TAU: Blend factor used by ``target_train``.
            LEARNING_RATE: Learning rate of the Adam optimizer.
        """
        self.sess = sess
        self.BATCH_SIZE = BATCH_SIZE
        self.TAU = TAU
        self.LEARNING_RATE = LEARNING_RATE

        K.set_session(sess)

        # Online network and target network share the same architecture.
        self.model, self.weights, self.state = self.create_actor_network(state_size, action_size)
        self.target_model, self.target_weights, self.target_state = self.create_actor_network(state_size, action_size)

        # Fed with the critic's gradient w.r.t. the action (see train()).
        self.action_gradient = tf.placeholder(tf.float32, [None, action_size])
        # The gradient is negated because the optimizer performs descent,
        # while the actor has to move *up* the supplied action gradient.
        self.params_grad = tf.gradients(self.model.output, self.weights, -self.action_gradient)
        # Materialise the pairs so the result does not depend on zip()
        # returning a list (Python 2) or a one-shot iterator (Python 3).
        grads = list(zip(self.params_grad, self.weights))
        self.optimize = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(grads)
        self.sess.run(tf.initialize_all_variables())

    def train(self, states, action_grads):
        """Run one optimizer step on the online actor.

        Args:
            states: Batch of states fed to the actor's input tensor.
            action_grads: Gradients w.r.t. the actions for those states,
                fed to ``self.action_gradient``.
        """
        self.sess.run(self.optimize, feed_dict={
            self.state: states,
            self.action_gradient: action_grads
        })

    def target_train(self):
        """Soft-update the target actor: target = TAU*online + (1-TAU)*target."""
        actor_weights = self.model.get_weights()
        actor_target_weights = self.target_model.get_weights()
        # enumerate() instead of xrange(): xrange only exists on Python 2.
        for i, online_w in enumerate(actor_weights):
            actor_target_weights[i] = self.TAU * online_w + (1 - self.TAU) * actor_target_weights[i]
        self.target_model.set_weights(actor_target_weights)

    def create_actor_network(self, state_size, action_dim):
        """Create one actor model.

        The network has two ReLU hidden layers followed by three single-unit
        heads -- steering (tanh), acceleration (sigmoid) and brake (sigmoid) --
        concatenated into one output.  ``action_dim`` is accepted for
        signature symmetry but the output width is fixed by the three heads.

        Returns:
            Tuple ``(model, trainable_weights, state_input_tensor)``.
        """
        print("Now we build the model")

        # Kept as a lambda (not a named def) so the initializer still
        # reports itself as "<lambda>" when the model config is serialised.
        small_normal = lambda shape, name: normal(shape, scale=1e-4, name=name)

        S = Input(shape=[state_size])
        h0 = Dense(HIDDEN1_UNITS, activation='relu')(S)
        h1 = Dense(HIDDEN2_UNITS, activation='relu')(h0)
        Steering = Dense(1, activation='tanh', init=small_normal)(h1)
        Acceleration = Dense(1, activation='sigmoid', init=small_normal)(h1)
        Brake = Dense(1, activation='sigmoid', init=small_normal)(h1)
        V = merge([Steering, Acceleration, Brake], mode='concat')
        model = Model(input=S, output=V)
        return model, model.trainable_weights, S
57 | 
58 | 


--------------------------------------------------------------------------------
/CriticNetwork.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import math
 3 | from keras.initializations import normal, identity
 4 | from keras.models import model_from_json, load_model
 5 | from keras.engine.training import collect_trainable_weights
 6 | from keras.models import Sequential
 7 | from keras.layers import Dense, Flatten, Input, merge, Lambda, Activation
 8 | from keras.models import Sequential, Model
 9 | from keras.optimizers import Adam
10 | import keras.backend as K
11 | import tensorflow as tf
12 | 
13 | HIDDEN1_UNITS = 300
14 | HIDDEN2_UNITS = 600
15 | 
class CriticNetwork(object):
    """Critic (action-value) network for DDPG together with its target copy.

    Two structurally identical compiled Keras models are built: the online
    critic (``self.model``) and the target critic (``self.target_model``).
    ``gradients`` exposes the derivative of the online critic's output with
    respect to its action input, which drives the actor update.
    """

    def __init__(self, sess, state_size, action_size, BATCH_SIZE, TAU, LEARNING_RATE):
        """Build both critic models and the action-gradient op.

        Args:
            sess: TensorFlow session shared with Keras.
            state_size: Number of state features.
            action_size: Number of action components.
            BATCH_SIZE: Stored on the instance; not used by this class itself.
            TAU: Blend factor used by ``target_train``.
            LEARNING_RATE: Learning rate of the Adam optimizer the models are
                compiled with.
        """
        self.sess = sess
        self.BATCH_SIZE = BATCH_SIZE
        self.TAU = TAU
        self.LEARNING_RATE = LEARNING_RATE
        self.action_size = action_size

        K.set_session(sess)

        # Online network and target network share the same architecture.
        self.model, self.action, self.state = self.create_critic_network(state_size, action_size)
        self.target_model, self.target_action, self.target_state = self.create_critic_network(state_size, action_size)
        # Gradient of the critic output w.r.t. the action input (policy update).
        self.action_grads = tf.gradients(self.model.output, self.action)
        self.sess.run(tf.initialize_all_variables())

    def gradients(self, states, actions):
        """Return d(output)/d(action) of the online critic for a batch.

        ``tf.gradients`` yields one entry per differentiated tensor; only the
        action input is differentiated, so the single entry is returned.
        """
        return self.sess.run(self.action_grads, feed_dict={
            self.state: states,
            self.action: actions
        })[0]

    def target_train(self):
        """Soft-update the target critic: target = TAU*online + (1-TAU)*target."""
        critic_weights = self.model.get_weights()
        critic_target_weights = self.target_model.get_weights()
        # enumerate() instead of xrange(): xrange only exists on Python 2.
        for i, online_w in enumerate(critic_weights):
            critic_target_weights[i] = self.TAU * online_w + (1 - self.TAU) * critic_target_weights[i]
        self.target_model.set_weights(critic_target_weights)

    def create_critic_network(self, state_size, action_dim):
        """Create and compile one critic model.

        The state passes through a ReLU layer and a linear layer, the action
        through a linear layer; both branches are summed, followed by a ReLU
        layer and a linear output of width ``action_dim``.

        NOTE(review): the output has ``action_dim`` units rather than a single
        scalar value; the training targets built in ddpg.py use the same
        shape, so this is left unchanged here.

        Returns:
            Tuple ``(model, action_input_tensor, state_input_tensor)``.
        """
        print("Now we build the model")
        S = Input(shape=[state_size])
        A = Input(shape=[action_dim], name='action2')
        w1 = Dense(HIDDEN1_UNITS, activation='relu')(S)
        a1 = Dense(HIDDEN2_UNITS, activation='linear')(A)
        h1 = Dense(HIDDEN2_UNITS, activation='linear')(w1)
        h2 = merge([h1, a1], mode='sum')
        h3 = Dense(HIDDEN2_UNITS, activation='relu')(h2)
        V = Dense(action_dim, activation='linear')(h3)
        model = Model(input=[S, A], output=V)
        adam = Adam(lr=self.LEARNING_RATE)
        model.compile(loss='mse', optimizer=adam)
        return model, A, S
59 | 


--------------------------------------------------------------------------------
/OU.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np 
3 | 
class OU(object):
    """Ornstein-Uhlenbeck style noise increment used for exploration."""

    def function(self, x, mu, theta, sigma):
        """Return one noise increment for the current value ``x``.

        The result is a mean-reverting pull towards ``mu`` (scaled by
        ``theta``) plus Gaussian noise scaled by ``sigma``.  It is a NumPy
        array of shape ``(1,)`` because a single sample is drawn with
        ``np.random.randn(1)``.
        """
        pull_to_mean = theta * (mu - x)
        gaussian_kick = sigma * np.random.randn(1)
        return pull_to_mean + gaussian_kick


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Using Keras and Deep Deterministic Policy Gradient to play TORCS
 2 | 
 3 | About 300 lines of Python code demonstrating DDPG with Keras.
 4 | 
 5 | Please read the following blog post for details:
 6 | 
 7 | https://yanpanlau.github.io/2016/10/11/Torcs-Keras.html
 8 | 
 9 | ![](fast.gif)
10 | 
11 | # Installation Dependencies:
12 | 
13 | * Python 2.7
14 | * Keras 1.1.0
15 | * Tensorflow r0.10
16 | * [gym_torcs](https://github.com/ugo-nama-kun/gym_torcs)
17 | 
18 | # How to Run?
19 | 
20 | ```
21 | git clone https://github.com/yanpanlau/DDPG-Keras-Torcs.git
22 | cd DDPG-Keras-Torcs
23 | cp *.* ~/gym_torcs
24 | cd ~/gym_torcs
25 | python ddpg.py 
26 | ```
27 | 
28 | (Set the flag **train_indicator** to 1 in ddpg.py if you want to train the network; the default value, 0, only runs the saved model.)
29 | 


--------------------------------------------------------------------------------
/ReplayBuffer.py:
--------------------------------------------------------------------------------
 1 | from collections import deque
 2 | import random
 3 | 
 4 | class ReplayBuffer(object):
 5 | 
 6 |     def __init__(self, buffer_size):
 7 |         self.buffer_size = buffer_size
 8 |         self.num_experiences = 0
 9 |         self.buffer = deque()
10 | 
11 |     def getBatch(self, batch_size):
12 |         # Randomly sample batch_size examples
13 |         if self.num_experiences < batch_size:
14 |             return random.sample(self.buffer, self.num_experiences)
15 |         else:
16 |             return random.sample(self.buffer, batch_size)
17 | 
18 |     def size(self):
19 |         return self.buffer_size
20 | 
21 |     def add(self, state, action, reward, new_state, done):
22 |         experience = (state, action, reward, new_state, done)
23 |         if self.num_experiences < self.buffer_size:
24 |             self.buffer.append(experience)
25 |             self.num_experiences += 1
26 |         else:
27 |             self.buffer.popleft()
28 |             self.buffer.append(experience)
29 | 
30 |     def count(self):
31 |         # if buffer is full, return buffer size
32 |         # otherwise, return experience counter
33 |         return self.num_experiences
34 | 
35 |     def erase(self):
36 |         self.buffer = deque()
37 |         self.num_experiences = 0
38 | 


--------------------------------------------------------------------------------
/actormodel.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanpanlau/DDPG-Keras-Torcs/455fadee1016ef15ef08817d98ec376d7e34b500/actormodel.h5


--------------------------------------------------------------------------------
/actormodel.json:
--------------------------------------------------------------------------------
1 | "{\"class_name\": \"Model\", \"keras_version\": \"1.1.0\", \"config\": {\"layers\": [{\"class_name\": \"InputLayer\", \"config\": {\"batch_input_shape\": [null, 29], \"input_dtype\": \"float32\", \"sparse\": false, \"name\": \"input_1\"}, \"inbound_nodes\": [], \"name\": \"input_1\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_1\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"relu\", \"output_dim\": 300}, \"inbound_nodes\": [[[\"input_1\", 0, 0]]], \"name\": \"dense_1\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_2\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"relu\", \"output_dim\": 600}, \"inbound_nodes\": [[[\"dense_1\", 0, 0]]], \"name\": \"dense_2\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_3\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"<lambda>\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"tanh\", \"output_dim\": 1}, \"inbound_nodes\": [[[\"dense_2\", 0, 0]]], \"name\": \"dense_3\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_4\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"<lambda>\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"sigmoid\", \"output_dim\": 1}, \"inbound_nodes\": [[[\"dense_2\", 0, 0]]], \"name\": \"dense_4\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_5\", 
\"activity_regularizer\": null, \"trainable\": true, \"init\": \"<lambda>\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"sigmoid\", \"output_dim\": 1}, \"inbound_nodes\": [[[\"dense_2\", 0, 0]]], \"name\": \"dense_5\"}, {\"class_name\": \"Merge\", \"config\": {\"name\": \"merge_1\", \"concat_axis\": -1, \"mode_type\": \"raw\", \"dot_axes\": -1, \"mode\": \"concat\", \"output_shape\": null, \"output_shape_type\": \"raw\"}, \"inbound_nodes\": [[[\"dense_3\", 0, 0], [\"dense_4\", 0, 0], [\"dense_5\", 0, 0]]], \"name\": \"merge_1\"}], \"input_layers\": [[\"input_1\", 0, 0]], \"output_layers\": [[\"merge_1\", 0, 0]], \"name\": \"model_1\"}}"


--------------------------------------------------------------------------------
/autostart.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | xte 'key Return'
 3 | xte 'usleep 100000'
 4 | xte 'key Return'
 5 | xte 'usleep 100000'
 6 | xte 'key Up'
 7 | xte 'usleep 100000'
 8 | xte 'key Up'
 9 | xte 'usleep 100000'
10 | xte 'key Return'
11 | xte 'usleep 100000'
12 | xte 'key Return'
13 | 


--------------------------------------------------------------------------------
/criticmodel.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanpanlau/DDPG-Keras-Torcs/455fadee1016ef15ef08817d98ec376d7e34b500/criticmodel.h5


--------------------------------------------------------------------------------
/criticmodel.json:
--------------------------------------------------------------------------------
1 | "{\"class_name\": \"Model\", \"keras_version\": \"1.1.0\", \"config\": {\"layers\": [{\"class_name\": \"InputLayer\", \"config\": {\"batch_input_shape\": [null, 29], \"input_dtype\": \"float32\", \"sparse\": false, \"name\": \"input_3\"}, \"inbound_nodes\": [], \"name\": \"input_3\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_11\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"relu\", \"output_dim\": 300}, \"inbound_nodes\": [[[\"input_3\", 0, 0]]], \"name\": \"dense_11\"}, {\"class_name\": \"InputLayer\", \"config\": {\"batch_input_shape\": [null, 3], \"input_dtype\": \"float32\", \"sparse\": false, \"name\": \"action2\"}, \"inbound_nodes\": [], \"name\": \"action2\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_13\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"linear\", \"output_dim\": 600}, \"inbound_nodes\": [[[\"dense_11\", 0, 0]]], \"name\": \"dense_13\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_12\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"linear\", \"output_dim\": 600}, \"inbound_nodes\": [[[\"action2\", 0, 0]]], \"name\": \"dense_12\"}, {\"class_name\": \"Merge\", \"config\": {\"name\": \"merge_3\", \"concat_axis\": -1, \"mode_type\": \"raw\", \"dot_axes\": -1, \"mode\": \"sum\", \"output_shape\": null, \"output_shape_type\": \"raw\"}, \"inbound_nodes\": [[[\"dense_13\", 0, 0], [\"dense_12\", 0, 0]]], \"name\": \"merge_3\"}, 
{\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_14\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"relu\", \"output_dim\": 600}, \"inbound_nodes\": [[[\"merge_3\", 0, 0]]], \"name\": \"dense_14\"}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_15\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": null, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"linear\", \"output_dim\": 3}, \"inbound_nodes\": [[[\"dense_14\", 0, 0]]], \"name\": \"dense_15\"}], \"input_layers\": [[\"input_3\", 0, 0], [\"action2\", 0, 0]], \"output_layers\": [[\"dense_15\", 0, 0]], \"name\": \"model_3\"}}"


--------------------------------------------------------------------------------
/ddpg.py:
--------------------------------------------------------------------------------
  1 | from gym_torcs import TorcsEnv
  2 | import numpy as np
  3 | import random
  4 | import argparse
  5 | from keras.models import model_from_json, Model
  6 | from keras.models import Sequential
  7 | from keras.layers.core import Dense, Dropout, Activation, Flatten
  8 | from keras.optimizers import Adam
  9 | import tensorflow as tf
 10 | from keras.engine.training import collect_trainable_weights
 11 | import json
 12 | 
 13 | from ReplayBuffer import ReplayBuffer
 14 | from ActorNetwork import ActorNetwork
 15 | from CriticNetwork import CriticNetwork
 16 | from OU import OU
 17 | import timeit
 18 | 
 19 | OU = OU()       #Ornstein-Uhlenbeck Process
 20 | 
 21 | def playGame(train_indicator=0):    #1 means Train, 0 means simply Run
 22 |     BUFFER_SIZE = 100000
 23 |     BATCH_SIZE = 32
 24 |     GAMMA = 0.99
 25 |     TAU = 0.001     #Target Network HyperParameters
 26 |     LRA = 0.0001    #Learning rate for Actor
 27 |     LRC = 0.001     #Lerning rate for Critic
 28 | 
 29 |     action_dim = 3  #Steering/Acceleration/Brake
 30 |     state_dim = 29  #of sensors input
 31 | 
 32 |     np.random.seed(1337)
 33 | 
 34 |     vision = False
 35 | 
 36 |     EXPLORE = 100000.
 37 |     episode_count = 2000
 38 |     max_steps = 100000
 39 |     reward = 0
 40 |     done = False
 41 |     step = 0
 42 |     epsilon = 1
 43 |     indicator = 0
 44 | 
 45 |     #Tensorflow GPU optimization
 46 |     config = tf.ConfigProto()
 47 |     config.gpu_options.allow_growth = True
 48 |     sess = tf.Session(config=config)
 49 |     from keras import backend as K
 50 |     K.set_session(sess)
 51 | 
 52 |     actor = ActorNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRA)
 53 |     critic = CriticNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRC)
 54 |     buff = ReplayBuffer(BUFFER_SIZE)    #Create replay buffer
 55 | 
 56 |     # Generate a Torcs environment
 57 |     env = TorcsEnv(vision=vision, throttle=True,gear_change=False)
 58 | 
 59 |     #Now load the weight
 60 |     print("Now we load the weight")
 61 |     try:
 62 |         actor.model.load_weights("actormodel.h5")
 63 |         critic.model.load_weights("criticmodel.h5")
 64 |         actor.target_model.load_weights("actormodel.h5")
 65 |         critic.target_model.load_weights("criticmodel.h5")
 66 |         print("Weight load successfully")
 67 |     except:
 68 |         print("Cannot find the weight")
 69 | 
 70 |     print("TORCS Experiment Start.")
 71 |     for i in range(episode_count):
 72 | 
 73 |         print("Episode : " + str(i) + " Replay Buffer " + str(buff.count()))
 74 | 
 75 |         if np.mod(i, 3) == 0:
 76 |             ob = env.reset(relaunch=True)   #relaunch TORCS every 3 episode because of the memory leak error
 77 |         else:
 78 |             ob = env.reset()
 79 | 
 80 |         s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))
 81 |      
 82 |         total_reward = 0.
 83 |         for j in range(max_steps):
 84 |             loss = 0 
 85 |             epsilon -= 1.0 / EXPLORE
 86 |             a_t = np.zeros([1,action_dim])
 87 |             noise_t = np.zeros([1,action_dim])
 88 |             
 89 |             a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0]))
 90 |             noise_t[0][0] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][0],  0.0 , 0.60, 0.30)
 91 |             noise_t[0][1] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][1],  0.5 , 1.00, 0.10)
 92 |             noise_t[0][2] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][2], -0.1 , 1.00, 0.05)
 93 | 
 94 |             #The following code do the stochastic brake
 95 |             #if random.random() <= 0.1:
 96 |             #    print("********Now we apply the brake***********")
 97 |             #    noise_t[0][2] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][2],  0.2 , 1.00, 0.10)
 98 | 
 99 |             a_t[0][0] = a_t_original[0][0] + noise_t[0][0]
100 |             a_t[0][1] = a_t_original[0][1] + noise_t[0][1]
101 |             a_t[0][2] = a_t_original[0][2] + noise_t[0][2]
102 | 
103 |             ob, r_t, done, info = env.step(a_t[0])
104 | 
105 |             s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm))
106 |         
107 |             buff.add(s_t, a_t[0], r_t, s_t1, done)      #Add replay buffer
108 |             
109 |             #Do the batch update
110 |             batch = buff.getBatch(BATCH_SIZE)
111 |             states = np.asarray([e[0] for e in batch])
112 |             actions = np.asarray([e[1] for e in batch])
113 |             rewards = np.asarray([e[2] for e in batch])
114 |             new_states = np.asarray([e[3] for e in batch])
115 |             dones = np.asarray([e[4] for e in batch])
116 |             y_t = np.asarray([e[1] for e in batch])
117 | 
118 |             target_q_values = critic.target_model.predict([new_states, actor.target_model.predict(new_states)])  
119 |            
120 |             for k in range(len(batch)):
121 |                 if dones[k]:
122 |                     y_t[k] = rewards[k]
123 |                 else:
124 |                     y_t[k] = rewards[k] + GAMMA*target_q_values[k]
125 |        
126 |             if (train_indicator):
127 |                 loss += critic.model.train_on_batch([states,actions], y_t) 
128 |                 a_for_grad = actor.model.predict(states)
129 |                 grads = critic.gradients(states, a_for_grad)
130 |                 actor.train(states, grads)
131 |                 actor.target_train()
132 |                 critic.target_train()
133 | 
134 |             total_reward += r_t
135 |             s_t = s_t1
136 |         
137 |             print("Episode", i, "Step", step, "Action", a_t, "Reward", r_t, "Loss", loss)
138 |         
139 |             step += 1
140 |             if done:
141 |                 break
142 | 
143 |         if np.mod(i, 3) == 0:
144 |             if (train_indicator):
145 |                 print("Now we save model")
146 |                 actor.model.save_weights("actormodel.h5", overwrite=True)
147 |                 with open("actormodel.json", "w") as outfile:
148 |                     json.dump(actor.model.to_json(), outfile)
149 | 
150 |                 critic.model.save_weights("criticmodel.h5", overwrite=True)
151 |                 with open("criticmodel.json", "w") as outfile:
152 |                     json.dump(critic.model.to_json(), outfile)
153 | 
154 |         print("TOTAL REWARD @ " + str(i) +"-th Episode  : Reward " + str(total_reward))
155 |         print("Total Step: " + str(step))
156 |         print("")
157 | 
158 |     env.end()  # This is for shutting down TORCS
159 |     print("Finish.")
160 | 
161 | if __name__ == "__main__":
162 |     playGame()
163 | 


--------------------------------------------------------------------------------
/fast.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanpanlau/DDPG-Keras-Torcs/455fadee1016ef15ef08817d98ec376d7e34b500/fast.gif


--------------------------------------------------------------------------------
/gym_torcs.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | from gym import spaces
  3 | import numpy as np
  4 | # from os import path
  5 | import snakeoil3_gym as snakeoil3
  6 | import numpy as np
  7 | import copy
  8 | import collections as col
  9 | import os
 10 | import time
 11 | 
 12 | 
 13 | class TorcsEnv:
 14 |     terminal_judge_start = 100  # If after 100 timestep still no progress, terminated
 15 |     termination_limit_progress = 5  # [km/h], episode terminates if car is running slower than this limit
 16 |     default_speed = 50
 17 | 
 18 |     initial_reset = True
 19 | 
 20 |     def __init__(self, vision=False, throttle=False, gear_change=False):
 21 |         self.vision = vision
 22 |         self.throttle = throttle
 23 |         self.gear_change = gear_change
 24 | 
 25 |         self.initial_run = True
 26 | 
 27 |         ##print("launch torcs")
 28 |         os.system('pkill torcs')
 29 |         time.sleep(0.5)
 30 |         if self.vision is True:
 31 |             os.system('torcs -nofuel -nodamage -nolaptime -vision &')
 32 |         else:
 33 |             os.system('torcs -nofuel -nolaptime &')
 34 |         time.sleep(0.5)
 35 |         os.system('sh autostart.sh')
 36 |         time.sleep(0.5)
 37 | 
 38 |         """
 39 |         # Modify here if you use multiple tracks in the environment
 40 |         self.client = snakeoil3.Client(p=3101, vision=self.vision)  # Open new UDP in vtorcs
 41 |         self.client.MAX_STEPS = np.inf
 42 | 
 43 |         client = self.client
 44 |         client.get_servers_input()  # Get the initial input from torcs
 45 | 
 46 |         obs = client.S.d  # Get the current full-observation from torcs
 47 |         """
 48 |         if throttle is False:
 49 |             self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,))
 50 |         else:
 51 |             self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,))
 52 | 
 53 |         if vision is False:
 54 |             high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf])
 55 |             low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf])
 56 |             self.observation_space = spaces.Box(low=low, high=high)
 57 |         else:
 58 |             high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255])
 59 |             low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0])
 60 |             self.observation_space = spaces.Box(low=low, high=high)
 61 | 
 62 |     def step(self, u):
 63 |        #print("Step")
 64 |         # convert thisAction to the actual torcs actionstr
 65 |         client = self.client
 66 | 
 67 |         this_action = self.agent_to_torcs(u)
 68 | 
 69 |         # Apply Action
 70 |         action_torcs = client.R.d
 71 | 
 72 |         # Steering
 73 |         action_torcs['steer'] = this_action['steer']  # in [-1, 1]
 74 | 
 75 |         #  Simple Autnmatic Throttle Control by Snakeoil
 76 |         if self.throttle is False:
 77 |             target_speed = self.default_speed
 78 |             if client.S.d['speedX'] < target_speed - (client.R.d['steer']*50):
 79 |                 client.R.d['accel'] += .01
 80 |             else:
 81 |                 client.R.d['accel'] -= .01
 82 | 
 83 |             if client.R.d['accel'] > 0.2:
 84 |                 client.R.d['accel'] = 0.2
 85 | 
 86 |             if client.S.d['speedX'] < 10:
 87 |                 client.R.d['accel'] += 1/(client.S.d['speedX']+.1)
 88 | 
 89 |             # Traction Control System
 90 |             if ((client.S.d['wheelSpinVel'][2]+client.S.d['wheelSpinVel'][3]) -
 91 |                (client.S.d['wheelSpinVel'][0]+client.S.d['wheelSpinVel'][1]) > 5):
 92 |                 action_torcs['accel'] -= .2
 93 |         else:
 94 |             action_torcs['accel'] = this_action['accel']
 95 |             action_torcs['brake'] = this_action['brake']
 96 | 
 97 |         #  Automatic Gear Change by Snakeoil
 98 |         if self.gear_change is True:
 99 |             action_torcs['gear'] = this_action['gear']
100 |         else:
101 |             #  Automatic Gear Change by Snakeoil is possible
102 |             action_torcs['gear'] = 1
103 |             if self.throttle:
104 |                 if client.S.d['speedX'] > 50:
105 |                     action_torcs['gear'] = 2
106 |                 if client.S.d['speedX'] > 80:
107 |                     action_torcs['gear'] = 3
108 |                 if client.S.d['speedX'] > 110:
109 |                     action_torcs['gear'] = 4
110 |                 if client.S.d['speedX'] > 140:
111 |                     action_torcs['gear'] = 5
112 |                 if client.S.d['speedX'] > 170:
113 |                     action_torcs['gear'] = 6
114 |         # Save the privious full-obs from torcs for the reward calculation
115 |         obs_pre = copy.deepcopy(client.S.d)
116 | 
117 |         # One-Step Dynamics Update #################################
118 |         # Apply the Agent's action into torcs
119 |         client.respond_to_server()
120 |         # Get the response of TORCS
121 |         client.get_servers_input()
122 | 
123 |         # Get the current full-observation from torcs
124 |         obs = client.S.d
125 | 
126 |         # Make an obsevation from a raw observation vector from TORCS
127 |         self.observation = self.make_observaton(obs)
128 | 
129 |         # Reward setting Here #######################################
130 |         # direction-dependent positive reward
131 |         track = np.array(obs['track'])
132 |         trackPos = np.array(obs['trackPos'])
133 |         sp = np.array(obs['speedX'])
134 |         damage = np.array(obs['damage'])
135 |         rpm = np.array(obs['rpm'])
136 | 
137 |         progress = sp*np.cos(obs['angle']) - np.abs(sp*np.sin(obs['angle'])) - sp * np.abs(obs['trackPos'])
138 |         reward = progress
139 | 
140 |         # collision detection
141 |         if obs['damage'] - obs_pre['damage'] > 0:
142 |             reward = -1
143 | 
144 |         # Termination judgement #########################
145 |         episode_terminate = False
146 |         #if (abs(track.any()) > 1 or abs(trackPos) > 1):  # Episode is terminated if the car is out of track
147 |         #    reward = -200
148 |         #    episode_terminate = True
149 |         #    client.R.d['meta'] = True
150 | 
151 |         #if self.terminal_judge_start < self.time_step: # Episode terminates if the progress of agent is small
152 |         #    if progress < self.termination_limit_progress:
153 |         #        print("No progress")
154 |         #        episode_terminate = True
155 |         #        client.R.d['meta'] = True
156 | 
157 |         if np.cos(obs['angle']) < 0: # Episode is terminated if the agent runs backward
158 |             episode_terminate = True
159 |             client.R.d['meta'] = True
160 | 
161 | 
162 |         if client.R.d['meta'] is True: # Send a reset signal
163 |             self.initial_run = False
164 |             client.respond_to_server()
165 | 
166 |         self.time_step += 1
167 | 
168 |         return self.get_obs(), reward, client.R.d['meta'], {}
169 | 
170 |     def reset(self, relaunch=False):
171 |         #print("Reset")
172 | 
173 |         self.time_step = 0
174 | 
175 |         if self.initial_reset is not True:
176 |             self.client.R.d['meta'] = True
177 |             self.client.respond_to_server()
178 | 
179 |             ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
180 |             if relaunch is True:
181 |                 self.reset_torcs()
182 |                 print("### TORCS is RELAUNCHED ###")
183 | 
184 |         # Modify here if you use multiple tracks in the environment
185 |         self.client = snakeoil3.Client(p=3101, vision=self.vision)  # Open new UDP in vtorcs
186 |         self.client.MAX_STEPS = np.inf
187 | 
188 |         client = self.client
189 |         client.get_servers_input()  # Get the initial input from torcs
190 | 
191 |         obs = client.S.d  # Get the current full-observation from torcs
192 |         self.observation = self.make_observaton(obs)
193 | 
194 |         self.last_u = None
195 | 
196 |         self.initial_reset = False
197 |         return self.get_obs()
198 | 
199 |     def end(self):
200 |         os.system('pkill torcs')
201 | 
202 |     def get_obs(self):
203 |         return self.observation
204 | 
205 |     def reset_torcs(self):
206 |        #print("relaunch torcs")
207 |         os.system('pkill torcs')
208 |         time.sleep(0.5)
209 |         if self.vision is True:
210 |             os.system('torcs -nofuel -nodamage -nolaptime -vision &')
211 |         else:
212 |             os.system('torcs -nofuel -nolaptime &')
213 |         time.sleep(0.5)
214 |         os.system('sh autostart.sh')
215 |         time.sleep(0.5)
216 | 
217 |     def agent_to_torcs(self, u):
218 |         torcs_action = {'steer': u[0]}
219 | 
220 |         if self.throttle is True:  # throttle action is enabled
221 |             torcs_action.update({'accel': u[1]})
222 |             torcs_action.update({'brake': u[2]})
223 | 
224 |         if self.gear_change is True: # gear change action is enabled
225 |             torcs_action.update({'gear': int(u[3])})
226 | 
227 |         return torcs_action
228 | 
229 | 
230 |     def obs_vision_to_image_rgb(self, obs_image_vec):
231 |         image_vec =  obs_image_vec
232 |         r = image_vec[0:len(image_vec):3]
233 |         g = image_vec[1:len(image_vec):3]
234 |         b = image_vec[2:len(image_vec):3]
235 | 
236 |         sz = (64, 64)
237 |         r = np.array(r).reshape(sz)
238 |         g = np.array(g).reshape(sz)
239 |         b = np.array(b).reshape(sz)
240 |         return np.array([r, g, b], dtype=np.uint8)
241 | 
242 |     def make_observaton(self, raw_obs):
243 |         if self.vision is False:
244 |             names = ['focus',
245 |                      'speedX', 'speedY', 'speedZ', 'angle', 'damage',
246 |                      'opponents',
247 |                      'rpm',
248 |                      'track', 
249 |                      'trackPos',
250 |                      'wheelSpinVel']
251 |             Observation = col.namedtuple('Observaion', names)
252 |             return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32)/200.,
253 |                                speedX=np.array(raw_obs['speedX'], dtype=np.float32)/300.0,
254 |                                speedY=np.array(raw_obs['speedY'], dtype=np.float32)/300.0,
255 |                                speedZ=np.array(raw_obs['speedZ'], dtype=np.float32)/300.0,
256 |                                angle=np.array(raw_obs['angle'], dtype=np.float32)/3.1416,
257 |                                damage=np.array(raw_obs['damage'], dtype=np.float32),
258 |                                opponents=np.array(raw_obs['opponents'], dtype=np.float32)/200.,
259 |                                rpm=np.array(raw_obs['rpm'], dtype=np.float32)/10000,
260 |                                track=np.array(raw_obs['track'], dtype=np.float32)/200.,
261 |                                trackPos=np.array(raw_obs['trackPos'], dtype=np.float32)/1.,
262 |                                wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32))
263 |         else:
264 |             names = ['focus',
265 |                      'speedX', 'speedY', 'speedZ', 'angle',
266 |                      'opponents',
267 |                      'rpm',
268 |                      'track',
269 |                      'trackPos',
270 |                      'wheelSpinVel',
271 |                      'img']
272 |             Observation = col.namedtuple('Observaion', names)
273 | 
274 |             # Get RGB from observation
275 |             image_rgb = self.obs_vision_to_image_rgb(raw_obs[names[8]])
276 | 
277 |             return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32)/200.,
278 |                                speedX=np.array(raw_obs['speedX'], dtype=np.float32)/self.default_speed,
279 |                                speedY=np.array(raw_obs['speedY'], dtype=np.float32)/self.default_speed,
280 |                                speedZ=np.array(raw_obs['speedZ'], dtype=np.float32)/self.default_speed,
281 |                                opponents=np.array(raw_obs['opponents'], dtype=np.float32)/200.,
282 |                                rpm=np.array(raw_obs['rpm'], dtype=np.float32),
283 |                                track=np.array(raw_obs['track'], dtype=np.float32)/200.,
284 |                                trackPos=np.array(raw_obs['trackPos'], dtype=np.float32)/1.,
285 |                                wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32),
286 |                                img=image_rgb)
287 | 


--------------------------------------------------------------------------------
/snakeoil3_gym.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # snakeoil.py
  3 | # Chris X Edwards <snakeoil@xed.ch>
  4 | # Snake Oil is a Python library for interfacing with a TORCS
  5 | # race car simulator which has been patched with the server
# extensions used in the Simulated Car Racing competitions.
  7 | # http://scr.geccocompetitions.com/
  8 | #
  9 | # To use it, you must import it and create a "drive()" function.
 10 | # This will take care of option handling and server connecting, etc.
 11 | # To see how to write your own client do something like this which is
 12 | # a complete working client:
 13 | # /-----------------------------------------------\
 14 | # |#!/usr/bin/python                              |
 15 | # |import snakeoil                                |
 16 | # |if __name__ == "__main__":                     |
 17 | # |    C= snakeoil.Client()                       |
 18 | # |    for step in xrange(C.maxSteps,0,-1):       |
 19 | # |        C.get_servers_input()                  |
 20 | # |        snakeoil.drive_example(C)              |
 21 | # |        C.respond_to_server()                  |
 22 | # |    C.shutdown()                               |
 23 | # \-----------------------------------------------/
 24 | # This should then be a full featured client. The next step is to
 25 | # replace 'snakeoil.drive_example()' with your own. There is a
 26 | # dictionary which holds various option values (see `default_options`
 27 | # variable for all the details) but you probably only need a few
 28 | # things from it. Mainly the `trackname` and `stage` are important
 29 | # when developing a strategic bot.
 30 | #
 31 | # This dictionary also contains a ServerState object
 32 | # (key=S) and a DriverAction object (key=R for response). This allows
 33 | # you to get at all the information sent by the server and to easily
 34 | # formulate your reply. These objects contain a member dictionary "d"
 35 | # (for data dictionary) which contain key value pairs based on the
 36 | # server's syntax. Therefore, you can read the following:
 37 | #    angle, curLapTime, damage, distFromStart, distRaced, focus,
 38 | #    fuel, gear, lastLapTime, opponents, racePos, rpm,
 39 | #    speedX, speedY, speedZ, track, trackPos, wheelSpinVel, z
 40 | # The syntax specifically would be something like:
#    X= o[S.d['trackPos']]
 42 | # And you can set the following:
 43 | #    accel, brake, clutch, gear, steer, focus, meta
 44 | # The syntax is:
 45 | #     o[R.d['steer']]= X
 46 | # Note that it is 'steer' and not 'steering' as described in the manual!
 47 | # All values should be sensible for their type, including lists being lists.
 48 | # See the SCR manual or http://xed.ch/help/torcs.html for details.
 49 | #
 50 | # If you just run the snakeoil.py base library itself it will implement a
 51 | # serviceable client with a demonstration drive function that is
 52 | # sufficient for getting around most tracks.
 53 | # Try `snakeoil.py --help` to get started.
 54 | 
 55 | # for Python3-based torcs python robot client
 56 | from __future__ import division
 57 | from __future__ import absolute_import
 58 | import socket
 59 | import sys
 60 | import getopt
 61 | import os
 62 | import time
 63 | PI= 3.14159265359
 64 | 
 65 | data_size = 2**17
 66 | 
 67 | # Initialize help messages
 68 | ophelp=  u'Options:\n'
 69 | ophelp+= u' --host, -H <host>    TORCS server host. [localhost]\n'
 70 | ophelp+= u' --port, -p <port>    TORCS port. [3001]\n'
 71 | ophelp+= u' --id, -i <id>        ID for server. [SCR]\n'
 72 | ophelp+= u' --steps, -m <#>      Maximum simulation steps. 1 sec ~ 50 steps. [100000]\n'
 73 | ophelp+= u' --episodes, -e <#>   Maximum learning episodes. [1]\n'
 74 | ophelp+= u' --track, -t <track>  Your name for this track. Used for learning. [unknown]\n'
 75 | ophelp+= u' --stage, -s <#>      0=warm up, 1=qualifying, 2=race, 3=unknown. [3]\n'
 76 | ophelp+= u' --debug, -d          Output full telemetry.\n'
 77 | ophelp+= u' --help, -h           Show this help.\n'
 78 | ophelp+= u' --version, -v        Show current version.'
 79 | usage= u'Usage: %s [ophelp [optargs]] \n' % sys.argv[0]
 80 | usage= usage + ophelp
 81 | version= u"20130505-2"
 82 | 
 83 | def clip(v,lo,hi):
 84 |     if v<lo: return lo
 85 |     elif v>hi: return hi
 86 |     else: return v
 87 | 
 88 | def bargraph(x,mn,mx,w,c=u'X'):
 89 |     u'''Draws a simple asciiart bar graph. Very handy for
 90 |     visualizing what's going on with the data.
 91 |     x= Value from sensor, mn= minimum plottable value,
 92 |     mx= maximum plottable value, w= width of plot in chars,
 93 |     c= the character to plot with.'''
 94 |     if not w: return u'' # No width!
 95 |     if x<mn: x= mn      # Clip to bounds.
 96 |     if x>mx: x= mx      # Clip to bounds.
 97 |     tx= mx-mn # Total real units possible to show on graph.
 98 |     if tx<=0: return u'backwards' # Stupid bounds.
 99 |     upw= tx/float(w) # X Units per output char width.
100 |     if upw<=0: return u'what?' # Don't let this happen.
101 |     negpu, pospu, negnonpu, posnonpu= 0,0,0,0
102 |     if mn < 0: # Then there is a negative part to graph.
103 |         if x < 0: # And the plot is on the negative side.
104 |             negpu= -x + min(0,mx)
105 |             negnonpu= -mn + x
106 |         else: # Plot is on pos. Neg side is empty.
107 |             negnonpu= -mn + min(0,mx) # But still show some empty neg.
108 |     if mx > 0: # There is a positive part to the graph
109 |         if x > 0: # And the plot is on the positive side.
110 |             pospu= x - max(0,mn)
111 |             posnonpu= mx - x
112 |         else: # Plot is on neg. Pos side is empty.
113 |             posnonpu= mx - max(0,mn) # But still show some empty pos.
114 |     nnc= int(negnonpu/upw)*u'-'
115 |     npc= int(negpu/upw)*c
116 |     ppc= int(pospu/upw)*c
117 |     pnc= int(posnonpu/upw)*u'_'
118 |     return u'[%s]' % (nnc+npc+ppc+pnc)
119 | 
class Client(object):
    u'''UDP client for a TORCS server patched with the SCR extensions.

    Holds the connection settings, the latest ServerState (self.S) and the
    DriverAction to send back (self.R).  Constructing a Client parses
    sys.argv, applies any explicit keyword overrides and then blocks in
    setup_connection() until the server identifies the client.'''
    def __init__(self,H=None,p=None,i=None,e=None,t=None,s=None,d=None,vision=False):
        u'''H= host, p= port, i= server id, e= max episodes, t= track name,
        s= stage, d= debug flag, vision= relaunch TORCS with -vision when
        the connection has to be re-established.  Truthy keyword values
        take precedence over command line options.'''
        # If you don't like the option defaults,  change them here.
        self.vision = vision

        self.host= u'localhost'
        self.port= 3001
        self.sid= u'SCR'
        self.maxEpisodes=1 # "Maximum number of learning episodes to perform"
        self.trackname= u'unknown'
        self.stage= 3 # 0=Warm-up, 1=Qualifying 2=Race, 3=unknown <Default=3>
        self.debug= False
        self.maxSteps= 100000  # 50steps/second
        self.parse_the_command_line()
        if H: self.host= H
        if p: self.port= p
        if i: self.sid= i
        if e: self.maxEpisodes= e
        if t: self.trackname= t
        if s: self.stage= s
        if d: self.debug= d
        self.S= ServerState()
        self.R= DriverAction()
        self.setup_connection()

    def setup_connection(self):
        u'''Create the UDP socket and repeat the init handshake until the
        server answers with ***identified***.

        Every failed receive decrements a counter; once it drops below
        zero TORCS is killed, relaunched and autostart.sh is run again.
        Exits the process if the socket cannot be created or written to.'''
        # == Set Up UDP Socket ==
        try:
            self.so= socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        except socket.error:
            print(u'Error: Could not create socket...')
            sys.exit(-1)
        # == Initialize Connection To Server ==
        self.so.settimeout(1)

        # This string establishes track sensor angles! You can customize them.
        #a= "-90 -75 -60 -45 -30 -20 -15 -10 -5 0 5 10 15 20 30 45 60 75 90"
        # xed- Going to try something a bit more aggressive...
        a= u"-45 -19 -12 -7 -4 -2.5 -1.7 -1 -.5 0 .5 1 1.7 2.5 4 7 12 19 45"
        initmsg= u'%s(init %s)' % (self.sid,a)
        identify= u'***identified***'

        n_fail = 5
        while True:
            try:
                self.so.sendto(initmsg.encode(), (self.host, self.port))
            except socket.error as emsg:
                # Report why we are exiting instead of dying silently.
                print(u"Error sending init message to server: %s" % (emsg,))
                sys.exit(-1)
            sockdata= u''
            try:
                sockdata,addr= self.so.recvfrom(data_size)
                sockdata = sockdata.decode(u'utf-8')
            except socket.error:
                print(u"Waiting for server on %d............" % self.port)
                print(u"Count Down : %d" % n_fail)
                if n_fail < 0:
                    print(u"relaunch torcs")
                    os.system(u'pkill torcs')
                    time.sleep(1.0)
                    if self.vision is False:
                        os.system(u'torcs -nofuel -nodamage -nolaptime &')
                    else:
                        os.system(u'torcs -nofuel -nodamage -nolaptime -vision &')

                    time.sleep(1.0)
                    os.system(u'sh autostart.sh')
                    n_fail = 5
                n_fail -= 1

            if identify in sockdata:
                print(u"Client connected on %d.............." % self.port)
                break

    def parse_the_command_line(self):
        u'''Apply options from sys.argv[1:] to this client.

        Prints the usage text and exits on unknown options, on values that
        cannot be converted to int, on stray positional arguments, and
        after handling --help or --version.'''
        try:
            (opts, args) = getopt.getopt(sys.argv[1:], u'H:p:i:m:e:t:s:dhv',
                       [u'host=',u'port=',u'id=',u'steps=',
                        u'episodes=',u'track=',u'stage=',
                        u'debug',u'help',u'version'])
        except getopt.error as why:
            print(u'getopt error: %s\n%s' % (why, usage))
            sys.exit(-1)
        try:
            for opt in opts:
                flag, value = opt[0], opt[1]
                if flag in (u'-h', u'--help'):
                    print(usage)
                    sys.exit(0)
                elif flag in (u'-d', u'--debug'):
                    self.debug= True
                elif flag in (u'-H', u'--host'):
                    self.host= value
                elif flag in (u'-i', u'--id'):
                    self.sid= value
                elif flag in (u'-t', u'--track'):
                    self.trackname= value
                elif flag in (u'-s', u'--stage'):
                    self.stage= int(value)
                elif flag in (u'-p', u'--port'):
                    self.port= int(value)
                elif flag in (u'-e', u'--episodes'):
                    self.maxEpisodes= int(value)
                elif flag in (u'-m', u'--steps'):
                    self.maxSteps= int(value)
                elif flag in (u'-v', u'--version'):
                    print(u'%s %s' % (sys.argv[0], version))
                    sys.exit(0)
        except ValueError as why:
            # `opt` is still the option whose value failed to convert.
            print(u'Bad parameter \'%s\' for option %s: %s\n%s' % (
                                       opt[1], opt[0], why, usage))
            sys.exit(-1)
        if len(args) > 0:
            print(u'Superflous input? %s\n%s' % (u', '.join(args), usage))
            sys.exit(-1)

    def get_servers_input(self):
        u'''Server's input is stored in a ServerState object.

        Blocks until a sensor packet has been parsed into self.S, or
        closes the socket and returns when the server announces a
        shutdown or restart.  Does nothing once the socket is closed.'''
        if not self.so: return

        while True:
            # Start every attempt empty so that a timed-out read is not
            # mistaken for a repeat of the previous packet.
            sockdata= u''
            try:
                # Receive server data
                sockdata,addr= self.so.recvfrom(data_size)
                sockdata = sockdata.decode(u'utf-8')
            except socket.error:
                # Progress dot while waiting for data.
                sys.stdout.write(u'.')
                sys.stdout.flush()
            if u'***identified***' in sockdata:
                print(u"Client connected on %d.............." % self.port)
                continue
            elif u'***shutdown***' in sockdata:
                # racePos is absent if no sensor packet was parsed yet.
                print((u"Server has stopped the race on %d. "+
                       u"You were in %d place.") %
                       (self.port,self.S.d.get(u'racePos', 0)))
                self.shutdown()
                return
            elif u'***restart***' in sockdata:
                # What do I do here?
                print(u"Server has restarted the race on %d." % self.port)
                # I haven't actually caught the server doing this.
                self.shutdown()
                return
            elif not sockdata: # Empty?
                continue       # Try again.
            else:
                self.S.parse_server_str(sockdata)
                if self.debug:
                    sys.stderr.write(u"\x1b[2J\x1b[H") # Clear for steady output.
                    print(self.S)
                break # Can now return from this function.

    def respond_to_server(self):
        u'''Send repr(self.R) to the server; exits the process on a socket
        error.  Does nothing once the socket is closed.'''
        if not self.so: return
        try:
            message = repr(self.R)
            self.so.sendto(message.encode(), (self.host, self.port))
        except socket.error as emsg:
            details = emsg.args
            if len(details) > 1:
                print(u"Error sending to server: %s Message %s" % (details[1], details[0]))
            else:
                # Some socket errors (e.g. timeouts) carry a single argument.
                print(u"Error sending to server: %s" % (emsg,))
            sys.exit(-1)
        if self.debug: print(self.R.fancyout())
        # Or use this for plain output:
        #if self.debug: print(self.R)

    def shutdown(self):
        u'''Close the socket and mark the client as disconnected.'''
        if not self.so: return
        print(u"Race terminated or %d steps elapsed. Shutting down %d."
              % (self.maxSteps,self.port))
        self.so.close()
        self.so = None
        #sys.exit() # No need for this really.
291 | 
class ServerState(object):
    u'''What the server is reporting right now.

    self.d maps each sensor name to the value produced by destringify()
    for that sensor; self.servstr keeps the last raw string.'''
    def __init__(self):
        self.servstr= u''
        self.d= dict()

    def parse_server_str(self, server_string):
        u'''Parse a "(name v1 v2 ...)(name v)..." server string into self.d.'''
        # The final character after stripping is dropped; presumably a
        # terminator sent by the server -- TODO confirm.
        self.servstr= server_string.strip()[:-1]
        sslisted= self.servstr.strip().lstrip(u'(').rstrip(u')').split(u')(')
        for i in sslisted:
            w= i.split(u' ')
            self.d[w[0]]= destringify(w[1:])

    def __repr__(self):
        # Return self.rawout() here instead for raw output.
        return self.fancyout()

    def rawout(self):
        u'''Plain "name: value" dump of every sensor, sorted by name.'''
        out= []
        for k in sorted(self.d):
            if type(self.d[k]) is list:
                strout= u', '.join([u'%s' % (i,) for i in self.d[k]])
            else:
                strout= u'%s' % (self.d[k],)
            out.append(u"%s: %s\n" % (k,strout))
        return u''.join(out)

    def fancyout(self):
        u'''Specialty output for useful ServerState monitoring.

        Sensors that have not been reported (or that depend on one that
        has not) are left out instead of raising KeyError.'''
        sensors= [ # Select the ones you want in the order you want them.
        #'curLapTime',
        #'lastLapTime',
        u'stucktimer',
        #'damage',
        #'focus',
        u'fuel',
        #'gear',
        u'distRaced',
        u'distFromStart',
        #'racePos',
        u'opponents',
        u'wheelSpinVel',
        u'z',
        u'speedZ',
        u'speedY',
        u'speedX',
        u'targetSpeed',
        u'rpm',
        u'skid',
        u'slip',
        u'track',
        u'trackPos',
        u'angle',
        ]

        out= []
        #for k in sorted(self.d): # Use this to get all sensors.
        for k in sensors:
            try:
                strout= self._format_sensor(k)
            except KeyError:
                continue # Nothing reported for this sensor yet.
            out.append(u"%s: %s\n" % (k,strout))
        return u''.join(out)

    def _format_sensor(self, k):
        u'''Return the display text for sensor k; raises KeyError when a
        value it needs is missing from self.d.'''
        if type(self.d.get(k)) is list: # Handle list type data.
            if k == u'track': # Nice display for track sensors.
                raw_tsens= [u'%.1f'%x for x in self.d[u'track']]
                return u' '.join(raw_tsens[:9])+u'_'+raw_tsens[9]+u'_'+u' '.join(raw_tsens[10:])
            if k == u'opponents': # Nice display for opponent sensors.
                strout= u''
                for osensor in self.d[u'opponents']:
                    if   osensor >190: oc= u'_'
                    elif osensor > 90: oc= u'.'
                    elif osensor > 39: oc= u'%c' % (int(osensor/2)+97-19)
                    elif osensor > 13: oc= u'%c' % (int(osensor)+65-13)
                    elif osensor >  3: oc= u'%c' % (int(osensor)+48-3)
                    else: oc= u'?'
                    strout+= oc
                return u' -> '+strout[:18] + u' ' + strout[18:]+u' <-'
            return u', '.join([u'%s' % (i,) for i in self.d[k]])

        # Not a list type of value.
        if k == u'gear': # This is redundant now since it's part of RPM.
            gs= u'_._._._._._._._._'
            p= int(self.d[u'gear']) * 2 + 2  # Position
            l= u'%d'%self.d[u'gear'] # Label
            if l==u'-1': l= u'R'
            if l==u'0':  l= u'N'
            return gs[:p]+ u'(%s)'%l + gs[p+3:]
        if k == u'damage':
            return u'%6.0f %s' % (self.d[k], bargraph(self.d[k],0,10000,50,u'~'))
        if k == u'fuel':
            return u'%6.0f %s' % (self.d[k], bargraph(self.d[k],0,100,50,u'f'))
        if k == u'speedX':
            cx= u'X'
            if self.d[k]<0: cx= u'R'
            return u'%6.1f %s' % (self.d[k], bargraph(self.d[k],-30,300,50,cx))
        if k == u'speedY': # This gets reversed for display to make sense.
            return u'%6.1f %s' % (self.d[k], bargraph(self.d[k]*-1,-25,25,50,u'Y'))
        if k == u'speedZ':
            return u'%6.1f %s' % (self.d[k], bargraph(self.d[k],-13,13,50,u'Z'))
        if k == u'z':
            return u'%6.3f %s' % (self.d[k], bargraph(self.d[k],.3,.5,50,u'z'))
        if k == u'trackPos': # This gets reversed for display to make sense.
            cx=u'<'
            if self.d[k]<0: cx= u'>'
            return u'%6.3f %s' % (self.d[k], bargraph(self.d[k]*-1,-1,1,50,cx))
        if k == u'stucktimer':
            if self.d[k]:
                return u'%3d %s' % (self.d[k], bargraph(self.d[k],0,300,50,u"'"))
            return u'Not stuck!'
        if k == u'rpm':
            g= self.d[u'gear']
            if g < 0:
                g= u'R'
            else:
                g= u'%1d'% g
            return bargraph(self.d[k],0,10000,50,g)
        if k == u'angle':
            asyms= [
                  u"  !  ", u".|'  ", u"./'  ", u"_.-  ", u".--  ", u"..-  ",
                  u"---  ", u".__  ", u"-._  ", u"'-.  ", u"'\\.  ", u"'|.  ",
                  u"  |  ", u"  .|'", u"  ./'", u"  .-'", u"  _.-", u"  __.",
                  u"  ---", u"  --.", u"  -._", u"  -..", u"  '\\.", u"  '|."  ]
            rad= self.d[k]
            deg= int(rad*180/PI)
            symno= int(.5+ (rad+PI) / (PI/12) )
            # One symbol per PI/12 step; wrap over the full table so the
            # last symbol is reachable and +PI maps to the same one as -PI.
            symno= symno % len(asyms)
            return u'%5.2f %3d (%s)' % (rad,deg,asyms[symno])
        if k == u'skid': # A sensible interpretation of wheel spin.
            frontwheelradpersec= self.d[u'wheelSpinVel'][0]
            skid= 0
            if frontwheelradpersec:
                skid= .5555555555*self.d[u'speedX']/frontwheelradpersec - .66124
            return bargraph(skid,-.05,.4,50,u'*')
        if k == u'slip': # A sensible interpretation of wheel spin.
            frontwheelradpersec= self.d[u'wheelSpinVel'][0]
            slip= 0
            if frontwheelradpersec:
                slip= ((self.d[u'wheelSpinVel'][2]+self.d[u'wheelSpinVel'][3]) -
                      (self.d[u'wheelSpinVel'][0]+self.d[u'wheelSpinVel'][1]))
            return bargraph(slip,-5,150,50,u'@')
        return u'%s' % (self.d[k],)
445 | 
class DriverAction(object):
    u'''What the driver is intending to do (i.e. send to the server).
    Composes something like this for the server:
    (accel 1)(brake 0)(gear 1)(steer 0)(clutch 0)(focus 0)(meta 0) or
    (accel 1)(brake 0)(gear 1)(steer 0)(clutch 0)(focus -90 -45 0 45 90)(meta 0)'''
    def __init__(self):
        self.actionstr= u''
        # "d" is for data dictionary.
        self.d= { u'accel':0.2,
                  u'brake':0,
                  u'clutch':0,
                  u'gear':1,
                  u'steer':0,
                  u'focus':[-90,-45,0,45,90],
                  u'meta':0
                  }

    def clip_to_limits(self):
        u"""There pretty much is never a reason to send the server
        something like (steer 9483.323). This comes up all the time
        and it's probably just more sensible to always clip it than to
        worry about when to. The "clip" command is still a snakeoil
        utility function, but it should be used only for non standard
        things or non obvious limits (limit the steering to the left,
        for example). For normal limits, simply don't worry about it.

        steer is limited to [-1, 1]; brake, accel and clutch to [0, 1].
        An unknown gear becomes 0, an unknown meta becomes 0, and a focus
        that is not a non-empty list of angles within [-180, 180] becomes 0."""
        self.d[u'steer']= clip(self.d[u'steer'], -1, 1)
        self.d[u'brake']= clip(self.d[u'brake'], 0, 1)
        self.d[u'accel']= clip(self.d[u'accel'], 0, 1)
        self.d[u'clutch']= clip(self.d[u'clutch'], 0, 1)
        if self.d[u'gear'] not in [-1, 0, 1, 2, 3, 4, 5, 6]:
            self.d[u'gear']= 0
        if self.d[u'meta'] not in [0,1]:
            self.d[u'meta']= 0
        focus= self.d[u'focus']
        # An empty list is rejected up front: min()/max() would raise on it.
        if type(focus) is not list or not focus or min(focus)<-180 or max(focus)>180:
            self.d[u'focus']= 0

    def __repr__(self):
        u'''Clip the values in place and return the "(name value)..."
        string sent to the server.'''
        self.clip_to_limits()
        out= []
        for k in self.d:
            v= self.d[k]
            if type(v) is list:
                body= u' '.join([u'%s' % (x,) for x in v])
            else:
                body= u'%.3f' % v
            out.append(u'('+k+u' '+body+u')')
        return u''.join(out)

    def fancyout(self):
        u'''Specialty output for useful monitoring of bot's effectors.'''
        od= self.d.copy()
        od.pop(u'gear',u'') # Not interesting.
        od.pop(u'meta',u'') # Not interesting.
        od.pop(u'focus',u'') # Not interesting. Yet.
        out= []
        for k in sorted(od):
            if k == u'clutch' or k == u'brake' or k == u'accel':
                strout= u'%6.3f %s' % (od[k], bargraph(od[k],0,1,50,k[0].upper()))
            elif k == u'steer': # Reverse the graph to make sense.
                strout= u'%6.3f %s' % (od[k], bargraph(od[k]*-1,-1,1,50,u'S'))
            else:
                strout= u'%s' % (od[k],)
            out.append(u"%s: %s\n" % (k,strout))
        return u''.join(out)
513 | 
514 | # == Misc Utility Functions
def destringify(s):
    u'''Makes a string into a value or a list of strings into a list of
    values (if possible).

    Falsy input is returned unchanged.  A string is converted with
    float(); if that fails a message is printed and the string itself is
    returned.  A one-element list yields the converted element, a longer
    list yields a list of converted elements.  Any other truthy input
    yields None.'''
    if not s: return s
    # Text and byte strings on either Python line (unicode/str on Python 2,
    # str on Python 3) without naming the Python 2-only `unicode` builtin.
    string_types= (type(u''), type(''))
    if isinstance(s, string_types):
        try:
            return float(s)
        except ValueError:
            print(u"Could not find a value in %s" % s)
            return s
    if isinstance(s, list):
        if len(s) < 2:
            return destringify(s[0])
        return [destringify(i) for i in s]
    return None
530 | 
def drive_example(c):
    u'''Example driver: reads c.S.d and writes steer, accel and gear into
    c.R.d.  It will get around the track, but the correct thing to do is
    write your own `drive()` function.'''
    sensors, action = c.S.d, c.R.d
    target_speed = 1000

    # Steer To Corner
    action[u'steer'] = sensors[u'angle']*10 / PI
    # Steer To Center
    action[u'steer'] -= sensors[u'trackPos']*.10

    # Throttle Control
    below_target = sensors[u'speedX'] < target_speed - (action[u'steer']*50)
    action[u'accel'] += .01 if below_target else -.01
    if sensors[u'speedX'] < 10:
        action[u'accel'] += 1/(sensors[u'speedX']+.1)

    # Traction Control System
    spin = sensors[u'wheelSpinVel']
    if (spin[2]+spin[3]) - (spin[0]+spin[1]) > 5:
        action[u'accel'] -= .2

    # Automatic Transmission: one gear up for every speed threshold passed.
    gear = 1
    for threshold in (50, 80, 110, 140, 170):
        if sensors[u'speedX'] > threshold:
            gear += 1
    action[u'gear'] = gear
    return
568 | 
569 | # ================ MAIN ================
if __name__ == u"__main__":
    # Stand-alone demo: connect on port 3101 and drive with the example
    # controller for at most C.maxSteps receive/drive/respond cycles.
    C = Client(p=3101)
    steps_left = C.maxSteps
    while steps_left > 0:
        C.get_servers_input()
        drive_example(C)
        C.respond_to_server()
        steps_left -= 1
    C.shutdown()
577 | 


--------------------------------------------------------------------------------