├── .gitignore ├── README.md ├── autostart.sh ├── dagger.py ├── gym_torcs.py └── snakeoil3_gym.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | model.npz 3 | _tmp.npy 4 | *.pyc 5 | *.txt 6 | image/ 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Imitation Learning with Dataset Aggregation (DAGGER) on Torcs Env 2 | 3 | This repository implements a simple algorithm for imitation learning: [DAGGER](https://www.cs.cmu.edu/~sross1/publications/Ross-AIStats11-NoRegret.pdf). 4 | In this example, the agent only learns to control the steering in [-1, 1]; the speed is computed 5 | automatically in `gym_torcs.TorcsEnv`. 6 | 7 | ## Requirements 8 | 9 | 1. Ubuntu (I have only tested on this) 10 | 2. Python 3 11 | 3. TensorLayer and TensorFlow 12 | 4. [Gym-Torcs](https://github.com/ugo-nama-kun/gym_torcs) 13 | 14 | ## Setting Up 15 | 16 | Setting up the environment is a little tedious, but any incorrect configuration will lead to FAILURE. 17 | After installing [Gym-Torcs](https://github.com/ugo-nama-kun/gym_torcs), please follow the instructions below to confirm that everything works well: 18 | 19 | - Open a terminal: 20 | - Run `sudo torcs -vision` to start a game 21 | - `Race --> Practice --> Configure Race`: set the driver to `scr_server 1` instead of `player` 22 | - Open Torcs server by selecting `Race --> Practice --> New Race`: 23 | Torcs should then show a blue screen with several lines of text. 24 | 25 | - Open another terminal: 26 | - Run `python snakeoil3_gym.py` in this second terminal; it will show how the fake AI controls the car. 27 | - Press F2 to see the driver view. 28 | 29 | - Set image size to 64x64x3: 30 | - The model is trained on 64x64 RGB observations. 
31 | - Run `sudo torcs -vision` to start a game 32 | - `Options --> Display --> select 64x64 --> Apply` 33 | 34 | 35 | ## Usage 36 | Make sure everything above works well and then run: 37 | 38 | - `python dagger.py` 39 | 40 | It will start a Torcs server at the beginning of every episode, and terminate the server when the car crashes or the speed is too low. 41 | Note that the self-contained `gym_torcs.py` is modified from [Gym-Torcs](https://github.com/ugo-nama-kun/gym_torcs); you can try different settings (like the default speed or the termination speed) by modifying it. 42 | 43 | ## Results 44 | 45 | After Episode 1, the car crashes after 315 steps. 46 | 47 | ![](http://i.imgur.com/YfqFXQZ.gif) 48 | 49 | 61 | 62 | After Episode 3, the car does not crash anymore !!! 63 | 64 | ![](http://i.imgur.com/doz8U0z.gif) 65 | 66 | The number of steps and episodes might vary depending on the parameter initialization. 67 | 68 | 69 | ENJOY ! 70 | 71 | 74 | -------------------------------------------------------------------------------- /autostart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | xte 'key Return' 3 | xte 'usleep 100000' 4 | xte 'key Return' 5 | xte 'usleep 100000' 6 | xte 'key Up' 7 | xte 'usleep 100000' 8 | xte 'key Up' 9 | xte 'usleep 100000' 10 | xte 'key Return' 11 | xte 'usleep 100000' 12 | xte 'key Return' 13 | -------------------------------------------------------------------------------- /dagger.py: -------------------------------------------------------------------------------- 1 | """ 2 | All information on README.md 3 | """ 4 | 5 | import tensorflow as tf 6 | import tensorlayer as tl 7 | from tensorlayer.layers import * 8 | from gym_torcs import TorcsEnv 9 | import numpy as np 10 | import time 11 | 12 | img_dim = [64, 64, 3] 13 | n_action = 1 # steer only (float, left and right 1 ~ -1) 14 | steps = 1000 # maximum step for a game 15 | batch_size = 32 16 | n_epoch = 100 17 | 18 | def 
def get_teacher_action(ob):
    """Return the scripted teacher's steer command for observation *ob*.

    The command is computed from the ``angle`` and ``trackPos`` sensor
    fields of the observation (not from its image): a term proportional to
    ``ob.angle`` minus a term proportional to ``ob.trackPos``.

    Returns:
        A one-element array holding the steer value.
    """
    steer = ob.angle*10/np.pi
    steer -= ob.trackPos*0.10
    return np.array([steer])


def img_reshape(input_img):
    """Turn one channel-first image into a one-image, channel-last batch.

    (3, 64, 64) --> (1, 64, 64, 3); the image is also flipped upside down.
    """
    _img = np.transpose(input_img, (1, 2, 0))
    _img = np.flipud(_img)
    _img = np.reshape(_img, (1, img_dim[0], img_dim[1], img_dim[2]))
    return _img


# Aggregated dataset; grown first with teacher data, then by every DAgger episode.
images_all = np.zeros((0, img_dim[0], img_dim[1], img_dim[2]))
actions_all = np.zeros((0, n_action))
rewards_all = np.zeros((0,))

img_list = []
action_list = []
reward_list = []

###================= Get demonstration data
env = TorcsEnv(vision=True, throttle=False)
ob = env.reset(relaunch=True)

print("#"*50)
print('Collecting data from teacher (fake AI) ... ')
for i in range(steps):
    if i == 0:
        act = np.array([0.0])
    else:
        act = get_teacher_action(ob)
    if i % 100 == 0:
        print("step:", i)
    # if i > 50: # quick stop for quick debug
    #     break
    # Record the image *before* stepping so that each image is paired with
    # the action chosen for that very observation (the DAgger loop labels
    # observations the same way).
    img_list.append(ob.img)
    action_list.append(act)
    ob, reward, done, _ = env.step(act)
    reward_list.append(np.array([reward]))
    if done:
        # The environment has requested a restart; later observations would
        # not belong to this demonstration run.
        break

env.end()

print("#"*50)
print('Packing data into arrays... ')
# One concatenate per array instead of one per sample (which re-copies the
# whole dataset every iteration).  Keeping the existing array first preserves
# its dtype and also handles an empty sample list.
images_all = np.concatenate(
    [images_all] + [img_reshape(img) for img in img_list], axis=0)
actions_all = np.concatenate(
    [actions_all] + [np.reshape(act, [1, n_action]) for act in action_list], axis=0)
rewards_all = np.concatenate([rewards_all] + reward_list, axis=0)
# save the teacher's data
tl.files.save_any_to_npy(save_dict={'im': images_all, 'act': actions_all, 're': rewards_all}, name='_tmp.npy')
# load the teacher's data
# data = tl.files.load_npy_to_any(name='_tmp.npy')
# images_all = data['im']; actions_all = data['act']; rewards_all = data['re']

# save some of the teacher's observations
tl.files.exists_or_mkdir('image/teacher', verbose=True)
for i in range(0, len(images_all), 10):
    tl.vis.save_image(images_all[i], 'image/teacher/im_%d.png' % i)


###================= Define model
class Agent(object):
    """Convolutional policy mapping a batch of images to ``n_action`` outputs.

    Two graphs sharing the same variables are built: ``n_train`` (dropout
    enabled) is used for optimisation, ``n_test`` (dropout disabled) for
    prediction and for saving/loading parameters.
    """

    def __init__(self, name='model', sess=None):
        """Build both networks and the training ops, then initialise variables.

        Args:
            name: variable scope name; also the stem of the ``.npz`` file.
            sess: TensorFlow session used for every run call (required).

        Raises:
            ValueError: if *sess* is None.
        """
        if sess is None:
            raise ValueError("Agent needs a TensorFlow session: pass sess=...")
        self.name = name
        self.sess = sess

        self.x = tf.placeholder(tf.float32, [None, img_dim[0], img_dim[1], img_dim[2]], name='Observaion')
        self.y = tf.placeholder(tf.float32, [None, n_action], name='Steer')

        # First call creates the variables, second call reuses them.
        self._build_net(True, False)
        self._build_net(False, True)
        self._define_train_ops()

        tl.layers.initialize_global_variables(self.sess)

        print()
        self.n_test.print_layers()
        print()
        self.n_test.print_params(False)
        print()
        # exit()

    def _build_net(self, is_train=True, reuse=None):
        """Build the network and store it as ``n_train`` or ``n_test``."""
        with tf.variable_scope(self.name, reuse=reuse) as vs:
            tl.layers.set_name_reuse(reuse)

            n = InputLayer(self.x / 255, name='in')

            n = Conv2d(n, 32, (3, 3), (1, 1), tf.nn.relu, "VALID", name='c1/1')
            n = Conv2d(n, 32, (3, 3), (1, 1), tf.nn.relu, "VALID", name='c1/2')
            n = MaxPool2d(n, (2, 2), (2, 2), 'VALID', name='max1')

            n = DropoutLayer(n, 0.75, is_fix=True, is_train=is_train, name='drop1')

            n = Conv2d(n, 64, (3, 3), (1, 1), tf.nn.relu, "VALID", name='c2/1')
            n = Conv2d(n, 64, (3, 3), (1, 1), tf.nn.relu, "VALID", name='c2/2')
            n = MaxPool2d(n, (2, 2), (2, 2), 'VALID', name='max2')
            # print(n.outputs)
            n = DropoutLayer(n, 0.75, is_fix=True, is_train=is_train, name='drop2')

            n = FlattenLayer(n, name='f')
            n = DenseLayer(n, 512, tf.nn.relu, name='dense1')
            n = DropoutLayer(n, 0.5, is_fix=True, is_train=is_train, name='drop3')
            n = DenseLayer(n, n_action, tf.nn.tanh, name='o')

        if is_train:
            self.n_train = n
        else:
            self.n_test = n

    def _define_train_ops(self):
        """Define the squared-error cost on ``n_train`` and its Adam update."""
        self.cost = tl.cost.mean_squared_error(self.n_train.outputs, self.y, is_mean=False)
        self.train_params = tl.layers.get_variables_with_name(self.name, train_only=True, printable=False)
        self.train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(self.cost, var_list=self.train_params)

    def train(self, X, y, n_epoch=100, batch_size=10, print_freq=20):
        """Run *n_epoch* passes of shuffled minibatch training over (X, y).

        The mean cost of an epoch is printed every *print_freq* epochs; it is
        reported as ``nan`` when the iterator produced no minibatch at all.
        """
        for epoch in range(n_epoch):
            start_time = time.time()
            total_err, n_iter = 0, 0
            for X_, y_ in tl.iterate.minibatches(X, y, batch_size, shuffle=True):
                _, err = self.sess.run([self.train_op, self.cost], feed_dict={self.x: X_, self.y: y_})
                total_err += err
                n_iter += 1
            if epoch % print_freq == 0:
                # Guard against a dataset that yielded no minibatch.
                mean_err = total_err/n_iter if n_iter else float('nan')
                print("Epoch [%d/%d] cost:%f took:%fs" % (epoch, n_epoch, mean_err, time.time()-start_time))

    def predict(self, image):
        """Return the output of the dropout-free network for a batch of images."""
        a = self.sess.run(self.n_test.outputs, {self.x : image})
        return a

    def save_model(self):
        """Save all parameters to ``<name>.npz``."""
        tl.files.save_npz(self.n_test.all_params, name=self.name+'.npz', sess=self.sess)

    def load_model(self):
        """Load parameters from ``<name>.npz`` into the network."""
        tl.files.load_and_assign_npz(sess=self.sess, name=self.name+'.npz', network=self.n_test)


###===================== Pretrain model using data for demonstration
sess = tf.InteractiveSession()
model = Agent(name='model', sess=sess)
model.train(images_all, actions_all, n_epoch=n_epoch, batch_size=batch_size)
# save model after pretraining
model.save_model()
# model.load_model()

###===================== Aggregate and retrain
n_episode = 5
# The context manager closes results.txt even if an episode raises.
with open('results.txt', 'w') as output_file:
    for episode in range(n_episode):
        ob_list = []
        # restart the game for every episode
        env = TorcsEnv(vision=True, throttle=False)
        ob = env.reset(relaunch=True)
        reward_sum = 0.0
        print("#"*50)
        print("# Episode: %d start" % episode)
        for i in range(steps):
            act = model.predict(img_reshape(ob.img))
            # predict() returns one row per input image; hand the environment
            # the single action row rather than the whole batch.
            ob, reward, done, _ = env.step(act[0])
            if done:
                break
            else:
                ob_list.append(ob)
            reward_sum += reward
            # print(i, reward, reward_sum, done, str(act[0]))
        print("# step: %d reward: %f " % (i, reward_sum))
        print("#"*50)
        output_file.write('Number of Steps: %02d\t Reward: %0.04f\n' % (i, reward_sum))
        output_file.flush()  # keep the log usable if a later episode crashes
        env.end()

        if i == (steps-1):
            break

        # Dataset AGGregation: bring learner's and expert's trajectory distributions
        # closer by labelling additional data points resulting from applying the current policy.
        # One concatenate per array instead of one per observation.
        images_all = np.concatenate(
            [images_all] + [img_reshape(visited.img) for visited in ob_list], axis=0)
        actions_all = np.concatenate(
            [actions_all] + [np.reshape(get_teacher_action(visited), [1, n_action])
                             for visited in ob_list], axis=0)

        model.train(images_all, actions_all, n_epoch=n_epoch, batch_size=batch_size)
        model.save_model()

###=================== Play the game with the trained model
# while True:
#     env = TorcsEnv(vision=True, throttle=False)
#     ob = env.reset(relaunch=True)
#     reward_sum = 0.0
#     for i in range(steps):
#         act = model.predict(img_reshape(ob.img))
#         ob, reward, done, _ = env.step(act)
#         if done is True:
#             break
#         else:
#             ob_list.append(ob)
#         reward_sum += reward
#     print("PLAY WITH THE TRAINED MODEL")
#     print(reward_sum)
#     env.end()

# --------------------------------------------------------------------------------
# /gym_torcs.py:
# --------------------------------------------------------------------------------
import gym
from gym import spaces
import numpy as np
# from os import path
import snakeoil3_gym as snakeoil3
import numpy as np


import copy


import collections as col
import os
import time


class TorcsEnv:
    """Gym-style environment that drives a TORCS race through a snakeoil3 client.

    The constructor launches TORCS; ``reset()`` creates the UDP client and
    must be called before ``step()``.
    """
    terminal_judge_start = 500  # Speed limit is applied after this step
    termination_limit_progress = 2  # [km/h], episode terminates if car is running slower than this limit [5]
    default_speed = 50

    initial_reset = True

    # Observation tuple types are built once here instead of on every step.
    # The vision layout inserts 'img' between 'wheelSpinVel' and 'angle'.
    _SENSOR_NAMES = ['focus',
                     'speedX', 'speedY', 'speedZ',
                     'opponents',
                     'rpm',
                     'track',
                     'wheelSpinVel',
                     'angle',
                     'trackPos']
    _Observation = col.namedtuple('Observaion', _SENSOR_NAMES)
    _VisionObservation = col.namedtuple(
        'Observaion', _SENSOR_NAMES[:8] + ['img'] + _SENSOR_NAMES[8:])

    def __init__(self, vision=False, throttle=False, gear_change=False):
        """Kill any running TORCS, launch a new one and define the spaces.

        Args:
            vision: launch TORCS with ``-vision`` and add ``img`` to observations.
            throttle: if True the agent also supplies ``accel`` (2-d action);
                otherwise acceleration is controlled automatically in ``step``.
            gear_change: if True the agent also supplies ``gear``.
        """
        #print("Init")
        self.vision = vision
        self.throttle = throttle
        self.gear_change = gear_change

        self.initial_run = True

        ##print("launch torcs")
        os.system('sudo pkill torcs')  # Hao Dong add sudo
        self._launch_torcs()

        # The snakeoil3 client is created in reset(), not here.
        if throttle is False:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,))
        else:
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,))

        if vision is False:
            high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf])
            low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf])
            self.observation_space = spaces.Box(low=low, high=high)
        else:
            high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255])
            low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0])
            self.observation_space = spaces.Box(low=low, high=high)

    def _launch_torcs(self):
        """Start TORCS in the background, then run autostart.sh.

        Shared by ``__init__`` and ``reset_torcs``; the caller is responsible
        for killing any previous TORCS process first.
        """
        time.sleep(0.5)
        if self.vision is True:
            os.system('torcs -nofuel -nodamage -nolaptime -vision &')
        else:
            os.system('torcs -nofuel -nodamage -nolaptime &')
        time.sleep(0.5)
        os.system('sh autostart.sh')
        time.sleep(0.5)

    def step(self, u):
        """Send action *u* to TORCS and advance the simulation by one step.

        Returns:
            ``(observation, reward, done, info)`` where *done* is the client's
            ``meta`` flag (set when a restart is requested) and *info* is an
            empty dict.
        """
        #print("Step")
        # convert thisAction to the actual torcs actionstr
        client = self.client

        this_action = self.agent_to_torcs(u)

        # Apply Action
        action_torcs = client.R.d

        # Steering
        action_torcs['steer'] = this_action['steer']  # in [-1, 1]

        #  Simple Automatic Throttle Control by Snakeoil
        if self.throttle is False:
            target_speed = self.default_speed
            if client.S.d['speedX'] < target_speed - (client.R.d['steer']*50):
                client.R.d['accel'] += .01
            else:
                client.R.d['accel'] -= .01

            if client.R.d['accel'] > 0.2:
                client.R.d['accel'] = 0.2

            if client.S.d['speedX'] < 10:
                client.R.d['accel'] += 1/(client.S.d['speedX']+.1)

            # Traction Control System
            if ((client.S.d['wheelSpinVel'][2]+client.S.d['wheelSpinVel'][3]) -
               (client.S.d['wheelSpinVel'][0]+client.S.d['wheelSpinVel'][1]) > 5):
                action_torcs['accel'] -= .2
        else:
            action_torcs['accel'] = this_action['accel']

        #  Automatic Gear Change by Snakeoil
        if self.gear_change is True:
            action_torcs['gear'] = this_action['gear']
        else:
            #  Automatic Gear Change by Snakeoil is possible
            action_torcs['gear'] = 1
            # Disabled speed-based gear selection:
            # if client.S.d['speedX'] > 50:
            #     action_torcs['gear'] = 2
            # if client.S.d['speedX'] > 80:
            #     action_torcs['gear'] = 3
            # if client.S.d['speedX'] > 110:
            #     action_torcs['gear'] = 4
            # if client.S.d['speedX'] > 140:
            #     action_torcs['gear'] = 5
            # if client.S.d['speedX'] > 170:
            #     action_torcs['gear'] = 6

        # Save the previous full-obs from torcs for the reward calculation
        obs_pre = copy.deepcopy(client.S.d)

        # One-Step Dynamics Update #################################
        # Apply the Agent's action into torcs
        client.respond_to_server()
        # Get the response of TORCS
        client.get_servers_input()

        # Get the current full-observation from torcs
        obs = client.S.d

        # Make an observation from a raw observation vector from TORCS
        self.observation = self.make_observaton(obs)

        # Reward setting Here #######################################
        # direction-dependent positive reward
        track = np.array(obs['track'])
        sp = np.array(obs['speedX'])
        progress = sp*np.cos(obs['angle'])
        reward = progress

        # collision detection
        if obs['damage'] - obs_pre['damage'] > 0:
            reward = -1

        # Termination judgement #########################
        if track.min() < 0:  # Episode is terminated if the car is out of track
            reward = - 1
            client.R.d['meta'] = True

        if self.terminal_judge_start < self.time_step:  # Episode terminates if the progress of agent is small
            if progress < self.termination_limit_progress:
                client.R.d['meta'] = True

        if np.cos(obs['angle']) < 0:  # Episode is terminated if the agent runs backward
            client.R.d['meta'] = True

        if client.R.d['meta'] is True:  # Send a reset signal
            self.initial_run = False
            client.respond_to_server()

        self.time_step += 1

        return self.get_obs(), reward, client.R.d['meta'], {}

    def reset(self, relaunch=False):
        """Start a new episode and return its first observation.

        On every reset after the first one of this instance, a restart request
        is sent to the running race and, if *relaunch* is True, TORCS itself
        is restarted.  A new client is then connected.
        """
        #print("Reset")

        self.time_step = 0

        if self.initial_reset is not True:
            self.client.R.d['meta'] = True
            self.client.respond_to_server()

            ## TENTATIVE. Restarting TORCS every episode suffers the memory leak bug!
            if relaunch is True:
                self.reset_torcs()
                print("### TORCS is RELAUNCHED ###")

        # Modify here if you use multiple tracks in the environment
        self.client = snakeoil3.Client(p=3101, vision=self.vision)  # Open new UDP in vtorcs
        self.client.MAX_STEPS = np.inf

        client = self.client
        client.get_servers_input()  # Get the initial input from torcs

        obs = client.S.d  # Get the current full-observation from torcs
        self.observation = self.make_observaton(obs)

        self.last_u = None

        self.initial_reset = False
        return self.get_obs()

    def end(self):
        """Kill the TORCS process."""
        os.system('pkill torcs')

    def get_obs(self):
        """Return the most recently built observation."""
        return self.observation

    def reset_torcs(self):
        """Kill TORCS and launch it again."""
        #print("relaunch torcs")
        os.system('pkill torcs')
        self._launch_torcs()

    def agent_to_torcs(self, u):
        """Translate the agent's action vector *u* into a dict of TORCS fields.

        ``u[0]`` is always the steer; ``u[1]`` is read as accel when throttle
        control is enabled and ``u[2]`` as gear when gear change is enabled.
        """
        torcs_action = {'steer': u[0]}

        if self.throttle is True:  # throttle action is enabled
            torcs_action.update({'accel': u[1]})

        if self.gear_change is True:  # gear change action is enabled
            torcs_action.update({'gear': u[2]})

        return torcs_action

    def obs_vision_to_image_rgb(self, obs_image_vec):
        """De-interleave a flat R,G,B,R,G,B,... vector into a (3, 64, 64) uint8 array."""
        image_vec = obs_image_vec
        r = image_vec[0:len(image_vec):3]
        g = image_vec[1:len(image_vec):3]
        b = image_vec[2:len(image_vec):3]

        sz = (64, 64)
        r = np.array(r).reshape(sz)
        g = np.array(g).reshape(sz)
        b = np.array(b).reshape(sz)
        return np.array([r, g, b], dtype=np.uint8)

    def make_observaton(self, raw_obs):
        """Build the observation namedtuple from the raw TORCS sensor dict.

        Every sensor is converted to a float32 array; distance-like sensors
        are divided by 200 and speeds by ``default_speed``.  With vision
        enabled the tuple additionally carries ``img`` built from
        ``raw_obs['img']``.
        """
        def sensor(key, scale=None):
            values = np.array(raw_obs[key], dtype=np.float32)
            return values if scale is None else values/scale

        fields = dict(focus=sensor('focus', 200.),
                      speedX=sensor('speedX', self.default_speed),
                      speedY=sensor('speedY', self.default_speed),
                      speedZ=sensor('speedZ', self.default_speed),
                      opponents=sensor('opponents', 200.),
                      rpm=sensor('rpm'),
                      track=sensor('track', 200.),
                      wheelSpinVel=sensor('wheelSpinVel'),
                      angle=sensor('angle'),
                      trackPos=sensor('trackPos'))

        if self.vision is False:
            return self._Observation(**fields)

        # Get RGB from observation
        fields['img'] = self.obs_vision_to_image_rgb(raw_obs['img'])
        return self._VisionObservation(**fields)

# --------------------------------------------------------------------------------
# /snakeoil3_gym.py:
# --------------------------------------------------------------------------------
#!/usr/bin/python
# snakeoil.py
# Chris X Edwards
# Snake Oil is a Python library for interfacing with a TORCS
# race car simulator which has been patched with the server
# extensions used in the Simulated Car Racing competitions.
# http://scr.geccocompetitions.com/
#
# To use it, you must import it and create a "drive()" function.
# This will take care of option handling and server connecting, etc.
# To see how to write your own client do something like this which is
# a complete working client:
# /-----------------------------------------------\
# |#!/usr/bin/python                              |
# |import snakeoil                                |
# |if __name__ == "__main__":                     |
# |    C= snakeoil.Client()                       |
# |    for step in xrange(C.maxSteps,0,-1):       |
# |        C.get_servers_input()                  |
# |        snakeoil.drive_example(C)              |
# |        C.respond_to_server()                  |
# |    C.shutdown()                               |
# \-----------------------------------------------/
# This should then be a full featured client. The next step is to
# replace 'snakeoil.drive_example()' with your own. There is a
# dictionary which holds various option values (see `default_options`
# variable for all the details) but you probably only need a few
# things from it. Mainly the `trackname` and `stage` are important
# when developing a strategic bot.
#
# This dictionary also contains a ServerState object
# (key=S) and a DriverAction object (key=R for response). This allows
# you to get at all the information sent by the server and to easily
# formulate your reply.
# These objects contain a member dictionary "d"
# (for data dictionary) which contain key value pairs based on the
# server's syntax. Therefore, you can read the following:
#    angle, curLapTime, damage, distFromStart, distRaced, focus,
#    fuel, gear, lastLapTime, opponents, racePos, rpm,
#    speedX, speedY, speedZ, track, trackPos, wheelSpinVel, z
# The syntax specifically would be something like:
#    X= o[S.d['trackPos']]
# And you can set the following:
#    accel, brake, clutch, gear, steer, focus, meta
# The syntax is:
#     o[R.d['steer']]= X
# Note that it is 'steer' and not 'steering' as described in the manual!
# All values should be sensible for their type, including lists being lists.
# See the SCR manual or http://xed.ch/help/torcs.html for details.
#
# If you just run the snakeoil.py base library itself it will implement a
# serviceable client with a demonstration drive function that is
# sufficient for getting around most tracks.
# Try `snakeoil.py --help` to get started.

# for Python3-based torcs python robot client
import socket
import sys
import getopt
import os
import time
PI= 3.14159265359

data_size = 2**17

# Initialize help messages
# (every option declared with ':' in parse_the_command_line takes a value,
# shown here as <...>)
ophelp=  'Options:\n'
ophelp+= ' --host, -H <host>    TORCS server host. [localhost]\n'
ophelp+= ' --port, -p <port>    TORCS port. [3001]\n'
ophelp+= ' --id, -i <id>        ID for server. [SCR]\n'
ophelp+= ' --steps, -m <#>      Maximum simulation steps. 1 sec ~ 50 steps. [100000]\n'
ophelp+= ' --episodes, -e <#>   Maximum learning episodes. [1]\n'
ophelp+= ' --track, -t <track>  Your name for this track. Used for learning. [unknown]\n'
ophelp+= ' --stage, -s <#>      0=warm up, 1=qualifying, 2=race, 3=unknown. [3]\n'
ophelp+= ' --debug, -d          Output full telemetry.\n'
ophelp+= ' --help, -h           Show this help.\n'
ophelp+= ' --version, -v        Show current version.'
usage= 'Usage: %s [ophelp [optargs]] \n' % sys.argv[0]
usage= usage + ophelp
version= "20130505-2"


def clip(v,lo,hi):
    '''Return v limited to the closed interval [lo, hi].'''
    if v<lo: return lo
    elif v>hi: return hi
    else: return v


def bargraph(x,mn,mx,w,c='X'):
    '''Draws a simple asciiart bar graph. Very handy for
    visualizing what's going on with the data.
    x= Value from sensor, mn= minimum plottable value,
    mx= maximum plottable value, w= width of plot in chars,
    c= the character to plot with.'''
    if not w: return '' # No width!
    if x<mn: x= mn      # Clip to bounds.
    if x>mx: x= mx      # Clip to bounds.
    tx= mx-mn # Total real units possible to show on graph.
    if tx<=0: return 'backwards' # Stupid bounds.
    upw= tx/float(w) # X Units per output char width.
    if upw<=0: return 'what?' # Don't let this happen.
    negpu, pospu, negnonpu, posnonpu= 0,0,0,0
    if mn < 0: # Then there is a negative part to graph.
        if x < 0: # And the plot is on the negative side.
            negpu= -x + min(0,mx)
            negnonpu= -mn + x
        else: # Plot is on pos. Neg side is empty.
            negnonpu= -mn + min(0,mx) # But still show some empty neg.
    if mx > 0: # There is a positive part to the graph
        if x > 0: # And the plot is on the positive side.
            pospu= x - max(0,mn)
            posnonpu= mx - x
        else: # Plot is on neg. Pos side is empty.
            posnonpu= mx - max(0,mn) # But still show some empty pos.
    nnc= int(negnonpu/upw)*'-'
    npc= int(negpu/upw)*c
    ppc= int(pospu/upw)*c
    pnc= int(posnonpu/upw)*'_'
    return '[%s]' % (nnc+npc+ppc+pnc)


class Client():
    '''UDP client for a TORCS SCR server.

    Holds the latest server state in ``S`` and the pending driver reply in
    ``R``; construction parses the command line and connects to the server.
    '''
    def __init__(self,H=None,p=None,i=None,e=None,t=None,s=None,d=None,vision=False):
        '''Set defaults, apply command-line options, then constructor
        arguments (which take precedence), and connect to the server.'''
        # If you don't like the option defaults, change them here.
        self.vision = vision

        self.host= 'localhost'
        self.port= 3001
        self.sid= 'SCR'
        self.maxEpisodes=1 # "Maximum number of learning episodes to perform"
        self.trackname= 'unknown'
        self.stage= 3 # 0=Warm-up, 1=Qualifying 2=Race, 3=unknown
        self.debug= False
        self.maxSteps= 100000  # 50steps/second
        self.parse_the_command_line()
        if H: self.host= H
        if p: self.port= p
        if i: self.sid= i
        if e: self.maxEpisodes= e
        if t: self.trackname= t
        # 0 (warm-up) is a valid stage, so test for None rather than truthiness.
        if s is not None: self.stage= s
        if d: self.debug= d
        self.S= ServerState()
        self.R= DriverAction()
        self.setup_connection()

    def setup_connection(self):
        '''Create the UDP socket and repeat the init message until the
        server answers with its identification string.  After repeated
        receive failures TORCS is killed and relaunched.'''
        # == Set Up UDP Socket ==
        try:
            self.so= socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        except socket.error:
            print('Error: Could not create socket...')
            sys.exit(-1)
        # == Initialize Connection To Server ==
        self.so.settimeout(1)

        n_fail = 5
        while True:
            # This string establishes track sensor angles! You can customize them.
            #a= "-90 -75 -60 -45 -30 -20 -15 -10 -5 0 5 10 15 20 30 45 60 75 90"
            # xed- Going to try something a bit more aggressive...
            a= "-45 -19 -12 -7 -4 -2.5 -1.7 -1 -.5 0 .5 1 1.7 2.5 4 7 12 19 45"

            initmsg='%s(init %s)' % (self.sid,a)

            try:
                self.so.sendto(initmsg.encode(), (self.host, self.port))
            except socket.error:
                sys.exit(-1)
            sockdata= str()
            try:
                sockdata,addr= self.so.recvfrom(data_size)
                sockdata = sockdata.decode('utf-8')
            except socket.error:
                print("Waiting for server on %d............" % self.port)
                print("Count Down : " + str(n_fail))
                if n_fail < 0:
                    print("relaunch torcs")
                    os.system('pkill torcs')
                    time.sleep(1.0)
                    if self.vision is False:
                        os.system('torcs -nofuel -nodamage -nolaptime &')
                    else:
                        os.system('torcs -nofuel -nodamage -nolaptime -vision &')

                    time.sleep(1.0)
                    os.system('sh autostart.sh')
                    n_fail = 5
                n_fail -= 1

            identify = '***identified***'
            if identify in sockdata:
                print("Client connected on %d.............." % self.port)
                break

    def parse_the_command_line(self):
        '''Override the defaults from ``sys.argv``; exits the process on
        --help, --version, unknown options, bad values or extra arguments.'''
        try:
            (opts, args) = getopt.getopt(sys.argv[1:], 'H:p:i:m:e:t:s:dhv',
                       ['host=','port=','id=','steps=',
                        'episodes=','track=','stage=',
                        'debug','help','version'])
        except getopt.error as why:
            print('getopt error: %s\n%s' % (why, usage))
            sys.exit(-1)
        try:
            for opt in opts:
                if opt[0] == '-h' or opt[0] == '--help':
                    print(usage)
                    sys.exit(0)
                if opt[0] == '-d' or opt[0] == '--debug':
                    self.debug= True
                if opt[0] == '-H' or opt[0] == '--host':
                    self.host= opt[1]
                if opt[0] == '-i' or opt[0] == '--id':
                    self.sid= opt[1]
                if opt[0] == '-t' or opt[0] == '--track':
                    self.trackname= opt[1]
                if opt[0] == '-s' or opt[0] == '--stage':
                    self.stage= int(opt[1])
                if opt[0] == '-p' or opt[0] == '--port':
                    self.port= int(opt[1])
                if opt[0] == '-e' or opt[0] == '--episodes':
                    self.maxEpisodes= int(opt[1])
                if opt[0] == '-m' or opt[0] == '--steps':
                    self.maxSteps= int(opt[1])
                if opt[0] == '-v' or opt[0] == '--version':
                    print('%s %s' % (sys.argv[0], version))
                    sys.exit(0)
        except ValueError as why:
            print('Bad parameter \'%s\' for option %s: %s\n%s' % (
                                       opt[1], opt[0], why, usage))
            sys.exit(-1)
        if len(args) > 0:
            print('Superflous input? %s\n%s' % (', '.join(args), usage))
            sys.exit(-1)

    def get_servers_input(self):
        '''Server's input is stored in a ServerState object'''
        if not self.so: return

        while True:
            # Start every attempt empty so that a timeout is never mistaken
            # for a repeat of the previously received datagram.
            sockdata= str()
            try:
                # Receive server data
                sockdata,addr= self.so.recvfrom(data_size)
                sockdata = sockdata.decode('utf-8')
            except socket.error:
                print('.', end=' ')
                #print "Waiting for data on %d.............." % self.port
            if '***identified***' in sockdata:
                print("Client connected on %d.............." % self.port)
                continue
            elif '***shutdown***' in sockdata:
                print((("Server has stopped the race on %d. "+
                        "You were in %d place.") %
                        (self.port,self.S.d['racePos'])))
                self.shutdown()
                return
            elif '***restart***' in sockdata:
                # What do I do here?
                print("Server has restarted the race on %d." % self.port)
                # I haven't actually caught the server doing this.
                self.shutdown()
                return
            elif not sockdata: # Empty?
                continue       # Try again.
            else:
                self.S.parse_server_str(sockdata)
                if self.debug:
                    sys.stderr.write("\x1b[2J\x1b[H") # Clear for steady output.
                    print(self.S)
                break # Can now return from this function.

    def respond_to_server(self):
        '''Send the current DriverAction (``repr(self.R)``) to the server;
        exits the process if the send fails.'''
        if not self.so: return
        try:
            message = repr(self.R)
            self.so.sendto(message.encode(), (self.host, self.port))
        except socket.error as emsg:
            # Exceptions cannot be indexed on Python 3; use the OSError fields.
            print("Error sending to server: %s Message %s" % (emsg.strerror,str(emsg.errno)))
            sys.exit(-1)
        if self.debug: print(self.R.fancyout())
        # Or use this for plain output:
        #if self.debug: print self.R

    def shutdown(self):
        '''Close the socket; later calls on this client become no-ops.'''
        if not self.so: return
        print(("Race terminated or %d steps elapsed. Shutting down %d."
               % (self.maxSteps,self.port)))
        self.so.close()
        self.so = None
        #sys.exit() # No need for this really.
class ServerState():
    '''What the server is reporting right now.

    `servstr` holds the last raw server message (minus its final character)
    and `d` maps each sensor name found in it to its parsed value.
    '''

    # Sensors whose display in `fancyout` is computed from other entries of
    # `self.d`.  Any sensor not named here needs only its own entry.
    _FANCY_INPUTS = {
        'rpm': ('rpm', 'gear'),
        'skid': ('wheelSpinVel', 'speedX'),
        'slip': ('wheelSpinVel',),
    }

    def __init__(self):
        self.servstr = str()
        self.d = dict()

    def parse_server_str(self, server_string):
        '''Parse the server string.

        The message is a run of "(name value ...)" groups.  Each group is
        stored in `self.d` under its name, with the values converted by
        `destringify`.  Entries left over from earlier messages that this
        message does not mention are kept.
        '''
        # The final character of the message is dropped before splitting
        # (presumably a terminator byte -- confirm against the server).
        self.servstr = server_string.strip()[:-1]
        groups = self.servstr.strip().lstrip('(').rstrip(')').split(')(')
        for group in groups:
            if not group:  # Empty message: do not record a '' sensor.
                continue
            words = group.split(' ')
            self.d[words[0]] = destringify(words[1:])

    def __repr__(self):
        # Swap in `self.rawout()` here for raw output.
        return self.fancyout()

    def rawout(self):
        '''Plain "name: value" line for every sensor, sorted by name.'''
        lines = []
        for k in sorted(self.d):
            value = self.d[k]
            if isinstance(value, list):
                strout = ', '.join(str(i) for i in value)
            else:
                strout = str(value)
            lines.append("%s: %s\n" % (k, strout))
        return ''.join(lines)

    def fancyout(self):
        '''Specialty output for useful ServerState monitoring.

        Returns one "name: display" line per selected sensor, in the order
        listed below.  A sensor whose inputs are not (yet) present in
        `self.d` is left out instead of raising KeyError, so the state can
        be printed before or between complete server messages.
        '''
        sensors = [  # Select the ones you want in the order you want them.
            #'curLapTime',
            #'lastLapTime',
            'stucktimer',
            #'damage',
            #'focus',
            'fuel',
            #'gear',
            'distRaced',
            'distFromStart',
            #'racePos',
            'opponents',
            'wheelSpinVel',
            'z',
            'speedZ',
            'speedY',
            'speedX',
            'targetSpeed',
            'rpm',
            'skid',
            'slip',
            'track',
            'trackPos',
            'angle',
        ]

        out = str()
        #for k in sorted(self.d): # Use this to get all sensors.
        for k in sensors:
            needed = self._FANCY_INPUTS.get(k, (k,))
            if any(name not in self.d for name in needed):
                continue
            out += "%s: %s\n" % (k, self._fancy_value(k))
        return out

    def _fancy_value(self, k):
        '''Return the display string for sensor `k` (its inputs must be in `self.d`).'''
        value = self.d.get(k)

        if isinstance(value, list):  # Handle list type data.
            # The split display needs a centre reading at index 9; shorter
            # lists fall through to the generic comma-separated form.
            if k == 'track' and len(value) > 9:
                raw_tsens = ['%.1f' % x for x in value]
                return (' '.join(raw_tsens[:9]) + '_' + raw_tsens[9] + '_'
                        + ' '.join(raw_tsens[10:]))
            if k == 'opponents':  # Nice display for opponent sensors.
                marks = str()
                for osensor in value:
                    if osensor > 190:
                        oc = '_'
                    elif osensor > 90:
                        oc = '.'
                    elif osensor > 39:
                        oc = chr(int(osensor / 2) + 97 - 19)
                    elif osensor > 13:
                        oc = chr(int(osensor) + 65 - 13)
                    elif osensor > 3:
                        oc = chr(int(osensor) + 48 - 3)
                    else:
                        oc = '?'
                    marks += oc
                return ' -> ' + marks[:18] + ' ' + marks[18:] + ' <-'
            return ', '.join(str(i) for i in value)

        # Not a list type of value.
        if k == 'gear':  # This is redundant now since it's part of RPM.
            gs = '_._._._._._._._._'
            p = int(value) * 2 + 2  # Position
            label = '%d' % value  # Label
            if label == '-1':
                label = 'R'
            if label == '0':
                label = 'N'
            return gs[:p] + '(%s)' % label + gs[p + 3:]
        if k == 'damage':
            return '%6.0f %s' % (value, bargraph(value, 0, 10000, 50, '~'))
        if k == 'fuel':
            return '%6.0f %s' % (value, bargraph(value, 0, 100, 50, 'f'))
        if k == 'speedX':
            cx = 'R' if value < 0 else 'X'
            return '%6.1f %s' % (value, bargraph(value, -30, 300, 50, cx))
        if k == 'speedY':  # This gets reversed for display to make sense.
            return '%6.1f %s' % (value, bargraph(value * -1, -25, 25, 50, 'Y'))
        if k == 'speedZ':
            return '%6.1f %s' % (value, bargraph(value, -13, 13, 50, 'Z'))
        if k == 'z':
            return '%6.3f %s' % (value, bargraph(value, .3, .5, 50, 'z'))
        if k == 'trackPos':  # This gets reversed for display to make sense.
            cx = '>' if value < 0 else '<'
            return '%6.3f %s' % (value, bargraph(value * -1, -1, 1, 50, cx))
        if k == 'stucktimer':
            if value:
                return '%3d %s' % (value, bargraph(value, 0, 300, 50, "'"))
            return 'Not stuck!'
        if k == 'rpm':
            g = self.d['gear']
            g = 'R' if g < 0 else '%1d' % g
            return bargraph(value, 0, 10000, 50, g)
        if k == 'angle':
            # One glyph per PI/12 step.
            # NOTE(review): the glyph widths are uneven in this copy of the
            # file; they look meant to be fixed-width -- confirm upstream.
            asyms = [
                " ! ", ".|' ", "./' ", "_.- ", ".-- ", "..- ",
                "--- ", ".__ ", "-._ ", "'-. ", "'\\. ", "'|. ",
                " | ", " .|'", " ./'", " .-'", " _.-", " __.",
                " ---", " --.", " -._", " -..", " '\\.", " '|."]
            rad = value
            deg = int(rad * 180 / PI)
            symno = int(.5 + (rad + PI) / (PI / 12))
            # Wrap over the full table so +PI lands on the same glyph as
            # -PI and the last glyph stays reachable.
            symno = symno % len(asyms)
            return '%5.2f %3d (%s)' % (rad, deg, asyms[symno])
        if k == 'skid':  # A sensible interpretation of wheel spin.
            frontwheelradpersec = self.d['wheelSpinVel'][0]
            skid = 0
            if frontwheelradpersec:
                skid = (.5555555555 * self.d['speedX'] / frontwheelradpersec
                        - .66124)
            return bargraph(skid, -.05, .4, 50, '*')
        if k == 'slip':  # A sensible interpretation of wheel spin.
            spin = self.d['wheelSpinVel']
            slip = 0
            if spin[0]:
                # Sum of entries 2 and 3 minus sum of entries 0 and 1.
                slip = (spin[2] + spin[3]) - (spin[0] + spin[1])
            return bargraph(slip, -5, 150, 50, '@')
        return str(value)


class DriverAction():
    '''What the driver is intending to do (i.e. send to the server).
    Composes something like this for the server:
    (accel 1)(brake 0)(gear 1)(steer 0)(clutch 0)(focus 0)(meta 0) or
    (accel 1)(brake 0)(gear 1)(steer 0)(clutch 0)(focus -90 -45 0 45 90)(meta 0)'''
    def __init__(self):
        self.actionstr = str()
        # "d" is for data dictionary.
        self.d = {'accel': 0.2,
                  'brake': 0,
                  'clutch': 0,
                  'gear': 1,
                  'steer': 0,
                  'focus': [-90, -45, 0, 45, 90],
                  'meta': 0,
                  }

    def clip_to_limits(self):
        """There pretty much is never a reason to send the server
        something like (steer 9483.323). This comes up all the time
        and it's probably just more sensible to always clip it than to
        worry about when to. The "clip" command is still a snakeoil
        utility function, but it should be used only for non standard
        things or non obvious limits (limit the steering to the left,
        for example). For normal limits, simply don't worry about it."""
        self.d['steer'] = clip(self.d['steer'], -1, 1)
        self.d['brake'] = clip(self.d['brake'], 0, 1)
        self.d['accel'] = clip(self.d['accel'], 0, 1)
        self.d['clutch'] = clip(self.d['clutch'], 0, 1)
        if self.d['gear'] not in [-1, 0, 1, 2, 3, 4, 5, 6]:
            self.d['gear'] = 0
        if self.d['meta'] not in [0, 1]:
            self.d['meta'] = 0
        focus = self.d['focus']
        # An empty list is reset like any other invalid focus value;
        # min()/max() would raise ValueError on it.
        if (not isinstance(focus, list) or not focus
                or min(focus) < -180 or max(focus) > 180):
            self.d['focus'] = 0

    def __repr__(self):
        '''Return the "(name value)..." message; clips the values in place first.'''
        self.clip_to_limits()
        parts = []
        for k in self.d:
            v = self.d[k]
            if isinstance(v, list):
                text = ' '.join([str(x) for x in v])
            else:
                text = '%.3f' % v
            parts.append('(' + k + ' ' + text + ')')
        return ''.join(parts)

    def fancyout(self):
        '''Specialty output for useful monitoring of bot's effectors.'''
        out = str()
        od = self.d.copy()
        od.pop('gear', '')  # Not interesting.
        od.pop('meta', '')  # Not interesting.
        od.pop('focus', '')  # Not interesting. Yet.
        for k in sorted(od):
            if k == 'clutch' or k == 'brake' or k == 'accel':
                strout = '%6.3f %s' % (od[k], bargraph(od[k], 0, 1, 50, k[0].upper()))
            elif k == 'steer':  # Reverse the graph to make sense.
                strout = '%6.3f %s' % (od[k], bargraph(od[k] * -1, -1, 1, 50, 'S'))
            else:
                strout = str(od[k])
            out += "%s: %s\n" % (k, strout)
        return out


# == Misc Utility Functions
def destringify(s):
    '''makes a string into a value or a list of strings into a list of
    values (if possible)

    A string becomes a float, or is returned unchanged (after printing a
    notice) when it is not numeric.  A one-element list collapses to its
    single converted value; longer lists are converted element by element.
    Empty input is returned as is, and any other type yields None.
    '''
    if not s:
        return s
    if isinstance(s, str):
        try:
            return float(s)
        except ValueError:
            print("Could not find a value in %s" % s)
            return s
    if isinstance(s, list):
        if len(s) < 2:
            return destringify(s[0])
        return [destringify(i) for i in s]
    return None


def drive_example(c):
    '''This is only an example. It will get around the track but the
    correct thing to do is write your own `drive()` function.

    Reads sensor values from `c.S.d` and writes the steer, accel and gear
    commands into `c.R.d` in place.
    '''
    S, R = c.S.d, c.R.d
    target_speed = 100

    # Steer To Corner
    R['steer'] = S['angle'] * 10 / PI
    # Steer To Center
    R['steer'] -= S['trackPos'] * .10

    # Throttle Control
    if S['speedX'] < target_speed - (R['steer'] * 50):
        R['accel'] += .01
    else:
        R['accel'] -= .01
    if S['speedX'] < 10:
        R['accel'] += 1 / (S['speedX'] + .1)

    # Traction Control System
    if ((S['wheelSpinVel'][2] + S['wheelSpinVel'][3]) -
            (S['wheelSpinVel'][0] + S['wheelSpinVel'][1]) > 5):
        R['accel'] -= .2

    # Automatic Transmission
    R['gear'] = 1
    if S['speedX'] > 50:
        R['gear'] = 2
    if S['speedX'] > 80:
        R['gear'] = 3
    if S['speedX'] > 110:
        R['gear'] = 4
    if S['speedX'] > 140:
        R['gear'] = 5
    if S['speedX'] > 170:
        R['gear'] = 6


# ================ MAIN ================
if __name__ == "__main__":
    C = Client(p=3101)
    for step in range(C.maxSteps, 0, -1):
        C.get_servers_input()
        drive_example(C)
        C.respond_to_server()
    C.shutdown()