├── .gitignore
├── README.md
├── TSP
│   ├── __init__.py
│   └── tsp_utils.py
├── VRP
│   ├── __init__.py
│   ├── vrp_attention.py
│   └── vrp_utils.py
├── configs.py
├── data
│   └── .data
├── main.py
├── misc_utils.py
├── model
│   ├── __init__.py
│   └── attention_agent.py
├── shared
│   ├── __init__.py
│   ├── attention.py
│   ├── decode_step.py
│   ├── embeddings.py
│   └── misc_utils.py
└── task_specific_params.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Add any directories, files, or patterns you don't want to be tracked by version control
*.pdf
*.pyc
*.npy
*.txt
*.out
*.err
*.mat
*.jpg
*.aux
*.log
*.blb
*.gz
*.xls
*.synctex
*.synctex.gz
*.synctex.gz(busy)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Reinforcement Learning for Solving the Vehicle Routing Problem

We use Reinforcement Learning to solve the Travelling Salesman Problem (TSP) and the Vehicle Routing Problem (VRP).

## Paper
Implementation of our paper: [Reinforcement Learning for Solving the Vehicle Routing Problem](https://arxiv.org/abs/1802.04240v2).

## Dependencies

* Numpy
* [tensorflow](https://www.tensorflow.org/)>=1.2
* tqdm

## How to Run
### Train
By default, the code runs in training mode on a single GPU. To start training, use the following command:
```bash
python main.py --task=vrp10
```

Other config parameters can be added, for example:
```bash
python main.py --task=vrp10 --gpu=0 --n_glimpses=1 --use_tanh=False
```
The full list of configs is in the ``configs.py`` file. Task-specific parameters are available in ``task_specific_params.py``.
### Inference
To run a trained model for inference, turn off the training mode. You need to specify the directory of the trained model; otherwise, a randomly initialized model will be used for decoding:
```bash
python main.py --task=vrp10 --is_train=False --model_dir=./path_to_your_saved_checkpoint
```
By default, inference runs in batch mode, meaning that all test instances are fed simultaneously. It is also possible to run inference in single mode, which decodes instances one by one. The latter is used for reporting runtimes and displays detailed reports. To run inference in single mode, use:
```bash
python main.py --task=vrp10 --is_train=False --infer_type=single --model_dir=./path_to_your_saved_checkpoint
```
### Logs
All logs are stored in a ``result.txt`` file located in the ``./logs/task_date_time`` directory.
## Sample CVRP solution

![Sample CVRP solution](https://lh3.googleusercontent.com/eUh69ZQsIV4SIE6RjwasAEkdw2VZaTmaeR8Fqk33di70-BGU62fvmcp6HLeGLE61lJDS7jLMpFf2 "Sample VRP")

## Acknowledgements
Thanks to [pemami4911/neural-combinatorial-rl-pytorch](https://github.com/pemami4911/neural-combinatorial-rl-pytorch) for the idea of restructuring the code.
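The same entry point also trains the TSP agent. A minimal sketch, assuming task names in ``task_specific_params.py`` follow the same ``<problem><n_nodes>`` pattern as ``vrp10`` (e.g. ``tsp20``):
```bash
# Hypothetical TSP run: the exact task name must match an entry
# defined in task_specific_params.py.
python main.py --task=tsp20
```
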
--------------------------------------------------------------------------------
/TSP/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OptMLGroup/VRP-RL/b794fb1e4c4bb70a62cfa54504ee7a247adbc2a0/TSP/__init__.py

--------------------------------------------------------------------------------
/TSP/tsp_utils.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
import os
import warnings
import collections

def create_TSP_dataset(
        n_problems,
        n_nodes,
        data_dir,
        seed=None,
        data_type='train'):
    '''
    This function creates TSP instances and saves them on disk. If the file is already
    available, it loads it instead.
    Input:
        n_problems: number of problems to generate.
        n_nodes: number of nodes in each problem.
        data_dir: the directory in which to save or from which to load the file.
        seed: random seed for generating the data.
        data_type: the purpose for generating the data. It can be 'train', 'val', or any string.
    Output:
        data: a numpy array with shape [n_problems x n_nodes x 2]
    '''

    # set the random number generator
    if seed is None:
        rnd = np.random
    else:
        rnd = np.random.RandomState(seed)

    # build the task name and data file path
    task_name = 'tsp-size-{}-len-{}-{}.txt'.format(n_problems, n_nodes, data_type)
    fname = os.path.join(data_dir, task_name)

    # create/load data
    if os.path.exists(fname):
        print('Loading dataset for {}...'.format(task_name))
        data = np.loadtxt(fname)
        data = data.reshape(-1, n_nodes, 2)
    else:
        print('Creating dataset for {}...'.format(task_name))
        # generate a set of n_problems instances with nodes drawn uniformly from the unit square
        data = rnd.uniform(0, 1, size=(n_problems, n_nodes, 2))
        np.savetxt(fname, data.reshape(-1, n_nodes*2))

    return data

class DataGenerator(object):
    def __init__(self,
                 args):
        '''
        This class generates TSP problems for training and testing.
        Inputs:
            args: the parameter dictionary. It should include:
                args['random_seed']: random seed
                args['test_size']: number of problems to test
                args['n_nodes']: number of nodes
                args['batch_size']: batch size for training
        '''
        self.args = args
        self.rnd = np.random.RandomState(seed=args['random_seed'])
        print('Created train iterator.')

        # create test data
        self.n_problems = args['test_size']
        self.test_data = create_TSP_dataset(self.n_problems, args['n_nodes'], './data',
                                            seed=args['random_seed']+1, data_type='test')

        self.reset()

    def reset(self):
        self.count = 0

    def get_train_next(self):
        '''
        Get the next batch of problems for training.
        '''
        input_data = self.rnd.uniform(0, 1,
                size=[self.args['batch_size'], self.args['n_nodes'], 2])

        return input_data

    def get_test_next(self):
        '''
        Get the next batch of problems for testing.
        '''
        if self.count