├── .gitattributes ├── .gitignore ├── requirements.txt ├── main.py ├── plot.py ├── README.md ├── network.py ├── test.py ├── heuristic.py ├── simulation.py └── mcts.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python ### 2 | venv 3 | __pycache__ 4 | *ipynb 5 | 6 | ### macOS ### 7 | .DS_Store 8 | .AppleDouble 9 | .LSOverride 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.10.0 2 | decorator==4.4.0 3 | kiwisolver==1.0.1 4 | matplotlib==3.0.3 5 | networkx==2.3 6 | numpy==1.16.2 7 | pandas==0.24.2 8 | pyparsing==2.4.0 9 | python-dateutil==2.8.0 10 | pytz==2019.1 11 | six==1.12.0 12 | tqdm==4.31.1 13 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from network import Network 2 | from plot import plot_path 3 | from test import test 4 | from simulation import simulation 5 | 6 | # test(num_of_node, side_length=100, plot=False) 7 | test(30, plot=True) 8 | 9 | # simulation(num_of_network, trail_per_network, num_of_node, side_length=100, plot=False) 10 | simulation(50, 10, 30, plot=True) 11 | -------------------------------------------------------------------------------- /plot.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | from matplotlib import pyplot as plt 3 | 4 | def plot_path(ax, model_name, cost, time, nodes, edges, positions): 5 | graph = nx.Graph() 6 | graph.add_nodes_from(nodes) 7 | graph.add_edges_from(edges) 8 | 9 | nx.draw(graph, positions, ax=ax, node_size=50, edge_color='0.2') 10 | ax.set_title('{:s} model\ncost={:.2f} time={:.4f}s'.format(model_name, cost, time)) 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mcts-travel-salesman 2 | 3 | Exploring the use of Monte Carlo Tree Search (MCTS) in solving the Traveling Salesman Problem (TSP). 4 | 5 | ## Quick Start 6 | 7 | Install virtual environment 8 | ``` 9 | python3 -m venv venv 10 | ``` 11 | 12 | Activate virtual environment 13 | ``` 14 | source venv/bin/activate 15 | ``` 16 | 17 | Install dependencies 18 | ``` 19 | pip3 install -r requirements.txt 20 | ``` 21 | 22 | Run code 23 | ``` 24 | python3 main.py 25 | ``` 26 | -------------------------------------------------------------------------------- /network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | 4 | ### Network ### 5 | # 6 | # Create random fully connected newtowk using networkx with pisition information 7 | # 8 | # Input 9 | # |- num_of_node: number of nodes in the network 10 | # └- side_length: length of the 2-d square to place nodes on 11 | 12 | class Network(): 13 | 14 | def __init__(self, num_of_node, side_length): 15 | self.num_of_node = num_of_node 16 | self.side_length = side_length 17 | self.initialize_graph() 18 | 19 | 20 | def initialize_graph(self): 21 | 22 | # generate random node position 23 | nodes = np.random.randint(self.side_length, size=self.num_of_node*2) 24 | nodes = nodes.reshape(self.num_of_node, 2) 25 | self.positions = {key: tuple(node) for key, node in enumerate(nodes)} 26 | 27 | # setup the graph 28 | self.graph = nx.Graph() 29 | self.graph.add_nodes_from([i for i in range(self.num_of_node)]) 30 | 31 | # setup edge and edge weight 32 | for i in range(self.num_of_node-1): 33 | d = nodes[i] - nodes[i+1:, :] 34 | weight = (d[:, 0]**2 + d[:, 1]**2)**0.5 35 | weighted_edges = [(i, i+j, weight[j-1]) for j in range(1, self.num_of_node-i)] 36 | self.graph.add_weighted_edges_from(weighted_edges) 37 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | from network import Network 2 | from heuristic import greedy, two_opt 3 | from mcts import RandomMCTS, GreedyMCTS 4 | from plot import plot_path 5 | from matplotlib import pyplot as plt 6 | import time 7 | 8 | 9 | # ###### Test ###### 10 | # 11 | # To run each of the method once to test code is working. This function can 12 | # also plot the path each algorithm found. 13 | # 14 | # Input 15 | # |- num_of_node: number of node to visit 16 | # |- side_length: the side length of the 2d square the all the nodes rest on 17 | # └- plot: a boolean telling if to plot the path each algorithm came up with 18 | 19 | def test(num_of_node, side_length=100, plot=False): 20 | 21 | network = Network(num_of_node, side_length) 22 | edges_set = [] 23 | cost_set = [] 24 | run_time_set = [] 25 | 26 | ### heuristic 1 - greedy 27 | start = time.time() 28 | edges, cost = greedy(network) 29 | run_time = time.time() - start 30 | 31 | edges_set.append(edges) 32 | cost_set.append(cost) 33 | run_time_set.append(run_time) 34 | print ("greedy heuristic has cost of {:.2f} using {:.4f}s".format(cost, run_time)) 35 | 36 | 37 | ### heuristic 2 - two opt 38 | start = time.time() 39 | edges, cost = two_opt(network) 40 | run_time = time.time() - start 41 | 42 | edges_set.append(edges) 43 | cost_set.append(cost) 44 | run_time_set.append(run_time) 45 | print ("two-opt heuristic has cost of {:.2f} using {:.4f}s".format(cost, run_time)) 46 | 47 | 48 | ### mcts 1 - random 49 | start = time.time() 50 | random_mcts = RandomMCTS(network) 51 | edges, cost = random_mcts.run(50, 10, 1000) # run takes (number to expand, number to simulate, 52 | ## and constant C) as input 53 | run_time = time.time() - start 54 | 55 | edges_set.append(edges) 56 | cost_set.append(cost) 57 | run_time_set.append(run_time) 58 | print ("random mcts has cost of {:.2f} using {:.4f}s".format(cost, run_time)) 59 | 60 | 61 | ### mcts 2 - greedy 62 | start = time.time() 63 | greedy_mcts = GreedyMCTS(network, 0.2) 64 | edges, cost = greedy_mcts.run(50, 10, 100) # run takes (number to expand, number to simulate, 65 | ## and constant C) as input 66 | run_time = time.time() - start 67 | 68 | edges_set.append(edges) 69 | cost_set.append(cost) 70 | run_time_set.append(run_time) 71 | print ("greedy mcts has cost of {:.2f} using {:.4f}s".format(cost, run_time)) 72 | 73 | 74 | if plot == True: 75 | fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(8,8)) 76 | fig.suptitle("Path Found by Different Models (num_of_node={:d}, side_length={:d})".format(num_of_node, side_length)) 77 | model_names = ['greedy heuristic', '2-opt heuristic', 'random mcts', 'greedy mcts'] 78 | for i in range(4): 79 | plot_path(axs[int(i/2),i%2], model_names[i], cost_set[i], run_time_set[i], 80 | network.graph.nodes, edges_set[i], network.positions) 81 | plt.show() 82 | -------------------------------------------------------------------------------- /heuristic.py: -------------------------------------------------------------------------------- 1 | import random 2 | import copy 3 | import numpy as np 4 | 5 | def calculate_cost(edges): 6 | cost = 0 7 | for edge in edges: 8 | cost += edge[2]['weight'] 9 | return cost 10 | 11 | def greedy(network): 12 | 13 | # setup 14 | g = copy.deepcopy(network.graph) 15 | visited_edges = [] 16 | current_node = random.choice(list(g.nodes)) 17 | 18 | # path finding 19 | while len(g.nodes) > 0: 20 | edges = g.edges(current_node, data=True) 21 | edges = sorted(edges, key = lambda x: x[2]['weight'], reverse=False) 22 | for edge in edges: 23 | if edge not in visited_edges: 24 | visited_edges.append(edge) 25 | next_node = edge[1] 26 | break 27 | g.remove_node(current_node) 28 | current_node = next_node 29 | 30 | # return to the origin node 31 | visited_edges.append(tuple([visited_edges[-1][1], visited_edges[0][0], 32 | network.graph.edges[visited_edges[-1][1], visited_edges[0][1]]])) 33 | 34 | 35 | # calculate cost 36 | cost = calculate_cost(visited_edges) 37 | 38 | return visited_edges, cost 39 | 40 | 41 | def two_opt(network): 42 | 43 | # setup 44 | g = network.graph 45 | path_edges = [] 46 | unvisited_nodes = list(g.nodes) 47 | random.shuffle(unvisited_nodes) 48 | current_node = unvisited_nodes.pop() 49 | 50 | # generate a random path 51 | while len(unvisited_nodes) > 0: 52 | next_node = unvisited_nodes.pop() 53 | path_edges.append(tuple([current_node, next_node, 54 | g.edges[current_node, next_node]])) 55 | current_node = next_node 56 | 57 | path_edges.append(tuple([path_edges[-1][1], path_edges[0][0], 58 | g.edges[path_edges[-1][1], path_edges[0][1]]])) 59 | path_edges = np.array(path_edges) 60 | 61 | 62 | # two opt 63 | counter = 0 64 | while counter < len(path_edges)*10: # when path is stable, terminate 65 | counter += 1 66 | np.roll(path_edges, 1) # roll the path so the first and last edge can be updated 67 | selected_edge_indexs = random.sample(range(len(path_edges)), 2) 68 | selected_edge_indexs = sorted(selected_edge_indexs) 69 | selected_edges = [path_edges[i] for i in selected_edge_indexs] 70 | current_cost = selected_edges[0][2]['weight'] + selected_edges[1][2]['weight'] 71 | new_edges = [tuple([selected_edges[0][0], selected_edges[1][0], 72 | g.edges[selected_edges[0][0], selected_edges[1][0]]]), 73 | tuple([selected_edges[0][1], selected_edges[1][1], 74 | g.edges[selected_edges[0][1], selected_edges[1][1]]])] 75 | new_cost = new_edges[0][2]['weight'] + new_edges[1][2]['weight'] 76 | if new_cost < current_cost: # perform the switch 77 | # switch the two selected edges 78 | path_edges[selected_edge_indexs[0]] = new_edges[0] 79 | path_edges[selected_edge_indexs[1]] = new_edges[1] 80 | 81 | # reoreder the edges in between 82 | if abs(selected_edge_indexs[0] - selected_edge_indexs[1]) > 1: 83 | path_edges[selected_edge_indexs[0]+1:selected_edge_indexs[1]] = path_edges[selected_edge_indexs[1]-1:selected_edge_indexs[0]:-1] 84 | for i in range(selected_edge_indexs[0] + 1, selected_edge_indexs[1]): 85 | path_edges[i]= tuple([path_edges[i][1], path_edges[i][0], path_edges[i][2]]) 86 | counter = 0 # reset timeout counter 87 | 88 | # calculate cost 89 | cost = calculate_cost(path_edges) 90 | 91 | return path_edges, cost 92 | -------------------------------------------------------------------------------- /simulation.py: -------------------------------------------------------------------------------- 1 | from network import Network 2 | from heuristic import greedy, two_opt 3 | from mcts import RandomMCTS, GreedyMCTS 4 | from plot import plot_path 5 | from matplotlib import pyplot as plt 6 | import time 7 | import numpy as np 8 | 9 | from tqdm import tqdm 10 | from multiprocessing import Pool 11 | from itertools import product 12 | 13 | 14 | # trail function defined separately for multiprocessing 15 | def run_trail(network): 16 | 17 | results = [] 18 | 19 | ### heuristic 1 - greedy 20 | start = time.time() 21 | edges, cost = greedy(network) 22 | run_time = time.time() - start 23 | 24 | results.append([cost, run_time]) 25 | 26 | ### heuristic 2 - two opt 27 | start = time.time() 28 | edges, cost = two_opt(network) 29 | run_time = time.time() - start 30 | 31 | results.append([cost, run_time]) 32 | 33 | ### mcts 1 - random 34 | start = time.time() 35 | random_mcts = RandomMCTS(network) 36 | edges, cost = random_mcts.run(50, 20, 100) 37 | run_time = time.time() - start 38 | 39 | results.append([cost, run_time]) 40 | 41 | ### mcts 2 - greedy 42 | start = time.time() 43 | greedy_mcts = GreedyMCTS(network, 0.2) 44 | edges, cost = greedy_mcts.run(50, 20, 100) 45 | run_time = time.time() - start 46 | 47 | results.append([cost, run_time]) 48 | 49 | return results 50 | 51 | 52 | 53 | # ###### Simulation ###### 54 | # 55 | # Run Monte Carlo simulation to get performance report. 56 | # 57 | # Input 58 | # |- num_of_network: number of network to test on 59 | # |- trail_per_network: number of trail to run each method on a network 60 | # |- num_of_node: number of node to visit 61 | # |- side_length: the side length of the 2d square the all the nodes rest on 62 | # └- plot: a boolean telling if to plot a histogram 63 | 64 | def simulation(num_of_network, trail_per_network, num_of_node, side_length=100, plot=False): 65 | 66 | results = np.array([]) 67 | 68 | for i in tqdm(range(num_of_network)): 69 | 70 | # run trails at the same time using multiprocessing 71 | network = Network(num_of_node, side_length) 72 | networks = [network for _ in range(trail_per_network)] 73 | p = Pool(10) 74 | result = p.map(run_trail, networks) 75 | p.close() 76 | result = np.array(result) 77 | 78 | # add to result set 79 | results = np.vstack((results, result)) if results.size else result 80 | 81 | 82 | h_1 = results[:, 0, :] 83 | h_2 = results[:, 1, :] 84 | t_1 = results[:, 2, :] 85 | t_2 = results[:, 3, :] 86 | 87 | print ("greedy has average cost of {:.2f}".format(np.mean(h_1))) 88 | print ("2 opt has average cost of {:.2f}".format(np.mean(h_2))) 89 | print ("random mcts has average cost of {:.2f}".format(np.mean(t_1))) 90 | print ("greedy mcts has average cost of {:.2f}".format(np.mean(t_2))) 91 | 92 | if plot == True: 93 | 94 | ### time vs cost plot 95 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8,8)) 96 | ax.plot(h_1[:,1], h_1[:,0], 'r+', label='greedy heuristic') 97 | ax.plot(h_2[:,1], h_2[:,0], 'm+', label='two opt heuristic') 98 | ax.plot(t_1[:,1], t_1[:,0], 'b+', label='random mcts') 99 | ax.plot(t_2[:,1], t_2[:,0], 'c+', label='greedy mcts') 100 | ax.set_xscale('log') 101 | ax.set_xlabel('time (s)') 102 | ax.set_ylabel('cost') 103 | ax.set_title("Time-Cost Performance (num_of_node={:d}, side_length={:d})".format(num_of_node, side_length)) 104 | 105 | # put legend below current axis 106 | box = ax.get_position() 107 | ax.set_position([box.x0, box.y0 + box.height * 0.1, 108 | box.width, box.height * 0.9]) 109 | ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), 110 | fancybox=True, shadow=True, ncol=5) 111 | 112 | plt.show() 113 | 114 | 115 | ### cost histogram 116 | bin_start = int(np.amin(results[:, :, 0])/20) * 20 117 | bin_end = int(np.amax(results[:, :, 0])/20) * 20 + 21 118 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8,8)) 119 | ax.hist(h_1[:,0], bins=range(bin_start, bin_end, 20), density=True, alpha=0.6, color= 'r', label='greedy heuristic') 120 | ax.hist(h_2[:,0], bins=range(bin_start, bin_end, 20), density=True, alpha=0.6, color= 'm', label='two opt heuristic') 121 | ax.hist(t_1[:,0], bins=range(bin_start, bin_end, 20), density=True, alpha=0.6, color= 'b', label='random mcts') 122 | ax.hist(t_2[:,0], bins=range(bin_start, bin_end, 20), density=True, alpha=0.6, color= 'c', label='greedy mcts') 123 | ax.set_xlabel('cost') 124 | ax.set_ylabel('frequency') 125 | ax.set_title("Cost Histogram (num_of_node={:d}, side_length={:d})".format(num_of_node, side_length)) 126 | 127 | # put legend below current axis 128 | box = ax.get_position() 129 | ax.set_position([box.x0, box.y0 + box.height * 0.1, 130 | box.width, box.height * 0.9]) 131 | ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), 132 | fancybox=True, shadow=True, ncol=5) 133 | 134 | plt.show() 135 | -------------------------------------------------------------------------------- /mcts.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import random 3 | import numpy as np 4 | 5 | ### Node Object ### 6 | class Node(): 7 | def __init__(self, parent, node, path, unvisited_nodes, cost): 8 | self.parent = parent 9 | self.node = node 10 | self.path = path 11 | self.unvisited_nodes = unvisited_nodes 12 | self.cost = cost 13 | self.num_of_visit = 1 14 | self.estimate = None 15 | self.score = None 16 | self.policy = None 17 | self.expandables = copy.deepcopy(unvisited_nodes) 18 | random.shuffle(self.expandables) 19 | self.expanded = {} 20 | 21 | def calculate_score(self, C=1): 22 | self.score = self.estimate + C * (np.log(self.parent.num_of_visit) / self.num_of_visit)**0.5 23 | 24 | 25 | 26 | class MCTS(): 27 | 28 | def __init__(self, network): 29 | self.num_of_node = network.num_of_node 30 | self.graph = network.graph 31 | self.root = Node(None, 'root', [], list(self.graph.nodes), 0) 32 | 33 | 34 | def select(self, node): 35 | if node.policy == None: 36 | return node 37 | else: 38 | return self.select(node.policy) 39 | 40 | 41 | def expand(self, node): 42 | new_node = node.expandables.pop() 43 | new_path = copy.deepcopy(node.path) 44 | new_path.append(new_node) 45 | new_unvisited_nodes = copy.deepcopy(node.unvisited_nodes) 46 | new_unvisited_nodes.remove(new_node) 47 | new_cost = copy.deepcopy(node.cost) 48 | if node.node != 'root': 49 | new_cost += self.graph.edges[node.node, new_node]['weight'] 50 | new_node_object = Node(node, new_node, new_path, new_unvisited_nodes, new_cost) 51 | node.expanded[new_node] = new_node_object 52 | return new_node_object 53 | 54 | 55 | def backpropagate(self, node): 56 | 57 | # decide policy for this node 58 | scores = [] 59 | for key, n in node.expanded.items(): 60 | if node.node != 'root': 61 | scores.append([key, n.score + self.graph.edges[node.node, n.node]['weight']]) 62 | else: 63 | scores.append([key, n.score]) 64 | scores = np.array(scores) 65 | node.score = sum(scores[:, 1]) / len(scores) 66 | node.policy = node.expanded[scores[np.argmin(scores[:, 1])][0]] 67 | 68 | if node.node != 'root': 69 | 70 | # evaluate how good this node is as a child 71 | estimates = [] 72 | for key, n in node.expanded.items(): 73 | estimates.append([key, n.estimate + self.graph.edges[node.node, n.node]['weight']]) 74 | estimates = np.array(estimates) 75 | node.estimate = sum(estimates[:, 1]) / len(estimates) 76 | node.calculate_score() 77 | 78 | # keep going until root node 79 | self.backpropagate(node.parent) 80 | 81 | 82 | def calculate_path_edges(self, path): 83 | path_edges = [] 84 | cost = 0 85 | current_node = path.pop() 86 | while len(path) > 0: 87 | next_node = path.pop() 88 | path_edges.append(tuple([current_node, next_node, 89 | self.graph.edges[current_node, next_node]])) 90 | cost += path_edges[-1][2]['weight'] 91 | current_node = next_node 92 | path_edges.append(tuple([path_edges[-1][1], path_edges[0][0], 93 | self.graph.edges[path_edges[-1][1], path_edges[0][1]]])) 94 | cost += path_edges[-1][2]['weight'] 95 | return path_edges, cost 96 | 97 | 98 | def run(self, num_of_expand, num_of_simulate, C): 99 | while True: 100 | current_node = self.select(self.root) 101 | 102 | # reach the end, break condition 103 | if len(current_node.path) == self.num_of_node: 104 | break 105 | 106 | # expand and simulate 107 | for i in range(min(num_of_expand, len(current_node.expandables))): 108 | new_node = self.expand(current_node) 109 | costs = [] 110 | for j in range(num_of_simulate): 111 | costs.append(self.simulate(new_node)) 112 | new_node.estimate = sum(costs) / num_of_simulate 113 | new_node.calculate_score() 114 | 115 | 116 | # back up the estimate, calculate score, and update policy 117 | self.backpropagate(current_node) 118 | 119 | return self.calculate_path_edges(current_node.path) 120 | 121 | 122 | 123 | class RandomMCTS(MCTS): 124 | 125 | def __init__(self, network): 126 | MCTS.__init__(self, network) 127 | 128 | 129 | def simulate(self, node): 130 | 131 | # setup 132 | unvisited_nodes = copy.deepcopy(node.unvisited_nodes) 133 | random.shuffle(unvisited_nodes) 134 | current_node = node.node 135 | cost = 0 136 | 137 | # path finding 138 | while len(unvisited_nodes) > 0: 139 | next_node = unvisited_nodes.pop() 140 | cost += self.graph.edges[current_node, next_node]['weight'] 141 | current_node = next_node 142 | 143 | cost += self.graph.edges[current_node, node.path[0]]['weight'] 144 | 145 | return cost 146 | 147 | 148 | 149 | class GreedyMCTS(MCTS): 150 | 151 | def __init__(self, network, prob_greedy): 152 | MCTS.__init__(self, network) 153 | self.prob_greedy = prob_greedy 154 | 155 | 156 | def simulate(self, node): 157 | 158 | # setup 159 | unvisited_nodes = copy.deepcopy(node.unvisited_nodes) 160 | random.shuffle(unvisited_nodes) 161 | current_node = node.node 162 | cost = 0 163 | 164 | # greedy path finding 165 | while len(unvisited_nodes) > 0: 166 | if random.random() < self.prob_greedy: 167 | edges = [] 168 | for n in unvisited_nodes: 169 | edges.append(tuple([current_node, n, self.graph.edges[current_node, n]])) 170 | edges = sorted(edges, key = lambda x: x[2]['weight'], reverse=False) 171 | unvisited_nodes.remove(edges[0][1]) 172 | cost += edges[0][2]['weight'] 173 | current_node = edges[0][1] 174 | else: 175 | next_node = unvisited_nodes.pop() 176 | cost += self.graph.edges[current_node, next_node]['weight'] 177 | current_node = next_node 178 | 179 | cost += self.graph.edges[current_node, node.path[0]]['weight'] 180 | 181 | return cost 182 | --------------------------------------------------------------------------------