"""Multi-candidate Markov trajectory prediction.

Features (from the project readme):

- Import CSV data and build the Markov matrix.
- Give a prediction for a given history sequence.
- Every predicted point can offer several candidate values.
- History length, prediction length and candidate count are adjustable.

Usage::

    markov = Markov(his_count=1, pre_count=3, test_count=3, tol=3)
    markov.read_csv('trajectory100000.csv')
    acc = markov.test_csv('trajectory100000.csv')
    markov.write_pre('trajectory100000.csv', 'result.csv')
"""

import csv
import operator


class Markov:
    """Markov predictor that keeps several candidate points per future step.

    ``markov_matrix`` maps a history tuple to a list with one dict per
    predicted step; each dict maps an observed point to the number of times
    it followed that history at that step.
    """

    # Class-level defaults mirror the constructor defaults. The transition
    # table itself is deliberately NOT defined here: a mutable class
    # attribute would be shared by every instance.
    his_count = 1
    pre_count = 3
    test_count = 3
    tol = 3

    def __init__(self, his_count=1, pre_count=3, test_count=3, tol=3):
        """
        :param his_count: history point(s) count.
        :param pre_count: prediction point(s) count.
        :param test_count: point(s) count remain for test.
        :param tol: int, tolerant point(s) count of prediction.
        """
        self.his_count = his_count
        self.pre_count = pre_count
        self.test_count = test_count
        self.tol = tol
        # One independent transition table per model instance.
        self.markov_matrix = {}

    def _split_row(self, row: list):
        """
        Split one csv row into its training part, test history and test tail.

        Explicit indices are used instead of negative slices so that
        ``test_count == 0`` keeps the whole row for training (``row[:-0]``
        would be empty).

        :param row: list, one full row read from a csv file.
        :return: tuple of (training list, history tuple, held-out tuple).
        """
        cut = max(len(row) - self.test_count, 0)
        start = max(cut - self.his_count, 0)
        return row[:cut], tuple(row[start:cut]), tuple(row[cut:])

    def get_predict(self, his_seq: tuple):
        """
        Get prediction from a history sequence.

        :param his_seq: tuple, history sequence.
        :return: list with one entry per predicted step; each entry is a list
            of at most ``tol`` candidate points, most frequent first.
        :raises KeyError: if ``his_seq`` was never seen during training.
        """
        steps = self.markov_matrix[his_seq]
        limit = max(self.tol, 0)
        result = []
        for counts in steps[:max(self.pre_count, 0)]:
            # Ascending stable sort followed by a reverse: among equal counts
            # the most recently inserted point comes first.
            ranked = sorted(counts.items(), key=operator.itemgetter(1))
            ranked.reverse()
            result.append([point for point, _ in ranked[:limit]])
        return result

    def read_csv(self, file_dir: str):
        """
        Read sequences from one csv file and add them to the markov matrix.

        :param file_dir: str, file directory of the csv file.
        """
        # newline='' is what the csv module expects for files it parses.
        with open(file_dir, 'r', newline='') as file:
            for row in csv.reader(file, delimiter=','):
                self.add_row(row)

    def add_row(self, row: list):
        """
        Add one row to markov matrix.

        The last ``test_count`` points of the row are held out; every sliding
        window of ``his_count + pre_count`` points in the rest is counted.

        :param row: list, containing one row of training sequence.
        """
        train, _, _ = self._split_row(row)
        window = self.his_count + self.pre_count
        for i in range(len(train) - window + 1):
            cut = i + self.his_count
            self.add_seq(tuple(train[i:cut]), tuple(train[cut:i + window]))

    def add_seq(self, his_seq: tuple, pre_seq: tuple):
        """
        Add a new sequence to markov matrix.

        :param his_seq: tuple, containing one history sequence.
        :param pre_seq: tuple, containing one prediction sequence.
        """
        steps = self.markov_matrix.get(his_seq)
        if steps is None:
            # First time this history is seen: one counter dict per step.
            self.markov_matrix[his_seq] = [{point: 1} for point in pre_seq]
            return
        for counts, point in zip(steps, pre_seq):
            counts[point] = counts.get(point, 0) + 1

    def test_seq(self, his_seq: tuple, real_seq: tuple):
        """
        Use current markov matrix to check one prediction against the truth.

        :param his_seq: tuple, history sequence used to predict next N sequence.
        :param real_seq: tuple, correct sequence used to compare to prediction.
        :return: list of int with ``pre_count`` entries; entry ``i`` is 1 when
            the real point of step ``i`` is among the candidates, else 0.
            Steps missing from ``real_seq`` and unknown histories count as 0.
        """
        result = [0] * self.pre_count
        try:
            predict_sequence = self.get_predict(his_seq)
        except KeyError:
            # History never seen in training: nothing can be predicted.
            return result
        # zip stops at the shorter sequence, so a short real_seq is safe.
        for i, (real, candidates) in enumerate(zip(real_seq, predict_sequence)):
            if real in candidates:
                result[i] += 1
        return result

    def test_csv(self, file_dir: str):
        """
        Use all test data from a csv file to test markov model.

        :param file_dir: str, file directory of the csv file.
        :return: list of float, per-step accuracy over all rows of the file;
            all zeros when the file contains no rows.
        """
        totals = [0] * self.pre_count
        row_count = 0
        with open(file_dir, 'r', newline='') as file:
            for row in csv.reader(file, delimiter=','):
                row_count += 1
                _, history, real = self._split_row(row)
                hits = self.test_seq(history, real)
                totals = [total + hit for total, hit in zip(totals, hits)]
        if row_count == 0:
            # Avoid dividing by zero on an empty file.
            return [0.0] * self.pre_count
        return [total / row_count for total in totals]

    def write_pre(self, test_dir: str, result_dir: str):
        """
        Predict sequences using test data and write predict result into a csv file.

        One output row is written per input row, holding the top candidate of
        every step; unknown histories produce a row of empty cells.

        :param test_dir: str, test data's file directory
        :param result_dir: str, predict result's file directory
        """
        # Context managers close both files even if a row fails midway.
        with open(test_dir, 'r', newline='') as test_file, \
                open(result_dir, 'w', newline='') as result_file:
            writer = csv.writer(result_file, delimiter=',')
            for row in csv.reader(test_file, delimiter=','):
                _, history, _ = self._split_row(row)
                try:
                    predict_sequence = self.get_predict(history)
                except KeyError:
                    writer.writerow([None] * self.pre_count)
                    continue
                writer.writerow([candidates[0] if candidates else None
                                 for candidates in predict_sequence])


if __name__ == '__main__':
    markov = Markov(his_count=1)
    markov.read_csv('trajectory100000.csv')
    print(markov.test_csv('trajectory100000.csv'))
    markov.write_pre('trajectory100000.csv', 'result.csv')
    print()
Markov(his_count=1, pre_count=3, test_count=3, tol=3) 15 | 16 | - `his_count`为历史轨迹长度 17 | - `pre_count`为预测轨迹长度 18 | - `test_count`为csv文件中每一行数据需要留作测试集的长度(取最后`test_count`数量的点作测试集) 19 | - `tol`为候选值数量 20 | 21 | ### 读入csv文件 22 | 23 | markov.read_csv('trajectory100000.csv') 24 | 25 | ### 使用csv文件数据进行测试 26 | 27 | acc = markov.test_csv('trajectory100000.csv') 28 | 29 | ### 导出预测序列 30 | 31 | markov.write_pre('trajectory100000.csv', 'result.csv') 32 | --------------------------------------------------------------------------------