├── data ├── .keep └── traj.csv ├── EditDistance.py ├── ShowTrajData.py ├── readme.md ├── HausdorffDistance.py ├── FréchetDistanceLoop.py ├── FréchetDistanceRecursive.py ├── DynamicTimeWarpingLoop.py ├── DynamicTimeWarpingRecursive.py ├── LongestCommonSubsequenceLoopOptimize.py ├── LongestCommonSubsequenceRecursive.py └── LongestCommonSubsequenceLoop.py /data/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /EditDistance.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 编辑距离(未完成) 3 | import Levenshtein 4 | print(Levenshtein.distance("abac","cdadd")) -------------------------------------------------------------------------------- /ShowTrajData.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | data = np.loadtxt("./data/traj.csv",delimiter=",") 5 | # 加载第一条轨迹 6 | traj1 = data[:8] 7 | # 加载第二条轨迹 8 | traj2 = data[8:15] 9 | # 加载第三条轨迹 10 | traj3 = data[15:] 11 | fig = plt.figure() 12 | # 绘制第一条轨迹 13 | plt.plot(traj1[:,0],traj1[:,1],label='traj1') 14 | # 绘制第二条轨迹 15 | plt.plot(traj2[:,0],traj2[:,1],label='traj2') 16 | # 绘制第三条轨迹 17 | plt.plot(traj3[:,0],traj3[:,1],label='traj3') 18 | # 显示图例 19 | plt.legend(loc='upper right') 20 | plt.show() -------------------------------------------------------------------------------- /data/traj.csv: -------------------------------------------------------------------------------- 1 | 13021872.73,4390386.818 2 | 13021896.42,4390399.022 3 | 13021918.5,4390406.918 4 | 13021937.34,4390419.48 5 | 13021967.67,4390437.965 6 | 13021956.9,4390409.969 7 | 13021982.38,4390405.841 8 | 13022009.66,4390411.764 9 | 13021874.17,4390378.204 10 | 13021898.75,4390393.099 11 | 13021934.65,4390394.894 12 | 13021938.24,4390416.43 13 | 13021968.21,4390432.94 14 | 13022005.36,4390410.328 15 | 13021984.54,4390401.714 16 | 13021894.81,4390404.585 17 | 13021917.42,4390408.533 18 | 13021941.11,4390422.531 19 | 13021963.72,4390436.53 20 | 13021959.95,4390409.43 21 | 13022008.23,4390412.661 -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Sequence-Similarity 2 | ## 1 介绍 3 | ### 1.1在本实例中,如果想将代码直接运行需注意以下几点: 4 | * Python版本3.X(本人使用的是Python 3.6) 5 | * numpy版本:1.16.0 6 | * scipy版本:0.19.1 7 | ### 1.2 项目说明 8 | 9 | * **data**:存放测试数据的文件夹 10 | * **traj**:存放三条轨迹数据(1-8行是第一条轨迹,9-15行是第二条轨迹,16-21是第三条轨迹) 11 | * **ShowTrajData**:使用matplotlib直观的显示三条轨迹的形状 12 | * **DynamicTimeWarpingRecursive**:动态时间归整(Dynamic Time Warping,DTW)算法的递归实现 13 | * **DynamicTimeWarpingLoop**:动态时间归整(Dynamic Time Warping,DTW)算法的循环实现 14 | * **FréchetDistanceRecursive**:弗雷歇离散距离(Fréchet Distance)算法的递归实现 15 | * **FréchetDistanceLoop**:弗雷歇离散距离(Fréchet Distance)算法的循环实现 16 | * **HausdorffDistance**:豪斯多夫距离(Hausdorff Distance)算法的实现 17 | * **LongestCommonSubsequenceRecursive**:最长公共子序列(Longest Common Subsequence)算法的递归实现 18 | * **LongestCommonSubsequenceLoop**:最长公共子序列(Longest Common Subsequence)算法的循环实现 19 | * **LongestCommonSubsequenceLoopOptimize**:最长公共子序列(Longest Common Subsequence)算法的循环实现,代码进行了优化,速度为**LongestCommonSubsequenceLoop**的**三分之一** 20 | * **EditDistance**:编辑距离(EditDistance)(未完成) 21 | ## 2 序列相似度算法原理 -------------------------------------------------------------------------------- /HausdorffDistance.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | from scipy.spatial.distance import cdist 4 | import time 5 | # 计算单向的Hausdorff距离 6 | # max(a∈ptSetA)min(b∈ptSetB)‖a-b‖ 7 | def OneWayHausdorffDistance(ptSetA, ptSetB): 8 | # 计算任意向量之间的距离,假设ptSetA有n个向量,ptSetB有m个向量 9 | # 得到矩阵C(n行m列)Cij代表A中都第i个向量到B中第j向量都距离 10 | dist = cdist(ptSetA, ptSetB, metric='euclidean') 11 | # np.min(dist,axis=1):计算每一行的的最小值 12 | # 即:固定点集A的值,求点集A中到集合B的最小值 13 | return np.max(np.min(dist, axis=1)) 14 | # 计算双向的Hausdorff距离=====>H(ptSetA,ptSetB)=max(h(ptSetA,ptSetB),h(ptSetB,ptSetA)) 15 | # ptSetA:输入的第一个点集 16 | # ptSetB:输入的第二个点集 17 | # Hausdorff距离度量了两个点集间的最大不匹配程度 18 | def HausdorffDistance(ptSetA, ptSetB): 19 | # 计算双向的Hausdorff距离距离 20 | res = np.array([ 21 | OneWayHausdorffDistance(ptSetA, ptSetB), 22 | OneWayHausdorffDistance(ptSetB, ptSetA) 23 | ]) 24 | return np.max(res) 25 | data = np.loadtxt("./data/traj.csv",delimiter=",") 26 | # 加载三条轨迹 27 | traj1, traj2, traj3 = data[:8], data[8:15], data[15:] 28 | starttime = time.clock() 29 | print("轨迹1与轨迹2的Hausdorff距离为:%s"%(HausdorffDistance(traj1,traj2))) 30 | print("轨迹2与轨迹3的Hausdorff距离为:%s"%(HausdorffDistance(traj2,traj3))) 31 | print("轨迹1与轨迹3的Hausdorff距离为:%s"%(HausdorffDistance(traj1,traj3))) 32 | endtime = time.clock() 33 | print("运行时间:%s秒"%(endtime - starttime,)) -------------------------------------------------------------------------------- /FréchetDistanceLoop.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | from scipy.spatial.distance import cdist 4 | import time 5 | # 使用循环的方式求解两个序列点对的相似度(距离) 6 | # 即costMatrix右下角的最后一个值为Frechet距离 7 | def FrechetDistance(ptSetA, ptSetB): 8 | # 获得点集ptSetA中点的个数n 9 | n = ptSetA.shape[0] 10 | # 获得点集ptSetB中点的个数m 11 | m = ptSetB.shape[0] 12 | # 计算任意两个点的距离矩阵 13 | # disMat[i][j]对应ptSetA的第i个点到ptSetB中第j点的距离 14 | disMat = cdist(ptSetA, ptSetB, metric='euclidean') 15 | # 初始化消耗矩阵 16 | costMatrix = np.full((n, m), -1.0) 17 | # 逐行给消耗矩阵赋值 18 | # 首先给第一行赋值 19 | # 然后依次给2,3,4,...,m行赋值 20 | for i in range(n): 21 | for j in range(m): 22 | if i == 0 and j == 0: 23 | # 给左上角赋值 24 | costMatrix[0][0] = disMat[0][0] 25 | if i == 0 and j > 0: 26 | # 给第一行赋值 27 | costMatrix[0][j] = max(costMatrix[0][j-1], disMat[0][j]) 28 | if i > 0 and j == 0: 29 | # 给第一列赋值 30 | costMatrix[i][0] = max(costMatrix[i-1][0], disMat[i][0]) 31 | if i > 0 and j > 0: 32 | # 给其他赋值 33 | costMatrix[i][j] = max(min(costMatrix[i-1][j], 34 | costMatrix[i-1][j-1], 35 | costMatrix[i][j-1]), disMat[i][j]) 36 | return costMatrix[n-1][m-1] 37 | data = np.loadtxt("./data/traj.csv",delimiter=",") 38 | # 加载三条轨迹 39 | traj1, traj2, traj3 = data[:8], data[8:15], data[15:] 40 | starttime = time.clock() 41 | print("轨迹1与轨迹2的Frechet距离为:%s"%(FrechetDistance(traj2,traj1))) 42 | print("轨迹2与轨迹3的Frechet距离为:%s"%(FrechetDistance(traj2,traj3))) 43 | print("轨迹1与轨迹3的Frechet距离为:%s"%(FrechetDistance(traj1,traj3))) 44 | endtime = time.clock() 45 | print("运行时间:%s秒"%(endtime - starttime,)) -------------------------------------------------------------------------------- /FréchetDistanceRecursive.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | from scipy.spatial.distance import cdist 4 | import time 5 | # 使用递归的方式求解costMatrix的i,j的数值 6 | # 即costMatrix右下角的最后一个值为Frechet距离 7 | def _frechet(disMat,costMatrix,i,j): 8 | # 如果cstMatrix[i][j]不等于-1,直接返回,不需要计算了(借助动态规划的思想) 9 | if costMatrix[i][j] > -1: 10 | return costMatrix[i][j] 11 | # 当i,j都等于0的时候,计算消耗矩阵的值 12 | if i == 0 and j == 0: 13 | costMatrix[i][j] = disMat[0][0] 14 | # 计算第一列的值 15 | if i > 0 and j == 0: 16 | costMatrix[i][j] = max(_frechet(disMat,costMatrix, i - 1, 0), disMat[i][0]) 17 | # 计算第一行的值 18 | if i == 0 and j > 0: 19 | costMatrix[i][j] = max(_frechet(disMat,costMatrix, 0, j - 1), disMat[0][j]) 20 | # 计算其他值 21 | if i > 0 and j > 0: 22 | costMatrix[i][j] = max(min(_frechet(disMat,costMatrix, i - 1, j), 23 | _frechet(disMat,costMatrix, i - 1, j - 1), 24 | _frechet(disMat,costMatrix, i, j - 1)), 25 | disMat[i][j]) 26 | return costMatrix[i][j] 27 | def FrechetDistance(ptSetA, ptSetB): 28 | # 获得点集ptSetA中点的个数n 29 | n = ptSetA.shape[0] 30 | # 获得点集ptSetB中点的个数m 31 | m = ptSetB.shape[0] 32 | # 计算任意两个点的距离矩阵 33 | disMat = cdist(ptSetA, ptSetB, metric='euclidean') 34 | # 初始化消耗矩阵 35 | costMatrix = np.full((n,m),-1.0) 36 | # 递归求解Frechet距离 37 | return _frechet(disMat,costMatrix,n-1,m-1) 38 | data = np.loadtxt("./data/traj.csv",delimiter=",") 39 | # 加载三条轨迹 40 | traj1, traj2, traj3 = data[:8], data[8:15], data[15:] 41 | starttime = time.clock() 42 | print("轨迹1与轨迹2的Frechet距离为:%s"%(FrechetDistance(traj1,traj2))) 43 | print("轨迹2与轨迹3的Frechet距离为:%s"%(FrechetDistance(traj2,traj3))) 44 | print("轨迹1与轨迹3的Frechet距离为:%s"%(FrechetDistance(traj1,traj3))) 45 | endtime = time.clock() 46 | print("运行时间:%s秒"%(endtime - starttime,)) -------------------------------------------------------------------------------- /DynamicTimeWarpingLoop.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | from scipy.spatial.distance import cdist 4 | import time 5 | # 提取路径对齐 6 | def extractPath(costMatrix,i,j): 7 | # 初始化路径 8 | path = [] 9 | # 从右下角循环,寻找对齐点 10 | while i != 0 and j != 0: 11 | # 首先加入右下角点 12 | path.insert(0, (i, j)) 13 | # 循环的元素,三个坐标 14 | idxArr = [(i-1,j),(i-1,j-1),(i,j-1)] 15 | # 寻找到最小的那个值 16 | minArg = np.argmin(np.array([ 17 | costMatrix[i - 1][j], 18 | costMatrix[i - 1][j - 1], 19 | costMatrix[i][j - 1]])) 20 | # 对应的消耗矩阵的元素 21 | minIndex = idxArr[minArg] 22 | # 重新迭代 23 | i = minIndex[0] 24 | j = minIndex[1] 25 | # 寻找靠边的点 26 | while i != 0: 27 | path.insert(0, (i, 0)) 28 | i = i-1 29 | # 寻找靠边的点 30 | while j != 0: 31 | path.insert(0, (0, j)) 32 | j = j - 1 33 | # 加入0,0 34 | path.insert(0, (0, 0)) 35 | return path 36 | # 使用循环的方式求解costMatrix的i,j的数值 37 | # 即costMatrix右下角的最后一个值为dtw距离 38 | # 返回值:dtw值+路径 39 | def DynamicTimeWarping(ptSetA, ptSetB): 40 | # 获得点集ptSetA中点的个数n 41 | n = ptSetA.shape[0] 42 | # 获得点集ptSetB中点的个数m 43 | m = ptSetB.shape[0] 44 | # 计算任意两个点的距离矩阵 45 | # disMat[i][j]对应ptSetA的第i个点到ptSetB中第j点的距离 46 | disMat = cdist(ptSetA, ptSetB, metric='euclidean') 47 | # 初始化消耗矩阵 48 | costMatrix = np.full((n, m), -1.0) 49 | # 逐行给消耗矩阵赋值 50 | # 首先给第一行赋值 51 | # 然后依次给2,3,4,...,m行赋值 52 | for i in range(n): 53 | for j in range(m): 54 | if i == 0 and j == 0: 55 | # 给左上角赋值 56 | costMatrix[0][0] = disMat[0][0] 57 | if i == 0 and j > 0: 58 | # 给第一行赋值 59 | costMatrix[0][j] = costMatrix[0][j-1]+disMat[0][j] 60 | if i > 0 and j == 0: 61 | # 给第一列赋值 62 | costMatrix[i][0] = costMatrix[i-1][0]+disMat[i][0] 63 | if i > 0 and j > 0: 64 | # 给其他赋值 65 | costMatrix[i][j] = min(costMatrix[i-1][j], 66 | costMatrix[i-1][j-1], 67 | costMatrix[i][j-1]) + disMat[i][j] 68 | # 提取路径 69 | path = extractPath(costMatrix, n - 1, m - 1) 70 | return costMatrix[n-1][m-1],path 71 | data = np.loadtxt("./data/traj.csv",delimiter=",") 72 | # 加载三条轨迹 73 | traj1, traj2, traj3 = data[:8], data[8:15], data[15:] 74 | starttime = time.clock() 75 | print("轨迹1与轨迹2的DTW距离为:%s"%(DynamicTimeWarping(traj1,traj2)[0])) 76 | print("轨迹1与轨迹2的path路径为:%s"%(DynamicTimeWarping(traj1,traj2)[1])) 77 | print("轨迹2与轨迹3的DTW距离为:%s"%(DynamicTimeWarping(traj2,traj3)[0])) 78 | print("轨迹2与轨迹3的path路径为:%s"%(DynamicTimeWarping(traj2,traj3)[1])) 79 | print("轨迹1与轨迹3的DTW距离为:%s"%(DynamicTimeWarping(traj1,traj3)[0])) 80 | print("轨迹1与轨迹3的path路径为:%s"%(DynamicTimeWarping(traj1,traj3)[1])) 81 | endtime = time.clock() 82 | print("运行时间:%s秒"%(endtime - starttime,)) -------------------------------------------------------------------------------- /DynamicTimeWarpingRecursive.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | from scipy.spatial.distance import cdist 4 | import time 5 | # 提取路径对齐 6 | def extractPath(costMatrix,i,j): 7 | # 初始化路径 8 | path = [] 9 | # 从右下角循环,寻找对齐点 10 | while i != 0 and j != 0: 11 | # 首先加入右下角点 12 | path.insert(0, (i, j)) 13 | # 循环的元素,三个坐标 14 | idxArr = [(i-1,j),(i-1,j-1),(i,j-1)] 15 | # 寻找到最小的那个值 16 | minArg = np.argmin(np.array([ 17 | costMatrix[i - 1][j], 18 | costMatrix[i - 1][j - 1], 19 | costMatrix[i][j - 1]])) 20 | # 对应的消耗矩阵的元素 21 | minIndex = idxArr[minArg] 22 | # 重新迭代 23 | i = minIndex[0] 24 | j = minIndex[1] 25 | # 寻找靠边的点 26 | while i != 0: 27 | path.insert(0, (i, 0)) 28 | i = i-1 29 | # 寻找靠边的点 30 | while j != 0: 31 | path.insert(0, (0, j)) 32 | j = j - 1 33 | # 加入0,0 34 | path.insert(0, (0, 0)) 35 | return path 36 | # 使用递归的方式求解costMatrix的i,j的数值 37 | # 即costMatrix右下角的最后一个值为dtw距离 38 | # 返回值:dtw值+路径 39 | def _dtw(disMat,costMatrix,i,j): 40 | # 如果costMatrix[i][j]不等于-1,直接返回,不需要计算了(借助动态规划的思想) 41 | if costMatrix[i][j] > -1: 42 | return costMatrix[i][j] 43 | # 当i,j都等于0的时候,计算消耗矩阵的值 44 | if i == 0 and j == 0: 45 | costMatrix[i][j] = disMat[0][0] 46 | # 计算第一列的值 47 | if i > 0 and j == 0: 48 | costMatrix[i][j] = _dtw(disMat, costMatrix, i - 1, 0) + disMat[i][0] 49 | # 计算第一行的值 50 | if i == 0 and j > 0: 51 | costMatrix[i][j] = _dtw(disMat, costMatrix, 0, j - 1) + disMat[0][j] 52 | # 计算其他值 53 | if i > 0 and j > 0: 54 | costMatrix[i][j] = min(_dtw(disMat, costMatrix, i, j-1), 55 | _dtw(disMat, costMatrix, i - 1, j-1), 56 | _dtw(disMat, costMatrix, i - 1, j)) + disMat[i][j] 57 | return costMatrix[i][j] 58 | def DynamicTimeWarping(ptSetA, ptSetB): 59 | # 获得点集ptSetA中点的个数n 60 | n = ptSetA.shape[0] 61 | # 获得点集ptSetB中点的个数m 62 | m = ptSetB.shape[0] 63 | # 计算任意两个点的距离矩阵 64 | disMat = cdist(ptSetA, ptSetB, metric='euclidean') 65 | # 初始化消耗矩阵 66 | costMatrix = np.full((n,m),-1.0) 67 | # 递归求解DTW距离 68 | dtwDis = _dtw(disMat,costMatrix,n-1,m-1) 69 | # 提取路径 70 | path = extractPath(costMatrix,n-1,m-1) 71 | return dtwDis,path 72 | data = np.loadtxt("./data/traj.csv",delimiter=",") 73 | # 加载三条轨迹 74 | traj1, traj2, traj3 = data[:8], data[8:15], data[15:] 75 | starttime = time.clock() 76 | print("轨迹1与轨迹2的DTW距离为:%s"%(DynamicTimeWarping(traj1,traj2)[0])) 77 | print("轨迹1与轨迹2的path路径为:%s"%(DynamicTimeWarping(traj1,traj2)[1])) 78 | print("轨迹2与轨迹3的DTW距离为:%s"%(DynamicTimeWarping(traj2,traj3)[0])) 79 | print("轨迹2与轨迹3的path路径为:%s"%(DynamicTimeWarping(traj2,traj3)[1])) 80 | print("轨迹1与轨迹3的DTW距离为:%s"%(DynamicTimeWarping(traj1,traj3)[0])) 81 | print("轨迹1与轨迹3的path路径为:%s"%(DynamicTimeWarping(traj1,traj3)[1])) 82 | endtime = time.clock() 83 | print("运行时间:%s秒"%(endtime - starttime,)) -------------------------------------------------------------------------------- /LongestCommonSubsequenceLoopOptimize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import time 3 | import numpy as np 4 | from scipy.spatial.distance import cdist 5 | def extractCommonSequence(lcsMat,isEqual): 6 | # 获得seqA与seqB的长度 7 | n,m = isEqual.shape 8 | # 公共序列在序列A中的索引 9 | subSeqAIndex=[] 10 | # 公共序列在序列B中的索引 11 | subSeqBIndex=[] 12 | i = n-1 13 | j = m-1 14 | # 回溯寻找 15 | while i != 0 and j != 0: 16 | if isEqual[i][j] == 1: 17 | # 如果这两个值都相等,那么回溯到isEqual[i-1][i-1] 18 | subSeqAIndex.insert(0, i) 19 | subSeqBIndex.insert(0, j) 20 | i = i-1 21 | j = j-1 22 | elif lcsMat[i+1][j] >= lcsMat[i][j+1]: 23 | # 进入到这里,说明不相等,且矩阵矩阵左边大于上面 24 | # 即lcsMat[i+1][j+1]是由lcsMat[i+1][j]得到,向左回退,因此j回退 25 | j = j - 1 26 | else: 27 | # 进入到这里,说明不相等,且矩阵上面大于左边 28 | # 即lcsMat[i+1][j+1]是由lcsMat[i][j+1]得到,向上回退,因此j回退 29 | i = i - 1 30 | return [subSeqAIndex, subSeqBIndex] 31 | # 循环求解最长公共子序列 32 | def LongestCommonSubsequence(seqA,seqB,tol): 33 | # 获取序列A的长度 34 | n = seqA.shape[0] 35 | # 获取序列B的长度 36 | m = seqB.shape[0] 37 | # 生成0矩阵,序列矩阵lcsMat[i][j] 38 | # lcsMat[i][j]:表示seqA前i个序列与seqB前j个序列的最长公共子序列长度 39 | # lcsMat[0][j],与lcsMat[i][0]都为0,即,当一个序列为0时,没有公共子序列 40 | lcsMat = np.zeros((n+1, m+1), dtype=np.int) 41 | # 计算任意两个点的距离 42 | disMat = cdist(seqA, seqB, metric='euclidean') 43 | # 用于判断,元素是否相等 44 | # isEqual[i][j]==1表示seqA[i]==seqB[j] 45 | # isEqual[i][j]==0表示seqA[i]!=seqB[j] 46 | isEqual = np.where(disMat < tol, 1, 0) 47 | # 循环为lcsMat赋值 48 | # lcsMat[n][m]为最长公共子序列的长度 49 | for i in range(n): 50 | # 为lcsMat一行一行的赋值(注意:要思考为什么可以一行一行的赋值) 51 | # 原因:lcsMat[i][j]的值仅与三个位置有关,分别为:lcsMat[i-1][j-1]、lcsMat[i-1][j]、lcsMat[i][j-1] 52 | # 因此每一次迭代,所有的三个元素肯定都已经有值了 53 | for j in range(m): 54 | # seqA[i]seqB[j]的公共子序列长度,记录在lcsMat[i + 1][j + 1] 55 | # 因为有了0序列,矩阵多了一行一列(均为0) 56 | if isEqual[i][j] == 1: 57 | # 如果最后一个元素相等,那么就是lcsMat[i][j]+1,即当前元素等于左上角元素加1 58 | lcsMat[i + 1][j + 1] = lcsMat[i][j] + 1 59 | else: 60 | # 如果最后一个元素不相等,即当前元素等于左边元素或者上面元素中最大的那个 61 | lcsMat[i + 1][j + 1] = max(lcsMat[i + 1][j],lcsMat[i][j + 1]) 62 | # 用于求解公共子序列相应的位置 63 | sequence = extractCommonSequence(lcsMat,isEqual) 64 | # 用于计算相似度 65 | similarity = 1-lcsMat[n][m]*1.0/min(n,m) 66 | return lcsMat[n][m],similarity,sequence 67 | data = np.loadtxt("./data/traj.csv",delimiter=",") 68 | # 加载三条轨迹 69 | traj1, traj2, traj3 = data[:8], data[8:15], data[15:] 70 | starttime = time.clock() 71 | l1, similarity1, sequence1 = LongestCommonSubsequence(traj1, traj2, 5) 72 | l2, similarity2, sequence2 = LongestCommonSubsequence(traj2, traj3, 5) 73 | l3, similarity3, sequence3 = LongestCommonSubsequence(traj1, traj3, 5) 74 | endtime = time.clock() 75 | print("轨迹1与轨迹2的LCSS距离为:%s\n轨迹1与轨迹2公共子序列的长度为:%s"%(similarity1,l1)) 76 | print("轨迹2与轨迹3的LCSS距离为:%s\n轨迹1与轨迹2公共子序列的长度为:%s"%(similarity2,l2)) 77 | print("轨迹1与轨迹3的LCSS距离为:%s\n轨迹1与轨迹2公共子序列的长度为:%s"%(similarity3,l3)) 78 | print("轨迹1与轨迹3公共轨迹点的索引如下:\n traj1:%s\n traj3:%s"%(sequence3[0],sequence3[1])) 79 | print("运行时间:%s秒"%(endtime - starttime,)) -------------------------------------------------------------------------------- /LongestCommonSubsequenceRecursive.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import time 3 | import numpy as np 4 | from scipy.spatial.distance import cdist 5 | # 回溯法提取公共序列的过程 6 | def extractCommonSequence(lcsMat,isEqual): 7 | # 获得seqA与seqB的长度 8 | n,m = isEqual.shape 9 | # 公共序列在序列A中的索引 10 | subSeqAIndex=[] 11 | # 公共序列在序列B中的索引 12 | subSeqBIndex=[] 13 | i = n-1 14 | j = m-1 15 | # 回溯寻找 16 | while i > 0 and j > 0: 17 | if isEqual[i][j] == 1: 18 | # 如果这两个值都相等,那么回溯到isEqual[i-1][i-1] 19 | subSeqAIndex.insert(0, i) 20 | subSeqBIndex.insert(0, j) 21 | i = i-1 22 | j = j-1 23 | elif lcsMat[i+1][j] >= lcsMat[i][j+1]: 24 | # 进入到这里,说明不相等,且矩阵矩阵左边大于上面 25 | # 即lcsMat[i+1][j+1]是由lcsMat[i+1][j]得到,向左回退,因此j回退 26 | j = j - 1 27 | else: 28 | # 进入到这里,说明不相等,且矩阵上面大于左边 29 | # 即lcsMat[i+1][j+1]是由lcsMat[i][j+1]得到,向上回退,因此j回退 30 | i = i - 1 31 | return [subSeqAIndex, subSeqBIndex] 32 | # 使用递归的方式求解lcsMat的i,j的数值 33 | # 即seqMat右下角的最后一个值为lcss距离 34 | # 返回值:lcss值,最长公共子序列 35 | def _lcss(lcsMat,isEqual, i, j): 36 | # 如果lcsMat[i][j]不等于-1,直接返回,不需要计算了(借助动态规划的思想) 37 | if lcsMat[i][j] > -1: 38 | return lcsMat[i][j] 39 | if i == 0 or j == 0: 40 | # 当i,j都等于0的时候,直接复制给 41 | lcsMat[i][j] = 0 42 | elif isEqual[i-1][j-1] == 1: 43 | # 进入到这里,一定满足i != 0 and j != 0 44 | # 同时lcsMat[i][j]记录的是seqA[i-1]与seqB[j-1]序列长度 45 | # 如果相等,那么等于lcsMat[i-1][j-1]+1 46 | lcsMat[i][j] = _lcss(lcsMat,isEqual, i-1, j-1)+1 47 | else: 48 | # 进入到这里,一定满足i != 0 and j != 0 and seqA[i-1]!=seqB[j-1] 49 | # 同时lcsMat[i][j]记录的是seqA[i-1]与seqB[j-1]序列长度 50 | # 如果不等,那么等于max(lcsMat[i-1][j],lcsMat[i][j-1]) 51 | lcsMat[i][j] = max(_lcss(lcsMat, isEqual, i-1, j), _lcss(lcsMat, isEqual, i, j-1)) 52 | return lcsMat[i][j] 53 | # 递归求解最长公共子序列 54 | def LongestCommonSubsequence(seqA,seqB,tol): 55 | # 获取序列A的长度 56 | n = seqA.shape[0] 57 | # 获取序列B的长度 58 | m = seqB.shape[0] 59 | # 生成0矩阵,序列矩阵seqMat[i][j] 60 | # lcsMat[i][j]:表示seqA前i个序列与seqB前j个序列的最长公共子序列长度 61 | # lcsMat[0][j],与lcsMat[i][0]都为0,即,当一个序列为0时,没有公共子序列 62 | lcsMat = np.full((n+1, m+1), -1) 63 | disMat = cdist(seqA, seqB, metric='euclidean') 64 | # 任意两点的距离,如果小于容差,那么就是赋值为1,负责赋值为0 65 | # 用于判断,元素是否相等 66 | # isEqual[i][j]==1表示seqA[i]==seqB[j] 67 | # isEqual[i][j]==0表示seqA[i]!=seqB[j] 68 | isEqual = np.where(disMat < tol, 1, 0) 69 | _lcss(lcsMat,isEqual, n, m) 70 | # 用于求解公共子序列相应的位置 71 | sequence = extractCommonSequence(lcsMat, isEqual) 72 | # 用于计算相似度 73 | similarity = 1 - lcsMat[n][m] * 1.0 / min(n, m) 74 | return lcsMat[n][m], similarity, sequence 75 | data = np.loadtxt("./data/traj.csv",delimiter=",") 76 | # 加载三条轨迹 77 | traj1, traj2, traj3 = data[:8], data[8:15], data[15:] 78 | starttime = time.clock() 79 | l1, similarity1, sequence1 = LongestCommonSubsequence(traj1, traj2, 5) 80 | l2, similarity2, sequence2 = LongestCommonSubsequence(traj2, traj3, 5) 81 | l3, similarity3, sequence3 = LongestCommonSubsequence(traj1, traj3, 5) 82 | endtime = time.clock() 83 | print("轨迹1与轨迹2的LCSS距离为:%s\n轨迹1与轨迹2公共子序列的长度为:%s" % (similarity1,l1)) 84 | print("轨迹2与轨迹3的LCSS距离为:%s\n轨迹1与轨迹2公共子序列的长度为:%s" % (similarity2,l2)) 85 | print("轨迹1与轨迹3的LCSS距离为:%s\n轨迹1与轨迹2公共子序列的长度为:%s" % (similarity3,l3)) 86 | print("轨迹1与轨迹3公共轨迹点的索引如下:\n traj1:%s\n traj3:%s" % (sequence3[0],sequence3[1])) 87 | print("运行时间:%s秒" % (endtime - starttime,)) -------------------------------------------------------------------------------- /LongestCommonSubsequenceLoop.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import time 3 | import numpy as np 4 | # 用于判断两个点是否相等(或者是同一个点),相等返回true,否则false 5 | # 在本实例中,序列中的每一个元素只有一个值,因此只比较a[0]=b[0]即可 6 | # 这是算法可以改进的地方 7 | # 这里可以进一步优化,直接求n*m的距离矩阵(详细看递归) 8 | def _equal(pt1,pt2,tol): 9 | # 求两个点的欧式距离 10 | dis = np.linalg.norm(pt1 - pt2) 11 | if dis < tol: 12 | return True 13 | else: 14 | return False 15 | # 回溯法提取公共序列的过程 16 | def extractCommonSequence(lcsMat,isEqual): 17 | # 获得seqA与seqB的长度 18 | n,m = isEqual.shape 19 | # 公共序列在序列A中的索引 20 | subSeqAIndex=[] 21 | # 公共序列在序列B中的索引 22 | subSeqBIndex=[] 23 | i = n-1 24 | j = m-1 25 | # 回溯寻找 26 | while i != 0 and j != 0: 27 | if isEqual[i][j] == 1: 28 | # 如果这两个值都相等,那么回溯到isEqual[i-1][i-1] 29 | subSeqAIndex.insert(0, i) 30 | subSeqBIndex.insert(0, j) 31 | i = i-1 32 | j = j-1 33 | elif lcsMat[i+1][j] >= lcsMat[i][j+1]: 34 | # 进入到这里,说明不相等,且矩阵矩阵左边大于上面 35 | # 即lcsMat[i+1][j+1]是由lcsMat[i+1][j]得到,向左回退,因此j回退 36 | j = j - 1 37 | else: 38 | # 进入到这里,说明不相等,且矩阵上面大于左边 39 | # 即lcsMat[i+1][j+1]是由lcsMat[i][j+1]得到,向上回退,因此j回退 40 | i = i - 1 41 | return [subSeqAIndex, subSeqBIndex] 42 | # 循环求解最长公共子序列 43 | def LongestCommonSubsequence(seqA,seqB,tol): 44 | # 获取序列A的长度 45 | n = seqA.shape[0] 46 | # 获取序列B的长度 47 | m = seqB.shape[0] 48 | # 生成0矩阵,序列矩阵lcsMat[i][j] 49 | # lcsMat[i][j]:表示seqA前i个序列与seqB前j个序列的最长公共子序列长度 50 | # lcsMat[0][j],与lcsMat[i][0]都为0,即,当一个序列为0时,没有公共子序列 51 | lcsMat = np.zeros((n+1, m+1), dtype=np.int) 52 | # 用于判断,元素是否相等 53 | # isEqual[i][j]==1表示seqA[i]==seqB[j] 54 | # isEqual[i][j]==0表示seqA[i]!=seqB[j] 55 | isEqual = np.zeros((n, m), dtype=np.int) 56 | # 循环为lcsMat赋值 57 | # lcsMat[n][m]为最长公共子序列的长度 58 | for i in range(n): 59 | # 为lcsMat一行一行的赋值(注意:要思考为什么可以一行一行的赋值) 60 | # 原因:lcsMat[i][j]的值仅与三个位置有关,分别为:lcsMat[i-1][j-1]、lcsMat[i-1][j]、lcsMat[i][j-1] 61 | # 因此每一次迭代,所有的三个元素肯定都已经有值了 62 | for j in range(m): 63 | # seqA[i]seqB[j]的公共子序列长度,记录在lcsMat[i + 1][j + 1] 64 | # 因为有了0序列,矩阵多了一行一列(均为0) 65 | if _equal(seqA[i],seqB[j],tol): 66 | # 如果最后一个元素相等,那么就是lcsMat[i][j]+1,即当前元素等于左上角元素加1 67 | lcsMat[i + 1][j + 1] = lcsMat[i][j] + 1 68 | isEqual[i][j] = 1 69 | else: 70 | # 如果最后一个元素不相等,即当前元素等于左边元素或者上面元素中最大的那个 71 | lcsMat[i + 1][j + 1] = max(lcsMat[i + 1][j],lcsMat[i][j + 1]) 72 | # 用于求解公共子序列相应的位置 73 | sequence = extractCommonSequence(lcsMat,isEqual) 74 | # 用于计算相似度 75 | similarity = 1-lcsMat[n][m]*1.0/min(n,m) 76 | return lcsMat[n][m],similarity,sequence 77 | data = np.loadtxt("./data/traj.csv",delimiter=",") 78 | # 加载三条轨迹 79 | traj1, traj2, traj3 = data[:8], data[8:15], data[15:] 80 | starttime = time.clock() 81 | l1, similarity1, sequence1 = LongestCommonSubsequence(traj1, traj2, 5) 82 | l2, similarity2, sequence2 = LongestCommonSubsequence(traj2, traj3, 5) 83 | l3, similarity3, sequence3 = LongestCommonSubsequence(traj1, traj3, 5) 84 | endtime = time.clock() 85 | print("轨迹1与轨迹2的LCSS距离为:%s\n轨迹1与轨迹2公共子序列的长度为:%s"%(similarity1,l1)) 86 | print("轨迹2与轨迹3的LCSS距离为:%s\n轨迹1与轨迹2公共子序列的长度为:%s"%(similarity2,l2)) 87 | print("轨迹1与轨迹3的LCSS距离为:%s\n轨迹1与轨迹2公共子序列的长度为:%s"%(similarity3,l3)) 88 | print("轨迹1与轨迹3公共轨迹点的索引如下:\n traj1:%s\n traj3:%s"%(sequence3[0],sequence3[1])) 89 | print("运行时间:%s秒" % (endtime - starttime,)) --------------------------------------------------------------------------------