├── .gitignore ├── readme.txt ├── lists.txt └── lcs.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | Python LCS 2 | 3 | A relatively simple Python script to find the longest common subsequence(s) of two lists of integers. For more information on the problem: http://en.wikipedia.org/wiki/Longest_common_subsequence_problem 4 | 5 | To use: store two lists of comma-separated integers in "lists.txt", separated by a line break. A lists.txt file is checked into the repo, populated with randomly generated lists of 100 integers as an example. -------------------------------------------------------------------------------- /lists.txt: -------------------------------------------------------------------------------- 1 | -67, 64, -63, -53, -2, -35, 41, -77, 29, 19, 52, 12, -79, 10, -69, -43, -64, -44, 83, 41, 76, -11, 2, -26, 83, -11, -30, 47, -36, -91, 31, 91, 90, -68, -16, 94, 38, 59, -23, 59, 86, 75, -11, -12, 47, 26, -84, 70, 55, 11, 12, 56, 37, -16, -53, 39, -21, 33, 22, -98, 18, 18, -97, -76, 90, 11, 5, -94, -47, 53, 27, -64, 20, 42, 85, -70, -3, -47, -25, 49, -24, 90, 26, 89, -75, -54, -97, -58, 5, 33, 74, 59, 75, -56, 11, -68, -20, 84, -67, -31 2 | -38, 18, -69, -54, 97, 100, -99, 86, -42, 51, -91, 57, -9, -39, -51, 63, 72, 99, 70, -30, -20, -73, -66, 54, 96, 26, 81, 75, -78, -20, 18, -59, -5, -53, 69, 7, -62, -30, 19, -78, -32, -54, -40, 100, 95, -39, 71, 18, 47, -44, -94, -66, -98, -91, -29, -31, 34, -56, -60, -35, -6, -69, 44, -2, -43, 9, 93, 92, -58, -58, 54, 13, -74, 67, -96, 55, -29, 60, 18, -52, 3, -5, -37, -86, 66, 47, 6, -77, 95, 53, -72, -47, -68, -35, -66, 63, 70, 74, 32, 75 -------------------------------------------------------------------------------- /lcs.py: 
### solve the longest common subsequence problem


def lcs_mat(list1, list2):
    """Build the table of LCS lengths for every pair of prefixes.

    The table has ``len(list1) + 1`` rows and ``len(list2) + 1`` columns;
    ``mat[i][j]`` is the LCS length of ``list1[:i]`` and ``list2[:j]``.
    Row 0 and column 0 stay zero (empty prefix), so the ``- 1`` lookups in
    the recurrence need no special-casing at the first elements.

    Args:
        list1: first sequence (elements only need to support ``==``).
        list2: second sequence.

    Returns:
        A list of lists of ints holding the LCS lengths.
    """
    m = len(list1)
    n = len(list2)
    # a fresh row list per row, so that rows are never aliased
    mat = [[0] * (n + 1) for _ in range(m + 1)]
    for row in range(1, m + 1):
        for col in range(1, n + 1):
            if list1[row - 1] == list2[col - 1]:
                # same element: one longer than the LCS of both truncated lists
                mat[row][col] = mat[row - 1][col - 1] + 1
            else:
                # different: the maximum of the LCS lengths of the two options
                # (a different list truncated in each case)
                mat[row][col] = max(mat[row][col - 1], mat[row - 1][col])
    return mat


def all_lcs(lcs_dict, mat, list1, list2, index1, index2):
    """Backtrack through ``mat`` collecting every distinct LCS.

    Args:
        lcs_dict: memo mapping ``(index1, index2)`` to the list of LCSs
            already computed for that pair of prefixes; filled in place.
        mat: length table as produced by ``lcs_mat(list1, list2)``.
        list1: first sequence.
        list2: second sequence.
        index1: length of the ``list1`` prefix under consideration.
        index2: length of the ``list2`` prefix under consideration.

    Returns:
        A list of lists: every distinct longest common subsequence of
        ``list1[:index1]`` and ``list2[:index2]``. It is ``[[]]`` when
        either prefix is empty.
    """
    key = (index1, index2)
    # if we've calculated it already, just return that
    # (`in` replaces dict.has_key, which no longer exists on Python 3)
    if key in lcs_dict:
        return lcs_dict[key]

    if index1 == 0 or index2 == 0:  # base case: only the empty subsequence
        return [[]]

    if list1[index1 - 1] == list2[index2 - 1]:
        # elements are equal: append it to every LCS of the shorter prefixes
        last = list1[index1 - 1]
        shorter = all_lcs(lcs_dict, mat, list1, list2, index1 - 1, index2 - 1)
        found = [prevs + [last] for prevs in shorter]
    else:
        found = []
        left = mat[index1][index2 - 1]
        up = mat[index1 - 1][index2]
        # follow the longer path; on a tie follow both ("left" first, so the
        # order of the results is stable)
        steps = []
        if left >= up:
            steps.append((index1, index2 - 1))
        if up >= left:
            steps.append((index1 - 1, index2))
        for next1, next2 in steps:
            for series in all_lcs(lcs_dict, mat, list1, list2, next1, next2):
                # both paths can reach the same subsequence; keep one copy
                if series not in found:
                    found.append(series)

    lcs_dict[key] = found
    return found


def lcs(list1, list2):
    """Return every distinct longest common subsequence of two lists.

    Args:
        list1: first sequence.
        list2: second sequence.

    Returns:
        A list of lists, one per distinct LCS; ``[[]]`` when the inputs have
        nothing in common.
    """
    # mapping of indices to list of LCSs, so we can cut down recursive calls
    # enormously
    mapping = {}
    return all_lcs(mapping, lcs_mat(list1, list2), list1, list2,
                   len(list1), len(list2))


### main ###

def main(path="lists.txt"):
    """Read two integer lists from ``path`` and print each of their LCSs.

    The first two lines of the file must each hold comma-separated integers.

    Args:
        path: file to read; defaults to ``lists.txt`` in the working
            directory.
    """
    # `with` guarantees the file is closed even if parsing fails
    with open(path) as f:
        contents = f.read().split("\n")
    l1 = [int(i) for i in contents[0].split(",")]
    l2 = [int(i) for i in contents[1].split(",")]
    for seq in lcs(l1, l2):
        # print(x) with a single argument behaves the same on Python 2 and 3
        print(seq)


if __name__ == "__main__":
    main()