├── Slides.pptx ├── Project Report.pdf ├── README.md ├── LICENSE ├── palindrome.ipynb ├── Project.ipynb ├── LPS.ipynb ├── LPS3.ipynb ├── new.ipynb └── Code.ipynb /Slides.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jayasurya-Marasani/Suffix-Arrays-in-Genome-Assembly/HEAD/Slides.pptx -------------------------------------------------------------------------------- /Project Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jayasurya-Marasani/Suffix-Arrays-in-Genome-Assembly/HEAD/Project Report.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Suffix-Arrays-in-Genome-Assembly 2 | The repository contains the use of suffix arrays in genome assembly 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Jayasurya Marasani 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /palindrome.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 7, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "def longestPalSubstr(str):\n", 10 | " n = len(str)\n", 11 | " maxLength = 1\n", 12 | " start = 0\n", 13 | " for i in range(n):\n", 14 | " for j in range(i, n):\n", 15 | " flag = 1\n", 16 | " for k in range(0, ((j - i) // 2) + 1):\n", 17 | " if (str[i + k] != str[j - k]):\n", 18 | " flag = 0\n", 19 | " if (flag != 0 and (j - i + 1) > maxLength):\n", 20 | " start = i\n", 21 | " maxLength = j - i + 1 \n", 22 | " print(\"Longest palindrome subString is: \")\n", 23 | " s1 =''\n", 24 | " for i in range(start, start + maxLength):\n", 25 | " s1 = s1 + str[i]\n", 26 | " return s1, len(s1)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 8, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "Longest palindrome subString is: \n", 39 | "('anana', 5)\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "s = input('Enter the string: ')\n", 45 | "print(longestPalSubstr(s))" 46 | ] 47 | } 48 | ], 49 | "metadata": { 50 | "interpreter": { 51 | "hash": "3dc8e47f82edb7463ca464588a7358a3d93aa0e55ce1d99dfe2c7a888c7afeb3" 52 | }, 53 | "kernelspec": { 54 | "display_name": "Python 3.10.1 64-bit", 
55 | "language": "python", 56 | "name": "python3" 57 | }, 58 | "language_info": { 59 | "codemirror_mode": { 60 | "name": "ipython", 61 | "version": 3 62 | }, 63 | "file_extension": ".py", 64 | "mimetype": "text/x-python", 65 | "name": "python", 66 | "nbconvert_exporter": "python", 67 | "pygments_lexer": "ipython3", 68 | "version": "3.10.1" 69 | }, 70 | "orig_nbformat": 4 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 2 74 | } 75 | -------------------------------------------------------------------------------- /Project.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a48ea6b0", 6 | "metadata": {}, 7 | "source": [ 8 | "# Finding the longest repeated substring" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 31, 14 | "id": "fc4abb7b", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "def Construct_LCP_array(suffix_array, input_string):\n", 19 | " n = len(suffix_array)\n", 20 | " size = len(input_string)\n", 21 | " r = [None] * size\n", 22 | " for i in range(n):\n", 23 | " r[suffix_array[i]] = i\n", 24 | " lcp = [None] * size\n", 25 | " lcp[0] = 0\n", 26 | " h = 0\n", 27 | " for i in range(size):\n", 28 | " if r[i] > 0:\n", 29 | " j = suffix_array[r[i] - 1]\n", 30 | " while i != size - h and j != size - h and input_string[i + h] == input_string[j + h]:\n", 31 | " h = h + 1\n", 32 | " lcp[r[i]] = h\n", 33 | " if h > 0:\n", 34 | " h = h - 1\n", 35 | " return lcp" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 32, 41 | "id": "c2cbdb1d", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "def longestCommonPrefix(strs):\n", 46 | " if len(strs) == 0:\n", 47 | " return \"\"\n", 48 | " current = strs[0]\n", 49 | " for i in range(1, len(strs)):\n", 50 | " temp = \"\"\n", 51 | " if len(current) == 0:\n", 52 | " break\n", 53 | " for j in range(len(strs[i])):\n", 54 | " if j < len(current) and 
current[j] == strs[i][j]:\n", 55 | " temp += current[j]\n", 56 | " else:\n", 57 | " break\n", 58 | " current = temp\n", 59 | " return len(current)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 33, 65 | "id": "587fe403", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "def Construct_LCP_array(suffix_array, input_string):\n", 70 | " \n", 71 | " ordered_list = []\n", 72 | " for i in suffix_array:\n", 73 | " ordered_list.append(input_string[i:])\n", 74 | " print(ordered_list)\n", 75 | " list3 = [0]*len(ordered_list)\n", 76 | " for i in range(0 , len(ordered_list)):\n", 77 | " if i==0:\n", 78 | " list3[i]=0\n", 79 | " else:\n", 80 | " strs=[ordered_list[i], ordered_list[i-1]]\n", 81 | " list3[i]=longestCommonPrefix(strs)\n", 82 | " return list3" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 34, 88 | "id": "1d2f2ea7", 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "def Construct_SuffixArray(input_str, len_str):\n", 93 | " Suffix_array = []\n", 94 | " Suffix_Dict = {}\n", 95 | " Suffix_DictReverse = {}\n", 96 | "\n", 97 | " for i in range(len_str):\n", 98 | " permutation = input_str[i:len_str]\n", 99 | " Suffix_Dict[i] = permutation\n", 100 | " Suffix_DictReverse[permutation] = i\n", 101 | "\n", 102 | " orderedList = sorted(Suffix_Dict.values())\n", 103 | "\n", 104 | " for i in orderedList:\n", 105 | " Suffix_array.append(Suffix_DictReverse[i])\n", 106 | "\n", 107 | " return Suffix_array, Suffix_Dict" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 35, 113 | "id": "fc479fdb", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "def main():\n", 118 | " input_str = input(\"Enter the Input String : \")\n", 119 | " len_str = len(input_str)\n", 120 | " Suffix_array, suffix_dict = Construct_SuffixArray(input_str, len_str)\n", 121 | " print(Suffix_array)\n", 122 | " lcp = Construct_LCP_array(Suffix_array, input_str)\n", 123 | " print(lcp)\n", 124 | " 
idx = lcp.index(max(lcp))\n", 125 | " idx_suffix = Suffix_array[idx]\n", 126 | " result = suffix_dict[idx_suffix]\n", 127 | " print(\"Input string: \" + input_str)\n", 128 | " print(\"Longest repeated substring: \" + result[0:max(lcp)])" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 36, 134 | "id": "be97b6b4", 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "[10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2]\n", 142 | "['A', 'ABRA', 'ABRACADABRA', 'ACADABRA', 'ADABRA', 'BRA', 'BRACADABRA', 'CADABRA', 'DABRA', 'RA', 'RACADABRA']\n", 143 | "[0, 1, 4, 1, 1, 0, 3, 0, 0, 0, 2]\n", 144 | "Input string: ABRACADABRA\n", 145 | "Longest repeated substring: ABRA\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "if __name__ == \"__main__\": #ABRACADABRA\n", 151 | " main()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "id": "5ddf0e55", 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [] 161 | } 162 | ], 163 | "metadata": { 164 | "kernelspec": { 165 | "display_name": "Python 3", 166 | "language": "python", 167 | "name": "python3" 168 | }, 169 | "language_info": { 170 | "codemirror_mode": { 171 | "name": "ipython", 172 | "version": 3 173 | }, 174 | "file_extension": ".py", 175 | "mimetype": "text/x-python", 176 | "name": "python", 177 | "nbconvert_exporter": "python", 178 | "pygments_lexer": "ipython3", 179 | "version": "3.10.1" 180 | } 181 | }, 182 | "nbformat": 4, 183 | "nbformat_minor": 5 184 | } 185 | -------------------------------------------------------------------------------- /LPS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 18, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "banana\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "s = 
input('Enter the String:\\n')\n", 18 | "print(s)\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 19, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "'banana#ananab'" 30 | ] 31 | }, 32 | "execution_count": 19, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | "s_new = s + '#' + s[::-1]\n", 39 | "s_new" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 20, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "def suffix_array_alternative_naive(s):\n", 49 | " return [rank for suffix, rank in sorted((s[i:], i) for i in range(len(s)))]" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 21, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "def LCP(sa , s_new):\n", 59 | " size = len(s_new)\n", 60 | " r = [None]* size\n", 61 | " for i in range(size):\n", 62 | " r[sa[i]] = i\n", 63 | " lcp = [None]*size\n", 64 | " h = 0\n", 65 | " print(r)\n", 66 | " for i in range(size):\n", 67 | " if r[i] > 0:\n", 68 | " j = sa [r[i] - 1]\n", 69 | " while i != size-h and j!= size-h and s_new[i+h] == s_new[j+h]:\n", 70 | " h = h+1\n", 71 | " lcp[r[i]] =h\n", 72 | " if h > 0:\n", 73 | " h = h - 1\n", 74 | " if size>0:\n", 75 | " lcp[0] = 0\n", 76 | " return lcp" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 26, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "[6, 5, 11, 3, 9, 1, 7, 12, 0, 4, 10, 2, 8]\n", 89 | "#ananab\n", 90 | "a#ananab\n", 91 | "ab\n", 92 | "ana#ananab\n", 93 | "anab\n", 94 | "anana#ananab\n", 95 | "ananab\n", 96 | "b\n", 97 | "banana#ananab\n", 98 | "na#ananab\n", 99 | "nab\n", 100 | "nana#ananab\n", 101 | "nanab\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "sa = suffix_array_alternative_naive(s_new)\n", 107 | "print(sa)\n", 108 | "for i in range (len(s_new)):\n", 109 | " 
print(s_new[sa[i]:])" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 23, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "[8, 5, 11, 3, 9, 1, 0, 6, 12, 4, 10, 2, 7]\n" 122 | ] 123 | }, 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "[0, 0, 1, 1, 3, 3, 5, 0, 1, 0, 2, 2, 4]" 128 | ] 129 | }, 130 | "execution_count": 23, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "lcp = LCP(sa,s_new)\n", 137 | "lcp" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 24, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "def longestCommonPrefix(strs):\n", 147 | " if len(strs) == 0:\n", 148 | " return \"\"\n", 149 | " current = strs[0]\n", 150 | " for i in range(1, len(strs)):\n", 151 | " temp = \"\"\n", 152 | " if len(current) == 0:\n", 153 | " break\n", 154 | " for j in range(len(strs[i])):\n", 155 | " if j < len(current) and current[j] == strs[i][j]:\n", 156 | " temp += current[j]\n", 157 | " else:\n", 158 | " break\n", 159 | " current = temp\n", 160 | " return current" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 25, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "calculating longest prefixes between a#ananab and ab\n", 173 | "longest prefix between them is \"a\"\n", 174 | "The length is = 1\n", 175 | "Position = 11\n", 176 | "calculating longest prefixes between ana#ananab and anab\n", 177 | "longest prefix between them is \"ana\"\n", 178 | "The length is = 3\n", 179 | "Position = 9\n", 180 | "calculating longest prefixes between anana#ananab and ananab\n", 181 | "longest prefix between them is \"anana\"\n", 182 | "The length is = 5\n", 183 | "Position = 7\n", 184 | "Length of Longest Palindrome is = 5\n", 185 | "Longest Palindrome is = anana\n" 186 | ] 187 | } 
188 | ], 189 | "source": [ 190 | "length_of_longest_palindrome = 0\n", 191 | "longest_length = 0\n", 192 | "Len = len(s_new) # Length of updated string\n", 193 | "actual_len = len(s) # Length of original string\n", 194 | "Position = 0\n", 195 | "strs = [None]*2\n", 196 | "for i in range(1,Len):\n", 197 | " \n", 198 | " if lcp[i]>longest_length:\n", 199 | " if(sa[i-1]actual_len) or (sa[i]actual_len):\n", 200 | " print('calculating longest prefixes between {a} and {b}'.format(a = s_new[sa[i-1]:], b = s_new[sa[i]:]))\n", 201 | " strs[0] = s_new[sa[i-1]:]\n", 202 | " strs[1] = s_new[sa[i]:]\n", 203 | " longest_length = lcp[i]\n", 204 | " print('longest prefix between them is \"{}\"'.format(longestCommonPrefix(strs)))\n", 205 | " print('The length is = {}'.format(longest_length))\n", 206 | " Position = sa[i]\n", 207 | " print(\"Position = \",Position)\n", 208 | "\n", 209 | "length_of_longest_palindrome = longest_length\n", 210 | "longest_palindrome = s_new[Position:Position+longest_length]\n", 211 | "print('Length of Longest Palindrome is = ',length_of_longest_palindrome)\n", 212 | "print('Longest Palindrome is = ',longest_palindrome)\n", 213 | " " 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "interpreter": { 219 | "hash": "3dc8e47f82edb7463ca464588a7358a3d93aa0e55ce1d99dfe2c7a888c7afeb3" 220 | }, 221 | "kernelspec": { 222 | "display_name": "Python 3.10.1 64-bit", 223 | "language": "python", 224 | "name": "python3" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": "text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.10.1" 237 | }, 238 | "orig_nbformat": 4 239 | }, 240 | "nbformat": 4, 241 | "nbformat_minor": 2 242 | } 243 | -------------------------------------------------------------------------------- /LPS3.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 9, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "The inverted suffix array is:\n", 13 | "\n", 14 | "[1, 10, '#', 0]\n", 15 | "[2, 10, '$', 0]\n", 16 | "[2, 4, 'AAATGC$', 2]\n", 17 | "[2, 5, 'AATGC$', 1]\n", 18 | "[2, 2, 'ATAAATGC$', 2]\n", 19 | "[1, 6, 'ATGC#', 4]\n", 20 | "[2, 6, 'ATGC$', 2]\n", 21 | "[1, 2, 'ATTTATGC#', 0]\n", 22 | "[1, 9, 'C#', 1]\n", 23 | "[2, 9, 'C$', 1]\n", 24 | "[2, 1, 'CATAAATGC$', 3]\n", 25 | "[1, 1, 'CATTTATGC#', 0]\n", 26 | "[1, 8, 'GC#', 2]\n", 27 | "[2, 8, 'GC$', 2]\n", 28 | "[2, 0, 'GCATAAATGC$', 4]\n", 29 | "[1, 0, 'GCATTTATGC#', 0]\n", 30 | "[2, 3, 'TAAATGC$', 2]\n", 31 | "[1, 5, 'TATGC#', 1]\n", 32 | "[1, 7, 'TGC#', 3]\n", 33 | "[2, 7, 'TGC$', 1]\n", 34 | "[1, 4, 'TTATGC#', 2]\n", 35 | "[1, 3, 'TTTATGC#', 0]\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "def complement(seq):\n", 41 | " complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}\n", 42 | " bases = [str(i) for i in seq]\n", 43 | " bases = [complement[base] for base in bases]\n", 44 | " compliment= ''.join(bases)\n", 45 | " reverse_compliment=compliment[::-1]\n", 46 | " return seq,reverse_compliment\n", 47 | "\n", 48 | "def add_charectors(seq,reverse_compliment):\n", 49 | " seq+= '#'\n", 50 | " reverse_compliment+='$'\n", 51 | " return seq,reverse_compliment\n", 52 | "\n", 53 | "\n", 54 | "\n", 55 | "def Construct_SuffixArray(str_1, str_2):\n", 56 | " list1=[]\n", 57 | " string_id={}\n", 58 | " suffix_dict={}\n", 59 | " len_str=len(str_1)\n", 60 | " for i in range(len_str):\n", 61 | " permutation1 = str_1[i:len_str]\n", 62 | " permutation2 = str_2[i:len_str]\n", 63 | " list1.append(permutation1)\n", 64 | " list1.append(permutation2)\n", 65 | " suffix_dict[permutation1]=i\n", 66 | " suffix_dict[permutation2]=i\n", 67 | " string_id[permutation1]=1\n", 
68 | " string_id[permutation2]=2\n", 69 | " \n", 70 | " ordered_list=sorted(list1)\n", 71 | " list2 = [[string_id[i],i] for i in ordered_list]\n", 72 | " list3 = []\n", 73 | " for i in list2:\n", 74 | " list3.append([i[0],suffix_dict[i[1]],i[1]])\n", 75 | "\n", 76 | " for i in range(0 , len(ordered_list)):\n", 77 | " if len(ordered_list)-1 == i:\n", 78 | " list3[i].append(0)\n", 79 | " else:\n", 80 | " strs=[ordered_list[i], ordered_list[i+1]]\n", 81 | " list3[i].append(longestCommonPrefix(strs))\n", 82 | " return list3\n", 83 | "\n", 84 | "def longestCommonPrefix(strs):\n", 85 | " if len(strs) == 0:\n", 86 | " return \"\"\n", 87 | " current = strs[0]\n", 88 | " for i in range(1, len(strs)):\n", 89 | " temp = \"\"\n", 90 | " if len(current) == 0:\n", 91 | " break\n", 92 | " for j in range(len(strs[i])):\n", 93 | " if j < len(current) and current[j] == strs[i][j]:\n", 94 | " temp += current[j]\n", 95 | " else:\n", 96 | " break\n", 97 | " current = temp\n", 98 | " return len(current)\n", 99 | "\n", 100 | "\n", 101 | "\n", 102 | "seq,rev=complement(input('Enter the DNA sequence:')) # GCATTTATGC , CGCTGTAGCG, \n", 103 | "seq1,rev1=add_charectors(seq,rev)\n", 104 | "inverted_sa= Construct_SuffixArray(seq1,rev1)\n", 105 | "print('The inverted suffix array is:\\n')\n", 106 | "for i in inverted_sa:\n", 107 | " print(i)\n" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## ALGORITHM TO DETECT LONG ARMED GAPPED PALINDROMES" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Finding Maximum LCP indexes corresponding to Maximum LCP Length" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 10, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "[6, 0]\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "max_LCP = max(inverted_sa,key = lambda x: x[3])[3]\n", 139 | 
"max_LCP_indexes = []\n", 140 | "for i in inverted_sa:\n", 141 | " if i[3]==max_LCP and i[0]==1:\n", 142 | " max_LCP_indexes.append(i[1])\n", 143 | " elif i[3]==max_LCP and i[0]==2:\n", 144 | " max_LCP_indexes.append(i[1])\n", 145 | "print(max_LCP_indexes)\n" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "Verifying Length Constraints" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 11, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "Complementary\n" 165 | ] 166 | } 167 | ], 168 | "source": [ 169 | "from Bio.Seq import Seq\n", 170 | "\n", 171 | "i = min(max_LCP_indexes)\n", 172 | "j = max(max_LCP_indexes)\n", 173 | "wi=''\n", 174 | "wj=''\n", 175 | "while(i=0 and b>=0 and b>a:\n", 179 | " wi =''\n", 180 | " wi = seq[0:i]\n", 181 | " if(wi==rev[a:b+1]):\n", 182 | " wi = wi[::-1]\n", 183 | " break\n", 184 | " i = i+1\n", 185 | "while i=0 and b>=0 and b>a:\n", 299 | " wi = ''\n", 300 | " wi = seq[0:i]\n", 301 | " if(wi==rev[a:b+1]):\n", 302 | " wi = wi[::-1]\n", 303 | " break\n", 304 | " i = i+1\n", 305 | "\n", 306 | "while ilongest_length:\n", 88 | " if(sa[i-1]actual_len) or (sa[i]actual_len):\n", 89 | " print('calculating longest prefixes between {a} and {b}'.format(a = s_new[sa[i-1]:], b = s_new[sa[i]:]))\n", 90 | " strs[0] = s_new[sa[i-1]:]\n", 91 | " strs[1] = s_new[sa[i]:]\n", 92 | " longest_length = lcp[i]\n", 93 | " print('longest prefix between them is \"{}\"'.format(longestCommonPrefix(strs)))\n", 94 | " print('The length is = {}'.format(longest_length))\n", 95 | " Position = sa[i]\n", 96 | " print(\"Position = \",Position)\n", 97 | "\n", 98 | "length_of_longest_palindrome = longest_length\n", 99 | "longest_palindrome = s_new[Position:Position+longest_length]\n", 100 | "print('Length of Longest Palindrome is = ',length_of_longest_palindrome)\n", 101 | "print('Longest Palindrome is = 
',longest_palindrome)\n", 102 | " \n" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 4, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "The Sequence is: GCATTTATGC\n", 115 | "The reversed sequence is: GCATAAATGC\n", 116 | "The Updated Sequence is: GCATTTATGC#\n", 117 | "The updated reversed sequence is: GCATAAATGC$\n", 118 | "The inverted suffix array is:\n", 119 | "[1, 10, '#', 0]\n", 120 | "[2, 10, '$', 0]\n", 121 | "[2, 4, 'AAATGC$', 2]\n", 122 | "[2, 5, 'AATGC$', 1]\n", 123 | "[2, 2, 'ATAAATGC$', 2]\n", 124 | "[1, 6, 'ATGC#', 4]\n", 125 | "[2, 6, 'ATGC$', 2]\n", 126 | "[1, 2, 'ATTTATGC#', 0]\n", 127 | "[1, 9, 'C#', 1]\n", 128 | "[2, 9, 'C$', 1]\n", 129 | "[2, 1, 'CATAAATGC$', 3]\n", 130 | "[1, 1, 'CATTTATGC#', 0]\n", 131 | "[1, 8, 'GC#', 2]\n", 132 | "[2, 8, 'GC$', 2]\n", 133 | "[2, 0, 'GCATAAATGC$', 4]\n", 134 | "[1, 0, 'GCATTTATGC#', 0]\n", 135 | "[2, 3, 'TAAATGC$', 2]\n", 136 | "[1, 5, 'TATGC#', 1]\n", 137 | "[1, 7, 'TGC#', 3]\n", 138 | "[2, 7, 'TGC$', 1]\n", 139 | "[1, 4, 'TTATGC#', 2]\n", 140 | "[1, 3, 'TTTATGC#', 0]\n", 141 | "The Maximum LCP Indexes are: [6, 0]\n", 142 | "The two Arms are Complementary\n", 143 | "The sequence GCATTTATGC is long armed gapped palindrome\n", 144 | "spacer length = 2\n", 145 | "palindrome arm length = 4\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "def complement(seq):\n", 151 | " complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}\n", 152 | " bases = [str(i) for i in seq]\n", 153 | " bases = [complement[base] for base in bases]\n", 154 | " compliment= ''.join(bases)\n", 155 | " reverse_compliment=compliment[::-1]\n", 156 | " return seq,reverse_compliment\n", 157 | "\n", 158 | "def add_charectors(seq,reverse_compliment):\n", 159 | " seq+= '#'\n", 160 | " reverse_compliment+='$'\n", 161 | " return seq,reverse_compliment\n", 162 | "\n", 163 | "\n", 164 | "\n", 165 | "def Construct_SuffixArray(str_1, str_2):\n", 
166 | " list1=[]\n", 167 | " string_id={}\n", 168 | " suffix_dict={}\n", 169 | " len_str=len(str_1)\n", 170 | " for i in range(len_str):\n", 171 | " permutation1 = str_1[i:len_str]\n", 172 | " permutation2 = str_2[i:len_str]\n", 173 | " list1.append(permutation1)\n", 174 | " list1.append(permutation2)\n", 175 | " suffix_dict[permutation1]=i\n", 176 | " suffix_dict[permutation2]=i\n", 177 | " string_id[permutation1]=1\n", 178 | " string_id[permutation2]=2\n", 179 | " \n", 180 | " ordered_list=sorted(list1)\n", 181 | " list2 = [[string_id[i],i] for i in ordered_list]\n", 182 | " list3 = []\n", 183 | " for i in list2:\n", 184 | " list3.append([i[0],suffix_dict[i[1]],i[1]])\n", 185 | "\n", 186 | " for i in range(0 , len(ordered_list)):\n", 187 | " if len(ordered_list)-1 == i:\n", 188 | " list3[i].append(0)\n", 189 | " else:\n", 190 | " strs=[ordered_list[i], ordered_list[i+1]]\n", 191 | " list3[i].append(longestCommonPrefix(strs))\n", 192 | " return list3\n", 193 | "\n", 194 | "def longestCommonPrefix(strs):\n", 195 | " if len(strs) == 0:\n", 196 | " return \"\"\n", 197 | " current = strs[0]\n", 198 | " for i in range(1, len(strs)):\n", 199 | " temp = \"\"\n", 200 | " if len(current) == 0:\n", 201 | " break\n", 202 | " for j in range(len(strs[i])):\n", 203 | " if j < len(current) and current[j] == strs[i][j]:\n", 204 | " temp += current[j]\n", 205 | " else:\n", 206 | " break\n", 207 | " current = temp\n", 208 | " return len(current)\n", 209 | "\n", 210 | "\n", 211 | "\n", 212 | "seq,rev=complement(input('Enter the DNA sequence:')) # GCATTTATGC , CGCTGTAGCG, \n", 213 | "print('The Sequence is: ',seq)\n", 214 | "print('The reversed sequence is: ',rev)\n", 215 | "seq1,rev1=add_charectors(seq,rev)\n", 216 | "print('The Updated Sequence is: ',seq1)\n", 217 | "print('The updated reversed sequence is: ',rev1)\n", 218 | "inverted_sa= Construct_SuffixArray(seq1,rev1)\n", 219 | "print('The inverted suffix array is:')\n", 220 | "for i in inverted_sa:\n", 221 | " print(i)\n", 222 
| "\n", 223 | "max_LCP = max(inverted_sa,key = lambda x: x[3])[3]\n", 224 | "max_LCP_indexes = []\n", 225 | "for i in inverted_sa:\n", 226 | " if i[3]==max_LCP and i[0]==1:\n", 227 | " max_LCP_indexes.append(i[1])\n", 228 | " elif i[3]==max_LCP and i[0]==2:\n", 229 | " max_LCP_indexes.append(i[1])\n", 230 | "print('The Maximum LCP Indexes are: ',max_LCP_indexes)\n", 231 | "from Bio.Seq import Seq\n", 232 | "\n", 233 | "i = min(max_LCP_indexes)\n", 234 | "j = max(max_LCP_indexes)\n", 235 | "wi=''\n", 236 | "wj=''\n", 237 | "while(i=0 and b>=0 and b>a:\n", 241 | " wi =''\n", 242 | " wi = seq[0:i]\n", 243 | " if(wi==rev[a:b+1]):\n", 244 | " wi = wi[::-1]\n", 245 | " break\n", 246 | " i = i+1\n", 247 | "while i=0 and b>=0 and b>a:\n", 403 | " wi = ''\n", 404 | " wi = seq[0:i]\n", 405 | " if(wi==rev[a:b+1]):\n", 406 | " wi = wi[::-1]\n", 407 | " break\n", 408 | " i = i+1\n", 409 | "\n", 410 | "while i 0 and match_at(Suffix_array[first - 1]):\n", 142 | " print(match_at(Suffix_array[first - 1]))\n", 143 | " first -= 1\n", 144 | "\n", 145 | " # and walk forwards to find the last match\n", 146 | " last = l\n", 147 | " while last maxLength):\n", 492 | " start = i\n", 493 | " maxLength = j - i + 1 \n", 494 | " print(\"Longest palindrome subString is: \")\n", 495 | " s1 =''\n", 496 | " for i in range(start, start + maxLength):\n", 497 | " s1 = s1 + str[i]\n", 498 | " return s1, len(s1)" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": 23, 504 | "metadata": {}, 505 | "outputs": [ 506 | { 507 | "name": "stdout", 508 | "output_type": "stream", 509 | "text": [ 510 | "Enter the string: banana\n", 511 | "Longest palindrome subString is: \n", 512 | "('anana', 5)\n" 513 | ] 514 | } 515 | ], 516 | "source": [ 517 | "s = input('Enter the string: ')\n", 518 | "print(longestPalSubstr(s))" 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "metadata": {}, 524 | "source": [ 525 | "Method 2 : Using Suffix Array" 526 | ] 527 | }, 528 | { 529 | 
"cell_type": "code", 530 | "execution_count": 24, 531 | "metadata": {}, 532 | "outputs": [ 533 | { 534 | "name": "stdout", 535 | "output_type": "stream", 536 | "text": [ 537 | "Enter the String:\n", 538 | "banana\n", 539 | "banana\n" 540 | ] 541 | } 542 | ], 543 | "source": [ 544 | "s = input('Enter the String:\\n')\n", 545 | "print(s)\n" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 25, 551 | "metadata": {}, 552 | "outputs": [ 553 | { 554 | "data": { 555 | "text/plain": [ 556 | "'banana#ananab'" 557 | ] 558 | }, 559 | "execution_count": 25, 560 | "metadata": {}, 561 | "output_type": "execute_result" 562 | } 563 | ], 564 | "source": [ 565 | "s_new = s + '#' + s[::-1]\n", 566 | "s_new" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": 26, 572 | "metadata": {}, 573 | "outputs": [], 574 | "source": [ 575 | "def suffix_array_alternative_naive(s):\n", 576 | " return [rank for suffix, rank in sorted((s[i:], i) for i in range(len(s)))]" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": 30, 582 | "metadata": {}, 583 | "outputs": [], 584 | "source": [ 585 | "def LCP(sa , s_new):\n", 586 | " size = len(s_new)\n", 587 | " r = [None]* size\n", 588 | " for i in range(size):\n", 589 | " r[sa[i]] = i\n", 590 | " lcp = [None]*size\n", 591 | " h = 0\n", 592 | "\n", 593 | " for i in range(size):\n", 594 | " if r[i] > 0:\n", 595 | " j = sa [r[i] - 1]\n", 596 | " while i != size-h and j!= size-h and s_new[i+h] == s_new[j+h]:\n", 597 | " h = h+1\n", 598 | " lcp[r[i]] =h\n", 599 | " if h > 0:\n", 600 | " h = h - 1\n", 601 | " if size>0:\n", 602 | " lcp[0] = 0\n", 603 | " return lcp\n" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 31, 609 | "metadata": {}, 610 | "outputs": [ 611 | { 612 | "name": "stdout", 613 | "output_type": "stream", 614 | "text": [ 615 | "[6, 5, 11, 3, 9, 1, 7, 12, 0, 4, 10, 2, 8]\n", 616 | "#ananab\n", 617 | "a#ananab\n", 618 | "ab\n", 619 | 
"ana#ananab\n", 620 | "anab\n", 621 | "anana#ananab\n", 622 | "ananab\n", 623 | "b\n", 624 | "banana#ananab\n", 625 | "na#ananab\n", 626 | "nab\n", 627 | "nana#ananab\n", 628 | "nanab\n" 629 | ] 630 | } 631 | ], 632 | "source": [ 633 | "sa = suffix_array_alternative_naive(s_new)\n", 634 | "print(sa)\n", 635 | "for i in range (len(s_new)):\n", 636 | " print(s_new[sa[i]:])" 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": 32, 642 | "metadata": {}, 643 | "outputs": [ 644 | { 645 | "data": { 646 | "text/plain": [ 647 | "[0, 0, 1, 1, 3, 3, 5, 0, 1, 0, 2, 2, 4]" 648 | ] 649 | }, 650 | "execution_count": 32, 651 | "metadata": {}, 652 | "output_type": "execute_result" 653 | } 654 | ], 655 | "source": [ 656 | "lcp = LCP(sa,s_new)\n", 657 | "lcp" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": 33, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "def longestCommonPrefix(strs):\n", 667 | " if len(strs) == 0:\n", 668 | " return \"\"\n", 669 | " current = strs[0]\n", 670 | " for i in range(1, len(strs)):\n", 671 | " temp = \"\"\n", 672 | " if len(current) == 0:\n", 673 | " break\n", 674 | " for j in range(len(strs[i])):\n", 675 | " if j < len(current) and current[j] == strs[i][j]:\n", 676 | " temp += current[j]\n", 677 | " else:\n", 678 | " break\n", 679 | " current = temp\n", 680 | " return current" 681 | ] 682 | }, 683 | { 684 | "cell_type": "code", 685 | "execution_count": 34, 686 | "metadata": {}, 687 | "outputs": [ 688 | { 689 | "name": "stdout", 690 | "output_type": "stream", 691 | "text": [ 692 | "calculating longest prefixes between a#ananab and ab\n", 693 | "longest prefix between them is \"a\"\n", 694 | "The length is = 1\n", 695 | "Position = 11\n", 696 | "calculating longest prefixes between ana#ananab and anab\n", 697 | "longest prefix between them is \"ana\"\n", 698 | "The length is = 3\n", 699 | "Position = 9\n", 700 | "calculating longest prefixes between anana#ananab and ananab\n", 701 | 
"longest prefix between them is \"anana\"\n", 702 | "The length is = 5\n", 703 | "Position = 7\n", 704 | "Length of Longest Palindrome is = 5\n", 705 | "Longest Palindrome is = anana\n" 706 | ] 707 | } 708 | ], 709 | "source": [ 710 | "length_of_longest_palindrome = 0\n", 711 | "longest_length = 0\n", 712 | "Len = len(s_new) # Length of updated string\n", 713 | "actual_len = len(s) # Length of original string\n", 714 | "Position = 0\n", 715 | "strs = [None]*2\n", 716 | "for i in range(1,Len):\n", 717 | " \n", 718 | " if lcp[i]>longest_length:\n", 719 | " if(sa[i-1]actual_len) or (sa[i]actual_len):\n", 720 | " print('calculating longest prefixes between {a} and {b}'.format(a = s_new[sa[i-1]:], b = s_new[sa[i]:]))\n", 721 | " strs[0] = s_new[sa[i-1]:]\n", 722 | " strs[1] = s_new[sa[i]:]\n", 723 | " longest_length = lcp[i]\n", 724 | " print('longest prefix between them is \"{}\"'.format(longestCommonPrefix(strs)))\n", 725 | " print('The length is = {}'.format(longest_length))\n", 726 | " Position = sa[i]\n", 727 | " print(\"Position = \",Position)\n", 728 | "\n", 729 | "length_of_longest_palindrome = longest_length\n", 730 | "longest_palindrome = s_new[Position:Position+longest_length]\n", 731 | "print('Length of Longest Palindrome is = ',length_of_longest_palindrome)\n", 732 | "print('Longest Palindrome is = ',longest_palindrome)\n", 733 | " " 734 | ] 735 | }, 736 | { 737 | "cell_type": "markdown", 738 | "metadata": {}, 739 | "source": [ 740 | " " 741 | ] 742 | }, 743 | { 744 | "cell_type": "markdown", 745 | "metadata": {}, 746 | "source": [ 747 | "## Algorithm to detect Long armed gapped palindrome" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": 35, 753 | "metadata": {}, 754 | "outputs": [ 755 | { 756 | "name": "stdout", 757 | "output_type": "stream", 758 | "text": [ 759 | "Enter the DNA sequence:GCATTTATGC\n", 760 | "The inverted suffix array is:\n", 761 | "\n", 762 | "[1, 10, '#', 0]\n", 763 | "[2, 10, '$', 0]\n", 764 | "[2, 4, 
def complement(seq):
    """Return ``seq`` together with its reverse complement.

    Parameters
    ----------
    seq : str
        DNA sequence made of the upper-case bases A, C, G and T.

    Returns
    -------
    tuple of (str, str)
        ``(seq, reverse_complement)`` -- the input unchanged, and the
        base-wise complement read backwards.

    Raises
    ------
    KeyError
        If ``seq`` contains anything other than A, C, G or T.
    """
    # Named differently from the function so the function is not shadowed.
    pairing = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    complemented = ''.join(pairing[str(base)] for base in seq)
    reverse_compliment = complemented[::-1]
    return seq, reverse_compliment


def add_charectors(seq, reverse_compliment):
    """Append the terminators: ``'#'`` to ``seq`` and ``'$'`` to ``reverse_compliment``.

    The two different terminators keep every suffix of one string distinct
    from every suffix of the other.

    Returns
    -------
    tuple of (str, str)
        ``(seq + '#', reverse_compliment + '$')``.
    """
    return seq + '#', reverse_compliment + '$'


def Construct_SuffixArray(str_1, str_2):
    """Build a combined, sorted suffix table for two strings.

    Every suffix of ``str_1`` (labelled 1) and of ``str_2`` (labelled 2) is
    collected and sorted lexicographically.

    Parameters
    ----------
    str_1, str_2 : str
        The two strings, normally already terminated by ``add_charectors``.

    Returns
    -------
    list of [int, int, str, int]
        One row per suffix, in sorted order:
        ``[string_label, start_index, suffix, lcp_with_next_row]``.
        The last row has an LCP of 0 because it has no successor.
    """
    # Carry the label and start index with each suffix instead of looking
    # them up in dicts keyed by the suffix text: identical suffixes coming
    # from the two strings would otherwise overwrite each other's metadata.
    # Each string is also enumerated over its own length.
    entries = []
    for label, text in ((1, str_1), (2, str_2)):
        for start in range(len(text)):
            entries.append((text[start:], label, start))
    entries.sort()

    rows = []
    last = len(entries) - 1
    for pos, (suffix, label, start) in enumerate(entries):
        if pos == last:
            shared = 0
        else:
            shared = longestCommonPrefix([suffix, entries[pos + 1][0]])
        rows.append([label, start, suffix, shared])
    return rows


def longestCommonPrefix(strs):
    """Return the LENGTH of the longest prefix shared by all strings in ``strs``.

    NOTE: an earlier notebook cell defines a function with the same name that
    returns the prefix string itself; running this cell replaces it with this
    length-returning version.

    Parameters
    ----------
    strs : sequence of str
        Strings to compare; it must support ``len()`` and integer indexing.

    Returns
    -------
    int
        Number of leading characters common to every string (0 for an empty
        ``strs``).
    """
    if len(strs) == 0:
        # Always an int, so callers can compare or sum the result safely.
        return 0

    reference = strs[0]
    shared = len(reference)
    for idx in range(1, len(strs)):
        if shared == 0:
            break
        matched = 0
        for left, right in zip(reference[:shared], strs[idx]):
            if left != right:
                break
            matched += 1
        shared = matched
    return shared


# In a notebook kernel __name__ is "__main__", so the driver below still runs
# when the cell is executed; the guard only keeps input() from firing when
# the definitions are imported from elsewhere.
if __name__ == "__main__":
    seq,rev=complement(input('Enter the DNA sequence:'))  # GCATTTATGC , CGCTGTAGCG,
    seq1,rev1=add_charectors(seq,rev)
    inverted_sa= Construct_SuffixArray(seq1,rev1)
    print('The inverted suffix array is:\n')
    for i in inverted_sa:
        print(i)

    # --- next notebook cell ---
    # Collect the start index of every row whose LCP equals the maximum LCP.
    max_LCP = max(inverted_sa,key = lambda x: x[3])[3]
    max_LCP_indexes = []
    for i in inverted_sa:
        # Rows are labelled 1 or 2 only, so one test covers both strings.
        if i[3]==max_LCP and i[0] in (1, 2):
            max_LCP_indexes.append(i[1])
    print(max_LCP_indexes)
898 | "Complementary\n" 899 | ] 900 | } 901 | ], 902 | "source": [ 903 | "from Bio.Seq import Seq\n", 904 | "\n", 905 | "i = min(max_LCP_indexes)\n", 906 | "j = max(max_LCP_indexes)\n", 907 | "wi=''\n", 908 | "wj=''\n", 909 | "while(i=0 and b>=0 and b>a:\n", 913 | " wi =''\n", 914 | " wi = seq[0:i]\n", 915 | " if(wi==rev[a:b+1]):\n", 916 | " wi = wi[::-1]\n", 917 | " break\n", 918 | " i = i+1\n", 919 | "while i=0 and b>=0 and b>a:\n", 1148 | " wi = ''\n", 1149 | " wi = seq[0:i]\n", 1150 | " if(wi==rev[a:b+1]):\n", 1151 | " wi = wi[::-1]\n", 1152 | " break\n", 1153 | " i = i+1\n", 1154 | "\n", 1155 | "while i