├── requirements.txt ├── screenshot-no-deletions.png ├── .gitignore ├── search_recombinants.py ├── setup.py ├── LICENSE ├── mapping.csv ├── primers ├── midnight.bed ├── artic_v3.bed ├── artic_v4.bed └── artic_v4_1.bed ├── README.md ├── reference.fasta └── sc2rf.py /requirements.txt: -------------------------------------------------------------------------------- 1 | termcolor >= 1.1.0 2 | requests >= 2.27.1 3 | tqdm >= 4.58.0 -------------------------------------------------------------------------------- /screenshot-no-deletions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lenaschimmel/sc2rf/HEAD/screenshot-no-deletions.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Project specific 2 | nextclade-output/ 3 | data 4 | filter_by_id.mjs 5 | genemap.gff 6 | 7 | 8 | # generated once we use setup.py 9 | build 10 | sc2rf.egg-info 11 | 12 | # files that lena did not clean up yet 13 | test.ansi 14 | test.csv 15 | -------------------------------------------------------------------------------- /search_recombinants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | print ("'search_recombinants.py' has been renamed to 'sc2rf.py', along with renaming the whole repository.") 3 | print () 4 | print ("The old repository URL will still be reachable and redirect to the new one, but you will have to get used to the new filename. 
The small program which shows you this message will also go away in the near future.") 5 | print () 6 | print ("Sorry for any inconvenience!") -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="sc2rf", 5 | description="SARS-Cov-2 Recombinant Finder for fasta sequences", 6 | license="MIT", 7 | author="Lena Schimmel", 8 | url="https://github.com/lenaschimmel/sc2rf", 9 | version="0.0.2", 10 | py_modules=["sc2rf"], 11 | install_requires=[ 12 | "termcolor>=1.1.0", 13 | "requests>=2.27.1", 14 | "tqdm>=4.63.1", 15 | ], 16 | python_requires=">=3.6", 17 | entry_points={ 18 | "console_scripts": [ 19 | "sc2rf = sc2rf:main", 20 | ], 21 | }, 22 | classifiers=[ 23 | "Development Status :: 4 - Beta", 24 | "Environment :: Console", 25 | "Operating System :: POSIX", 26 | "Intended Audience :: Science/Research", 27 | "License :: OSI Approved :: MIT License", 28 | "Programming Language :: Python :: 3.6", 29 | "Topic :: Scientific/Engineering :: Bio-Informatics", 30 | ], 31 | keywords="cli" 32 | ) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Lena Schimmel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the 
Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /mapping.csv: -------------------------------------------------------------------------------- 1 | NextstrainClade,PangoLineage,Letter,WhoLabel,Other,WhoClass,Query 2 | 20I,B.1.1.7,α,Alpha,VOC 202012/01,VOC, 3 | 20H,B.1.351,β,Beta,501Y.V2,VOC, 4 | 20J,P.1,γ,Gamma,,VOC, 5 | 21A,B.1.617.2,δ,Delta,,VOC, 6 | 21I,,δ,Delta,,VOC, 7 | 21J,,δ,Delta,,VOC, 8 | ,AY.4,δ,Delta,,VOC, 9 | ,AY.103,δ,Delta,,VOC, 10 | ,AY.43,δ,Delta,,VOC, 11 | ,AY.44,δ,Delta,,VOC, 12 | ,AY.122,δ,Delta,,VOC, 13 | ,AY.3,δ,Delta,,VOC, 14 | ,AY.4.2,δ,Delta,,VOC, 15 | ,AY.100,δ,Delta,,VOC, 16 | ,AY.25.1,δ,Delta,,VOC, 17 | ,AY.25,δ,Delta,,VOC, 18 | 21B,B.1.617.1,κ,Kappa,,Former, 19 | 21C,B.1.427 / B.1.429,ε,Epsilon,CAL.20C,Former, 20 | 21D,B.1.525,η,Eta,,Former, 21 | 21F,B.1.526,ι,Iota,(Part of Pango lineage),Former, 22 | 21G,C.37,λ,Lambda,,VOI, 23 | 21H,B.1.621,μ,Mu,,VOI, 24 | 21K,BA.1,ο,Omicron,,VOC, 25 | 21L,BA.2,ο,Omicron,,VOC, 26 | ,BA.3,ο,Omicron,,VOC, 27 | ,BA.4,ο,Omicron,,VOC,"BA.4* | (BA.2* & 12160A & 9866C & 27788T)" 28 | ,BA.5,ο,Omicron,,VOC,"BA.5* | (BA.2* & 12160A & 9866C & 27259A)" 29 | 21M,B.1.1.529,ο,Omicron,Parental lineage,VOC, 30 | 20E,B.1.177,,,EU1,, 31 | 20B/S:732A,B.1.1.519,,,,, 32 | 20A/S:126A,B.1.620,,,,, 33 | 20A.EU2,B.1.160,,,,, 34 | 20A/S:439K,B.1.258,,,,, 35 | 20A/S:98F,B.1.221,,,,, 36 | 20C/S:80Y,B.1.367,,,,, 37 | 20B/S:626S,B.1.1.277,,,,, 38 | 20B/S:1122L,B.1.1.302,,,,, 
-------------------------------------------------------------------------------- /primers/midnight.bed: -------------------------------------------------------------------------------- 1 | MN908947.3 30 54 nCoV-2019_1_LEFT nCoV-2019_1 + 2 | MN908947.3 1183 1205 nCoV-2019_1_RIGHT nCoV-2019_1 - 3 | MN908947.3 1100 1128 nCoV-2019_2_LEFT nCoV-2019_2 + 4 | MN908947.3 2244 2266 nCoV-2019_2_RIGHT nCoV-2019_2 - 5 | MN908947.3 2153 2179 nCoV-2019_3_LEFT nCoV-2019_1 + 6 | MN908947.3 3235 3257 nCoV-2019_3_RIGHT nCoV-2019_1 - 7 | MN908947.3 3144 3166 nCoV-2019_4_LEFT nCoV-2019_2 + 8 | MN908947.3 4240 4262 nCoV-2019_4_RIGHT nCoV-2019_2 - 9 | MN908947.3 4167 4189 nCoV-2019_5_LEFT nCoV-2019_1 + 10 | MN908947.3 5337 5359 nCoV-2019_5_RIGHT nCoV-2019_1 - 11 | MN908947.3 5257 5286 nCoV-2019_6_LEFT nCoV-2019_2 + 12 | MN908947.3 6358 6380 nCoV-2019_6_RIGHT nCoV-2019_2 - 13 | MN908947.3 6283 6307 nCoV-2019_7_LEFT nCoV-2019_1 + 14 | MN908947.3 7379 7401 nCoV-2019_7_RIGHT nCoV-2019_1 - 15 | MN908947.3 7298 7328 nCoV-2019_8_LEFT nCoV-2019_2 + 16 | MN908947.3 8363 8385 nCoV-2019_8_RIGHT nCoV-2019_2 - 17 | MN908947.3 8253 8282 nCoV-2019_9_LEFT nCoV-2019_1 + 18 | MN908947.3 9378 9400 nCoV-2019_9_RIGHT nCoV-2019_1 - 19 | MN908947.3 9303 9327 nCoV-2019_10_LEFT nCoV-2019_2 + 20 | MN908947.3 10429 10451 nCoV-2019_10_RIGHT nCoV-2019_2 - 21 | MN908947.3 10343 10370 nCoV-2019_11_LEFT nCoV-2019_1 + 22 | MN908947.3 11447 11469 nCoV-2019_11_RIGHT nCoV-2019_1 - 23 | MN908947.3 11372 11394 nCoV-2019_12_LEFT nCoV-2019_2 + 24 | MN908947.3 12538 12560 nCoV-2019_12_RIGHT nCoV-2019_2 - 25 | MN908947.3 12450 12473 nCoV-2019_13_LEFT nCoV-2019_1 + 26 | MN908947.3 13599 13621 nCoV-2019_13_RIGHT nCoV-2019_1 - 27 | MN908947.3 13509 13532 nCoV-2019_14_LEFT nCoV-2019_2 + 28 | MN908947.3 14619 14641 nCoV-2019_14_RIGHT nCoV-2019_2 - 29 | MN908947.3 14540 14568 nCoV-2019_15_LEFT nCoV-2019_1 + 30 | MN908947.3 15713 15735 nCoV-2019_15_RIGHT nCoV-2019_1 - 31 | MN908947.3 15608 15634 nCoV-2019_16_LEFT nCoV-2019_2 + 32 | 
MN908947.3 16698 16720 nCoV-2019_16_RIGHT nCoV-2019_2 - 33 | MN908947.3 16624 16647 nCoV-2019_17_LEFT nCoV-2019_1 + 34 | MN908947.3 17732 17754 nCoV-2019_17_RIGHT nCoV-2019_1 - 35 | MN908947.3 17622 17649 nCoV-2019_18_LEFT nCoV-2019_2 + 36 | MN908947.3 18684 18706 nCoV-2019_18_RIGHT nCoV-2019_2 - 37 | MN908947.3 18596 18618 nCoV-2019_19_LEFT nCoV-2019_1 + 38 | MN908947.3 19655 19678 nCoV-2019_19_RIGHT nCoV-2019_1 - 39 | MN908947.3 19574 19604 nCoV-2019_20_LEFT nCoV-2019_2 + 40 | MN908947.3 20676 20698 nCoV-2019_20_RIGHT nCoV-2019_2 - 41 | MN908947.3 20553 20581 nCoV-2019_21_LEFT nCoV-2019_1 + 42 | MN908947.3 21620 21642 nCoV-2019_21_RIGHT nCoV-2019_1 - 43 | MN908947.3 21532 21562 nCoV-2019_22_LEFT nCoV-2019_2 + 44 | MN908947.3 22590 22612 nCoV-2019_22_RIGHT nCoV-2019_2 - 45 | MN908947.3 22511 22537 nCoV-2019_23_LEFT nCoV-2019_1 + 46 | MN908947.3 23609 23631 nCoV-2019_23_RIGHT nCoV-2019_1 - 47 | MN908947.3 23518 23544 nCoV-2019_24_LEFT nCoV-2019_2 + 48 | MN908947.3 24714 24736 nCoV-2019_24_RIGHT nCoV-2019_2 - 49 | MN908947.3 24633 24658 nCoV-2019_25_LEFT nCoV-2019_1 + 50 | MN908947.3 25768 25790 nCoV-2019_25_RIGHT nCoV-2019_1 - 51 | MN908947.3 25690 25712 nCoV-2019_26_LEFT nCoV-2019_2 + 52 | MN908947.3 26835 26857 nCoV-2019_26_RIGHT nCoV-2019_2 - 53 | MN908947.3 26744 26766 nCoV-2019_27_LEFT nCoV-2019_1 + 54 | MN908947.3 27872 27894 nCoV-2019_27_RIGHT nCoV-2019_1 - 55 | MN908947.3 27784 27808 nCoV-2019_28_LEFT nCoV-2019_2 + 56 | MN908947.3 28985 29007 nCoV-2019_28_RIGHT nCoV-2019_2 - 57 | MN908947.3 28677 28699 nCoV-2019_29_LEFT nCoV-2019_1 + 58 | MN908947.3 29768 29790 nCoV-2019_29_RIGHT nCoV-2019_1 - -------------------------------------------------------------------------------- /primers/artic_v3.bed: -------------------------------------------------------------------------------- 1 | MN908947.3 30 54 nCoV-2019_1_LEFT 1 + 2 | MN908947.3 385 410 nCoV-2019_1_RIGHT 1 - 3 | MN908947.3 320 342 nCoV-2019_2_LEFT 2 + 4 | MN908947.3 704 726 nCoV-2019_2_RIGHT 2 - 5 | 
MN908947.3 642 664 nCoV-2019_3_LEFT 1 + 6 | MN908947.3 1004 1028 nCoV-2019_3_RIGHT 1 - 7 | MN908947.3 943 965 nCoV-2019_4_LEFT 2 + 8 | MN908947.3 1312 1337 nCoV-2019_4_RIGHT 2 - 9 | MN908947.3 1242 1264 nCoV-2019_5_LEFT 1 + 10 | MN908947.3 1623 1651 nCoV-2019_5_RIGHT 1 - 11 | MN908947.3 1573 1595 nCoV-2019_6_LEFT 2 + 12 | MN908947.3 1942 1964 nCoV-2019_6_RIGHT 2 - 13 | MN908947.3 1875 1897 nCoV-2019_7_LEFT 1 + 14 | MN908947.3 1868 1890 nCoV-2019_7_LEFT_alt0 1 + 15 | MN908947.3 2247 2269 nCoV-2019_7_RIGHT 1 - 16 | MN908947.3 2242 2264 nCoV-2019_7_RIGHT_alt5 1 - 17 | MN908947.3 2181 2205 nCoV-2019_8_LEFT 2 + 18 | MN908947.3 2568 2592 nCoV-2019_8_RIGHT 2 - 19 | MN908947.3 2505 2529 nCoV-2019_9_LEFT 1 + 20 | MN908947.3 2504 2528 nCoV-2019_9_LEFT_alt4 1 + 21 | MN908947.3 2882 2904 nCoV-2019_9_RIGHT 1 - 22 | MN908947.3 2880 2902 nCoV-2019_9_RIGHT_alt2 1 - 23 | MN908947.3 2826 2850 nCoV-2019_10_LEFT 2 + 24 | MN908947.3 3183 3210 nCoV-2019_10_RIGHT 2 - 25 | MN908947.3 3144 3166 nCoV-2019_11_LEFT 1 + 26 | MN908947.3 3507 3531 nCoV-2019_11_RIGHT 1 - 27 | MN908947.3 3460 3482 nCoV-2019_12_LEFT 2 + 28 | MN908947.3 3826 3853 nCoV-2019_12_RIGHT 2 - 29 | MN908947.3 3771 3795 nCoV-2019_13_LEFT 1 + 30 | MN908947.3 4142 4164 nCoV-2019_13_RIGHT 1 - 31 | MN908947.3 4054 4077 nCoV-2019_14_LEFT 2 + 32 | MN908947.3 4044 4068 nCoV-2019_14_LEFT_alt4 2 + 33 | MN908947.3 4428 4450 nCoV-2019_14_RIGHT 2 - 34 | MN908947.3 4402 4424 nCoV-2019_14_RIGHT_alt2 2 - 35 | MN908947.3 4294 4321 nCoV-2019_15_LEFT 1 + 36 | MN908947.3 4296 4322 nCoV-2019_15_LEFT_alt1 1 + 37 | MN908947.3 4674 4696 nCoV-2019_15_RIGHT 1 - 38 | MN908947.3 4666 4689 nCoV-2019_15_RIGHT_alt3 1 - 39 | MN908947.3 4636 4658 nCoV-2019_16_LEFT 2 + 40 | MN908947.3 4995 5017 nCoV-2019_16_RIGHT 2 - 41 | MN908947.3 4939 4966 nCoV-2019_17_LEFT 1 + 42 | MN908947.3 5296 5321 nCoV-2019_17_RIGHT 1 - 43 | MN908947.3 5230 5259 nCoV-2019_18_LEFT 2 + 44 | MN908947.3 5257 5287 nCoV-2019_18_LEFT_alt2 2 + 45 | MN908947.3 5620 5644 nCoV-2019_18_RIGHT 2 
- 46 | MN908947.3 5620 5643 nCoV-2019_18_RIGHT_alt1 2 - 47 | MN908947.3 5563 5586 nCoV-2019_19_LEFT 1 + 48 | MN908947.3 5932 5957 nCoV-2019_19_RIGHT 1 - 49 | MN908947.3 5867 5894 nCoV-2019_20_LEFT 2 + 50 | MN908947.3 6247 6272 nCoV-2019_20_RIGHT 2 - 51 | MN908947.3 6167 6196 nCoV-2019_21_LEFT 1 + 52 | MN908947.3 6168 6197 nCoV-2019_21_LEFT_alt2 1 + 53 | MN908947.3 6528 6550 nCoV-2019_21_RIGHT 1 - 54 | MN908947.3 6526 6548 nCoV-2019_21_RIGHT_alt0 1 - 55 | MN908947.3 6466 6495 nCoV-2019_22_LEFT 2 + 56 | MN908947.3 6846 6873 nCoV-2019_22_RIGHT 2 - 57 | MN908947.3 6718 6745 nCoV-2019_23_LEFT 1 + 58 | MN908947.3 7092 7117 nCoV-2019_23_RIGHT 1 - 59 | MN908947.3 7035 7058 nCoV-2019_24_LEFT 2 + 60 | MN908947.3 7389 7415 nCoV-2019_24_RIGHT 2 - 61 | MN908947.3 7305 7332 nCoV-2019_25_LEFT 1 + 62 | MN908947.3 7671 7694 nCoV-2019_25_RIGHT 1 - 63 | MN908947.3 7626 7651 nCoV-2019_26_LEFT 2 + 64 | MN908947.3 7997 8019 nCoV-2019_26_RIGHT 2 - 65 | MN908947.3 7943 7968 nCoV-2019_27_LEFT 1 + 66 | MN908947.3 8319 8341 nCoV-2019_27_RIGHT 1 - 67 | MN908947.3 8249 8275 nCoV-2019_28_LEFT 2 + 68 | MN908947.3 8635 8661 nCoV-2019_28_RIGHT 2 - 69 | MN908947.3 8595 8619 nCoV-2019_29_LEFT 1 + 70 | MN908947.3 8954 8983 nCoV-2019_29_RIGHT 1 - 71 | MN908947.3 8888 8913 nCoV-2019_30_LEFT 2 + 72 | MN908947.3 9245 9271 nCoV-2019_30_RIGHT 2 - 73 | MN908947.3 9204 9226 nCoV-2019_31_LEFT 1 + 74 | MN908947.3 9557 9585 nCoV-2019_31_RIGHT 1 - 75 | MN908947.3 9477 9502 nCoV-2019_32_LEFT 2 + 76 | MN908947.3 9834 9858 nCoV-2019_32_RIGHT 2 - 77 | MN908947.3 9784 9806 nCoV-2019_33_LEFT 1 + 78 | MN908947.3 10146 10171 nCoV-2019_33_RIGHT 1 - 79 | MN908947.3 10076 10099 nCoV-2019_34_LEFT 2 + 80 | MN908947.3 10437 10459 nCoV-2019_34_RIGHT 2 - 81 | MN908947.3 10362 10384 nCoV-2019_35_LEFT 1 + 82 | MN908947.3 10737 10763 nCoV-2019_35_RIGHT 1 - 83 | MN908947.3 10666 10688 nCoV-2019_36_LEFT 2 + 84 | MN908947.3 11048 11074 nCoV-2019_36_RIGHT 2 - 85 | MN908947.3 10999 11022 nCoV-2019_37_LEFT 1 + 86 | MN908947.3 11372 
11394 nCoV-2019_37_RIGHT 1 - 87 | MN908947.3 11306 11331 nCoV-2019_38_LEFT 2 + 88 | MN908947.3 11668 11693 nCoV-2019_38_RIGHT 2 - 89 | MN908947.3 11555 11584 nCoV-2019_39_LEFT 1 + 90 | MN908947.3 11927 11949 nCoV-2019_39_RIGHT 1 - 91 | MN908947.3 11863 11889 nCoV-2019_40_LEFT 2 + 92 | MN908947.3 12234 12256 nCoV-2019_40_RIGHT 2 - 93 | MN908947.3 12110 12133 nCoV-2019_41_LEFT 1 + 94 | MN908947.3 12465 12490 nCoV-2019_41_RIGHT 1 - 95 | MN908947.3 12417 12439 nCoV-2019_42_LEFT 2 + 96 | MN908947.3 12779 12802 nCoV-2019_42_RIGHT 2 - 97 | MN908947.3 12710 12732 nCoV-2019_43_LEFT 1 + 98 | MN908947.3 13074 13096 nCoV-2019_43_RIGHT 1 - 99 | MN908947.3 13005 13027 nCoV-2019_44_LEFT 2 + 100 | MN908947.3 13007 13029 nCoV-2019_44_LEFT_alt3 2 + 101 | MN908947.3 13378 13400 nCoV-2019_44_RIGHT 2 - 102 | MN908947.3 13363 13385 nCoV-2019_44_RIGHT_alt0 2 - 103 | MN908947.3 13319 13344 nCoV-2019_45_LEFT 1 + 104 | MN908947.3 13307 13336 nCoV-2019_45_LEFT_alt2 1 + 105 | MN908947.3 13669 13699 nCoV-2019_45_RIGHT 1 - 106 | MN908947.3 13660 13689 nCoV-2019_45_RIGHT_alt7 1 - 107 | MN908947.3 13599 13621 nCoV-2019_46_LEFT 2 + 108 | MN908947.3 13602 13625 nCoV-2019_46_LEFT_alt1 2 + 109 | MN908947.3 13962 13984 nCoV-2019_46_RIGHT 2 - 110 | MN908947.3 13961 13984 nCoV-2019_46_RIGHT_alt2 2 - 111 | MN908947.3 13918 13946 nCoV-2019_47_LEFT 1 + 112 | MN908947.3 14271 14299 nCoV-2019_47_RIGHT 1 - 113 | MN908947.3 14207 14232 nCoV-2019_48_LEFT 2 + 114 | MN908947.3 14579 14601 nCoV-2019_48_RIGHT 2 - 115 | MN908947.3 14545 14570 nCoV-2019_49_LEFT 1 + 116 | MN908947.3 14898 14926 nCoV-2019_49_RIGHT 1 - 117 | MN908947.3 14865 14895 nCoV-2019_50_LEFT 2 + 118 | MN908947.3 15224 15246 nCoV-2019_50_RIGHT 2 - 119 | MN908947.3 15171 15193 nCoV-2019_51_LEFT 1 + 120 | MN908947.3 15538 15560 nCoV-2019_51_RIGHT 1 - 121 | MN908947.3 15481 15503 nCoV-2019_52_LEFT 2 + 122 | MN908947.3 15861 15886 nCoV-2019_52_RIGHT 2 - 123 | MN908947.3 15827 15851 nCoV-2019_53_LEFT 1 + 124 | MN908947.3 16186 16209 nCoV-2019_53_RIGHT 
1 - 125 | MN908947.3 16118 16144 nCoV-2019_54_LEFT 2 + 126 | MN908947.3 16485 16510 nCoV-2019_54_RIGHT 2 - 127 | MN908947.3 16416 16444 nCoV-2019_55_LEFT 1 + 128 | MN908947.3 16804 16833 nCoV-2019_55_RIGHT 1 - 129 | MN908947.3 16748 16770 nCoV-2019_56_LEFT 2 + 130 | MN908947.3 17130 17152 nCoV-2019_56_RIGHT 2 - 131 | MN908947.3 17065 17087 nCoV-2019_57_LEFT 1 + 132 | MN908947.3 17430 17452 nCoV-2019_57_RIGHT 1 - 133 | MN908947.3 17381 17406 nCoV-2019_58_LEFT 2 + 134 | MN908947.3 17738 17761 nCoV-2019_58_RIGHT 2 - 135 | MN908947.3 17674 17697 nCoV-2019_59_LEFT 1 + 136 | MN908947.3 18036 18062 nCoV-2019_59_RIGHT 1 - 137 | MN908947.3 17966 17993 nCoV-2019_60_LEFT 2 + 138 | MN908947.3 18324 18348 nCoV-2019_60_RIGHT 2 - 139 | MN908947.3 18253 18275 nCoV-2019_61_LEFT 1 + 140 | MN908947.3 18650 18672 nCoV-2019_61_RIGHT 1 - 141 | MN908947.3 18596 18618 nCoV-2019_62_LEFT 2 + 142 | MN908947.3 18957 18979 nCoV-2019_62_RIGHT 2 - 143 | MN908947.3 18896 18918 nCoV-2019_63_LEFT 1 + 144 | MN908947.3 19275 19297 nCoV-2019_63_RIGHT 1 - 145 | MN908947.3 19204 19232 nCoV-2019_64_LEFT 2 + 146 | MN908947.3 19591 19616 nCoV-2019_64_RIGHT 2 - 147 | MN908947.3 19548 19570 nCoV-2019_65_LEFT 1 + 148 | MN908947.3 19911 19939 nCoV-2019_65_RIGHT 1 - 149 | MN908947.3 19844 19866 nCoV-2019_66_LEFT 2 + 150 | MN908947.3 20231 20255 nCoV-2019_66_RIGHT 2 - 151 | MN908947.3 20172 20200 nCoV-2019_67_LEFT 1 + 152 | MN908947.3 20542 20572 nCoV-2019_67_RIGHT 1 - 153 | MN908947.3 20472 20496 nCoV-2019_68_LEFT 2 + 154 | MN908947.3 20867 20890 nCoV-2019_68_RIGHT 2 - 155 | MN908947.3 20786 20813 nCoV-2019_69_LEFT 1 + 156 | MN908947.3 21146 21169 nCoV-2019_69_RIGHT 1 - 157 | MN908947.3 21075 21104 nCoV-2019_70_LEFT 2 + 158 | MN908947.3 21427 21455 nCoV-2019_70_RIGHT 2 - 159 | MN908947.3 21357 21386 nCoV-2019_71_LEFT 1 + 160 | MN908947.3 21716 21743 nCoV-2019_71_RIGHT 1 - 161 | MN908947.3 21658 21682 nCoV-2019_72_LEFT 2 + 162 | MN908947.3 22013 22038 nCoV-2019_72_RIGHT 2 - 163 | MN908947.3 21961 21990 
nCoV-2019_73_LEFT 1 + 164 | MN908947.3 22324 22346 nCoV-2019_73_RIGHT 1 - 165 | MN908947.3 22262 22290 nCoV-2019_74_LEFT 2 + 166 | MN908947.3 22626 22650 nCoV-2019_74_RIGHT 2 - 167 | MN908947.3 22516 22542 nCoV-2019_75_LEFT 1 + 168 | MN908947.3 22877 22903 nCoV-2019_75_RIGHT 1 - 169 | MN908947.3 22797 22819 nCoV-2019_76_LEFT 2 + 170 | MN908947.3 22798 22821 nCoV-2019_76_LEFT_alt3 2 + 171 | MN908947.3 23192 23214 nCoV-2019_76_RIGHT 2 - 172 | MN908947.3 23189 23212 nCoV-2019_76_RIGHT_alt0 2 - 173 | MN908947.3 23122 23144 nCoV-2019_77_LEFT 1 + 174 | MN908947.3 23500 23522 nCoV-2019_77_RIGHT 1 - 175 | MN908947.3 23443 23466 nCoV-2019_78_LEFT 2 + 176 | MN908947.3 23822 23847 nCoV-2019_78_RIGHT 2 - 177 | MN908947.3 23789 23812 nCoV-2019_79_LEFT 1 + 178 | MN908947.3 24145 24169 nCoV-2019_79_RIGHT 1 - 179 | MN908947.3 24078 24100 nCoV-2019_80_LEFT 2 + 180 | MN908947.3 24443 24467 nCoV-2019_80_RIGHT 2 - 181 | MN908947.3 24391 24416 nCoV-2019_81_LEFT 1 + 182 | MN908947.3 24765 24789 nCoV-2019_81_RIGHT 1 - 183 | MN908947.3 24696 24721 nCoV-2019_82_LEFT 2 + 184 | MN908947.3 25052 25076 nCoV-2019_82_RIGHT 2 - 185 | MN908947.3 24978 25003 nCoV-2019_83_LEFT 1 + 186 | MN908947.3 25347 25369 nCoV-2019_83_RIGHT 1 - 187 | MN908947.3 25279 25301 nCoV-2019_84_LEFT 2 + 188 | MN908947.3 25646 25673 nCoV-2019_84_RIGHT 2 - 189 | MN908947.3 25601 25623 nCoV-2019_85_LEFT 1 + 190 | MN908947.3 25969 25994 nCoV-2019_85_RIGHT 1 - 191 | MN908947.3 25902 25924 nCoV-2019_86_LEFT 2 + 192 | MN908947.3 26290 26315 nCoV-2019_86_RIGHT 2 - 193 | MN908947.3 26197 26219 nCoV-2019_87_LEFT 1 + 194 | MN908947.3 26566 26590 nCoV-2019_87_RIGHT 1 - 195 | MN908947.3 26520 26542 nCoV-2019_88_LEFT 2 + 196 | MN908947.3 26890 26913 nCoV-2019_88_RIGHT 2 - 197 | MN908947.3 26835 26857 nCoV-2019_89_LEFT 1 + 198 | MN908947.3 26838 26860 nCoV-2019_89_LEFT_alt2 1 + 199 | MN908947.3 27202 27227 nCoV-2019_89_RIGHT 1 - 200 | MN908947.3 27190 27215 nCoV-2019_89_RIGHT_alt4 1 - 201 | MN908947.3 27141 27164 nCoV-2019_90_LEFT 2 + 
202 | MN908947.3 27511 27533 nCoV-2019_90_RIGHT 2 - 203 | MN908947.3 27446 27471 nCoV-2019_91_LEFT 1 + 204 | MN908947.3 27825 27854 nCoV-2019_91_RIGHT 1 - 205 | MN908947.3 27784 27808 nCoV-2019_92_LEFT 2 + 206 | MN908947.3 28145 28172 nCoV-2019_92_RIGHT 2 - 207 | MN908947.3 28081 28104 nCoV-2019_93_LEFT 1 + 208 | MN908947.3 28442 28464 nCoV-2019_93_RIGHT 1 - 209 | MN908947.3 28394 28416 nCoV-2019_94_LEFT 2 + 210 | MN908947.3 28756 28779 nCoV-2019_94_RIGHT 2 - 211 | MN908947.3 28677 28699 nCoV-2019_95_LEFT 1 + 212 | MN908947.3 29041 29063 nCoV-2019_95_RIGHT 1 - 213 | MN908947.3 28985 29007 nCoV-2019_96_LEFT 2 + 214 | MN908947.3 29356 29378 nCoV-2019_96_RIGHT 2 - 215 | MN908947.3 29288 29316 nCoV-2019_97_LEFT 1 + 216 | MN908947.3 29665 29693 nCoV-2019_97_RIGHT 1 - 217 | MN908947.3 29486 29510 nCoV-2019_98_LEFT 2 + 218 | MN908947.3 29836 29866 nCoV-2019_98_RIGHT 2 - 219 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Sc2rf - SARS-Cov-2 Recombinant Finder 2 | _Pronounced: Scarf_ 3 | 4 | ## What's this? 5 | Sc2rf can search genome sequences of SARS-CoV-2 for potential recombinants - new virus lineages that have (partial) genes from more than one parent lineage. 6 | 7 | ## Is it already usable? 8 | **This is a very young project, started on March 5th, 2022. As such, proceed with care. Results may be wrong or misleading, and with every update, anything can still change a lot.** 9 | 10 | Anyway, I'm happy that scientists are already seeing benefits from Sc2rf and using it to prepare lineage proposals for [cov-lineages/pango-designation](https://github.com/cov-lineages/pango-designation/issues). 11 | 12 | Though I already have a lot of ideas and plans for Sc2rf (see at the bottom of this document), I'm very open for suggestions and feature requests. 
Please write an [issue](https://github.com/lenaschimmel/sarscov2recombinants/issues), start a [discussion](https://github.com/lenaschimmel/sarscov2recombinants/discussions) or get in touch via [mail](mailto:mail@lenaschimmel.de) or [twitter](https://twitter.com/LenaSchimmel)! 13 | 14 | ## Example output 15 | ![Screenshot of the terminal output of Sc2rf](screenshot-no-deletions.png) 16 | 17 | ## Requirements and Installation 18 | You need at least Python 3.6 and you need to install the requirements first. You might use something like `python3 -m pip install -r requirements.txt` to do that. There's a `setup.py` which you should probably ignore, since it's work in progress and does not work as intended yet. 19 | 20 | Also, you need a terminal which supports ANSI control sequences to display colored text. On Linux, MacOS, etc. it should probably work. 21 | 22 | On Windows, color support is tricky. On a recent version of Windows 10, it should work, but if it doesn't, install Windows Terminal from [GitHub](https://github.com/Microsoft/Terminal) or [Microsoft Store](https://www.microsoft.com/de-de/p/windows-terminal/9n0dx20hk701?rtc=1&activetab=pivot:overviewtab) and run it from there. 23 | 24 | ## Basic Usage 25 | Start with a `.fasta` file with one or more sequences which might contain recombinants. Your sequences have to be aligned to the `reference.fasta`. If they are not, you will get an error message like: 26 | 27 | > Sequence hCoV-19/Phantasialand/EFWEFWD not properly aligned, length is 29718 instead of 29903. 28 | 29 | _(For historical reasons, I always used [Nextclade](https://docs.nextstrain.org/projects/nextclade/en/stable/user/nextclade-cli.html) to get aligned sequences, but you might also use [Nextalign](https://docs.nextstrain.org/projects/nextclade/en/stable/user/nextalign-cli.html) or any other tool. Installing them is easy on Linux or MacOS, but not on Windows. 
You can also use a web-based tool like [MAFFT](https://mafft.cbrc.jp/alignment/software/closelyrelatedviralgenomes.html).)_ 30 | 31 | Then call: 32 | 33 | ``` 34 | sc2rf.py 35 | ``` 36 | 37 | If you just need some fasta files for testing, you can search the [pango-lineage proposals](https://github.com/cov-lineages/pango-designation/issues) for recombinant issues with fasta-files, or take some files from [my shared-sequences repository](https://github.com/lenaschimmel/shared-sequences), which might not contain any actual recombinants, but hundreds of sequences that look like they were! 38 | 39 | ## No output / some sequences not shown 40 | By default, a lot of filters are active to show only the likely recombinants, so that you can input 10000s of sequences and just get output for the interesting ones. If you want, you can disable all filters like this, which is only recommended for small input files with fewer than 100 sequences: 41 | 42 | ``` 43 | sc2rf.py --parents 1-35 --breakpoints 0-100 \ 44 | --unique 1 --max-ambiguous 10000 45 | ``` 46 | 47 | or even 48 | 49 | ``` 50 | sc2rf.py --parents 1-35 --breakpoints 0-100 \ 51 | --unique 1 --max-ambiguous 10000 --force-all-parents \ 52 | --clades all 53 | ``` 54 | 55 | The meaning of these parameters is described below. 
56 | 57 | ## Advanced Usage 58 | You can execute `sc2rf.py -h` to get exactly this help message: 59 | 60 | 61 | ``` 62 | usage: sc2rf.py [-h] [--primers [PRIMER ...]] 63 | [--primer-intervals [INTERVAL ...]] 64 | [--parents INTERVAL] [--breakpoints INTERVAL] 65 | [--clades [CLADES ...]] [--unique NUM] 66 | [--max-intermission-length NUM] 67 | [--max-intermission-count NUM] 68 | [--max-name-length NUM] [--max-ambiguous NUM] 69 | [--force-all-parents] 70 | [--select-sequences INTERVAL] 71 | [--enable-deletions] [--show-private-mutations] 72 | [--rebuild-examples] [--mutation-threshold NUM] 73 | [--add-spaces [NUM]] [--sort-by-id [NUM]] 74 | [--verbose] [--ansi] [--hide-progress] 75 | [--csvfile CSVFILE] 76 | [input ...] 77 | 78 | Analyse SARS-CoV-2 sequences for potential, unknown recombinant 79 | variants. 80 | 81 | positional arguments: 82 | input input sequence(s) to test, as aligned 83 | .fasta file(s) (default: None) 84 | 85 | optional arguments: 86 | -h, --help show this help message and exit 87 | 88 | --primers [PRIMER ...] 89 | Filenames of primer set(s) to visualize. 90 | The .bed formats for ARTIC and EasySeq 91 | are recognized and supported. (default: 92 | None) 93 | 94 | --primer-intervals [INTERVAL ...] 95 | Coordinate intervals in which to 96 | visualize primers. (default: None) 97 | 98 | --parents INTERVAL, -p INTERVAL 99 | Allowed number of potential parents of a 100 | recombinant. (default: 2-4) 101 | 102 | --breakpoints INTERVAL, -b INTERVAL 103 | Allowed number of breakpoints in a 104 | recombinant. (default: 1-4) 105 | 106 | --clades [CLADES ...], -c [CLADES ...] 107 | List of variants which are considered as 108 | potential parents. Use Nextstrain clades 109 | (like "21B"), or Pango Lineages (like 110 | "B.1.617.1") or both. Also accepts "all". 
111 | (default: ['20I', '20H', '20J', '21I', 112 | '21J', 'BA.1', 'BA.2', 'BA.3']) 113 | 114 | --unique NUM, -u NUM Minimum of substitutions in a sample 115 | which are unique to a potential parent 116 | clade, so that the clade will be 117 | considered. (default: 2) 118 | 119 | --max-intermission-length NUM, -l NUM 120 | The maximum length of an intermission in 121 | consecutive substitutions. Intermissions 122 | are stretches to be ignored when counting 123 | breakpoints. (default: 2) 124 | 125 | --max-intermission-count NUM, -i NUM 126 | The maximum number of intermissions which 127 | will be ignored. Surplus intermissions 128 | count towards the number of breakpoints. 129 | (default: 8) 130 | 131 | --max-name-length NUM, -n NUM 132 | Only show up to NUM characters of sample 133 | names. (default: 30) 134 | 135 | --max-ambiguous NUM, -a NUM 136 | Maximum number of ambiguous nucs in a 137 | sample before it gets ignored. (default: 138 | 50) 139 | 140 | --force-all-parents, -f 141 | Force to consider all clades as potential 142 | parents for all sequences. Only useful 143 | for debugging. 144 | 145 | --select-sequences INTERVAL, -s INTERVAL 146 | Use only a specific range of input 147 | sequences. DOES NOT YET WORK WITH 148 | MULTIPLE INPUT FILES. (default: 0-999999) 149 | 150 | --enable-deletions, -d 151 | Include deletions in lineage comparision. 152 | 153 | --show-private-mutations 154 | Display mutations which are not in any of 155 | the potential parental clades. 156 | 157 | --rebuild-examples, -r 158 | Rebuild the mutations in examples by 159 | querying cov-spectrum.org. 160 | 161 | --mutation-threshold NUM, -t NUM 162 | Consider mutations with a prevalence of 163 | at least NUM as mandatory for a clade 164 | (range 0.05 - 1.0, default: 0.75). 165 | 166 | --add-spaces [NUM] Add spaces between every N colums, which 167 | makes it easier to keep your eye at a 168 | fixed place. 
(default without flag: 0, 169 | default with flag: 5) 170 | 171 | --sort-by-id [NUM] Sort the input sequences by the ID. If 172 | you provide NUM, only the first NUM 173 | characters are considered. Useful if this 174 | correlates with meaning full meta 175 | information, e.g. the sequencing lab. 176 | (default without flag: 0, default with 177 | flag: 999) 178 | 179 | --verbose, -v Print some more information, mostly 180 | useful for debugging. 181 | 182 | --ansi Use only ASCII characters to be 183 | compatible with ansilove. 184 | 185 | --hide-progress Don't show progress bars during long 186 | task. 187 | 188 | --csvfile CSVFILE Path to write results in CSV format. 189 | (default: None) 190 | 191 | An Interval can be a single number ("3"), a closed interval 192 | ("2-5" ) or an open one ("4-" or "-7"). The limits are inclusive. 193 | Only positive numbers are supported. 194 | 195 | ``` 196 | 197 | 198 | 199 | 200 | ## Interpreting the output 201 | _To be written..._ 202 | 203 | There already is a short [Twitter thread](https://twitter.com/LenaSchimmel/status/1506768971931996162) which explains the basics. 204 | 205 | ## Source material attribution 206 | * `virus_properties.json` contains data from [LAPIS / cov-spectrum](https://lapis.cov-spectrum.org/) which uses data from [NCBI GenBank](https://www.ncbi.nlm.nih.gov/genbank/), prepared and hosted by Nextstrain, see [blog post](https://nextstrain.org/blog/2021-07-08-ncov-open-announcement). 207 | * `reference.fasta` is taken from Nextstrain's [nextclade_data](https://github.com/nextstrain/nextclade_data/tree/master/data/datasets/sars-cov-2/references/MN908947/versions/2022-03-04T12:00:00Z/files), see [NCBI](https://www.ncbi.nlm.nih.gov/nuccore/MN908947) for attribution. 208 | * `mapping.csv` is a modified version of the table on the [covariants homepage](https://covariants.org/) by Nextstrain. 
209 | * Example output / screenshot based on Sequences published by the [German Robert-Koch-Institut](https://github.com/robert-koch-institut/SARS-CoV-2-Sequenzdaten_aus_Deutschland). 210 | * Primers: 211 | * [ARTIC primers](https://github.com/artic-network/artic-ncov2019) CC-BY-4.0 by the ARTIC network project 212 | * ~~[EasySeq primers](https://github.com/JordyCoolen/easyseq_covid19) by Coolen, J. P., Wolters, F., Tostmann, A., van Groningen, L. F., Bleeker-Rovers, C. P., Tan, E. C., ... & Melchers, W. J.~~ Removed until I understand the format of the `.bed` file. There will be an issue soon. 213 | * [midnight primers](https://zenodo.org/record/3897530#.Xuk7oGpLjep) CC-BY-4.0 by Silander, Olin K, Massey University 214 | 215 | The initial version of this program was written in cooperation with [@flauschzelle](https://github.com/flauschzelle). 216 | 217 | ## TODO / IDEAS / PLANS 218 | * [ ] Move these TODOs into actual issues 219 | * [x] add disclaimer and link to pango-designation 220 | * [ ] provide a sample file (maybe both `.fasta` and `.csv`, as long as the csv step is still needed) 221 | * [X] accept aligned fasta 222 | * [x] as input file 223 | * [ ] as piped stream 224 | * [ ] If we still accept csv/ssv input, autodetect the delimiter either by file name or by analysing the first line 225 | * [ ] find a way to handle already designated recombinant lineages 226 | * [ ] Output structured results 227 | * [ ] csv 228 | * [ ] html? 
229 | * [ ] fasta of all sequences that match the criteria, which enables efficient multi-pass strategies 230 | * [ ] filter sequences 231 | * [ ] by ID 232 | * [ ] by metadata 233 | * [ ] take metadata csv 234 | * [ ] document the output in README 235 | * [ ] check / fix `--enabled-deletions` 236 | * [x] adjustable threshold for mutation prevalence 237 | * [ ] new color mode (with background color and monochrome text on top) 238 | * [ ] new bar mode (with colored lines beneath each sequence, one for each example sequence, and "intermissions" shown in the color of the "surrounding" lineage, but not as bright) 239 | * [ ] interactive mode, for filtering, reordering, etc. 240 | * [x] sort sequences within each block 241 | * [ ] re-think this whole "intermission" concept 242 | * [ ] select a single sequence and let the tool refine the choice of parental sequences, not just focusing on commonly known lineages (going up and down in the tree) 243 | * [ ] use more common terms to describe things (needs feedback from people with actual experience in the field) 244 | -------------------------------------------------------------------------------- /primers/artic_v4.bed: -------------------------------------------------------------------------------- 1 | MN908947.3 25 50 SARS-CoV-2_1_LEFT 1 + AACAAACCAACCAACTTTCGATCTC 2 | MN908947.3 408 431 SARS-CoV-2_1_RIGHT 1 - CTTCTACTAAGCCACAAGTGCCA 3 | MN908947.3 324 344 SARS-CoV-2_2_LEFT 2 + TTTACAGGTTCGCGACGTGC 4 | MN908947.3 705 727 SARS-CoV-2_2_RIGHT 2 - ATAAGGATCAGTGCCAAGCTCG 5 | MN908947.3 644 666 SARS-CoV-2_3_LEFT 1 + GTAATAAAGGAGCTGGTGGCCA 6 | MN908947.3 1017 1044 SARS-CoV-2_3_RIGHT 1 - GCCAATTTAATTTCAAAAGGTGTCTGC 7 | MN908947.3 944 966 SARS-CoV-2_4_LEFT 2 + GTGTATACTGCTGCCGTGAACA 8 | MN908947.3 1337 1362 SARS-CoV-2_4_RIGHT 2 - ACAACAGCATTTTGGGGTAAGTAAC 9 | MN908947.3 1245 1266 SARS-CoV-2_5_LEFT 1 + TGAAACTTCATGGCAGACGGG 10 | MN908947.3 1623 1650 SARS-CoV-2_5_RIGHT 1 - TTGATGTTGACTTTCTCTTTTTGGAGT 11 | MN908947.3 1540 1562 
SARS-CoV-2_6_LEFT 2 + CGTGCTAGCGCTAACATAGGTT 12 | MN908947.3 1925 1948 SARS-CoV-2_6_RIGHT 2 - AACACGCACAGAATTTTGAGCAG 13 | MN908947.3 1851 1875 SARS-CoV-2_7_LEFT 1 + ACTGAGTCCTCTTTATGCATTTGC 14 | MN908947.3 2228 2250 SARS-CoV-2_7_RIGHT 1 - CCACCGACAATTTCACAAGCAC 15 | MN908947.3 2154 2180 SARS-CoV-2_8_LEFT 2 + GCTTGAAGAGAAGTTTAAGGAAGGTG 16 | MN908947.3 2544 2571 SARS-CoV-2_8_RIGHT 2 - GGTTGTTCTAATGGTTGTAAATCACCA 17 | MN908947.3 2483 2508 SARS-CoV-2_9_LEFT 1 + TCTTCTTAGAGGGAGAAACACTTCC 18 | MN908947.3 2861 2885 SARS-CoV-2_9_RIGHT 1 - CACAGGCGAACTCATTTACTTCTG 19 | MN908947.3 2826 2850 SARS-CoV-2_10_LEFT 2 + TGAGAAGTGCTCTGCCTATACAGT 20 | MN908947.3 3183 3210 SARS-CoV-2_10_RIGHT 2 - TCATCTAACCAATCTTCTTCTTGCTCT 21 | MN908947.3 3078 3102 SARS-CoV-2_11_LEFT 1 + AGAAGAGTTTGAGCCATCAACTCA 22 | MN908947.3 3470 3492 SARS-CoV-2_11_RIGHT 1 - TTTAAGGCTCCTGCAACACCTC 23 | MN908947.3 3390 3412 SARS-CoV-2_12_LEFT 2 + TGCAGACATTGTGGAAGAAGCT 24 | MN908947.3 3769 3794 SARS-CoV-2_12_RIGHT 2 - CAGCTAAGTAGACATTTGTGCGAAC 25 | MN908947.3 3683 3705 SARS-CoV-2_13_LEFT 1 + AGCACGAAGTTCTACTTGCACC 26 | MN908947.3 4067 4093 SARS-CoV-2_13_RIGHT 1 - GATGTCAATGTCACTAACAAGAGTGG 27 | MN908947.3 3992 4018 SARS-CoV-2_14_LEFT 2 + TGGAAGAAACTAAGTTCCTCACAGAA 28 | MN908947.3 4387 4409 SARS-CoV-2_14_RIGHT 2 - CATGTGCAAGCATTTCTCGCAA 29 | MN908947.3 4312 4339 SARS-CoV-2_15_LEFT 1 + AAAAGTGCCTTTTACATTCTACCATCT 30 | MN908947.3 4685 4710 SARS-CoV-2_15_RIGHT 1 - GCATCAGGTGAAGAAACAGAAACTG 31 | MN908947.3 4620 4648 SARS-CoV-2_16_LEFT 2 + TGTAACACATGGCTTAAATTTGGAAGAA 32 | MN908947.3 4995 5017 SARS-CoV-2_16_RIGHT 2 - CACAACTTGCGTGTGGAGGTTA 33 | MN908947.3 4923 4953 SARS-CoV-2_17_LEFT 1 + TGACAATCTTAAGACACTTCTTTCTTTGAG 34 | MN908947.3 5302 5331 SARS-CoV-2_17_RIGHT 1 - TTCAACTCTATTTGTTGGAGTGTTAACAA 35 | MN908947.3 5230 5259 SARS-CoV-2_18_LEFT 2 + TGGAAATACCCACAAGTTAATGGTTTAAC 36 | MN908947.3 5620 5643 SARS-CoV-2_18_RIGHT 2 - GCTTGTTTACCACACGTACAAGG 37 | MN908947.3 5561 5584 SARS-CoV-2_19_LEFT 1 + AAGCTGTTATGTACATGGGCACA 
38 | MN908947.3 5932 5957 SARS-CoV-2_19_RIGHT 1 - TGTCCAACTTAGGGTCAATTTCTGT 39 | MN908947.3 5867 5894 SARS-CoV-2_20_LEFT 2 + ACAAAGAAAACAGTTACACAACAACCA 40 | MN908947.3 6247 6272 SARS-CoV-2_20_RIGHT 2 - ACGTGGCTTTATTAGTTGCATTGTT 41 | MN908947.3 6184 6210 SARS-CoV-2_21_LEFT 1 + CACTACACACCCTCTTTTAAGAAAGG 42 | MN908947.3 6553 6582 SARS-CoV-2_21_RIGHT 1 - GTAAGACTAGAATTGTCTACATAAGCAGC 43 | MN908947.3 6478 6507 SARS-CoV-2_22_LEFT 2 + GTAGGAGACATTATACTTAAACCAGCAAA 44 | MN908947.3 6859 6885 SARS-CoV-2_22_RIGHT 2 - CCGACACTCTTAACAGTATTCTTTGC 45 | MN908947.3 6747 6776 SARS-CoV-2_23_LEFT 1 + AAACCGTGTTTGTACTAATTATATGCCTT 46 | MN908947.3 7122 7148 SARS-CoV-2_23_RIGHT 1 - AACCACTAAGACAAACACTACAAGGT 47 | MN908947.3 7057 7084 SARS-CoV-2_24_LEFT 2 + GGTTACAGAGAAGGCTATTTGAACTCT 48 | MN908947.3 7440 7467 SARS-CoV-2_24_RIGHT 2 - ACAACATGCACATAACTTTTCCATACA 49 | MN908947.3 7381 7403 SARS-CoV-2_25_LEFT 1 + CAAATGGCCCCGATTTCAGCTA 50 | MN908947.3 7747 7770 SARS-CoV-2_25_RIGHT 1 - TGGATGGAACCATTCTTCACTGT 51 | MN908947.3 7672 7695 SARS-CoV-2_26_LEFT 2 + GCGAGAGACTTGTCACTACAGTT 52 | MN908947.3 8063 8092 SARS-CoV-2_26_RIGHT 2 - GAGTTTTTCCATTGGTACGTTAAAAGTTG 53 | MN908947.3 7997 8019 SARS-CoV-2_27_LEFT 1 + CTGATGTTGGTGATAGTGCGGA 54 | MN908947.3 8370 8395 SARS-CoV-2_27_RIGHT 1 - AGCAATGTTGTGACTTTTTGCTACC 55 | MN908947.3 8304 8326 SARS-CoV-2_28_LEFT 2 + TGAAAACATGACACCCCGTGAC 56 | MN908947.3 8691 8714 SARS-CoV-2_28_RIGHT 2 - TGACACCACCATCAATAGCCTTG 57 | MN908947.3 8596 8619 SARS-CoV-2_29_LEFT 1 + CTTGTGTTCCTTTTTGTTGCTGC 58 | MN908947.3 8990 9013 SARS-CoV-2_29_RIGHT 1 - AGCCAAAACACAAGCTGATGTTG 59 | MN908947.3 8919 8944 SARS-CoV-2_30_LEFT 2 + ACCTAGAGTTTTTAGTGCAGTTGGT 60 | MN908947.3 9306 9329 SARS-CoV-2_30_RIGHT 2 - CTACACCACAGAAAACTCCTGGT 61 | MN908947.3 9168 9192 SARS-CoV-2_31_LEFT 1 + CCTTGAAGGTTCTGTTAGAGTGGT 62 | MN908947.3 9535 9564 SARS-CoV-2_31_RIGHT 1 - AATGAGTAAACTGGTGTTAAACAGAGTAC 63 | MN908947.3 9470 9497 SARS-CoV-2_32_LEFT 2 + GAGCTTTTGGTGAATACAGTCATGTAG 64 | MN908947.3 9842 9866 
SARS-CoV-2_32_RIGHT 2 - GAGGTAATAGCACATCACTACGCA 65 | MN908947.3 9782 9805 SARS-CoV-2_33_LEFT 1 + GTACTTTTGAAGAAGCTGCGCTG 66 | MN908947.3 10150 10176 SARS-CoV-2_33_RIGHT 1 - TGTCTTGGACAGTAAACTACGTCATC 67 | MN908947.3 10076 10099 SARS-CoV-2_34_LEFT 2 + TCCCATCTGGTAAAGTTGAGGGT 68 | MN908947.3 10465 10491 SARS-CoV-2_34_RIGHT 2 - CCACATGAACCATTAAGGAATGAACC 69 | MN908947.3 10393 10419 SARS-CoV-2_35_LEFT 1 + GTGTTAGCTTGTTACAATGGTTCACC 70 | MN908947.3 10785 10810 SARS-CoV-2_35_RIGHT 1 - AGGTCCTAGTATGTCAACATGGTCT 71 | MN908947.3 10713 10742 SARS-CoV-2_36_LEFT 2 + CAATCGATTTACCACAACTCTTAATGACT 72 | MN908947.3 11092 11116 SARS-CoV-2_36_RIGHT 2 - ACCCATAGCAAAAGGTAAAAAGGC 73 | MN908947.3 11000 11023 SARS-CoV-2_37_LEFT 1 + CACACCACTGGTTGTTACTCACA 74 | MN908947.3 11388 11414 SARS-CoV-2_37_RIGHT 1 - GTGTCAAGACATTCATAAGTGTCCAC 75 | MN908947.3 11305 11330 SARS-CoV-2_38_LEFT 2 + GACTGTGTTATGTATGCATCAGCTG 76 | MN908947.3 11689 11720 SARS-CoV-2_38_RIGHT 2 - CCTGTGTAGAAACTAAGTAATCATAAACACC 77 | MN908947.3 11624 11651 SARS-CoV-2_39_LEFT 1 + GCTATTTTTGTACTTGTTACTTTGGCC 78 | MN908947.3 12011 12033 SARS-CoV-2_39_RIGHT 1 - CCCTGCATGGAAAGCAAAACAG 79 | MN908947.3 11937 11963 SARS-CoV-2_40_LEFT 2 + TGTCCAGTTACACAATGACATTCTCT 80 | MN908947.3 12317 12339 SARS-CoV-2_40_RIGHT 2 - ACTTTTGCCCTCTTGTCCTCAG 81 | MN908947.3 12234 12255 SARS-CoV-2_41_LEFT 1 + ATTTGACCGTGATGCAGCCAT 82 | MN908947.3 12618 12643 SARS-CoV-2_41_RIGHT 1 - AAGAGGCCATGCTAAATTAGGTGAA 83 | MN908947.3 12519 12546 SARS-CoV-2_42_LEFT 2 + TGGTACAACATTTACTTATGCATCAGC 84 | MN908947.3 12895 12920 SARS-CoV-2_42_RIGHT 2 - TGTCTGTAACAAACCTACAAGGTGG 85 | MN908947.3 12831 12856 SARS-CoV-2_43_LEFT 1 + GGATTTGAAATGGGCTAGATTCCCT 86 | MN908947.3 13218 13240 SARS-CoV-2_43_RIGHT 1 - CGATGCACCACCAAAGGATTCT 87 | MN908947.3 13124 13148 SARS-CoV-2_44_LEFT 2 + GGGGACAACCAATCACTAATTGTG 88 | MN908947.3 13506 13528 SARS-CoV-2_44_RIGHT 2 - CATCAGTACTAGTGCCTGTGCC 89 | MN908947.3 13463 13485 SARS-CoV-2_45_LEFT 1 + TAAACGGGTTTGCGGTGTAAGT 90 | MN908947.3 13833 
13859 SARS-CoV-2_45_RIGHT 1 - TCACAATTACCTTCATCAAAATGCCT 91 | MN908947.3 13752 13775 SARS-CoV-2_46_LEFT 2 + AGAATAGACGGTGACATGGTACC 92 | MN908947.3 14120 14144 SARS-CoV-2_46_RIGHT 2 - TCTACAACAGGAACTCCACTACCT 93 | MN908947.3 14045 14075 SARS-CoV-2_47_LEFT 1 + TGGTGTACTGACATTAGATAATCAAGATCT 94 | MN908947.3 14428 14457 SARS-CoV-2_47_RIGHT 1 - TGGAACACCATCAACAAATATTTTTCTCA 95 | MN908947.3 14338 14362 SARS-CoV-2_48_LEFT 2 + ACTGTTTGGATGACAGATGCATTC 96 | MN908947.3 14717 14743 SARS-CoV-2_48_RIGHT 2 - CAGAACTTCCTTCCTTAAAGAAACCC 97 | MN908947.3 14647 14674 SARS-CoV-2_49_LEFT 1 + ACAATGTTGCTTTTCAAACTGTCAAAC 98 | MN908947.3 15023 15050 SARS-CoV-2_49_RIGHT 1 - GGGATGACATTACGTTTTGTATATGCG 99 | MN908947.3 14953 14983 SARS-CoV-2_50_LEFT 2 + CATTTAATAAATGGGGTAAGGCTAGACTTT 100 | MN908947.3 15336 15358 SARS-CoV-2_50_RIGHT 2 - GAGCAAGAACAAGTGAGGCCAT 101 | MN908947.3 15214 15237 SARS-CoV-2_51_LEFT 1 + GCAAATTCTATGGTGGTTGGCAC 102 | MN908947.3 15596 15619 SARS-CoV-2_51_RIGHT 1 - GTCTGTGTTGTAAATTGCGGACA 103 | MN908947.3 15535 15557 SARS-CoV-2_52_LEFT 2 + CTGTCACGGCCAATGTTAATGC 104 | MN908947.3 15917 15941 SARS-CoV-2_52_RIGHT 2 - GGATCTGGGTAAGGAAGGTACACA 105 | MN908947.3 15855 15881 SARS-CoV-2_53_LEFT 1 + ACTAAAGGACCTCATGAATTTTGCTC 106 | MN908947.3 16239 16260 SARS-CoV-2_53_RIGHT 1 - GCAAAGAACACAAGCCCCAAC 107 | MN908947.3 16112 16137 SARS-CoV-2_54_LEFT 2 + ACATGATGAGTTAACAGGACACATG 108 | MN908947.3 16483 16508 SARS-CoV-2_54_RIGHT 2 - CCAAAAACTTGTCCATTAGCACACA 109 | MN908947.3 16386 16408 SARS-CoV-2_55_LEFT 1 + AATGCTCCAGGTTGTGATGTCA 110 | MN908947.3 16767 16796 SARS-CoV-2_55_RIGHT 1 - ACACGATAACCAGTAAAGACATAATTTCG 111 | MN908947.3 16692 16714 SARS-CoV-2_56_LEFT 2 + ACTGTACGTGAAGTGCTGTCTG 112 | MN908947.3 17082 17105 SARS-CoV-2_56_RIGHT 2 - TGACTCTTACCAGTACCAGGTGG 113 | MN908947.3 16986 17013 SARS-CoV-2_57_LEFT 1 + GGCTTATACCCAACACTCAATATCTCA 114 | MN908947.3 17381 17405 SARS-CoV-2_57_RIGHT 1 - CTGGCATTGACAACACTCAAATCA 115 | MN908947.3 17323 17345 SARS-CoV-2_58_LEFT 2 + 
TGCCTGAGACGACAGCAGATAT 116 | MN908947.3 17688 17711 SARS-CoV-2_58_RIGHT 2 - TGTGGCCTGTTAATTGCAGATGA 117 | MN908947.3 17615 17642 SARS-CoV-2_59_LEFT 1 + GCTTAAAGCACATAAAGACAAATCAGC 118 | MN908947.3 17997 18022 SARS-CoV-2_59_RIGHT 1 - TCCTACGTGGAATTTCAAGACTTGT 119 | MN908947.3 17911 17939 SARS-CoV-2_60_LEFT 2 + ACAGATTTAATGTTGCTATTACCAGAGC 120 | MN908947.3 18307 18328 SARS-CoV-2_60_RIGHT 2 - TAGCATGACACCCCTCGACAT 121 | MN908947.3 18244 18267 SARS-CoV-2_61_LEFT 1 + ACCCTAACATGTTTATCACCCGC 122 | MN908947.3 18624 18652 SARS-CoV-2_61_RIGHT 1 - GCTCAGGTCCTATTTTCACAAAATACTT 123 | MN908947.3 18550 18578 SARS-CoV-2_62_LEFT 2 + GTGACACACTTAAAAATCTCTCTGACAG 124 | MN908947.3 18936 18961 SARS-CoV-2_62_RIGHT 2 - CCGCATTAATCTTCAGTTCATCACC 125 | MN908947.3 18869 18891 SARS-CoV-2_63_LEFT 1 + TAGGTGTCTAGCTGTCCACGAG 126 | MN908947.3 19252 19277 SARS-CoV-2_63_RIGHT 1 - CCAGGCAAGTTAAGGTTAGATAGCA 127 | MN908947.3 19183 19208 SARS-CoV-2_64_LEFT 2 + GCCTATTTTGGAATTGCAATGTCGA 128 | MN908947.3 19558 19586 SARS-CoV-2_64_RIGHT 2 - GTATCAAATTGTTTGTAAACCCACAAGC 129 | MN908947.3 19485 19513 SARS-CoV-2_65_LEFT 1 + GTCTGTAGACATCATGCTAATGAGTACA 130 | MN908947.3 19877 19901 SARS-CoV-2_65_RIGHT 1 - GCTGGAGCATCTCTTTTGTAGTCC 131 | MN908947.3 19810 19836 SARS-CoV-2_66_LEFT 2 + AACCAGTACCAGAGGTGAAAATACTC 132 | MN908947.3 20186 20216 SARS-CoV-2_66_RIGHT 2 - TTTCTACTCTGAGTAAAGTAAGTTTCAGGT 133 | MN908947.3 20090 20117 SARS-CoV-2_67_LEFT 1 + CAAACAAGCTAGTCTTAATGGAGTCAC 134 | MN908947.3 20472 20497 SARS-CoV-2_67_RIGHT 1 - AACACACACACTTAGATGAACCTGT 135 | MN908947.3 20377 20405 SARS-CoV-2_68_LEFT 2 + GACTAGCTAAACGTTTTAAGGAATCACC 136 | MN908947.3 20766 20792 SARS-CoV-2_68_RIGHT 2 - GCGACATTCATCATTATGCCTTTAGG 137 | MN908947.3 20677 20699 SARS-CoV-2_69_LEFT 1 + CGGGTGTTGCTATGCCTAATCT 138 | MN908947.3 21050 21080 SARS-CoV-2_69_RIGHT 1 - TTTGTAACATTTTTAGTCTTAGGGTCGTAC 139 | MN908947.3 20988 21013 SARS-CoV-2_70_LEFT 2 + TTGATTGGTGATTGTGCAACTGTAC 140 | MN908947.3 21358 21387 SARS-CoV-2_70_RIGHT 2 - 
AGAATAGGAAGACAACTGAATTGGATTTG 141 | MN908947.3 21294 21316 SARS-CoV-2_71_LEFT 1 + GGCAAACCACGCGAACAAATAG 142 | MN908947.3 21675 21700 SARS-CoV-2_71_RIGHT 1 - TGAGGATCTGAAAACTTTGTCAGGG 143 | MN908947.3 21532 21561 SARS-CoV-2_72_LEFT 2 + GTGATGTTCTTGTTAACAACTAAACGAAC 144 | MN908947.3 21904 21933 SARS-CoV-2_72_RIGHT 2 - GTAGCGTTATTAACAATAAGTAGGGACTG 145 | MN908947.3 21865 21889 SARS-CoV-2_73_LEFT 1 + AGAGGCTGGATTTTTGGTACTACT 146 | MN908947.3 22247 22274 SARS-CoV-2_73_RIGHT 1 - ACCTAGTGATGTTAATACCTATTGGCA 147 | MN908947.3 22091 22113 SARS-CoV-2_74_LEFT 2 + TGGACCTTGAAGGAAAACAGGG 148 | MN908947.3 22474 22503 SARS-CoV-2_74_RIGHT 2 - TGATAGATTCCTTTTTCTACAGTGAAGGA 149 | MN908947.3 22402 22428 SARS-CoV-2_75_LEFT 1 + GAAAATGGAACCATTACAGATGCTGT 150 | MN908947.3 22785 22805 SARS-CoV-2_75_RIGHT 1 - TTTGCCCTGGAGCGATTTGT 151 | MN908947.3 22648 22677 SARS-CoV-2_76_LEFT 2 + GCTGATTATTCTGTCCTATATAATTCCGC 152 | MN908947.3 23028 23057 SARS-CoV-2_76_RIGHT 2 - GTTGGAAACCATATGATTGTAAAGGAAAG 153 | MN908947.3 22944 22974 SARS-CoV-2_77_LEFT 1 + CAAACCTTTTGAGAGAGATATTTCAACTGA 154 | MN908947.3 23327 23351 SARS-CoV-2_77_RIGHT 1 - CACTGACACCACCAAAAGAACATG 155 | MN908947.3 23219 23246 SARS-CoV-2_78_LEFT 2 + CTGAGTCTAACAAAAAGTTTCTGCCTT 156 | MN908947.3 23611 23635 SARS-CoV-2_78_RIGHT 2 - GGATTGACTAGCTACACTACGTGC 157 | MN908947.3 23553 23575 SARS-CoV-2_79_LEFT 1 + ACCCATTGGTGCAGGTATATGC 158 | MN908947.3 23927 23955 SARS-CoV-2_79_RIGHT 1 - CCAAAATCTTTAATTGGTGGTGTTTTGT 159 | MN908947.3 23853 23876 SARS-CoV-2_80_LEFT 2 + CCGTGCTTTAACTGGAATAGCTG 160 | MN908947.3 24233 24258 SARS-CoV-2_80_RIGHT 2 - GCAAATGGTATTTGTAATGCAGCAC 161 | MN908947.3 24171 24194 SARS-CoV-2_81_LEFT 1 + TGCTCAATACACTTCTGCACTGT 162 | MN908947.3 24545 24567 SARS-CoV-2_81_RIGHT 1 - TGAAGTCTGCCTGTGATCAACC 163 | MN908947.3 24426 24448 SARS-CoV-2_82_LEFT 2 + TGCACAAGCTTTAAACACGCTT 164 | MN908947.3 24814 24836 SARS-CoV-2_82_RIGHT 2 - CACGAGGAAAGTGTGCTTTTCC 165 | MN908947.3 24750 24772 SARS-CoV-2_83_LEFT 1 + GCATGTGACTTATGTCCCTGCA 166 | 
MN908947.3 25122 25150 SARS-CoV-2_83_RIGHT 1 - AGATTCATTTAAATTCTTGGCAACCTCA 167 | MN908947.3 25051 25076 SARS-CoV-2_84_LEFT 2 + GTTGATTTAGGTGACATCTCTGGCA 168 | MN908947.3 25438 25461 SARS-CoV-2_84_RIGHT 2 - AGCATCCTTGATTTCACCTTGCT 169 | MN908947.3 25331 25353 SARS-CoV-2_85_LEFT 1 + ATGAAGACGACTCTGAGCCAGT 170 | MN908947.3 25711 25740 SARS-CoV-2_85_RIGHT 1 - CTGCAAGAAGTAGACTAAAGCATAAAGAT 171 | MN908947.3 25645 25672 SARS-CoV-2_86_LEFT 2 + TGTTGTTTGTAACAGTTTACTCACACC 172 | MN908947.3 26026 26050 SARS-CoV-2_86_RIGHT 2 - TCAATTGAGTTGAGTACAGCTGGT 173 | MN908947.3 25951 25979 SARS-CoV-2_87_LEFT 1 + GTGGTTATACTGAAAAATGGGAATCTGG 174 | MN908947.3 26338 26360 SARS-CoV-2_87_RIGHT 1 - AATCGAAGCGCAGTAAGGATGG 175 | MN908947.3 26255 26277 SARS-CoV-2_88_LEFT 2 + CGTTTCGGAAGAGACAGGTACG 176 | MN908947.3 26635 26661 SARS-CoV-2_88_RIGHT 2 - ACAAAAACCTATTCCTGTTGGCATAG 177 | MN908947.3 26564 26587 SARS-CoV-2_89_LEFT 1 + AAGCTCCTTGAACAATGGAACCT 178 | MN908947.3 26956 26979 SARS-CoV-2_89_RIGHT 1 - CAGCAATACGAAGATGTCCACGA 179 | MN908947.3 26873 26895 SARS-CoV-2_90_LEFT 2 + ATTCTTCTCAACGTGCCACTCC 180 | MN908947.3 27256 27283 SARS-CoV-2_90_RIGHT 2 - TCCAAATGGAAACTTTAAAAGTCCTCA 181 | MN908947.3 27152 27177 SARS-CoV-2_91_LEFT 1 + TCCAGTAGCAGTGACAATATTGCTT 182 | MN908947.3 27534 27560 SARS-CoV-2_91_RIGHT 1 - AGTGCAAATTTGTTATCAGCTAGAGG 183 | MN908947.3 27447 27473 SARS-CoV-2_92_LEFT 2 + CACTACCAAGAGTGTGTTAGAGGTAC 184 | MN908947.3 27826 27855 SARS-CoV-2_92_RIGHT 2 - GTTCAAGTGAGAACCAAAAGATAATAAGC 185 | MN908947.3 27700 27726 SARS-CoV-2_93_LEFT 1 + TTGTTGCGGCAATAGTGTTTATAACA 186 | MN908947.3 28082 28104 SARS-CoV-2_93_RIGHT 1 - TGGGTGATTTAGAACCAGCCTC 187 | MN908947.3 27996 28021 SARS-CoV-2_94_LEFT 2 + ACCCGTGTCCTATTCACTTCTATTC 188 | MN908947.3 28394 28416 SARS-CoV-2_94_RIGHT 2 - TTATTGGGTAAACCTTGGGGCC 189 | MN908947.3 28190 28214 SARS-CoV-2_95_LEFT 1 + GTGCGTTGTTCGTTCTATGAAGAC 190 | MN908947.3 28572 28598 SARS-CoV-2_95_RIGHT 1 - ACCATCTTGGACTGAGATCTTTCATT 191 | MN908947.3 28512 28536 
SARS-CoV-2_96_LEFT 2 + AGATGACCAAATTGGCTACTACCG 192 | MN908947.3 28893 28914 SARS-CoV-2_96_RIGHT 2 - CCATTGCCAGCCATTCTAGCA 193 | MN908947.3 28827 28849 SARS-CoV-2_97_LEFT 1 + TTCCTCATCACGTAGTCGCAAC 194 | MN908947.3 29206 29227 SARS-CoV-2_97_RIGHT 1 - CGACATTCCGAAGAACGCTGA 195 | MN908947.3 29136 29161 SARS-CoV-2_98_LEFT 2 + CCAGGAACTAATCAGACAAGGAACT 196 | MN908947.3 29512 29534 SARS-CoV-2_98_RIGHT 2 - TTTAGGCCTGAGTTGAGTCAGC 197 | MN908947.3 29452 29475 SARS-CoV-2_99_LEFT 1 + CTTCTTCCTGCTGCAGATTTGGA 198 | MN908947.3 29827 29854 SARS-CoV-2_99_RIGHT 1 - GCTATTAAAATCACATGGGGATAGCAC 199 | -------------------------------------------------------------------------------- /primers/artic_v4_1.bed: -------------------------------------------------------------------------------- 1 | MN908947.3 25 50 SARS-CoV-2_1_LEFT 1 + AACAAACCAACCAACTTTCGATCTC 2 | MN908947.3 324 344 SARS-CoV-2_2_LEFT 2 + TTTACAGGTTCGCGACGTGC 3 | MN908947.3 408 431 SARS-CoV-2_1_RIGHT 1 - CTTCTACTAAGCCACAAGTGCCA 4 | MN908947.3 644 666 SARS-CoV-2_3_LEFT 1 + GTAATAAAGGAGCTGGTGGCCA 5 | MN908947.3 705 727 SARS-CoV-2_2_RIGHT 2 - ATAAGGATCAGTGCCAAGCTCG 6 | MN908947.3 944 966 SARS-CoV-2_4_LEFT 2 + GTGTATACTGCTGCCGTGAACA 7 | MN908947.3 1017 1044 SARS-CoV-2_3_RIGHT 1 - GCCAATTTAATTTCAAAAGGTGTCTGC 8 | MN908947.3 1245 1266 SARS-CoV-2_5_LEFT 1 + TGAAACTTCATGGCAGACGGG 9 | MN908947.3 1337 1362 SARS-CoV-2_4_RIGHT 2 - ACAACAGCATTTTGGGGTAAGTAAC 10 | MN908947.3 1540 1562 SARS-CoV-2_6_LEFT 2 + CGTGCTAGCGCTAACATAGGTT 11 | MN908947.3 1623 1650 SARS-CoV-2_5_RIGHT 1 - TTGATGTTGACTTTCTCTTTTTGGAGT 12 | MN908947.3 1851 1875 SARS-CoV-2_7_LEFT 1 + ACTGAGTCCTCTTTATGCATTTGC 13 | MN908947.3 1925 1948 SARS-CoV-2_6_RIGHT 2 - AACACGCACAGAATTTTGAGCAG 14 | MN908947.3 2154 2180 SARS-CoV-2_8_LEFT 2 + GCTTGAAGAGAAGTTTAAGGAAGGTG 15 | MN908947.3 2228 2250 SARS-CoV-2_7_RIGHT 1 - CCACCGACAATTTCACAAGCAC 16 | MN908947.3 2483 2508 SARS-CoV-2_9_LEFT 1 + TCTTCTTAGAGGGAGAAACACTTCC 17 | MN908947.3 2544 2571 SARS-CoV-2_8_RIGHT 2 - GGTTGTTCTAATGGTTGTAAATCACCA 
18 | MN908947.3 2780 2813 SARS-CoV-2_10_LEFT_alt1 2 + TGAATATCACTTTTGAACTTGATGAAAGGATTG 19 | MN908947.3 2826 2850 SARS-CoV-2_10_LEFT 2 + TGAGAAGTGCTCTGCCTATACAGT 20 | MN908947.3 2861 2885 SARS-CoV-2_9_RIGHT 1 - CACAGGCGAACTCATTTACTTCTG 21 | MN908947.3 3078 3102 SARS-CoV-2_11_LEFT 1 + AGAAGAGTTTGAGCCATCAACTCA 22 | MN908947.3 3156 3177 SARS-CoV-2_10_RIGHT_alt1 2 - GGTTGAAGAGCAGCAGAAGTG 23 | MN908947.3 3183 3210 SARS-CoV-2_10_RIGHT 2 - TCATCTAACCAATCTTCTTCTTGCTCT 24 | MN908947.3 3390 3412 SARS-CoV-2_12_LEFT 2 + TGCAGACATTGTGGAAGAAGCT 25 | MN908947.3 3470 3492 SARS-CoV-2_11_RIGHT 1 - TTTAAGGCTCCTGCAACACCTC 26 | MN908947.3 3683 3705 SARS-CoV-2_13_LEFT 1 + AGCACGAAGTTCTACTTGCACC 27 | MN908947.3 3769 3794 SARS-CoV-2_12_RIGHT 2 - CAGCTAAGTAGACATTTGTGCGAAC 28 | MN908947.3 3992 4018 SARS-CoV-2_14_LEFT 2 + TGGAAGAAACTAAGTTCCTCACAGAA 29 | MN908947.3 4067 4093 SARS-CoV-2_13_RIGHT 1 - GATGTCAATGTCACTAACAAGAGTGG 30 | MN908947.3 4312 4339 SARS-CoV-2_15_LEFT 1 + AAAAGTGCCTTTTACATTCTACCATCT 31 | MN908947.3 4387 4409 SARS-CoV-2_14_RIGHT 2 - CATGTGCAAGCATTTCTCGCAA 32 | MN908947.3 4620 4648 SARS-CoV-2_16_LEFT 2 + TGTAACACATGGCTTAAATTTGGAAGAA 33 | MN908947.3 4685 4710 SARS-CoV-2_15_RIGHT 1 - GCATCAGGTGAAGAAACAGAAACTG 34 | MN908947.3 4923 4953 SARS-CoV-2_17_LEFT 1 + TGACAATCTTAAGACACTTCTTTCTTTGAG 35 | MN908947.3 4995 5017 SARS-CoV-2_16_RIGHT 2 - CACAACTTGCGTGTGGAGGTTA 36 | MN908947.3 5230 5259 SARS-CoV-2_18_LEFT 2 + TGGAAATACCCACAAGTTAATGGTTTAAC 37 | MN908947.3 5302 5331 SARS-CoV-2_17_RIGHT 1 - TTCAACTCTATTTGTTGGAGTGTTAACAA 38 | MN908947.3 5561 5584 SARS-CoV-2_19_LEFT 1 + AAGCTGTTATGTACATGGGCACA 39 | MN908947.3 5620 5643 SARS-CoV-2_18_RIGHT 2 - GCTTGTTTACCACACGTACAAGG 40 | MN908947.3 5867 5894 SARS-CoV-2_20_LEFT 2 + ACAAAGAAAACAGTTACACAACAACCA 41 | MN908947.3 5932 5957 SARS-CoV-2_19_RIGHT 1 - TGTCCAACTTAGGGTCAATTTCTGT 42 | MN908947.3 6184 6210 SARS-CoV-2_21_LEFT 1 + CACTACACACCCTCTTTTAAGAAAGG 43 | MN908947.3 6247 6272 SARS-CoV-2_20_RIGHT 2 - ACGTGGCTTTATTAGTTGCATTGTT 44 | MN908947.3 6478 
6507 SARS-CoV-2_22_LEFT 2 + GTAGGAGACATTATACTTAAACCAGCAAA 45 | MN908947.3 6553 6582 SARS-CoV-2_21_RIGHT 1 - GTAAGACTAGAATTGTCTACATAAGCAGC 46 | MN908947.3 6747 6776 SARS-CoV-2_23_LEFT 1 + AAACCGTGTTTGTACTAATTATATGCCTT 47 | MN908947.3 6859 6885 SARS-CoV-2_22_RIGHT 2 - CCGACACTCTTAACAGTATTCTTTGC 48 | MN908947.3 7057 7084 SARS-CoV-2_24_LEFT 2 + GGTTACAGAGAAGGCTATTTGAACTCT 49 | MN908947.3 7122 7148 SARS-CoV-2_23_RIGHT 1 - AACCACTAAGACAAACACTACAAGGT 50 | MN908947.3 7127 7156 SARS-CoV-2_23_RIGHT_alt1 1 - AGAATCTAAACCACTAAGACAAACACTAC 51 | MN908947.3 7381 7403 SARS-CoV-2_25_LEFT 1 + CAAATGGCCCCGATTTCAGCTA 52 | MN908947.3 7440 7467 SARS-CoV-2_24_RIGHT 2 - ACAACATGCACATAACTTTTCCATACA 53 | MN908947.3 7672 7695 SARS-CoV-2_26_LEFT 2 + GCGAGAGACTTGTCACTACAGTT 54 | MN908947.3 7747 7770 SARS-CoV-2_25_RIGHT 1 - TGGATGGAACCATTCTTCACTGT 55 | MN908947.3 7997 8019 SARS-CoV-2_27_LEFT 1 + CTGATGTTGGTGATAGTGCGGA 56 | MN908947.3 8063 8092 SARS-CoV-2_26_RIGHT 2 - GAGTTTTTCCATTGGTACGTTAAAAGTTG 57 | MN908947.3 8304 8326 SARS-CoV-2_28_LEFT 2 + TGAAAACATGACACCCCGTGAC 58 | MN908947.3 8367 8392 SARS-CoV-2_27_RIGHT_alt1 1 - AATGTTGTGACTTTTTGCTACCTGC 59 | MN908947.3 8370 8395 SARS-CoV-2_27_RIGHT 1 - AGCAATGTTGTGACTTTTTGCTACC 60 | MN908947.3 8596 8619 SARS-CoV-2_29_LEFT 1 + CTTGTGTTCCTTTTTGTTGCTGC 61 | MN908947.3 8691 8714 SARS-CoV-2_28_RIGHT 2 - TGACACCACCATCAATAGCCTTG 62 | MN908947.3 8919 8944 SARS-CoV-2_30_LEFT 2 + ACCTAGAGTTTTTAGTGCAGTTGGT 63 | MN908947.3 8990 9013 SARS-CoV-2_29_RIGHT 1 - AGCCAAAACACAAGCTGATGTTG 64 | MN908947.3 9168 9192 SARS-CoV-2_31_LEFT 1 + CCTTGAAGGTTCTGTTAGAGTGGT 65 | MN908947.3 9306 9329 SARS-CoV-2_30_RIGHT 2 - CTACACCACAGAAAACTCCTGGT 66 | MN908947.3 9470 9497 SARS-CoV-2_32_LEFT 2 + GAGCTTTTGGTGAATACAGTCATGTAG 67 | MN908947.3 9535 9564 SARS-CoV-2_31_RIGHT 1 - AATGAGTAAACTGGTGTTAAACAGAGTAC 68 | MN908947.3 9782 9805 SARS-CoV-2_33_LEFT 1 + GTACTTTTGAAGAAGCTGCGCTG 69 | MN908947.3 9842 9866 SARS-CoV-2_32_RIGHT 2 - GAGGTAATAGCACATCACTACGCA 70 | MN908947.3 10076 10099 
SARS-CoV-2_34_LEFT 2 + TCCCATCTGGTAAAGTTGAGGGT 71 | MN908947.3 10150 10176 SARS-CoV-2_33_RIGHT 1 - TGTCTTGGACAGTAAACTACGTCATC 72 | MN908947.3 10393 10419 SARS-CoV-2_35_LEFT 1 + GTGTTAGCTTGTTACAATGGTTCACC 73 | MN908947.3 10465 10491 SARS-CoV-2_34_RIGHT 2 - CCACATGAACCATTAAGGAATGAACC 74 | MN908947.3 10713 10742 SARS-CoV-2_36_LEFT 2 + CAATCGATTTACCACAACTCTTAATGACT 75 | MN908947.3 10785 10810 SARS-CoV-2_35_RIGHT 1 - AGGTCCTAGTATGTCAACATGGTCT 76 | MN908947.3 11000 11023 SARS-CoV-2_37_LEFT 1 + CACACCACTGGTTGTTACTCACA 77 | MN908947.3 11092 11116 SARS-CoV-2_36_RIGHT 2 - ACCCATAGCAAAAGGTAAAAAGGC 78 | MN908947.3 11305 11330 SARS-CoV-2_38_LEFT 2 + GACTGTGTTATGTATGCATCAGCTG 79 | MN908947.3 11388 11414 SARS-CoV-2_37_RIGHT 1 - GTGTCAAGACATTCATAAGTGTCCAC 80 | MN908947.3 11624 11651 SARS-CoV-2_39_LEFT 1 + GCTATTTTTGTACTTGTTACTTTGGCC 81 | MN908947.3 11689 11720 SARS-CoV-2_38_RIGHT 2 - CCTGTGTAGAAACTAAGTAATCATAAACACC 82 | MN908947.3 11937 11963 SARS-CoV-2_40_LEFT 2 + TGTCCAGTTACACAATGACATTCTCT 83 | MN908947.3 12011 12033 SARS-CoV-2_39_RIGHT 1 - CCCTGCATGGAAAGCAAAACAG 84 | MN908947.3 12234 12255 SARS-CoV-2_41_LEFT 1 + ATTTGACCGTGATGCAGCCAT 85 | MN908947.3 12317 12339 SARS-CoV-2_40_RIGHT 2 - ACTTTTGCCCTCTTGTCCTCAG 86 | MN908947.3 12519 12546 SARS-CoV-2_42_LEFT 2 + TGGTACAACATTTACTTATGCATCAGC 87 | MN908947.3 12618 12643 SARS-CoV-2_41_RIGHT 1 - AAGAGGCCATGCTAAATTAGGTGAA 88 | MN908947.3 12831 12856 SARS-CoV-2_43_LEFT 1 + GGATTTGAAATGGGCTAGATTCCCT 89 | MN908947.3 12895 12920 SARS-CoV-2_42_RIGHT 2 - TGTCTGTAACAAACCTACAAGGTGG 90 | MN908947.3 13124 13148 SARS-CoV-2_44_LEFT 2 + GGGGACAACCAATCACTAATTGTG 91 | MN908947.3 13218 13240 SARS-CoV-2_43_RIGHT 1 - CGATGCACCACCAAAGGATTCT 92 | MN908947.3 13463 13485 SARS-CoV-2_45_LEFT 1 + TAAACGGGTTTGCGGTGTAAGT 93 | MN908947.3 13506 13528 SARS-CoV-2_44_RIGHT 2 - CATCAGTACTAGTGCCTGTGCC 94 | MN908947.3 13752 13775 SARS-CoV-2_46_LEFT 2 + AGAATAGACGGTGACATGGTACC 95 | MN908947.3 13833 13859 SARS-CoV-2_45_RIGHT 1 - TCACAATTACCTTCATCAAAATGCCT 96 | MN908947.3 
14045 14075 SARS-CoV-2_47_LEFT 1 + TGGTGTACTGACATTAGATAATCAAGATCT 97 | MN908947.3 14120 14144 SARS-CoV-2_46_RIGHT 2 - TCTACAACAGGAACTCCACTACCT 98 | MN908947.3 14338 14362 SARS-CoV-2_48_LEFT 2 + ACTGTTTGGATGACAGATGCATTC 99 | MN908947.3 14428 14457 SARS-CoV-2_47_RIGHT 1 - TGGAACACCATCAACAAATATTTTTCTCA 100 | MN908947.3 14647 14674 SARS-CoV-2_49_LEFT 1 + ACAATGTTGCTTTTCAAACTGTCAAAC 101 | MN908947.3 14717 14743 SARS-CoV-2_48_RIGHT 2 - CAGAACTTCCTTCCTTAAAGAAACCC 102 | MN908947.3 14953 14983 SARS-CoV-2_50_LEFT 2 + CATTTAATAAATGGGGTAAGGCTAGACTTT 103 | MN908947.3 15023 15050 SARS-CoV-2_49_RIGHT 1 - GGGATGACATTACGTTTTGTATATGCG 104 | MN908947.3 15214 15237 SARS-CoV-2_51_LEFT 1 + GCAAATTCTATGGTGGTTGGCAC 105 | MN908947.3 15336 15358 SARS-CoV-2_50_RIGHT 2 - GAGCAAGAACAAGTGAGGCCAT 106 | MN908947.3 15535 15557 SARS-CoV-2_52_LEFT 2 + CTGTCACGGCCAATGTTAATGC 107 | MN908947.3 15596 15619 SARS-CoV-2_51_RIGHT 1 - GTCTGTGTTGTAAATTGCGGACA 108 | MN908947.3 15855 15881 SARS-CoV-2_53_LEFT 1 + ACTAAAGGACCTCATGAATTTTGCTC 109 | MN908947.3 15917 15941 SARS-CoV-2_52_RIGHT 2 - GGATCTGGGTAAGGAAGGTACACA 110 | MN908947.3 16112 16137 SARS-CoV-2_54_LEFT 2 + ACATGATGAGTTAACAGGACACATG 111 | MN908947.3 16239 16260 SARS-CoV-2_53_RIGHT 1 - GCAAAGAACACAAGCCCCAAC 112 | MN908947.3 16386 16408 SARS-CoV-2_55_LEFT 1 + AATGCTCCAGGTTGTGATGTCA 113 | MN908947.3 16483 16508 SARS-CoV-2_54_RIGHT 2 - CCAAAAACTTGTCCATTAGCACACA 114 | MN908947.3 16692 16714 SARS-CoV-2_56_LEFT 2 + ACTGTACGTGAAGTGCTGTCTG 115 | MN908947.3 16767 16796 SARS-CoV-2_55_RIGHT 1 - ACACGATAACCAGTAAAGACATAATTTCG 116 | MN908947.3 16986 17013 SARS-CoV-2_57_LEFT 1 + GGCTTATACCCAACACTCAATATCTCA 117 | MN908947.3 17082 17105 SARS-CoV-2_56_RIGHT 2 - TGACTCTTACCAGTACCAGGTGG 118 | MN908947.3 17323 17345 SARS-CoV-2_58_LEFT 2 + TGCCTGAGACGACAGCAGATAT 119 | MN908947.3 17381 17405 SARS-CoV-2_57_RIGHT 1 - CTGGCATTGACAACACTCAAATCA 120 | MN908947.3 17615 17642 SARS-CoV-2_59_LEFT 1 + GCTTAAAGCACATAAAGACAAATCAGC 121 | MN908947.3 17688 17711 SARS-CoV-2_58_RIGHT 2 - 
TGTGGCCTGTTAATTGCAGATGA 122 | MN908947.3 17911 17939 SARS-CoV-2_60_LEFT 2 + ACAGATTTAATGTTGCTATTACCAGAGC 123 | MN908947.3 17997 18022 SARS-CoV-2_59_RIGHT 1 - TCCTACGTGGAATTTCAAGACTTGT 124 | MN908947.3 18244 18267 SARS-CoV-2_61_LEFT 1 + ACCCTAACATGTTTATCACCCGC 125 | MN908947.3 18307 18328 SARS-CoV-2_60_RIGHT 2 - TAGCATGACACCCCTCGACAT 126 | MN908947.3 18550 18578 SARS-CoV-2_62_LEFT 2 + GTGACACACTTAAAAATCTCTCTGACAG 127 | MN908947.3 18624 18652 SARS-CoV-2_61_RIGHT 1 - GCTCAGGTCCTATTTTCACAAAATACTT 128 | MN908947.3 18869 18891 SARS-CoV-2_63_LEFT 1 + TAGGTGTCTAGCTGTCCACGAG 129 | MN908947.3 18936 18961 SARS-CoV-2_62_RIGHT 2 - CCGCATTAATCTTCAGTTCATCACC 130 | MN908947.3 19183 19208 SARS-CoV-2_64_LEFT 2 + GCCTATTTTGGAATTGCAATGTCGA 131 | MN908947.3 19252 19277 SARS-CoV-2_63_RIGHT 1 - CCAGGCAAGTTAAGGTTAGATAGCA 132 | MN908947.3 19485 19513 SARS-CoV-2_65_LEFT 1 + GTCTGTAGACATCATGCTAATGAGTACA 133 | MN908947.3 19558 19586 SARS-CoV-2_64_RIGHT 2 - GTATCAAATTGTTTGTAAACCCACAAGC 134 | MN908947.3 19810 19836 SARS-CoV-2_66_LEFT 2 + AACCAGTACCAGAGGTGAAAATACTC 135 | MN908947.3 19877 19901 SARS-CoV-2_65_RIGHT 1 - GCTGGAGCATCTCTTTTGTAGTCC 136 | MN908947.3 20090 20117 SARS-CoV-2_67_LEFT 1 + CAAACAAGCTAGTCTTAATGGAGTCAC 137 | MN908947.3 20186 20216 SARS-CoV-2_66_RIGHT 2 - TTTCTACTCTGAGTAAAGTAAGTTTCAGGT 138 | MN908947.3 20377 20405 SARS-CoV-2_68_LEFT 2 + GACTAGCTAAACGTTTTAAGGAATCACC 139 | MN908947.3 20472 20497 SARS-CoV-2_67_RIGHT 1 - AACACACACACTTAGATGAACCTGT 140 | MN908947.3 20677 20699 SARS-CoV-2_69_LEFT 1 + CGGGTGTTGCTATGCCTAATCT 141 | MN908947.3 20766 20792 SARS-CoV-2_68_RIGHT 2 - GCGACATTCATCATTATGCCTTTAGG 142 | MN908947.3 20988 21013 SARS-CoV-2_70_LEFT 2 + TTGATTGGTGATTGTGCAACTGTAC 143 | MN908947.3 21050 21080 SARS-CoV-2_69_RIGHT 1 - TTTGTAACATTTTTAGTCTTAGGGTCGTAC 144 | MN908947.3 21294 21316 SARS-CoV-2_71_LEFT 1 + GGCAAACCACGCGAACAAATAG 145 | MN908947.3 21358 21387 SARS-CoV-2_70_RIGHT 2 - AGAATAGGAAGACAACTGAATTGGATTTG 146 | MN908947.3 21532 21561 SARS-CoV-2_72_LEFT 2 + 
GTGATGTTCTTGTTAACAACTAAACGAAC 147 | MN908947.3 21675 21700 SARS-CoV-2_71_RIGHT 1 - TGAGGATCTGAAAACTTTGTCAGGG 148 | MN908947.3 21865 21889 SARS-CoV-2_73_LEFT 1 + AGAGGCTGGATTTTTGGTACTACT 149 | MN908947.3 21904 21933 SARS-CoV-2_72_RIGHT 2 - GTAGCGTTATTAACAATAAGTAGGGACTG 150 | MN908947.3 22091 22113 SARS-CoV-2_74_LEFT 2 + TGGACCTTGAAGGAAAACAGGG 151 | MN908947.3 22247 22274 SARS-CoV-2_73_RIGHT 1 - ACCTAGTGATGTTAATACCTATTGGCA 152 | MN908947.3 22402 22428 SARS-CoV-2_75_LEFT 1 + GAAAATGGAACCATTACAGATGCTGT 153 | MN908947.3 22474 22503 SARS-CoV-2_74_RIGHT 2 - TGATAGATTCCTTTTTCTACAGTGAAGGA 154 | MN908947.3 22648 22677 SARS-CoV-2_76_LEFT 2 + GCTGATTATTCTGTCCTATATAATTCCGC 155 | MN908947.3 22742 22774 SARS-CoV-2_76_LEFT_alt1 2 + ATGTCTATGCAGATTCATTTGTAATTAGAGGT 156 | MN908947.3 22785 22805 SARS-CoV-2_75_RIGHT 1 - TTTGCCCTGGAGCGATTTGT 157 | MN908947.3 22944 22974 SARS-CoV-2_77_LEFT 1 + CAAACCTTTTGAGAGAGATATTTCAACTGA 158 | MN908947.3 23028 23057 SARS-CoV-2_76_RIGHT 2 - GTTGGAAACCATATGATTGTAAAGGAAAG 159 | MN908947.3 23120 23141 SARS-CoV-2_76_RIGHT_alt1 2 - GTCCACAAACAGTTGCTGGTG 160 | MN908947.3 23219 23246 SARS-CoV-2_78_LEFT 2 + CTGAGTCTAACAAAAAGTTTCTGCCTT 161 | MN908947.3 23327 23351 SARS-CoV-2_77_RIGHT 1 - CACTGACACCACCAAAAGAACATG 162 | MN908947.3 23553 23575 SARS-CoV-2_79_LEFT 1 + ACCCATTGGTGCAGGTATATGC 163 | MN908947.3 23611 23635 SARS-CoV-2_78_RIGHT 2 - GGATTGACTAGCTACACTACGTGC 164 | MN908947.3 23853 23876 SARS-CoV-2_80_LEFT 2 + CCGTGCTTTAACTGGAATAGCTG 165 | MN908947.3 23914 23944 SARS-CoV-2_79_RIGHT_alt1 1 - AATTGGTGGTGTTTTGTAAATTTGTTTGAC 166 | MN908947.3 23927 23955 SARS-CoV-2_79_RIGHT 1 - CCAAAATCTTTAATTGGTGGTGTTTTGT 167 | MN908947.3 24171 24194 SARS-CoV-2_81_LEFT 1 + TGCTCAATACACTTCTGCACTGT 168 | MN908947.3 24233 24258 SARS-CoV-2_80_RIGHT 2 - GCAAATGGTATTTGTAATGCAGCAC 169 | MN908947.3 24426 24448 SARS-CoV-2_82_LEFT 2 + TGCACAAGCTTTAAACACGCTT 170 | MN908947.3 24545 24567 SARS-CoV-2_81_RIGHT 1 - TGAAGTCTGCCTGTGATCAACC 171 | MN908947.3 24750 24772 SARS-CoV-2_83_LEFT 1 + 
GCATGTGACTTATGTCCCTGCA 172 | MN908947.3 24814 24836 SARS-CoV-2_82_RIGHT 2 - CACGAGGAAAGTGTGCTTTTCC 173 | MN908947.3 25051 25076 SARS-CoV-2_84_LEFT 2 + GTTGATTTAGGTGACATCTCTGGCA 174 | MN908947.3 25122 25150 SARS-CoV-2_83_RIGHT 1 - AGATTCATTTAAATTCTTGGCAACCTCA 175 | MN908947.3 25331 25353 SARS-CoV-2_85_LEFT 1 + ATGAAGACGACTCTGAGCCAGT 176 | MN908947.3 25438 25461 SARS-CoV-2_84_RIGHT 2 - AGCATCCTTGATTTCACCTTGCT 177 | MN908947.3 25645 25672 SARS-CoV-2_86_LEFT 2 + TGTTGTTTGTAACAGTTTACTCACACC 178 | MN908947.3 25711 25740 SARS-CoV-2_85_RIGHT 1 - CTGCAAGAAGTAGACTAAAGCATAAAGAT 179 | MN908947.3 25951 25979 SARS-CoV-2_87_LEFT 1 + GTGGTTATACTGAAAAATGGGAATCTGG 180 | MN908947.3 26026 26050 SARS-CoV-2_86_RIGHT 2 - TCAATTGAGTTGAGTACAGCTGGT 181 | MN908947.3 26242 26268 SARS-CoV-2_88_LEFT_alt1 2 + TTATGTACTCATTCGTTTCGGAAGAG 182 | MN908947.3 26255 26277 SARS-CoV-2_88_LEFT 2 + CGTTTCGGAAGAGACAGGTACG 183 | MN908947.3 26338 26360 SARS-CoV-2_87_RIGHT 1 - AATCGAAGCGCAGTAAGGATGG 184 | MN908947.3 26564 26587 SARS-CoV-2_89_LEFT 1 + AAGCTCCTTGAACAATGGAACCT 185 | MN908947.3 26592 26621 SARS-CoV-2_89_LEFT_alt1 1 + TAGGTTTCCTATTCCTTACATGGATTTGT 186 | MN908947.3 26635 26661 SARS-CoV-2_88_RIGHT 2 - ACAAAAACCTATTCCTGTTGGCATAG 187 | MN908947.3 26873 26895 SARS-CoV-2_90_LEFT 2 + ATTCTTCTCAACGTGCCACTCC 188 | MN908947.3 26956 26979 SARS-CoV-2_89_RIGHT 1 - CAGCAATACGAAGATGTCCACGA 189 | MN908947.3 26966 26991 SARS-CoV-2_89_RIGHT_alt1 1 - CTAGATGGTGTCCAGCAATACGAAG 190 | MN908947.3 27152 27177 SARS-CoV-2_91_LEFT 1 + TCCAGTAGCAGTGACAATATTGCTT 191 | MN908947.3 27218 27251 SARS-CoV-2_90_RIGHT_alt1 2 - ATTAGTAATATCTCTGCTATAGTAACCTGAAAG 192 | MN908947.3 27256 27283 SARS-CoV-2_90_RIGHT 2 - TCCAAATGGAAACTTTAAAAGTCCTCA 193 | MN908947.3 27447 27473 SARS-CoV-2_92_LEFT 2 + CACTACCAAGAGTGTGTTAGAGGTAC 194 | MN908947.3 27534 27560 SARS-CoV-2_91_RIGHT 1 - AGTGCAAATTTGTTATCAGCTAGAGG 195 | MN908947.3 27700 27726 SARS-CoV-2_93_LEFT 1 + TTGTTGCGGCAATAGTGTTTATAACA 196 | MN908947.3 27826 27855 SARS-CoV-2_92_RIGHT 2 - 
GTTCAAGTGAGAACCAAAAGATAATAAGC 197 | MN908947.3 27996 28021 SARS-CoV-2_94_LEFT 2 + ACCCGTGTCCTATTCACTTCTATTC 198 | MN908947.3 28082 28104 SARS-CoV-2_93_RIGHT 1 - TGGGTGATTTAGAACCAGCCTC 199 | MN908947.3 28190 28214 SARS-CoV-2_95_LEFT 1 + GTGCGTTGTTCGTTCTATGAAGAC 200 | MN908947.3 28394 28416 SARS-CoV-2_94_RIGHT 2 - TTATTGGGTAAACCTTGGGGCC 201 | MN908947.3 28512 28536 SARS-CoV-2_96_LEFT 2 + AGATGACCAAATTGGCTACTACCG 202 | MN908947.3 28572 28598 SARS-CoV-2_95_RIGHT 1 - ACCATCTTGGACTGAGATCTTTCATT 203 | MN908947.3 28827 28849 SARS-CoV-2_97_LEFT 1 + TTCCTCATCACGTAGTCGCAAC 204 | MN908947.3 28893 28914 SARS-CoV-2_96_RIGHT 2 - CCATTGCCAGCCATTCTAGCA 205 | MN908947.3 29136 29161 SARS-CoV-2_98_LEFT 2 + CCAGGAACTAATCAGACAAGGAACT 206 | MN908947.3 29206 29227 SARS-CoV-2_97_RIGHT 1 - CGACATTCCGAAGAACGCTGA 207 | MN908947.3 29452 29475 SARS-CoV-2_99_LEFT 1 + CTTCTTCCTGCTGCAGATTTGGA 208 | MN908947.3 29512 29534 SARS-CoV-2_98_RIGHT 2 - TTTAGGCCTGAGTTGAGTCAGC 209 | MN908947.3 29827 29854 SARS-CoV-2_99_RIGHT 1 - GCTATTAAAATCACATGGGGATAGCAC 210 | -------------------------------------------------------------------------------- /reference.fasta: -------------------------------------------------------------------------------- 1 | >MN908947 (Wuhan-Hu-1/2019) 2 | ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCT 3 | GTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACT 4 | CACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATC 5 | TTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT 6 | CGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC 7 | ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGG 8 | AGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG 9 | CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAA 10 | ACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACT 11 | CGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG 12 | CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGG 13 | 
TGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGA 14 | TCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGA 15 | ACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG 16 | CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC 17 | ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCG 18 | TGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCA 19 | GACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAA 20 | TTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAA 21 | GCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG 22 | CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA 23 | GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGA 24 | AGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGC 25 | ATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGG 26 | CTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC 27 | TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGG 28 | TTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGA 29 | AATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA 30 | GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAA 31 | AGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC 32 | AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCC 33 | TCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCT 34 | TGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGG 35 | AATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTAC 36 | TAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG 37 | GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGA 38 | AGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTAT 39 | CTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAA 40 | GGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTC 41 | TATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA 42 | CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCC 43 | 
TCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT 44 | AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGA 45 | AGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGA 46 | AATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC 47 | CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGA 48 | AGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGT 49 | ACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGC 50 | CTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC 51 | ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG 52 | TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGA 53 | AGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGA 54 | AGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGA 55 | AGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGA 56 | CGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT 57 | AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT 58 | AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGT 59 | AAAACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGC 60 | AGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGC 61 | TACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA 62 | ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAA 63 | GAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGG 64 | TATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAA 65 | TGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGA 66 | AATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA 67 | GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAAT 68 | CAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAA 69 | CTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAG 70 | TGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCA 71 | AGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAAT 72 | GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCA 73 | 
GGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGC 74 | CTTTTACATTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTC 75 | TTGGAATTTGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTG 76 | TGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA 77 | AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGC 78 | GTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTA 79 | TGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCC 80 | AGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTC 81 | TTCTTCTAAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA 82 | AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGA 83 | TAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCAC 84 | CTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTAC 85 | AACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACA 86 | ACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC 87 | ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTT 88 | TGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCA 89 | CACTAAAAAGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAA 90 | CAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCC 91 | ACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGC 92 | ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAAT 93 | GAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTG 94 | TAAAACTTGTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGG 95 | CACACTTTCTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACA 96 | AGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC 97 | TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCA 98 | GTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTT 99 | ACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAG 100 | TTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAAT 101 | TGACCCTAAGTTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT 102 | TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATG 103 | 
TGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTC 104 | AAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTA 105 | TAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTG 106 | GCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG 107 | TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGA 108 | CGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGT 109 | GGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGT 110 | AGGAGACATTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCA 111 | CACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGA 112 | ATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAG 113 | TGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC 114 | AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTT 115 | CTTTACTTTATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGC 116 | ATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGA 117 | GGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTG 118 | GTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGT 119 | TTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAA 120 | CTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT 121 | TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC 122 | ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATAT 123 | TCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAG 124 | CTATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGT 125 | ACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTA 126 | TGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTG 127 | TTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG 128 | GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTG 129 | TGTTAATTGTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGA 130 | CTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGA 131 | TAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGAC 132 | 
TTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAATAACAC 133 | TAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATC 134 | ATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT 135 | AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGA 136 | TGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACT 137 | AGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTAC 138 | TTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGT 139 | TGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAA 140 | CTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTAT 141 | TGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT 142 | ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGC 143 | TGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAA 144 | TGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCA 145 | GTTAATTAAAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACC 146 | TGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTAT 147 | TGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGC 148 | TGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCC 149 | ATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCAC 150 | GATATTACGCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGT 151 | TGGTAACATCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGC 152 | TTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATA 153 | TTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACAC 154 | ACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTC 155 | TGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATC 156 | AGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAG 157 | ATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTAC 158 | ACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTAT 159 | TGTAGCTATCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGG 160 | TGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCACTGTACT 161 | 
CTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT 162 | GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTT 163 | CACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCA 164 | TTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTT 165 | TAGTACTTTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAA 166 | GTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAA 167 | TAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTG 168 | TCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACC 169 | ACCACAAACCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCC 170 | ATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGG 171 | TCTTTGGCTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACAT 172 | GCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACA 173 | GGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCT 174 | TAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGG 175 | ACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGC 176 | TATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG 177 | TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAAC 178 | TGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCA 179 | AACAGCACAAGCAGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTA 180 | CGCTGCTGTTATAAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGA 181 | CTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT 182 | ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAA 183 | AGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGA 184 | TGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGT 185 | GAAAAGAACAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTT 186 | AGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTT 187 | ACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAA 188 | GCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATAT 189 | GGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATAC 190 | 
TAGTTTGTCTGGTTTTAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACT 191 | AATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTAT 192 | GAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTC 193 | CATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCAT 194 | GTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAAC 195 | TGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTG 196 | TTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGA 197 | TTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAA 198 | GAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTG 199 | TATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTT 200 | ACTCTCAGTTTTGCAACAACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGT 201 | CCAGTTACACAATGACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGT 202 | TTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGA 203 | AGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCC 204 | ATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGA 205 | TTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGA 206 | CCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAAT 207 | GTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAAT 208 | GCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTATCAACAATGC 209 | AAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGT 210 | TGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACATTTACTTATGC 211 | ATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAG 212 | TGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAG 213 | GGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGAT 214 | GTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTA 215 | CAACACAACAAAGGGAGGTAGGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAA 216 | ATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACC 217 | TTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAA 218 | AGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCT 219 | 
ACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTT 220 | TGCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCAC 221 | TAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACC 222 | GGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTG 223 | CCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAAT 224 | ACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGT 225 | CTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCA 226 | GTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACA 227 | CCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGAT 228 | AAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGAC 229 | GAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTAC 230 | CAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGAC 231 | TTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACT 232 | AAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGAC 233 | ACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAG 234 | GACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAA 235 | CGTGTACGCCAAGCTTTGTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGT 236 | ATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGT 237 | GATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTG 238 | TTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGAC 239 | TTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTA 240 | AAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAAC 241 | TGTTTGGATGACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTG 242 | TTCCCACCTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTT 243 | GTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAAC 244 | TTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATG 245 | CACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCA 246 | CTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTAT 247 | GACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTC 248 | 
TTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTA 249 | CCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTT 250 | GATTGTTACGATGGTGGCTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAA 251 | TCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGT 252 | TATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACT 253 | CAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTC 254 | TCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCC 255 | GCCACTAGAGGAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAAC 256 | ATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCT 257 | AAATGTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGC 258 | AAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCT 259 | CAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAACC 260 | TCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTC 261 | ACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTC 262 | CGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGAC 263 | TTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATACTCTCTGAC 264 | GATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGGTCTAGTGGCTAGCATAAAG 265 | AACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGG 266 | ACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTT 267 | AAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCC 268 | GGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTG 269 | TCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTC 270 | TTTCATTTGTACTTACAATACATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTA 271 | GACATGTATTCTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTT 272 | TATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGC 273 | AATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAA 274 | TGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTAT 275 | GTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATG 276 | AGCTATTATTGTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAA 277 | 
GTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCA 278 | ATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAA 279 | AGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCT 280 | TATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAA 281 | GTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT 282 | AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCT 283 | GTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACA 284 | TCACATACAGTAATGCCATTAAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGA 285 | ATTACTGGCTTATACCCAACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAAT 286 | TATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAG 287 | AGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCT 288 | TGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGAT 289 | AAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTG 290 | AATTCAACATTAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCA 291 | GATATAGTTGTCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAAT 292 | GCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCA 293 | CGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTT 294 | ATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATT 295 | GTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCA 296 | GCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATT 297 | AACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAA 298 | GCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTA 299 | CCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGACTATGTCATATTCACTCAA 300 | ACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCA 301 | AAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACA 302 | AGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTC 303 | TTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTC 304 | AGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGACATACCTGGCATACCTAAG 305 | GACATGACCTATAGAAGACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAAT 306 | 
GGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATT 307 | GGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTA 308 | CAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACA 309 | CCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAA 310 | CACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTA 311 | CAAATGTTAAGTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCA 312 | CATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGT 313 | TGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGG 314 | CATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGG 315 | GGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCA 316 | CATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTT 317 | AAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCG 318 | GCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCA 319 | GTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAA 320 | TGGAAGTTCTATGATGCACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTC 321 | TATTCTTATGCCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGC 322 | AATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCT 323 | AACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCAC 324 | ACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTAC 325 | TCTGACAGTCCATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCA 326 | CTAAAGTCTGCTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCAT 327 | GCTAATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGC 328 | TTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACAAGACTTCAG 329 | AGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGT 330 | GAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTA 331 | GAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAG 332 | CGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCT 333 | GCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGT 334 | GTTTGTTCTATGACTGACATAGCCAAGAAACCAACTGAAACGATTTGTGCACCACTCACT 335 | 
GTCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGT 336 | GTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCT 337 | AGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAG 338 | AAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTA 339 | CAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAA 340 | TTCATTGAACGGTATAAATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTT 341 | AGTCATAGTCAGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAA 342 | TCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATA 343 | ACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGAT 344 | GATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTG 345 | ACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACA 346 | TTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTT 347 | TACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCA 348 | ACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTA 349 | AACACATTAACATTAGCTGTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCT 350 | GATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTG 351 | CTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGAT 352 | TGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCT 353 | AAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGT 354 | GGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACAT 355 | TCTTGGAATGCTGATCTTTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTT 356 | ACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAA 357 | CCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACA 358 | AATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTA 359 | AGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTT 360 | CTTAGTAAAGGTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTT 361 | CTTGTTAACAACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAG 362 | TCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCAC 363 | ACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGA 364 | 
CTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGAC 365 | CAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGC 366 | TTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAA 367 | GACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATT 368 | TCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGAT 369 | GGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCA 370 | GCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGT 371 | GTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGT 372 | GCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTAT 373 | TAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGA 374 | TTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAG 375 | GACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACT 376 | TGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTA 377 | TCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTAC 378 | AAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTG 379 | GAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATC 380 | ATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTAC 381 | TAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGG 382 | GCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGT 383 | TATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTA 384 | TAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTA 385 | TCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACA 386 | ATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACT 387 | TTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTT 388 | GGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTAC 389 | TGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTAC 390 | TGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGG 391 | TGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCA 392 | GGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTG 393 | 
GCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGC 394 | TGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAG 395 | TTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCAT 396 | TGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGC 397 | CATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAA 398 | GACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTT 399 | GTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGA 400 | ACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACC 401 | AATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAG 402 | CAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTT 403 | CATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACA 404 | AAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATA 405 | CACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGC 406 | ATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACA 407 | GAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAA 408 | AATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAA 409 | CCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAAT 410 | TTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAAT 411 | TGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAAT 412 | TAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGT 413 | ACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCC 414 | TCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAA 415 | GAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGG 416 | TGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACA 417 | AATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGT 418 | CAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGA 419 | TAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAA 420 | TGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTT 421 | AAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCC 422 | 
ATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTAT 423 | GCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTG 424 | CTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACAC 425 | ATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAG 426 | CAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACCG 427 | ATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTT 428 | CAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGT 429 | GTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTC 430 | GTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAG 431 | AGTATAAACTTTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAA 432 | AACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGACTAT 433 | TGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACA 434 | AGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAATCTGGA 435 | GTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCA 436 | ACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATT 437 | GTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTT 438 | AATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAA 439 | GCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGACAGGTACGTTA 440 | ATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCC 441 | ATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTA 442 | AAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT 443 | CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAG 444 | CCATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAAT 445 | GGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATG 446 | CCAACAGGAATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAG 447 | TAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAA 448 | TTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTT 449 | TCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC 450 | TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAA 451 | 
TCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTG 452 | ACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACA 453 | AATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACA 454 | GGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTGCTTTGC 455 | TTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAG 456 | ATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA 457 | AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGAT 458 | GAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTG 459 | ATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTA 460 | CTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTA 461 | GCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGAC 462 | GGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGA 463 | CAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT 464 | ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACT 465 | TCTATTTGTGCTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTT 466 | GGTTCTCACTTGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAAT 467 | TTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTAC 468 | AGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATT 469 | CTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGG 470 | ATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT 471 | GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTT 472 | CGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAA 473 | CGAACAAACTAAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTAC 474 | GTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGAGAACGCAGTGGGGCGCG 475 | ATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCT 476 | CACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACAC 477 | CAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG 478 | TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGG 479 | GCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGA 480 | 
GGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGC 481 | AATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAG 482 | CAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAA 483 | TTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGA 484 | TGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG 485 | TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAA 486 | GAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAG 487 | ACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAAC 488 | TGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGG 489 | AATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGC 490 | CATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCA 491 | TATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC 492 | TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCC 493 | TGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTC 494 | AACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGC 495 | TTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGC 496 | ACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTA 497 | GGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTACGATCGAGTGT 498 | ACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAAT 499 | TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAA 500 | AAAAAAAAAAAAAAAAAAAAAAA 501 | -------------------------------------------------------------------------------- /sc2rf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import csv 4 | import enum 5 | from typing import NamedTuple 6 | from termcolor import colored, cprint 7 | import json 8 | import argparse 9 | import os 10 | import requests 11 | from tqdm import tqdm 12 | import urllib.parse 13 | 14 | 15 | colors = ['red', 'green', 'blue', 'yellow', 'magenta', 'cyan'] 16 | 17 | width_override = None 18 | 19 | # I removed "ORF" from the names, because often we only see the first one 
# or two letters of a name, and "ORF" provides no information
#
# Gene table: short gene name -> pair of integers.
# NOTE(review): the pairs look like (start, end) positions on the reference
# genome -- confirm against the code that consumes this table.
# NOTE(review): the key 'N' occurs twice in this literal. Python keeps only
# the value of the later entry, (28578, 29533), while the key stays at the
# position of its first occurrence (before '9b'); the (28274, 28283) pair is
# therefore silently discarded. Left unchanged here because other code may
# rely on the resulting contents and order.
genes = {
    '1a': (266, 13468),
    '1b': (13468, 21555),
    'S': (21563, 25384),
    'E': (26245, 26472),
    '3a': (25393, 26220),
    'M': (26523, 27191),
    '6': (27202, 27387),
    '7a': (27394, 27759),
    '7b': (27756, 27887),
    '8': (27894, 28259),
    'N': (28274, 28283),  # my algorithm does not like that ORF9b is inside N, so I split N in two halves
    '9b': (28284, 28577),
    'N': (28578, 29533),
    '': (29534, 99999),  # probably nothing, but need something to mark the end of N
}


class Interval:
    """An interval of integers, e.g., 27, 27-, -30 or 27-30.

    The textual form is ``MIN-MAX`` where either side may be left out to make
    that side unbounded; a single number ``N`` is the interval ``N-N``.
    Both limits are inclusive. Negative numbers are not supported because
    ``-`` is the separator.

    Raises:
        ValueError: if the string contains more than one ``-`` or a limit
            is not a valid integer.
    """

    def __init__(self, string):
        # TODO allow multiple separators, see https://stackoverflow.com/questions/1059559/split-strings-into-words-with-multiple-word-boundary-delimiters
        self.original_string = string
        parts = string.split('-')
        if len(parts) == 1:
            # A single number: lower and upper limit coincide.
            self.min = int(parts[0])
            self.max = int(parts[0])
        elif len(parts) == 2:
            # An empty side means "unbounded" and is stored as None.
            self.min = int(parts[0]) if parts[0] else None
            self.max = int(parts[1]) if parts[1] else None
        else:
            raise ValueError('invalid interval: ' + string)

    def matches(self, num):
        """Return True if num lies within the closed interval.

        A limit of None is unbounded. The limits are compared against None
        explicitly: a plain truthiness test would treat a limit of 0 as
        "no limit", so that e.g. the interval "0" would match every number.
        """
        if self.min is not None and num < self.min:
            return False
        if self.max is not None and num > self.max:
            return False

        return True

    def __str__(self):
        """Return the string the interval was created from."""
        return self.original_string
unknown recombinant variants.', 83 | epilog='An Interval can be a single number ("3"), a closed interval ("2-5" ) or an open one ("4-" or "-7").' 84 | ' The limits are inclusive. Only positive numbers are supported.', 85 | formatter_class=ArgumentAdvancedDefaultsHelpFormatter 86 | ) 87 | parser.add_argument('input', nargs='*', help='input sequence(s) to test, as aligned .fasta file(s)') 88 | parser.add_argument('--primers', nargs='*', metavar='PRIMER', help='Filenames of primer set(s) to visualize. The .bed formats for ARTIC and EasySeq are recognized and supported.') 89 | parser.add_argument('--primer-intervals', nargs='*', metavar='INTERVAL', type=Interval, help='Coordinate intervals in which to visualize primers.') 90 | parser.add_argument('--parents', '-p', default='2-4', metavar='INTERVAL', type=Interval, help='Allowed number of potential parents of a recombinant.') 91 | parser.add_argument('--breakpoints', '-b', default='1-4', metavar='INTERVAL', type=Interval, help='Allowed number of breakpoints in a recombinant.') 92 | parser.add_argument('--clades', '-c', nargs='*', default=['20I', '20H', '20J', '21I', '21J', 'BA.1', 'BA.2', 'BA.3'], help='List of variants which are considered as potential parents. Use Nextstrain clades (like "21B"), or Pango Lineages (like "B.1.617.1") or both. Also accepts "all".') 93 | parser.add_argument('--unique', '-u', default=2, type=int, metavar='NUM', help='Minimum of substitutions in a sample which are unique to a potential parent clade, so that the clade will be considered.') 94 | parser.add_argument('--max-intermission-length', '-l', metavar='NUM', default=2, type=int, help='The maximum length of an intermission in consecutive substitutions. Intermissions are stretches to be ignored when counting breakpoints.') 95 | parser.add_argument('--max-intermission-count', '-i', metavar='NUM', default=8, type=int, help='The maximum number of intermissions which will be ignored. 
Surplus intermissions count towards the number of breakpoints.') 96 | parser.add_argument('--max-name-length', '-n', metavar='NUM', default=30, type=int, help='Only show up to NUM characters of sample names.') 97 | parser.add_argument('--max-ambiguous', '-a', metavar='NUM', default=50, type=int, help='Maximum number of ambiguous nucs in a sample before it gets ignored.') 98 | parser.add_argument('--force-all-parents', '-f', action='store_true', help='Force to consider all clades as potential parents for all sequences. Only useful for debugging.') 99 | parser.add_argument('--select-sequences', '-s', default='0-999999', metavar='INTERVAL', type=Interval, help='Use only a specific range of input sequences. DOES NOT YET WORK WITH MULTIPLE INPUT FILES.') 100 | parser.add_argument('--enable-deletions', '-d', action='store_true', help='Include deletions in lineage comparision.') 101 | parser.add_argument('--show-private-mutations', action='store_true', help='Display mutations which are not in any of the potential parental clades.') 102 | parser.add_argument('--rebuild-examples', '-r', action='store_true', help='Rebuild the mutations in examples by querying cov-spectrum.org.') 103 | parser.add_argument('--mutation-threshold', '-t', metavar='NUM', default=0.75, type=float, help='Consider mutations with a prevalence of at least NUM as mandatory for a clade (range 0.05 - 1.0, default: %(default)s).') 104 | parser.add_argument('--add-spaces', metavar='NUM', nargs='?', default=0, const=5, type=int, help='Add spaces between every N colums, which makes it easier to keep your eye at a fixed place.') 105 | parser.add_argument('--sort-by-id', metavar='NUM', nargs='?', default=0, const=999, type=int, help='Sort the input sequences by the ID. If you provide NUM, only the first NUM characters are considered. Useful if this correlates with meaning full meta information, e.g. 
the sequencing lab.') 106 | #parser.add_argument('--sort-by-first-breakpoint', action='store_true', help='Does what it says.') 107 | parser.add_argument('--verbose', '-v', action='store_true', help='Print some more information, mostly useful for debugging.') 108 | parser.add_argument('--ansi', action='store_true', help='Use only ASCII characters to be compatible with ansilove.') 109 | parser.add_argument('--update-readme', action='store_true', help=argparse.SUPPRESS) 110 | parser.add_argument('--hide-progress', action='store_true', help="Don't show progress bars during long task.") 111 | parser.add_argument('--csvfile', type=argparse.FileType('w'), help="Path to write results in CSV format.") 112 | 113 | global args 114 | args = parser.parse_args() 115 | 116 | if args.ansi: 117 | dot_character = '.' 118 | 119 | if args.update_readme: 120 | update_readme(parser) 121 | print("Readme was updated. Program exits.") 122 | return 123 | 124 | if args.rebuild_examples: 125 | rebuild_examples() 126 | if len(args.input) == 0: 127 | print("Examples were rebuilt, and no input sequences were provided. Program exits.") 128 | return 129 | elif len(args.input) == 0: 130 | print("Input sequences must be provided, except when rebuilding the examples. Use --help for more info. 
Program exits.") 131 | return 132 | 133 | if args.mutation_threshold < 0.05 or args.mutation_threshold > 1.0 : 134 | print("mutation-threshold must be between 0.05 and 1.0") 135 | return 136 | 137 | global reference 138 | vprint("Reading reference genome, lineage definitions...") 139 | reference = read_fasta('reference.fasta', None)['MN908947 (Wuhan-Hu-1/2019)'] 140 | all_examples = read_examples('virus_properties.json') 141 | 142 | used_examples = [] 143 | if 'all' in args.clades: 144 | used_examples = all_examples 145 | else: 146 | # screen for a subset of examples only 147 | for example in all_examples: 148 | if len(example['NextstrainClade']) and example['NextstrainClade'] in args.clades: 149 | used_examples.append(example) 150 | elif len(example['PangoLineage']) and example['PangoLineage'] in args.clades: 151 | used_examples.append(example) 152 | 153 | if args.force_all_parents and not args.parents.matches(len(used_examples)): 154 | print("The number of allowed parents, the number of selected clades and the --force-all-parents conflict so that the results must be empty.") 155 | return 156 | 157 | vprint("Done.\nReading actual input.") 158 | all_samples = dict() 159 | for path in args.input: 160 | read_samples = read_subs_from_fasta(path) 161 | for key, val in read_samples.items(): 162 | all_samples[key] = val # deep copy 163 | vprint("Done.") 164 | 165 | global primer_sets 166 | primer_sets = dict() 167 | if args.primers: 168 | vprint("Reading primers.") 169 | for path in args.primers: 170 | pools = read_bed(path) 171 | primer_sets[path] = pools 172 | vprint("Done.") 173 | 174 | calculate_relations(used_examples) 175 | 176 | # lists of samples keyed by tuples of example indices 177 | match_sets = dict() 178 | 179 | vprint("Scanning input for matches against lineage definitons...") 180 | for sa_name, sa in my_tqdm(all_samples.items(), desc="First pass scan"): 181 | matching_example_indices = [] 182 | if args.force_all_parents: 183 | matching_example_indices = 
range(0, len(used_examples)) 184 | else: 185 | for i, ex in enumerate(used_examples): 186 | matches_count = len(sa['subs_set'] & ex['unique_subs_set']) 187 | # theoretically > 0 already gives us recombinants, but they are much 188 | # more likely to be errors or coincidences 189 | if matches_count >= args.unique: 190 | matching_example_indices.append(i) 191 | 192 | matching_examples_tup = tuple(matching_example_indices) 193 | 194 | if args.parents.matches(len(matching_example_indices)): 195 | #print(f"{sa_name} is a possible recombinant of {len(matching_example_names)} lineages: {matching_example_names}") 196 | if match_sets.get(matching_examples_tup): 197 | match_sets[matching_examples_tup].append(sa) 198 | else: 199 | match_sets[matching_examples_tup] = [sa] 200 | 201 | vprint("Done.\nPrinting detailed analysis:\n\n") 202 | 203 | if len(match_sets): 204 | writer = None 205 | if args.csvfile: 206 | fieldnames = ['sample', 'examples', 'intermissions', 'breakpoints', 'regions'] 207 | if args.show_private_mutations: fieldnames.append('privates') 208 | writer = csv.DictWriter(args.csvfile, fieldnames=fieldnames) 209 | writer.writeheader() 210 | 211 | for matching_example_indices, samples in match_sets.items(): 212 | show_matches([used_examples[i] for i in matching_example_indices], samples, writer=writer) 213 | else: 214 | print("First pass found no potential recombinants, see ") 215 | 216 | 217 | def my_tqdm(*margs, **kwargs): 218 | return tqdm(*margs, delay=0.1, colour="green", disable=bool(args.hide_progress), **kwargs) 219 | 220 | 221 | def update_readme(parser: argparse.ArgumentParser): 222 | # on wide monitors, github displays up to 90 columns of preformatted text 223 | # but 10% of web users have screens which can only fit 65 characters 224 | global width_override 225 | width_override = 65 226 | 227 | help = parser.format_help() 228 | 229 | new_lines = [] 230 | 231 | between_markers = False 232 | 233 | with open("README.md", "rt") as old_readme: 234 | for line 
in old_readme: 235 | if line.strip() == "": 236 | between_markers = True 237 | new_lines.append('\n') 238 | 239 | if line.strip() == "": 240 | between_markers = False 241 | new_lines.append('```\n') 242 | new_lines.append(help + '\n') 243 | new_lines.append('```\n') 244 | 245 | if not between_markers: 246 | new_lines.append(line) 247 | 248 | with open("README.md", "wt") as new_readme: 249 | new_readme.writelines(new_lines) 250 | 251 | 252 | 253 | def vprint(text: str): 254 | if args.verbose: 255 | print(text) 256 | 257 | def rebuild_examples(): 258 | print("Rebuilding examples from cov-spectrum.org...") 259 | with open('virus_properties.json', newline='', mode='w') as jsonfile: 260 | 261 | the_list = [] 262 | 263 | for variant_props in mappings['list']: 264 | pango = variant_props['PangoLineage'] 265 | clade = variant_props['NextstrainClade'] 266 | who_label = variant_props['WhoLabel'] 267 | query = "" 268 | if variant_props['Query']: 269 | query = f"?variantQuery={urllib.parse.quote_plus(variant_props['Query'])}" 270 | elif pango and len(pango) > 0: 271 | query = f"?pangoLineage={pango}*" 272 | elif clade and len(clade) > 0: 273 | query = f"?nextstrainClade={clade}+({who_label})" 274 | else: 275 | print("Variant has neither pango nor clade, check out mapping.csv!") 276 | continue 277 | 278 | print(f"Fetching data for {query}") 279 | url = f'https://lapis.cov-spectrum.org/gisaid/v1/sample/nuc-mutations{query}&minProportion=0.05' 280 | print(f"Url is {url}") 281 | r = requests.get(url) 282 | result = r.json() 283 | if len(result['errors']): 284 | print("Errors occured while querying cov-spectrum.org:") 285 | for e in result['errors']: 286 | print(" " + e) 287 | 288 | variant = variant_props.copy() 289 | variant['mutations'] = result['data']; 290 | 291 | names = [who_label, pango, clade] 292 | names = [n for n in names if n is not None and len(n.strip()) > 0] 293 | variant['name'] = " / ".join(names) 294 | 295 | the_list.append(variant) 296 | 297 | props = { 298 | 
"schemaVersion": "s2r 0.0.2", 299 | "comment": "New file format, no longer looks like the original virus_properties.json", 300 | 'variants': the_list 301 | } 302 | 303 | json.dump(props, jsonfile, indent=4) 304 | print("Examples written to disk.") 305 | 306 | 307 | def read_examples(path): 308 | with open(path, newline='') as jsonfile: 309 | props = json.load(jsonfile) 310 | assert props['schemaVersion'] == 's2r 0.0.2' 311 | examples = [] 312 | for variant in props['variants']: 313 | subs_dict = dict() 314 | for m in variant['mutations']: 315 | if m['proportion'] < args.mutation_threshold: 316 | continue 317 | sub_string = m['mutation'].strip() 318 | 319 | if len(sub_string) > 0: 320 | sub = parse_sub(sub_string) 321 | if (sub.mut != '-' or args.enable_deletions) and sub.mut != '.': 322 | subs_dict[sub.coordinate] = sub 323 | example = { 324 | 'name': variant['name'], 325 | 'NextstrainClade': variant['NextstrainClade'], 326 | 'PangoLineage': variant['PangoLineage'], 327 | 'subs_dict': subs_dict, 328 | 'subs_list': list(subs_dict.values()), 329 | 'subs_set': set(subs_dict.values()), 330 | 'missings': [] 331 | } 332 | 333 | examples.append(example) 334 | 335 | return examples 336 | 337 | 338 | class Primer(NamedTuple): 339 | start: int 340 | end: int 341 | direction: str 342 | alt: bool 343 | name: str 344 | sequence: str 345 | 346 | 347 | class Amplicon: 348 | left_primers: list 349 | right_primer: list 350 | number: int 351 | color: str 352 | start: int 353 | end: int 354 | 355 | def __init__(self, number: int): 356 | self.number = number 357 | self.left_primers = list() 358 | self.right_primers = list() 359 | self.color = get_color(number) 360 | self.start = None 361 | self.end = None 362 | self.amp_start = None 363 | self.amp_end = None 364 | 365 | def __str__(self): 366 | return f"Amplicon {self.number} ({self.start} to {self.end})" 367 | 368 | def add_primer(self, primer): 369 | if primer.direction == '+': 370 | self.left_primers.append(primer) 371 | if 
self.amp_start: 372 | self.amp_start = max(self.amp_start, primer.end + 1) 373 | else: 374 | self.amp_start = primer.end + 1 375 | else: 376 | self.right_primers.append(primer) 377 | if self.amp_end: 378 | self.amp_end = min(self.amp_end, primer.start - 1) 379 | else: 380 | self.amp_end = primer.start - 1 381 | 382 | if self.start: 383 | self.start = min(self.start, primer.start) 384 | else: 385 | self.start = primer.start 386 | 387 | if self.end: 388 | self.end = max(self.end, primer.end) 389 | else: 390 | self.end = primer.end 391 | 392 | def get_char(self, coord: int): 393 | if coord <= self.start or coord >= self.end: 394 | return ' ' 395 | 396 | for primer in self.left_primers: 397 | if primer.start <= coord and primer.end >= coord: 398 | if primer.alt: 399 | return '{' if args.ansi else '‹' 400 | else: 401 | return '<' if args.ansi else '«' 402 | 403 | for primer in self.right_primers: 404 | if primer.start <= coord and primer.end >= coord: 405 | if primer.alt: 406 | return '}' if args.ansi else '›' 407 | else: 408 | return '>' if args.ansi else '»' 409 | 410 | return '-' 411 | 412 | def overlaps_coord(self, coord: int, actual_amplicon: bool): 413 | if actual_amplicon: 414 | return coord >= self.amp_start and coord <= self.amp_end 415 | else: 416 | return coord >= self.start and coord <= self.end 417 | 418 | def overlaps_interval(self, interval: Interval): 419 | if interval.max and self.start > interval.max: 420 | return False 421 | if interval.min and self.end < interval.min: 422 | return False 423 | return True 424 | 425 | 426 | def read_bed(path): 427 | pools = dict() 428 | index = 0 429 | current_name = None 430 | with open(path, newline='') as bed: 431 | for line in bed: 432 | parts = line.strip().split("\t") 433 | 434 | if(parts[0] == parts[3]): # EasySeq format 435 | name = parts[6] 436 | name_parts = name.split("_") 437 | amplicon_index = int(name_parts[1]) 438 | amplicon = Amplicon(amplicon_index) 439 | left_primer = Primer( 440 | start = 
int(parts[1]), 441 | end = int(parts[2]), 442 | name = "left_" + str(amplicon_index), 443 | alt = False, 444 | direction = "+", 445 | sequence = None 446 | ) 447 | right_primer = Primer( 448 | start = int(parts[4]), 449 | end = int(parts[5]), 450 | name = "right_" + str(amplicon_index), 451 | alt = False, 452 | direction = "-", 453 | sequence = None 454 | ) 455 | amplicon.add_primer(left_primer) 456 | amplicon.add_primer(right_primer) 457 | 458 | pool_index = (amplicon_index + 1) % 2 + 1 459 | 460 | if not pools.get(pool_index): 461 | pools[pool_index] = dict() 462 | 463 | pools[pool_index][amplicon_index] = amplicon 464 | 465 | else: 466 | # ARTIC format 467 | name = parts[3] 468 | name_parts = name.split("_") 469 | amplicon_index = int(name_parts[1]) 470 | pool_index = parts[4] 471 | direction = parts[5] 472 | 473 | pool = pools.get(pool_index) 474 | if not pool: 475 | pool = dict() 476 | pools[pool_index] = pool 477 | 478 | amplicon = pool.get(amplicon_index) 479 | if not amplicon: 480 | amplicon = Amplicon(amplicon_index) 481 | pool[amplicon_index] = amplicon 482 | 483 | primer = Primer( 484 | start = int(parts[1]), 485 | end = int(parts[2]), 486 | name = parts[3], 487 | alt = len(name_parts) == 4, 488 | direction = direction, 489 | sequence = parts[6] if 6 < len(parts) else None 490 | ) 491 | 492 | amplicon.add_primer(primer) 493 | 494 | return pools 495 | 496 | 497 | def read_fasta(path, index_range): 498 | """ 499 | :param path: str, absolute or relative path to FASTA file 500 | :param index_range: Interval, select specific records from FASTA 501 | :return: dict, sequences keyed by header 502 | """ 503 | sequences = dict() 504 | index = 0 505 | current_name = None 506 | 507 | file_pos = 0 508 | with my_tqdm(total=os.stat(path).st_size, desc="Read " + path, unit_scale=True) as pbar: 509 | with open(path, newline='') as fasta: 510 | current_sequence = '' 511 | for line in fasta: 512 | file_pos += len(line) 513 | pbar.update(file_pos - pbar.n) 514 | if line[0] 
== '>': 515 | if current_name and (not index_range or index_range.matches(index)): 516 | sequences[current_name] = current_sequence 517 | index += 1 518 | if index_range and index_range.max and index > index_range.max: 519 | return sequences 520 | current_sequence = '' 521 | current_name = line[1:].strip() 522 | else: 523 | current_sequence += line.strip().upper() 524 | sequences[current_name] = current_sequence 525 | 526 | return sequences 527 | 528 | 529 | def read_subs_from_fasta(path): 530 | """ 531 | Extract substitutions relative to reference genome. 532 | :param path: str, path to input FASTA file 533 | :return: dict, substitutions (as dict, list or set) keyed by genome name 534 | """ 535 | fastas = read_fasta(path, args.select_sequences) 536 | sequences = dict() 537 | start_n = -1 # used for tracking runs of Ns or gaps 538 | removed_due_to_ambig = 0 539 | for name, fasta in my_tqdm(fastas.items(), desc="Finding mutations in " + path): 540 | subs_dict = dict() # substitutions keyed by position 541 | missings = list() # start/end tuples of N's or gaps 542 | if len(fasta) != len(reference): 543 | print(f"Sequence {name} not properly aligned, length is {len(fasta)} instead of {len(reference)}.") 544 | else: 545 | ambiguous_count = 0 546 | for i in range(1, len(reference) + 1): 547 | r = reference[i - 1] 548 | s = fasta[i - 1] 549 | if s == 'N' or s == '-': 550 | if start_n == -1: 551 | start_n = i # mark the start of possible run of N's 552 | elif start_n >= 0: 553 | # we've been tracking a run of N's, this base marks the end 554 | missings.append((start_n, i-1)) # Python-style (closed, open) interval 555 | start_n = -1 556 | 557 | if s != 'N' and s != '-' and r != s: 558 | subs_dict[i] = Sub(r, i, s) # nucleotide substitution 559 | 560 | if not s in "AGTCN-": 561 | ambiguous_count += 1 # count mixtures 562 | 563 | if ambiguous_count <= args.max_ambiguous: 564 | sequences[name] = { 565 | 'name': name, # isn't this redundant? 
566 | 'subs_dict': subs_dict, 567 | 'subs_list': list(subs_dict.values()), 568 | 'subs_set': set(subs_dict.values()), 569 | 'missings': missings 570 | } 571 | else: 572 | removed_due_to_ambig += 1 573 | 574 | if removed_due_to_ambig: 575 | print(f"Removed {removed_due_to_ambig} of {len(fastas)} sequences with more than { args.max_ambiguous} ambiguous nucs.") 576 | 577 | return sequences 578 | 579 | 580 | class Sub(NamedTuple): 581 | ref: str 582 | coordinate: int 583 | mut: str 584 | 585 | 586 | def parse_sub(s): 587 | if s[0].isdigit(): 588 | coordinate = int(s[0:-1]) 589 | return Sub(reference[coordinate-1], coordinate, s[-1]) 590 | else: 591 | return Sub(s[0], int(s[1:-1]), s[-1]) 592 | 593 | 594 | def prunt(s, color=None): 595 | if color: 596 | cprint(s, color, end="") 597 | else: 598 | print(s, end="") 599 | 600 | 601 | def fixed_len(s, l): 602 | trunc = s[0:l] 603 | return trunc.ljust(l) 604 | 605 | 606 | def show_matches(examples, samples, writer): 607 | """ 608 | Display results to screen 609 | :param examples: list, dict for every variant reference genome with keys: 610 | ['name', 'NextstrainClade', 'PangoLineage', 'subs_dict', 611 | 'subs_list', 'subs_set', 'missings', 'unique_subs_set'] 612 | :param samples: list, dict for every query genome, same structure as above 613 | :param writer: csv.DictWriter, optional (defaults to None) 614 | """ 615 | ml = args.max_name_length 616 | examples_str = ','.join([ex['name'] for ex in examples]) 617 | 618 | if args.sort_by_id: 619 | samples.sort(key = lambda sample: sample['name'][:args.sort_by_id]) 620 | 621 | # set union of mutations in all example genomes 622 | coords = set() 623 | for ex in examples: 624 | for sub in ex['subs_list']: 625 | coords.add(sub.coordinate) 626 | 627 | if args.show_private_mutations: 628 | # append mutations unique to sample genomes 629 | for sa in samples: 630 | for sub in sa['subs_list']: 631 | coords.add(sub.coordinate) 632 | 633 | if args.primers: 634 | for name, primer_set in 
primer_sets.items(): 635 | for pool in primer_set.values(): 636 | for amplicon in pool.values(): 637 | if args.primer_intervals: 638 | # check if amplicon should be shown at all, or if it's outside primer_intervals 639 | amplicon_matches = False 640 | for interval in args.primer_intervals: 641 | if amplicon.overlaps_interval(interval): 642 | amplicon_matches = True 643 | break 644 | if not amplicon_matches: 645 | continue 646 | 647 | # check if enough of the actual amplicon range is shown to display its number 648 | name_len = len(str(amplicon.number)) 649 | matched_coords = 0 650 | for coord in coords: 651 | if amplicon.overlaps_coord(coord, True): 652 | matched_coords += 1 653 | if matched_coords < name_len: 654 | coords.update(range(amplicon.amp_start, amplicon.amp_start + name_len)) 655 | 656 | # make sure that every alt primer is shown for at least one coord 657 | # otherwise mismatches in the primary primer may look as if they 658 | # would not be compensated by an alt primer 659 | for primer in (amplicon.left_primers + amplicon.right_primers): 660 | if primer.alt: 661 | coords.add(primer.start) 662 | 663 | # if amplicon.number == 76: 664 | # coords.update(range(amplicon.start, amplicon.end + 1)) 665 | 666 | ordered_coords = list(coords) 667 | ordered_coords.sort() 668 | 669 | color_by_name = dict() 670 | color_index = 0 671 | for ex in examples: 672 | color_by_name[ex['name']] = get_color(color_index) 673 | color_index += 1 674 | 675 | # This method works in a weird way: it pre-constructs the lines for the actual sequences, 676 | # and while it constructs the strings, it decides if they are worth showing at the same time. 677 | # Then, if at least one such string was collected, it prints the header lines for them, and after that the strings. 
678 | 679 | ###### SHOW SAMPLES 680 | current_color = 'grey' 681 | collected_outputs = [] 682 | last_id = "" 683 | 684 | for sa in my_tqdm(samples, desc=f"Second pass scan for {[ex['name'] for ex in examples]}"): 685 | #current_color = get_color(color_index) 686 | #color_by_name[sa['name']] = current_color 687 | 688 | prev_definitive_match = None 689 | breakpoints = 0 690 | definitives_since_breakpoint = 0 691 | definitives_count = [] 692 | regions = [] # for CSV output 693 | privates = [] 694 | start_coord = ordered_coords[0] 695 | last_coord = None 696 | 697 | output = '' 698 | 699 | output += fixed_len(sa['name'], ml) + ' ' 700 | for c, coord in enumerate(ordered_coords): 701 | if args.add_spaces and c % args.add_spaces == 0: 702 | output += " " 703 | if is_missing(coord, sa['missings']): 704 | output += colored('N', 'white', attrs=['reverse']) 705 | else: 706 | if sa['subs_dict'].get(coord): # sample has sub here 707 | matching_exs = [] 708 | for ex in examples: 709 | if ex['subs_dict'].get(coord) and ex['subs_dict'].get(coord).mut == sa['subs_dict'][coord].mut: 710 | matching_exs.append(ex['name']) 711 | 712 | text = sa['subs_dict'][coord].mut 713 | fg = 'white' 714 | bg = None 715 | attrs = ['bold'] 716 | 717 | if len(matching_exs) == 0: 718 | # none of the examples match - private mutation 719 | bg = 'on_cyan' 720 | privates.append(sa['subs_dict'].get(coord)) 721 | 722 | elif len(matching_exs) == 1: 723 | # exactly one of the examples match - definite match 724 | fg = color_by_name[matching_exs[0]] 725 | if matching_exs[0] != prev_definitive_match: 726 | if prev_definitive_match: 727 | breakpoints += 1 728 | regions.append((start_coord, last_coord, prev_definitive_match)) 729 | start_coord = coord # start of a new region 730 | 731 | if definitives_since_breakpoint: 732 | definitives_count.append((prev_definitive_match, definitives_since_breakpoint)) 733 | 734 | prev_definitive_match = matching_exs[0] 735 | definitives_since_breakpoint = 0 736 | 737 | 
definitives_since_breakpoint += 1 738 | 739 | elif len(matching_exs) < len(examples): 740 | # more than one, but not all examples match - can't provide proper color 741 | #bg = 'on_blue' 742 | attrs = ['bold', 'underline'] 743 | # else: all examples match 744 | 745 | output += colored(text, fg, bg, attrs=attrs) 746 | 747 | else: # sample does not have sub here 748 | matching_exs = [] 749 | for ex in examples: 750 | if not ex['subs_dict'].get(coord): 751 | matching_exs.append(ex['name']) 752 | 753 | text = dot_character 754 | fg = 'white' 755 | bg = None 756 | attrs = [] 757 | 758 | if len(matching_exs) == 0: 759 | # none of the examples match - private reverse mutation 760 | bg = 'on_magenta' 761 | 762 | elif len(matching_exs) == 1: 763 | # exactly one of the examples match - definite match 764 | fg = color_by_name[matching_exs[0]] 765 | if matching_exs[0] != prev_definitive_match: 766 | if prev_definitive_match: 767 | breakpoints += 1 768 | regions.append((start_coord, last_coord, prev_definitive_match)) 769 | start_coord = coord # start of a new region 770 | 771 | if definitives_since_breakpoint: 772 | definitives_count.append((prev_definitive_match, definitives_since_breakpoint)) 773 | 774 | prev_definitive_match = matching_exs[0] 775 | definitives_since_breakpoint = 0 776 | 777 | definitives_since_breakpoint += 1 778 | 779 | elif len(matching_exs) < len(examples): 780 | # more than one, but not all examples match - can't provide proper color 781 | #bg = 'on_yellow' 782 | attrs = ['underline'] 783 | # else: all examples match (which should not happen, because some example must have a mutation here) 784 | 785 | output += colored(text, fg, bg, attrs=attrs) 786 | 787 | last_coord = coord # save current coord before iterating to next 788 | 789 | # output last region 790 | regions.append((start_coord, last_coord, prev_definitive_match)) 791 | 792 | if definitives_since_breakpoint: 793 | definitives_count.append((prev_definitive_match, definitives_since_breakpoint)) 
794 | 795 | # now transform definitive streaks: every sequence like ..., X, S, Y, ... where S is a small numer into ..., (X+Y), ... 796 | 797 | reduced = list(filter(lambda ex_count: ex_count[1] > args.max_intermission_length, definitives_count)) 798 | num_intermissions = len(definitives_count) - len(reduced) 799 | further_reduced = [] 800 | 801 | if len(reduced): 802 | last_ex = reduced[0][0]; 803 | last_count = 0 804 | for (ex, count) in reduced: 805 | if ex != last_ex: 806 | further_reduced.append(last_count) 807 | last_count = count 808 | last_ex = ex 809 | else: 810 | last_count += count 811 | if last_count: 812 | further_reduced.append(last_count) 813 | 814 | postfix = '' 815 | num_breakpoints = len(further_reduced) - 1 816 | if num_intermissions > args.max_intermission_count: 817 | postfix = '/' + str(num_intermissions) 818 | num_breakpoints += (num_intermissions - args.max_intermission_count) * 2 819 | num_intermissions = args.max_intermission_count 820 | 821 | output += f" {num_breakpoints} BP" 822 | if num_intermissions: 823 | output += f", {num_intermissions}{postfix} I <= {args.max_intermission_length}" 824 | 825 | if args.breakpoints.matches(num_breakpoints): 826 | if args.sort_by_id and args.sort_by_id != 999 and last_id != sa['name'][:args.sort_by_id] != last_id[:args.sort_by_id]: 827 | collected_outputs.append("---") 828 | 829 | last_id = sa['name'] 830 | collected_outputs.append(output) 831 | if writer: 832 | row = { 833 | 'sample': last_id, 834 | 'examples': examples_str.replace(' ', ''), 835 | 'intermissions': num_intermissions, 836 | 'breakpoints': num_breakpoints, 837 | 'regions': ','.join([f"{start}:{stop}|{ex.replace(' ', '')}" for start, stop, ex in regions]) 838 | } 839 | if args.show_private_mutations: 840 | row.update({'privates': ','.join([f"{ps.ref}{ps.coordinate}{ps.mut}" for ps in privates])}) 841 | writer.writerow(row) 842 | 843 | if len(collected_outputs) == 0: 844 | print(f"\n\nSecond pass scan found no potential recombinants 
between {[ex['name'] for ex in examples]}.\n") 845 | else: 846 | print(f"\n\nPotential recombinants between {[ex['name'] for ex in examples]}:\n") 847 | 848 | ###### SHOW COORDS 849 | 850 | for exp in range(5,0,-1): 851 | div = 10**(exp-1) 852 | 853 | if exp == 5: 854 | prunt(fixed_len("coordinates", ml + 1)) 855 | else: 856 | prunt(' ' * (ml+1)) 857 | 858 | for c, coord in enumerate(ordered_coords): 859 | if args.add_spaces and c % args.add_spaces == 0: 860 | prunt(" ") 861 | if coord//div > 0: 862 | prunt((coord//div)%10) 863 | else: 864 | prunt(' ') 865 | #print(f"{coord} // {div} = {(coord//div)}") 866 | print() 867 | print() 868 | 869 | ###### SHOW GENES 870 | prunt(fixed_len("genes", ml + 1)) 871 | 872 | current_name = '' 873 | color_index = 0 874 | current_color = get_color(color_index) 875 | text_index = 0 876 | 877 | for c, coord in enumerate(ordered_coords): 878 | for name, limits in genes.items(): 879 | if coord >= limits[0] and coord <= limits[1]: 880 | if current_name != name: 881 | current_name = name 882 | color_index += 1 883 | current_color = get_color(color_index) 884 | text_index = 0 885 | 886 | # Do this once or twice, depending on space insertion 887 | for i in range(1 + (args.add_spaces and c % args.add_spaces == 0)): 888 | char = ' ' 889 | if len(current_name) > text_index: 890 | char = current_name[text_index] 891 | cprint(char, 'grey', "on_" + current_color, end='') 892 | text_index += 1 893 | 894 | print(" ") 895 | 896 | if args.primers: 897 | ###### SHOW PRIMERS 898 | 899 | prunt('\n') 900 | for name, primer_set in primer_sets.items(): 901 | for index, pool in primer_set.items(): 902 | prunt(fixed_len(f"{name}, pool {index}", ml + 1)) 903 | 904 | for c, coord in enumerate(ordered_coords): 905 | char = ' ' 906 | for amplicon in pool.values(): 907 | 908 | if args.primer_intervals: 909 | amplicon_matches = False 910 | for interval in args.primer_intervals: 911 | if amplicon.overlaps_interval(interval): 912 | amplicon_matches = True 913 | 
break 914 | if not amplicon_matches: 915 | continue 916 | 917 | if amplicon.overlaps_coord(coord, False): 918 | char = amplicon.get_char(coord) 919 | if current_name != str(amplicon.number): 920 | current_name = str(amplicon.number) 921 | text_index = 0 922 | current_color = amplicon.color 923 | 924 | if args.add_spaces and c % args.add_spaces == 0: 925 | prunt(' ') 926 | 927 | if char == '-' and len(current_name) > text_index: 928 | char = current_name[text_index] 929 | text_index += 1 930 | cprint(char, current_color, end='') 931 | 932 | print(' ') 933 | 934 | print() 935 | 936 | ###### SHOW REF 937 | 938 | prunt(fixed_len("ref", ml + 1)) 939 | for c, coord in enumerate(ordered_coords): 940 | if args.add_spaces and c % args.add_spaces == 0: 941 | prunt(" ") 942 | prunt(reference[coord-1]) 943 | print() 944 | print() 945 | 946 | ###### SHOW EXAMPLES 947 | 948 | for ex in examples: 949 | current_color = color_by_name[ex['name']] 950 | prunt(fixed_len(ex['name'], ml) + ' ', current_color) 951 | for c, coord in enumerate(ordered_coords): 952 | if args.add_spaces and c % args.add_spaces == 0: 953 | prunt(" ") 954 | if(ex['subs_dict'].get(coord)): 955 | prunt(ex['subs_dict'][coord].mut, current_color) 956 | else: 957 | prunt(dot_character) 958 | print() 959 | print() 960 | 961 | for output in collected_outputs: 962 | print(output) 963 | 964 | print() 965 | cprint("made with Sc2rf - available at https://github.com/lenaschimmel/sc2rf", "white") 966 | print() 967 | 968 | def get_color(color_index): 969 | return colors[color_index % len(colors)] 970 | 971 | def read_mappings(path): 972 | with open(path, newline='') as csvfile: 973 | mappings = { 974 | 'by_clade': dict(), 975 | 'by_lineage': dict(), 976 | 'list': list() 977 | } 978 | reader = csv.DictReader(csvfile) 979 | line_count = 0 980 | for row in reader: 981 | if len(row['NextstrainClade']): 982 | mappings['by_clade'][row['NextstrainClade']] = row 983 | if len(row['PangoLineage']): 984 | 
mappings['by_lineage'][row['PangoLineage']] = row 985 | mappings['list'].append(row) 986 | return mappings 987 | 988 | def read_subs(path, delimiter = ',', max_lines = -1): 989 | with open(path, newline='') as csvfile: 990 | sequences = {} 991 | reader = csv.DictReader(csvfile, delimiter=delimiter) 992 | line_count = 0 993 | for row in reader: 994 | subs_dict = dict() 995 | missings = list() 996 | for s in row['substitutions'].split(","): 997 | s = s.strip() 998 | if len(s) > 0: 999 | sub = parse_sub(s) 1000 | subs_dict[sub.coordinate] = sub 1001 | 1002 | for m in row['missing'].split(","): 1003 | m = m.strip() 1004 | if len(m) > 0: 1005 | parts = m.split('-') 1006 | if len(parts) == 1: 1007 | missings.append((int(parts[0]),int(parts[0]))) 1008 | else: 1009 | missings.append((int(parts[0]),int(parts[1]))) 1010 | 1011 | sequences[row['seqName']] = { 1012 | 'name': row['seqName'], 1013 | 'subs_dict': subs_dict, 1014 | 'subs_list': list(subs_dict.values()), 1015 | 'subs_set': set(subs_dict.values()), 1016 | 'missings': missings 1017 | } 1018 | 1019 | line_count += 1 1020 | if max_lines != -1 and line_count == max_lines: 1021 | break 1022 | return sequences 1023 | 1024 | def is_missing(coordinate, missings): 1025 | for missing in missings: 1026 | if coordinate >= missing[0] and coordinate <= missing[1]: 1027 | return True 1028 | return False 1029 | 1030 | 1031 | def calculate_relations(examples): 1032 | """ 1033 | 1034 | """ 1035 | for example in examples: 1036 | union = set() 1037 | for other in examples: 1038 | if other is not example: 1039 | union = union | (other['subs_set']) 1040 | example['unique_subs_set'] = example['subs_set'] - union 1041 | unique_count = len(example['unique_subs_set']) 1042 | color = None 1043 | if unique_count < 5: 1044 | color = "yellow" 1045 | if unique_count < 3: 1046 | color = "red" 1047 | vprint(colored(f"Clade {example['name']} has {len(example['subs_set'])} mutations, of which {unique_count} are unique.", color)) 1048 | 1049 | 1050 | 
class ArgumentAdvancedDefaultsHelpFormatter(argparse.HelpFormatter):
    """In contrast to ArgumentDefaultsHelpFormatter from argparse,
    this formatter also shows 'const' values if they are present, and
    adds blank lines between actions.
    """

    def __init__(self,
                 prog,
                 indent_increment=2,
                 max_help_position=24,
                 width=None):

        global width_override

        # A module-level override (set by update_readme) wins over the
        # width argparse would otherwise pass or detect.
        if width_override:
            width = width_override

        super().__init__(prog,
                         indent_increment,
                         max_help_position,
                         width)

    def _get_help_string(self, action):
        """Return the action's help text, extended by its default (and const)
        value unless the text already mentions %(default)."""
        help_text = action.help
        if help_text is None:
            # Guard against actions without help text; the substring test
            # below would raise TypeError on None.
            help_text = ''
        if '%(default)' not in help_text and not isinstance(action, argparse._StoreConstAction):
            if action.default is not argparse.SUPPRESS:
                defaulting_nargs = [argparse.OPTIONAL, argparse.ZERO_OR_MORE]
                if action.option_strings or action.nargs in defaulting_nargs:
                    if action.const:
                        help_text += ' (default without flag: %(default)s, default with flag: %(const)s)'
                    else:
                        help_text += ' (default: %(default)s)'
        return help_text

    def _format_action(self, action):
        # the extra newline separates the actions by a blank line
        return super()._format_action(action) + '\n'


if __name__ == '__main__':
    main()