├── LICENSE.md ├── Makefile ├── README.md ├── build ├── program └── src │ ├── MurmurHash3.o │ ├── MyBloom.o │ ├── Rambo_construction.o │ ├── bitArray.o │ ├── main.o │ └── utils.o ├── data ├── 0.zip ├── ArtfcKmersToy100.txt └── artificialKmer.py ├── include ├── MurmurHash3.h ├── MyBloom.h ├── Rambo_construction.h ├── bitArray.h ├── constants.h └── utils.h └── src ├── MurmurHash3.cpp ├── MyBloom.cpp ├── Rambo_construction.cpp ├── bitArray.cpp ├── insertBloomfilter.cpp ├── main.cpp └── utils.cpp /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # declare variables 2 | 3 | CXX := g++ 4 | CXXFLAGS := -Wall -fopenmp -std=c++11 -O2 5 | INCLUDE := -Iinclude/ 6 | BUILD := ./build 7 | TARGET := program 8 | 9 | SRC := \ 10 | $(wildcard src/MurmurHash3.cpp) \ 11 | $(wildcard src/Rambo_construction.cpp) \ 12 | $(wildcard src/bitArray.cpp) \ 13 | $(wildcard src/utils.cpp) \ 14 | $(wildcard src/MyBloom.cpp) \ 15 | $(wildcard src/main.cpp) \ 16 | 17 | 18 | OBJECTS := $(SRC:%.cpp=$(BUILD)/%.o) 19 | 20 | all: clean build $(BUILD)/$(TARGET) 21 | 22 | $(BUILD)/$(TARGET): $(OBJECTS) 23 | @mkdir -p $(@D) 24 | $(CXX) $(CXXFLAGS) $(INCLUDE) $(OBJECTS) -o $(BUILD)/$(TARGET) 25 | 26 | $(BUILD)/%.o: %.cpp 27 | @mkdir -p $(@D) 28 | $(CXX) $(CXXFLAGS) $(INCLUDE) -o $@ -c $< 29 | 30 | .PHONY: all build clean debug release 31 | 32 | build: 33 | @mkdir -p $(BUILD) 34 | 35 | debug: CXXFLAGS += -DDEBUG -g 36 | debug: all 37 | 38 | release: CXXFLAGS += -O2 39 | release: all 40 | 41 | clean: 42 | -@rm -rvf $(BUILD)/* 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # RAMBO Fast Processing and Querying of 170TB of Genomics Data via a Repeated And Merged BloOm Filter (RAMBO) 3 | RAMBO is a method to reduce the query cost of sequence search over the archive of dataset files to address the sheer scale and explosive increase of new sequence files. 4 | It solves achives sublinear query time (O(\sqrt{K} log K)) in number of files with memory requirement of slightly more then the information theoretical limit. 5 | 6 | This code is the implementation of: 7 | [https://dl.acm.org/doi/10.1145/3448016.3457333](https://dl.acm.org/doi/10.1145/3448016.3457333) 8 | for gene sequence search. 9 | 10 | If you use RAMBO in an academic context or for any publication, please cite our paper: 11 | ``` 12 | @inproceedings{10.1145/3448016.3457333, 13 | author = {Gupta, Gaurav and Yan, Minghao and Coleman, Benjamin and Kille, Bryce and Elworth, R. A. Leo and Medini, Tharun and Treangen, Todd and Shrivastava, Anshumali}, 14 | title = {Fast Processing and Querying of 170TB of Genomics Data via a Repeated And Merged BloOm Filter (RAMBO)}, 15 | year = {2021}, 16 | isbn = {9781450383431}, 17 | publisher = {Association for Computing Machinery}, 18 | address = {New York, NY, USA}, 19 | url = {https://doi.org/10.1145/3448016.3457333}, 20 | doi = {10.1145/3448016.3457333}, 21 | pages = {2226–2234}, 22 | numpages = {9}, 23 | keywords = {information retrieval, bloom filter, genomic sequence search}, 24 | location = {Virtual Event, China}, 25 | series = {SIGMOD/PODS '21} 26 | } 27 | ``` 28 | 29 | Step 1: data download 30 | Requirement: 31 | Install latest GNU parallel OS X: 32 | run: 33 | ``` 34 | brew install parallel 35 | ``` 36 | 37 | Debian/Ubuntu: 38 | run: 39 | ``` 40 | sudo apt-get install parallel 41 | ``` 42 | 43 | RedHat/CentOS: 44 | run: 45 | ``` 46 | sudo yum install parallel 47 | ``` 48 | 49 | Install wget and bzip2 50 | 51 | Install cortexpy Refer to this installation [document] (https://cortexpy.readthedocs.io/en/latest/overview.html#installation) 52 | 53 | run: 54 | ``` 55 | unzip data/0.zip 56 | sh data/0/downoad.sh 57 | mkdir -p results/RAMBOSer_100_0 results/RAMBOSer_200_0 results/RAMBOSer_500_0 results/RAMBOSer_1000_0 results/RAMBOSer_2000_0 58 | ``` 59 | 60 | In the end we need to execute commands from 0_1.txt > 0_2.txt > 0_3.txt for the 100 files. 61 | 62 | Step 2: ensure all 100 files are present in data/0/inflated/ 63 | 64 | Step 3: create test set 65 | run: 66 | ``` 67 | python3 artificialKmer.py 68 | ``` 69 | 70 | Step 4: Set parameters and run code 71 | number of sets in line 7 of include/constants.h 72 | m, B and R in line 29-31 of src/main.cpp 73 | run: 74 | ``` 75 | make 76 | ./build/program 0 77 | ``` 78 | -------------------------------------------------------------------------------- /build/program: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gaurav16gupta/RAMBO_MSMT/2ec69b7318ae165f98a3c018b9c8fc4c3a4d9499/build/program -------------------------------------------------------------------------------- /build/src/MurmurHash3.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gaurav16gupta/RAMBO_MSMT/2ec69b7318ae165f98a3c018b9c8fc4c3a4d9499/build/src/MurmurHash3.o -------------------------------------------------------------------------------- /build/src/MyBloom.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gaurav16gupta/RAMBO_MSMT/2ec69b7318ae165f98a3c018b9c8fc4c3a4d9499/build/src/MyBloom.o -------------------------------------------------------------------------------- /build/src/Rambo_construction.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gaurav16gupta/RAMBO_MSMT/2ec69b7318ae165f98a3c018b9c8fc4c3a4d9499/build/src/Rambo_construction.o -------------------------------------------------------------------------------- /build/src/bitArray.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gaurav16gupta/RAMBO_MSMT/2ec69b7318ae165f98a3c018b9c8fc4c3a4d9499/build/src/bitArray.o -------------------------------------------------------------------------------- /build/src/main.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gaurav16gupta/RAMBO_MSMT/2ec69b7318ae165f98a3c018b9c8fc4c3a4d9499/build/src/main.o -------------------------------------------------------------------------------- /build/src/utils.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gaurav16gupta/RAMBO_MSMT/2ec69b7318ae165f98a3c018b9c8fc4c3a4d9499/build/src/utils.o -------------------------------------------------------------------------------- /data/0.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gaurav16gupta/RAMBO_MSMT/2ec69b7318ae165f98a3c018b9c8fc4c3a4d9499/data/0.zip -------------------------------------------------------------------------------- /data/ArtfcKmersToy100.txt: -------------------------------------------------------------------------------- 1 | GTTGTTTATA;62 2 | CGCATAGACC;69,62,13 3 | CGGAGCATGA;25 4 | GCCCGGATAC;95 5 | CAGACAATGT;13 6 | CAGCCGGACG;58 7 | CCACCAACTT;61 8 | GAAACTTCGT;3 9 | TTTATAGTTC;56 10 | CAGACCCTTA;46 11 | GATCGCCGAC;5 12 | GTAGGGTACC;39 13 | TAGATAGCTG;17,51 14 | GTCTCTCGCT;94 15 | GTGTCACCCC;19 16 | ATGGAGCTCC;32,0 17 | ACACGGAATC;93 18 | TGCACTGTCA;34,66 19 | GGGCTGTCAC;85 20 | CCCCCCATAT;16,5 21 | CCTGCGGCCT;35 22 | GTTCCATCCG;98,98 23 | AGTGCCACCG;50,58 24 | ATTCGCCTTG;0,7 25 | CCCGATTGAC;56,53 26 | TGATTAAATT;15 27 | CTTCTGACCG;46,31 28 | GACTGAGACG;27 29 | GAGGATTTTT;90,24 30 | GCGCAACCCA;37 31 | CTTCTGATCT;19,51 32 | CCTCAACTGA;93,56,42,21 33 | TGTTCTTTCT;51 34 | GAAGCCTGGC;32,25,74 35 | CCGTGGAGTA;60 36 | TTCTCTTAGA;51 37 | TTCACTCCAT;64 38 | GTGATTACCA;63 39 | TAGGGGGAGT;43 40 | TATTATGACT;90,0 41 | AATCTACCCG;3 42 | TTCGTCTAGA;49,85 43 | ACTCGTCCAT;60 44 | GCCACTGGGA;27,62 45 | TAGAGCTCGA;66 46 | GTATTGTTAA;92 47 | CCGACAAATC;83 48 | TGCGCGTCAA;17 49 | ACTAAACTTT;19,92 50 | TTCCTAGTAC;70,68 51 | GTGGTCGGGA;59 52 | ATCCCCTCGC;30 53 | ATAACGATCA;63,25 54 | CAACCAACGG;58 55 | CTTAACCCGT;69 56 | GTACAACAGT;4 57 | CCTAGCTCAT;49 58 | CGCCTTCACA;70 59 | CATCTGGAAA;35,68 60 | GACTAGCAAT;4 61 | TTGGTGAGGA;74,34,28,92,31,27 62 | CAGCGATACG;80 63 | CTCGAGGGGC;19,77 64 | ATTATTCTGC;31 65 | TCGGGCGGTG;82 66 | CTCTTATTTA;53 67 | TTTAGACAAC;36,37 68 | ACGGATTGTT;46 69 | ACACTGTTCC;28 70 | ACAATAGCGA;1 71 | CGTCTATATC;39 72 | CGGACCCCCA;13,9 73 | GCGGCGGCTG;19 74 | GGCCTCAGGG;48,3 75 | TGCGGGGCAT;64,44 76 | GACACTGCAC;39,34,89 77 | CGGTCGGGTA;5 78 | CTTGAATTGC;39 79 | GGAATAGTCG;63 80 | GTGATGGTGG;79 81 | TGGATAGGGT;68 82 | ATGCATAGCA;12 83 | GTTTCGATAA;88 84 | CGCAGTACGA;93 85 | GGGATTGCGG;74 86 | ACAGCGTCCA;33,81 87 | TCGTATCGCA;65 88 | CTATATACTA;90 89 | AGCTGAACCA;95,67,86,86 90 | TACCCGCTAT;74 91 | TTATGGCCGA;6 92 | GAAACATGGC;35,37 93 | TGAGGATAAT;77 94 | TTGGGCAAGA;32,61 95 | AATAAGGCCC;40 96 | TGAGAAAAAG;61,60,74 97 | GGTGTCATGG;48 98 | GGGACCACGA;81 99 | TTCTAATGCG;81 100 | TGCTGCTACA;93,91 101 | CCGGGCCCGA;48 102 | GGGTGGCTTT;22 103 | CCGGGGTGCG;79 104 | CATTCGATAT;98 105 | TTGCCCGATC;41 106 | ATCACAAAAT;39 107 | TGATGGAAGC;55 108 | CCGCCGGTAC;14 109 | ACTCACTAGG;58 110 | ATATCCGCAA;59,43 111 | CCCGACACCC;66 112 | ACTATATGGA;73 113 | ACCAGCTTAC;45,47 114 | GATCTCGATG;68,62,47 115 | GTAGGTTGAT;49 116 | AACTTTGGCT;64 117 | AAAGTTTCCT;30,45,85 118 | CATTGTGTGA;30,98 119 | TGCGGCATTA;31,6,82,30,47 120 | GTTCGAGTGC;17 121 | AACTAGTGGA;61 122 | AAATATAGTG;62,81,62 123 | TCTCCCTCAA;48 124 | GCACCGTCCC;90 125 | AATGTGGCGT;45,91,22 126 | GCCGCTCGGG;33 127 | GTCGTTCTCT;46 128 | TTACACAGTG;65 129 | AGAGTGGAGA;95 130 | TGCAGCATCG;53 131 | CTGAAAGTTC;64,28,79 132 | CGTATTCTAG;89 133 | AACTGTGGTA;88 134 | TAACGCCTGG;4 135 | ATCCTACTGC;31 136 | CAGGCTTAGT;38,1,8 137 | TCACACCGCG;79,28 138 | TTAGTGTGTG;10,81,50 139 | TCTCTTTGAG;28,94 140 | AAGACACTCC;62 141 | CTTTACGACC;31 142 | AATATGTGTG;89,93,83 143 | AGGACAACAG;8,75 144 | GTTGCTCGCG;85 145 | CAGGGGGTAG;33 146 | AAATACGATC;90 147 | ACAGCTGCGA;8,93 148 | TAAATCTCTG;98,21 149 | CCTCTTTTGA;45,2 150 | AATTATCATC;75,55,99,62 151 | TGCCGTAACA;56,29,30 152 | TGTCGTCTTG;27,70 153 | CGTACATCAT;79 154 | GAACTGCAGT;68 155 | GGTATCTGCC;28,36 156 | TTCCTTTCAC;71,38 157 | TCGAGGCTAA;86 158 | GCTACCTTCC;24 159 | AGAGTCAACC;89 160 | CCACACGTTT;20 161 | GAAGATTTGT;9 162 | CATTCGTTCG;77 163 | CCGCTGTAAC;30 164 | TACGTATTAC;5,17,72,64,30 165 | CATTAGACGC;97 166 | ACATTACGTT;34 167 | ATAGGGTGGG;30,97,7,46 168 | CCGTCATTTT;96,73 169 | AACAACGCCA;25 170 | GCAGATAGTG;47 171 | TGCTTGGGAT;59 172 | CACCTCTGGG;53,5,17 173 | TTTATACACT;32,38 174 | ATTTCGGAGG;34,83 175 | GATTGCGGAT;56,40 176 | GCGCAACCAA;21 177 | CCCTTGGGTA;78,69,38,38,56,19,16 178 | GGTAGAAACG;35 179 | TCTTCCGCCG;38 180 | AGCGCCTAGT;0 181 | CTAACACAAA;85,9 182 | GACCACGAGC;68 183 | ACATCTCTTG;88 184 | GCGCTTCGTG;75,50,82 185 | GAAAGATCTG;78,19,7 186 | GCTACCGAAT;92,86 187 | ATGCAACCAA;3,67 188 | CTTATGCTGG;44 189 | CAGTCTAAGT;99,8 190 | CTACTCATCA;32,22,50,71,0 191 | TGATCTGTGA;61 192 | GCTCTAATAA;71 193 | ATCCCTCTTC;98,32 194 | GGGTTTGAGC;14 195 | AGAATTAACT;46,81,95,55,89 196 | AGACGCTTCC;80 197 | ACTGTCGGAA;87,84 198 | GAGGGGTAAC;4,37,50 199 | GTTCGTTGTT;97 200 | TTTCGCCGCT;33 201 | CCTGCGACCT;41 202 | ATGTCGTTAT;48 203 | GATTGGGAAG;73 204 | CTTATCGGCC;19,86,12 205 | TATCCACGTA;85 206 | CCCCCTGAAG;72 207 | CCCGAGCGTT;58 208 | GACTCCTCTA;72,33,23,50 209 | TCTAGTAGCC;10,49,32,24 210 | TCAATACGTC;11,85 211 | AGTTAGCGCG;55 212 | ACCTTGTAGC;68 213 | AAACCGGTGG;57 214 | GATAGCATCA;31,17,4,16 215 | AATCATGCGT;69 216 | CGATCGCCCT;38 217 | ATCTCGGTGA;75 218 | AGAGTGTGGG;59,53,64 219 | CCAATGTGAG;35 220 | TCGCGAAGTT;35 221 | AACAGTGTGA;75,6 222 | GGAGCTCCCA;53 223 | ATTTTCCGAC;78 224 | CTAGAGTATC;10 225 | ACGCTGCACG;28,47 226 | GACACATGCC;89 227 | TGCGATCCTA;26 228 | TACCGGGGAA;14,50 229 | GAAAATGTTT;26 230 | GCAGGTACGC;14 231 | CTGCGTCGTC;48,64,40,23,72 232 | CGTTACAGGG;27,12 233 | ATGGGTAAAC;14 234 | CTTACGGTCC;8 235 | TACATGAAGC;81 236 | TTCACCTAGC;61,48 237 | GCTCCGGAGC;17,42 238 | CATCCTTCCT;36 239 | TGGGCGGTGA;18,56,85 240 | GAATCCGTAG;84,98 241 | TGGCACCCGT;14,34,74 242 | CCCGACAGCT;26 243 | AGTTAGCTCC;26 244 | AGACCCGCAT;94 245 | TTAGAGATGC;23,1,42 246 | GCCCCTCTCT;74,99 247 | ACCCTGTGTG;52 248 | ATAACCAGCG;26 249 | TTAGTGCTCC;57 250 | CGGTATTACA;5 251 | CCCAGGGGCG;93,21,53,58 252 | GCCATCGGAT;28 253 | GTACATGCAG;38,68 254 | ACCCGGGAAC;67 255 | AGGTGGGCAC;8 256 | CCACACTGCA;0 257 | GGAGGGGGGT;32 258 | AGCGGGTGGC;74 259 | CCGGGACACG;1 260 | ACTGGCCATG;51 261 | TTTGTAGCCC;67,83 262 | CCGTGTCATG;1 263 | CTGACCTAGC;14 264 | GTATTACGTT;85 265 | ACTAGGTGAT;44,48 266 | AAAAACGCGC;99 267 | GCGTATAGAT;67 268 | ATCCTATATG;0 269 | CAGTGCTGAA;67,69,10,84 270 | CACTCGCGGC;28 271 | AACCCTGCGT;17,66 272 | CTTTGTAACG;45,60 273 | AAACGCGCAT;31 274 | TTGAGTGAAT;12,21 275 | CCGCTGCTAT;79,82 276 | GCCTCGGGTT;21 277 | GCTTAAAAGA;85 278 | GGGGCATGGC;0 279 | CGACGTAACG;2,85 280 | TCGTCCAATG;73 281 | CTGAGCAACT;34,10,48 282 | GTTCTGTGGA;64 283 | ACATCTAAAG;42,40 284 | GGATCACATT;8 285 | CAACCATTCT;15 286 | TAAGATGATT;67 287 | CGTACATCGT;71,73,71,55,92,60 288 | TGGCCACCTA;86,57 289 | TACGGCTGAG;45 290 | AACTGTTACG;69,99,18,28 291 | TGCGCTTATA;0 292 | TCTCTCAGTG;93,16,57 293 | TCGCGCATTT;96 294 | GTCCCCAAGA;54 295 | AGTGTGTGAG;49 296 | GCTGAGCCAA;5,80 297 | GCCAGATTCA;85 298 | CATTCTCCAG;21 299 | CAGGCGAGTA;2 300 | CAACTGATCG;52 301 | ATAAGACAAC;79,43 302 | AACATTTTTC;92,32 303 | CGGTTTTTTG;12 304 | AGTGCGTTCT;25 305 | TGTTAACAGC;16 306 | CACGCAGCTC;52 307 | ATATCATATC;78,74 308 | TACTATCGCG;87 309 | TAGGACTACT;4,89 310 | ACGAAACGTT;74 311 | TATGGTCCTC;42,38 312 | CCATACCGTG;86,72,61 313 | TGTGTCGCGT;83 314 | GCCAAATTAC;78,90 315 | ACTCACATTC;55,32 316 | AATTACCCAG;98 317 | CATCACTCCT;8,98,16,59 318 | TCGAGCTGGT;24 319 | CATGAATGGG;80,46,24 320 | GGGAATTGAT;61,28,82 321 | ACGCGAGGCC;49 322 | CCCGATCAGG;75 323 | TGCAATGCCC;44 324 | AAGCAGATGA;15 325 | ATAGCTAACA;94 326 | TCTGTGACCG;36,89,49 327 | GCACTCCGTG;51,96 328 | CAATGAACCG;90 329 | CAGGCCCATG;9 330 | TTGCTAAAAC;94,31,87,15 331 | CGGGTGGAGA;50 332 | GGTCAGTCAG;11 333 | TCGGTTGGAA;96 334 | TACATAAACT;3 335 | GACAGTGTTA;84 336 | TTCAGTTACC;79 337 | CTTAGGTTTA;68 338 | GCTATGCACC;23 339 | CGGGATTTCT;8 340 | TAACACGTGT;82 341 | CACGCGGCAG;59 342 | AACTACTAGC;0 343 | TCGCGCACCT;20,28,89 344 | GATTGCGAGT;40 345 | GGGCATGCAT;42 346 | CGAAATCGAC;35,43,73 347 | CACTCAATCC;22 348 | AGAGGATTCC;47 349 | TCTGACCATG;1 350 | TGGATTCCGG;13 351 | CCCATATCCT;17 352 | GTGCCACGCG;37,60 353 | ACACGACTTA;23,39 354 | TTGCGATTGA;42,52 355 | CACGATCGGA;50 356 | GCCTGCTAAC;18,87 357 | GAGGAATAGC;80,74,82 358 | CTCCGCGAGA;66 359 | TGCCGCGTGC;62,84 360 | CAGCGTGACG;35,66,44,82 361 | AAGACGCAGA;29 362 | CAAGTAGCGT;95 363 | GAAAGTTAGA;86 364 | GCCCTAGTGC;59,54,77,33 365 | GCCGCTCAGG;14 366 | TCCTTGAGCA;45,0,2,0 367 | AAAAGCATCA;88 368 | TGGCAACACC;47 369 | ATCGCTTAAT;12,7 370 | CCCATGCGTT;29 371 | CTAGATGCGT;55 372 | ACACTGCCGC;92 373 | GGAATAATGG;26,12,53,25,49 374 | TGGCACAACG;95,36 375 | AGATTCACGG;31,62 376 | GGTTCAAGGG;50,81,2,84 377 | CCGCTGTGGA;73 378 | CAAGCCCCGC;95,4 379 | GGTTAGGTCG;35 380 | ATTACTTAAA;49,85,65,4,28 381 | CAACACGTGA;11 382 | AAACTAATAT;45 383 | GTTAGGTCTC;18 384 | CATTCGACGA;98 385 | GCACACGCTG;6 386 | TATGATCTTA;37,31 387 | CCTTTTCGAG;43,85 388 | AAAAGTGATG;78,7,17,7 389 | CCTCTCCTTC;87,38 390 | AGACAAATCG;15 391 | ACAACACGAT;32 392 | GATCTATGGG;35 393 | GCTACTGCGA;14,22,64 394 | GCTGCCAAGT;9,82,98,93 395 | TCTATGCCTT;84 396 | TTACCACCCG;47 397 | AGGCGTCGGC;2,13,98 398 | CGAGACGCTC;14 399 | CCGTAGTCGA;18,81 400 | ACTGTCTACG;43 401 | CATAGCTACA;12 402 | TGTTCAGTAG;84 403 | AGTTGGTTGA;45 404 | TTTTTTTGCC;77,65 405 | AGTCTACCCT;37 406 | GCTCAGACTT;32 407 | CGAAAAACGC;12,8 408 | GCATGAGGTC;71,22 409 | TTCATGATCA;97,15 410 | TCGTGCCAGG;18 411 | CAAACACCCC;89 412 | TCTGTGGGCG;45 413 | ATGCGCGAGG;75,99 414 | GGATCTGAGA;19 415 | TGAATCCTTA;60,82 416 | GCTTTCGAGG;66,40,8,70,25 417 | CGGCTTGGGG;98 418 | TCGTTAGTGT;47,28 419 | TATAGAGCTC;34 420 | TGGCCGTGCC;52 421 | GTATGTGGTT;73 422 | TCTCATCCCT;1 423 | CGTAGGCTGA;28 424 | TTATGTTCCT;21 425 | TGGTTTTCGG;41 426 | TCGGGTGACG;57 427 | TGTGACCTGT;72,16 428 | CGGCCCATAC;1,77 429 | TATATCGCAG;61 430 | TGGTAAGGGA;77 431 | CAAAATGTGT;44,3 432 | TCGGGGTACA;16,5 433 | TGTACACGCC;55 434 | CCACCTGGTT;15,97 435 | CTCAATGACT;25,31 436 | GAGATGTTCT;17,22 437 | CTCAGCCAGC;7 438 | TTAACACTCG;53,95,15,58,10 439 | CTGCGTATCG;82 440 | GGAGTAATGA;28,48,28 441 | TCGAATCGGA;6,73,4,84,24 442 | GGAGCAAATT;26 443 | GATTTAAACG;43 444 | GAGATTTTTG;14 445 | AAATGCCTAC;64 446 | GAAGTCATTT;70 447 | ACCGCACTGG;41 448 | GTTACATCCC;44,67,13,56 449 | ATAGAGTCAC;44 450 | GATTTCTCGC;78 451 | GTTGCGGACT;49 452 | AGGATGTTAA;6 453 | CAATCCGGCA;54 454 | TCGAATATTG;24 455 | CGCTAAGATA;84 456 | CAGAATGGTA;34 457 | ATCTCGGGGC;65 458 | CGTATCTTTG;23,83 459 | ATCGTTCTCG;6,62 460 | CTAGTAAACT;30 461 | GTGTATTAAT;38 462 | CTAACAGCCA;6 463 | ATCCTACGGC;22,82 464 | CGTCGTGTGA;15 465 | GGGTTGGCAT;58 466 | AGACACTGGG;60,55 467 | CCCGTGTAGG;44 468 | TGGGACCTGG;82,69,71,50 469 | ACCGTGCTTA;60 470 | TCTGTTGCTA;90 471 | AGAGGCAGAC;78,93 472 | TTCCAGGACT;14 473 | GGATCTATCC;76,11 474 | TCTATCACCG;32,65,57 475 | ATTACTACTA;59 476 | ATATCATGGG;99 477 | GCTTGAGGAA;91 478 | GCTACTGGTT;80,45,69 479 | GTGTTCCGGC;33,76 480 | CCCCTGAAAA;28 481 | ACCAAATACC;37 482 | AGCTCCCTTC;15 483 | ATCTATTAAC;0 484 | TTACGAAACT;91,52 485 | CCGCGCTGCG;65 486 | TCCGGGTGAT;22 487 | GAAGTGAAAG;89 488 | TGCGCCGGCC;68,22 489 | TAAGGGTGTG;13 490 | CGACTCGACA;77 491 | TATTCCGCGT;3,25 492 | GTTTTTAGGT;11,12 493 | GCCCGGCTTG;6,58 494 | ATCGCTCCTT;3,64,83,64,91 495 | GCCTGTTTTA;49 496 | CGTTCCCCCA;83 497 | CTCCGAAATG;37,45 498 | TGATTCACGC;28,61,37 499 | GCCCTCGCCC;55,71 500 | TGTGATTACC;77 501 | TGCGCTCTAG;65 502 | TAATTAAGCT;97 503 | TGAAAACGAC;22,15,94 504 | ATACAGTCGC;41,16,0 505 | AGTACTGGAG;61 506 | CGGCGTGACC;58 507 | TAGCTACGAC;31 508 | CCCCGTGATA;60,2 509 | GGATCCCGTA;31 510 | ACTTCCCACC;23,75 511 | AGGGCCTAAG;39 512 | GTCCAATGTC;24 513 | CTGACACAGT;26,50 514 | GGCGCCTGCC;12 515 | CCTACTGTAG;75,27 516 | AGGTCTTAAC;6 517 | CAGGATATTG;3 518 | CGTTAAACGC;9 519 | GTGCGCCGTT;4 520 | GTGCTGCCTG;18 521 | GGTTCTTTCG;61,3,73 522 | GGATTGCGTA;1,57 523 | CGAGCCCGAC;38 524 | TAAAATCCAC;37,1 525 | GAATATGACA;68 526 | CTCAGGGCCC;73 527 | TTTGGTACCA;81 528 | GATGGGCATG;61,52 529 | ACTAGCAATA;21 530 | TAGTAGTCGT;44,73,66 531 | CTACAGGCCT;77,37 532 | CGTGTGTGCA;6 533 | TCTGTCCCAA;66 534 | TGCGATACCG;74 535 | TTTTAATCAA;16,60 536 | CAGCTGTGGG;89,77,55 537 | GGCACCATTT;27,15,88,97 538 | TTTAATAGGC;76 539 | TCCCAACTCA;60,30 540 | ACCAGGAAGG;6,21,64 541 | CTGGAGTCGC;23 542 | GACATGTCAG;25 543 | CGCATGAAAG;74,14,91,27 544 | CATTGTAGTG;29,94 545 | AGCTCCAGAA;36 546 | CACAACACTG;25 547 | GCTCCAACTC;39 548 | TGCCGGCTGG;51,66 549 | AAGATATCCA;31 550 | TGAACAGCAG;99 551 | ATATTTGTGT;29,7,84,26 552 | CTAGCCGAGA;40,6 553 | GGATAGGCCT;93 554 | CGATAGAAAA;0 555 | TGTCCCGCCA;70 556 | CTTTTGGCGG;43,7,24 557 | AGCGGCCAAG;16 558 | GGACTAACCA;12,52 559 | ACGAGTCCGT;80 560 | GTTGATCCGG;83,85,92,7 561 | GGCACCACAT;49,6 562 | GTGATTGCAC;82 563 | CGGGATCTCC;75 564 | TCAATGAGCT;3 565 | GTACTTGCTT;59 566 | TTTTCTATGA;9 567 | ATCTAGCGAT;74 568 | GCATACCTTC;5 569 | GTATCGGTTG;92 570 | AGGAACTTAA;49,35 571 | AACGTGGGTG;78 572 | TGGTTTTGTC;37,93 573 | ACACCTTGCC;76 574 | ACTGGTCCCA;42,91 575 | CTAAGGAAGT;67 576 | TAGTCTACAA;88,75 577 | GCAACGAAGG;64 578 | CAACCGAAGT;9 579 | GGCCACTGTG;17 580 | CACCAATTTC;9,16 581 | TTGCAGAAGT;4,13 582 | TAATTCGCAA;21 583 | CCTTCTAGCT;85,86 584 | ATTTCAAAAC;32 585 | CAAATCGCCG;84 586 | TTGGTTACCC;15 587 | TTAGTCAGCG;47,15 588 | CTGGTTTACA;43 589 | TCTTCATGGT;69 590 | ACGTGATGAT;50 591 | AGGTCGCAAA;13 592 | CGAGGTGTCG;26,96,86 593 | CGGCTTCGAA;39,80 594 | GTCAAGGATG;84 595 | CGGCTGACAA;71 596 | TTTTCGACCA;40 597 | ACTGACTAAT;66 598 | GTCGAAAAGG;15 599 | TGAAAACTCG;25 600 | CGTTGTCGAT;13,68 601 | GTGATCCTAA;4,57 602 | AAGCCCACGT;26 603 | CAGTAAAATA;59 604 | GGAGTCAATC;15,0 605 | AAGTACAGTC;58 606 | CACAGGTAGC;83,61,96 607 | CTCCAGACTT;10 608 | CGCATGTTCC;10,89 609 | GCGCCAACAC;13,36 610 | ACCTCTTTTT;36 611 | TAATGATAAA;42 612 | ACCGAGTGAA;87,29 613 | AATGAGCTGC;86 614 | AACATTTGGA;95,6 615 | CTCGGTACTT;17,70 616 | AGCCACACAT;82,81 617 | AGGGGGAACT;45,51 618 | ACACTCTGCT;78 619 | ATGTCGCAAG;61,67 620 | CAAGTCGATT;5 621 | TTCGTCTGAG;48 622 | ATTTAGCCGT;11,50 623 | TAGAAGTCAT;35 624 | CACTATGACC;30,74,52 625 | GTCCCGGGAC;72,5 626 | TGATGAACTC;61 627 | TAACTGTCAG;3 628 | CTAGTACAAG;78,31 629 | TGCTTATGCA;77 630 | AGCCTCTATC;5 631 | CCATGGCGTC;39,91 632 | GCTTTAAAGG;2,1,8 633 | TCGAGCACGG;49 634 | TCCCGTTCTA;62 635 | ACTCAGTCGC;84 636 | ATATGAGGCA;1,49 637 | GGGTGCCATT;94 638 | CCTTACTATG;97 639 | AGTCTCTAAG;49 640 | CGCTCACGTT;24,95,25,76 641 | CCGAAGTAAA;73,2,7,17 642 | GCCTCCGGCT;79 643 | CGTGACCTCG;79,11 644 | CCGATTTATG;58,10 645 | ACAGCTGAGT;23 646 | CGATGGGCCC;80 647 | CTTCGCCTTG;74 648 | ACTTAGCGGT;83 649 | AACAGTGGTG;30,20 650 | CTCCTATTAA;31,83 651 | CCCTCGATAC;58 652 | AGACGCAGGG;98,67 653 | AAACCGTCGT;14 654 | CCAGTCATTT;93 655 | TGAAAGTACA;61 656 | CTTTTCTGGC;89,12 657 | ACGCTCAATC;82,23 658 | GTTCGCGTTA;75 659 | GGAGCGACCC;91 660 | AAGCGTGCGA;44 661 | TTCTCAATGT;19 662 | CAAGTTATTT;36,72,29 663 | GCCACGATTT;58 664 | CTTAGGGTCC;54 665 | GGCACCCAGC;99,58 666 | AAGACCGTTT;76 667 | TGGACGATAT;71 668 | ACTGGTTAAG;4,8 669 | TGATTCGGCA;22 670 | GTAGAAACCG;54 671 | ATTGCAATCT;24 672 | CCTTCGGGCG;88 673 | ATTCCGGCCT;57 674 | AACCGGCGAA;29,34 675 | TAAAGTCCAT;26 676 | GATCCATCCA;38 677 | GCGGGGGACA;30 678 | GCTTACCCCT;7,99 679 | GCTCAGCTGG;38,7,10 680 | CACAATTCCC;16,80,57 681 | CACAAACTTC;42,27 682 | CCGTTAGAGT;96,18 683 | TCCTACAACG;47,14 684 | GGGTACCGCG;99 685 | TCTTTCTCCG;77 686 | TAGTTAATTA;31 687 | GATGGTGTGC;49 688 | GCTACCTCCT;64 689 | GTCATTCCAA;45,51 690 | TGGACTACAA;91,39 691 | AATGTATTGA;7,71 692 | AACTTTAGCG;61,75 693 | GCTTCGCATA;54,76 694 | TGTGGAAGCT;12 695 | GATATCTTGA;16 696 | GGCAGCTGGT;64,56 697 | TGTCGACAAG;91,69 698 | ACCCGCTTGT;1 699 | CGATTACTGG;33,78,78 700 | CGTAAGTAAA;42,1 701 | CAGGCCTTAC;79,77 702 | ACACGGTTCG;50 703 | TGGGATAAAT;9 704 | AAACTCTAAA;93 705 | ACACGCACAC;93 706 | TGTCTGATGA;15,25,36 707 | TCGCTCAATA;66 708 | AAAGTCATGC;25,96 709 | AGAGTCATAT;61 710 | ATACCTGCGA;27,65 711 | CGCTGGAAGC;57,66 712 | TAGTCGTATT;49 713 | GAAGACCGTC;48 714 | TTTGATAGAG;28,85 715 | AGATCGAGAT;88,18 716 | CACACAATTG;21 717 | TAATCTGACC;36 718 | TAAGCAATTG;94,18,97,4,56 719 | GAAATAATAC;50,69 720 | AGAATGACAC;58 721 | ACAGGAACTA;28 722 | GGGTGGAGAC;61 723 | CGTTTCGTCT;32 724 | CTGAACGTCG;14 725 | AAACATGCCC;34,97 726 | TCAGTCTTAA;45 727 | GTACGTTCGA;94,39 728 | GACTTTCGCA;82 729 | GCCTTGGAGA;83 730 | AAAGCTCGCA;35 731 | TCAAGACAGA;49 732 | GGAATCTTCC;22,59,16 733 | CTTGGGCCAA;6 734 | ACGAGGCATG;62 735 | TTATGGAGTT;87 736 | ATAAGCGTGA;87,96 737 | CTCTTGCAGA;76 738 | CCAACTAGCC;59 739 | GGTGAGAGCA;44 740 | TTTCCGTTTA;18 741 | TAGGCACTTC;14,52 742 | GGGTCACGCA;16 743 | GACCTCGGGT;59 744 | ATGACGGTTG;28,58 745 | TTGCCTACGT;84,16 746 | TCGCACCACC;62,1 747 | GGGGGTAGTT;22,16 748 | TATGGACGAA;17 749 | AGTTAATTTA;16 750 | TTCTACTATG;65 751 | AGCCCGCAGA;29,88 752 | CCCGTAGCGG;3 753 | CTTCGGTAAT;37 754 | AATTAATGAG;40 755 | AACTGAAATT;2 756 | GTGCCTTGGC;83 757 | GTCTTACCCC;12 758 | TGACGCTAAT;65 759 | ACATGTATCT;78 760 | AGTCTATAGG;10,34 761 | TTGATGGTCC;29 762 | GGACCCTTGT;53,98,98,35,88,90,23 763 | TGCATAGCAT;66 764 | AGACCATGCG;30,80,27,51 765 | GCAAGGTGAA;31 766 | AATACCGGGG;12,29,92 767 | TTTTAAGGCC;54,28,96 768 | GGGTCGTTCT;13 769 | TTTGATTTCC;54,51 770 | GGCACTCTGG;57 771 | CGTGTTAAGC;86,90 772 | AGCGTTAGAA;1 773 | CTGACGCGGC;96 774 | CTCCACTTTC;30,70 775 | CGACCACGCA;67 776 | TCTTGCTGAG;42 777 | GGATACCCGT;57 778 | TTTACGTGAT;51,74 779 | CCAGGCACGA;45 780 | CTGATCTCAC;5 781 | AGAGGACGCC;64,21 782 | ATGTGTAAGT;31 783 | TGCATTGCAT;58 784 | ATTTTTCTAT;75,13,50,71 785 | GAGTAAACTA;82 786 | TCAATGCTCG;93 787 | GCCTGTAATG;85 788 | ATGAATACCA;34 789 | CCATAGCGGG;31,23,38 790 | AAAGACATGT;56 791 | TGCATCTCCC;33 792 | TTAGAATCTA;40 793 | TTAGAAGGGG;20 794 | CGTATGGTTT;44 795 | GTCTGCGGAT;28 796 | CTCACTATGA;26,99 797 | TTAACGCATC;60 798 | GCAGTCGCGC;5,8 799 | CTCACAAGAA;8 800 | TGGGCGCTTA;8,52 801 | GGGACCTGCC;86,57 802 | GCCACCGGTC;85,96,73,37 803 | CGAGGCGATG;41 804 | TTACGGTCCG;80 805 | TAAGAGTGAC;18,50,82 806 | CGCTGGGCAC;96 807 | ATCGCTTATC;0,25 808 | TCCTGGGCTT;22 809 | TGTTGTTAAT;80,7,24 810 | ACAACGCCTA;60,29 811 | GGAGGCACTT;98 812 | AGTGGGCGCA;74 813 | CAGTCTGATG;51 814 | TCCTACCTGT;22 815 | CCCTGGTTGA;99 816 | GGCCTTGTTA;2,29 817 | CCCGCTCACG;66 818 | TGCTACCAGG;20,85 819 | GAGTACCCAC;5,74 820 | ACCCACAGGG;70 821 | TGTCTGACAA;51 822 | TTCTGTCCTT;37,56 823 | CCTTAGGATT;87 824 | ACGCTCGGTC;89 825 | TGAGGAAAGC;95,99 826 | AATTTCCCCG;16 827 | GACCATGCCA;53 828 | GTGTCTCATA;74 829 | AATGTGATAT;46 830 | AGCTTCAGGA;17,26 831 | ACTACATGAC;94 832 | ATACCTAAGT;48 833 | CTAAAGTAAA;12,15,89 834 | CACCCCTGCC;47 835 | ACCTACACCC;1 836 | CCACTATGCA;55 837 | TCTCCGCACT;58 838 | CAAGCCCATC;49,10 839 | CCGGGGCCTA;61 840 | CGCTAAAGTT;84,17 841 | CCCCTGGTAA;59,98 842 | CCAAAATTAT;24 843 | CGGGACCAGA;62 844 | GATGCCCAAT;45,6 845 | CGCGAACTAA;16 846 | CGTCCGAGGC;41,87 847 | CATTAAGGCT;86,64,93 848 | TGCCGCAACT;87 849 | ATCTTGAATA;18,68,55,74 850 | CTGGATGATG;68 851 | AGGAGTAGTC;21,16,99 852 | ACACCTCTAG;40,1 853 | GTTACCAATT;54 854 | CTACGAATAT;55 855 | TAGTGCTCCG;84,92,18,7 856 | TACTTCTAGT;12,93,99 857 | AATGATAAGA;58,26,31 858 | CCCTCTTCGG;61 859 | CTTCATCTGA;75,96 860 | TCTGCGCCAG;32,68 861 | GGGCTAGATG;18,72 862 | TCATCTTGTG;21 863 | GCGAGGTCCG;67 864 | CTTGGCTTAC;73 865 | TACAATTTTC;73,51,71 866 | GAGTTAACTC;82 867 | ATTTACTGTA;54,89 868 | AAATAGTTCA;51,40,99 869 | AACTACAGCC;81 870 | GGCTGAGTCG;99 871 | CAGAAGGGGA;78,32 872 | GTAAGGCTAT;66 873 | CTATTCCACA;89 874 | TATCCCTTGG;82 875 | TTGGTACAGA;8 876 | TGAGGAGTCA;41 877 | ACTAGCAGAT;65,48 878 | ATGACATTGT;27 879 | ATTACGATTG;77,91 880 | ACCGCCCCGC;75 881 | TATCGTAAAC;26,16 882 | ACGGCTGCGA;36 883 | CACCGCAGAT;27,7 884 | ACCGTGTTCA;88 885 | GCAGATCAAA;96,41 886 | TGACCGCGCG;11 887 | CTAGAGGCCC;78 888 | CTGTCGAACG;58 889 | GACCGGGAGT;72,24 890 | ATCGCTGACG;53 891 | ACAGTCCTAG;38 892 | GTCGTCACCG;92 893 | GTATGGACAT;92 894 | CGGCAGGCTA;31 895 | CTCATCGTAG;5 896 | GTTCCAAGCC;58 897 | ATGAGAAGTA;48 898 | CTTTATGGTT;48 899 | TGGGGCCCAT;85 900 | TGTTAACGTT;53 901 | CCGGACCGGC;63,56,13 902 | ACGCTAATAA;88 903 | AACACTTGAA;98,73,7,30,63 904 | TAGTCCGCTG;56 905 | ATTTGCTATG;44,88 906 | CTGCTAATTC;19,6,27 907 | AACGAGCTTT;63 908 | AATTAGTACA;25,20 909 | ACAACCCCAA;6 910 | CAGTCTTTTC;31,24 911 | TGTGTACCGA;28,85 912 | ATCTTCCTCG;1 913 | GGCAGAAAAC;9,50,20 914 | CAAAATGTCG;8,83 915 | TTGGTATGTC;77 916 | CAATGATCAA;65 917 | GTAGACCACC;81 918 | ACTCTCCTGC;46,74 919 | GTAACCGATG;45 920 | TTGGTGAATA;45,58 921 | GCTAGTCCCA;23 922 | CCTGTGCCGG;89 923 | GACTATATAC;21 924 | ACGTGTGCTG;93,4 925 | GGAGTAAGAA;79 926 | AAGAACAGCT;48 927 | GATTGTATAT;14,83 928 | ACGCTCTTAG;9 929 | TACCGACCAC;70 930 | AGATAGGTGT;44 931 | CCGGGGATAC;94,1 932 | CGAGTTTTGA;37 933 | CTGTTTCGTA;77,10,79 934 | ATGGGTCGAG;49,53,27,11 935 | CATCTTGTAA;23,31,48 936 | CATCGGAATT;8,64,96,38 937 | GGTCACCTGT;83,53 938 | GAAGATATTA;62 939 | GGATAACACT;62,75 940 | TGGACACATG;24 941 | TGGTGGGTAC;41 942 | TCTAACCTTC;20 943 | GTGTACTACT;3 944 | ACCATACGAC;92 945 | AGGTCAATTA;39,2 946 | GACAGGAATG;1 947 | ACGAAGAACC;17,93,74,11 948 | ATGACATCCC;18,70,90,26,78 949 | TACCGCAATT;85 950 | ATAGCCACTT;0 951 | TCCTTGCGTC;37 952 | TCGTACTCTT;6,46,63 953 | CATGATCGTT;52,47,54 954 | TTTACTGGAG;97 955 | TGACGCTCGT;95 956 | CCGAGGCTGA;83 957 | CGTCTGGACT;70 958 | CCGCCTCGGA;1,52,10 959 | CGATACCCTC;84 960 | GCTGTCTTTC;84,83 961 | TCGTCTGTGT;75,7 962 | TCATTCTGAA;20,47 963 | CCAAAGATGT;27 964 | GAGCAGAGCC;31 965 | CTTGCTCATC;92 966 | CCCTGCCCTA;89,21 967 | TCAATACTAA;25 968 | CTCAGGGCAG;56 969 | GGCTTGTAGG;33 970 | TCCGCTCTTC;93 971 | GTGTTCTTCT;86,48 972 | CCACTCAAGC;50 973 | TGAGACCGAT;86,52,84 974 | TCGGTGTTAG;75 975 | CTAATCTTTG;36 976 | CTCGATACAC;34,67,60 977 | GCGAAAGACA;82,47 978 | GCATCTAATT;13,88 979 | TATAGTTACC;86,39,54 980 | TGCGTACTTT;9,70 981 | TTCCGAGTAT;57 982 | TGAGCTACGG;19,28 983 | CAGCTGTTTA;54,33 984 | GTGCCGAGGA;21 985 | GCAAAATGAA;25 986 | AGACGGCGGT;41 987 | AACCAGTGCG;20,36 988 | TTGCAAGGCT;36,11 989 | AATCTATAAA;59 990 | TCATACTCAC;64 991 | ATATTCTTAG;73 992 | GCCAACTGGA;60 993 | GTGGTGTTGA;66 994 | AAGCGATCGA;79 995 | CGAGGAGTCC;37 996 | ATATCAAGCT;21 997 | CCCACGTTCT;32 998 | ACTGGTCCAG;82 999 | CAGTTCCTTC;95,6 1000 | GCATAGACTT;0 1001 | -------------------------------------------------------------------------------- /data/artificialKmer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # import matplotlib.pyplot as plt 3 | 4 | gene = 'ATGC' 5 | kmers = [] 6 | 7 | maxV = 10 8 | intervel = 100 #number of files inserted in rambo 9 | 10 | f = open("ArtfcKmersToy"+str(intervel)+".txt", "w") 11 | 12 | for j in range(0,1000): #these many queries 13 | a = '' 14 | for i in range(0,10): 15 | a = a + gene[np.random.randint(4, size=1)[0]] 16 | for k in range(0,1): #for 1 merges 17 | maxV = int((intervel-1)*np.random.exponential(0.01, 1)) +1 18 | #maxV =1 19 | print (maxV) 20 | VI = np.random.randint(intervel*k, intervel*(k+1), maxV) # maxV is max V 21 | a = a + ';'+ ','.join(['%d' % num for num in VI]) 22 | f.write(a + '\n') 23 | f.close() 24 | -------------------------------------------------------------------------------- /include/MurmurHash3.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 4 | 5 | #ifndef _MURMURHASH3_H_ 6 | #define _MURMURHASH3_H_ 7 | 8 | //----------------------------------------------------------------------------- 9 | // Platform-specific functions and macros 10 | 11 | // Microsoft Visual Studio 12 | 13 | #if defined(_MSC_VER) && (_MSC_VER < 1600) 14 | 15 | typedef unsigned char uint8_t; 16 | typedef unsigned int uint32_t; 17 | typedef unsigned __int64 uint64_t; 18 | 19 | // Other compilers 20 | 21 | #else // defined(_MSC_VER) 22 | 23 | #include 24 | 25 | #endif // !defined(_MSC_VER) 26 | 27 | //----------------------------------------------------------------------------- 28 | 29 | void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); 30 | 31 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); 32 | 33 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); 34 | 35 | //----------------------------------------------------------------------------- 36 | 37 | #endif // _MURMURHASH3_H_ 38 | -------------------------------------------------------------------------------- /include/MyBloom.h: -------------------------------------------------------------------------------- 1 | #ifndef _MYBLOOM_ 2 | #define _MYBLOOM_ 3 | #include 4 | #include "constants.h" 5 | #include 6 | #include "bitArray.h" 7 | 8 | std::vector myhash(std::string key, int len, int k, int r, int range); 9 | 10 | class BloomFiler{ 11 | public: 12 | // BloomFiler(int capacity, float FPR, int k); 13 | BloomFiler(int sz, float FPR, int k); 14 | void insert(std::vector a); 15 | bool test(std::vector a); 16 | void serializeBF(std::string BF_file); 17 | void deserializeBF(std::vector BF_file); 18 | 19 | // void serialize1(std::string BF_file); 20 | 21 | int n; 22 | float p; 23 | int R; 24 | int k; 25 | // std::vector m_bits; 26 | // std::bitset m_bits; 27 | bitArray* m_bits; 28 | }; 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /include/Rambo_construction.h: -------------------------------------------------------------------------------- 1 | #ifndef _RamboConstruction_ 2 | #define _RamboConstruction_ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "constants.h" 8 | #include "bitArray.h" 9 | 10 | 11 | // vector hashfunc( void* key, int len, int R, int B){ 12 | // } 13 | 14 | class RAMBO{ 15 | public: 16 | 17 | RAMBO(int n, int r1, int b1, int K); 18 | std::vector hashfunc( std::string key, int len); 19 | void insertion (std::string setID, std::vector keys); 20 | std::set takeunion(std::set set1, std::set set2); 21 | std::set takeIntrsec(std::set* setArray); 22 | std::vector getdata(std::string filenameSet); 23 | bitArray query (std::string query_key, int len); 24 | void createMetaRambo(int K, bool verbose); 25 | void serializeRAMBO(std::string dir); 26 | void deserializeRAMBO(std::vector dir); 27 | void insertion2 (std::vector alllines); 28 | bitArray queryseq (std::string query_key, int len); 29 | void insertionRare (std::string setID, std::vector keys); 30 | 31 | int R ; 32 | int B; 33 | int n; 34 | float p; 35 | int range; 36 | int k; 37 | float FPR; 38 | BloomFiler** Rambo_array; 39 | std::vector* metaRambo; 40 | }; 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /include/bitArray.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTBITARRAY_ 2 | #define _INTBITARRAY_ 3 | #include 4 | 5 | // #include 6 | // #include 7 | 8 | class bitArray{ 9 | public: 10 | // friend class boost::serialization::access; 11 | 12 | bitArray(int size); 13 | void SetBit(uint k); 14 | void ClearBit( uint k); 15 | bool TestBit( uint k); 16 | void ANDop(char* B); 17 | void serializeBitAr(std::string BF_file); 18 | void deserializeBitAr(std::vector BF_file); 19 | int getcount(void); 20 | char *A; 21 | int ar_size; 22 | // template 23 | // void serialize(Archive & ar, const unsigned int version){ 24 | // ar & A; 25 | // } 26 | }; 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /include/constants.h: -------------------------------------------------------------------------------- 1 | // constants.h 2 | 3 | #ifndef _MYLIB_CONSTANTS_H_ 4 | #define _MYLIB_CONSTANTS_H_ 5 | 6 | //const int Ki = 27947252; // number of sets 7 | const int Ki = 100; // number of sets 8 | 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /include/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _utils_ 2 | #define _utils_ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | std::vector getsets( std::string path); 10 | std::vector line2array( std::string line, char d); 11 | void writeRAMBOresults(std::string path, int rows, int cols, float* values); 12 | std::vector arrayunion(std::vector &v1, std::vector &v2); 13 | std::vector arrayintersection(std::vector &v1, std::vector &v2); 14 | std::set takeunion(std::set set1, std::set set2); 15 | std::vector getctxdata(std::string filenameSet); 16 | std::vector readlines( std::string path, int num); 17 | std::vector getRandomTestKeys(int keysize, int n); 18 | std::map> makeInvIndex(int n, std::vector foldernames); 19 | std::vector getkmers(std::string query_key, int kmersize); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/MurmurHash3.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 4 | 5 | // Note - The x86 and x64 versions do _not_ produce the same results, as the 6 | // algorithms are optimized for their respective platforms. You can still 7 | // compile and run any of them on any platform, but your performance with the 8 | // non-native version will be less than optimal. 9 | 10 | #include "MurmurHash3.h" 11 | 12 | //----------------------------------------------------------------------------- 13 | // Platform-specific functions and macros 14 | 15 | // Microsoft Visual Studio 16 | 17 | #if defined(_MSC_VER) 18 | 19 | #define FORCE_INLINE __forceinline 20 | 21 | #include 22 | 23 | #define ROTL32(x,y) _rotl(x,y) 24 | #define ROTL64(x,y) _rotl64(x,y) 25 | 26 | #define BIG_CONSTANT(x) (x) 27 | 28 | // Other compilers 29 | 30 | #else // defined(_MSC_VER) 31 | 32 | #define FORCE_INLINE inline __attribute__((always_inline)) 33 | 34 | inline uint32_t rotl32 ( uint32_t x, int8_t r ) 35 | { 36 | return (x << r) | (x >> (32 - r)); 37 | } 38 | 39 | inline uint64_t rotl64 ( uint64_t x, int8_t r ) 40 | { 41 | return (x << r) | (x >> (64 - r)); 42 | } 43 | 44 | #define ROTL32(x,y) rotl32(x,y) 45 | #define ROTL64(x,y) rotl64(x,y) 46 | 47 | #define BIG_CONSTANT(x) (x##LLU) 48 | 49 | #endif // !defined(_MSC_VER) 50 | 51 | //----------------------------------------------------------------------------- 52 | // Block read - if your platform needs to do endian-swapping or can only 53 | // handle aligned reads, do the conversion here 54 | 55 | FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) 56 | { 57 | return p[i]; 58 | } 59 | 60 | FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) 61 | { 62 | return p[i]; 63 | } 64 | 65 | //----------------------------------------------------------------------------- 66 | // Finalization mix - force all bits of a hash block to avalanche 67 | 68 | FORCE_INLINE uint32_t fmix32 ( uint32_t h ) 69 | { 70 | h ^= h >> 16; 71 | h *= 0x85ebca6b; 72 | h ^= h >> 13; 73 | h *= 0xc2b2ae35; 74 | h ^= h >> 16; 75 | 76 | return h; 77 | } 78 | 79 | //---------- 80 | 81 | FORCE_INLINE uint64_t fmix64 ( uint64_t k ) 82 | { 83 | k ^= k >> 33; 84 | k *= BIG_CONSTANT(0xff51afd7ed558ccd); 85 | k ^= k >> 33; 86 | k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); 87 | k ^= k >> 33; 88 | 89 | return k; 90 | } 91 | 92 | //----------------------------------------------------------------------------- 93 | 94 | void MurmurHash3_x86_32 ( const void * key, int len, 95 | uint32_t seed, void * out ) 96 | { 97 | const uint8_t * data = (const uint8_t*)key; 98 | const int nblocks = len / 4; 99 | 100 | uint32_t h1 = seed; 101 | 102 | const uint32_t c1 = 0xcc9e2d51; 103 | const uint32_t c2 = 0x1b873593; 104 | 105 | //---------- 106 | // body 107 | 108 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); 109 | 110 | for(int i = -nblocks; i; i++) 111 | { 112 | uint32_t k1 = getblock32(blocks,i); 113 | 114 | k1 *= c1; 115 | k1 = ROTL32(k1,15); 116 | k1 *= c2; 117 | 118 | h1 ^= k1; 119 | h1 = ROTL32(h1,13); 120 | h1 = h1*5+0xe6546b64; 121 | } 122 | 123 | //---------- 124 | // tail 125 | 126 | const uint8_t * tail = (const uint8_t*)(data + nblocks*4); 127 | 128 | uint32_t k1 = 0; 129 | 130 | switch(len & 3) 131 | { 132 | case 3: k1 ^= tail[2] << 16; 133 | case 2: k1 ^= tail[1] << 8; 134 | case 1: k1 ^= tail[0]; 135 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 136 | }; 137 | 138 | //---------- 139 | // finalization 140 | 141 | h1 ^= len; 142 | 143 | h1 = fmix32(h1); 144 | 145 | *(uint32_t*)out = h1; 146 | } 147 | 148 | //----------------------------------------------------------------------------- 149 | 150 | void MurmurHash3_x86_128 ( const void * key, const int len, 151 | uint32_t seed, void * out ) 152 | { 153 | const uint8_t * data = (const uint8_t*)key; 154 | const int nblocks = len / 16; 155 | 156 | uint32_t h1 = seed; 157 | uint32_t h2 = seed; 158 | uint32_t h3 = seed; 159 | uint32_t h4 = seed; 160 | 161 | const uint32_t c1 = 0x239b961b; 162 | const uint32_t c2 = 0xab0e9789; 163 | const uint32_t c3 = 0x38b34ae5; 164 | const uint32_t c4 = 0xa1e38b93; 165 | 166 | //---------- 167 | // body 168 | 169 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); 170 | 171 | for(int i = -nblocks; i; i++) 172 | { 173 | uint32_t k1 = getblock32(blocks,i*4+0); 174 | uint32_t k2 = getblock32(blocks,i*4+1); 175 | uint32_t k3 = getblock32(blocks,i*4+2); 176 | uint32_t k4 = getblock32(blocks,i*4+3); 177 | 178 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 179 | 180 | h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; 181 | 182 | k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; 183 | 184 | h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; 185 | 186 | k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; 187 | 188 | h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; 189 | 190 | k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; 191 | 192 | h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; 193 | } 194 | 195 | //---------- 196 | // tail 197 | 198 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16); 199 | 200 | uint32_t k1 = 0; 201 | uint32_t k2 = 0; 202 | uint32_t k3 = 0; 203 | uint32_t k4 = 0; 204 | 205 | switch(len & 15) 206 | { 207 | case 15: k4 ^= tail[14] << 16; 208 | case 14: k4 ^= tail[13] << 8; 209 | case 13: k4 ^= tail[12] << 0; 210 | k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; 211 | 212 | case 12: k3 ^= tail[11] << 24; 213 | case 11: k3 ^= tail[10] << 16; 214 | case 10: k3 ^= tail[ 9] << 8; 215 | case 9: k3 ^= tail[ 8] << 0; 216 | k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; 217 | 218 | case 8: k2 ^= tail[ 7] << 24; 219 | case 7: k2 ^= tail[ 6] << 16; 220 | case 6: k2 ^= tail[ 5] << 8; 221 | case 5: k2 ^= tail[ 4] << 0; 222 | k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; 223 | 224 | case 4: k1 ^= tail[ 3] << 24; 225 | case 3: k1 ^= tail[ 2] << 16; 226 | case 2: k1 ^= tail[ 1] << 8; 227 | case 1: k1 ^= tail[ 0] << 0; 228 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 229 | }; 230 | 231 | //---------- 232 | // finalization 233 | 234 | h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; 235 | 236 | h1 += h2; h1 += h3; h1 += h4; 237 | h2 += h1; h3 += h1; h4 += h1; 238 | 239 | h1 = fmix32(h1); 240 | h2 = fmix32(h2); 241 | h3 = fmix32(h3); 242 | h4 = fmix32(h4); 243 | 244 | h1 += h2; h1 += h3; h1 += h4; 245 | h2 += h1; h3 += h1; h4 += h1; 246 | 247 | ((uint32_t*)out)[0] = h1; 248 | ((uint32_t*)out)[1] = h2; 249 | ((uint32_t*)out)[2] = h3; 250 | ((uint32_t*)out)[3] = h4; 251 | } 252 | 253 | //----------------------------------------------------------------------------- 254 | 255 | void MurmurHash3_x64_128 ( const void * key, const int len, 256 | const uint32_t seed, void * out ) 257 | { 258 | const uint8_t * data = (const uint8_t*)key; 259 | const int nblocks = len / 16; 260 | 261 | uint64_t h1 = seed; 262 | uint64_t h2 = seed; 263 | 264 | const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); 265 | const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); 266 | 267 | //---------- 268 | // body 269 | 270 | const uint64_t * blocks = (const uint64_t *)(data); 271 | 272 | for(int i = 0; i < nblocks; i++) 273 | { 274 | uint64_t k1 = getblock64(blocks,i*2+0); 275 | uint64_t k2 = getblock64(blocks,i*2+1); 276 | 277 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 278 | 279 | h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; 280 | 281 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; 282 | 283 | h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; 284 | } 285 | 286 | //---------- 287 | // tail 288 | 289 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16); 290 | 291 | uint64_t k1 = 0; 292 | uint64_t k2 = 0; 293 | 294 | switch(len & 15) 295 | { 296 | case 15: k2 ^= ((uint64_t)tail[14]) << 48; 297 | case 14: k2 ^= ((uint64_t)tail[13]) << 40; 298 | case 13: k2 ^= ((uint64_t)tail[12]) << 32; 299 | case 12: k2 ^= ((uint64_t)tail[11]) << 24; 300 | case 11: k2 ^= ((uint64_t)tail[10]) << 16; 301 | case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; 302 | case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; 303 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; 304 | 305 | case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; 306 | case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; 307 | case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; 308 | case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; 309 | case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; 310 | case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; 311 | case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; 312 | case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; 313 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 314 | }; 315 | 316 | //---------- 317 | // finalization 318 | 319 | h1 ^= len; h2 ^= len; 320 | 321 | h1 += h2; 322 | h2 += h1; 323 | 324 | h1 = fmix64(h1); 325 | h2 = fmix64(h2); 326 | 327 | h1 += h2; 328 | h2 += h1; 329 | 330 | ((uint64_t*)out)[0] = h1; 331 | ((uint64_t*)out)[1] = h2; 332 | } 333 | 334 | //----------------------------------------------------------------------------- 335 | 336 | -------------------------------------------------------------------------------- /src/MyBloom.cpp: -------------------------------------------------------------------------------- 1 | #include "MurmurHash3.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "MyBloom.h" 7 | #include 8 | #include "constants.h" 9 | #include 10 | #include "bitArray.h" 11 | 12 | using namespace std; 13 | 14 | vector myhash( std::string key, int len, int k, int r, int range){ 15 | // int hashvals[k]; 16 | vector hashvals; 17 | uint op; // takes 4 byte 18 | 19 | for (int i=0+ k*r; i a){ 33 | int N = a.size(); 34 | for (int n =0 ; nSetBit(a[n]); 36 | } 37 | } 38 | 39 | bool BloomFiler::test(vector a){ 40 | int N = a.size(); 41 | for (int n =0 ; nTestBit(a[n])){ 43 | return false; 44 | } 45 | } 46 | return true; 47 | } 48 | 49 | void BloomFiler::serializeBF(string BF_file){ 50 | m_bits->serializeBitAr(BF_file); 51 | } 52 | 53 | void BloomFiler::deserializeBF(vector BF_file){ 54 | m_bits->deserializeBitAr(BF_file); 55 | } 56 | -------------------------------------------------------------------------------- /src/Rambo_construction.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "MyBloom.h" 12 | #include "MurmurHash3.h" 13 | #include "Rambo_construction.h" 14 | #include "utils.h" 15 | #include "constants.h" 16 | #include "bitArray.h" 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace std; 22 | 23 | vector RAMBO:: hashfunc( std::string key, int len){ 24 | // int hashvals[k]; 25 | vector hashvals; 26 | uint op; 27 | for (int i=0; i RAMBO:: getdata(string filenameSet){ 35 | //get the size of Bloom filter by count 36 | ifstream cntfile (filenameSet); 37 | std::vector allKeys; 38 | int totKmerscnt = 0; 39 | while ( cntfile.good() ) 40 | { 41 | string line1, vals; 42 | while( getline ( cntfile, line1 ) ){ 43 | stringstream is; 44 | is<30 ){ 46 | for (uint idx =0; idx[B*R]; //constains set info in it. 71 | for(int b=0; b hashvals = RAMBO::hashfunc(std::to_string(i), std::to_string(i).size()); // R hashvals, each with max value B 82 | for(int r=0; r keys){ 102 | vector hashvals = RAMBO::hashfunc(setID, setID.size()); // R hashvals 103 | 104 | //make this loop parallel 105 | #pragma omp parallel for 106 | for(std::size_t i=0; i temp = myhash(keys[i].c_str(), keys[i].size() , k,r, range); 109 | Rambo_array[hashvals[r] + B*r]->insert(temp); 110 | } 111 | } 112 | } 113 | 114 | // given inverted index type arrangement, kmer;files;files;.. 115 | void RAMBO::insertion2 (std::vector alllines){ 116 | //make this loop parallel 117 | //#pragma omp parallel for 118 | for(std::size_t i=0; iKeySets = line2array(alllines[i], d);//sets for a key 121 | 122 | std::vectorKeySet = line2array(KeySets[1], ','); 123 | for (uint j = 0; j hashvals = RAMBO::hashfunc(KeySet[j], KeySet[j].size()); // R hashvals 125 | for(int r=0; r temp = myhash(KeySets[0].c_str(), KeySets[0].size() , k, r, range);// i is the key 127 | Rambo_array[hashvals[r] + B*r]->insert(temp); 128 | } 129 | } 130 | } 131 | } 132 | 133 | // // give set and keys in the set 134 | // void RAMBO::insertionRare (std::string setID, std::vector keys){ 135 | // vector hashvals = RAMBO::hashfunc(setID, setID.size()); // R hashvals 136 | // 137 | // //make this loop parallel 138 | // int skip =0; 139 | // #pragma omp parallel for 140 | // for(std::size_t i=0; i temp = myhash(keys[i].c_str(), keys[i].size() , k, r, range); 145 | // Rambo_array[hashvals[r] + B*r]->insert(temp); 146 | // } 147 | // } 148 | // else{ skip++;} 149 | // } 150 | // cout<<"skipped "< resUnion[R]; //constains union results in it. 156 | bitArray bitarray_K(Ki); 157 | // bitset bitarray_K; 158 | // set res; 159 | float count=0.0; 160 | vector check; 161 | for(int r=0; r bitarray_K1; 165 | for(int b=0; btest(check)){ 167 | chrono::time_point t5 = chrono::high_resolution_clock::now(); 168 | for (uint j=0; j t6 = chrono::high_resolution_clock::now(); 172 | count+=((t6-t5).count()/1000000000.0); 173 | } 174 | } 175 | if (r ==0){ 176 | bitarray_K = bitarray_K1; 177 | } 178 | else{ 179 | bitarray_K.ANDop(bitarray_K1.A); 180 | } 181 | } 182 | vector().swap(check); 183 | return bitarray_K; 184 | } 185 | 186 | void RAMBO::serializeRAMBO(string dir){ 187 | for(int b=0; bserializeBF(br); 191 | } 192 | } 193 | } 194 | 195 | void RAMBO::deserializeRAMBO(vector dir){ 196 | for(int b=0; b br; 199 | for (uint j=0; jdeserializeBF(br); 203 | 204 | } 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /src/bitArray.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "bitArray.h" 5 | 6 | using namespace std; 7 | 8 | bitArray::bitArray(int size){ 9 | ar_size = size; 10 | A = new char[ar_size/8 +1]; 11 | for (int i=0; i<(ar_size/8 +1); i++ ){ 12 | A[i] = '\0'; // Clear the bit array 13 | } 14 | } 15 | 16 | void bitArray::SetBit(uint k) { 17 | A[(k/8)] |= (1 << (k%8)); 18 | } 19 | 20 | void bitArray::ClearBit(uint k) { 21 | A[(k/8)] &= ~(1 << (k%8)); 22 | } 23 | 24 | bool bitArray::TestBit(uint k) { 25 | return (A[(k/8)] & (1 << (k%8))); 26 | } 27 | 28 | void bitArray::ANDop(char* B){ 29 | for (int i=0; i<(ar_size/8 +1); i++ ){ 30 | A[i] &= B[i]; 31 | } 32 | } 33 | 34 | int bitArray::getcount(void){ 35 | int count = 0; 36 | 37 | for (int kp=0; kp BF_file){ 58 | for(uint j =0; j 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "smhasher/src/MyBloom.h" 13 | using namespace std; 14 | 15 | // the code is mostly file reading 16 | 17 | int main(){ 18 | 19 | struct ConstrintBF { 20 | string refinement; 21 | BloomFiler BF; 22 | } ; 23 | 24 | //get the size of Bloom filter by count 25 | ifstream cntfile ("finalData3/counts.csv"); 26 | while ( cntfile.good() ) 27 | { 28 | string line1, vals; 29 | while( getline ( cntfile, line1 ) ){ 30 | stringstream is; 31 | is<10){ 61 | //do something about it 62 | // cout<<'$'< 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "MyBloom.h" 11 | #include "MurmurHash3.h" 12 | #include "Rambo_construction.h" 13 | #include "utils.h" 14 | #include "constants.h" 15 | #include "bitArray.h" 16 | #include 17 | 18 | using namespace std; 19 | 20 | int main(int argc, char** argv){ 21 | 22 | string job(argv[1]); 23 | 24 | bool insert =false; 25 | bool ser =false; 26 | bool test = true; 27 | bool deser = true; 28 | 29 | int n_perSet = 1000000000; //cardinality of each set 30 | int R_all = 2; 31 | int B_all = 15; 32 | 33 | int K = Ki; // total number of sets 34 | 35 | float fp_ops; 36 | float ins_time; 37 | float query_time; 38 | 39 | // constructor 40 | RAMBO myRambo(n_perSet, R_all, B_all, K); 41 | 42 | // details of RAMBO set partitioning 43 | myRambo.createMetaRambo (K, false); 44 | cout<<"created meta"< t3 = chrono::high_resolution_clock::now(); 58 | 59 | string dataPath = "data/"+ job +"/inflated/" + to_string(batch) + "_indexed.txt"; 60 | std::vector setIDs = readlines(dataPath, 0); 61 | int stpt; 62 | stpt = 5; 63 | //} 64 | for (uint ss=0; ss setID = line2array(setIDs[ss], d); 68 | string mainfile = "data/"+ job +"/inflated/" + setID[1]+ ".out"; 69 | vector keys = getctxdata(mainfile); 70 | failedFiles< t4 = chrono::high_resolution_clock::now(); 81 | cout << chrono::duration_cast(t4-t3).count()/1000000000.0 << "sec\n"; 82 | ins_time = (t4-t3).count()/1000000000.0; 83 | failedFiles<<"insertion time (sec) of 100 files: "<m_bits->getcount()< SerOpFile2; 101 | SerOpFile2.push_back(SerOpFile); // mutliple files can be pushed here 102 | 103 | cout<<"deser starting"< alllines = readlines("data/ArtfcKmersToy"+to_string(K)+".txt", 0); 111 | //std::vector alllines = readlines("data/query.txt", 0); 112 | std::vector testKeys; 113 | std::vector gt_size; 114 | for(uint i=0; iKeySets = line2array(alllines[i], ';');//sets for a key 116 | testKeys.push_back(KeySets[0]); 117 | gt_size.push_back( line2array(KeySets[1], ',').size() ); 118 | } 119 | myRambo.createMetaRambo (K, false); 120 | // cout<<"load: "<m_bits->getcount(); 121 | cout<<"total number of queries : "< t5 = chrono::high_resolution_clock::now(); 130 | 131 | for (std::size_t i=0; i t6 = chrono::high_resolution_clock::now(); 146 | float QTpt_cpu = 1000.0 * (t6_cpu-t5_cpu)/(CLOCKS_PER_SEC*testKeys.size()); //in ms 147 | float QTpt = chrono::duration_cast(t6-t5).count()/(1000000.0*testKeys.size()); 148 | cout <<"query time wall clock is :" < 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "MyBloom.h" 12 | #include "MurmurHash3.h" 13 | #include "utils.h" 14 | #include 15 | #include 16 | using namespace std; 17 | 18 | //readlines from a file 19 | std::vector getsets( string path){ 20 | //get the size of Bloom filter by count 21 | ifstream cntfile (path); 22 | std::vector allKeys; 23 | int linecnt = 0; 24 | while (cntfile.good()) 25 | { 26 | string line1, vals; 27 | while( getline ( cntfile, line1 ) ){ 28 | stringstream is; 29 | is< line2array( string line, char d){ 48 | stringstream is; 49 | is< op; 51 | string vals; 52 | while( getline (is, vals, d)){ 53 | op.push_back(vals); 54 | } 55 | return op; 56 | } 57 | 58 | //file write 59 | void writeRAMBOresults(string path, int rows, int cols, float* values){ 60 | ofstream myfile; 61 | myfile.open (path); 62 | for (int i =0;i arrayunion(std::vector &v1, std::vector &v2) { 73 | std::vector v; 74 | std::set_union(v1.begin(), v1.end(), v2.begin(), v2.end(), 75 | std::back_inserter(v)); 76 | return v; 77 | } 78 | 79 | std::vector arrayintersection(std::vector &v1, std::vector &v2) { 80 | std::vector v; 81 | std::set_intersection(v1.begin(), v1.end(), v2.begin(), v2.end(), 82 | std::back_inserter(v)); 83 | return v; 84 | } 85 | 86 | set takeunion(set set1, set set2){ 87 | set res; 88 | res.insert(set1.begin(), set1.end()); 89 | res.insert(set2.begin(), set2.end()); 90 | return res; 91 | } 92 | 93 | std::vector getctxdata(string filenameSet){ 94 | //get the size of Bloom filter by count 95 | ifstream cntfile (filenameSet); 96 | std::vector allKeys; 97 | int totKmerscnt = 0; 98 | while ( cntfile.good() ) 99 | { 100 | string line1; 101 | while( getline ( cntfile, line1 ) ){ 102 | std::vector linesplit = line2array(line1, ' '); 103 | allKeys.push_back(linesplit[0]); 104 | totKmerscnt++; 105 | 106 | } 107 | } 108 | std::cout<<"total inserted from one file"< readlines( string path, int num){ 115 | ifstream pathfile (path); 116 | std::vector allfiles; 117 | int count=0; 118 | while ( pathfile.good() ) 119 | { 120 | string line1; 121 | while( getline (pathfile, line1 ) ){ 122 | count++; 123 | allfiles.push_back(line1); 124 | if (count >num && num){ 125 | break; 126 | } 127 | } 128 | std::cout << count<< '\n'; 129 | return allfiles; 130 | } 131 | } 132 | 133 | std::vector getRandomTestKeys(int keysize, int n){ 134 | static const char alphanum[] = "ATGC"; 135 | std::vector s; 136 | 137 | for (int j = 0; j < n; ++j){ 138 | string st; 139 | for (int i = 0; i < keysize; ++i) { 140 | st = st + alphanum[rand()%4]; 141 | } 142 | s.push_back(st); 143 | } 144 | return s; 145 | } 146 | 147 | std::map> makeInvIndex(int n, vector foldernames){ 148 | std::map> m; 149 | for (uint f=0; f setIDs = readlines(dataPath, 0); 154 | cout< setID = line2array(setIDs[ss], d); 159 | string mainfile = foldername + setID[1]+ ".out"; 160 | cout<<"getting keys"< keys = getctxdata(mainfile); 162 | cout<<"gotkeys"< >::iterator it = m.begin(); it != m.end(); ++it){ 170 | std::cout << it->first <second[0]<<"\n"; 171 | } 172 | } 173 | else{ 174 | 175 | for (uint i =0; i getkmers(std::string query_key, int kmersize){ 190 | std::vector query_kmers; 191 | for (uint idx =0; idx