├── src
│   ├── README.md
│   ├── main
│   │   ├── jni_fpga
│   │   │   ├── README.md
│   │   │   ├── pom.xml
│   │   │   └── sw_extend_fpga.c
│   │   ├── scala
│   │   │   └── cs
│   │   │       └── ucla
│   │   │           └── edu
│   │   │               └── bwaspark
│   │   │                   ├── jni
│   │   │                   │   ├── HelloWorld.scala
│   │   │                   │   ├── SWExtendFPGAJNI.scala
│   │   │                   │   ├── SeqSWType.scala
│   │   │                   │   ├── MateSWType.scala
│   │   │                   │   ├── RefSWType.scala
│   │   │                   │   └── MateSWJNI.scala
│   │   │                   ├── datatype
│   │   │                   │   ├── MemAlnRegArrayType.scala
│   │   │                   │   ├── RefType.scala
│   │   │                   │   ├── SRTType.scala
│   │   │                   │   ├── SWPreResultType.scala
│   │   │                   │   ├── FASTQSingleNode.scala
│   │   │                   │   ├── CigarSegType.scala
│   │   │                   │   ├── SWAlnType.scala
│   │   │                   │   ├── CigarType.scala
│   │   │                   │   ├── PeStatPrepType.scala
│   │   │                   │   ├── MemChainType.scala
│   │   │                   │   ├── BNTAmbType.scala
│   │   │                   │   ├── MemPeStat.scala
│   │   │                   │   ├── SAMString.scala
│   │   │                   │   ├── BWTIntvType.scala
│   │   │                   │   ├── SMemItrType.scala
│   │   │                   │   ├── MemAlnType.scala
│   │   │                   │   ├── BNTAnnType.scala
│   │   │                   │   ├── ReadType.scala
│   │   │                   │   ├── PairEndReadType.scala
│   │   │                   │   ├── MemAlnRegType.scala
│   │   │                   │   ├── ExtensionParameters.scala
│   │   │                   │   ├── BWAIdxType.scala
│   │   │                   │   ├── MemOptType.scala
│   │   │                   │   └── BNTSeqType.scala
│   │   │                   ├── fastq
│   │   │                   │   ├── RawRead.scala
│   │   │                   │   ├── SerializablePairEndFASTQRecord.java
│   │   │                   │   ├── SerializableFASTQRecord.java
│   │   │                   │   └── FASTQRDDLoader.scala
│   │   │                   ├── debug
│   │   │                   │   ├── DebugFlag.scala
│   │   │                   │   └── TestBWAIdxType.scala
│   │   │                   ├── commandline
│   │   │                   │   ├── UploadCommand.scala
│   │   │                   │   └── BWAMEMCommand.scala
│   │   │                   ├── util
│   │   │                   │   ├── LocusEncode.scala
│   │   │                   │   └── BNTSeqUtil.scala
│   │   │                   ├── sam
│   │   │                   │   ├── SAMWriter.scala
│   │   │                   │   ├── SAMHDFSWriter.scala
│   │   │                   │   └── SAMHeader.scala
│   │   │                   ├── profiling
│   │   │                   │   ├── PairEndBatchedProfile.scala
│   │   │                   │   └── SWBatchTimeBreakdown.scala
│   │   │                   ├── dnaseq
│   │   │                   │   ├── MergeADAMFiles.scala
│   │   │                   │   └── Sort.scala
│   │   │                   ├── broadcast
│   │   │                   │   └── ReferenceBroadcast.scala
│   │   │                   ├── worker2
│   │   │                   │   └── MemMarkPrimarySe.scala
│   │   │                   ├── worker1
│   │   │                   │   ├── SAPos2RefPos.scala
│   │   │                   │   ├── BWAMemWorker1.scala
│   │   │                   │   └── MemSortAndDedup.scala
│   │   │                   └── Usage.scala
│   │   ├── avro
│   │   │   └── fastq.avdl
│   │   ├── java
│   │   │   ├── accUCLA
│   │   │   │   └── accAPI
│   │   │   │       ├── MyTimer.java
│   │   │   │       ├── big2LittleEndian.java
│   │   │   │       └── Connector2FPGA.java
│   │   │   └── cs
│   │   │       └── ucla
│   │   │           └── edu
│   │   │               └── avro
│   │   │                   └── fastq
│   │   │                       └── FASTQ.java
│   │   ├── native
│   │   │   ├── kstring.c
│   │   │   ├── jni_hello_world.c
│   │   │   ├── kthread.c
│   │   │   ├── bwa.h
│   │   │   ├── bntseq.h
│   │   │   ├── kstring.h
│   │   │   ├── kvec.h
│   │   │   ├── utils.h
│   │   │   ├── bwt.h
│   │   │   └── ksw.h
│   │   ├── perl
│   │   │   ├── gen_small_input.pl
│   │   │   ├── cal_aligned_percentage.pl
│   │   │   └── worker1_verifier.pl
│   │   ├── alphadata
│   │   │   ├── my_timer.h
│   │   │   └── alphadata_host.tcl
│   │   └── python
│   │       ├── sam_single_end_permutation_inplace.py
│   │       └── sam_pair_end_permutation_inplace.py
│   └── pom.xml
├── compile.pl
├── README.md
└── run_test.sh

/src/README.md:
--------------------------------------------------------------------------------
Compilation command:
mvn package -PotherOutputDir

--------------------------------------------------------------------------------
/src/main/jni_fpga/README.md:
--------------------------------------------------------------------------------
Compilation command:
mvn package -PotherOutputDir

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/jni/HelloWorld.scala:
--------------------------------------------------------------------------------
package cs.ucla.edu.bwaspark.jni

class HelloWorld {
  @native def helloWorld
}
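
The @native declaration only binds the method name; the shared library that implements Java_cs_ucla_edu_bwaspark_jni_HelloWorld_helloWorld (see /src/main/native/jni_hello_world.c below) still has to be loaded before the first call. A minimal caller sketch — the library name "jniNative" is an assumption, not something this tree confirms:

    package cs.ucla.edu.bwaspark.jni

    object HelloWorldDemo {
      def main(args: Array[String]) {
        // Assumed library name: resolves libjniNative.so on java.library.path.
        System.loadLibrary("jniNative")
        (new HelloWorld).helloWorld
      }
    }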
--------------------------------------------------------------------------------
/compile.pl:
--------------------------------------------------------------------------------
#! /usr/bin/perl

system "mvn clean package";
chdir "./src";
system "mvn package -PotherOutputDir";
chdir "./main/jni_fpga";
system "mvn package -PotherOutputDir";
chdir "../alphadata";
system "sdaccel alphadata_host.tcl";
chdir "../../../";

--------------------------------------------------------------------------------
/src/main/avro/fastq.avdl:
--------------------------------------------------------------------------------
@namespace("cs.ucla.edu.avro.fastq")
protocol FASTQ {

record FASTQRecord {
  bytes name;
  bytes seq;
  bytes quality;
  int seqLength;
  bytes comment = null;
}

record PairEndFASTQRecord {
  FASTQRecord seq0;
  FASTQRecord seq1;
}

}
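
These two records are compiled by the Avro tooling into the Java classes under /src/main/java/cs/ucla/edu/avro/fastq/. A minimal sketch of building one record from Scala, assuming the generated FASTQRecord exposes the usual Avro-generated no-arg constructor and setters (the generated record classes themselves are not shown in this dump):

    import java.nio.ByteBuffer
    import cs.ucla.edu.avro.fastq.FASTQRecord

    object FASTQRecordDemo {
      def main(args: Array[String]) {
        val rec = new FASTQRecord()
        // Avro maps the IDL "bytes" type to java.nio.ByteBuffer.
        rec.setName(ByteBuffer.wrap("read1/1".getBytes))
        rec.setSeq(ByteBuffer.wrap("ACGT".getBytes))
        rec.setQuality(ByteBuffer.wrap("IIII".getBytes))
        rec.setSeqLength(4)
        println(rec.getSeqLength)
      }
    }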
--------------------------------------------------------------------------------
/src/main/java/accUCLA/accAPI/MyTimer.java:
--------------------------------------------------------------------------------

package accUCLA.api;
import java.lang.System;

public class MyTimer {
    private long start;
    public MyTimer( )
    {
        start = System.nanoTime( );
    }
    public void report( )
    {
        long end = System.nanoTime();
        System.out.println("elapsed time: " + (end-start)/1e9);
        start = end;
    }
}

--------------------------------------------------------------------------------
/src/main/native/kstring.c:
--------------------------------------------------------------------------------
#include <stdarg.h>
#include <stdio.h>
#include "kstring.h"

#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif

int ksprintf(kstring_t *s, const char *fmt, ...)
{
	va_list ap;
	int l;
	va_start(ap, fmt);
	l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
	va_end(ap);
	if (l + 1 > s->m - s->l) {
		s->m = s->l + l + 2;
		kroundup32(s->m);
		s->s = (char*)realloc(s->s, s->m);
		va_start(ap, fmt);
		l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
	}
	va_end(ap);
	s->l += l;
	return l;
}

#ifdef KSTRING_MAIN
#include <stdio.h>
int main()
{
	kstring_t *s;
	s = (kstring_t*)calloc(1, sizeof(kstring_t));
	ksprintf(s, "abcdefg: %d", 100);
	printf("%s\n", s->s);
	free(s);
	return 0;
}
#endif

--------------------------------------------------------------------------------
/src/main/java/cs/ucla/edu/avro/fastq/FASTQ.java:
--------------------------------------------------------------------------------
/**
 * Autogenerated by Avro
 *
 * DO NOT EDIT DIRECTLY
 */
package cs.ucla.edu.avro.fastq;

@SuppressWarnings("all")
@org.apache.avro.specific.AvroGenerated
public interface FASTQ {
    public static final org.apache.avro.Protocol PROTOCOL = org.apache.avro.Protocol.parse("{\"protocol\":\"FASTQ\",\"namespace\":\"cs.ucla.edu.avro.fastq\",\"types\":[{\"type\":\"record\",\"name\":\"FASTQRecord\",\"fields\":[{\"name\":\"name\",\"type\":\"bytes\"},{\"name\":\"seq\",\"type\":\"bytes\"},{\"name\":\"quality\",\"type\":\"bytes\"},{\"name\":\"seqLength\",\"type\":\"int\"},{\"name\":\"comment\",\"type\":\"bytes\",\"default\":null}]},{\"type\":\"record\",\"name\":\"PairEndFASTQRecord\",\"fields\":[{\"name\":\"seq0\",\"type\":\"FASTQRecord\"},{\"name\":\"seq1\",\"type\":\"FASTQRecord\"}]}],\"messages\":{}}");

    @SuppressWarnings("all")
    public interface Callback extends FASTQ {
        public static final org.apache.avro.Protocol PROTOCOL = cs.ucla.edu.avro.fastq.FASTQ.PROTOCOL;
    }
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/jni/SWExtendFPGAJNI.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.jni

class SWExtendFPGAJNI {
  @native def swExtendFPGAJNI(taskNum: Int, SWArray: Array[Byte]): Array[Short]
}
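
swExtendFPGAJNI takes a batch of Smith-Waterman extension tasks packed into one byte array and returns the scores as a short array. The byte layout is defined on the native side (sw_extend_fpga.c) and is not visible in this dump, so the sketch below only shows the call shape; the library name and the per-task record size are assumptions:

    import cs.ucla.edu.bwaspark.jni.SWExtendFPGAJNI

    object SWExtendFPGADemo {
      def main(args: Array[String]) {
        System.loadLibrary("swextend")               // assumed library name
        val taskNum = 128                            // number of batched extension tasks
        val packed = new Array[Byte](taskNum * 256)  // assumed per-task record size
        // ... fill `packed` in the layout expected by sw_extend_fpga.c ...
        val results: Array[Short] = new SWExtendFPGAJNI().swExtendFPGAJNI(taskNum, packed)
        println("got " + results.length + " result words")
      }
    }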
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/jni/SeqSWType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.jni

class SeqSWType {
  var readIdx: Int = -1
  var pairIdx: Int = -1
  var seqLength: Int = 0
  var seqTrans: Array[Byte] = _
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/MemAlnRegArrayType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

class MemAlnRegArrayType {
  var maxLength: Int = 0
  var curLength: Int = 0
  var regs: Array[MemAlnRegType] = _
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/fastq/RawRead.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.fastq

class RawRead {
  var name: String = new String
  var seq: String = new String
  var description: String = new String
  var qual: String = new String
}
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/RefType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

class RefType {
  var rBeg: Long = -1
  var rEnd: Long = -1
  var ref: Array[Byte] = _
  var len: Long = 0 // set to 0; this will influence the size of array allocation
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/jni/MateSWType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.jni

import cs.ucla.edu.bwaspark.datatype.{MemAlnRegType}

class MateSWType {
  var readIdx: Int = -1
  var pairIdx: Int = -1
  var regIdx: Int = -1
  var alnReg: MemAlnRegType = _
}
--------------------------------------------------------------------------------
/src/main/native/jni_hello_world.c:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


#include <stdio.h>
#include <jni.h>
#include <cs_ucla_edu_bwaspark_jni_HelloWorld.h> // generated by javah via maven-native-plugin

JNIEXPORT void JNICALL Java_cs_ucla_edu_bwaspark_jni_HelloWorld_helloWorld(JNIEnv * env, jobject obj)
{
	printf("Hello World from C\n");
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/SRTType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

/**
 * Data structure which keeps both the length of a seed and its index in the original chain
 */
class SRTType(len_i: Int, index_i: Int) {
  var len: Int = len_i
  var index: Int = index_i
}
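
SRTType exists so seeds can be reordered by length while remembering where each seed sat in its chain; sorting an array of them and then walking back through `index` is the intended pattern. A small self-contained sketch:

    import cs.ucla.edu.bwaspark.datatype.SRTType

    object SRTDemo {
      def main(args: Array[String]) {
        val seedLens = Array(17, 42, 23)
        val srt = seedLens.zipWithIndex.map { case (len, i) => new SRTType(len, i) }
        // Process the longest seed first, but keep its position in the original chain.
        val byLenDesc = srt.sortWith(_.len > _.len)
        byLenDesc.foreach(s => println("chain index " + s.index + ", length " + s.len))
      }
    }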
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/SWPreResultType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

class SWPreResultType(rmax_c: Array[Long],
                      srt_c: Array[SRTType],
                      rseq_c: Array[Byte],
                      rlen_l: Long) {
  var rmax = rmax_c
  var srt = srt_c
  var rseq = rseq_c
  var rlen = rlen_l
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/FASTQSingleNode.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

// For single node or test use
class FASTQSingleNode {
  var name: String = new String
  var seq: String = new String
  var qual: String = new String
  var comment: String = new String
  var seqLen: Int = 0
  var sam: String = new String
}

--------------------------------------------------------------------------------
/src/main/perl/gen_small_input.pl:
--------------------------------------------------------------------------------
#! /usr/bin/perl

# *
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements.  See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License.  You may obtain a copy of the License at
# *
# *    http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# *


$inFile = $ARGV[0];
$outFile = $ARGV[1];
$lineNum = $ARGV[2];

open IN, $inFile;
open OUT, ">$outFile";

$i = 0;

while(<IN>) {
	if($i < $lineNum) {
		print OUT $_;
	}
	else {
		last;
	}
	$i++;
}

close IN;
close OUT;

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/CigarSegType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

class CigarSegType {
  var op: Byte = _  // The operation on this cigar segment
  var len: Int = _  // The length of this segment

  def copy(): CigarSegType = {
    var seg = new CigarSegType
    seg.op = op
    seg.len = len
    seg
  }
}
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/jni/RefSWType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.jni

class RefSWType {
  var readIdx: Int = -1
  var pairIdx: Int = -1
  var regIdx: Int = -1
  var rBegArray: Array[Long] = _
  var rEndArray: Array[Long] = _
  var lenArray: Array[Long] = _
  var ref0: Array[Byte] = _
  var ref1: Array[Byte] = _
  var ref2: Array[Byte] = _
  var ref3: Array[Byte] = _
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/debug/DebugFlag.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.debug

// This standalone object is for debug handling.
// It has a single variable for marking the debug level;
// different debug levels generate different printed information.

object DebugFlag {

  // 0: no debug output at all
  // 1: all debug information

  var debugLevel: Int = _
  var debugBWTSMem: Boolean = false

}
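
Because DebugFlag is a singleton object, any worker code can gate its tracing on it without plumbing a flag through call chains; for example:

    import cs.ucla.edu.bwaspark.debug.DebugFlag

    object DebugDemo {
      def main(args: Array[String]) {
        DebugFlag.debugLevel = 1
        DebugFlag.debugBWTSMem = true
        if (DebugFlag.debugLevel > 0 && DebugFlag.debugBWTSMem)
          println("BWT SMem tracing enabled")
      }
    }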
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/jni/MateSWJNI.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.jni

import cs.ucla.edu.bwaspark.datatype.{MemOptType, MemPeStat, FASTQSingleNode}

class MateSWJNI {
  @native def mateSWJNI(opt: MemOptType, pacLen: Long, pes: Array[MemPeStat], groupSize: Int, seqsPairs: Array[SeqSWType],
                        mateSWArray: Array[MateSWType], refSWArray: Array[RefSWType], refSWArraySize: Array[Int]): Array[MateSWType]
}
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/SWAlnType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

class SWAlnType {
  var score: Int = 0        // best score
  var tEnd: Int = -1        // target end
  var qEnd: Int = -1        // query end
  var scoreSecond: Int = -1 // second best score
  var tEndSecond: Int = -1  // second best ending position on the target
  var tBeg: Int = -1        // target start
  var qBeg: Int = -1        // query start
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/CigarType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

import scala.collection.immutable.Vector

class CigarType {
  var cigarSegs: Vector[CigarSegType] = scala.collection.immutable.Vector.empty
  var cigarStr: String = new String

  /**
   * Make a copy of the current object
   */
  def copy(): CigarType = {
    var cigar = new CigarType
    cigarSegs.foreach(seg => {
      cigar.cigarSegs = cigar.cigarSegs :+ seg.copy
    })
    cigar.cigarStr = cigarStr
    cigar
  }
}
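
CigarType stores the alignment both as structured segments and as the printable SAM string, and copy() deep-copies the segment vector so a cloned alignment can be edited independently. A short sketch building a one-segment cigar; the assumption that op value 0 encodes 'M' (as in BWA's "MIDSH" table) is not spelled out in this dump:

    import cs.ucla.edu.bwaspark.datatype.{CigarSegType, CigarType}

    object CigarDemo {
      def main(args: Array[String]) {
        val m = new CigarSegType
        m.op = 0      // assumption: 0 encodes 'M'
        m.len = 100
        val cigar = new CigarType
        cigar.cigarSegs = cigar.cigarSegs :+ m
        cigar.cigarStr = "100M"
        val clone = cigar.copy
        clone.cigarSegs(0).len = 50   // does not touch the original: copy() is deep
        println(cigar.cigarSegs(0).len + " vs " + clone.cigarSegs(0).len)
      }
    }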
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/PeStatPrepType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

import java.io.ObjectInputStream
import java.io.ObjectOutputStream
import scala.Serializable

class PeStatPrepType extends Serializable {
  var dir: Int = -1  // direction
  var dist: Int = -1 // distance

  private def writeObject(out: ObjectOutputStream) {
    out.writeInt(dir)
    out.writeInt(dist)
  }

  private def readObject(in: ObjectInputStream) {
    dir = in.readInt
    dist = in.readInt
  }

  private def readObjectNoData() {

  }

}
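
The hand-written writeObject/readObject pair is what standard Java serialization invokes when PeStatPrepType instances cross the wire; a round trip through plain object streams exercises the same path:

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
    import cs.ucla.edu.bwaspark.datatype.PeStatPrepType

    object PeStatPrepDemo {
      def main(args: Array[String]) {
        val p = new PeStatPrepType
        p.dir = 1
        p.dist = 350
        val buf = new ByteArrayOutputStream
        val out = new ObjectOutputStream(buf)
        out.writeObject(p)
        out.close()
        val in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray))
        val q = in.readObject.asInstanceOf[PeStatPrepType]
        println("dir=" + q.dir + " dist=" + q.dist)   // dir=1 dist=350
      }
    }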
--------------------------------------------------------------------------------
/src/main/native/kthread.c:
--------------------------------------------------------------------------------
#include <pthread.h>
#include <alloca.h>

struct kt_for_t;

typedef struct {
	struct kt_for_t *t;
	int i;
} ktf_worker_t;

typedef struct kt_for_t {
	int n_threads, n;
	ktf_worker_t *w;
	void (*func)(void*,int,int);
	void *data;
} kt_for_t;

static inline int steal_work(kt_for_t *t)
{
	int i, k, min = 0x7fffffff, min_i = -1;
	for (i = 0; i < t->n_threads; ++i)
		if (min > t->w[i].i) min = t->w[i].i, min_i = i;
	k = __sync_fetch_and_add(&t->w[min_i].i, t->n_threads);
	return k >= t->n? -1 : k;
}

static void *ktf_worker(void *data)
{
	ktf_worker_t *w = (ktf_worker_t*)data;
	int i;
	for (;;) {
		i = __sync_fetch_and_add(&w->i, w->t->n_threads);
		if (i >= w->t->n) break;
		w->t->func(w->t->data, i, w - w->t->w);
	}
	while ((i = steal_work(w->t)) >= 0)
		w->t->func(w->t->data, i, w - w->t->w);
	pthread_exit(0);
}

void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n)
{
	int i;
	kt_for_t t;
	pthread_t *tid;
	t.func = func, t.data = data, t.n_threads = n_threads, t.n = n;
	t.w = (ktf_worker_t*)alloca(n_threads * sizeof(ktf_worker_t));
	tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t));
	for (i = 0; i < n_threads; ++i)
		t.w[i].t = &t, t.w[i].i = i;
	for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktf_worker, &t.w[i]);
	for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/MemChainType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

import scala.collection.mutable.MutableList

class MemSeedType(rbeg_i: Long, qbeg_i: Int, len_i: Int) {
  var rBeg: Long = rbeg_i
  var qBeg: Int = qbeg_i
  var len: Int = len_i
}

class MemChainType(pos_i: Long, seeds_i: MutableList[MemSeedType]) {
  var pos: Long = pos_i
  var seeds: MutableList[MemSeedType] = seeds_i
  var seedsRefArray: Array[MemSeedType] = _

  def print() {
    println("The reference position of the chain: " + pos)
    seeds.map(ele => println("Ref Begin: " + ele.rBeg + ", Query Begin: " + ele.qBeg + ", Length: " + ele.len))
  }

}
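
A chain is just a reference position plus the seeds that were merged into it; constructing one by hand shows the shape that print() walks over:

    import scala.collection.mutable.MutableList
    import cs.ucla.edu.bwaspark.datatype.{MemChainType, MemSeedType}

    object ChainDemo {
      def main(args: Array[String]) {
        val seeds = MutableList(new MemSeedType(1000000L, 0, 19),
                                new MemSeedType(1000040L, 40, 25))
        val chain = new MemChainType(1000000L, seeds)
        chain.print()
      }
    }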
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/BNTAmbType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

import java.io.ObjectInputStream
import java.io.ObjectOutputStream
import scala.Serializable

class BNTAmbType(offset_l: Long,
                 len_i: Int,
                 amb_c: Char) extends Serializable {
  var offset = offset_l
  var len = len_i
  var amb = amb_c

  private def writeObject(out: ObjectOutputStream) {
    out.writeLong(offset)
    out.writeInt(len)
    out.writeChar(amb) // paired with the readChar in readObject
  }

  private def readObject(in: ObjectInputStream) {
    offset = in.readLong
    len = in.readInt
    amb = in.readChar
  }

  private def readObjectNoData() {

  }

}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/commandline/UploadCommand.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.commandline

class UploadFASTQCommand {
  var isPairEnd: Int = 0               // pair-end or single-end data
  var filePartitionNum: Int = 0        // the number of partitions in HDFS for this batch; we suggest setting this to the number of cores in the cluster
  var inputFASTQFilePath1: String = "" // the first input path of the FASTQ file in the local file system (for both single-end and pair-end)
  var inputFASTQFilePath2: String = "" // the second input path of the FASTQ file in the local file system (for pair-end)
  var outFileHDFSPath: String = ""     // the root path of the output FASTQ files in HDFS
  var batchedNum: Int = 250000         // (optional) the number of lines to be read in one group (batch)
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/util/LocusEncode.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.util

object LocusEncode {
  /**
   * Locus encoding scheme in FASTQ for different types of reads (e.g. Illumina, SOLiD, and so on)
   */
  def locusEncode(locus: Char): Byte = {
    // Transform A/C/G/T to 0,1,2,3.
    // SOLiD reads are already encoded as 0,1,2,3 from the second position onward.
    // Please refer to: https://en.wikipedia.org/wiki/FASTQ_format (color space)
    locus match {
      case 'A' => 0
      case 'a' => 0
      case '0' => 0
      case 'C' => 1
      case 'c' => 1
      case '1' => 1
      case 'G' => 2
      case 'g' => 2
      case '2' => 2
      case 'T' => 3
      case 't' => 3
      case '3' => 3
      case '-' => 5
      case _ => 4
    }
  }

}
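
So 'A'/'a'/'0' all collapse to 0, the gap character '-' maps to 5, and anything unrecognized (including 'N') becomes the ambiguity code 4. Encoding a whole read is a map over its characters:

    import cs.ucla.edu.bwaspark.util.LocusEncode.locusEncode

    object EncodeDemo {
      def main(args: Array[String]) {
        val read = "ACGTNacgt-"
        val encoded: Array[Byte] = read.map(locusEncode).toArray
        println(encoded.mkString(","))   // 0,1,2,3,4,0,1,2,3,5
      }
    }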
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/sam/SAMWriter.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.sam

import java.io.BufferedWriter
import java.nio.charset.Charset
import java.nio.file.{Files,Path,Paths}


class SAMWriter() {
  var writer: BufferedWriter = _
  var outFile: String = new String

  def init(path: String) {
    outFile = path
    writer = Files.newBufferedWriter(Paths.get(outFile), Charset.forName("utf-8"))
  }

  def writeString(str: String) {
    writer.write(str, 0, str.length)
  }

  def writeStringArray(strArray: Array[String]) {
    var i = 0
    while(i < strArray.length) {
      writer.write(strArray(i), 0, strArray(i).length)
      i += 1
    }
    writer.flush
  }

  def flush() {
    writer.flush
  }

  def close() {
    writer.close
  }
}
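
SAMWriter is deliberately thin: init opens a UTF-8 writer on a local path, writeStringArray flushes after each batch, and close must be called so the tail of the last writeString is not lost. Typical driver-side use:

    import cs.ucla.edu.bwaspark.sam.SAMWriter

    object SAMWriterDemo {
      def main(args: Array[String]) {
        val w = new SAMWriter
        w.init("output.sam")
        w.writeString("@HD\tVN:1.5\tSO:unsorted\n")
        w.writeStringArray(Array("read1\t0\tchr1\t100\t60\t100M\t*\t0\t0\t*\t*\n"))
        w.close()
      }
    }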
--------------------------------------------------------------------------------
/src/main/alphadata/my_timer.h:
--------------------------------------------------------------------------------
#ifndef MY_TIMER_H
#define MY_TIMER_H

#include <stdio.h>
#include <time.h>

typedef struct timespec timespec;
timespec diff(timespec start, timespec end)
{
	timespec temp;
	if ((end.tv_nsec-start.tv_nsec)<0) {
		temp.tv_sec = end.tv_sec-start.tv_sec-1;
		temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
	} else {
		temp.tv_sec = end.tv_sec-start.tv_sec;
		temp.tv_nsec = end.tv_nsec-start.tv_nsec;
	}
	return temp;
}

timespec sum(timespec t1, timespec t2) {
	timespec temp;
	if (t1.tv_nsec + t2.tv_nsec >= 1000000000) {
		temp.tv_sec = t1.tv_sec + t2.tv_sec + 1;
		temp.tv_nsec = t1.tv_nsec + t2.tv_nsec - 1000000000;
	} else {
		temp.tv_sec = t1.tv_sec + t2.tv_sec;
		temp.tv_nsec = t1.tv_nsec + t2.tv_nsec;
	}
	return temp;
}

void printTimeSpec(timespec t) {
	printf("elapsed time: %d.%09d\n", (int)t.tv_sec, (int)t.tv_nsec);
}

timespec tic( )
{
	timespec start_time;
	clock_gettime(CLOCK_REALTIME, &start_time);
	return start_time;
}

void toc( timespec* start_time )
{
	timespec current_time;
	clock_gettime(CLOCK_REALTIME, &current_time);
	printTimeSpec( diff( *start_time, current_time ) );
	*start_time = current_time;
}

void accTime (timespec* accSpec, timespec* oldTime) {
	timespec current_time;
	clock_gettime(CLOCK_REALTIME, &current_time);
	timespec delta = diff(*oldTime, current_time);
	//printTimeSpec(delta);
	*oldTime = current_time;
	*accSpec = sum(*accSpec, delta);
	//printTimeSpec(*accSpec);
}

#endif

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/MemPeStat.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

import java.io.ObjectInputStream
import java.io.ObjectOutputStream
import scala.Serializable

// Original data structure: mem_pestat_t in bwamem.h
class MemPeStat extends Serializable {
  var low: Int = 0
  var high: Int = 0
  var failed: Int = 0
  var avg: Double = 0
  var std: Double = 0

  private def writeObject(out: ObjectOutputStream) {
    out.writeInt(low)
    out.writeInt(high)
    out.writeInt(failed)
    out.writeDouble(avg)
    out.writeDouble(std)
  }

  private def readObject(in: ObjectInputStream) {
    low = in.readInt
    high = in.readInt
    failed = in.readInt
    avg = in.readDouble
    std = in.readDouble
  }

  private def readObjectNoData() {

  }

}
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/SAMString.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

class SAMString {
  var str: Array[Char] = new Array[Char](8192)
  var idx: Int = 0
  var size: Int = 8192

  def addCharArray(in: Array[Char]) {
    if((idx + in.size + 1) >= size) {
      size = size << 2
      val old = str
      str = new Array[Char](size)
      old.copyToArray(str, 0, idx + 1)
    }

    var i = 0
    while(i < in.size) {
      str(idx) = in(i)
      i += 1
      idx += 1
    }
  }

  def addChar(c: Char) {
    if((idx + 1) >= size) {
      size = size << 2
      val old = str
      str = new Array[Char](size)
      old.copyToArray(str, 0, idx + 1)
    }

    str(idx) = c
    idx += 1
  }

  override def toString = new String(str, 0, idx)

}
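
SAMString is a growable Char buffer tuned for building one SAM line without intermediate String allocations; when an append would overflow, the capacity is quadrupled (size << 2) and the old contents are copied over. Usage:

    import cs.ucla.edu.bwaspark.datatype.SAMString

    object SAMStringDemo {
      def main(args: Array[String]) {
        val s = new SAMString
        s.addCharArray("read1\t0\tchr1".toCharArray)
        s.addChar('\t')
        s.addCharArray("60".toCharArray)
        println(s.toString)   // read1<TAB>0<TAB>chr1<TAB>60
      }
    }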
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/BWTIntvType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

class BWTIntvType(startPoint_l: Int, //uint32_t
                  endPoint_l: Int,   //uint32_t
                  k_l: Long,         //uint64_t
                  l_l: Long,         //uint64_t
                  s_l: Long) {       //uint64_t

  // endPoint - startPoint = length of the seed
  var startPoint = startPoint_l
  var endPoint = endPoint_l

  // a tuple (k, l, s) stands for a bi-interval, which is consistent with Heng Li's paper
  var k = k_l
  var l = l_l
  var s = s_l

  def print() {
    println("start " + startPoint + ", end " + endPoint + ", (k, l, s) (" + k + ", " + l + ", " + s + ").")
  }

  def copy(intv: BWTIntvType) {
    startPoint = intv.startPoint
    endPoint = intv.endPoint
    k = intv.k
    l = intv.l
    s = intv.s
  }
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/profiling/PairEndBatchedProfile.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.profiling

import scala.Serializable

import java.io.ObjectInputStream
import java.io.ObjectOutputStream
import java.io.ObjectStreamException

import cs.ucla.edu.bwaspark.datatype.PairEndReadType

class PairEndBatchedProfile extends Serializable {
  var pairEndReadArray: Array[PairEndReadType] = _
  var swBatchTimeBreakdown: SWBatchTimeBreakdown = _

  private def writeObject(out: ObjectOutputStream) {
    out.writeObject(pairEndReadArray)
    out.writeObject(swBatchTimeBreakdown)
  }

  private def readObject(in: ObjectInputStream) {
    pairEndReadArray = in.readObject.asInstanceOf[Array[PairEndReadType]]
    swBatchTimeBreakdown = in.readObject.asInstanceOf[SWBatchTimeBreakdown]
  }

  private def readObjectNoData() {

  }
}

--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/sam/SAMHDFSWriter.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.sam

import java.io.BufferedWriter
import java.io.OutputStreamWriter
import java.nio.charset.Charset

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.FSDataInputStream
import org.apache.hadoop.fs.FSDataOutputStream
import org.apache.hadoop.fs.Path


class SAMHDFSWriter(outFile: String) {
  var writer: BufferedWriter = _

  def init() {
    val fs = FileSystem.get(new Configuration)
    writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(outFile + "/header"))))
  }

  def writeString(str: String) {
    writer.write(str, 0, str.length)
  }

  def writeStringArray(strArray: Array[String]) {
    var i = 0
    while(i < strArray.length) {
      writer.write(strArray(i), 0, strArray(i).length)
      i += 1
    }
    writer.flush
  }

  def close() {
    writer.close
  }
}
--------------------------------------------------------------------------------
/src/main/scala/cs/ucla/edu/bwaspark/datatype/SMemItrType.scala:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package cs.ucla.edu.bwaspark.datatype

import scala.collection.mutable.MutableList

// Used for the smemNext function
// bwt: BWT index and suffix array
// query: read
// start: the start point for forward and backward extension
// len: the length of the read
// matches: iteratively calling smemNext will accumulate this mutable list
// sub: temporary bi-interval array
// tmpVec0: temporary array 0
// tmpVec1: temporary array 1
class SMemItrType(bwt_c: BWTType,
                  query_c: Array[Byte], //uint8_t
                  start_i: Int,
                  len_i: Int,
                  matches_c: MutableList[BWTIntvType],
                  sub_c: MutableList[BWTIntvType],
                  tmpVec0_c: MutableList[BWTIntvType],
                  tmpVec1_c: MutableList[BWTIntvType]) {
  var bwt = bwt_c
  var query = query_c
  var start = start_i
  var len = len_i
  var matches = matches_c
  var sub = sub_c
  var tmpVec0 = tmpVec0_c
  var tmpVec1 = tmpVec1_c
}
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.dnaseq 20 | 21 | import org.apache.spark.SparkContext 22 | import org.apache.spark.rdd.RDD 23 | 24 | import org.bdgenomics.formats.avro._ 25 | import org.bdgenomics.adam.rdd.ADAMContext 26 | import org.bdgenomics.adam.rdd.ADAMContext._ 27 | 28 | import org.apache.hadoop.conf._ 29 | import org.apache.hadoop.fs._ 30 | 31 | import java.net.URI 32 | 33 | object MergeADAMFiles extends Serializable { 34 | def apply(sc: SparkContext, hdfsAddress: String, alignmentsRootPath: String, coalesceFactor: Int) = { 35 | val conf = new Configuration 36 | val fs = FileSystem.get(new URI(hdfsAddress), conf) 37 | val paths = fs.listStatus(new Path(alignmentsRootPath)).map(ele => ele.getPath) 38 | val totalFilePartitions = paths.flatMap(p => fs.listStatus(p)).map(ele => ele.getPath).size 39 | println("Total number of new file partitions: " + (totalFilePartitions/coalesceFactor)) 40 | val adamRecords: RDD[AlignmentRecord] = new ADAMContext(sc).loadAlignmentsFromPaths(paths) 41 | adamRecords.coalesce(totalFilePartitions/coalesceFactor) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/native/bwa.h: -------------------------------------------------------------------------------- 1 | #ifndef BWA_H_ 2 | #define BWA_H_ 3 | 4 | #include <stdint.h> 5 | #include "bntseq.h" 6 | #include "bwt.h" 7 | 8 | #define BWA_IDX_BWT 0x1 9 | #define BWA_IDX_BNS 0x2 10 | #define BWA_IDX_PAC 0x4 11 | #define BWA_IDX_ALL 0x7 12 | 13 | typedef struct { 14 | bwt_t *bwt; // FM-index 15 | bntseq_t *bns; // information on the reference sequences 16 | uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base 17 | } bwaidx_t; 18 | 19 | typedef struct { 20 | int l_seq; 21 | char *name, *comment, *seq, *qual, *sam; 22 | } bseq1_t; 23 | 24 | extern int bwa_verbose; 25 | extern char bwa_rg_id[256]; 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_); 32 | 33 | void bwa_fill_scmat(int a, int b, int8_t mat[25]); 34 | uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM); 35 | uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM); 36 | int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re); 37 | int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w, const bntseq_t *bns, const
uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re); 38 | 39 | char *bwa_idx_infer_prefix(const char *hint); 40 | bwt_t *bwa_idx_load_bwt(const char *hint); 41 | 42 | bwaidx_t *bwa_idx_load(const char *hint, int which); 43 | void bwa_idx_destroy(bwaidx_t *idx); 44 | 45 | void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line); 46 | char *bwa_set_rg(const char *s); 47 | 48 | #ifdef __cplusplus 49 | } 50 | #endif 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/dnaseq/Sort.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.dnaseq 20 | 21 | import org.apache.spark.SparkContext 22 | import org.apache.spark.rdd.RDD 23 | 24 | import org.bdgenomics.formats.avro._ 25 | import org.bdgenomics.adam.rdd.ADAMContext 26 | import org.bdgenomics.adam.rdd.ADAMContext._ 27 | 28 | import org.apache.hadoop.conf._ 29 | import org.apache.hadoop.fs._ 30 | 31 | import java.net.URI 32 | 33 | object Sort extends Serializable { 34 | def apply(sc: SparkContext, hdfsAddress: String, alignmentsRootPath: String, coalesceFactor: Int) = { 35 | val conf = new Configuration 36 | val fs = FileSystem.get(new URI(hdfsAddress), conf) 37 | val paths = fs.listStatus(new Path(alignmentsRootPath)).map(ele => ele.getPath) 38 | val totalFilePartitions = paths.flatMap(p => fs.listStatus(p)).map(ele => ele.getPath).size 39 | println("Total number of new file partitions: " + (totalFilePartitions/coalesceFactor)) 40 | val adamRecords: RDD[AlignmentRecord] = new ADAMContext(sc).loadAlignmentsFromPaths(paths) 41 | adamRecords.coalesce(totalFilePartitions/coalesceFactor).adamSortReadsByReferencePosition() 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/datatype/MemAlnType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.datatype 20 | 21 | class MemAlnType { 22 | var pos: Long = 0 // forward strand 5'-end mapping position 23 | var rid: Int = 0 // reference sequence index in bntseq_t; <0 for unmapped 24 | var flag: Int = 0 // extra flag 25 | var isRev: Byte = 0 // is_rev: whether on the reverse strand 26 | var mapq: Short = 0 // mapq: mapping quality 27 | var NM: Int = 0 // NM: edit distance 28 | var nCigar: Int = 0 // number of CIGAR operations 29 | var cigar: CigarType = _ // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234 30 | var score: Int = 0 31 | var sub: Int = 0 32 | 33 | /** 34 | * Make a copy of the current object 35 | */ 36 | def copy(): MemAlnType = { 37 | var aln = new MemAlnType 38 | aln.pos = pos 39 | aln.rid = rid 40 | aln.flag = flag 41 | aln.isRev = isRev 42 | aln.mapq = mapq 43 | aln.NM = NM 44 | aln.nCigar = nCigar 45 | if(nCigar > 0) 46 | aln.cigar = cigar.copy 47 | aln.score = score 48 | aln.sub = sub 49 | aln 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /src/main/perl/cal_aligned_percentage.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/perl 2 | 3 | # * 4 | # * Licensed to the Apache Software Foundation (ASF) under one or more 5 | # * contributor license agreements. See the NOTICE file distributed with 6 | # * this work for additional information regarding copyright ownership. 7 | # * The ASF licenses this file to You under the Apache License, Version 2.0 8 | # * (the "License"); you may not use this file except in compliance with 9 | # * the License. You may obtain a copy of the License at 10 | # * 11 | # * http://www.apache.org/licenses/LICENSE-2.0 12 | # * 13 | # * Unless required by applicable law or agreed to in writing, software 14 | # * distributed under the License is distributed on an "AS IS" BASIS, 15 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # * See the License for the specific language governing permissions and 17 | # * limitations under the License. 18 | # * 19 | 20 | 21 | # input SAM file 22 | $sam = $ARGV[0]; 23 | $is_pair_end = $ARGV[1]; 24 | $aligned_num = 0; 25 | $total_read_num = 0; 26 | $prev_read = ""; 27 | 28 | open IN, $sam or die "cannot open $sam\n"; 29 | 30 | while(<IN>) { 31 | if($_ =~ /^([a-zA-Z0-9_\:\@\/]+)\t(\d+)/) { 32 | if($is_pair_end == 1) { 33 | $is_secondary = $2 & 0x100; 34 | 35 | if(($is_secondary == 0) || ($1 ne $prev_read)) { 36 | if(($2 & 0x4) == 0) { 37 | $aligned_num++; 38 | } 39 | $total_read_num++; 40 | } 41 | 42 | $prev_read = $1; 43 | } 44 | else { 45 | if(($1 ne $prev_read)) { 46 | if(($2 & 0x4) == 0) { 47 | $aligned_num++; 48 | } 49 | $total_read_num++; 50 | } 51 | 52 | $prev_read = $1; 53 | } 54 | } 55 | 56 | } 57 | 58 | close IN; 59 | 60 | $percentage = ($aligned_num / $total_read_num) * 100; 61 | 62 | print "Input file: " . $sam . "\n"; 63 | print "Total # of reads: " . $total_read_num . "\n"; 64 | print "Total # of aligned reads: " . $aligned_num .
"\n"; 65 | print "Aligned (%): " . $percentage . "%\n"; 66 | 67 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/datatype/BNTAnnType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.datatype 20 | 21 | import java.io.ObjectInputStream 22 | import java.io.ObjectOutputStream 23 | import scala.Serializable 24 | 25 | class BNTAnnType (offset_l: Long, 26 | len_i: Int, 27 | n_ambs_i: Int, 28 | gi_i: Int, //uint32_t 29 | name_s: String, 30 | anno_s: String) extends Serializable { 31 | var offset = offset_l 32 | var len = len_i 33 | var n_ambs = n_ambs_i 34 | var gi = gi_i 35 | var name = name_s 36 | var anno = anno_s 37 | 38 | private def writeObject(out: ObjectOutputStream) { 39 | out.writeLong(offset) 40 | out.writeInt(len) 41 | out.writeInt(n_ambs) 42 | out.writeInt(gi) 43 | out.writeObject(name) 44 | out.writeObject(anno) 45 | } 46 | 47 | private def readObject(in: ObjectInputStream) { 48 | offset = in.readLong 49 | len = in.readInt 50 | n_ambs = in.readInt 51 | gi = in.readInt 52 | name = in.readObject.asInstanceOf[String] 53 | anno = in.readObject.asInstanceOf[String] 54 | } 55 | 56 | private def readObjectNoData() { 57 | 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /src/main/python/sam_single_end_permutation_inplace.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | 3 | # * 4 | # * Licensed to the Apache Software Foundation (ASF) under one or more 5 | # * contributor license agreements. See the NOTICE file distributed with 6 | # * this work for additional information regarding copyright ownership. 7 | # * The ASF licenses this file to You under the Apache License, Version 2.0 8 | # * (the "License"); you may not use this file except in compliance with 9 | # * the License. You may obtain a copy of the License at 10 | # * 11 | # * http://www.apache.org/licenses/LICENSE-2.0 12 | # * 13 | # * Unless required by applicable law or agreed to in writing, software 14 | # * distributed under the License is distributed on an "AS IS" BASIS, 15 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # * See the License for the specific language governing permissions and 17 | # * limitations under the License. 
18 | # * 19 | 20 | import random 21 | import sys 22 | 23 | infile = sys.argv[1] # input file name 24 | outfile = sys.argv[2] # output file name 25 | 26 | class FASTQ: 27 | name = '' 28 | seq = '' 29 | comment = '' 30 | quality = '' 31 | 32 | i = 0 33 | 34 | fastqList = [] 35 | 36 | file = open(infile, 'r') 37 | for line in file: 38 | if i % 4 == 0: 39 | inStr = FASTQ() 40 | inStr.name = line 41 | elif i % 4 == 1: 42 | inStr.seq = line 43 | elif i % 4 == 2: 44 | inStr.comment = line 45 | else: 46 | inStr.quality = line 47 | fastqList.append(inStr) 48 | i += 1 49 | file.close() 50 | 51 | # permute fastqList 52 | fastqListLen = len(fastqList) 53 | for i in range(fastqListLen): 54 | j = random.randint(i, fastqListLen - 1) 55 | tmp = fastqList[i] 56 | fastqList[i] = fastqList[j] 57 | fastqList[j] = tmp 58 | 59 | # dump output permuted FASTQ file 60 | file = open(outfile, 'w') 61 | for fq in fastqList: 62 | file.write(fq.name) 63 | file.write(fq.seq) 64 | file.write(fq.comment) 65 | file.write(fq.quality) 66 | file.close() 67 | 68 | -------------------------------------------------------------------------------- /src/main/java/accUCLA/accAPI/big2LittleEndian.java: -------------------------------------------------------------------------------- 1 | 2 | package accUCLA.api; 3 | 4 | import java.lang.Float; 5 | 6 | public class big2LittleEndian { 7 | 8 | public static int Int(int i) { 9 | int b0,b1,b2,b3; 10 | 11 | b0 = (i&0xff)>>0; 12 | b1 = (i&0xff00)>>8; 13 | b2 = (i&0xff0000)>>16; 14 | b3 = (i&0xff000000)>>24; 15 | 16 | return ((b0<<24)|(b1<<16)|(b2<<8)|(b3<<0)); 17 | } 18 | public static byte[] Float(float f) { 19 | int floatBits = Float.floatToIntBits(f); 20 | byte floatBytes[] = new byte[4]; 21 | floatBytes[3] = (byte)(floatBits>>24 & 0xff); 22 | floatBytes[2] = (byte)(floatBits>>16 & 0xff); 23 | floatBytes[1] = (byte)(floatBits>>8 & 0xff); 24 | floatBytes[0] = (byte)(floatBits & 0xff); 25 | return floatBytes; 26 | } 27 | 28 | public static byte[] floatArray(float[] f) { 29 | int len = f.length; 30 | byte floatBytes[] = new byte[4*len]; 31 | for (int i = 0; i < len; i++) { 32 | int floatBits = Float.floatToIntBits(f[i]); 33 | floatBytes[4*i + 3] = (byte)(floatBits>>24 & 0xff); 34 | floatBytes[4*i + 2] = (byte)(floatBits>>16 & 0xff); 35 | floatBytes[4*i + 1] = (byte)(floatBits>>8 & 0xff); 36 | floatBytes[4*i + 0] = (byte)(floatBits & 0xff); 37 | } 38 | return floatBytes; 39 | } 40 | 41 | public static byte[] IntArray(int[] f) { 42 | int len = f.length; 43 | byte intBytes[] = new byte[4*len]; 44 | for (int i = 0; i < len; i++) { 45 | intBytes[4*i + 3] = (byte)(f[i]>>24 & 0xff); 46 | intBytes[4*i + 2] = (byte)(f[i]>>16 & 0xff); 47 | intBytes[4*i + 1] = (byte)(f[i]>>8 & 0xff); 48 | intBytes[4*i + 0] = (byte)(f[i] & 0xff); 49 | } 50 | return intBytes; 51 | } 52 | 53 | public static byte[] floatArray(float[][] f) { 54 | int len1 = f.length; 55 | int len2 = f[0].length; 56 | float[] a = new float[len1*len2]; 57 | for (int i = 0; i < len1; i++) 58 | { 59 | System.arraycopy(f[i],0,a,i*len2,len2); 60 | } 61 | return floatArray(a); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/datatype/ReadType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.datatype 20 | 21 | import scala.Serializable 22 | 23 | import org.apache.avro.io._ 24 | import org.apache.avro.specific.SpecificDatumReader 25 | import org.apache.avro.specific.SpecificDatumWriter 26 | 27 | import cs.ucla.edu.avro.fastq._ 28 | 29 | import java.io.ObjectInputStream 30 | import java.io.ObjectOutputStream 31 | import java.io.ObjectStreamException 32 | 33 | class ReadType extends Serializable { 34 | var seq: FASTQRecord = _ 35 | var regs: Array[MemAlnRegType] = _ 36 | 37 | private def writeObject(out: ObjectOutputStream) { 38 | out.writeObject(regs) 39 | val writer = new SpecificDatumWriter[FASTQRecord](classOf[FASTQRecord]) 40 | val encoder = EncoderFactory.get.binaryEncoder(out, null) 41 | writer.write(seq, encoder) 42 | encoder.flush() 43 | } 44 | 45 | private def readObject(in: ObjectInputStream) { 46 | regs = in.readObject.asInstanceOf[Array[MemAlnRegType]] 47 | val reader = new SpecificDatumReader[FASTQRecord](classOf[FASTQRecord]); 48 | val decoder = DecoderFactory.get.binaryDecoder(in, null); 49 | seq = reader.read(null, decoder).asInstanceOf[FASTQRecord] 50 | } 51 | 52 | private def readObjectNoData() { 53 | 54 | } 55 | } 56 | 57 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/broadcast/ReferenceBroadcast.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
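A sketch of the two modes supported by the wrapper defined below (sc and idx are assumed to be an existing SparkContext and a loaded BWAIdxType; the local path is hypothetical):

    // broadcast mode: the index is shipped to the executors once
    val ref = new ReferenceBroadcast(sc.broadcast(idx), false, "")
    // local mode: each node lazily loads its own copy, bypassing the broadcast
    val localRef = new ReferenceBroadcast(null, true, "/local/ref/hg19.fasta")
    val bwaIdx = ref.value   // materialized at most once per JVM via the lazy val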
16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.broadcast 20 | 21 | import java.io._ 22 | 23 | import org.apache.spark.broadcast.Broadcast 24 | import org.apache.spark.SparkException 25 | import org.apache.spark.Logging 26 | import org.apache.spark.util.Utils 27 | 28 | import scala.reflect.ClassTag 29 | 30 | import cs.ucla.edu.bwaspark.datatype.BWAIdxType 31 | 32 | class ReferenceBroadcast(bd: Broadcast[BWAIdxType], isFromLocal: Boolean, path: String) extends Serializable { 33 | lazy val reference: BWAIdxType = init() 34 | 35 | private def init(): BWAIdxType = { 36 | this.synchronized { 37 | if(isFromLocal) { 38 | val ref = new BWAIdxType 39 | println("Load reference genome") 40 | ref.load(path, 0) 41 | ref 42 | } 43 | else 44 | bd.value 45 | } 46 | } 47 | 48 | def value() = { 49 | reference 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/datatype/PairEndReadType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
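The class defined below bundles both mates and their alignment regions into one shuffle-friendly unit; a construction sketch (mate0/mate1 and regs0/regs1 are assumed outputs of the FASTQ parsing and seeding stages):

    val pair = new PairEndReadType
    pair.seq0 = mate0; pair.regs0 = regs0   // FASTQRecord + Array[MemAlnRegType] for end 0
    pair.seq1 = mate1; pair.regs1 = regs1   // and for end 1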
16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.datatype 20 | 21 | import scala.Serializable 22 | 23 | import org.apache.avro.io._ 24 | import org.apache.avro.specific.SpecificDatumReader 25 | import org.apache.avro.specific.SpecificDatumWriter 26 | 27 | import cs.ucla.edu.avro.fastq._ 28 | 29 | import java.io.ObjectInputStream 30 | import java.io.ObjectOutputStream 31 | import java.io.ObjectStreamException 32 | 33 | class PairEndReadType extends Serializable { 34 | var seq0: FASTQRecord = _ 35 | var regs0: Array[MemAlnRegType] = _ 36 | var seq1: FASTQRecord = _ 37 | var regs1: Array[MemAlnRegType] = _ 38 | 39 | private def writeObject(out: ObjectOutputStream) { 40 | out.writeObject(regs0) 41 | out.writeObject(regs1) 42 | val writer = new SpecificDatumWriter[FASTQRecord](classOf[FASTQRecord]) 43 | val encoder = EncoderFactory.get.binaryEncoder(out, null) 44 | writer.write(seq0, encoder) 45 | writer.write(seq1, encoder) 46 | encoder.flush() 47 | } 48 | 49 | private def readObject(in: ObjectInputStream) { 50 | regs0 = in.readObject.asInstanceOf[Array[MemAlnRegType]] 51 | regs1 = in.readObject.asInstanceOf[Array[MemAlnRegType]] 52 | val reader = new SpecificDatumReader[FASTQRecord](classOf[FASTQRecord]); 53 | val decoder = DecoderFactory.get.binaryDecoder(in, null); 54 | seq0 = reader.read(null, decoder).asInstanceOf[FASTQRecord] 55 | seq1 = reader.read(null, decoder).asInstanceOf[FASTQRecord] 56 | } 57 | 58 | private def readObjectNoData() { 59 | 60 | } 61 | } 62 | 63 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/fastq/SerializablePairEndFASTQRecord.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.fastq; 20 | 21 | import cs.ucla.edu.avro.fastq.*; 22 | import org.apache.avro.io.*; 23 | import org.apache.avro.specific.SpecificDatumReader; 24 | import org.apache.avro.specific.SpecificDatumWriter; 25 | 26 | import java.io.IOException; 27 | import java.io.ObjectStreamException; 28 | import java.io.Serializable; 29 | 30 | /** 31 | * For now, Spark does not support Avro. This class is just a quick 32 | * workaround that (de)serializes PairEndFASTQRecord objects using Avro. 
33 | */ 34 | public class SerializablePairEndFASTQRecord extends PairEndFASTQRecord implements Serializable { 35 | 36 | private void setValues(PairEndFASTQRecord rec) { 37 | setSeq0(rec.getSeq0()); 38 | setSeq1(rec.getSeq1()); 39 | } 40 | 41 | public SerializablePairEndFASTQRecord(PairEndFASTQRecord rec) { 42 | setValues(rec); 43 | } 44 | 45 | private void writeObject(java.io.ObjectOutputStream out) 46 | throws IOException { 47 | DatumWriter writer = new SpecificDatumWriter(PairEndFASTQRecord.class); 48 | Encoder encoder = EncoderFactory.get().binaryEncoder(out, null); 49 | writer.write(this, encoder); 50 | encoder.flush(); 51 | } 52 | 53 | private void readObject(java.io.ObjectInputStream in) 54 | throws IOException, ClassNotFoundException { 55 | DatumReader reader = 56 | new SpecificDatumReader(PairEndFASTQRecord.class); 57 | Decoder decoder = DecoderFactory.get().binaryDecoder(in, null); 58 | setValues(reader.read(null, decoder)); 59 | } 60 | 61 | private void readObjectNoData() 62 | throws ObjectStreamException { 63 | } 64 | 65 | } 66 | 67 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/fastq/SerializableFASTQRecord.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.fastq; 20 | 21 | import cs.ucla.edu.avro.fastq.*; 22 | import org.apache.avro.io.*; 23 | import org.apache.avro.specific.SpecificDatumReader; 24 | import org.apache.avro.specific.SpecificDatumWriter; 25 | 26 | import java.io.IOException; 27 | import java.io.ObjectStreamException; 28 | import java.io.Serializable; 29 | 30 | /** 31 | * For now, Spark does not support Avro. This class is just a quick 32 | * workaround that (de)serializes FASTQRecord objects using Avro. 
33 | */ 34 | public class SerializableFASTQRecord extends FASTQRecord implements Serializable { 35 | 36 | private void setValues(FASTQRecord rec) { 37 | setName(rec.getName()); 38 | setSeq(rec.getSeq()); 39 | setSeqLength(rec.getSeqLength()); 40 | setQuality(rec.getQuality()); 41 | setComment(rec.getComment()); 42 | } 43 | 44 | public SerializableFASTQRecord(FASTQRecord rec) { 45 | setValues(rec); 46 | } 47 | 48 | private void writeObject(java.io.ObjectOutputStream out) 49 | throws IOException { 50 | DatumWriter writer = new SpecificDatumWriter(FASTQRecord.class); 51 | Encoder encoder = EncoderFactory.get().binaryEncoder(out, null); 52 | writer.write(this, encoder); 53 | encoder.flush(); 54 | } 55 | 56 | private void readObject(java.io.ObjectInputStream in) 57 | throws IOException, ClassNotFoundException { 58 | DatumReader reader = 59 | new SpecificDatumReader(FASTQRecord.class); 60 | Decoder decoder = DecoderFactory.get().binaryDecoder(in, null); 61 | setValues(reader.read(null, decoder)); 62 | } 63 | 64 | private void readObjectNoData() 65 | throws ObjectStreamException { 66 | } 67 | 68 | } 69 | 70 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/datatype/MemAlnRegType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
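Given the field conventions documented below, picking the primary hits out of a region array is a one-liner (regs is an assumed Array[MemAlnRegType]):

    val primaries = regs.filter(_.secondary < 0)   // secondary >= 0 indexes the shadowing parent hit
    val best = primaries.maxBy(_.score)            // highest local SW score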
16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.datatype 20 | 21 | import java.io.ObjectInputStream 22 | import java.io.ObjectOutputStream 23 | import scala.Serializable 24 | 25 | class MemAlnRegType extends Serializable { 26 | var rBeg: Long = 0 // [rBeg,rEnd): reference sequence in the alignment 27 | var rEnd: Long = 0 28 | var qBeg: Int = 0 // [qBeg,qEnd): query sequence in the alignment 29 | var qEnd: Int = 0 30 | var score: Int = 0 // best local SW score 31 | var trueScore: Int = 0 // actual score corresponding to the aligned region; possibly smaller than $score 32 | var sub: Int = 0 // 2nd best SW score 33 | var csub: Int = 0 // SW score of a tandem hit 34 | var subNum: Int = 0 // approximate number of suboptimal hits 35 | var width: Int = 0 // actual band width used in extension 36 | var seedCov: Int = 0 // length of regions covered by seeds 37 | var secondary: Int = 0 // index of the parent hit shadowing the current hit; <0 if primary 38 | var hash: Long = 0 39 | 40 | private def writeObject(out: ObjectOutputStream) { 41 | out.writeLong(rBeg) 42 | out.writeLong(rEnd) 43 | out.writeInt(qBeg) 44 | out.writeInt(qEnd) 45 | out.writeInt(score) 46 | out.writeInt(trueScore) 47 | out.writeInt(sub) 48 | out.writeInt(csub) 49 | out.writeInt(subNum) 50 | out.writeInt(width) 51 | out.writeInt(seedCov) 52 | out.writeInt(secondary) 53 | out.writeLong(hash) 54 | } 55 | 56 | private def readObject(in: ObjectInputStream) { 57 | rBeg = in.readLong 58 | rEnd = in.readLong 59 | qBeg = in.readInt 60 | qEnd = in.readInt 61 | score = in.readInt 62 | trueScore = in.readInt 63 | sub = in.readInt 64 | csub = in.readInt 65 | subNum = in.readInt 66 | width = in.readInt 67 | seedCov = in.readInt 68 | secondary = in.readInt 69 | hash = in.readLong 70 | } 71 | 72 | private def readObjectNoData() { 73 | 74 | } 75 | 76 | } 77 | 78 | -------------------------------------------------------------------------------- /src/main/native/bntseq.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008 Genome Research Ltd (GRL). 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE.
24 | */ 25 | 26 | /* Contact: Heng Li */ 27 | 28 | #ifndef BWT_BNTSEQ_H 29 | #define BWT_BNTSEQ_H 30 | 31 | #include <stdint.h> 32 | #include <stdio.h> 33 | #include <zlib.h> 34 | 35 | #ifndef BWA_UBYTE 36 | #define BWA_UBYTE 37 | typedef uint8_t ubyte_t; 38 | #endif 39 | 40 | typedef struct { 41 | int64_t offset; 42 | int32_t len; 43 | int32_t n_ambs; 44 | uint32_t gi; 45 | char *name, *anno; 46 | } bntann1_t; 47 | 48 | typedef struct { 49 | int64_t offset; 50 | int32_t len; 51 | char amb; 52 | } bntamb1_t; 53 | 54 | typedef struct { 55 | int64_t l_pac; 56 | int32_t n_seqs; 57 | uint32_t seed; 58 | bntann1_t *anns; // n_seqs elements 59 | int32_t n_holes; 60 | bntamb1_t *ambs; // n_holes elements 61 | FILE *fp_pac; 62 | } bntseq_t; 63 | 64 | extern unsigned char nst_nt4_table[256]; 65 | 66 | #ifdef __cplusplus 67 | extern "C" { 68 | #endif 69 | 70 | void bns_dump(const bntseq_t *bns, const char *prefix); 71 | bntseq_t *bns_restore(const char *prefix); 72 | bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename); 73 | void bns_destroy(bntseq_t *bns); 74 | int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only); 75 | int bns_pos2rid(const bntseq_t *bns, int64_t pos_f); 76 | int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id); 77 | uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len); 78 | 79 | #ifdef __cplusplus 80 | } 81 | #endif 82 | 83 | static inline int64_t bns_depos(const bntseq_t *bns, int64_t pos, int *is_rev) 84 | { 85 | return (*is_rev = (pos >= bns->l_pac))? (bns->l_pac<<1) - 1 - pos : pos; 86 | } 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /src/main/native/kstring.h: -------------------------------------------------------------------------------- 1 | #ifndef KSTRING_H 2 | #define KSTRING_H 3 | 4 | #include <stdlib.h> 5 | #include <string.h> 6 | 7 | #ifdef USE_MALLOC_WRAPPERS 8 | # include "malloc_wrap.h" 9 | #endif 10 | 11 | #ifndef kroundup32 12 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 13 | #endif 14 | 15 | #ifndef KSTRING_T 16 | #define KSTRING_T kstring_t 17 | typedef struct __kstring_t { 18 | size_t l, m; 19 | char *s; 20 | } kstring_t; 21 | #endif 22 | 23 | static inline void ks_resize(kstring_t *s, size_t size) 24 | { 25 | if (s->m < size) { 26 | s->m = size; 27 | kroundup32(s->m); 28 | s->s = (char*)realloc(s->s, s->m); 29 | } 30 | } 31 | 32 | static inline int kputsn(const char *p, int l, kstring_t *s) 33 | { 34 | if (s->l + l + 1 >= s->m) { 35 | s->m = s->l + l + 2; 36 | kroundup32(s->m); 37 | s->s = (char*)realloc(s->s, s->m); 38 | } 39 | memcpy(s->s + s->l, p, l); 40 | s->l += l; 41 | s->s[s->l] = 0; 42 | return l; 43 | } 44 | 45 | static inline int kputs(const char *p, kstring_t *s) 46 | { 47 | return kputsn(p, strlen(p), s); 48 | } 49 | 50 | static inline int kputc(int c, kstring_t *s) 51 | { 52 | if (s->l + 1 >= s->m) { 53 | s->m = s->l + 2; 54 | kroundup32(s->m); 55 | s->s = (char*)realloc(s->s, s->m); 56 | } 57 | s->s[s->l++] = c; 58 | s->s[s->l] = 0; 59 | return c; 60 | } 61 | 62 | static inline int kputw(int c, kstring_t *s) 63 | { 64 | char buf[16]; 65 | int l, x; 66 | if (c == 0) return kputc('0', s); 67 | for (l = 0, x = c < 0?
-c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; 68 | if (c < 0) buf[l++] = '-'; 69 | if (s->l + l + 1 >= s->m) { 70 | s->m = s->l + l + 2; 71 | kroundup32(s->m); 72 | s->s = (char*)realloc(s->s, s->m); 73 | } 74 | for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x]; 75 | s->s[s->l] = 0; 76 | return 0; 77 | } 78 | 79 | static inline int kputuw(unsigned c, kstring_t *s) 80 | { 81 | char buf[16]; 82 | int l, i; 83 | unsigned x; 84 | if (c == 0) return kputc('0', s); 85 | for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0'; 86 | if (s->l + l + 1 >= s->m) { 87 | s->m = s->l + l + 2; 88 | kroundup32(s->m); 89 | s->s = (char*)realloc(s->s, s->m); 90 | } 91 | for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; 92 | s->s[s->l] = 0; 93 | return 0; 94 | } 95 | 96 | static inline int kputl(long c, kstring_t *s) 97 | { 98 | char buf[32]; 99 | long l, x; 100 | if (c == 0) return kputc('0', s); 101 | for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; 102 | if (c < 0) buf[l++] = '-'; 103 | if (s->l + l + 1 >= s->m) { 104 | s->m = s->l + l + 2; 105 | kroundup32(s->m); 106 | s->s = (char*)realloc(s->s, s->m); 107 | } 108 | for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x]; 109 | s->s[s->l] = 0; 110 | return 0; 111 | } 112 | 113 | int ksprintf(kstring_t *s, const char *fmt, ...); 114 | 115 | #endif 116 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/profiling/SWBatchTimeBreakdown.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
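Because the profiling class below hand-rolls writeObject/readObject, a round-trip check is a cheap way to catch field-order mismatches between the two methods (a sketch; profile is an assumed instance):

    import java.io._
    val bytes = new ByteArrayOutputStream
    val oos = new ObjectOutputStream(bytes)
    oos.writeObject(profile)               // triggers the private writeObject below
    oos.close()
    val ois = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray))
    val copy = ois.readObject.asInstanceOf[SWBatchTimeBreakdown]
    assert(copy.FPGADataPostProcTime == profile.FPGADataPostProcTime)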
16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.profiling 20 | 21 | import scala.Serializable 22 | 23 | import java.io.ObjectInputStream 24 | import java.io.ObjectOutputStream 25 | import java.io.ObjectStreamException 26 | 27 | class SWBatchTimeBreakdown extends Serializable { 28 | var isFPGA: Boolean = true 29 | var initSWBatchTime: Long = 0 30 | var SWBatchRuntime: Long = 0 31 | var SWBatchOnFPGA: Long = 0 32 | var postProcessSWBatchTime: Long = 0 33 | var FPGADataPreProcTime: Long = 0 34 | var FPGARoutineRuntime: Long = 0 35 | var FPGADataPostProcTime: Long = 0 36 | var FPGATaskNum: Long = 0 37 | var CPUTaskNum: Long = 0 38 | var generatedChainTime: Long = 0 39 | var filterChainTime: Long = 0 40 | var chainToAlnTime: Long = 0 41 | var sortAndDedupTime: Long = 0 42 | 43 | private def writeObject(out: ObjectOutputStream) { 44 | out.writeBoolean(isFPGA) 45 | out.writeLong(initSWBatchTime) 46 | out.writeLong(SWBatchRuntime) 47 | out.writeLong(SWBatchOnFPGA) 48 | out.writeLong(postProcessSWBatchTime) 49 | out.writeLong(FPGADataPreProcTime) 50 | out.writeLong(FPGARoutineRuntime) 51 | out.writeLong(FPGADataPostProcTime) 52 | out.writeLong(FPGATaskNum) 53 | out.writeLong(CPUTaskNum) 54 | out.writeLong(generatedChainTime) 55 | out.writeLong(filterChainTime) 56 | out.writeLong(chainToAlnTime) 57 | out.writeLong(sortAndDedupTime) 58 | } 59 | 60 | private def readObject(in: ObjectInputStream) { 61 | isFPGA = in.readBoolean 62 | initSWBatchTime = in.readLong 63 | SWBatchRuntime = in.readLong 64 | SWBatchOnFPGA = in.readLong 65 | postProcessSWBatchTime = in.readLong 66 | FPGADataPreProcTime = in.readLong 67 | FPGARoutineRuntime = in.readLong 68 | FPGADataPostProcTime = in.readLong 69 | FPGATaskNum = in.readLong 70 | CPUTaskNum = in.readLong 71 | generatedChainTime = in.readLong 72 | filterChainTime = in.readLong 73 | chainToAlnTime = in.readLong 74 | sortAndDedupTime = in.readLong 75 | } 76 | 77 | private def readObjectNoData() { 78 | 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/python/sam_pair_end_permutation_inplace.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | 3 | # * 4 | # * Licensed to the Apache Software Foundation (ASF) under one or more 5 | # * contributor license agreements. See the NOTICE file distributed with 6 | # * this work for additional information regarding copyright ownership. 7 | # * The ASF licenses this file to You under the Apache License, Version 2.0 8 | # * (the "License"); you may not use this file except in compliance with 9 | # * the License. You may obtain a copy of the License at 10 | # * 11 | # * http://www.apache.org/licenses/LICENSE-2.0 12 | # * 13 | # * Unless required by applicable law or agreed to in writing, software 14 | # * distributed under the License is distributed on an "AS IS" BASIS, 15 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # * See the License for the specific language governing permissions and 17 | # * limitations under the License.
18 | # * 19 | 20 | import random 21 | import sys 22 | 23 | infile1 = sys.argv[1] # input file name: pair-end 1 24 | infile2 = sys.argv[2] # input file name: pair-end 2 25 | outfile1 = sys.argv[3] # output file name: pair-end 1 26 | outfile2 = sys.argv[4] # output file name: pair-end 2 27 | 28 | # FASTQ definition 29 | class FASTQ: 30 | name = '' 31 | seq = '' 32 | comment = '' 33 | quality = '' 34 | 35 | fastqList1 = [] 36 | fastqList2 = [] 37 | 38 | # read two FASTQ files 39 | i = 0 40 | file = open(infile1, 'r') 41 | for line in file: 42 | if i % 4 == 0: 43 | inStr = FASTQ() 44 | inStr.name = line 45 | elif i % 4 == 1: 46 | inStr.seq = line 47 | elif i % 4 == 2: 48 | inStr.comment = line 49 | else: 50 | inStr.quality = line 51 | fastqList1.append(inStr) 52 | i += 1 53 | file.close() 54 | 55 | i = 0 56 | file = open(infile2, 'r') 57 | for line in file: 58 | if i % 4 == 0: 59 | inStr = FASTQ() 60 | inStr.name = line 61 | elif i % 4 == 1: 62 | inStr.seq = line 63 | elif i % 4 == 2: 64 | inStr.comment = line 65 | else: 66 | inStr.quality = line 67 | fastqList2.append(inStr) 68 | i += 1 69 | file.close() 70 | 71 | # permute fastqList1 and fastqList2 together! 72 | if len(fastqList1) != len(fastqList2): 73 | print 'Error: different numbers of reads on two FASTQ files' 74 | else: 75 | fastqListLen = len(fastqList1) 76 | for i in range(fastqListLen): 77 | j = random.randint(i, fastqListLen - 1) 78 | tmp = fastqList1[i] 79 | fastqList1[i] = fastqList1[j] 80 | fastqList1[j] = tmp 81 | tmp = fastqList2[i] 82 | fastqList2[i] = fastqList2[j] 83 | fastqList2[j] = tmp 84 | 85 | # dump output permuted FASTQ files 86 | file = open(outfile1, 'w') 87 | for fq in fastqList1: 88 | file.write(fq.name) 89 | file.write(fq.seq) 90 | file.write(fq.comment) 91 | file.write(fq.quality) 92 | file.close() 93 | 94 | file = open(outfile2, 'w') 95 | for fq in fastqList2: 96 | file.write(fq.name) 97 | file.write(fq.seq) 98 | file.write(fq.comment) 99 | file.write(fq.quality) 100 | file.close() 101 | 102 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/datatype/ExtensionParameters.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
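A sketch of how one seed's extension problem might be packed into the parameter object defined below; the byte arrays hold 2-bit-encoded bases, and the scoring values mirror common BWA-MEM defaults (all numbers are illustrative, not taken from this repository):

    val p = new ExtParam
    p.leftQs = Array[Byte](0, 1, 2, 3)   // query bases to the left of the seed (illustrative)
    p.leftQlen = p.leftQs.length
    p.leftRs = Array[Byte](0, 1, 2, 0)   // reference window to the left of the seed
    p.leftRlen = p.leftRs.length
    p.rightQlen = 0                       // no right extension in this toy case
    p.rightRlen = 0
    p.w = 100                             // band width
    p.oDel = 6; p.eDel = 1; p.oIns = 6; p.eIns = 1
    p.zdrop = 100                         // Z-dropoff
    p.h0 = 19                             // initial score, i.e. seed length x match score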
16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.datatype 20 | 21 | class ExtParam() { 22 | var leftQs: Array[Byte] = _ 23 | var leftQlen: Int = -1 24 | var leftRs: Array[Byte] = _ 25 | var leftRlen: Int = -1 26 | var rightQs: Array[Byte] = _ 27 | var rightQlen: Int = -1 28 | var rightRs: Array[Byte] = _ 29 | var rightRlen: Int = -1 30 | var w: Int = -1 31 | var mat: Array[Byte] = _ 32 | var oDel: Int = -1 33 | var eDel: Int = -1 34 | var oIns: Int = -1 35 | var eIns: Int = -1 36 | var penClip5: Int = -1 37 | var penClip3: Int = -1 38 | var zdrop: Int = -1 39 | var h0: Int = -1 40 | var regScore: Int = -1 41 | var qBeg: Int = -1; 42 | //var rBeg: Long = -1l; 43 | //var qe: Int = -1; 44 | //var re: Long = -1l; 45 | var idx: Int = -1 46 | //var rmax0: Long = -1l 47 | def display() { 48 | println("leftQlen: " + leftQlen) 49 | if (leftQlen > 0) leftQs.foreach(ele => {print(ele + " ")}) 50 | println() 51 | println("leftRlen: " + leftRlen) 52 | if (leftRlen > 0) leftRs.foreach(ele => {print(ele + " ")}) 53 | println() 54 | println("rightQlen: " + rightQlen) 55 | if (rightQlen > 0 ) rightQs.foreach(ele => {print(ele + " ")}) 56 | println() 57 | println("rightRlen: " + rightRlen) 58 | if (rightRlen > 0) rightRs.foreach(ele => {print(ele + " ")}) 59 | println() 60 | println("w: " + w) 61 | println("oDel: " + oDel) 62 | println("eDel: " + eDel) 63 | println("oIns: " + oIns) 64 | println("eIns: " + eIns) 65 | println("penClip5: " + penClip5) 66 | println("penClip3: " + penClip3) 67 | println("zdrop: " + zdrop) 68 | println("h0: " + h0) 69 | println("regScore: " + regScore) 70 | println("qBeg: " + qBeg) 71 | //println("rBeg: " + rBeg) 72 | //println("qe: " + qe) 73 | //println("re: " + re) 74 | println("idx: " + idx) 75 | //println("rmax0: " + rmax0) 76 | } 77 | } 78 | 79 | class ExtRet() { 80 | var qBeg: Int = -1 81 | var rBeg: Long = -1 82 | var qEnd: Int = -1 83 | var rEnd: Long = -1 84 | var score: Int = -1 85 | var trueScore: Int = -1 86 | var width: Int = -1 87 | var idx: Int = -1 88 | def display() { 89 | println("qBeg: " + qBeg) 90 | println("rBeg: " + rBeg) 91 | println("qEnd: " + qEnd) 92 | println("rEnd: " + rEnd) 93 | println("score: " + score) 94 | println("trueScore: " + trueScore) 95 | println("width: " + width) 96 | println("idx: " + idx) 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/main/native/kvec.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008, by Attractive Chaos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | /* 27 | An example: 28 | 29 | #include "kvec.h" 30 | int main() { 31 | kvec_t(int) array; 32 | kv_init(array); 33 | kv_push(int, array, 10); // append 34 | kv_a(int, array, 20) = 5; // dynamic 35 | kv_A(array, 20) = 4; // static 36 | kv_destroy(array); 37 | return 0; 38 | } 39 | */ 40 | 41 | /* 42 | 2008-09-22 (0.1.0): 43 | 44 | * The initial version. 45 | 46 | */ 47 | 48 | #ifndef AC_KVEC_H 49 | #define AC_KVEC_H 50 | 51 | #include <stdlib.h> 52 | 53 | #ifdef USE_MALLOC_WRAPPERS 54 | # include "malloc_wrap.h" 55 | #endif 56 | 57 | #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 58 | 59 | #define kvec_t(type) struct { size_t n, m; type *a; } 60 | #define kv_init(v) ((v).n = (v).m = 0, (v).a = 0) 61 | #define kv_destroy(v) free((v).a) 62 | #define kv_A(v, i) ((v).a[(i)]) 63 | #define kv_pop(v) ((v).a[--(v).n]) 64 | #define kv_size(v) ((v).n) 65 | #define kv_max(v) ((v).m) 66 | 67 | #define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m)) 68 | 69 | #define kv_copy(type, v1, v0) do { \ 70 | if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n); \ 71 | (v1).n = (v0).n; \ 72 | memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \ 73 | } while (0) \ 74 | 75 | #define kv_push(type, v, x) do { \ 76 | if ((v).n == (v).m) { \ 77 | (v).m = (v).m? (v).m<<1 : 2; \ 78 | (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \ 79 | } \ 80 | (v).a[(v).n++] = (x); \ 81 | } while (0) 82 | 83 | #define kv_pushp(type, v) ((((v).n == (v).m)? \ 84 | ((v).m = ((v).m? (v).m<<1 : 2), \ 85 | (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ 86 | : 0), &(v).a[(v).n++]) 87 | 88 | #define kv_a(type, v, i) (((v).m <= (size_t)(i)? \ 89 | ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \ 90 | (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ 91 | : (v).n <= (size_t)(i)?
(v).n = (i) + 1 \ 92 | : 0), (v).a[(i)]) 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /src/main/jni_fpga/pom.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 3 | <modelVersion>4.0.0</modelVersion> 4 | 5 | <groupId>cs.ucla.edu</groupId> 6 | <artifactId>jniSWExtend</artifactId> 7 | <version>0.2.2</version> 8 | <name>JNI Smith-Waterman Extend on FPGA</name> 9 | 10 | <packaging>so</packaging> 11 | 12 | <dependencies> 13 | <dependency> 14 | <groupId>cs.ucla.edu</groupId> 15 | <artifactId>cloud-scale-bwamem</artifactId> 16 | <version>0.2.2</version> 17 | <type>jar</type> 18 | <scope>system</scope> 19 | <systemPath>/curr/pengwei/github/cloud-scale-bwamem/target/cloud-scale-bwamem-0.2.2.jar</systemPath> 20 | </dependency> 21 | </dependencies> 22 | 23 | <build> 24 | <plugins> 25 | <plugin> 26 | <artifactId>maven-compiler-plugin</artifactId> 27 | <configuration> 28 | <source>1.6</source> 29 | <target>1.6</target> 30 | </configuration> 31 | </plugin> 32 | 33 | <plugin> 34 | <groupId>org.codehaus.mojo</groupId> 35 | <artifactId>native-maven-plugin</artifactId> 36 | <extensions>true</extensions> 37 | <configuration> 38 | <javahOS>linux</javahOS> 39 | <compilerProvider>generic-classic</compilerProvider> 40 | <compilerExecutable>gcc</compilerExecutable> 41 | <linkerExecutable>gcc</linkerExecutable> 42 | <sources> 43 | <source> 44 | <directory>./</directory> 45 | <fileNames> 46 | <fileName>sw_extend_fpga.c</fileName> 47 | </fileNames> 48 | </source> 49 | </sources> 50 | <compilerStartOptions> 51 | <compilerStartOption>-s -fPIC -O2</compilerStartOption> 52 | </compilerStartOptions> 53 | <linkerStartOptions> 54 | <linkerStartOption>-shared -lrt</linkerStartOption> 55 | </linkerStartOptions> 56 | </configuration> 57 | <executions> 58 | <execution> 59 | <id>javah</id> 60 | <phase>generate-sources</phase> 61 | <configuration> 62 | <javahOS>linux</javahOS> 63 | <javahProvider>default</javahProvider> 64 | <javahOutputDirectory>../../../target/custom-javah</javahOutputDirectory> 65 | <workingDirectory>../../../target</workingDirectory> 66 | <javahOutputFileName>jni_sw_extend.h</javahOutputFileName> 67 | <javahClassNames> 68 | <javahClassName>cs.ucla.edu.bwaspark.jni.SWExtendFPGAJNI</javahClassName> 69 | </javahClassNames> 70 | </configuration> 71 | <goals> 72 | <goal>javah</goal> 73 | </goals> 74 | </execution> 75 | </executions> 76 | </plugin> 77 | </plugins> 78 | </build> 79 | 80 | <profiles> 81 | <profile> 82 | <id>otherOutputDir</id> 83 | <build> 84 | <directory>../../../target</directory> 85 | </build> 86 | </profile> 87 | </profiles> 88 | </project> 89 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/commandline/BWAMEMCommand.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
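The field-by-field defaults of the command object defined below can be overridden before a run; a typical pair-end configuration sketch (paths and the folder count are hypothetical):

    val cmd = new BWAMEMCommand
    cmd.fastaInputPath = "/local/ref/hg19.fasta"
    cmd.fastqHDFSInputPath = "hdfs://namenode:9000/user/alice/reads"
    cmd.isPairEnd = true
    cmd.fastqInputFolderNum = 30
    cmd.outputChoice = 1                     // SAM output in the local file system
    cmd.outputPath = "/local/out/aligned.sam"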
16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.commandline 20 | 21 | class BWAMEMCommand { 22 | var fastaInputPath: String = "" // the local BWA index files (bns, pac, and so on) 23 | var fastqHDFSInputPath: String = "" // the raw read file stored in HDFS 24 | var isPairEnd: Boolean = false // perform pair-end or single-end mapping 25 | var fastqInputFolderNum: Int = 0 // the number of folders generated in the HDFS for the raw reads 26 | var batchedFolderNum: Int = 4 // the number of raw read folders in a batch to be processed 27 | var isPSWBatched: Boolean = true // (optional) whether the pair-end Smith-Waterman is performed in a batched way 28 | var subBatchSize: Int = 10 // (optional) the number of reads to be processed in a subbatch 29 | var isPSWJNI: Boolean = true // (optional) whether the native JNI library is called for better performance 30 | var jniLibPath: String = "./target/jniNative.so" // (optional) the JNI library path in the local machine 31 | var outputChoice: Int = 1 // (optional) the output format choice 32 | // 0: no output (pure computation) 33 | // 1: SAM file output in the local file system (default) 34 | // 2: ADAM format output in the distributed file system 35 | var outputPath: String = "output.sam" // (optional) the output path; users need to provide a correct path in the local or distributed file system 36 | var localRef: Int = 0 // (optional) specify if each local node has the reference genome (you can use it to bypass the broadcasting overhead) 37 | // 0: w/o local reference genome (default) 38 | // 1: with local reference genome 39 | var headerLine: String = "@RG\tID:foo\tSM:bar"; // complete read group header line (should be set for common use cases); example: @RG\tID:foo\tSM:bar 40 | var isSWExtBatched: Boolean = false // (optional) whether the SWExtend is executed in a batched way 41 | var swExtBatchSize: Int = 1024 // (optional) the batch size used for SWExtend 42 | var isFPGAAccSWExtend: Boolean = false // (optional) whether the FPGA accelerator is used for accelerating SWExtend 43 | var fpgaSWExtThreshold: Int = 128 // (optional) the threshold of using the FPGA accelerator for SWExtend 44 | var jniSWExtendLibPath: String = "./target/jniSWExtend.so" // (optional) the JNI library path used for SWExtend FPGA acceleration 45 | } 46 | 47 | -------------------------------------------------------------------------------- /src/main/alphadata/alphadata_host.tcl: -------------------------------------------------------------------------------- 1 | # ******************************************************************************* 2 | # Vendor: Xilinx 3 | # Associated Filename: example_alphadata.tcl 4 | # Purpose: Commands to construct the OpenCL C matrix multiply example 5 | # 6 | # ******************************************************************************* 7 | # Copyright (C) 2014 XILINX, Inc. 8 | # 9 | # This file contains confidential and proprietary information of Xilinx, Inc. and 10 | # is protected under U.S. and international copyright and other intellectual 11 | # property laws. 12 | # 13 | # DISCLAIMER 14 | # This disclaimer is not a license and does not grant any rights to the materials 15 | # distributed herewith.
Except as otherwise provided in a valid license issued to 16 | # you by Xilinx, and to the maximum extent permitted by applicable law: 17 | # (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX 18 | # HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, 19 | # INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR 20 | # FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether 21 | # in contract or tort, including negligence, or under any other theory of 22 | # liability) for any loss or damage of any kind or nature related to, arising under 23 | # or in connection with these materials, including for any direct, or any indirect, 24 | # special, incidental, or consequential loss or damage (including loss of data, 25 | # profits, goodwill, or any type of loss or damage suffered as a result of any 26 | # action brought by a third party) even if such damage or loss was reasonably 27 | # foreseeable or Xilinx had been advised of the possibility of the same. 28 | # 29 | # CRITICAL APPLICATIONS 30 | # Xilinx products are not designed or intended to be fail-safe, or for use in any 31 | # application requiring fail-safe performance, such as life-support or safety 32 | # devices or systems, Class III medical devices, nuclear facilities, applications 33 | # related to the deployment of airbags, or any other applications that could lead 34 | # to death, personal injury, or severe property or environmental damage 35 | # (individually and collectively, "Critical Applications"). Customer assumes the 36 | # sole risk and liability of any use of Xilinx products in Critical Applications, 37 | # subject only to applicable laws and regulations governing limitations on product 38 | # liability. 39 | # 40 | # THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT 41 | # ALL TIMES. 42 | 43 | #******************************************************************************* 44 | # Define the solution for SDAccel 45 | create_solution -name alphadata_host -dir . -force 46 | add_device -vbnv xilinx:adm-pcie-7v3:1ddr:1.1 47 | 48 | # Host Compiler Flags 49 | set_property -name host_cflags -value "-g -Wall -D FPGA_DEVICE -D C_KERNEL" -objects [current_solution] 50 | 51 | # Host Source Files 52 | add_files "shm_host.c" 53 | 54 | ## Kernel Definition 55 | #create_kernel mmult -type c 56 | #add_files -kernel [get_kernels mmult] "mmult1.c" 57 | # 58 | ## Define Binary Containers 59 | #create_opencl_binary mmult1 60 | #set_property region "OCL_REGION_0" [get_opencl_binary mmult1] 61 | #create_compute_unit -opencl_binary [get_opencl_binary mmult1] -kernel [get_kernels mmult] -name k1 62 | # 63 | ## Compile the design for CPU based emulation 64 | #compile_emulation -flow cpu -opencl_binary [get_opencl_binary mmult1] 65 | # 66 | ## Run the compiled application in CPU based emulation mode 67 | #run_emulation -flow cpu -args "mmult1.xclbin" 68 | 69 | # Compile the application to run on the accelerator card 70 | build_system 71 | 72 | # Package the application binaries 73 | package_system 74 | 75 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/datatype/BWAIdxType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 |
19 | package cs.ucla.edu.bwaspark.datatype
20 |
21 | import java.io.ObjectInputStream
22 | import java.io.ObjectOutputStream
23 | import java.io.{FileInputStream, IOException}
24 | import java.nio.channels.FileChannel
25 | import cs.ucla.edu.bwaspark.datatype.BinaryFileReadUtil._
26 | import scala.Serializable
27 | import org.apache.hadoop.conf.Configuration;
28 | import org.apache.hadoop.fs.FileSystem;
29 | import org.apache.hadoop.fs.FSDataInputStream;
30 | import org.apache.hadoop.fs.FSDataOutputStream;
31 | import org.apache.hadoop.fs.Path;
32 |
33 | //BWAIdxType: maintains all the information of the BWA index generated from the FASTA reference
34 | class BWAIdxType extends Serializable {
35 |
36 | //1st: BWTType(".bwt", ".sa" files)
37 | var bwt: BWTType = _
38 |
39 | //2nd: BNTSeqType(".ann", ".amb" files)
40 | var bns: BNTSeqType = _
41 |
42 | //3rd: PACType(".pac" file)
43 | var pac: Array[Byte] = _ //uint8_t[]
44 |
45 | //loading files into fields
46 | //prefix: prefix of filenames
47 | //which: bit flags selecting which index components to load (currently ignored; all components are always loaded)
48 | def load(prefix: String) { load(prefix, 0) }
49 | def load(prefix: String, which: Int) {
50 | //There is a function called "bwa_idx_infer_prefix" in the original BWA,
51 | //but it is not needed here
52 |
53 | //loading bwt
54 | //for now, always load; the "which" flags are not yet honored
55 | //if (which & BWA_IDX_BWT) {
56 | if (true) {
57 | bwt = new BWTType
58 | bwt.load(prefix)
59 | }
60 |
61 | //loading bns
62 | //for now, always load; the "which" flags are not yet honored
63 | //if (which & BWA_IDX_BNS) {
64 | if (true) {
65 | bns = new BNTSeqType
66 | bns.load(prefix)
67 |
68 | //loading pac
69 | //for now, always load; the "which" flags are not yet honored
70 | //if (which & BWA_IDX_PAC) {
71 | if (true) {
72 | def pacLoader(filename: String, length: Long): Array[Byte] = {
73 | //read the packed 2-bit reference: prefer HDFS, fall back to the local file system
74 | val conf = new Configuration
75 | val fs = FileSystem.get(conf)
76 | val path = new Path(filename)
77 | //var reader: AnyRef = null
78 | if (fs.exists(path)) {
79 | val reader = fs.open(path)
80 | var pac = readByteArray(reader, (length/4+1).toInt, 0)
81 | pac
82 | }
83 | else {
84 | val reader = new FileInputStream(filename).getChannel
85 | var pac = readByteArray(reader, (length/4+1).toInt, 0)
86 | pac
87 | }
88 | }
89 | pac = pacLoader(prefix+".pac", bns.l_pac)
90 | }
91 | }
92 | }
93 |
94 | private def writeObject(out: ObjectOutputStream) {
95 | out.writeObject(bwt)
96 | out.writeObject(bns)
97 | out.writeObject(pac)
98 | }
99 |
100 | private def readObject(in: ObjectInputStream) {
101 | bwt = in.readObject.asInstanceOf[BWTType]
102 | bns = in.readObject.asInstanceOf[BNTSeqType]
103 | pac = in.readObject.asInstanceOf[Array[Byte]]
104 | }
105 |
106 | private def readObjectNoData() {
107 |
108 | }
109 |
110 | }
111 |
--------------------------------------------------------------------------------
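A minimal usage sketch of the index types above (illustrative only, not a file in this repo; the FASTA prefix is a placeholder path, and TestBWAIdxType.scala later in this dump exercises the same flow):

import cs.ucla.edu.bwaspark.datatype.{BWAIdxType, MemOptType}

object LoadIndexSketch {
  def main(args: Array[String]) {
    val bwaIdx = new BWAIdxType
    bwaIdx.load("/path/to/ref.fasta", 0) // loads .bwt/.sa, .ann/.amb and .pac next to the FASTA prefix
    val opt = new MemOptType
    opt.load() // fills the 5x5 substitution matrix from the match/mismatch scores a and b
    println("l_pac = " + bwaIdx.bns.l_pac + ", n_seqs = " + bwaIdx.bns.n_seqs)
  }
}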
/README.md: --------------------------------------------------------------------------------
1 | # Cloud-Scale BWAMEM
2 |
3 | # Introduction
4 | Cloud-scale BWAMEM (CS-BWAMEM) is an ultrafast and highly scalable aligner built on top of cloud infrastructures, including Spark and the Hadoop distributed file system (HDFS). It leverages the abundant computing resources in a public or private cloud to fully exploit the parallelism obtained from the enormous number of reads. With CS-BWAMEM, pair-end whole-genome reads (30x coverage) can be aligned within 80 minutes on a 25-node cluster with 300 cores.
5 |
6 | # Build and Install
7 | 1. git clone git@github.com:ytchen0323/cloud-scale-bwamem.git
8 | 2. cd cloud-scale-bwamem
9 | 3. update the absolute path in two pom.xml files: src/pom.xml and src/main/jni_fpga/pom.xml
10 | update:
11 |
12 | /curr/pengwei/github/cloud-scale-bwamem/target/cloud-scale-bwamem-0.2.2.jar
13 | to your path:
14 |
15 | /yourpath/cloud-scale-bwamem/target/cloud-scale-bwamem-0.2.2.jar
16 | 4. ./compile.pl
17 |
18 | # Upload FASTQ file(s) to HDFS
19 | - command: upload-fastq [-bn INT] isPairEnd filePartitionNum inputFASTQFilePath1 [inputFASTQFilePath2] outFileHDFSPath
20 | - Required arguments (in the following order):
21 |
22 | (1) isPairEnd:
23 |
24 | 1: pair-end
25 |
26 | 0: single-end (not fully verified yet)
27 |
28 | (2) inputFASTQFilePath1: the first input path of the FASTQ file in the local file system (for both single-end and pair-end)
29 |
30 | (3) inputFASTQFilePath2: (optional) the second input path of the FASTQ file in the local file system (for pair-end)
31 |
32 | (4) outFileHDFSPath: the root path of the output FASTQ files in HDFS
33 | - Optional arguments:
34 | (1) -bn (optional): the number of lines to be read in one batch, which depends on the amount of DRAM you have on the driver node.
35 |
36 | # Use CS-BWAMEM aligner
37 | - command: cs-bwamem [-bfn INT] [-bPSW (0/1)] [-sbatch INT] [-bPSWJNI (0/1)] [-jniPath STRING] [-oChoice (0/1/2)] [-oPath STRING] isPairEnd fastaInputPath fastqHDFSInputPath fastqInputFolderNum
38 | - Required arguments (in the following order):
39 |
40 | (1) isPairEnd:
41 |
42 | 1: pair-end
43 |
44 | 0: single-end (not fully verified yet)
45 |
46 | (2) fastaInputPath: the path of the BWA index files (bns, pac, and so on). This path is located on the local machine, not in HDFS.
47 |
48 | (3) fastqHDFSInputPath: the path of the raw read files stored in HDFS
49 |
50 | (4) fastqInputFolderNum: the number of folders generated in the HDFS for the raw reads (output from Usage 1).
(NOTE: this parameter can be fetched automatically in the next version)
51 | - Optional arguments:
52 |
53 | (1) -bfn (optional): the number of folders of raw reads to be processed in a batch
54 |
55 | (2) -bPSW (optional): whether the pair-end Smith Waterman is performed in a batched way
56 |
57 | (3) -sbatch (optional): the number of reads to be processed in a subbatch using the JNI library
58 |
59 | (4) -bPSWJNI (optional): whether the native JNI library is called for better performance
60 |
61 | (5) -jniPath (optional): the JNI library path on the local machine
62 |
63 | (6) -oChoice (optional): the output format choice
64 |
65 | 0: no output (pure computation)
66 |
67 | 1: SAM file output in the local file system (default)
68 |
69 | 2: ADAM format output in the distributed file system
70 |
71 | (7) -oPath (optional): the output path; users need to provide a correct path in the local or distributed file system
72 |
73 | # Merge the output ADAM folders
74 | - This command merges the output ADAM folders after alignment and then saves the output as a new ADAM file in HDFS
75 | - command: merge adamHDFSRootInputPath adamHDFSOutputPath
76 |
77 | # Sort the output ADAM folders
78 | - This command sorts the output ADAM folders after alignment and then saves the output as a new ADAM file in HDFS
79 | - command: sort adamHDFSRootInputPath adamHDFSOutputPath
80 |
-------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/worker2/MemMarkPrimarySe.scala: --------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 |
19 | package cs.ucla.edu.bwaspark.worker2
20 |
21 | import scala.List
22 | import scala.math.abs
23 |
24 | import cs.ucla.edu.bwaspark.datatype._
25 |
26 | //MemMarkPrimarySe: mark primary/secondary alignments among overlapping hits of a single-end read
27 | object MemMarkPrimarySe {
28 |
29 |
30 | /**
31 | * The main function of the MemMarkPrimarySe object
32 | *
33 | * @param opt the MemOptType object
34 | * @param a the array of MemAlnRegType objects (the alignment regions of one read)
35 | * @param id the id of this read; combined with the region index in hash64() for deterministic tie-breaking
36 | */
37 | def memMarkPrimarySe(opt: MemOptType, a: Array[MemAlnRegType], id: Long) : Array[MemAlnRegType] = {
38 | var n: Int = 0
39 | if(a != null) n = a.length
40 | var i: Int = 0
41 | var j: Int = 0
42 | var tmp: Int = 0
43 | var k: Int = 0
44 | var z: Array[Int] = new Array[Int](n)
45 | var zIdx = 0
46 | //aVar, the returned value
47 | var aVar: Array[MemAlnRegType] = null
48 | if(n != 0) {
49 | i = 0
50 | while(i < n) {
51 | a(i).sub = 0
52 | a(i).secondary = -1
53 | a(i).hash = hash64((id + i.toLong))
54 | i += 1
55 | }
56 | //ks_introsort(mem_ars_hash, n, a)
57 | //#define alnreg_hlt(a, b) ((a).score > (b).score || ((a).score == (b).score && (a).hash < (b).hash))
58 | //aVar = a.sortWith( (x, y) => ((x.score > y.score) || ( x.score == y.score && (x.hash >>> 1) < (y.hash >>> 1) ) ) )
59 | aVar = a.sortBy(r => (- r.score, r.hash))
60 | tmp = opt.a + opt.b
61 | if((opt.oDel + opt.eDel) > tmp) {
62 | tmp = opt.oDel + opt.eDel
63 | }
64 | if((opt.oIns + opt.eIns) > tmp) {
65 | tmp = opt.oIns + opt.eIns
66 | }
67 | //kv_push()
68 | z(0) = 0
69 | zIdx += 1
70 | i = 1
71 | while(i < n) {
72 | var breakIdx: Int = zIdx
73 | var isBreak = false
74 | k = 0
75 |
76 | while(k < zIdx && !isBreak) {
77 | j = z(k)
78 | var bMax: Int = if(aVar(j).qBeg > aVar(i).qBeg) aVar(j).qBeg else aVar(i).qBeg
79 | var eMin: Int = if(aVar(j).qEnd < aVar(i).qEnd) aVar(j).qEnd else aVar(i).qEnd
80 | // have overlap
81 | if( eMin > bMax ) {
82 | var minL: Int = if ((aVar(i).qEnd - aVar(i).qBeg)<(aVar(j).qEnd - aVar(j).qBeg)) (aVar(i).qEnd - aVar(i).qBeg) else (aVar(j).qEnd - aVar(j).qBeg)
83 | //have significant overlap
84 | if((eMin - bMax)>= minL * opt.maskLevel) {
85 | if(aVar(j).sub == 0) {
86 | aVar(j).sub = aVar(i).score
87 | }
88 | if((aVar(j).score - aVar(i).score) <= tmp) aVar(j).subNum = aVar(j).subNum + 1
89 | breakIdx = k
90 | isBreak = true
91 | }
92 | }
93 |
94 | k += 1
95 | }
96 |
97 | if(breakIdx == zIdx) {
98 | z(zIdx) = i
99 | zIdx += 1
100 | }
101 | else {
102 | aVar(i).secondary = z(breakIdx) // breakIdx records where the overlap was found; k has already been advanced past it
103 | }
104 |
105 | i += 1
106 | }
107 | }
108 | aVar
109 | }
110 |
111 | def hash64( key: Long ) : Long = {
112 | var keyVar: Long = key
113 | keyVar += ~(keyVar << 32)
114 | keyVar ^= (keyVar >>> 22)
115 | keyVar += ~(keyVar << 13)
116 | keyVar ^= (keyVar >>> 8)
117 | keyVar += (keyVar << 3)
118 | keyVar ^= (keyVar >>> 15)
119 | keyVar += ~(keyVar << 27)
120 | keyVar ^= (keyVar >>> 31)
121 | keyVar
122 | }
123 | }
124 |
-------------------------------------------------------------------------------- /src/main/native/utils.h: --------------------------------------------------------------------------------
1 | /* The MIT License
2 |
3 | Copyright (c) 2008 Genome Research Ltd (GRL).
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining
6 | a copy of this software and associated documentation files (the
7 | "Software"), to deal in the Software without restriction, including
8 | without limitation the rights to use, copy, modify, merge, publish,
9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 | */
25 |
26 | /* Contact: Heng Li */
27 |
28 | #ifndef LH3_UTILS_H
29 | #define LH3_UTILS_H
30 |
31 | #include <stdio.h>
32 | #include <stdint.h>
33 | #include <zlib.h>
34 |
35 | #ifdef __GNUC__
36 | // Tell GCC to validate printf format string and args
37 | #define ATTRIBUTE(list) __attribute__ (list)
38 | #else
39 | #define ATTRIBUTE(list)
40 | #endif
41 |
42 | #define err_fatal_simple(msg) _err_fatal_simple(__func__, msg)
43 | #define err_fatal_simple_core(msg) _err_fatal_simple_core(__func__, msg)
44 |
45 | #define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
46 | #define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
47 | #define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
48 |
49 | #define xassert(cond, msg) if ((cond) == 0) _err_fatal_simple_core(__func__, msg)
50 |
51 | typedef struct {
52 | uint64_t x, y;
53 | } pair64_t;
54 |
55 | typedef struct { size_t n, m; uint64_t *a; } uint64_v;
56 | typedef struct { size_t n, m; pair64_t *a; } pair64_v;
57 |
58 | #ifdef __cplusplus
59 | extern "C" {
60 | #endif
61 |
62 | void err_fatal(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn));
63 | void err_fatal_core(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn));
64 | void _err_fatal_simple(const char *func, const char *msg) ATTRIBUTE((noreturn));
65 | void _err_fatal_simple_core(const char *func, const char *msg) ATTRIBUTE((noreturn));
66 | FILE *err_xopen_core(const char *func, const char *fn, const char *mode);
67 | FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp);
68 | gzFile err_xzopen_core(const char *func, const char *fn, const char *mode);
69 | size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
70 | size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream);
71 |
72 | int err_gzread(gzFile file, void *ptr, unsigned int len);
73 | int err_fseek(FILE *stream, long offset, int whence);
74 | #define err_rewind(FP) err_fseek((FP), 0, SEEK_SET)
75 | long err_ftell(FILE *stream);
76 | int err_fprintf(FILE *stream, const char *format, ...)
77 | ATTRIBUTE((format(printf, 2, 3)));
78 | int err_printf(const char *format, ...)
79 | ATTRIBUTE((format(printf, 1, 2))); 80 | int err_fputc(int c, FILE *stream); 81 | #define err_putchar(C) err_fputc((C), stdout) 82 | int err_fputs(const char *s, FILE *stream); 83 | #define err_puts(S) err_fputs((S), stdout) 84 | int err_fflush(FILE *stream); 85 | int err_fclose(FILE *stream); 86 | int err_gzclose(gzFile file); 87 | 88 | double cputime(); 89 | double realtime(); 90 | 91 | void ks_introsort_64 (size_t n, uint64_t *a); 92 | void ks_introsort_128(size_t n, pair64_t *a); 93 | 94 | #ifdef __cplusplus 95 | } 96 | #endif 97 | 98 | static inline uint64_t hash_64(uint64_t key) 99 | { 100 | key += ~(key << 32); 101 | key ^= (key >> 22); 102 | key += ~(key << 13); 103 | key ^= (key >> 8); 104 | key += (key << 3); 105 | key ^= (key >> 15); 106 | key += ~(key << 27); 107 | key ^= (key >> 31); 108 | return key; 109 | } 110 | 111 | #endif 112 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/debug/TestBWAIdxType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | 19 | import cs.ucla.edu.bwaspark.datatype._ 20 | import cs.ucla.edu.bwaspark.worker1.BWAMemWorker1._ 21 | import scala.collection.mutable.MutableList 22 | import java.util.TreeSet 23 | import java.util.Comparator 24 | import cs.ucla.edu.bwaspark.worker1.MemChain._ 25 | import cs.ucla.edu.bwaspark.worker1.MemChainFilter._ 26 | import cs.ucla.edu.bwaspark.worker1.MemChainToAlign._ 27 | import cs.ucla.edu.bwaspark.worker1.MemSortAndDedup._ 28 | import cs.ucla.edu.bwaspark.debug.DebugFlag._ 29 | 30 | import java.io._ 31 | 32 | object TestWorker1 { 33 | 34 | def main(args: Array[String]) { 35 | 36 | //loading index files 37 | val bwaIdx = new BWAIdxType 38 | val prefix = "/home/pengwei/genomics/ReferenceMetadata/human_g1k_v37.fasta" 39 | bwaIdx.load(prefix, 0) 40 | 41 | //loading BWA MEM options 42 | val bwaMemOpt = new MemOptType 43 | bwaMemOpt.load() 44 | 45 | //loading reads 46 | // val rawData = "TTACTCGTGATGTGTGTCCTCAACTAAAGGAGTAGAACTTTTCTTTTCATAGAGAAGTTTTGAAACGCTCTTTTTGTGGAATCTGCAAGTGGATATTTGGC" //read 0 47 | // val rawData = "ATAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCGTAACCCTAACCGTAACCCTCACCCTAACCATAAC" //read 1 48 | val rawData = "TTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGTTAAGGGTAAGGGTTAGGGTTAGGGTTAGGTTTGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGG" 49 | //val goldenRef = "33013123203232323113100130002202302001333313333103020200233332000121313333323220031321002322030333221" 50 | val seq: Array[Int] = rawData.map(ele => ele.toInt).toArray 51 | 52 | debugLevel = 1 53 | 54 | //test bwaMemWork1 55 | // val aligns = bwaMemWorker1(bwaMemOpt, bwaIdx.bwt, bwaIdx.bns, bwaIdx.pac, null, seq.length, seq) 56 | 57 | //test the first step: memChain 58 | def locusEncode(locus: Int): Byte = { 59 | //transforming from A/C/G/T to 0,1,2,3 60 | locus match { 61 | case 'A' => 0 62 | case 'a' => 0 63 | case 'C' => 1 64 | case 'c' => 1 65 | case 'G' => 2 66 | case 'g' => 2 67 | case 'T' => 3 68 | case 't' => 3 69 | case '-' => 5 70 | case _ => 4 71 | } 72 | } 73 | val read: Array[Byte] = seq.map(ele => locusEncode(ele)) 74 | 75 | read.map (ele => print(ele)) 76 | println() 77 | 78 | 79 | val chains = generateChains(bwaMemOpt, bwaIdx.bwt, bwaIdx.bns.l_pac, read.length, read) 80 | 81 | def readChainsFromFile(filename: String): Array[MemChainType] = { 82 | 83 | val chainsReader = new BufferedReader(new FileReader(filename)) 84 | chainsReader.readLine() //1st line, useless 85 | val line = chainsReader.readLine() //2nd line, #chains 86 | val chains = new Array[MemChainType](line.toInt) 87 | for (i <- 0 until line.toInt) { 88 | var aChain = chainsReader.readLine.split(" ") 89 | var seeds = new MutableList[MemSeedType] 90 | for (j <- 0 until aChain(1).toInt) { 91 | val aSeed = chainsReader.readLine.split(" ") 92 | seeds += new MemSeedType(aSeed(0).toLong, aSeed(1).toInt, aSeed(2).toInt) 93 | } 94 | chains(i) = new MemChainType(aChain(0).toLong, seeds) 95 | } 96 | chainsReader.close(); 97 | chains 98 | } 99 | 100 | // val chainsFromFile = readChainsFromFile("/home/pengwei/genomics/OutputFiles/chains.log") 101 | 102 | // val chainsFiltered = memChainFilter(bwaMemOpt, chainsFromFile) 103 | val chainsFiltered = memChainFilter(bwaMemOpt, chains) 104 | 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /run_test.sh: -------------------------------------------------------------------------------- 1 | #SPARK_DRIVER_MEMORY=60g /cluster/spark/spark-1.5.1-bin-hadoop2.4/bin/spark-submit --executor-memory 48g --class cs.ucla.edu.bwaspark.BWAMEMSpark 
--total-executor-cores 150 --master spark://10.0.1.2:7077 --driver-java-options "-XX:+PrintFlagsFinal" --conf spark.driver.host=10.0.1.2 --conf spark.driver.cores=20 --conf spark.driver.maxResultSize=40g --conf spark.storage.memoryFraction=0.7 --conf spark.eventLog.enabled=true --conf spark.eventLog.dir="hdfs://cdsc0:9000/user/ytchen/eventLogs" --conf spark.akka.threads=20 --conf spark.akka.frameSize=1024 /home/ytchen/cs-bwamem/master/cloud-scale-bwamem/target/cloud-scale-bwamem-0.2.2-assembly.jar cs-bwamem -bfn 4 -bPSW 1 -sbatch 10 -bPSWJNI 1 -jniPath /curr/ytchen0323/shared_lib/jniNative.so -oChoice 2 -oPath hdfs://cdsc0:9000/user/ytchen0323/data/SC_data/HCC1954_100Mreads_tinyPart.adam -localRef 1 -R "@RG ID:HCC1954 LB:HCC1954 SM:HCC1954" -isSWExtBatched 1 -bSWExtSize 32768 -FPGAAccSWExt 0 -FPGASWExtThreshold 64 -jniSWExtendLibPath "/curr/genomics_spark/shared_lib/jniSWExtend.so" 1 /space/scratch/ReferenceMetadata/human_g1k_v37.fasta hdfs://cdsc0:9000/user/ytchen0323/data/SC_data/HCC1954_100Mreads_tinyPart.fq 2 | 3 | #SPARK_DRIVER_MEMORY=60g /cluster/spark/spark-1.5.1-bin-hadoop2.4/bin/spark-submit --executor-memory 48g --class cs.ucla.edu.bwaspark.BWAMEMSpark --total-executor-cores 48 --master spark://10.0.1.2:7077 --driver-java-options "-XX:+PrintFlagsFinal" --conf spark.driver.host=10.0.1.2 --conf spark.driver.cores=20 --conf spark.driver.maxResultSize=40g --conf spark.storage.memoryFraction=0.7 --conf spark.eventLog.enabled=true --conf spark.eventLog.dir="hdfs://cdsc0:9000/user/ytchen/eventLogs" --conf spark.akka.threads=20 --conf spark.akka.frameSize=1024 /home/ytchen/cs-bwamem/master/cloud-scale-bwamem/target/cloud-scale-bwamem-0.2.2-assembly.jar cs-bwamem -bfn 1 -bPSW 1 -sbatch 10 -bPSWJNI 1 -jniPath /curr/ytchen0323/shared_lib/jniNative.so -oChoice 2 -oPath hdfs://cdsc0:9000/user/ytchen0323/data/SC_data/HCC1954_100Mreads_tinyPart_1folder.adam -localRef 1 -R "@RG ID:HCC1954 LB:HCC1954 SM:HCC1954" -isSWExtBatched 1 -bSWExtSize 32768 -FPGAAccSWExt 1 -FPGASWExtThreshold 64 -jniSWExtendLibPath "/curr/genomics_spark/shared_lib/jniSWExtend.so" 1 /space/scratch/ReferenceMetadata/human_g1k_v37.fasta hdfs://cdsc0:9000/user/ytchen0323/data/SC_data/HCC1954_100Mreads_tinyPart_1folder.fq 4 | 5 | #SPARK_DRIVER_MEMORY=60g /cluster/spark/spark-1.5.1-bin-hadoop2.4/bin/spark-submit --executor-memory 48g --class cs.ucla.edu.bwaspark.BWAMEMSpark --total-executor-cores 48 --master spark://10.0.1.2:7077 --driver-java-options "-XX:+PrintFlagsFinal" --conf spark.driver.host=10.0.1.2 --conf spark.driver.cores=20 --conf spark.driver.maxResultSize=40g --conf spark.storage.memoryFraction=0.7 --conf spark.eventLog.enabled=true --conf spark.eventLog.dir="hdfs://cdsc0:9000/user/ytchen/eventLogs" --conf spark.akka.threads=20 --conf spark.akka.frameSize=1024 /home/ytchen/cs-bwamem/master/cloud-scale-bwamem/target/cloud-scale-bwamem-0.2.2-assembly.jar cs-bwamem -bfn 1 -bPSW 1 -sbatch 10 -bPSWJNI 1 -jniPath /curr/ytchen0323/shared_lib/jniNative.so -oChoice 2 -oPath hdfs://cdsc0:9000/user/ytchen0323/data/SC_data/HCC1954_100Mreads_tinyPart_1folder.adam -localRef 1 -R "@RG ID:HCC1954 LB:HCC1954 SM:HCC1954" -isSWExtBatched 1 -bSWExtSize 32768 -FPGAAccSWExt 0 -FPGASWExtThreshold 64 -jniSWExtendLibPath "/curr/genomics_spark/shared_lib/jniSWExtend.so" 1 /space/scratch/ReferenceMetadata/human_g1k_v37.fasta hdfs://cdsc0:9000/user/ytchen0323/data/SC_data/HCC1954_100Mreads_tinyPart_1folder.fq 6 | 7 | SPARK_DRIVER_MEMORY=60g /cluster/spark/spark-1.5.1-bin-hadoop2.4/bin/spark-submit --executor-memory 48g --class 
cs.ucla.edu.bwaspark.BWAMEMSpark --total-executor-cores 20 --master local[20] --driver-java-options "-XX:+PrintFlagsFinal" --conf spark.driver.host=10.0.1.2 --conf spark.driver.cores=20 --conf spark.driver.maxResultSize=40g --conf spark.storage.memoryFraction=0.7 --conf spark.eventLog.enabled=true --conf spark.eventLog.dir="hdfs://cdsc0:9000/user/ytchen/eventLogs" --conf spark.akka.threads=20 --conf spark.akka.frameSize=1024 /home/ytchen/cs-bwamem/master/cloud-scale-bwamem/target/cloud-scale-bwamem-0.2.2-assembly.jar cs-bwamem -bfn 1 -bPSW 1 -sbatch 10 -bPSWJNI 1 -jniPath /curr/ytchen0323/shared_lib/jniNative.so -oChoice 2 -oPath hdfs://cdsc0:9000/user/ytchen0323/data/SC_data/HCC1954_100Mreads_tinyPart_1folder.adam -localRef 1 -R "@RG ID:HCC1954 LB:HCC1954 SM:HCC1954" -isSWExtBatched 1 -bSWExtSize 32768 -FPGAAccSWExt 0 -FPGASWExtThreshold 64 -jniSWExtendLibPath "/curr/genomics_spark/shared_lib/jniSWExtend.so" 1 /space/scratch/ReferenceMetadata/human_g1k_v37.fasta hdfs://cdsc0:9000/user/ytchen0323/data/SC_data/HCC1954_100Mreads_tinyPart_1folder.fq 8 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/datatype/MemOptType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.datatype 20 | 21 | import java.io.ObjectInputStream 22 | import java.io.ObjectOutputStream 23 | 24 | import scala.math.log 25 | import scala.Serializable 26 | 27 | class MemOptType extends Serializable { 28 | var a : Int = 1; 29 | var b : Int = 4; 30 | var oDel : Int = 6; 31 | var eDel : Int = 1; 32 | var oIns : Int = 6; 33 | var eIns : Int = 1; 34 | var penUnpaired : Int = 17; 35 | var penClip5 : Int = 5; 36 | var penClip3 : Int = 5; 37 | var w : Int = 100; 38 | var zdrop : Int = 100; 39 | 40 | var T : Int = 30; 41 | var flag: Int = 0; 42 | var minSeedLen: Int = 19; 43 | var splitFactor: Float = 1.5f; 44 | var splitWidth: Int = 10; 45 | var maxOcc: Int = 10000; 46 | var maxChainGap: Int = 10000; 47 | 48 | var chunkSize: Int = 10000000; 49 | var maskLevel: Float = 0.50f; 50 | var chainDropRatio: Float = 0.50f; 51 | var maskLevelRedun: Float = 0.95f; 52 | var mapQCoefLen: Float = 50f; 53 | var mapQCoefFac: Int = log(mapQCoefLen).asInstanceOf[Int] 54 | var maxIns: Int = 10000; 55 | var maxMatesw: Int = 100; 56 | var mat: Array[Byte] = new Array[Byte](25);//all initialized to 0 57 | 58 | private def bwaFillScmat(){ 59 | var k = 0 60 | for(i <- 0 to 3){ 61 | for(j <- 0 to 3){ 62 | var temp = ( if (i == j) a else -b) 63 | 64 | mat(k) = temp.asInstanceOf[Byte] 65 | k = k + 1 66 | } 67 | mat(k) = -1 68 | k = k + 1 69 | } 70 | for(j <- 0 to 4){ 71 | mat(k) = -1 72 | k = k + 1 73 | } 74 | 75 | } 76 | 77 | def load() { 78 | bwaFillScmat() 79 | } 80 | 81 | private def writeObject(out: ObjectOutputStream) { 82 | out.writeInt(a) 83 | out.writeInt(b) 84 | out.writeInt(oDel) 85 | out.writeInt(eDel) 86 | out.writeInt(oIns) 87 | out.writeInt(eIns) 88 | out.writeInt(penUnpaired) 89 | out.writeInt(penClip5) 90 | out.writeInt(penClip3) 91 | out.writeInt(w) 92 | out.writeInt(zdrop) 93 | out.writeInt(T) 94 | out.writeInt(flag) 95 | out.writeInt(minSeedLen) 96 | out.writeFloat(splitFactor) 97 | out.writeInt(splitWidth) 98 | out.writeInt(maxOcc) 99 | out.writeInt(maxChainGap) 100 | out.writeInt(chunkSize) 101 | out.writeFloat(maskLevel) 102 | out.writeFloat(chainDropRatio) 103 | out.writeFloat(maskLevelRedun) 104 | out.writeFloat(mapQCoefLen) 105 | out.writeInt(mapQCoefFac) 106 | out.writeInt(maxIns) 107 | out.writeInt(maxMatesw) 108 | out.writeObject(mat) 109 | } 110 | 111 | private def readObject(in: ObjectInputStream) { 112 | a = in.readInt 113 | b = in.readInt 114 | oDel = in.readInt 115 | eDel = in.readInt 116 | oIns = in.readInt 117 | eIns = in.readInt 118 | penUnpaired = in.readInt 119 | penClip5 = in.readInt 120 | penClip3 = in.readInt 121 | w = in.readInt 122 | zdrop = in.readInt 123 | T = in.readInt 124 | flag = in.readInt 125 | minSeedLen = in.readInt 126 | splitFactor = in.readFloat 127 | splitWidth = in.readInt 128 | maxOcc = in.readInt 129 | maxChainGap = in.readInt 130 | chunkSize = in.readInt 131 | maskLevel = in.readFloat 132 | chainDropRatio = in.readFloat 133 | maskLevelRedun = in.readFloat 134 | mapQCoefLen = in.readFloat 135 | mapQCoefFac = in.readInt 136 | maxIns = in.readInt 137 | maxMatesw = in.readInt 138 | mat = in.readObject.asInstanceOf[Array[Byte]] 139 | } 140 | 141 | private def readObjectNoData() { 142 | 143 | } 144 | 145 | } 146 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/util/BNTSeqUtil.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | 
* contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.util 20 | 21 | import scala.util.control.Breaks._ 22 | 23 | import cs.ucla.edu.bwaspark.datatype.BNTSeqType 24 | 25 | object BNTSeqUtil { 26 | /** 27 | * Retrieve the reference sequence 28 | * This private function is used by memChainToAln() 29 | * scala: l_pac, pac, rmax[0], rmax[1], return &rlen, return rseq 30 | * c: l_pac, pac, beg, end, len, return rseq 31 | * 32 | * @param pacLen the length of the PAC array 33 | * @param pac the PAC array 34 | * @param beg the reference begin 35 | * @param end the reference end 36 | */ 37 | def bnsGetSeq(pacLen: Long, pac: Array[Byte], beg: Long, end: Long) : (Array[Byte], Long) = { 38 | var endVar: Long = 0//for swapping 39 | var begVar: Long = 0//for swapping 40 | if(end < beg) {//if end is smaller, swap 41 | endVar = beg 42 | begVar = end 43 | } 44 | else {//else keep the value 45 | endVar = end 46 | begVar = beg 47 | } 48 | if(endVar > (pacLen<<1)) endVar = pacLen<<1 49 | if(begVar < 0) begVar = 0 50 | var rLen: Long = endVar - begVar// for return rlen 51 | var seq: Array[Byte] = new Array[Byte](rLen.toInt)//for return seq 52 | 53 | if(begVar >= pacLen || endVar <= pacLen) { 54 | var k: Long = 0 55 | var l: Int = 0 56 | if( begVar >= pacLen ) {//reverse strand 57 | var begF: Long = (pacLen<<1) - 1 - endVar 58 | var endF: Long = (pacLen<<1) - 1 - begVar 59 | k = endF 60 | while(k >= (begF + 1)) { 61 | seq(l) = (3 - ( pac((k>>>2).toInt) >>> (((~k)&3) <<1) ) & 3).toByte // Inline getPac(pac, k) 62 | l += 1 63 | k -= 1 64 | } 65 | } 66 | else { 67 | k = begVar 68 | while(k < endVar) { 69 | seq(l) = (( pac((k>>>2).toInt) >>> (((~k)&3) <<1) ) & 3).toByte // Inline getPac(pac, k) 70 | k += 1 71 | l += 1 72 | } 73 | } 74 | } 75 | else // if bridging the forward-reverse boundary, return nothing 76 | rLen = 0 77 | 78 | (seq, rLen)//return a Tuple 79 | } 80 | 81 | /** 82 | * Realize: #define _get_pac(pac, l) ((pac)[(l)>>2]>>((~(l)&3)<<1)&3) 83 | * Used by bnsGetSeq() 84 | * 85 | * @param pac PAC array 86 | * @param l 87 | */ 88 | private def getPac(pac: Array[Byte], l: Long) : Long = { 89 | var pacValue: Long = ( pac((l>>>2).toInt) >>> (((~l)&3) <<1) ) & 3 90 | pacValue 91 | } 92 | 93 | 94 | /** 95 | * bnsDepos: 96 | * 97 | * @param bns the bns object 98 | * @param pos the position in the reference 99 | */ 100 | def bnsDepos(bns: BNTSeqType, pos: Long): (Long, Int) = { 101 | var isRev = 0 102 | if(pos >= bns.l_pac) isRev = 1 103 | else isRev = 0 104 | 105 | if(isRev == 1) ((bns.l_pac << 1) - 1 - pos, 1) 106 | else (pos, 0) 107 | } 108 | 109 | 110 | /** 111 | * bnsPosToRid 112 | * 113 | * @param bns the bns object 114 | * @param posF 115 | */ 116 | def bnsPosToRid(bns: BNTSeqType, posF: Long): Int = { 117 | if(posF >= bns.l_pac) -1 118 | else { 119 | var left = 
0
120 | var mid = 0
121 | var right = bns.n_seqs
122 |
123 | // binary search
124 | var isBreak = false
125 | while(left < right && !isBreak) {
126 | mid = (left + right) >> 1
127 |
128 | if(posF >= bns.anns(mid).offset) {
129 | if(mid == bns.n_seqs - 1) isBreak = true
130 | else if(posF < bns.anns(mid + 1).offset) isBreak = true
131 | if(!isBreak) left = mid + 1
132 | }
133 | else
134 | right = mid
135 | }
136 |
137 | mid
138 | }
139 | }
140 |
141 | }
142 |
-------------------------------------------------------------------------------- /src/pom.xml: --------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 | <modelVersion>4.0.0</modelVersion>
4 |
5 | <groupId>cs.ucla.edu</groupId>
6 | <artifactId>jniNative</artifactId>
7 | <version>0.2.2</version>
8 | <name>JNI Pair-End Smith-Waterman AVX Library</name>
9 |
10 | <packaging>so</packaging>
11 |
12 | <dependencies>
13 | <dependency>
14 | <groupId>cs.ucla.edu</groupId>
15 | <artifactId>cloud-scale-bwamem</artifactId>
16 | <version>0.2.2</version>
17 | <type>jar</type>
18 | <scope>system</scope>
19 | <systemPath>/curr/pengwei/github/cloud-scale-bwamem/target/cloud-scale-bwamem-0.2.2.jar</systemPath>
20 | </dependency>
21 | </dependencies>
22 |
23 | <build>
24 | <plugins>
25 | <plugin>
26 | <artifactId>maven-compiler-plugin</artifactId>
27 | <configuration>
28 | <source>1.6</source>
29 | <target>1.6</target>
30 | </configuration>
31 | </plugin>
32 |
33 | <plugin>
34 | <groupId>org.codehaus.mojo</groupId>
35 | <artifactId>native-maven-plugin</artifactId>
36 | <extensions>true</extensions>
37 | <configuration>
38 |
39 | <javahOS>linux</javahOS>
40 |
41 | <compilerProvider>generic-classic</compilerProvider>
42 | <compilerExecutable>gcc</compilerExecutable>
43 |
44 | <linkerExecutable>gcc</linkerExecutable>
45 |
46 | <sources>
47 | <source>
48 | <directory>./main/native/</directory>
49 | <includes>
50 | <include>jni_hello_world.c</include>
51 | <include>jni_mate_sw.c</include>
52 | <include>bwt.c</include>
53 | <include>bwa.c</include>
54 | <include>bntseq.c</include>
55 | <include>bwamem.c</include>
56 | <include>bwamem_pair.c</include>
57 | <include>ksw.c</include>
58 | <include>kstring.c</include>
59 | <include>utils.c</include>
60 | </includes>
61 | </source>
62 | </sources>
63 |
64 | <compilerStartOptions>
65 | <compilerStartOption>-s -fPIC -O2</compilerStartOption>
66 | </compilerStartOptions>
67 |
68 | <linkerStartOptions>
69 | <linkerStartOption>-shared -lm</linkerStartOption>
70 | </linkerStartOptions>
71 |
72 | </configuration>
73 |
74 | <executions>
75 | <execution>
76 | <id>javah</id>
77 | <phase>generate-sources</phase>
78 | <configuration>
79 | <javahOS>linux</javahOS>
80 | <javahProvider>default</javahProvider>
81 |
82 | <javahOutputDirectory>../target/custom-javah</javahOutputDirectory>
83 | <workingDirectory>../target</workingDirectory>
84 | <javahOutputFileName>jni_mate_sw.h</javahOutputFileName>
85 | <javahClassNames>
86 | <javahClassName>cs.ucla.edu.bwaspark.jni.HelloWorld</javahClassName>
87 | <javahClassName>cs.ucla.edu.bwaspark.jni.MateSWType</javahClassName>
88 | <javahClassName>cs.ucla.edu.bwaspark.jni.SeqSWType</javahClassName>
89 | <javahClassName>cs.ucla.edu.bwaspark.jni.MateSWJNI</javahClassName>
90 | <javahClassName>cs.ucla.edu.bwaspark.datatype.MemAlnRegType</javahClassName>
91 | <javahClassName>cs.ucla.edu.bwaspark.datatype.MemOptType</javahClassName>
92 | <javahClassName>cs.ucla.edu.bwaspark.datatype.MemPeStat</javahClassName>
93 | <javahClassName>cs.ucla.edu.bwaspark.datatype.FASTQSingleNode</javahClassName>
94 | </javahClassNames>
95 | </configuration>
96 | <goals>
97 | <goal>javah</goal>
98 | </goals>
99 | </execution>
100 | </executions>
101 | </plugin>
102 | </plugins>
103 | </build>
104 |
105 | <profiles>
106 | <profile>
107 | <id>otherOutputDir</id>
108 | <build>
109 | <directory>../target</directory>
110 | </build>
111 | </profile>
112 | </profiles>
113 |
114 | </project>
-------------------------------------------------------------------------------- /src/main/native/bwt.h: --------------------------------------------------------------------------------
1 | /* The MIT License
2 |
3 | Copyright (c) 2008 Genome Research Ltd (GRL).
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining
6 | a copy of this software and associated documentation files (the
7 | "Software"), to deal in the Software without restriction, including
8 | without limitation the rights to use, copy, modify, merge, publish,
9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 | */
25 |
26 | /* Contact: Heng Li */
27 |
28 | #ifndef BWA_BWT_H
29 | #define BWA_BWT_H
30 |
31 | #include <stdint.h>
32 | #include <stddef.h>
33 |
34 | // requirement: (OCC_INTERVAL%16 == 0); please DO NOT change this line because some part of the code assume OCC_INTERVAL=0x80
35 | #define OCC_INTV_SHIFT 7
36 | #define OCC_INTERVAL (1LL<<OCC_INTV_SHIFT)
37 | #define OCC_INTV_MASK (OCC_INTERVAL - 1)
38 |
39 | #ifndef BWA_UBYTE
40 | #define BWA_UBYTE
41 | typedef unsigned char ubyte_t;
42 | #endif
43 | typedef uint64_t bwtint_t;
44 |
45 | typedef struct {
46 | bwtint_t primary; // S^{-1}(0), or the primary index of BWT
47 | bwtint_t L2[5]; // C(), cumulative count
48 | bwtint_t seq_len; // sequence length
49 | bwtint_t bwt_size; // size of bwt, about seq_len/4
50 | uint32_t *bwt; // BWT
51 | // occurrence array, separated into two parts
52 | uint32_t cnt_table[256];
53 | // suffix array
54 | int sa_intv;
55 | bwtint_t n_sa;
56 | bwtint_t *sa;
57 | } bwt_t;
58 |
59 | typedef struct {
60 | bwtint_t x[3], info;
61 | } bwtintv_t;
62 |
63 | typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v;
64 |
65 |
66 | /* For general OCC_INTERVAL, the following is correct:
67 | #define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16])
68 | #define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4)
69 | */
70 |
71 | // The following two lines are ONLY correct when OCC_INTERVAL==0x80
72 | #define bwt_bwt(b, k) ((b)->bwt[((k)>>7<<4) + sizeof(bwtint_t) + (((k)&0x7f)>>4)])
73 | #define bwt_occ_intv(b, k) ((b)->bwt + ((k)>>7<<4))
74 |
75 | /* retrieve a character from the $-removed BWT string. Note that
76 | * bwt_t::bwt is not exactly the BWT string and therefore this macro is
77 | * called bwt_B0 instead of bwt_B */
78 | #define bwt_B0(b, k) (bwt_bwt(b, k)>>((~(k)&0xf)<<1)&3)
79 |
80 | #define bwt_set_intv(bwt, c, ik) ((ik).x[0] = (bwt)->L2[(int)(c)]+1, (ik).x[2] = (bwt)->L2[(int)(c)+1]-(bwt)->L2[(int)(c)], (ik).x[1] = (bwt)->L2[3-(c)]+1, (ik).info = 0)
81 |
82 | #ifdef __cplusplus
83 | extern "C" {
84 | #endif
85 |
86 | void bwt_dump_bwt(const char *fn, const bwt_t *bwt);
87 | void bwt_dump_sa(const char *fn, const bwt_t *bwt);
88 |
89 | bwt_t *bwt_restore_bwt(const char *fn);
90 | void bwt_restore_sa(const char *fn, bwt_t *bwt);
91 |
92 | void bwt_destroy(bwt_t *bwt);
93 |
94 | void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW
95 | void bwt_cal_sa(bwt_t *bwt, int intv);
96 |
97 | void bwt_bwtupdate_core(bwt_t *bwt);
98 |
99 | bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c);
100 | void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4]);
101 | bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k);
102 |
103 | // more efficient version of bwt_occ/bwt_occ4 for retrieving two close Occ values
104 | void bwt_gen_cnt_table(bwt_t *bwt);
105 | void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol);
106 | void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4]);
107 |
108 | int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end);
109 | int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0);
110 |
111 | /**
112 | * Extend bi-SA-interval _ik_
113 | */
114 | void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back);
115 |
116 | /**
117 | * Given a query _q_, collect potential SMEMs covering position _x_ and store them in _mem_.
118 | * Return the end of the longest exact match starting from _x_.
119 | */
120 | int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
121 |
122 | // SMEM iterator interface
123 |
124 | #ifdef __cplusplus
125 | }
126 | #endif
127 |
128 | #endif
129 |
-------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/worker1/SAPos2RefPos.scala: --------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.worker1 20 | 21 | import cs.ucla.edu.bwaspark.datatype._ 22 | 23 | //standalone object for transforming from position in suffix array 24 | //to position in reference genome 25 | object SAPos2RefPos { 26 | 27 | def occAux(y: Long, c: Int): Int = { 28 | 29 | //println("The previous y is: " + y) 30 | 31 | var res1 = 0l 32 | var res2 = 0l 33 | 34 | var tmp1 = 0l 35 | var tmp2 = 0l 36 | var tmp3 = 0l 37 | 38 | if ( (c & 2) != 0 ) tmp1 = y else tmp1 = ~y 39 | 40 | tmp1 = tmp1 >>> 1 41 | 42 | if ( (c & 1) != 0 ) tmp2 = y else tmp2 = ~y 43 | 44 | tmp3 = 0x5555555555555555l 45 | 46 | res1 = tmp1 & tmp2 & tmp3 47 | 48 | //println("The 1st result is: " + res1) 49 | 50 | tmp3 = 0x3333333333333333l 51 | 52 | tmp1 = (res1 & tmp3) 53 | 54 | tmp2 = (res1 >>> 2) & tmp3 55 | 56 | res2 = tmp1 + tmp2 57 | 58 | //println("The 2nd result is: " + res2) 59 | 60 | tmp3 = 0x0f0f0f0f0f0f0f0fl 61 | 62 | tmp1 = (res2 + (res2 >>> 4)) & tmp3 63 | 64 | tmp1 = tmp1 * 0x0101010101010101l 65 | 66 | //println("The return value is: " + (tmp1 >>> 56).toInt) 67 | 68 | (tmp1 >>> 56).toInt 69 | } 70 | 71 | 72 | 73 | def bwtOcc(bwt: BWTType, pos: Long, x: Long): Long = { 74 | var n: Long = 0l 75 | var k = pos 76 | 77 | //transform c from Long to ubyte_t?(Int instead) 78 | var c = (x & 0xffl).toInt 79 | 80 | if (k == bwt.seqLen) bwt.L2(c+1) - bwt.L2(c) 81 | else if (k == -1l) 0l //the original is (uint64_t)(-1) 82 | else { 83 | if (k >= bwt.primary) k = k - 1 84 | 85 | //println(k) 86 | //calculate new pointer position 87 | var newStartPoint = ((k >>> 7) << 4).toInt 88 | //println(c) 89 | 90 | n = (bwt.bwt(c * 2 + 1 + newStartPoint)).toLong 91 | n = n << 32 92 | n = n + (bwt.bwt(c * 2 + newStartPoint)).toLong 93 | //println ("The n in bwtOcc is: " + n) 94 | 95 | //jump to the start of the first bwt cell 96 | 97 | newStartPoint += 8 //size of Long: 8 bytes 98 | 99 | val occIntvMask = (1l << 7) - 1l 100 | 101 | var newEndPoint = newStartPoint + (((k >>> 5) - ((k & ~occIntvMask) >>> 5)) << 1).toInt 102 | //println((((k >>> 5) - ((k & ~occIntvMask) >>> 5)) << 1)) 103 | //println("newStartPoint: " + newStartPoint + ", newEndPoint: " + newEndPoint + ", diff: " + (newEndPoint - newStartPoint)) 104 | while (newStartPoint < newEndPoint) { 105 | n = n + occAux(((bwt.bwt(newStartPoint).toLong << 32) | (bwt.bwt(newStartPoint + 1).toLong << 32 >>> 32)), c) 106 | newStartPoint += 2 107 | } 108 | //println ("The n after loop is: " + n) 109 | //println (bwt.bwt(newStartPoint.toInt)) 110 | //println (bwt.bwt(newStartPoint.toInt + 1)) 111 | n += occAux(((bwt.bwt(newStartPoint).toLong << 32) | (bwt.bwt(newStartPoint + 1).toLong << 32 >>> 32)) & ~((1l << ((~k & 31) << 1)) - 1), c) 112 | //println (n) 113 | if (c == 0) n -= ~k & 31 114 | //println ("The final n is: " + n) 115 | n 116 | 117 | } 118 | } 119 | 120 | //compute inverse CSA 121 | def bwtInvPsi(bwt:BWTType, k: Long): Long = { 122 | var x: 
Long = if (k > bwt.primary) k - 1 else k 123 | 124 | //println("The x before bwt_B0 is " + x) 125 | var bwtBwt = bwt.bwt(((x >>> 7 << 4) + 8 + ((x & 0x7f) >>> 4)).toInt) 126 | //println(((x >>> 7 << 4) + 8 + ((x & 0x7f) >>> 4))) 127 | //println(bwtBwt) 128 | //println((((~x & 0xf) << 1) & 3)) 129 | //println(bwtBwt >>> (((~x & 0xf) << 1) & 3)) 130 | x = bwtBwt >>> (((~x & 0xf) << 1).toInt) & 3 131 | //println("The x for bwt L2 access is " + x) 132 | //println(bwt.L2(x.toInt)) 133 | //println(bwtOcc(bwt, k, x)) 134 | x = bwt.L2(x.toInt) + bwtOcc(bwt, k, x) 135 | //println("The x for return is " + x) 136 | if (k == bwt.primary) 0 137 | else x 138 | } 139 | 140 | def suffixArrayPos2ReferencePos(bwt: BWTType, k: Long /*uint64_t*/): Long /*uint64_t*/ = { 141 | 142 | //initialization 143 | var sa = 0l 144 | var mask = (bwt.saIntv - 1).toLong 145 | 146 | var pos = k 147 | 148 | while ( (pos & mask) != 0 ) { 149 | sa += 1l 150 | pos = bwtInvPsi(bwt, pos) 151 | } 152 | 153 | sa + bwt.sa((pos / bwt.saIntv).toInt) 154 | 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/worker1/BWAMemWorker1.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.worker1 20 | 21 | import cs.ucla.edu.bwaspark.datatype._ 22 | import scala.collection.mutable.MutableList 23 | import java.util.TreeSet 24 | import java.util.Comparator 25 | import cs.ucla.edu.bwaspark.worker1.MemChain._ 26 | import cs.ucla.edu.bwaspark.worker1.MemChainFilter._ 27 | import cs.ucla.edu.bwaspark.worker1.MemChainToAlign._ 28 | import cs.ucla.edu.bwaspark.worker1.MemSortAndDedup._ 29 | import cs.ucla.edu.bwaspark.util.LocusEncode._ 30 | import cs.ucla.edu.avro.fastq._ 31 | 32 | //this standalone object defines the main job of BWA MEM: 33 | //1)for each read, generate all the possible seed chains 34 | //2)using SW algorithm to extend each chain to all possible aligns 35 | object BWAMemWorker1 { 36 | 37 | /** 38 | * Perform BWAMEM worker1 function for single-end alignment 39 | * 40 | * @param opt the MemOptType object, BWAMEM options 41 | * @param bwt BWT and Suffix Array 42 | * @param bns .ann, .amb files 43 | * @param pac .pac file (PAC array: uint8_t) 44 | * @param pes pes array for worker2 45 | * @param seq a read 46 | * 47 | * Return: a read with alignments 48 | */ 49 | def bwaMemWorker1(opt: MemOptType, //BWA MEM options 50 | bwt: BWTType, //BWT and Suffix Array 51 | bns: BNTSeqType, //.ann, .amb files 52 | pac: Array[Byte], //.pac file uint8_t 53 | pes: Array[MemPeStat], //pes array 54 | seq: FASTQRecord //a read 55 | ): ReadType = { //all possible alignment 56 | 57 | val seqStr = new String(seq.getSeq.array) 58 | val read: Array[Byte] = seqStr.toCharArray.map(ele => locusEncode(ele)) 59 | 60 | //first step: generate all possible MEM chains for this read 61 | val chains = generateChains(opt, bwt, bns.l_pac, seq.getSeqLength, read) 62 | 63 | //second step: filter chains 64 | val chainsFiltered = memChainFilter(opt, chains) 65 | 66 | val readRet = new ReadType 67 | readRet.seq = seq 68 | 69 | if (chainsFiltered == null) { 70 | readRet.regs = null 71 | } 72 | else { 73 | // build the references of the seeds in each chain 74 | var totalSeedNum = 0 75 | chainsFiltered.foreach(chain => { 76 | totalSeedNum += chain.seeds.length 77 | } ) 78 | 79 | //third step: for each chain, from chain to aligns 80 | var regArray = new MemAlnRegArrayType 81 | regArray.maxLength = totalSeedNum 82 | regArray.regs = new Array[MemAlnRegType](totalSeedNum) 83 | 84 | for (i <- 0 until chainsFiltered.length) { 85 | memChainToAln(opt, bns.l_pac, pac, seq.getSeqLength, read, chainsFiltered(i), regArray) 86 | } 87 | 88 | regArray.regs = regArray.regs.filter(r => (r != null)) 89 | regArray.maxLength = regArray.regs.length 90 | assert(regArray.curLength == regArray.maxLength, "[Error] After filtering array elements") 91 | 92 | //last step: sorting and deduplication 93 | regArray = memSortAndDedup(regArray, opt.maskLevelRedun) 94 | readRet.regs = regArray.regs 95 | } 96 | 97 | readRet 98 | } 99 | 100 | 101 | /** 102 | * Perform BWAMEM worker1 function for pair-end alignment 103 | * 104 | * @param opt the MemOptType object, BWAMEM options 105 | * @param bwt BWT and Suffix Array 106 | * @param bns .ann, .amb files 107 | * @param pac .pac file (PAC array: uint8_t) 108 | * @param pes pes array for worker2 109 | * @param pairSeqs a read with both ends 110 | * 111 | * Return: a read with alignments on both ends 112 | */ 113 | def pairEndBwaMemWorker1(opt: MemOptType, //BWA MEM options 114 | bwt: BWTType, //BWT and Suffix Array 115 | bns: BNTSeqType, //.ann, .amb files 116 | pac: Array[Byte], //.pac file uint8_t 117 | pes: Array[MemPeStat], //pes array 118 | 
pairSeqs: PairEndFASTQRecord //a read
119 | ): PairEndReadType = { //all possible alignment
120 |
121 | val read0 = bwaMemWorker1(opt, bwt, bns, pac, pes, pairSeqs.seq0)
122 | val read1 = bwaMemWorker1(opt, bwt, bns, pac, pes, pairSeqs.seq1)
123 | var pairEndRead = new PairEndReadType
124 | pairEndRead.seq0 = read0.seq
125 | pairEndRead.regs0 = read0.regs
126 | pairEndRead.seq1 = read1.seq
127 | pairEndRead.regs1 = read1.regs
128 |
129 | pairEndRead // return
130 | }
131 | }
132 |
-------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/Usage.scala: --------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 |
19 | package cs.ucla.edu.bwaspark
20 |
21 | object Usage {
22 | val usage: String = "Usage 1: upload raw FASTQ file(s) to HDFS\n" +
23 | "Usage: upload-fastq [-bn INT] isPairEnd filePartitionNum inputFASTQFilePath1 [inputFASTQFilePath2] outFileHDFSPath\n\n" +
24 | "Required arguments (in the following order): \n" +
25 | "isPairEnd: pair-end (1) or single-end (0) data\n" +
26 | "inputFASTQFilePath1: the first input path of the FASTQ file in the local file system (for both single-end and pair-end)\n" +
27 | "inputFASTQFilePath2: (optional) the second input path of the FASTQ file in the local file system (for pair-end)\n" +
28 | "outFileHDFSPath: the root path of the output FASTQ files in HDFS\n\n" +
29 | "Optional arguments: \n" +
30 | "-bn (optional): the number of lines to be read in one group (batch)\n\n\n" +
31 | "Usage 2: use CS-BWAMEM aligner\n" +
32 | "Usage: cs-bwamem [-bfn INT] [-bPSW (0/1)] [-sbatch INT] [-bPSWJNI (0/1)] [-jniPath STRING] [-oChoice (0/1/2)] [-oPath STRING] [-localRef INT] [-R STRING] [-isSWExtBatched (0/1)] [-bSWExtSize INT] [-FPGAAccSWExt (0/1)] [-FPGASWExtThreshold INT] isPairEnd fastaInputPath fastqHDFSInputPath\n\n" +
33 | "Required arguments (in the following order): \n" +
34 | "isPairEnd: perform pair-end (1) or single-end (0) mapping\n" +
35 | "fastaInputPath: the path of (local) BWA index files (bns, pac, and so on)\n" +
36 | "fastqHDFSInputPath: the path of the raw read files stored in HDFS\n\n" +
37 | "Optional arguments: \n" +
38 | "-bfn (optional): the number of raw read folders in a batch to be processed\n" +
39 | "-bPSW (optional): whether the pair-end Smith Waterman is performed in a batched way\n" +
40 | "-sbatch (optional): the number of reads to be processed in a subbatch\n" +
41 | "-bPSWJNI (optional): whether the native JNI library is called for better performance\n" +
42 | "-jniPath (optional): the JNI library path on the local machine\n" +
43 | "-oChoice (optional): the output format choice\n" +
44 | "               0: no output (pure computation)\n" +
45 | "               1: SAM file output in the local file system (default)\n" +
46 | "               2: ADAM format output in the distributed file system\n" +
47 | "               3: SAM format output in the distributed file system\n" +
48 | "-oPath (optional): the output path; users need to provide a correct path in the local or distributed file system\n\n" +
49 | "-localRef (optional): specify whether each node has the reference genome locally. If so, our tool can fetch the reference genome from the local node instead of broadcasting it from the driver node. Note that the reference genome should be placed at the same path on every node and specified in the \"fastaInputPath\" parameter.\n\n" +
50 | "-R (should be set for the common use case): the complete read group header line. Example: @RG\tID:foo\tSM:bar\n\n" +
51 | "-isSWExtBatched (optional): whether SWExtend is executed in a batched way\n" +
52 | "               0: No (default)\n" +
53 | "               1: Yes\n\n" +
54 | "-bSWExtSize (optional): the batch size used for SWExtend\n\n" +
55 | "-FPGAAccSWExt (optional): whether the FPGA accelerator is used for accelerating SWExtend\n" +
56 | "               0: No (default)\n" +
57 | "               1: Yes\n\n" +
58 | "-FPGASWExtThreshold (optional): the threshold for using the FPGA accelerator for SWExtend.\n" +
59 | "               If the number of seeds in one step is larger than this threshold, FPGA acceleration will be applied. Otherwise, the CPU is used for computation.\n\n\n" +
60 | "Usage 3: merge the output ADAM folder pieces and save as a new ADAM file in HDFS\n" +
61 | "Usage: merge hdfsServerAddress adamHDFSRootInputPath adamHDFSOutputPath\n\n\n" +
62 | "Usage 4: sort the output ADAM folder pieces and save as a new ADAM file in HDFS\n" +
63 | "Usage: sort hdfsServerAddress adamHDFSRootInputPath adamHDFSOutputPath\n"
64 | }
65 |
-------------------------------------------------------------------------------- /src/main/native/ksw.h: --------------------------------------------------------------------------------
1 | #ifndef __AC_KSW_H
2 | #define __AC_KSW_H
3 |
4 | #include <stdint.h>
5 |
6 | #define KSW_XBYTE 0x10000
7 | #define KSW_XSTOP 0x20000
8 | #define KSW_XSUBO 0x40000
9 | #define KSW_XSTART 0x80000
10 |
11 | struct _kswq_t;
12 | typedef struct _kswq_t kswq_t;
13 |
14 | typedef struct {
15 | int score; // best score
16 | int te, qe; // target end and query end
17 | int score2, te2; // second best score and ending position on the target
18 | int tb, qb; // target start and query start
19 | } kswr_t;
20 |
21 | #ifdef __cplusplus
22 | extern "C" {
23 | #endif
24 |
25 | /**
26 | * Aligning two sequences
27 | *
28 | * @param qlen length of the query sequence (typically =0, *gscore keeps the best score such that
93 | * the entire query sequence is aligned; *gtle keeps the position on the
94 | * target where *gscore is achieved. Returning *gscore and *gtle helps the
95 | * caller to decide whether an end-to-end hit or a partial hit is preferred.
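* (Illustrative sketch, not part of the original header: a typical call from a
* seed-extension step, matching the ksw_extend2() prototype declared below;
* all variable names here are placeholders.)
*   int qle, tle, gtle, gscore, max_off;
*   int sc = ksw_extend2(qlen, query, tlen, target, 5, mat, o_del, e_del,
*                        o_ins, e_ins, w, end_bonus, zdrop, h0,
*                        &qle, &tle, &gtle, &gscore, &max_off);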
96 | * 97 | * The first 9 parameters are identical to those in ksw_global() 98 | * 99 | * @param h0 alignment score of upstream sequences 100 | * @param _qle (out) length of the query in the alignment 101 | * @param _tle (out) length of the target in the alignment 102 | * @param _gtle (out) length of the target if query is fully aligned 103 | * @param _gscore (out) score of the best end-to-end alignment; negative if not found 104 | * 105 | * @return best semi-local alignment score 106 | */ 107 | int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off); 108 | int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off); 109 | 110 | #ifdef __cplusplus 111 | } 112 | #endif 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/datatype/BNTSeqType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.datatype 20 | 21 | import java.io._ 22 | import scala.Serializable 23 | import org.apache.hadoop.conf.Configuration; 24 | import org.apache.hadoop.fs.FileSystem; 25 | import org.apache.hadoop.fs.FSDataInputStream; 26 | import org.apache.hadoop.fs.FSDataOutputStream; 27 | import org.apache.hadoop.fs.Path; 28 | 29 | class BNTSeqType extends Serializable { 30 | //length of contents in .pac file 31 | var l_pac: Long = _ 32 | 33 | //length of contents in .ann file 34 | var n_seqs: Int = _ 35 | 36 | //!!!to add!!! 
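//seed: random-generator seed, read from the third field of the first line of the .ann file (a fixed constant in upstream BWA)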
37 | var seed: Int = _ 38 | 39 | //maintaining contents in .ann file 40 | var anns: Array[BNTAnnType] = _ 41 | 42 | //length of contents in .amb file 43 | var n_holes: Int = _ 44 | 45 | //maintaining contents in .amb file 46 | var ambs: Array[BNTAmbType] = _ 47 | 48 | //There is a file pointer to .pac file in original BWA, 49 | //but it seems to be useless 50 | 51 | //loading .ann .amb files 52 | def load(prefix: String) { 53 | 54 | //define a loader for .ann file 55 | def annLoader(filename: String): (Array[BNTAnnType], Long, Int, Int) = { 56 | val conf = new Configuration 57 | val fs = FileSystem.get(conf) 58 | val path = new Path(filename) 59 | var annBufferedReader: BufferedReader = null 60 | if (fs.exists(path)) { 61 | annBufferedReader = new BufferedReader(new InputStreamReader(fs.open(path))) 62 | } 63 | else { 64 | annBufferedReader = new BufferedReader(new FileReader(filename)) //file reader 65 | } 66 | val headLine = annBufferedReader.readLine.split(" ") //the first line of the file, specify three variables: l_pac, n_seqs, and seed 67 | assert(headLine.length == 3) 68 | val l_pac = headLine(0).toLong 69 | val n_seqs = headLine(1).toInt 70 | val seed = headLine(2).toInt 71 | val anns = new Array[BNTAnnType](n_seqs) //create an array for the contents of .ann file 72 | for (i <- 0 until n_seqs) { //fill in each element 73 | val firstLine = annBufferedReader.readLine.split(" ") 74 | val gi = firstLine(0).toInt 75 | val name = firstLine(1) 76 | val anno = "" // fix me! No annotation at all! 77 | val secondLine = annBufferedReader.readLine.split(" ") 78 | assert(secondLine.length == 3) 79 | val offset = secondLine(0).toLong 80 | val len = secondLine(1).toInt 81 | val n_ambs = secondLine(2).toInt 82 | anns(i) = new BNTAnnType(offset, len, n_ambs, gi, name, anno) 83 | } 84 | annBufferedReader.close() //close file and return 85 | (anns, l_pac, n_seqs, seed) 86 | } 87 | //call annLoader to assign variables: anns, l_pac, n_seqs, and seed 88 | val annResult = annLoader(prefix + ".ann") 89 | anns = annResult._1 90 | l_pac = annResult._2 91 | n_seqs = annResult._3 92 | seed = annResult._4 93 | 94 | //define a loader for .amb file 95 | def ambLoader(filename: String): (Array[BNTAmbType], Int) = { 96 | val conf = new Configuration 97 | val fs = FileSystem.get(conf) 98 | val path = new Path(filename) 99 | var ambBufferedReader: BufferedReader = null 100 | if (fs.exists(path)) { 101 | ambBufferedReader = new BufferedReader(new InputStreamReader(fs.open(path))) 102 | } 103 | else { 104 | ambBufferedReader = new BufferedReader(new FileReader(filename)) //file reader 105 | } 106 | val headLine = ambBufferedReader.readLine.split(" ").map(str => str.toLong) //the first line of the file, specify three variables: l_pac, n_seqs, and n_holes; l_pac and n_seqs are the same as those in .ann file 107 | assert(headLine.length == 3) 108 | val n_holes = headLine(2).toInt 109 | var ambs = new Array[BNTAmbType](n_holes) 110 | for (i <- 0 until n_holes) { 111 | val currLine = ambBufferedReader.readLine.split(" ") 112 | assert(currLine.length == 3) 113 | val offset = currLine(0).toLong 114 | val len = currLine(1).toInt 115 | val amb = currLine(2)(0) 116 | ambs(i) = new BNTAmbType(offset, len, amb) 117 | } 118 | ambBufferedReader.close() //close file and return 119 | (ambs, n_holes) 120 | } 121 | //call ambLoader to assign variables: ambs and n_holes 122 | val ambResult = ambLoader(prefix + ".amb") 123 | ambs = ambResult._1 124 | n_holes = ambResult._2 125 | } 126 | 127 | private def writeObject(out: 
ObjectOutputStream) { 128 | out.writeLong(l_pac) 129 | out.writeInt(n_seqs) 130 | out.writeInt(seed) 131 | out.writeObject(anns) 132 | out.writeInt(n_holes) 133 | out.writeObject(ambs) 134 | } 135 | 136 | private def readObject(in: ObjectInputStream) { 137 | l_pac = in.readLong 138 | n_seqs = in.readInt 139 | seed = in.readInt 140 | anns = in.readObject.asInstanceOf[Array[BNTAnnType]] 141 | n_holes = in.readInt 142 | ambs = in.readObject.asInstanceOf[Array[BNTAmbType]] 143 | } 144 | 145 | private def readObjectNoData() { 146 | 147 | } 148 | 149 | } 150 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/sam/SAMHeader.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.sam 20 | 21 | import cs.ucla.edu.bwaspark.datatype.BNTSeqType 22 | 23 | import java.io.ObjectInputStream 24 | import java.io.ObjectOutputStream 25 | import java.util.Date 26 | import scala.Serializable 27 | 28 | import htsjdk.samtools.{SAMFileHeader,SAMProgramRecord,SAMSequenceRecord,SAMSequenceDictionary,SAMReadGroupRecord} 29 | import htsjdk.samtools.util.Iso8601Date 30 | 31 | class SAMHeader extends Serializable { 32 | var bwaReadGroupID = new String 33 | var readGroupLine = new String 34 | var packageVersion = new String 35 | var bwaPackageLine = new String 36 | 37 | 38 | def bwaGenSAMHeader(bns: BNTSeqType, packageVerIn: String): String = { 39 | packageVersion = packageVerIn 40 | bwaPackageLine = "@PG\tID:bwa\tPN:bwa\tVN:" + packageVersion + "\tCL:" + "bwa" 41 | 42 | var headerStr = new String 43 | var i = 0 44 | while(i < bns.n_seqs) { 45 | headerStr += "@SQ\tSN:" + bns.anns(i).name + "\tLN:" + bns.anns(i).len.toString + '\n' 46 | i += 1 47 | } 48 | if(readGroupLine != "") headerStr = headerStr + readGroupLine + '\n' 49 | headerStr += bwaPackageLine + '\n' 50 | headerStr 51 | } 52 | 53 | 54 | def parseReadGroupString(str: String): SAMReadGroupRecord = { 55 | var id: String = null 56 | var cn: String = null 57 | var ds: String = null 58 | var dt: String = null 59 | var fo: String = null 60 | var ks: String = null 61 | var lb: String = null 62 | var pg: String = null 63 | var pi: String = null 64 | var pl: String = null 65 | var pu: String = null 66 | var sm: String = null 67 | 68 | val strArray = str.split('\t') 69 | 70 | for(strSeg <- strArray) { 71 | val op = strSeg.take(2) 72 | op match { 73 | case "ID" => id = strSeg.drop(3) 74 | case "CN" => cn = strSeg.drop(3) 75 | case "DS" => ds = strSeg.drop(3) 76 | case "DT" => dt = strSeg.drop(3) 77 | case "FO" => fo = strSeg.drop(3) 78 | case "KS" => ks = strSeg.drop(3) 79 | case "LB" => lb = 
strSeg.drop(3) 80 | //case "PG" => pg = strSeg.drop(3) // Note: we omit PG field for now 81 | case "PI" => pi = strSeg.drop(3) 82 | case "PL" => pl = strSeg.drop(3) 83 | case "PU" => pu = strSeg.drop(3) 84 | case "SM" => sm = strSeg.drop(3) 85 | case _ => None 86 | } 87 | } 88 | 89 | if(id != null) { 90 | val samReadGroup = new SAMReadGroupRecord(id) 91 | if(cn != null) 92 | samReadGroup.setSequencingCenter(cn) 93 | if(ds != null) 94 | samReadGroup.setDescription(ds) 95 | if(dt != null) 96 | samReadGroup.setRunDate(new Iso8601Date(dt)) 97 | if(fo != null) 98 | samReadGroup.setFlowOrder(fo) 99 | if(ks != null) 100 | samReadGroup.setKeySequence(ks) 101 | if(lb != null) 102 | samReadGroup.setLibrary(lb) 103 | if(pi != null) 104 | samReadGroup.setPredictedMedianInsertSize(pi.toInt) 105 | if(pl != null) 106 | samReadGroup.setPlatform(pl) 107 | if(pu != null) 108 | samReadGroup.setPlatformUnit(pu) 109 | if(sm != null) 110 | samReadGroup.setSample(sm) 111 | 112 | samReadGroup 113 | } 114 | else { 115 | println("[Error] Undefined group ID") 116 | exit(1) 117 | } 118 | } 119 | 120 | 121 | def bwaGenSAMHeader(bns: BNTSeqType, packageVerIn: String, readGroupString: String, samFileHeader: SAMFileHeader) { 122 | packageVersion = packageVerIn 123 | var samPG = new SAMProgramRecord("cs-bwamem") 124 | // NOTE setCommandLine() needs to be updated 125 | samPG.setProgramName("cs-bwamem") 126 | samPG.setProgramVersion(packageVersion) 127 | samPG.setCommandLine("cs-bwamem") 128 | samFileHeader.addProgramRecord(samPG) 129 | 130 | var samSeqDict = new SAMSequenceDictionary 131 | var i = 0 132 | while(i < bns.n_seqs) { 133 | samSeqDict.addSequence(new SAMSequenceRecord(bns.anns(i).name, bns.anns(i).len)) 134 | i += 1 135 | } 136 | samFileHeader.setSequenceDictionary(samSeqDict) 137 | 138 | val samReadGroup = parseReadGroupString(readGroupString) 139 | samFileHeader.addReadGroup(samReadGroup) 140 | } 141 | 142 | 143 | def bwaSetReadGroup(str: String): Boolean = { 144 | val rgPattern = """@RG\s+ID:([\w_-]+)""".r 145 | bwaReadGroupID = rgPattern findFirstIn str match { 146 | case Some (rgPattern(rgID)) => rgID 147 | case None => "Not matched" 148 | } 149 | 150 | if(bwaReadGroupID == "Not matched") false 151 | else { 152 | readGroupLine = str 153 | true 154 | } 155 | } 156 | 157 | 158 | private def writeObject(out: ObjectOutputStream) { 159 | out.writeObject(bwaReadGroupID) 160 | out.writeObject(readGroupLine) 161 | out.writeObject(packageVersion) 162 | out.writeObject(bwaPackageLine) 163 | } 164 | 165 | private def readObject(in: ObjectInputStream) { 166 | bwaReadGroupID = in.readObject.asInstanceOf[String] 167 | readGroupLine = in.readObject.asInstanceOf[String] 168 | packageVersion = in.readObject.asInstanceOf[String] 169 | bwaPackageLine = in.readObject.asInstanceOf[String] 170 | } 171 | 172 | private def readObjectNoData() { 173 | 174 | } 175 | 176 | } 177 | 178 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/worker1/MemSortAndDedup.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.worker1 20 | 21 | import scala.util.control.Breaks._ 22 | 23 | import cs.ucla.edu.bwaspark.datatype._ 24 | 25 | object MemSortAndDedup { 26 | /** 27 | * Sort the MemAlnRegs according to the given order 28 | * and remove the redundant MemAlnRegs 29 | * 30 | * @param regArray alignment registers, which are the output of chain to alignment (after memChainToAln() is applied) 31 | * @param maskLevelRedun mask level of redundant alignment registers (from MemOptType object) 32 | */ 33 | def memSortAndDedup(regArray: MemAlnRegArrayType, maskLevelRedun: Float): MemAlnRegArrayType = { 34 | if(regArray.curLength <= 1) { 35 | regArray 36 | } 37 | else { 38 | //println("before dedup, n: " + regsIn.length) 39 | //var regs = regArray.regs.sortBy(_.rEnd) 40 | var regs = regArray.regs.sortBy(r => (r.rEnd, r.rBeg)) 41 | /* 42 | var j = 0 43 | println("#####") 44 | regs.foreach(r => { 45 | print("Reg " + j + "(") 46 | print(r.rBeg + ", " + r.rEnd + ", " + r.qBeg + ", " + r.qEnd + ", " + r.score + ", " + r.trueScore + ", ") 47 | println(r.sub + ", " + r.csub + ", " + r.subNum + ", " + r.width + ", " + r.seedCov + ", " + r.secondary + ")") 48 | j += 1 49 | } ) 50 | */ 51 | var i = 1 52 | while(i < regs.length) { 53 | if(regs(i).rBeg < regs(i-1).rEnd) { 54 | var j = i - 1 55 | var isBreak = false 56 | while(j >= 0 && regs(i).rBeg < regs(j).rEnd && !isBreak) { 57 | // a[j] has been excluded 58 | if(regs(j).qEnd != regs(j).qBeg) { 59 | var oq = 0 60 | var mr: Long = 0 61 | var mq = 0 62 | var or = regs(j).rEnd - regs(i).rBeg // overlap length on the reference 63 | // overlap length on the query 64 | if(regs(j).qBeg < regs(i).qBeg) oq = regs(j).qEnd - regs(i).qBeg 65 | else oq = regs(i).qEnd - regs(j).qBeg 66 | // min ref len in alignment 67 | if(regs(j).rEnd - regs(j).rBeg < regs(i).rEnd - regs(i).rBeg) mr = regs(j).rEnd - regs(j).rBeg 68 | else mr = regs(i).rEnd - regs(i).rBeg 69 | // min qry len in alignment 70 | if(regs(j).qEnd - regs(j).qBeg < regs(i).qEnd - regs(i).qBeg) mq = regs(j).qEnd - regs(j).qBeg 71 | else mq = regs(i).qEnd - regs(i).qBeg 72 | // one of the hits is redundant 73 | if(or > maskLevelRedun * mr && oq > maskLevelRedun * mq) { 74 | if(regs(i).score < regs(j).score) { 75 | regs(i).qEnd = regs(i).qBeg 76 | // testing 77 | //println("(i, j)=(" + i + ", " + j + ") " + or + " " + oq + " " + mr + " " + mq) 78 | //println("i: (" + regs(i).qBeg + " " + regs(i).qEnd + " " + regs(i).rBeg + " " + regs(i).rEnd + 79 | //"); j: (" + regs(j).qBeg + " " + regs(j).qEnd + " " + regs(j).rBeg + " " + regs(j).rEnd + ")") 80 | isBreak = true 81 | } 82 | else { 83 | regs(j).qEnd = regs(j).qBeg 84 | // testing 85 | //println("(i, j)=(" + i + ", " + j + ") " + or + " " + oq + " " + mr + " " + mq) 86 | //println("i: (" + regs(i).qBeg + " " + regs(i).qEnd + " " + regs(i).rBeg + " " + regs(i).rEnd + 87 | //"); j: (" + regs(j).qBeg + " " + regs(j).qEnd + " " + regs(j).rBeg + " " + regs(j).rEnd + ")") 88 | } 89 | } 90 | } 91 | 92 | j -= 1 93 | } 94 | 95 | } 96 | 97 | i += 1 98 | } 99 | 100 | // exclude identical hits 101 | regs = 
regs.filter(r => (r.qEnd > r.qBeg)) 102 | 103 | /* 104 | var j = 0 105 | println("#####") 106 | regs.foreach(r => { 107 | print("Reg " + j + "(") 108 | print(r.rBeg + ", " + r.rEnd + ", " + r.qBeg + ", " + r.qEnd + ", " + r.score + ", " + r.trueScore + ", ") 109 | println(r.sub + ", " + r.csub + ", " + r.subNum + ", " + r.width + ", " + r.seedCov + ", " + r.secondary + ")") 110 | j += 1 111 | } ) 112 | */ 113 | 114 | regs = regs.sortBy(r => (- r.score, r.rBeg, r.qBeg)) 115 | //println("1st dedup, n: " + regs.length) 116 | 117 | i = 1 118 | while(i < regs.length) { 119 | if(regs(i).score == regs(i-1).score && regs(i).rBeg == regs(i-1).rBeg && regs(i).qBeg == regs(i-1).qBeg) 120 | regs(i).qEnd = regs(i).qBeg 121 | i += 1 122 | } 123 | 124 | regs = regs.filter(r => (r.qEnd > r.qBeg)) 125 | //println("2nd dedup, n: " + regs.length) 126 | /* 127 | j = 0 128 | regs.foreach(r => { 129 | print("Reg " + j + "(") 130 | print(r.rBeg + ", " + r.rEnd + ", " + r.qBeg + ", " + r.qEnd + ", " + r.score + ", " + r.trueScore + ", ") 131 | println(r.sub + ", " + r.csub + ", " + r.subNum + ", " + r.width + ", " + r.seedCov + ", " + r.secondary + ")") 132 | j += 1 133 | } ) 134 | println 135 | */ 136 | regArray.curLength = regs.length 137 | regArray.maxLength = regs.length 138 | regArray.regs = regs 139 | regArray 140 | } 141 | } 142 | } 143 | 144 | -------------------------------------------------------------------------------- /src/main/jni_fpga/sw_extend_fpga.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | 19 | #include <stdio.h> 20 | #include <stdlib.h> 21 | #include <string.h> 22 | 23 | #include <jni.h> 24 | #include <inttypes.h> 25 | #include <cs_ucla_edu_bwaspark_jni_SWExtendFPGAJNI.h> // generated by javah via maven-native-plugin 26 | #include <time.h> 27 | #include <unistd.h> 28 | 29 | #include <sys/types.h> 30 | #include <sys/ipc.h> 31 | #include <sys/shm.h> 32 | #include <sys/time.h> 33 | #include <sys/stat.h> 34 | 35 | #include <sys/socket.h> 36 | #include <netinet/in.h> 37 | #include <netinet/tcp.h> 38 | #include <arpa/inet.h> 39 | #include <netdb.h> 40 | #include <errno.h> 41 | 42 | #define NOT_READY 0 43 | #define DONE 1 44 | #define FLAG_NUM 2 45 | 46 | 47 | void print_current_time_with_ns (void) 48 | { 49 | long ns; // Nanoseconds 50 | time_t s; // Seconds 51 | struct timespec spec; 52 | 53 | clock_gettime(CLOCK_REALTIME, &spec); 54 | 55 | s = spec.tv_sec; 56 | ns = spec.tv_nsec; // Nanosecond part of the current second 57 | 58 | printf("Current time: %"PRIdMAX".%09ld seconds since the Epoch\n", 59 | (intmax_t)s, ns); 60 | } 61 | 62 | 63 | int send_int_array(int* int_buf, int buf_size) { 64 | // For profiling only 65 | //struct timeval tv; 66 | //gettimeofday(&tv, NULL); 67 | //double time_in_mill = (tv.tv_sec) * 1000 + (tv.tv_usec) / 1000 ; // convert tv_sec & tv_usec to millisecond 68 | //printf("Send time (ms): %lf\n", time_in_mill); 69 | print_current_time_with_ns(); 70 | 71 | // sockets 72 | struct sockaddr_in stSockAddr; 73 | int Res; 74 | 75 | // socket setup 76 | int SocketFD = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); 77 | if (-1 == SocketFD) { 78 | perror("cannot create socket"); 79 | exit(EXIT_FAILURE); 80 | } 81 | 82 | memset(&stSockAddr, 0, sizeof(stSockAddr)); 83 | stSockAddr.sin_family = AF_INET; 84 | stSockAddr.sin_port = htons(7000); 85 | Res = inet_pton(AF_INET, "127.0.0.1", &stSockAddr.sin_addr); 86 | 87 | if (0 > Res) { 88 | perror("error: first parameter is not a valid address family"); 89 | close(SocketFD); 90 | exit(EXIT_FAILURE); 91 | } 92 | else if (0 == Res) { 93 | perror("error: second parameter does not contain a valid IP address"); 94 | close(SocketFD); 95 | exit(EXIT_FAILURE); 96 | } 97 | 98 | int connect_ret = connect(SocketFD, (struct sockaddr *)&stSockAddr, sizeof(stSockAddr)); 99 | if (connect_ret == -1) { 100 | perror("connect failed"); 101 | close(SocketFD); 102 | //exit(EXIT_FAILURE); 103 | return 1; 104 | } 105 | else { 106 | //printf("Shmid: %d, Data size: %d, sent\n", int_buf[0], int_buf[1]); 107 | send(SocketFD, int_buf, buf_size * sizeof(int), 0); 108 | (void) shutdown(SocketFD, SHUT_RDWR); 109 | close(SocketFD); 110 | return 0; 111 | } 112 | 113 | } 114 | 115 | 116 | JNIEXPORT jshortArray JNICALL Java_cs_ucla_edu_bwaspark_jni_SWExtendFPGAJNI_swExtendFPGAJNI 117 | (JNIEnv *env, jobject thisObj, jint retTaskNum, jbyteArray arrayIn) 118 | { 119 | // shared memory 120 | int shmid; 121 | char *shm_addr = NULL; 122 | 123 | // polling setting 124 | struct timespec deadline; 125 | deadline.tv_sec = 0; 126 | deadline.tv_nsec = 100; 127 | 128 | printf("Get JNI data\n"); 129 | jbyte* dataArray = (jbyte*) (*env)->GetByteArrayElements(env, arrayIn, NULL); 130 | jsize dataArraySize = (*env)->GetArrayLength(env, arrayIn); 131 | 132 | // shared memory setup 133 | if((shmid = shmget(IPC_PRIVATE, FLAG_NUM * sizeof(int) + dataArraySize * sizeof(jbyte), IPC_CREAT | 0666)) < 0) { 134 | perror("shmget failed."); 135 | exit(1); 136 | } 137 | else 138 | printf("Create shared memory: %d\n", shmid); 139 | 140 | if((shm_addr = (char*) shmat(shmid, NULL, 0)) == (char *) -1) { 141 | perror("Client: shmat failed."); 142 | exit(1); 143 | } 144 | else 145 | printf("Client: attach shared memory: %p\n", shm_addr); 146 | 147 | // initialize the flags of shared memory 148 | *((int*)shm_addr) = NOT_READY;
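/* Layout of the shared-memory segment created above (derived from the code in this function):
 *   [int flag0][int flag1][payload: input jbytes on the way in, jshort results on the way back]
 * flag0 is set to DONE by this client once the input payload has been copied in; flag1 is set
 * to DONE by the FPGA host process once the results are ready, which the polling loop below
 * waits for before reading retTaskNum jshorts starting at byte offset FLAG_NUM * sizeof(int). */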
149 | *((int*)(shm_addr + sizeof(int))) = NOT_READY; 150 | 151 | // put input data 152 | printf("Client: put input data\n"); 153 | //printf("Shmid: %d, Dataarray size: %d\n", shmid, dataArraySize); 154 | memcpy(shm_addr + FLAG_NUM * sizeof(int), dataArray, (int) dataArraySize * sizeof(jbyte)); 155 | *((int*)shm_addr) = DONE; 156 | //printf("After memcpy\n", shmid, dataArraySize); 157 | 158 | // send a request (shmid) and data array size to the FPGA host thread 159 | int int_buf[2]; 160 | int_buf[0] = shmid; 161 | int_buf[1] = dataArraySize / 4; 162 | //printf("Shmid: %d, Data size (# of int): %d, Task Num: %d\n", int_buf[0], int_buf[1], retTaskNum); 163 | printf("Send shmid and data size through socket\n"); 164 | while(send_int_array(int_buf, 2)) clock_nanosleep(CLOCK_REALTIME, 0, &deadline, NULL); 165 | 166 | // poll the shared memory 167 | printf("Poll\n"); 168 | volatile int done = 0; 169 | while(done == 0) { 170 | done = (int) *((int*)(shm_addr + sizeof(int))); 171 | clock_nanosleep(CLOCK_REALTIME, 0, &deadline, NULL); 172 | //usleep(1); 173 | } 174 | 175 | //printf("fill data\n"); 176 | jshortArray ret = (*env)->NewShortArray(env, retTaskNum); 177 | // copy data from the shared memory 178 | int i; 179 | jshort* fill = (jshort*) malloc(retTaskNum * sizeof(jshort)); 180 | for(i = 0; i < retTaskNum; i++) 181 | fill[i] = *((jshort*)(shm_addr + (FLAG_NUM * 2 + i) * sizeof(jshort))); 182 | 183 | // free the shared memory 184 | shmdt(shm_addr); 185 | shmctl(shmid, IPC_RMID, 0); 186 | 187 | (*env)->SetShortArrayRegion(env, ret, 0, retTaskNum, fill); 188 | (*env)->ReleaseByteArrayElements(env, arrayIn, dataArray, 0); 189 | free(fill); 190 | 191 | printf("done\n"); 192 | return ret; 193 | } 194 | -------------------------------------------------------------------------------- /src/main/perl/worker1_verifier.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # 4 | # * Licensed to the Apache Software Foundation (ASF) under one or more 5 | # * contributor license agreements. See the NOTICE file distributed with 6 | # * this work for additional information regarding copyright ownership. 7 | # * The ASF licenses this file to You under the Apache License, Version 2.0 8 | # * (the "License"); you may not use this file except in compliance with 9 | # * the License. You may obtain a copy of the License at 10 | # * 11 | # * http://www.apache.org/licenses/LICENSE-2.0 12 | # * 13 | # * Unless required by applicable law or agreed to in writing, software 14 | # * distributed under the License is distributed on an "AS IS" BASIS, 15 | # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # * See the License for the specific language governing permissions and 17 | # * limitations under the License. 
18 | # 19 | 20 | 21 | 22 | $c_infile = "/home/ytchen/bwa/bwa-0.7.8/log"; 23 | $scala_infile = "/home/ytchen/incubator/stable/bwa-spark-0.2.0/log"; 24 | 25 | # Read the output file from C implementation 26 | $c_read_idx = -1; 27 | 28 | open CIN, $c_infile; 29 | 30 | while(<CIN>) { 31 | chomp; 32 | if($_ eq "#####") { 33 | $c_read_idx++; 34 | } 35 | elsif($_ =~ /^Reg (\d+)\((\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+)\)/) { 36 | $cread[$c_read_idx][$1][0] = $2; 37 | $cread[$c_read_idx][$1][1] = $3; 38 | $cread[$c_read_idx][$1][2] = $4; 39 | $cread[$c_read_idx][$1][3] = $5; 40 | $cread[$c_read_idx][$1][4] = $6; 41 | $cread[$c_read_idx][$1][5] = $7; 42 | $cread[$c_read_idx][$1][6] = $8; 43 | $cread[$c_read_idx][$1][7] = $9; 44 | $cread[$c_read_idx][$1][8] = $10; 45 | $cread[$c_read_idx][$1][9] = $11; 46 | $cread[$c_read_idx][$1][10] = $12; 47 | $cread[$c_read_idx][$1][11] = $13; 48 | $cread_reg_num[$c_read_idx] = $1; 49 | } 50 | } 51 | 52 | close CIN; 53 | 54 | $c_read_num = $c_read_idx + 1; 55 | 56 | 57 | #for($i = 0; $i < $c_read_num; $i++) { 58 | # for($j = 0; $j < ($cread_reg_num[$i] + 1); $j++) { 59 | # print "Reg $j "; 60 | # for($k = 0; $k < 12; $k++) { 61 | # print "$cread[$i][$j][$k] "; 62 | # } 63 | # print "\n"; 64 | # } 65 | #} 66 | #print "\n"; 67 | 68 | 69 | for($i = 0; $i < $c_read_num; $i++) { 70 | splice(@tmp); 71 | splice(@sorted_tmp); 72 | for($j = 0; $j < ($cread_reg_num[$i] + 1); $j++) { 73 | #print "Reg $j "; 74 | for($k = 0; $k < 12; $k++) { 75 | $tmp[$j][$k] = $cread[$i][$j][$k]; 76 | #print "$tmp[$j][$k] "; 77 | } 78 | #print "\n"; 79 | } 80 | 81 | @sorted_tmp = sort {$a->[0] <=> $b->[0]} @tmp; # sort this read's regs by rBeg (field 0) into a canonical order for comparison 82 | for($j = 0; $j < ($cread_reg_num[$i] + 1); $j++) { 83 | for($k = 0; $k < 12; $k++) { 84 | $cread_sorted[$i][$j][$k] = $sorted_tmp[$j][$k]; 85 | } 86 | } 87 | } 88 | 89 | #for($i = 0; $i < $c_read_num; $i++) { 90 | # for($j = 0; $j < ($cread_reg_num[$i] + 1); $j++) { 91 | # print "Reg $j "; 92 | # for($k = 0; $k < 12; $k++) { 93 | # print "$cread_sorted[$i][$j][$k] "; 94 | # } 95 | # print "\n"; 96 | # } 97 | #} 98 | #print "\n"; 99 | 100 | # Read the output file from Scala implementation 101 | 102 | $scala_read_idx = -1; 103 | 104 | open SCALAIN, $scala_infile; 105 | 106 | while(<SCALAIN>) { 107 | chomp; 108 | if($_ eq "#####") { 109 | $scala_read_idx++; 110 | } 111 | elsif($_ =~ /^Reg (\d+)\((\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+)\)/) { 112 | $scalaread[$scala_read_idx][$1][0] = $2; 113 | $scalaread[$scala_read_idx][$1][1] = $3; 114 | $scalaread[$scala_read_idx][$1][2] = $4; 115 | $scalaread[$scala_read_idx][$1][3] = $5; 116 | $scalaread[$scala_read_idx][$1][4] = $6; 117 | $scalaread[$scala_read_idx][$1][5] = $7; 118 | $scalaread[$scala_read_idx][$1][6] = $8; 119 | $scalaread[$scala_read_idx][$1][7] = $9; 120 | $scalaread[$scala_read_idx][$1][8] = $10; 121 | $scalaread[$scala_read_idx][$1][9] = $11; 122 | $scalaread[$scala_read_idx][$1][10] = $12; 123 | $scalaread[$scala_read_idx][$1][11] = $13; 124 | $scalaread_reg_num[$scala_read_idx] = $1; 125 | } 126 | } 127 | 128 | close SCALAIN; 129 | 130 | $scala_read_num = $scala_read_idx + 1; 131 | 132 | #for($i = 0; $i < $scala_read_num; $i++) { 133 | # for($j = 0; $j < ($scalaread_reg_num[$i] + 1); $j++) { 134 | # print "Reg $j "; 135 | # for($k = 0; $k < 12; $k++) { 136 | # print "$scalaread[$i][$j][$k] "; 137 | # } 138 | # print "\n"; 139 | # } 140 | #} 141 | 142 | for($i = 0; $i < $scala_read_num; $i++) { 143 | splice(@tmp); 144 | splice(@sorted_tmp); 145 |
for($j = 0; $j < ($scalaread_reg_num[$i] + 1); $j++) { 146 | #print "Reg $j "; 147 | for($k = 0; $k < 12; $k++) { 148 | $tmp[$j][$k] = $scalaread[$i][$j][$k]; 149 | #print "$tmp[$j][$k] "; 150 | } 151 | #print "\n"; 152 | } 153 | 154 | @sorted_tmp = sort {$a->[0] <=> $b->[0]} @tmp; # sort this read's regs by rBeg (field 0) into a canonical order for comparison 155 | for($j = 0; $j < ($scalaread_reg_num[$i] + 1); $j++) { 156 | for($k = 0; $k < 12; $k++) { 157 | $scalaread_sorted[$i][$j][$k] = $sorted_tmp[$j][$k]; 158 | } 159 | } 160 | } 161 | 162 | #for($i = 0; $i < $scala_read_num; $i++) { 163 | # for($j = 0; $j < ($scalaread_reg_num[$i] + 1); $j++) { 164 | # print "Reg $j "; 165 | # for($k = 0; $k < 12; $k++) { 166 | # print "$scalaread_sorted[$i][$j][$k] "; 167 | # } 168 | # print "\n"; 169 | # } 170 | #} 171 | 172 | 173 | # compare the reg number by read 174 | $flag = 1; 175 | for($i = 0; $i < $c_read_num; $i++) { 176 | if($cread_reg_num[$i] != $scalaread_reg_num[$i]) { 177 | print "[Read $i] Different # of regs: $cread_reg_num[$i] $scalaread_reg_num[$i]\n"; 178 | $flag = 0; 179 | } 180 | } 181 | 182 | if($flag == 1) { 183 | print "The numbers of regs of ALL reads are identical in both the C and Scala implementations\n"; 184 | } 185 | 186 | # compare the reg content by read 187 | $allflag = 1; 188 | for($i = 0; $i < $c_read_num; $i++) { 189 | print "Read $i\n"; 190 | for($j = 0; $j < ($cread_reg_num[$i] + 1); $j++) { 191 | $inflag = 1; 192 | for($k = 0; $k < 12; $k++) { 193 | if($scalaread_sorted[$i][$j][$k] != $cread_sorted[$i][$j][$k]) { 194 | $inflag = 0; 195 | $allflag = 0; 196 | last; 197 | } 198 | } 199 | 200 | if($inflag == 0) { 201 | print "[C]Reg $j "; 202 | for($k = 0; $k < 12; $k++) { 203 | print "$cread_sorted[$i][$j][$k] " 204 | } 205 | print "; [Scala]Reg $j "; 206 | for($k = 0; $k < 12; $k++) { 207 | print "$scalaread_sorted[$i][$j][$k] " 208 | } 209 | print "\n"; 210 | } 211 | } 212 | 213 | } 214 | 215 | if($allflag == 1) { 216 | print "Worker1 passed!!!!!!!!\n"; 217 | } 218 | 219 | -------------------------------------------------------------------------------- /src/main/scala/cs/ucla/edu/bwaspark/fastq/FASTQRDDLoader.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | 18 | 19 | package cs.ucla.edu.bwaspark.fastq 20 | 21 | import org.apache.spark.SparkContext 22 | import org.apache.spark.SparkContext._ 23 | import org.apache.spark.rdd.RDD 24 | 25 | import scala.List 26 | 27 | import cs.ucla.edu.avro.fastq._ 28 | 29 | import org.apache.hadoop.mapreduce.Job 30 | import org.apache.parquet.hadoop.ParquetInputFormat 31 | import org.apache.parquet.avro.{AvroParquetInputFormat, AvroReadSupport} 32 | import org.apache.parquet.hadoop.util.ContextUtil 33 | import org.apache.parquet.filter.UnboundRecordFilter 34 | 35 | import org.apache.hadoop.fs.Path 36 | import org.apache.hadoop.fs.FileSystem 37 | import org.apache.hadoop.fs.FileStatus 38 | import org.apache.hadoop.conf.Configuration 39 | 40 | import java.util.logging.{Level, Logger} 41 | 42 | class FASTQRDDLoader(sc: SparkContext, rootFilePath: String, numDir: Int) { 43 | // Debugging helper: prints the sequence length of a FASTQ record 44 | def FASTQPrinter(rec: FASTQRecord) = { 45 | println(rec.getSeqLength()) 46 | } 47 | 48 | // Not used at the current stage 49 | // Cannot find a way to access HDFS directly from Spark... 50 | def findFiles(fs: FileSystem, path: Path): Seq[Path] = { 51 | val statuses = fs.listStatus(path) 52 | val dirs = statuses.filter(s => s.isDirectory).map(s => s.getPath) 53 | dirs.toSeq ++ dirs.flatMap(p => findFiles(fs, p)) 54 | } 55 | 56 | /** 57 | * Load the FASTQ records from HDFS into an RDD 58 | * 59 | * @param path the input HDFS path 60 | */ 61 | def RDDLoad(path: String): RDD[FASTQRecord] = { 62 | val job = new Job(sc.hadoopConfiguration) 63 | ParquetInputFormat.setReadSupportClass(job, classOf[AvroReadSupport[FASTQRecord]]) 64 | val records = sc.newAPIHadoopFile(path, classOf[ParquetInputFormat[FASTQRecord]], classOf[Void], classOf[FASTQRecord], ContextUtil.getConfiguration(job)).map(p => p._2) 65 | records 66 | } 67 | 68 | /** 69 | * Load all subdirectories of the given FASTQ file from HDFS 70 | * NOTE: Currently we cannot access the HDFS directory tree structure 71 | * We ask users to input the number of subdirectories manually... 72 | * This should be changed later. 73 | */ 74 | def RDDLoadAll(): RDD[FASTQRecord] = { 75 | //val fs = FileSystem.get(sc.hadoopConfiguration) 76 | //val paths = findFiles(fs, new Path(rootFilePath)) 77 | 78 | var i = 0 79 | var paths:List[String] = List() 80 | 81 | // numDir: the number of sub-directories in HDFS (given from user) 82 | // The reason is that currently we cannot directly fetch the directory information from HDFS 83 | while(i < numDir) { 84 | val path = rootFilePath + "/" + i.toString 85 | paths = path :: paths 86 | i += 1 87 | } 88 | 89 | val records = sc.union(paths.map(p => RDDLoad(p))) 90 | records 91 | } 92 | 93 | /** 94 | * Load the Pair-End FASTQ records from HDFS into an RDD 95 | * 96 | * @param path the input HDFS path 97 | */ 98 | def PairEndRDDLoad(path: String): RDD[PairEndFASTQRecord] = { 99 | val job = new Job(sc.hadoopConfiguration) 100 | ParquetInputFormat.setReadSupportClass(job, classOf[AvroReadSupport[PairEndFASTQRecord]]) 101 | val records = sc.newAPIHadoopFile(path, classOf[ParquetInputFormat[PairEndFASTQRecord]], classOf[Void], classOf[PairEndFASTQRecord], ContextUtil.getConfiguration(job)).map(p => p._2) 102 | records 103 | } 104 | 105 | /** 106 | * Load all subdirectories of the given Pair-End FASTQ file from HDFS 107 | * NOTE: Currently we cannot access the HDFS directory tree structure 108 | * We ask users to input the number of subdirectories manually... 109 | * This should be changed later.
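 * The expected layout, as built by the while loop below, is rootFilePath/0,
 * rootFilePath/1, ..., rootFilePath/(numDir-1), where each numbered subdirectory
 * holds one Parquet-encoded piece of the pair-end input.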
110 | */ 111 | def PairEndRDDLoadAll(): RDD[PairEndFASTQRecord] = { 112 | //val fs = FileSystem.get(sc.hadoopConfiguration) 113 | //val paths = findFiles(fs, new Path(rootFilePath)) 114 | 115 | var i = 0 116 | var paths:List[String] = List() 117 | 118 | // numDir: the number of sub-directories in HDFS (given from user) 119 | // The reason is that currently we cannot directly fetch the directory information from HDFS 120 | while(i < numDir) { 121 | val path = rootFilePath + "/" + i.toString 122 | paths = path :: paths 123 | i += 1 124 | } 125 | 126 | val records = sc.union(paths.map(p => PairEndRDDLoad(p))) 127 | records 128 | } 129 | 130 | /** 131 | * Load the Pair-End FASTQ from HDFS into RDD in a batched fashion 132 | * 133 | * @param nextFolderIdx the index of the next folder to be read 134 | * @param batchFolderNum the number of folders read in this batch 135 | */ 136 | def PairEndRDDLoadOneBatch(nextFolderIdx: Int, batchFolderNum: Int): RDD[PairEndFASTQRecord] = { 137 | var i = nextFolderIdx 138 | var endFolderIdx = nextFolderIdx + batchFolderNum 139 | var paths:List[String] = List() 140 | 141 | // numDir: the number of sub-directories in HDFS (given from user) 142 | // The reason is that currently we cannot directly fetch the directory information from HDFS 143 | while(i < endFolderIdx) { 144 | val path = rootFilePath + "/" + i.toString 145 | paths = path :: paths 146 | i += 1 147 | } 148 | 149 | val records = sc.union(paths.map(p => PairEndRDDLoad(p))) 150 | records 151 | } 152 | 153 | /** 154 | * Load the Single-End FASTQ from HDFS into RDD in a batched fashion 155 | * 156 | * @param nextFolderIdx the index of the next folder to be read 157 | * @param batchFolderNum the number of folders read in this batch 158 | */ 159 | def SingleEndRDDLoadOneBatch(nextFolderIdx: Int, batchFolderNum: Int): RDD[FASTQRecord] = { 160 | var i = nextFolderIdx 161 | var endFolderIdx = nextFolderIdx + batchFolderNum 162 | var paths:List[String] = List() 163 | 164 | // numDir: the number of sub-directories in HDFS (given from user) 165 | // The reason is that currently we cannot directly fetch the directory information from HDFS 166 | while(i < endFolderIdx) { 167 | val path = rootFilePath + "/" + i.toString 168 | paths = path :: paths 169 | i += 1 170 | } 171 | 172 | val records = sc.union(paths.map(p => RDDLoad(p))) 173 | records 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/main/java/accUCLA/accAPI/Connector2FPGA.java: -------------------------------------------------------------------------------- 1 | 2 | package accUCLA.api; 3 | 4 | import java.util.*; 5 | import java.io.IOException; 6 | import java.io.ObjectOutputStream; 7 | import java.io.OutputStream; 8 | import java.io.DataInputStream; 9 | import java.io.DataOutputStream; 10 | import java.io.ByteArrayOutputStream; 11 | import java.nio.ByteBuffer; 12 | import java.nio.ByteOrder; 13 | import java.net.Socket; 14 | import java.net.InetAddress; 15 | import java.net.ServerSocket; 16 | 17 | 18 | public class Connector2FPGA { 19 | private static final int bufferSize = 16*1024*1024; 20 | private Socket socket; 21 | private DataOutputStream o2; 22 | private DataInputStream in; 23 | private final String ip; 24 | private final int port; 25 | private Boolean is_connected; 26 | 27 | public Connector2FPGA(String ip, int port) 28 | { 29 | this.ip = ip; 30 | this.port = port; 31 | } 32 | public void buildConnection( int bigData ) throws IOException 33 | { 34 | InetAddress addr = 
InetAddress.getByName(ip); 35 | socket = new Socket(addr, port); 36 | if( bigData == 1 ) 37 | { 38 | socket.setReceiveBufferSize( bufferSize ); 39 | socket.setSendBufferSize( bufferSize ); 40 | } 41 | o2 = new DataOutputStream(socket.getOutputStream()); 42 | in = new DataInputStream(socket.getInputStream()); 43 | is_connected = true; 44 | } 45 | 46 | public void send( int i ) throws IOException 47 | { 48 | //System.out.println ("ACCAPI: send " + i); 49 | //o2.writeInt(big2LittleEndian.Int(i)); 50 | o2.write(ByteBuffer.allocate(4).order(ByteOrder.nativeOrder()).putInt(i).array(),0,4); 51 | o2.flush(); 52 | } 53 | public void send( String str ) throws IOException 54 | { 55 | o2.writeBytes(str); 56 | o2.flush(); 57 | } 58 | public void send_large_array( byte[] array ) throws IOException 59 | { 60 | int packet_size = 256*1024; 61 | for( int start = 0; start < array.length; start += packet_size ) 62 | { 63 | if( start + packet_size > array.length ) packet_size = array.length - start; 64 | o2.write( array, start, packet_size ); 65 | } 66 | } 67 | public void send_large_array( byte[] array, int length ) throws IOException 68 | { 69 | int packet_size = 256*1024; 70 | for( int start = 0; start < length; start += packet_size ) 71 | { 72 | if( start + packet_size > length ) packet_size = length - start; 73 | o2.write( array, start, packet_size ); 74 | } 75 | } 76 | public void send( float[] float_array ) throws IOException 77 | { 78 | //send_large_array(o2,big2LittleEndian.floatArray(float_array)); 79 | int len = float_array.length; 80 | ByteBuffer buf = ByteBuffer.allocate( 4 * len ).order(ByteOrder.nativeOrder()); 81 | for(int i = 0; i < len; i++) 82 | { 83 | buf.putFloat(float_array[i]); 84 | } 85 | buf.order(ByteOrder.nativeOrder()).position(0); 86 | if(buf.hasArray()) 87 | { 88 | send_large_array(buf.array()); 89 | } 90 | else 91 | { 92 | System.out.println("byte buffer not backed by byte array"); 93 | } 94 | o2.flush(); 95 | } 96 | public void send( float[][] float_array ) throws IOException 97 | { 98 | //send_large_array(big2LittleEndian.floatArray(float_array)); 99 | int len1 = float_array.length; 100 | int len2 = float_array[0].length; 101 | ByteBuffer buf = ByteBuffer.allocate( 4 * len1 * len2 ).order(ByteOrder.nativeOrder()); 102 | for(int i = 0; i < len1; i++) 103 | { 104 | for(int j = 0; j < len2; j++ ) 105 | buf.putFloat(float_array[i][j]); 106 | } 107 | //System.out.println(buf.toString()); 108 | if(buf.hasArray()) 109 | { 110 | //System.out.println(Arrays.toString(buf.array())); 111 | send_large_array(buf.array()); 112 | } 113 | else 114 | { 115 | System.out.println("byte buffer not backed by byte array"); 116 | } 117 | o2.flush(); 118 | } 119 | public void send( int[] int_array ) throws IOException 120 | { 121 | //o2.write(big2LittleEndian.IntArray(int_array)); 122 | int len = int_array.length; 123 | ByteBuffer buf = ByteBuffer.allocate( 4 * len ).order(ByteOrder.nativeOrder()); 124 | for(int i = 0; i < len; i++) 125 | { 126 | buf.putInt(int_array[i]); 127 | } 128 | buf.order(ByteOrder.nativeOrder()).position(0); 129 | if(buf.hasArray()) 130 | { 131 | send_large_array(buf.array()); 132 | } 133 | else 134 | { 135 | System.out.println("byte buffer not backed by byte array"); 136 | } 137 | o2.flush(); 138 | } 139 | public void send( ByteBuffer buf ) throws IOException { 140 | send_large_array(buf.array()); 141 | } 142 | public void send( ByteBuffer buf, int length ) throws IOException { 143 | send_large_array(buf.array(), length); 144 | } 145 | public int receive( ) throws IOException 
146 | { 147 | //return big2LittleEndian.Int(in.readInt( ))+5120; 148 | return ByteBuffer.allocate(4).putInt(in.readInt( )).order(ByteOrder.nativeOrder()).getInt(0); 149 | } 150 | public int[] receive_int( int len ) throws IOException 151 | { 152 | byte[] byte_array = new byte[len*4]; 153 | in.readFully(byte_array); 154 | ByteBuffer buf2 = ByteBuffer.wrap(byte_array).order(ByteOrder.nativeOrder()); 155 | int[] result = new int[len]; 156 | for(int i = 0; i < len; i++) 157 | { 158 | result[i] = buf2.getInt(); 159 | } 160 | return result; 161 | } 162 | public ByteBuffer receive_short( int len ) throws IOException 163 | { 164 | byte[] byte_array = new byte[len*2]; 165 | in.readFully(byte_array); 166 | ByteBuffer buf2 = ByteBuffer.wrap(byte_array).order(ByteOrder.nativeOrder()); 167 | return buf2; 168 | //short[] result = new short[len]; 169 | //for(short i = 0; i < len; i++) 170 | //{ 171 | // result[i] = buf2.getShort(); 172 | //} 173 | //return result; 174 | } 175 | public float[] receive_float( int len ) throws IOException 176 | { 177 | byte[] byte_array = new byte[len*4]; 178 | in.readFully(byte_array); 179 | ByteBuffer buf2 = ByteBuffer.wrap(byte_array).order(ByteOrder.nativeOrder()); 180 | float[] result = new float[len]; 181 | for(int i = 0; i < len; i++) 182 | { 183 | result[i] = buf2.getFloat(); 184 | } 185 | return result; 186 | } 187 | public float[][] receive_float( int len1, int len2 ) throws IOException 188 | { 189 | float[][] result = new float[len1][len2]; 190 | float[] data = receive_float( len1 * len2 ); 191 | for( int i = 0; i < len1; i++ ) 192 | { 193 | System.arraycopy(data,i*len2,result[i],0,len2); 194 | } 195 | return result; 196 | } 197 | public void closeConnection( ) throws IOException 198 | { 199 | o2.close(); 200 | in.close(); 201 | socket.close(); 202 | } 203 | } 204 | --------------------------------------------------------------------------------
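The Connector2FPGA class above wraps a raw TCP socket with native-byte-order framing. A minimal usage sketch follows, assuming the FPGA host process listens on 127.0.0.1:7000 and replies with one int per task; the endpoint and message framing are assumptions for illustration, and only the method names and signatures come from Connector2FPGA.java above:

    import accUCLA.api.Connector2FPGA;

    public class Connector2FPGAExample {
        public static void main(String[] args) throws Exception {
            Connector2FPGA conn = new Connector2FPGA("127.0.0.1", 7000); // assumed endpoint
            conn.buildConnection(1); // 1 => enlarge send/receive buffers for bulk transfers
            int[] tasks = {42, 7, 13};
            conn.send(tasks.length);                        // assumed framing: element count first...
            conn.send(tasks);                               // ...then the payload in native byte order
            int[] results = conn.receive_int(tasks.length); // assumed reply: one int per task
            conn.closeConnection();
        }
    }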