├── COPYRIGHT ├── LICENSE.md ├── README.md ├── RUBRIC ├── RK_RUutils_lite4.py ├── __main__.py ├── nanonet │ ├── LICENSE.md │ ├── __init__.py │ ├── caller_2d │ │ ├── align_kmers.py │ │ ├── caller_2d.py │ │ ├── common │ │ │ ├── bp_tools.h │ │ │ ├── data_view.h │ │ │ ├── stub_py.cpp │ │ │ ├── test │ │ │ │ └── test_stub.py │ │ │ ├── utils.h │ │ │ └── view_numpy_arrays.h │ │ ├── pair_align │ │ │ ├── mm_align.cpp │ │ │ ├── mm_align.h │ │ │ ├── nw_align.cpp │ │ │ ├── nw_align.h │ │ │ ├── pair_align.h │ │ │ ├── pair_align_py.cpp │ │ │ └── pair_align_py.h │ │ ├── viterbi_2d │ │ │ ├── viterbi_2d.cpp │ │ │ ├── viterbi_2d.h │ │ │ ├── viterbi_2d_py.cpp │ │ │ └── viterbi_2d_py.h │ │ └── viterbi_2d_ocl │ │ │ ├── proxyCL.cpp │ │ │ ├── proxyCL.h │ │ │ ├── viterbi_2d.cl │ │ │ ├── viterbi_2d_ocl.cpp │ │ │ ├── viterbi_2d_ocl.h │ │ │ ├── viterbi_2d_ocl_py.cpp │ │ │ └── viterbi_2d_ocl_py.h │ ├── cmdargs.py │ ├── currennt_to_pickle.py │ ├── data │ │ ├── default_complement.npy │ │ ├── default_model.tmpl │ │ ├── default_template.npy │ │ ├── r9.4_template.npy │ │ ├── r9_complement.npy │ │ ├── r9_template.npy │ │ └── rtc_mismatch_scores.txt │ ├── decoding.cpp │ ├── decoding.py │ ├── eventdetection │ │ ├── filters.c │ │ ├── filters.h │ │ └── filters.py │ ├── fast5.py │ ├── features.py │ ├── include │ │ ├── CL │ │ │ └── cl.hpp │ │ ├── extras │ │ │ ├── CL │ │ │ │ └── cl.hpp │ │ │ └── stdint.h │ │ └── module.h │ ├── jobqueue.py │ ├── latency_test │ │ ├── latency_test.py │ │ └── run_gnuplot_on_csv_files.py │ ├── nanonetcall.py │ ├── nanonetcall_2d.py │ ├── nanonettrain.py │ ├── nn.py │ ├── resolve.py │ ├── sample_data │ │ ├── 904896_ch170_read104_strand.fast5 │ │ ├── 904896_ch170_read105_strand.fast5 │ │ ├── 904896_ch170_read108_strand.fast5 │ │ ├── 904896_ch170_read111_strand.fast5 │ │ ├── 904896_ch170_read114_strand.fast5 │ │ └── sample_out.fa │ ├── segment.py │ ├── test │ │ └── test_nn.py │ ├── util.py │ └── watcher.py └── setup_nanonet.py └── setup.py /COPYRIGHT: 
-------------------------------------------------------------------------------- 1 | Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights in this software. 2 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | BSD-3-Clause 3 | 4 | Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights in this software. 5 | 6 | 7 | 8 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 17 | 18 | The RUBRIC application may only be used with devices developed by Oxford Nanopore Technologies Ltd. or Metrichor Ltd. and may require access to real time data generated from such devices that may only be available via the Software [(link)](https://github.com/ragak/RUBRIC) and may be subject to your agreement to additional terms and conditions. The RUBRIC application was not developed by Oxford Nanopore Technologies Ltd. or Metrichor Ltd. The RUBRIC application is licensed solely for noncommercial research use only. Such license does not include a right to redistribute or create derivative works (except by persons with an active Developer License Agreement [(found here)](https://community.nanoporetech.com/info_sheets/developer-channel/v/dpi_s1005_v1_revh_06apr2016/how-to-join-the-developer) with Oxford Nanopore. The RUBRIC application is provided “AS IS” and “Where Available.” Developer, Oxford Nanopore Technologies Ltd. and Metrichor Ltd. disclaim all warranties expressed or implied. Oxford reserves all rights in its Intellectual Property Rights not expressly granted herein and no implied licenses may be created by acts or omissions. 
TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT (INCLUDING IN NEGLIGENCE) OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE RUBRIC APPLICATION OR THE USE OR OTHER DEALINGS IN THE RUBRIC APPLICATION. This notice shall be included in all copies or substantial portions of the RUBRIC application. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | *************************************************************************** 2 | (c) 2018: National Technology & Engineering Solutions of Sandia, LLC (NTESS) 3 | *************************************************************************** 4 | 5 | RUBRIC 6 | ====== 7 | Read Until with Basecall and Reference-Informed Criteria 8 | 9 | These scripts allow for real-time filtering of nanopore sequencing reads based upon analysis of the incoming basepairs as in [DOI](https://www.biorxiv.org/content/early/2018/11/02/460014). RUBRIC was conceived and tested using Nanonet for basecalling, LAST for read-alignment, and a Windows 10 PC operating in safe mode (with networking). 10 | 11 | NOTE: 12 | --- 13 | A version of nanonet has been included in this distribution which is no longer offered or supported by Oxford Nanopore Technologies (ONT). It has been included to maintain the functionality of this package. 14 | 15 | Also note that it is advisable to use this package first on a used flow cell to gauge baseline functionality. 16 | 17 | Installation 18 | ------------ 19 | **Requirements** 20 | 21 | As the Nanonet basecaller is no longer supported or offered by Oxford Nanopore Technologies (ONT), a version modified to support RUBRIC has been included in this repository. 22 | RUBRIC also relies upon the [LAST](http://last.cbrc.jp/) aligner. 
Therefore LAST must also be installed and added to PATH. Nanonet will need to be compiled using the [Visual C++ Compiler for Python 2.7](https://www.microsoft.com/en-us/download/details.aspx?id=44266). RUBRIC relies upon the Read-Until API, which can be obtained directly from ONT. Most of the results obtained in [DOI](https://www.biorxiv.org/content/early/2018/11/02/460014) were obtained using the RU API that was released alongside MinKNOW version 1.6.11. *RUBRIC has not been tested on newer versions of the RU API, but may work with some small adjustments.* 23 | 24 | **Install** 25 | 26 | Once LAST and the C++ compiler have been installed the RUBRIC scripts can be installed. It is highly recommended to install the scripts in a virtual environment such as conda: 27 | ``` 28 | conda create -n RUBRIC_env python=2.7 29 | 30 | activate RUBRIC_env 31 | 32 | cd \path\to\cloned\repository 33 | 34 | python setup.py install 35 | ``` 36 | 37 | This setup file first installs the RUBRIC components, and then calls the 'setup_nanonet.py' file (taken and renamed from the original Nanonet repository). 38 | 39 | RUBRIC relies on an older version of the read_until API, which is included in this repository and is used via a relative import during runtime. It is recommended that users with the newer read_until API first uninstall the new version before installing RUBRIC. 
40 | 41 | 42 | Quick Start 43 | ----------- 44 | Once installed, the rubric commandline help can be called via RUBRIC can be called simply with: 45 | 46 | ``` 47 | python RUBRIC -h 48 | ``` 49 | 50 | Which should then show: 51 | ``` 52 | usage: RUBRIC [-h] -r REFERENCE_DATABASE -ho HOST [-a ALIGN] 53 | [-as ALIGNER_SETTINGS] [-t TIME] [-q QUEUE_SIZE] [-s] 54 | [-l LOWER_THRESHOLD] [-u UPPER_THRESHOLD] [-i IGNORE_EVENTS] 55 | [-c CONSIDER_EVENTS] 56 | 57 | Read Until with Basecall and Reference-Informed Criteria (RUBRIC) 58 | 59 | optional arguments: 60 | -h, --help show this help message and exit 61 | -r REFERENCE_DATABASE, --reference_database REFERENCE_DATABASE 62 | path to database if LAST or fasta file if graphmap 63 | -ho HOST, --host HOST 64 | The host address for the laptop running the MinION 65 | -a ALIGN, --aligner ALIGN 66 | Type of aligner - either "graphmap" or "last" (default 67 | last) 68 | -as ALIGNER_SETTINGS, --aligner_settings ALIGNER_SETTINGS 69 | A string containing the settings to pass to the 70 | aligner (default: '-fTAB -C2 -q 1 -r 1 -a 1 -b 1 -e 71 | 30' 72 | -t TIME, --time TIME This is an error catch for when we cannot keep up with 73 | the rate of sequencing on the device. It takes a 74 | finite amount of time to process through the all the 75 | channels from the sequencer. If we cannot process 76 | through the array quickly enough then we will 'fall 77 | behind' and lose the ability to filter sequences. 78 | Rather than do that we set a threshold after which we 79 | allow the sequencing to complete naturally. 80 | -q QUEUE_SIZE, --queue QUEUE_SIZE 81 | The length of the queue for storing reads until 82 | compute resources are available. 
(default 16) 83 | -s, --skip_even If set, only apply filtering to even pores 84 | -l LOWER_THRESHOLD, --lower_threshold LOWER_THRESHOLD 85 | The lower standard deviation threshold to filter reads 86 | before basecalling (default 5) 87 | -u UPPER_THRESHOLD, --upper_threshold UPPER_THRESHOLD 88 | The upper standard deviation threshold to filter reads 89 | before basecalling (default 14) 90 | -i IGNORE_EVENTS, --ignore_events IGNORE_EVENTS 91 | The number of events to ignore at the beginning of the 92 | read (default 100) 93 | -c CONSIDER_EVENTS, --consider_events CONSIDER_EVENTS 94 | The number of events to after the ignored events to be 95 | used for RUBRIC consideration (default 300) 96 | ``` 97 | 98 | **Required Arguments** 99 | 100 | Only the path to the reference database and the host address are required arguments. All other arguments default to empirically-determined optimal conditions observed in [DOI]. After ensuring that the event sampler has started with MinKNOW, one can simply use: 101 | 102 | ``` 103 | python RUBRIC --reference_database \path\to\LAST\database --host ws://localhost:9200/ 104 | 105 | ``` 106 | Note the above command assumes the event sampler is running locally on port 9200. It is highly desirable to have one computer running MinKNOW (and the event sampler) and one computer that connects remotely and runs RUBRIC. 
107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /RUBRIC/RK_RUutils_lite4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import csv 3 | import os 4 | import sys 5 | 6 | from Bio import SeqIO 7 | 8 | 9 | def file_len(file_name): 10 | """ 11 | calculates length of file 12 | """ 13 | with open(file_name) as f: 14 | # print 'file is' + str(fname) 15 | for i, l in enumerate(f): 16 | pass 17 | return i + 1 18 | 19 | 20 | def align_reads(aligner, db, in_seq, in_seq_id='noIDgiven', offline=False, number_of_bases=0, verbose=False): 21 | """ 22 | align the reads using graphmap or last 23 | if working offline, remember to add 'offline=True' otherwise the script will error out 24 | also if using last and there is already a database, toggle isdb to true 25 | """ 26 | current_path = os.getcwd() 27 | if offline: 28 | # print 'offline' 29 | if number_of_bases == 0: 30 | sys.exit('you are in offline mode but have not specified number of bases - please fix') 31 | else: 32 | counter = 0 33 | for record in SeqIO.parse(in_seq, "fasta"): 34 | counter += 1 35 | # print 'count is ' + str(counter) ##THIS IS FOR DIAGNOSTICS 36 | new_seq = record.seq 37 | new_seq2 = str(new_seq[0:int(number_of_bases) + 1]) 38 | if aligner == 'graphmap': 39 | graph_map(new_seq2, record.id, current_path, db, verbose) 40 | elif aligner == 'last': 41 | # print 'yes database' 42 | result = last(new_seq2, record.id, current_path, db, verbose) 43 | return result 44 | else: 45 | sys.exit('invalid aligner specified in test mode - use either graphmap or last') 46 | else: 47 | # print 'realtime' 48 | if in_seq_id == 'noIDgiven': 49 | sys.exit('no read ID given - if not running in offline mode, please pass read ID as third arguemnt') 50 | else: 51 | if aligner == 'graphmap': 52 | graph_map(in_seq, in_seq_id, current_path, db, verbose=False) 53 | elif aligner == 'last': 54 | result = last(in_seq, 
in_seq_id, current_path, db, verbose=False) 55 | # print 'made it here ' 56 | # print result 57 | return result 58 | # else: 59 | # if dbName=='noNamegiven': 60 | # #print 'no database name given' 61 | # sys.exit('please specify database name for last') 62 | # else: 63 | # #print 'databse name given' 64 | # Lastdb(db,dbName) 65 | # #print 'databse is ' + str(dbName) 66 | # result=Last(in_seq,in_seq_id,current_path ,dbName,verbose=False) 67 | # #print 'made it here ' 68 | # #print result 69 | # return result 70 | else: 71 | sys.exit('invalid aligner specified in realtime mode - use either graphmap or last') 72 | 73 | 74 | def graph_map(sequence, id, current_path, db, verbose): 75 | """ 76 | given a sequence and ID, map using graphmap 77 | """ 78 | new_fasta = ">" + str(id) + "\n" + str(sequence) 79 | tempfile = open(str(id) + ".fa", "w") 80 | tempfile.write(new_fasta) 81 | tempfile.close() 82 | in_fasta = str(current_path) + "/" + str(id) + ".fa" 83 | name = 'tmpOutGrM' + str(id) 84 | cmdstring = "graphmap align -r %s -d %s -o %s -v 0 -a anchor -z 0.5" % (db, in_fasta, name) 85 | os.system(cmdstring) 86 | graph_out = str(current_path) + "/" + str(name) 87 | f = open(graph_out, 'r') 88 | sam_file = f.read() 89 | samlist = sam_file.split('\n') 90 | for a in samlist: 91 | if a[0] != "@": 92 | flagline = a 93 | break 94 | flag = flagline.split('\t')[1] 95 | # print 'flag is '+str(flag) ##THIS IS FOR DIAGNOSTICS 96 | sam_check(name, flag, verbose) 97 | os.remove(name) 98 | os.remove(in_fasta) 99 | 100 | 101 | def last_db(input_fasta, db_name): 102 | """ 103 | input fasta, output database for last alignment 104 | in db_name, include path otherwise will be put into current folder. 
105 | for inFasta include path if not in current folder 106 | """ 107 | cmdstring = "lastdb -cR01 %s %s" % (db_name, input_fasta) 108 | # print cmdstring 109 | os.system(cmdstring) 110 | 111 | 112 | def last(sequence, id, current_path, db, verbose): 113 | """ 114 | given a fasta and database, output an alignment file 115 | """ 116 | newFasta = ">" + str(id) + "\n" + str(sequence) 117 | tempfile = open(str(id) + ".fa", "w") 118 | tempfile.write(newFasta) 119 | tempfile.close() 120 | inFasta = str(current_path) + "\\" + str(id) + ".fa" 121 | # dbName=str(currpath)+"/"+str(db) 122 | name = 'tmpOutGrM' + str(id) 123 | cmdstring = "lastal -fTAB -C2 %s %s > %s" % (db, inFasta, name) 124 | os.system(cmdstring) 125 | last_out = str(current_path) + "\\" + str(name) 126 | # print 'last_out is:' 127 | 128 | if file_len(last_out) == 20: 129 | os.remove(last_out) 130 | os.remove(inFasta) 131 | return 'Skip' 132 | else: 133 | os.remove(last_out) 134 | os.remove(inFasta) 135 | return 'Sequence' 136 | 137 | 138 | def last_batch(file, currpath, db, cmdstring): 139 | """ 140 | takes a batch input in fasta format and outputs a dictionary of calls. 
also provide current path and databse 141 | """ 142 | inFasta = str(currpath) + "\\" + str(file) 143 | updatedDict = {} 144 | with open(inFasta) as a: 145 | reader = csv.reader(a, delimiter="\n") 146 | c = list(reader) 147 | # print 'we are in lastbatch, len of list is ',len(c) 148 | for a in range(0, len(c) - 1): 149 | if a % 2 == 0: 150 | # print 'here is a ',c[a] 151 | # default all channels to skip 152 | updatedDict[c[a][0].split("_")[0].replace('>', '')] = [c[a][0].split("_")[1], "Skip"] 153 | # name=c[a][0] 154 | # channel=name.split("_")[0] 155 | # read=name.split("_")[1] 156 | name = 'tmpOutGrM' + str(file) 157 | # cmdstring="lastal -fTAB -C2 -q 1 -r 1 -a 1 -b 1 -D 100 -e 15 %s %s > %s" % (db,inFasta,name) 158 | cmdstring = 'lastal' + cmdstring + '%s %s > %s' % (db, inFasta, name) 159 | os.system(cmdstring) 160 | last_out = str(currpath) + "\\" + str(name) 161 | with open(last_out) as f: 162 | reader = csv.reader(f, delimiter="\t") 163 | d = list(reader) 164 | for i in range(19, len(d) - 1): 165 | name = d[i][6] 166 | read = name.split("_")[1] 167 | channel = name.split("_")[0].replace('>', '') 168 | # this if statement is a sanity check - can be removed to save time 169 | if channel in updatedDict.keys() and updatedDict[channel][0] == read: 170 | updatedDict[channel][1] = "Sequence" 171 | else: 172 | sys.exit('something is very wrong') 173 | return updatedDict 174 | 175 | 176 | def sam_check(name, flag, verbose): 177 | if flag == '4': 178 | print 'Skip' 179 | if verbose: 180 | print name 181 | elif flag == '0' or flag == '16': 182 | print 'Sequence' 183 | if verbose: 184 | print name 185 | else: 186 | if verbose: 187 | print name 188 | print "What is this??? don't be lazy Raga go check it NOW!!!" 
189 | 190 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import re 4 | import subprocess 5 | 6 | __version__ = '2.0.0' 7 | __version_info__ = tuple([int(num) for num in __version__.split('.')]) 8 | 9 | try: 10 | import pyopencl as cl 11 | except ImportError: 12 | cl = None 13 | 14 | try: 15 | __currennt_exe__ = os.path.abspath(os.environ['CURRENNT']) 16 | except KeyError: 17 | __currennt_exe__ = 'currennt' 18 | 19 | def check_currennt(): 20 | # Check we can run currennt 21 | try: 22 | with open(os.devnull, 'w') as devnull: 23 | subprocess.call([__currennt_exe__, '-h'], stdout=devnull, stderr=devnull) 24 | except OSError: 25 | raise OSError("Cannot execute currennt, it must be in your path as 'currennt' or set via the environment variable 'CURRENNT'.") 26 | 27 | 28 | def run_currennt(currennt_cfg, device=0): 29 | sys.stdout.flush() 30 | os.environ["CURRENNT_CUDA_DEVICE"]="{}".format(device) 31 | cmd = [__currennt_exe__, currennt_cfg] 32 | with open(os.devnull, 'wb') as devnull: 33 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=devnull) 34 | stdout, _ = p.communicate() 35 | p.wait() 36 | if p.returncode != 0: 37 | # On windows currennt fails to remove the cache file. Check for 38 | # this and move on, else raise an error. 
39 | e = subprocess.CalledProcessError(2, ' '.join(cmd)) 40 | if os.name != 'nt': 41 | sys.stderr.write(stdout) 42 | raise e 43 | else: 44 | cache_file = re.match( 45 | '(FAILED: boost::filesystem::remove.*: )"(.*)"', 46 | stdout.splitlines()[-1]) 47 | if cache_file is not None: 48 | cache_file = cache_file.group(2) 49 | sys.stderr.write('currennt failed to clear its cache, cleaning up {}\n'.format(cache_file)) 50 | os.unlink(cache_file) 51 | else: 52 | sys.stderr.write(stdout) 53 | raise e 54 | 55 | def run_currennt_noisy(currennt_cfg, device=0): 56 | sys.stdout.flush() 57 | os.environ["CURRENNT_CUDA_DEVICE"]="{}".format(device) 58 | cmd = [__currennt_exe__, currennt_cfg] 59 | subprocess.check_call(cmd) 60 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/align_kmers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import log 3 | import pkg_resources 4 | 5 | from RUBRIC.nanonet import all_kmers, kmers_to_annotated_sequence 6 | from RUBRIC.nanonet import Aligner 7 | 8 | 9 | def _load_substitution_matrix(fname): 10 | """ Loads an unwrapped representation of a substitution matrix. 11 | 12 | :param fname: Filename of substitution matrix file. 13 | 14 | :returns: Representation of matrix in log-space times 100. Values are 15 | 32 bit integers, which should all be negative. State ordering is as 16 | given by the compute_kmer_mapping() function. Probabilities smaller 17 | than 6e-6 are given the value of -1200. 
18 | """ 19 | subdata = np.genfromtxt(fname, names=True, dtype=None) 20 | pos_to_kmer, kmer_to_pos = all_kmers(length=3, rev_map=True) 21 | matrix = np.empty((64, 64), dtype=np.int32) 22 | for row in subdata: 23 | i = kmer_to_pos[row['kmer1']] 24 | j = kmer_to_pos[row['kmer2']] 25 | if row['prob'] > 6e-6: 26 | val = int(log(row['prob']) * 100) 27 | else: 28 | val = -1200 29 | matrix[i, j] = val 30 | return matrix 31 | 32 | matrix_file = pkg_resources.resource_filename('nanonet', 'data/rtc_mismatch_scores.txt') 33 | sub_matrix = _load_substitution_matrix(matrix_file) 34 | 35 | open_gap = 500 36 | extend_gap = 500 37 | outside_gap = min(open_gap - 200, extend_gap) 38 | gap_pens = { 39 | 'open0': open_gap, 40 | 'open1': open_gap, 41 | 'start0': outside_gap, 42 | 'start1': outside_gap, 43 | 'end0': outside_gap, 44 | 'end1': outside_gap, 45 | 'extend0': extend_gap, 46 | 'extend1': extend_gap 47 | } 48 | 49 | 50 | def align_3mer_sequences(sequence0, sequence1, substitution_matrix=sub_matrix, gap_penalties=gap_pens, reverse=True, lowmem=True): 51 | """Align two sequences in base-space using 3mers. 52 | 53 | :param sequence0: String representing a sequence of bases. 54 | :param sequence1: String representing a sequence of bases. 55 | :param substitution_matrix: 64x64 matrix of substitution scores to use for alignment. Should be 56 | a 2D numpy array of type int32. 57 | :param gap_penalties: Dictionary of gap penalties. See below. 58 | :param reverse: Bool indicating whether the second sequence should be reversed. 59 | :param lowmem: Bool indicating whether to use the (slower) low memory implementation. 60 | 61 | :returns: A tuple of: 62 | 63 | * Numpy record array with fields 'pos0' and 'pos1', representing the alignment. 64 | * Tuple of a scalar value indicating the alignment score and the average 65 | continuous alignment length. 
66 | :rtype: tuple 67 | 68 | The gap penalty dictionary should be laid out as follows: 69 | {start0: penalty for aligning sequence 0 before the start of sequence 1, 70 | end0: penatly for aligning sequence 0 after the end of sequence 1, 71 | open0: penalty for aligning sequence 0 to a new gap in sequence 1, 72 | extend0: penalty for extending a gap within sequence 1, 73 | start1: penalty for aligning sequence 1 before the start of sequence 2, 74 | end1: penatly for aligning sequence 1 after the end of sequence 2, 75 | open1: penalty for aligning sequence 1 to a new gap in sequence 2, 76 | extend1: penalty for extending a gap within sequence 2, 77 | } 78 | The only required field is open0. Gap extension values will default to being the same 79 | as opening a gap. Start and end gap penalties default to being the same as the extension 80 | penalty. And the second set of penalties will default to the values for the first. 81 | .. note:: 82 | Resulting alignment is in terms of 3mers. So '0' represents bases 0-2, and the 83 | largest value in the alignment will be len(sequence) - 3. Since the alignment is 84 | done in terms of 3mers, if the sequence was generated from 5mers then the first and 85 | last base should be discarded before calling this function. 
86 | 87 | """ 88 | submat = [[int(val) for val in line] for line in substitution_matrix] 89 | pos_to_kmer, kmer_to_pos = all_kmers(length=3, rev_map=True) 90 | seq0 = [kmer_to_pos[sequence0[i:i+3]] for i in xrange(len(sequence0) - 2)] 91 | seq1 = [kmer_to_pos[sequence1[i:i+3]] for i in xrange(len(sequence1) - 2)] 92 | if reverse: 93 | seq1[:] = seq1[::-1] 94 | gaps = _gap_penalties_dict_to_list(gap_penalties) 95 | aligner = Aligner(submat, gaps, lowmem) 96 | alignment, score = aligner.align(seq0, seq1) 97 | if reverse: 98 | for pos in xrange(len(alignment)): 99 | if alignment[pos][1] != -1: 100 | alignment[pos] = (alignment[pos][0], len(seq1) - alignment[pos][1] - 1) 101 | 102 | # We'll return the average continuously-aligned length as well. 103 | alignment_lengths = [] 104 | current_alignment_length = 0 105 | for pos in alignment: 106 | if pos[0] == -1 or pos[1] == -1: # I.e. a stay or skip 107 | if current_alignment_length > 0: 108 | alignment_lengths.append(current_alignment_length) 109 | current_alignment_length = 0 110 | else: 111 | current_alignment_length += 1 112 | if len(alignment_lengths) > 0: 113 | average_continuous_length = np.average(alignment_lengths) 114 | else: 115 | average_continuous_length = current_alignment_length 116 | npalignment = np.empty(len(alignment), dtype=[('pos0', int), ('pos1', int)]) 117 | npalignment[:] = alignment 118 | return npalignment, (score, average_continuous_length) 119 | 120 | 121 | def _gap_penalties_dict_to_list(gap_penalties): 122 | """ Convert dictionary of gap penalties into an array 123 | 124 | :param gap_penalties: Dictionary of gap penalties 125 | 126 | :returns: List of gap penalties in order which align_1mer_sequences and align_3mer_sequences can use 127 | """ 128 | gaps = [0] * 8 129 | gaps[2] = gap_penalties['open0'] 130 | gaps[3] = gap_penalties.get('extend0', gaps[2]) 131 | gaps[0] = gap_penalties.get('start0', gaps[3]) 132 | gaps[1] = gap_penalties.get('end0', gaps[3]) 133 | gaps[6] = 
gap_penalties.get('open1', gaps[2]) 134 | gaps[7] = gap_penalties.get('extend1', gaps[6]) 135 | gaps[4] = gap_penalties.get('start1', gaps[7]) 136 | gaps[5] = gap_penalties.get('end1', gaps[7]) 137 | return gaps 138 | 139 | 140 | def align_basecalls(kmers0, kmers1, substitution_matrix=sub_matrix, gap_penalties=gap_pens, lowmem=True): 141 | """ Align template to complement basecalls, using the align_3mer_sequences function. 142 | 143 | :param kmers0: Template basecalled kmers. 144 | :param kmers1: Complement basecalled kmers. 145 | :param substitution_matrix: 64x64 matrix of substitution scores to use for alignment. Should be 146 | a 2D numpy array of type int32. 147 | :param gap_penalties: Dictionary of gap penalties. See below. 148 | :param lowmem: Bool indicating whether to use the (slower) low memory implementation. 149 | 150 | :returns: A tuple of: 151 | 152 | * Numpy array with fields 'pos0' and 'pos1' 153 | * Scalar value indicating the alignment score and the average continuous 154 | alignment length. 155 | :rtype: tuple 156 | 157 | Returns a "filled-in" alignment. So there will be no -1 values (gaps). Instead, 158 | values can be repeated in either of the sequences. 159 | 160 | The returned alignment is trimmed, meaning that it will start and end with events 161 | that are aligned to each other. Therefore events at the beginning and end of 162 | either sequence may have been removed. 163 | 164 | .. warning: 165 | It is possible for the alignment to fail, if too few points end up directly aligned 166 | to each other. In this case the function will return the tuple (None, None). 
167 | """ 168 | sequence0, index0 = kmers_to_annotated_sequence(kmers0) 169 | sequence1, index1 = kmers_to_annotated_sequence(kmers1) 170 | kmer_len = len(kmers0[0]) 171 | trim = kmer_len - 3 172 | trim_left = int((trim + 1) / 2) 173 | trim_right = int(trim / 2) 174 | sequence0 = sequence0[trim_left:(len(sequence0) - trim_right)] 175 | sequence1 = sequence1[trim_left:(len(sequence1) - trim_right)] 176 | alignment, score = align_3mer_sequences(sequence0, sequence1, substitution_matrix, gap_penalties, reverse=True) 177 | # Find positions in the alignment that don't have gaps. 178 | hits = [] 179 | for i, j in alignment: 180 | if i != -1 and j != -1: 181 | p0 = index0[i] 182 | p1 = index1[j] 183 | if p0 != -1 and p1 != -1: 184 | hits.append((p0, p1)) 185 | if len(hits) < 2: 186 | # Not enough aligned positions to do anything sensible. 187 | return None, None 188 | # Build up a filled-in alignment by interpolating between aligned positions. 189 | new_alignment = [hits[0]] 190 | for i in xrange(1, len(hits)): 191 | delta0 = hits[i][0] - hits[i-1][0] 192 | delta1 = hits[i-1][1] - hits[i][1] 193 | if delta0 > 1 and delta1 > 1: 194 | # Both sequences jump by more than 1 between aligned points. 195 | # One sequence should increment by one each position. The other will vary. 196 | n = max(delta0, delta1) - 1 197 | p0 = hits[i-1][0] 198 | p1 = hits[i-1][1] 199 | step0 = float(delta0 - 1) / float(n) 200 | step1 = float(delta1 - 1) / float(n) 201 | for k in range(n): 202 | p0 += step0 203 | p1 -= step1 204 | new_alignment.append((int(round(p0)), int(round(p1)))) 205 | elif delta0 > 1: 206 | # Need to insert repetions into sequence 2. 207 | for j in range(hits[i-1][0] + 1, hits[i][0]): 208 | new_alignment.append((j, hits[i-1][1])) 209 | elif delta1 > 1: 210 | # Need to insert repetions into sequence 1. 
211 | for j in range(hits[i-1][1] - 1, hits[i][1], -1): 212 | new_alignment.append((hits[i-1][0], j)) 213 | else: 214 | new_alignment.append(hits[i]) 215 | alignment = np.empty(len(new_alignment), dtype=[('pos0', int), ('pos1', int)]) 216 | for n, p in enumerate(new_alignment): 217 | alignment[n] = p 218 | return alignment, score 219 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/common/bp_tools.h: -------------------------------------------------------------------------------- 1 | #ifndef BP_TOOLS_H 2 | #define BP_TOOLS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace ublas = boost::numeric::ublas; 10 | namespace bp = boost::python; 11 | 12 | 13 | /// Construct a std::vector from a python list. Elements must match template type. 14 | template 15 | std::vector list_to_vector(const bp::list& in) { 16 | int count = bp::len(in); 17 | std::vector out(count); 18 | for (int i = 0; i < count; ++i) { 19 | out[i] = bp::extract(in[i]); 20 | } 21 | return out; 22 | } 23 | 24 | 25 | /// Construct a std::vector of std::pair objects from a python list of tuples. 26 | template 27 | std::vector > list_to_pair_vector(const bp::list& in) { 28 | int count = bp::len(in); 29 | std::vector > out(count); 30 | for (int i = 0; i < count; ++i) { 31 | bp::tuple p = bp::extract(in[i]); 32 | T first = bp::extract(p[0]); 33 | T second = bp::extract(p[1]); 34 | out[i] = std::make_pair(first, second); 35 | } 36 | return out; 37 | } 38 | 39 | 40 | /// Construct an ublas::matrix from a python list of lists. Elements must match template type. 
41 | template 42 | ublas::matrix list_to_matrix(const bp::list& in) { 43 | int nrows = bp::len(in); 44 | int ncols = bp::len(bp::extract(in[0])); 45 | ublas::matrix out(nrows, ncols); 46 | for (int i = 0; i < nrows; ++i) { 47 | bp::list row = bp::extract(in[i]); 48 | if (bp::len(row) != ncols) { 49 | throw std::runtime_error("Error: Not all columns are the same length."); 50 | } 51 | for (int j = 0; j < ncols; ++j) { 52 | out(i, j) = bp::extract(row[j]); 53 | } 54 | } 55 | return out; 56 | } 57 | 58 | 59 | /// Construct a std::map from a python dictionary. 60 | template 61 | std::map dict_to_map(const bp::dict& in) { 62 | bp::list items = in.items(); 63 | int count = bp::len(items); 64 | std::map out; 65 | for (int i = 0; i < count; ++i) { 66 | bp::tuple pair = bp::extract(items[i]); 67 | KEY key = bp::extract(pair[0]); 68 | VAL val = bp::extract(pair[1]); 69 | out[key] = val; 70 | } 71 | return out; 72 | } 73 | 74 | 75 | #endif /* BP_TOOLS_H */ 76 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/common/data_view.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_VIEW_H 2 | #define DATA_VIEW_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace ublas = boost::numeric::ublas; 10 | 11 | 12 | /** Represents a block of data as an STL-style container. 13 | * This object does not own the data it views, and as such 14 | * it can be invalidated if the data it views goes out of 15 | * scope or is deleted. Note that the const-ness of this 16 | * class protects the object itself, but not the data it 17 | * views. 
18 | **/ 19 | template 20 | class VecView { 21 | public: 22 | typedef T value_type; 23 | typedef T* pointer; 24 | typedef const T* const_pointer; 25 | typedef size_t size_type; 26 | typedef ptrdiff_t difference_type; 27 | typedef T& reference; 28 | typedef const T& const_reference; 29 | 30 | protected: 31 | pointer ptr_; 32 | size_type size_; 33 | difference_type stride_; 34 | 35 | public: 36 | class iterator : public std::iterator { 37 | public: 38 | typedef T value_type; 39 | typedef T* pointer; 40 | typedef const T* const_pointer; 41 | typedef ptrdiff_t difference_type; 42 | typedef T& reference; 43 | typedef const T& const_reference; 44 | 45 | protected: 46 | pointer ptr_; 47 | difference_type stride_; 48 | 49 | public: 50 | iterator() : ptr_(0), stride_(1) {} 51 | 52 | iterator(pointer p, difference_type s) : ptr_(p), stride_(s) {} 53 | 54 | operator void *() const { 55 | return ptr_; 56 | } 57 | 58 | reference operator[](int n) const { 59 | return *(ptr_ + n * stride_); 60 | } 61 | 62 | reference operator*() const { 63 | return *ptr_; 64 | } 65 | 66 | pointer operator->() const { 67 | return ptr_; 68 | } 69 | 70 | iterator& operator++() { 71 | ptr_ += stride_; 72 | return *this; 73 | } 74 | 75 | iterator& operator--() { 76 | ptr_ -= stride_; 77 | return *this; 78 | } 79 | 80 | iterator operator++(int) { 81 | iterator temp(*this); 82 | ptr_ += stride_; 83 | return temp; 84 | } 85 | 86 | iterator operator--(int) { 87 | iterator temp(*this); 88 | ptr_ -= stride_; 89 | return temp; 90 | } 91 | 92 | iterator& operator+=(int n) { 93 | ptr_ += n * stride_; 94 | return *this; 95 | } 96 | 97 | iterator& operator-=(int n) { 98 | ptr_ -= n * stride_; 99 | return *this; 100 | } 101 | 102 | iterator operator+(int n) const { 103 | return iterator(ptr_ + n * stride_, stride_); 104 | } 105 | 106 | iterator operator-(int n) const { 107 | return iterator(ptr_ - n * stride_, stride_); 108 | } 109 | 110 | difference_type operator-(const iterator& it) const { 111 | return (ptr_ - 
it.ptr_) / stride_; 112 | } 113 | 114 | bool operator==(const iterator& it) const { 115 | return (ptr_ == it.ptr_); 116 | } 117 | 118 | bool operator<(const iterator& it) const { 119 | return (stride_ > 0) ? (ptr_ < it.ptr_) : (it.ptr_ < ptr_); 120 | } 121 | 122 | bool operator>(const iterator& it) const { 123 | return (stride_ > 0) ? (ptr_ > it.ptr_) : (it.ptr_ > ptr_); 124 | } 125 | 126 | bool operator<=(const iterator& it) const { 127 | return (stride_ > 0) ? (ptr_ <= it.ptr_) : (it.ptr_ <= ptr_); 128 | } 129 | 130 | bool operator>=(const iterator& it) const { 131 | return (stride_ > 0) ? (ptr_ >= it.ptr_) : (it.ptr_ >= ptr_); 132 | } 133 | 134 | bool operator!=(const iterator& it) const { 135 | return (ptr_ != it.ptr_); 136 | } 137 | }; 138 | 139 | typedef iterator const_iterator; 140 | 141 | /// Default constructor. 142 | VecView() : ptr_(0), size_(0), stride_(1) {} 143 | 144 | /** Basic view constructor. 145 | * @param p Pointer to data to be viewed. 146 | * @param len Number of elements to be viewed. 147 | * @param stride Optional stride of elements. 148 | */ 149 | VecView(const_pointer p, int len, int stride = 1) { 150 | view(p, len, stride); 151 | } 152 | 153 | /// View a std::vector. 154 | VecView(const std::vector& vec) { 155 | view(vec); 156 | } 157 | 158 | /// Clears the current view. 159 | void clear() { 160 | ptr_ = 0; 161 | size_ = 0; 162 | stride_ = 1; 163 | } 164 | 165 | /** Basic view constructor. 166 | * @param p Pointer to data to be viewed. 167 | * @param len Number of elements to be viewed. 168 | * @param stride Optional stride of elements. 169 | * 170 | * The current view (if any) is abandoned. 171 | */ 172 | void view(const_pointer p, int len, int stride = 1) { 173 | if (len < 0 || stride == 0) { 174 | throw std::runtime_error("Size must be >= 0, and stride cannot be zero."); 175 | } 176 | ptr_ = const_cast(p); 177 | size_ = size_type(len); 178 | stride_ = difference_type(stride); 179 | } 180 | 181 | /** View a std::vector. 
182 | * The current view (if any) is abandoned. 183 | */ 184 | void view(const std::vector& vec) { 185 | if (vec.empty()) clear(); 186 | else view(&vec[0], int(vec.size()), 1); 187 | } 188 | 189 | /** Return a new object that views a slice of the current one. 190 | * @param start The starting position of the slice in the current view. 191 | * @param len The number of elements to be viewed. 192 | * @param stride Optional stride, which is relative to the current view. 193 | */ 194 | VecView slice(int start, int len, int stride = 1) const { 195 | if (start < 0) throw std::runtime_error("Slice cannot have negative start value."); 196 | if (stride > 0) { 197 | if (start + len * stride > int(size_)) throw std::runtime_error("Slice out of bounds."); 198 | } 199 | else if (stride < 0) { 200 | if (start >= int(size_) || start + (len - 1) * stride <= 0) throw std::runtime_error("Slice out of bounds."); 201 | } 202 | else { 203 | throw std::runtime_error("Slice cannot have zero stride."); 204 | } 205 | return VecView(ptr_ + start * stride_, len, stride_ * stride); 206 | } 207 | 208 | /// The size of the data view. 209 | size_type size() const { 210 | return size_; 211 | } 212 | 213 | /// The stride of the view. 214 | difference_type stride() const { 215 | return stride_; 216 | } 217 | 218 | /// A pointer to the first element of the raw data of the view. 219 | pointer data() const { 220 | return ptr_; 221 | } 222 | 223 | /// Indexing operator. 224 | reference operator[](int n) const { 225 | return *(ptr_ + n * stride_); 226 | } 227 | 228 | /// Reverse the current view. 229 | void reverse() { 230 | if (ptr_ == 0 || size_ == 0) return; 231 | ptr_ += ptrdiff_t(size_ - 1) * stride_; 232 | stride_ = -stride_; 233 | } 234 | 235 | /// Iterator to start of view. 236 | iterator begin() const { 237 | return iterator(ptr_, stride_); 238 | } 239 | 240 | /// Iterator to past-the-end of the view. 
241 | iterator end() const { 242 | return iterator(ptr_ + size_ * stride_, stride_); 243 | } 244 | }; 245 | 246 | 247 | template 248 | inline typename VecView::iterator operator+(int n, const typename VecView::iterator& it) { 249 | return it + n; 250 | } 251 | 252 | 253 | template 254 | class MatView { 255 | public: 256 | typedef T value_type; 257 | typedef T* pointer; 258 | typedef const T* const_pointer; 259 | typedef size_t size_type; 260 | typedef ptrdiff_t difference_type; 261 | typedef T& reference; 262 | typedef const T& const_reference; 263 | 264 | typedef typename VecView::iterator iterator, const_iterator; 265 | 266 | protected: 267 | pointer ptr_; 268 | size_type size1_, size2_; 269 | difference_type stride1_, stride2_; 270 | 271 | public: 272 | 273 | MatView() : ptr_(0), size1_(0), size2_(0), stride1_(1), stride2_(1) {} 274 | 275 | MatView(const_pointer p, int len1, int len2, int stride1, int stride2) { 276 | view(p, len1, len2, stride1, stride2); 277 | } 278 | 279 | MatView(const_pointer p, int len1, int len2) { 280 | view(p, len1, len2); 281 | } 282 | 283 | MatView(const ublas::matrix& mat) { 284 | view(mat); 285 | } 286 | 287 | void view(const_pointer p, int len1, int len2) { 288 | view(p, len1, len2, len2, 1); 289 | } 290 | 291 | void view(const_pointer p, int len1, int len2, int stride1, int stride2) { 292 | if (len1 < 0 || len2 < 0 || stride1 == 0 || stride2 == 0) { 293 | throw std::runtime_error("Lengths must be >= 0, and strides cannot be zero."); 294 | } 295 | ptr_ = const_cast(p); 296 | size1_ = size_type(len1); 297 | size2_ = size_type(len2); 298 | stride1_ = difference_type(stride1); 299 | stride2_ = difference_type(stride2); 300 | } 301 | 302 | void view(const ublas::matrix& mat) { 303 | int s1 = &mat(1, 0) - &mat(0, 0); 304 | int s2 = &mat(0, 1) - &mat(0, 0); 305 | view(&mat(0, 0), int(mat.size1()), int(mat.size2()), s1, s2); 306 | } 307 | 308 | reference operator()(int n, int m) const { 309 | return *(ptr_ + n * stride1_ + m * stride2_); 
310 | } 311 | 312 | pointer data() const { 313 | return ptr_; 314 | } 315 | 316 | size_type size1() const { 317 | return size1_; 318 | } 319 | 320 | size_type size2() const { 321 | return size2_; 322 | } 323 | 324 | difference_type stride1() const { 325 | return stride1_; 326 | } 327 | 328 | difference_type stride2() const { 329 | return stride2_; 330 | } 331 | 332 | MatView submatrix(int start1, int start2, int len1, int len2, int stride1 = 1, int stride2 = 1) const { 333 | return MatView(ptr_ + start1 * stride1_ + start2 * stride2_, len1, len2, stride1 * stride1_, stride2 * stride2_); 334 | } 335 | 336 | VecView row(int n) const { 337 | return VecView(ptr_ + n * stride1_, size2_, stride2_); 338 | } 339 | 340 | VecView column(int m) const { 341 | return VecView(ptr_ + m * stride2_, size1_, stride1_); 342 | } 343 | 344 | void transpose() { 345 | if (ptr_ == 0 || size1_ == 0 || size2_ == 0) return; 346 | std::swap(stride1_, stride2_); 347 | std::swap(size1_, size2_); 348 | } 349 | 350 | iterator row_begin(int n) const { 351 | return iterator(ptr_ + n * stride1_, stride2_); 352 | } 353 | 354 | iterator row_end(int n) const { 355 | return iterator(ptr_ + n * stride1_ + size2_ * stride2_, stride2_); 356 | } 357 | 358 | iterator column_begin(int m) const { 359 | return iterator(ptr_ + m * stride2_, stride1_); 360 | } 361 | 362 | iterator column_end(int m) const { 363 | return iterator(ptr_ + m * stride2_ + size1_ * stride1_, stride1_); 364 | } 365 | }; 366 | 367 | 368 | #endif /* DATA_VIEW_H */ 369 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/common/stub_py.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace bp = boost::python; 9 | 10 | bp::list check_vector(bp::list& v); 11 | bp::list check_matrix(bp::list& m); 12 | bp::dict check_map(bp::dict& d); 13 | bp::numeric::array 
check_np_vector(bp::numeric::array& v, const std::string& t); 14 | bp::numeric::array check_np_matrix(bp::numeric::array& m); 15 | void check_exp(bp::numeric::array& data, bool fast); 16 | 17 | 18 | /// Python class wrapper. 19 | BOOST_PYTHON_MODULE(stub) { 20 | import_array(); 21 | bp::numeric::array::set_module_and_type("numpy", "ndarray"); 22 | def("check_vector", &check_vector); 23 | def("check_matrix", &check_matrix); 24 | def("check_map", &check_map); 25 | def("check_np_vector", &check_np_vector); 26 | def("check_np_matrix", &check_np_matrix); 27 | def("check_exp", &check_exp); 28 | } 29 | 30 | 31 | bp::list check_vector(bp::list& v) { 32 | std::vector vec = list_to_vector(v); 33 | bp::list out; 34 | for (size_t i = 0; i < vec.size(); ++i) { 35 | out.append(vec[i]); 36 | } 37 | return out; 38 | } 39 | 40 | 41 | bp::list check_matrix(bp::list& m) { 42 | ublas::matrix mat = list_to_matrix(m); 43 | bp::list out; 44 | for (size_t i = 0; i < mat.size1(); ++i) { 45 | bp::list row; 46 | for (size_t j = 0; j < mat.size2(); ++j) { 47 | row.append(mat(i, j)); 48 | } 49 | out.append(row); 50 | } 51 | return out; 52 | } 53 | 54 | 55 | bp::dict check_map(bp::dict& d) { 56 | std::map dm = dict_to_map(d); 57 | bp::dict out; 58 | for (std::map::iterator p = dm.begin(); p != dm.end(); ++p) { 59 | out[p->first] = p->second; 60 | } 61 | return out; 62 | } 63 | 64 | 65 | bp::numeric::array check_np_vector(bp::numeric::array& v, const std::string& t) { 66 | if (t == "int32") { 67 | VecView vec = view_1d_array(v); 68 | //std::cerr << "Stride for " << t << " is " << vec.stride() << std::endl; 69 | return vector_to_numpy(vec); 70 | } 71 | else if (t == "int64") { 72 | VecView vec = view_1d_array(v); 73 | //std::cerr << "Stride for " << t << " is " << vec.stride() << std::endl; 74 | return vector_to_numpy(vec); 75 | } 76 | else if (t == "float64") { 77 | VecView vec = view_1d_array(v); 78 | //std::cerr << "Stride for " << t << " is " << vec.stride() << std::endl; 79 | return 
vector_to_numpy(vec); 80 | } 81 | return new_numpy_1d(0); 82 | } 83 | 84 | 85 | bp::numeric::array check_np_matrix(bp::numeric::array& m) { 86 | MatView mat = view_2d_array(m); 87 | return matrix_to_numpy(mat); 88 | } 89 | 90 | 91 | void check_exp(bp::numeric::array& data, bool fast) { 92 | VecView x = view_1d_array(data); 93 | size_t n = x.size(); 94 | if (fast) { 95 | for (size_t i = 0; i < n; ++i) { 96 | x[i] = fastpow2(POW2FACTOR * x[i]); 97 | } 98 | } 99 | else { 100 | for (size_t i = 0; i < n; ++i) { 101 | x[i] = exp(x[i]); 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/common/test/test_stub.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import unittest 3 | import warnings 4 | import cProfile 5 | import pstats 6 | import time 7 | import StringIO 8 | import numpy as np 9 | from dragonet.basecall.common import stub 10 | 11 | 12 | class TestStub(unittest.TestCase): 13 | 14 | def setUp(self): 15 | return 16 | 17 | def tearDown(self): 18 | return 19 | 20 | def test_001_list_to_vector(self): 21 | data = [1.0, 2.0, -3.5, 7.6] 22 | newdata = stub.check_vector(data) 23 | self.assertEqual(data, newdata) 24 | return 25 | 26 | def test_002_list_to_matrix(self): 27 | data = [[1.0, 2.0, -3.5], 28 | [7.6, 1.3, 12.8]] 29 | newdata = stub.check_matrix(data) 30 | self.assertEqual(data, newdata) 31 | return 32 | 33 | def test_003_dict_to_map(self): 34 | data = {'foo': 1, 'bar': 2, 'kill': 3, 'me': 4} 35 | newdata = stub.check_map(data) 36 | self.assertEqual(data, newdata) 37 | return 38 | 39 | def test_004_contiguous(self): 40 | data1d = np.array([1, 5, 7, 3, 8, 4, 10, 4, 12, 2], dtype=np.int32) 41 | data1d2 = np.zeros(10, dtype=np.float64) 42 | data1d2[:] = data1d * 0.5 43 | data2d = np.ndarray((2, 5), buffer=(data1d * 0.5), dtype=np.float64) 44 | new1d = stub.check_np_vector(data1d, 'int32') 45 | 
np.testing.assert_equal(data1d, new1d) 46 | new1d2 = stub.check_np_vector(data1d2, 'float64') 47 | np.testing.assert_equal(data1d2, new1d2) 48 | new2d = stub.check_np_matrix(data2d) 49 | np.testing.assert_equal(data2d, new2d) 50 | return 51 | 52 | def test_005_noncontiguous(self): 53 | data1d = np.array([1, 5, 7, 3, 8, 4, 10, 4, 12, 2], dtype=np.int32) 54 | data2d = np.ndarray((2, 5), buffer=(data1d * 0.5), dtype=np.float64) 55 | data1d = data1d[::2] 56 | data2d = data2d[::-1, ::2] 57 | new1d = stub.check_np_vector(data1d, 'int32') 58 | np.testing.assert_equal(data1d, new1d) 59 | new2d = stub.check_np_matrix(data2d) 60 | np.testing.assert_equal(data2d, new2d) 61 | return 62 | 63 | def test_005_record_array(self): 64 | desc = np.dtype({'names': ['a', 'b', 'c'], 'formats': [np.int32, np.float64, np.int64]}, align=True) 65 | data = np.zeros(10, dtype=desc) 66 | data1 = data['a'] 67 | data2 = data['b'] 68 | data3 = data['c'] 69 | data1[:] = [1, 5, 7, 3, 8, 4, 10, 4, 12, 2] 70 | data2[:] = data1 * 0.5 71 | data3[:] = data1 * 2 72 | new1 = stub.check_np_vector(data1, 'int32') 73 | np.testing.assert_equal(data1, new1) 74 | new2 = stub.check_np_vector(data2, 'float64') 75 | np.testing.assert_equal(data2, new2) 76 | new3 = stub.check_np_vector(data3, 'int64') 77 | np.testing.assert_equal(data3, new3) 78 | return 79 | 80 | @unittest.skip('Not needed') 81 | def test_006_accumulate(self): 82 | data = np.arange(0.0, 10.0, dtype=np.float32) 83 | result = stub.check_accumulator(data) 84 | self.assertEqual(result, 45.0) 85 | result = stub.check_accumulator(data[:-1]) 86 | self.assertEqual(result, 36.0) 87 | data = np.empty(1024, dtype=np.float32) 88 | data[:] = np.random.random(1024) 89 | 90 | #print 'Fast version' 91 | pr = cProfile.Profile() 92 | pr.enable() 93 | result1 = self._loop_checker(data, 1000000, True) 94 | pr.disable() 95 | s = StringIO.StringIO() 96 | ps = pstats.Stats(pr, stream=s).sort_stats('cumulative') 97 | #ps.print_stats() 98 | #print s.getvalue() 99 | 100 | 
#print 'Normal version' 101 | pr = cProfile.Profile() 102 | pr.enable() 103 | result2 = self._loop_checker(data, 1000000, False) 104 | pr.disable() 105 | s = StringIO.StringIO() 106 | ps = pstats.Stats(pr, stream=s).sort_stats('cumulative') 107 | #ps.print_stats() 108 | #print s.getvalue() 109 | 110 | return 111 | 112 | def test_007_check_exp(self): 113 | warnings.filterwarnings("error") 114 | raw = np.arange(-80.0, 80.0, 1e-4, dtype=np.float32) 115 | data1 = np.empty(raw.size, dtype=np.float32) 116 | data2 = np.empty(raw.size, dtype=np.float32) 117 | data1[:] = raw 118 | data2[:] = raw 119 | #print 'Total count is:', raw.size 120 | t1 = time.clock() 121 | stub.check_exp(data1, False) 122 | t2 = time.clock() 123 | stub.check_exp(data2, True) 124 | t3 = time.clock() 125 | #print 'Stats for positive range.' 126 | #print 'Time for normal exponential:', t2 - t1 127 | #print 'Time for fast exponential:', t3 - t2 128 | error = np.abs((data2 - data1) / data1) 129 | max_idx = np.argmax(error) 130 | #print 'Max error:', error[max_idx], 'raw = ', raw[max_idx], 'data1 =', data1[max_idx], 'data2 =', data2[max_idx], 'diff =', data2[max_idx] - data1[max_idx] 131 | data3 = -1.0 * raw 132 | data4 = -1.0 * raw 133 | t1 = time.clock() 134 | stub.check_exp(data3, False) 135 | t2 = time.clock() 136 | stub.check_exp(data4, True) 137 | t3 = time.clock() 138 | #print 'Stats for negative range.' 
139 | #print 'Time for normal exponential:', t2 - t1 140 | #print 'Time for fast exponential:', t3 - t2 141 | error = np.abs((data4 - data3) / data3) 142 | max_idx = np.argmax(error) 143 | #print 'Max error:', error[max_idx], 'raw =', raw[max_idx], 'data1 =', data3[max_idx], 'data2 =', data4[max_idx], 'diff =', data4[max_idx] - data3[max_idx] 144 | return 145 | 146 | 147 | def _loop_checker(self, data, n, fast): 148 | return stub.check_accumulator_loop(data, n, fast) 149 | 150 | 151 | if __name__ == '__main__': 152 | unittest.main() 153 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/common/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | /// 1.0 / ln(2) - Needed as scaling factor for computing exp(x) from 2^x. 13 | static const float POW2FACTOR = 1.442695040f; 14 | 15 | inline float square(float x) {return x * x;} 16 | inline float cube(float x) {return x * x * x;} 17 | 18 | 19 | /// Fast approximation for computing 2^p in single precision. 20 | inline float fastpow2(float p) { 21 | float clipp = (p > -125.0f) ? p : -125.0f; 22 | union {uint32_t i; float f;} v = {uint32_t((1 << 23) * (clipp + 126.94269504f))}; 23 | return v.f; 24 | } 25 | 26 | /// Fast vectorized approximation for computing 2^p in single precision for 4 numbers. 27 | inline __m128 vfasterpow2(const __m128 p) { 28 | const __m128 c_126_94269504 = _mm_set_ps1(126.94269504f); 29 | const __m128 lt125 = _mm_cmplt_ps(p, _mm_set_ps1(-125.0f)); 30 | const __m128 clipp = _mm_or_ps(_mm_andnot_ps(lt125, p), _mm_and_ps(lt125, _mm_set_ps1(-125.0f))); 31 | union { __m128i i; __m128 f; } v = { _mm_cvttps_epi32(_mm_mul_ps(_mm_set_ps1(1 << 23), _mm_add_ps(clipp, c_126_94269504))) }; 32 | return v.f; 33 | } 34 | 35 | /** Generic normalization function. 
36 | * @param NUM_STATES The number of states to normalize over. 37 | * @param data An array of floats. 38 | * @returns The normalization factor used. 39 | */ 40 | template 41 | float normalize(float *data) { 42 | float sum = 0.0f; 43 | for (int state = 0; state < NUM_STATES; ++state) { 44 | sum += data[state]; 45 | } 46 | if (sum < 1e-38f || !std::isfinite(sum)) { 47 | throw std::runtime_error("Normalization error."); 48 | } 49 | float norm = 1.0f / (sum); 50 | for (int state = 0; state < NUM_STATES; ++state) { 51 | data[state] *= norm; 52 | } 53 | return sum; 54 | } 55 | 56 | 57 | #endif /* UTILS_H */ 58 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/common/view_numpy_arrays.h: -------------------------------------------------------------------------------- 1 | #ifndef VIEW_NUMPY_ARRAYS_H 2 | #define VIEW_NUMPY_ARRAYS_H 3 | 4 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | namespace bp = boost::python; 14 | 15 | 16 | inline int numpy_type(bool) {return NPY_BOOL;} 17 | inline int numpy_type(int16_t) {return NPY_INT16;} 18 | inline int numpy_type(int32_t) {return NPY_INT32;} 19 | #ifdef _MSC_VER 20 | inline int numpy_type(int) {return NPY_INT32;} 21 | #endif 22 | inline int numpy_type(int64_t) {return NPY_INT64;} 23 | inline int numpy_type(uint16_t) {return NPY_UINT16;} 24 | inline int numpy_type(uint32_t) {return NPY_UINT32;} 25 | inline int numpy_type(uint64_t) {return NPY_UINT64;} 26 | inline int numpy_type(float) {return NPY_FLOAT32;} 27 | inline int numpy_type(double) {return NPY_FLOAT64;} 28 | 29 | template 30 | inline int numpy_type(T&) { 31 | throw std::invalid_argument("Unknown type for numpy array."); 32 | return 0; 33 | } 34 | 35 | 36 | template 37 | VecView view_1d_array(const bp::numeric::array& arr) { 38 | PyArrayObject *obj = reinterpret_cast(arr.ptr()); 39 | if (obj == 0) { 40 | throw 
std::invalid_argument("Could not covert bp::numeric::array to 1d numpy array."); 41 | } 42 | if (PyArray_DESCR(obj)->elsize != sizeof(T)) { 43 | throw std::invalid_argument("Numpy 1d array type does not match template type."); 44 | } 45 | if (PyArray_NDIM(obj) != 1) { 46 | throw std::length_error("Numpy array must be 1D."); 47 | } 48 | int length = PyArray_DIM(obj, 0); 49 | int stride = PyArray_STRIDE(obj, 0) / sizeof(T); 50 | npy_intp ind[1] = {0}; 51 | T *data = reinterpret_cast(PyArray_GetPtr(obj, ind)); 52 | return VecView(data, length, stride); 53 | } 54 | 55 | 56 | template 57 | MatView view_2d_array(const bp::numeric::array& arr) { 58 | PyArrayObject *obj = reinterpret_cast(arr.ptr()); 59 | if (obj == 0) { 60 | throw std::invalid_argument("Could not covert bp::numeric::array to 2d numpy array."); 61 | } 62 | if (PyArray_DESCR(obj)->elsize != sizeof(T)) { 63 | throw std::invalid_argument("Numpy 2d array type does not match template type."); 64 | } 65 | if (PyArray_NDIM(obj) != 2) { 66 | throw std::length_error("Numpy array must be 2D."); 67 | } 68 | int length1 = PyArray_DIM(obj, 0); 69 | int length2 = PyArray_DIM(obj, 1); 70 | int stride1 = PyArray_STRIDE(obj, 0) / sizeof(T); 71 | int stride2 = PyArray_STRIDE(obj, 1) / sizeof(T); 72 | npy_intp ind[2] = {0, 0}; 73 | T *data = reinterpret_cast(PyArray_GetPtr(obj, ind)); 74 | return MatView(data, length1, length2, stride1, stride2); 75 | } 76 | 77 | 78 | template 79 | bp::numeric::array new_numpy_1d(int n) { 80 | npy_intp dims[1] = {n}; 81 | PyArrayObject *obj = reinterpret_cast(PyArray_SimpleNew(1, dims, numpy_type(T()))); 82 | if (obj == 0) throw std::runtime_error("Call to PyArray_SimpleNew() failed."); 83 | bp::handle<> handle(reinterpret_cast(obj)); 84 | return bp::numeric::array(handle); 85 | } 86 | 87 | 88 | template 89 | bp::numeric::array new_numpy_2d(int n, int m) { 90 | npy_intp dims[2] = {n, m}; 91 | PyArrayObject *obj = reinterpret_cast(PyArray_SimpleNew(2, dims, numpy_type(T()))); 92 | if (obj == 
0) throw std::runtime_error("Call to PyArray_SimpleNew() failed."); 93 | bp::handle<> handle(reinterpret_cast(obj)); 94 | return bp::numeric::array(handle); 95 | } 96 | 97 | 98 | template 99 | bp::numeric::array vector_to_numpy(const VecView& vec) { 100 | bp::numeric::array arr = new_numpy_1d(vec.size()); 101 | VecView lhs = view_1d_array(arr); 102 | std::copy(vec.begin(), vec.end(), lhs.begin()); 103 | return arr; 104 | } 105 | 106 | 107 | template 108 | bp::numeric::array matrix_to_numpy(const MatView& mat) { 109 | bp::numeric::array arr = new_numpy_2d(mat.size1(), mat.size2()); 110 | MatView lhs = view_2d_array(arr); 111 | for (size_t i = 0; i < mat.size1(); ++i) { 112 | std::copy(mat.row_begin(i), mat.row_end(i), lhs.row_begin(i)); 113 | } 114 | return arr; 115 | } 116 | 117 | 118 | #endif /* VIEW_NUMPY_ARRAYS_H */ 119 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/pair_align/mm_align.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace std; 4 | 5 | namespace PairAlign { 6 | 7 | int32_t MMAlign::processBlock(int xpos1, int xpos2, int ypos1, int ypos2, 8 | int32_t m1, int32_t iy1, int32_t m2, int32_t iy2) { 9 | int len = xpos2 - xpos1 + 2; 10 | int mid = (ypos2 + ypos1 + 1) / 2; 11 | processUp(xpos1, xpos2, ypos1, mid, m1, iy1); 12 | lastM.swap(buffM); 13 | lastIy.swap(buffIy); 14 | processDown(xpos1, xpos2, mid, ypos2, m2, iy2); 15 | 16 | // Find alignment point. 
17 | int pos = 0; 18 | int32_t maxScore = ZERO_PROB_SCORE; 19 | bool isMatch = false; 20 | for (int i = 0; i < len; ++i) { 21 | int dpos = len - i; 22 | int32_t mScore = ZERO_PROB_SCORE; 23 | if (i + xpos1 > 0) mScore = buffM[i] + lastM[dpos] - subMatrix(seq1[mid], seq2[i + xpos1 - 1]); 24 | int32_t deltay = openGapy; 25 | if (i + xpos1 == 0) deltay = startGapy; 26 | if (i + xpos1 == int(seq2.Size())) deltay = endGapy; 27 | int32_t yScore = buffIy[i] + lastIy[dpos - 1] - deltay; 28 | int32_t score = max(mScore, yScore); 29 | if (score > maxScore) { 30 | maxScore = score; 31 | pos = i; 32 | isMatch = (mScore >= yScore); 33 | } 34 | } 35 | 36 | // Push alignment position (if they aligned at this midline). 37 | if (isMatch) { 38 | matches.push_back(AlignPos(mid, xpos1 + pos - 1)); 39 | } 40 | 41 | // Set up next blocks. 42 | int32_t newm1 = isMatch ? buffM[pos] : ZERO_PROB_SCORE; 43 | int32_t newiy1 = isMatch ? ZERO_PROB_SCORE : buffIy[pos]; 44 | int dpos = len - pos; 45 | int32_t newm2 = isMatch ? lastM[dpos] : ZERO_PROB_SCORE; 46 | int32_t newiy2 = isMatch ? ZERO_PROB_SCORE : lastIy[dpos - 1]; 47 | 48 | // Do new lower block. 49 | int newxpos2 = pos + xpos1 - 1; 50 | if (isMatch) --newxpos2; 51 | if (mid > ypos1 && newxpos2 >= xpos1) { 52 | processBlock(xpos1, newxpos2, ypos1, mid - 1, m1, iy1, newm2, newiy2); 53 | } 54 | 55 | // Do new upper block. 
56 | int newxpos1 = pos + xpos1; 57 | if (mid < ypos2 && newxpos1 <= xpos2) { 58 | processBlock(pos + xpos1, xpos2, mid + 1, ypos2, newm1, newiy1, m2, iy2); 59 | } 60 | return maxScore; 61 | } 62 | 63 | 64 | void MMAlign::processUp(int xpos1, int xpos2, int ypos1, int ypos2, 65 | int32_t m, int32_t iy) { 66 | View view1 = View(seq1, ypos1, ypos2 - ypos1 + 1, 1); 67 | View view2 = View(seq2, xpos1, xpos2 - xpos1 + 1, 1); 68 | int32_t gx1 = openGapx, gx2 = openGapx; 69 | int32_t hx1 = extendGapx, hx2 = extendGapx; 70 | if (ypos1 == 0) { 71 | gx1 = startGapx; 72 | hx1 = startGapx; 73 | } 74 | if (ypos2 == int(seq1.Size()) - 1) { 75 | gx2 = endGapx; 76 | hx2 = endGapx; 77 | } 78 | int32_t gy1 = openGapy, gy2 = openGapy; 79 | int32_t hy1 = extendGapy, hy2 = extendGapy; 80 | if (xpos1 == 0) { 81 | gy1 = startGapy; 82 | hy1 = startGapy; 83 | } 84 | if (xpos2 == int(seq2.Size()) - 1) { 85 | gy2 = endGapy; 86 | hy2 = endGapy; 87 | } 88 | int lenx = xpos2 - xpos1 + 2; 89 | int leny = ypos2 - ypos1 + 2; 90 | hmm(view1, view2, lenx, leny, m, iy, gx1, hx1, gx2, hx2, gy1, hy1, gy2, hy2); 91 | } 92 | 93 | 94 | void MMAlign::processDown(int xpos1, int xpos2, int ypos1, int ypos2, 95 | int32_t m, int32_t iy) { 96 | View view1 = View(seq1, ypos2, ypos2 - ypos1 + 1, -1); 97 | View view2 = View(seq2, xpos2, xpos2 - xpos1 + 1, -1); 98 | int32_t gx1 = openGapx, gx2 = openGapx; 99 | int32_t hx1 = extendGapx, hx2 = extendGapx; 100 | if (ypos1 == 0) { 101 | gx2 = startGapx; 102 | hx2 = startGapx; 103 | } 104 | if (ypos2 == int(seq1.Size()) - 1) { 105 | gx1 = endGapx; 106 | hx1 = endGapx; 107 | } 108 | int32_t gy1 = openGapy, gy2 = openGapy; 109 | int32_t hy1 = extendGapy, hy2 = extendGapy; 110 | if (xpos1 == 0) { 111 | gy2 = startGapy; 112 | hy2 = startGapy; 113 | } 114 | if (xpos2 == int(seq2.Size()) - 1) { 115 | gy1 = endGapy; 116 | hy1 = endGapy; 117 | } 118 | int lenx = xpos2 - xpos1 + 2; 119 | int leny = ypos2 - ypos1 + 2; 120 | hmm(view1, view2, lenx, leny, m, iy, gx1, hx1, gx2, hx2, 
gy1, hy1, gy2, hy2); 121 | } 122 | 123 | 124 | void MMAlign::hmm(const View& view1, const View& view2, int lenx, int leny, 125 | int32_t m, int32_t iy, int32_t gx1, int32_t hx1, int32_t gx2, 126 | int32_t hx2, int32_t gy1, int32_t hy1, int32_t gy2, int32_t hy2) { 127 | lastM[0] = m; 128 | lastIy[0] = iy; 129 | lastIx[0] = ZERO_PROB_SCORE; 130 | for (int j = 1; j < lenx; ++j) { 131 | lastM[j] = ZERO_PROB_SCORE; 132 | lastIy[j] = ZERO_PROB_SCORE; 133 | if (j == 1) lastIx[j] = max(lastM[0], lastIy[0]) + gx1; 134 | else lastIx[j] = lastIx[j - 1] + hx1; 135 | } 136 | for (int i = 1; i < leny; ++i) { 137 | M[0] = ZERO_PROB_SCORE; 138 | Ix[0] = ZERO_PROB_SCORE; 139 | Iy[0] = max(lastIy[0] + hy1, lastM[0] + gy1); 140 | int32_t gx = (i == leny - 1) ? gx2 : openGapx; 141 | int32_t hx = (i == leny - 1) ? hx2 : extendGapx; 142 | for (int j = 1; j < lenx; ++j) { 143 | M[j] = TripleMax(lastM[j - 1], lastIx[j - 1], lastIy[j - 1]); 144 | M[j] += subMatrix(view1[i - 1], view2[j - 1]); 145 | int32_t gy = (j == lenx - 1) ? gy2 : openGapy; 146 | int32_t hy = (j == lenx - 1) ? 
hy2 : extendGapy; 147 | Iy[j] = TripleMax(lastM[j] + gy, lastIx[j] + gy, lastIy[j] + hy); 148 | Ix[j] = TripleMax(M[j - 1] + gx, Ix[j - 1] + hx, Iy[j - 1] + gx); 149 | } 150 | M.swap(lastM); 151 | Ix.swap(lastIx); 152 | Iy.swap(lastIy); 153 | } 154 | } 155 | 156 | 157 | void MMAlign::makeAlignment(vector& alignment) { 158 | alignment.clear(); 159 | sort(matches.begin(), matches.end()); 160 | int lastx = -1, lasty = -1; 161 | for (size_t i = 0; i < matches.size(); ++i) { 162 | int x = matches[i].Pos2; 163 | int y = matches[i].Pos1; 164 | if (y > lasty + 1) { 165 | for (int p = lasty + 1; p < y; ++p) { 166 | alignment.push_back(AlignPos(p, -1)); 167 | } 168 | lasty = y - 1; 169 | } 170 | if (x > lastx + 1) { 171 | for (int p = lastx + 1; p < x; ++p) { 172 | alignment.push_back(AlignPos(-1, p)); 173 | } 174 | lastx = x - 1; 175 | } 176 | alignment.push_back(AlignPos(y, x)); 177 | lastx = x; 178 | lasty = y; 179 | } 180 | if (lasty < int(seq1.Size()) - 1) { 181 | for (int p = lasty + 1; p < int(seq1.Size()); ++p) { 182 | alignment.push_back(AlignPos(p, -1)); 183 | } 184 | } 185 | if (lastx < int(seq2.Size()) - 1) { 186 | for (int p = lastx + 1; p < int(seq2.Size()); ++p) { 187 | alignment.push_back(AlignPos(-1, p)); 188 | } 189 | } 190 | } 191 | 192 | 193 | } /* namespace PairAlign */ 194 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/pair_align/mm_align.h: -------------------------------------------------------------------------------- 1 | #ifndef MM_ALIGN_H 2 | #define MM_ALIGN_H 3 | 4 | #include 5 | 6 | namespace PairAlign { 7 | 8 | /// Helper class representing a view of a range of a vector. 
9 | template 10 | class View { 11 | private: 12 | const std::vector *data; 13 | size_t start, size; 14 | int dir; 15 | 16 | void check(size_t a, size_t b) { 17 | if (start < a || start > b) { 18 | throw std::runtime_error("Error: View start is out of range."); 19 | } 20 | size_t end = start + size_t((int(size) - 1) * dir); 21 | if (end < a || end > b) { 22 | throw std::runtime_error("Error: View end is out of range."); 23 | } 24 | if (dir != 1 && dir != -1) { 25 | throw std::runtime_error("Error: Stride value makes no sense."); 26 | } 27 | } 28 | 29 | public: 30 | /// Default constructor. 31 | View() {} 32 | 33 | /** Construct a subview. 34 | * @param[in] rhs View object to view contents of. 35 | * @param[in] begin Start position for new view. 36 | * @param[in] len Length of new view. 37 | * @param[in] dir Either 1 or -1. Indicates direction of view. 38 | */ 39 | View(const View& rhs, int begin, int len, int d) : 40 | data(rhs.data), 41 | start(rhs.start + begin * rhs.dir), 42 | size(len), 43 | dir(d * rhs.dir) { 44 | check(rhs.start, rhs.start + (rhs.size - 1) * rhs.dir); 45 | } 46 | 47 | /** Construct a view of a std::vector. 48 | * @param[in] x Vector to be viewed. 49 | * @param[in] begin Start position of view. 50 | * @param[in] len Length of view. 51 | * @param[in] dir Either 1 or -1. Indicates direction of view. 52 | */ 53 | View(const std::vector& x, int begin = 0, int len = 0, int d = 1) : 54 | data(&x), 55 | start(begin), 56 | size(len == 0 ? x.size() : len), 57 | dir(d) { 58 | check(0, x.size() - 1); 59 | } 60 | 61 | /// Indexing operator. 62 | const T& operator[](int n) const {return (dir == 1) ? (*data)[start + n] : (*data)[start - n];} 63 | 64 | /// Returns the lenght of the view. 65 | size_t Size() const {return size;} 66 | }; 67 | 68 | 69 | /** Myers-Miller implementation supporting gap-extension. 
70 | * Note that this is approximately 2x slower than the 71 | * Needleman-Wunsch implementation, but only requires 72 | * linear memory instead of quadratic. 73 | */ 74 | class MMAlign : public Aligner { 75 | private: 76 | const ublas::matrix& subMatrix; 77 | std::vector M, lastM, buffM; 78 | std::vector Iy, lastIy, buffIy; 79 | std::vector Ix, lastIx; 80 | std::vector matches; 81 | View seq1; 82 | View seq2; 83 | int32_t startGapx, endGapx, openGapx, extendGapx; 84 | int32_t startGapy, endGapy, openGapy, extendGapy; 85 | 86 | int32_t processBlock(int xpos1, int xpos2, int ypos1, int ypos2, 87 | int32_t m1, int32_t iy1, int32_t m2, int32_t iy2); 88 | void processUp(int xpos1, int xpos2, int ypos1, int ypos2, 89 | int32_t m, int32_t iy); 90 | void processDown(int xpos1, int xpos2, int ypos1, int ypos2, 91 | int32_t m, int32_t iy); 92 | void hmm(const View& view1, const View& view2, int lenx, int leny, 93 | int32_t m, int32_t iy, int32_t gx1, int32_t hx1, int32_t gx2, 94 | int32_t hx2, int32_t gy1, int32_t hy1, int32_t gy2, int32_t hy2); 95 | void makeAlignment(std::vector& alignment); 96 | 97 | public: 98 | /** Constructor. 99 | * @param[in] subMat Substitution matrix. Note this stores a reference. Beware of lifetime. 100 | * @param[in] gaps Vector of gap penalties (length 8). 101 | * 102 | * The 8 values in the gap penalty vector should be as follows: 103 | * start_gap1 Penalty for aligning sequence 1 to a gap before sequence 2. 104 | * end_gap1 Penalty for aligning sequence 1 to a gap after sequence 2. 105 | * open_gap1 Penalty for aligning sequence 1 to a new gap within sequence 1. 106 | * extend_gap1 Penalty for extending a gap within sequence 2. 107 | * start_gap2 Penalty for aligning sequence 2 to a gap before sequence 1. 108 | * end_gap2 Penalty for aligning sequence 2 to a gap after sequence 1. 109 | * open_gap2 Penalty for aligning sequence 2 to a new gap within sequence 1. 110 | * extend_gap2 Penalty for extending a gap within sequence 1. 
111 | */ 112 | MMAlign(const ublas::matrix& subMat, const std::vector& gaps) : subMatrix(subMat) { 113 | startGapx = -gaps[0]; 114 | endGapx = -gaps[1]; 115 | openGapx = -gaps[2]; 116 | extendGapx = -gaps[3]; 117 | startGapy = -gaps[4]; 118 | endGapy = -gaps[5]; 119 | openGapy = -gaps[6]; 120 | extendGapy = -gaps[7]; 121 | } 122 | 123 | /** Align two sequences. 124 | * @param[in] sequence1 First sequence of states. 125 | * @param[in] sequence2 Second sequence of states. 126 | * @param[out] alignment Vector to hold the resulting alignment. 127 | * @return The alignment score. This is not normalized. 128 | * 129 | * The alignment object will contain one entry per alignment position. Any contents it 130 | * had before the call will be lost. Each entry contains the indexes of the two sequence 131 | * elements that align to that position. If one sequence has aligned to a gap at that 132 | * position, the value for the other sequence will be -1. 133 | * 134 | * Note that the two sequences must contain only values from 0 to n-1, where n is the 135 | * size of the nxn substitution matrix. 
136 | */ 137 | int32_t Align(const std::vector& sequence1, const std::vector& sequence2, 138 | std::vector& alignment) { 139 | seq1 = View(sequence1); 140 | seq2 = View(sequence2); 141 | int len1 = int(sequence1.size()); 142 | int len2 = int(sequence2.size()); 143 | M.clear(); 144 | lastM.clear(); 145 | buffM.clear(); 146 | Ix.clear(); 147 | lastIx.clear(); 148 | Iy.clear(); 149 | lastIy.clear(); 150 | buffIy.clear(); 151 | matches.clear(); 152 | M.resize(len2 + 1); 153 | lastM.resize(len2 + 1); 154 | buffM.resize(len2 + 1); 155 | Ix.resize(len2 + 1); 156 | lastIx.resize(len2 + 1); 157 | Iy.resize(len2 + 1); 158 | lastIy.resize(len2 + 1); 159 | buffIy.resize(len2 + 1); 160 | int32_t score = processBlock(0, len2 - 1, 0, len1 - 1, 0, 0, 0, 0); 161 | makeAlignment(alignment); 162 | return score; 163 | } 164 | }; 165 | 166 | } /* namespace PairAlign */ 167 | 168 | 169 | #endif /* MM_ALIGN_H */ 170 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/pair_align/nw_align.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std; 5 | 6 | 7 | namespace PairAlign { 8 | 9 | int32_t NWAlign::Align(const vector& sequence1, const vector& sequence2, 10 | vector& alignment) { 11 | int len1 = int(sequence1.size()); 12 | int len2 = int(sequence2.size()); 13 | diagScores.resize(len1 + 1, len2 + 1, false); 14 | upScores.resize(len1 + 1, len2 + 1, false); 15 | rightScores.resize(len1 + 1, len2 + 1, false); 16 | diagScores(0, 0) = 0; 17 | upScores(0, 0) = 0; 18 | rightScores(0, 0) = 0; 19 | // Fill in the left column. This is events from sequence 1 aligning before 20 | // the beginning of sequence 2. 21 | for (int i = 1; i <= len1; ++i) { 22 | upScores(i, 0) = upScores(i - 1, 0) + startGapy; 23 | diagScores(i, 0) = ZERO_PROB_SCORE; 24 | rightScores(i, 0) = ZERO_PROB_SCORE; 25 | } 26 | // Fill in the bottom row. 
This is events from sequence 2 aligning before 27 | // the beginning of sequence 1. 28 | for (int j = 1; j <= len2; ++j) { 29 | rightScores(0, j) = rightScores(0, j - 1) + startGapx; 30 | diagScores(0, j) = ZERO_PROB_SCORE; 31 | upScores(0, j) = ZERO_PROB_SCORE; 32 | } 33 | // Fill in the main body, but not the right column or top row. 34 | for (int i = 1; i < len1; ++i) { 35 | for (int j = 1; j < len2; ++j) { 36 | int32_t mismatch = subMatrix(sequence1[i - 1], sequence2[j - 1]); 37 | processNode(i, j, openGapx, extendGapx, openGapy, extendGapy, mismatch); 38 | } 39 | } 40 | // Fill in the top row. This is events from sequence 2 aligning to or after 41 | // the end of sequence 1. 42 | for (int j = 1; j < len2; ++j) { 43 | int32_t mismatch = subMatrix(sequence1[len1 - 1], sequence2[j - 1]); 44 | processNode(len1, j, endGapx, endGapx, openGapy, extendGapy, mismatch); 45 | } 46 | // Fill in the right column. This is events from sequence 1 aligning after 47 | // the end of sequence 2. 48 | for (int i = 1; i < len1; ++i) { 49 | int32_t mismatch = subMatrix(sequence1[i - 1], sequence2[len2 - 1]); 50 | processNode(i, len2, openGapx, extendGapx, endGapy, endGapy, mismatch); 51 | } 52 | // Fill in the top-right node. 
53 | int32_t mismatch = subMatrix(sequence1[len1 - 1], sequence2[len2 - 1]); 54 | processNode(len1, len2, endGapx, endGapx, endGapy, endGapy, mismatch); 55 | backtrace(alignment); 56 | return TripleMax(diagScores(len1, len2), upScores(len1, len2), rightScores(len1, len2)); 57 | } 58 | 59 | 60 | void NWAlign::backtrace(vector& alignment) { 61 | alignment.clear(); 62 | size_t i = diagScores.size1() - 1, j = diagScores.size2() - 1; 63 | while (i > 0 || j > 0) { 64 | int dir = TripleMaxIndex(diagScores(i, j), upScores(i, j), rightScores(i, j)); 65 | switch(dir) { 66 | case 0: 67 | alignment.push_back(AlignPos(--i, --j)); 68 | break; 69 | case 1: 70 | alignment.push_back(AlignPos(--i, -1)); 71 | break; 72 | case 2: 73 | alignment.push_back(AlignPos(-1, --j)); 74 | break; 75 | default: 76 | throw runtime_error("Error: Invalid result in backtrace."); 77 | } 78 | } 79 | reverse(alignment.begin(), alignment.end()); 80 | } 81 | 82 | 83 | } /* namespace PairAlign */ 84 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/pair_align/nw_align.h: -------------------------------------------------------------------------------- 1 | #ifndef NW_ALIGN_H 2 | #define NW_ALIGN_H 3 | 4 | #include 5 | 6 | namespace PairAlign { 7 | 8 | 9 | /** Needleman-Wunsch implementation supporting gap-extension. 10 | * Note that this implementation is optimized for speed, but 11 | * is quadratic in memory. For aligning long sequences use 12 | * the Myers-Miller implementation instead. 13 | */ 14 | class NWAlign : public Aligner { 15 | private: 16 | const ublas::matrix& subMatrix; 17 | ublas::matrix diagScores; 18 | ublas::matrix upScores; 19 | ublas::matrix rightScores; 20 | int32_t startGapy, endGapy, openGapy, extendGapy; 21 | int32_t startGapx, endGapx, openGapx, extendGapx; 22 | 23 | void processNode(int i, int j, int32_t ogx, int32_t egx, int32_t ogy, int32_t egy, int32_t m) { 24 | // Find the best diagonal movement score. 
25 | int32_t score = TripleMax(diagScores(i - 1, j - 1), rightScores(i - 1, j - 1), upScores(i - 1, j - 1)); 26 | // Find the best upward movement score. 27 | int32_t upScore1 = diagScores(i - 1, j) + ogy; 28 | int32_t upScore2 = rightScores(i - 1, j) + ogy; 29 | int32_t upScore3 = upScores(i - 1, j) + egy; 30 | // Find the best right movement score. 31 | int32_t rightScore1 = diagScores(i, j - 1) + ogx; 32 | int32_t rightScore2 = upScores(i, j - 1) + ogx; 33 | int32_t rightScore3 = rightScores(i, j - 1) + egx; 34 | diagScores(i, j) = score + m; 35 | upScores(i, j) = TripleMax(upScore1, upScore2, upScore3); 36 | rightScores(i, j) = TripleMax(rightScore1, rightScore2, rightScore3); 37 | } 38 | 39 | void backtrace(std::vector& alignment); 40 | 41 | public: 42 | /** Constructor. 43 | * @param[in] subMat Substitution matrix. Note this stores a reference. Beware of lifetime. 44 | * @param[in] gaps Vector of gap penalties (length 8). 45 | * 46 | * The 8 values in the gap penalty vector should be as follows: 47 | * start_gap1 Penalty for aligning sequence 1 to a gap before sequence 2. 48 | * end_gap1 Penalty for aligning sequence 1 to a gap after sequence 2. 49 | * open_gap1 Penalty for aligning sequence 1 to a new gap within sequence 1. 50 | * extend_gap1 Penalty for extending a gap within sequence 2. 51 | * start_gap2 Penalty for aligning sequence 2 to a gap before sequence 1. 52 | * end_gap2 Penalty for aligning sequence 2 to a gap after sequence 1. 53 | * open_gap2 Penalty for aligning sequence 2 to a new gap within sequence 1. 54 | * extend_gap2 Penalty for extending a gap within sequence 1. 55 | */ 56 | NWAlign(const ublas::matrix& subMat, const std::vector& gaps) : subMatrix(subMat) { 57 | startGapx = -gaps[0]; 58 | endGapx = -gaps[1]; 59 | openGapx = -gaps[2]; 60 | extendGapx = -gaps[3]; 61 | startGapy = -gaps[4]; 62 | endGapy = -gaps[5]; 63 | openGapy = -gaps[6]; 64 | extendGapy = -gaps[7]; 65 | } 66 | 67 | /** Align two sequences. 
68 | * @param[in] sequence1 First sequence of states. 69 | * @param[in] sequence2 Second sequence of states. 70 | * @param[out] alignment Vector to hold the resulting alignment. 71 | * @return The alignment score. This is not normalized. 72 | * 73 | * The alignment object will contain one entry per alignment position. Any contents it 74 | * had before the call will be lost. Each entry contains the indexes of the two sequence 75 | * elements that align to that position. If one sequence has aligned to a gap at that 76 | * position, the value for the other sequence will be -1. 77 | * 78 | * Note that the two sequences must contain only values from 0 to n-1, where n is the 79 | * size of the nxn substitution matrix. 80 | */ 81 | int32_t Align(const std::vector& sequence1, const std::vector& sequence2, 82 | std::vector& alignment); 83 | }; 84 | 85 | } /* namespace PairAlign */ 86 | 87 | 88 | #endif /* NW_ALIGN_H */ 89 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/pair_align/pair_align.h: -------------------------------------------------------------------------------- 1 | #ifndef PAIR_ALIGN_H 2 | #define PAIR_ALIGN_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace ublas = boost::numeric::ublas; 10 | 11 | /// Namespace for pairwise alignment code. 12 | namespace PairAlign { 13 | 14 | /// Magic number representing log of zero (-INF). 15 | static const int32_t ZERO_PROB_SCORE = -1000000000; 16 | 17 | /// Helper function for the max of three values. 18 | inline int32_t TripleMax(int32_t a, int32_t b, int32_t c) { 19 | return (a >= b) ? ((a >= c) ? a : c) : ((b >= c) ? b : c); 20 | } 21 | 22 | /// Helper function for the index of the max of three values. 23 | inline int TripleMaxIndex(int32_t a, int32_t b, int32_t c) { 24 | return (a >= b) ? ((a >= c) ? 0 : 2) : ((b >= c) ? 1 : 2); 25 | } 26 | 27 | /// Helper struct for representing a position in an alignment. 
/// Helper struct for representing a position in an alignment.
/// Pos1/Pos2 index the two sequences; -1 denotes a gap.
struct AlignPos {
    int Pos1;
    int Pos2;

    /// Constructor.
    AlignPos(int p1 = 0, int p2 = 0) : Pos1(p1), Pos2(p2) {}

    /// Comparison operator. Sorts by first position, then by second; a gap
    /// (-1) on either side falls back to comparing the second positions.
    /// NOTE(review): when -1 entries are mixed with differing Pos1 values this
    /// is not a strict weak ordering -- confirm sorted inputs avoid that mix.
    bool operator<(const AlignPos& rhs) const {
        const bool comparePos2 = (Pos1 == -1) || (rhs.Pos1 == -1) || (Pos1 == rhs.Pos1);
        return comparePos2 ? (Pos2 < rhs.Pos2) : (Pos1 < rhs.Pos1);
    }
};


/// Abstract base class for pairwise alignment implementations.
class Aligner {
public:
    /// Destructor.
    virtual ~Aligner() {}

    /** Align two sequences.
     * @param[in] sequence1 First sequence of states.
     * @param[in] sequence2 Second sequence of states.
     * @param[out] alignment One entry per alignment position; any previous
     *             contents are lost. Each entry holds the indexes of the two
     *             aligned elements, with -1 where a sequence has a gap.
     * @return The alignment score. This is not normalized.
     *
     * Sequence values must lie in [0, n) where the substitution matrix is n x n.
     */
    virtual int32_t Align(const std::vector<int>& sequence1, const std::vector<int>& sequence2,
                          std::vector<AlignPos>& alignment) = 0;
};
62 | */ 63 | virtual int32_t Align(const std::vector& sequence1, const std::vector& sequence2, 64 | std::vector& alignment) = 0; 65 | }; 66 | 67 | 68 | } /* namespace PairAlign */ 69 | 70 | 71 | #endif /* PAIR_ALIGN_H */ 72 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/pair_align/pair_align_py.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace std; 6 | using ublas::matrix; 7 | 8 | 9 | template 10 | void list_to_vector(bp::list& in, vector& out) { 11 | out.clear(); 12 | int count = bp::len(in); 13 | out.resize(count); 14 | for (int i = 0; i < count; ++i) { 15 | out[i] = bp::extract(in[i]); 16 | } 17 | } 18 | 19 | template 20 | void list_to_matrix(bp::list& in, matrix& out) { 21 | out.clear(); 22 | int nrows = bp::len(in); 23 | int ncols = bp::len(bp::extract(in[0])); 24 | out.resize(nrows, ncols); 25 | for (int i = 0; i < nrows; ++i) { 26 | bp::list row = bp::extract(in[i]); 27 | if (bp::len(row) != ncols) { 28 | throw runtime_error("Error: Not all columns are the same length."); 29 | } 30 | for (int j = 0; j < ncols; ++j) { 31 | out(i, j) = bp::extract(row[j]); 32 | } 33 | } 34 | } 35 | 36 | 37 | PairAlign_Py::PairAlign_Py(bp::list& subMat, bp::list& gapPen, bool lowmem) { 38 | list_to_matrix(subMat, subMatrix); 39 | vector gapPenalties; 40 | list_to_vector(gapPen, gapPenalties); 41 | if (lowmem) { 42 | aligner = boost::shared_ptr(new PairAlign::MMAlign(subMatrix, gapPenalties)); 43 | } 44 | else { 45 | aligner = boost::shared_ptr(new PairAlign::NWAlign(subMatrix, gapPenalties)); 46 | } 47 | } 48 | 49 | 50 | bp::tuple PairAlign_Py::Align(bp::list& sequence1, bp::list& sequence2) { 51 | vector alignVec; 52 | vector seq1, seq2; 53 | list_to_vector(sequence1, seq1); 54 | list_to_vector(sequence2, seq2); 55 | int32_t score = aligner->Align(seq1, seq2, alignVec); 56 | bp::list alignment; 57 | for (size_t i = 0; 
i < alignVec.size(); ++i) { 58 | alignment.append(bp::make_tuple(alignVec[i].Pos1, alignVec[i].Pos2)); 59 | } 60 | return bp::make_tuple(alignment, score); 61 | } 62 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/pair_align/pair_align_py.h: -------------------------------------------------------------------------------- 1 | #ifndef PAIR_ALIGN_PY_H 2 | #define PAIR_ALIGN_PY_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | namespace bp = boost::python; 10 | namespace ublas = boost::numeric::ublas; 11 | 12 | 13 | /// Boost-Python wrapper class for pairwise alignment classes. 14 | class PairAlign_Py { 15 | private: 16 | ublas::matrix subMatrix; 17 | boost::shared_ptr aligner; 18 | 19 | public: 20 | /** Constructor 21 | * @param[in] subMat Substitution matrix. List of lists. 22 | * @param[in] gapPenalties List of gap penalties (length 8). 23 | * @param[in] lowmem Flag indicating whether to use the faster Neddleman-Wunsch 24 | * implementation or the slower linear-memory Myers-Miller 25 | * implementation. 26 | * 27 | * The 8 values in the gap penalty list should be as follows: 28 | * start_gap1 Penalty for aligning to a gap before sequence 1. 29 | * end_gap1 Penalty for aligning to a gap after sequence 1. 30 | * open_gap1 Penalty for opening a gap within sequence 1. 31 | * extend_gap1 Penalty for extending a gap within sequence 1. 32 | * start_gap2 Penalty for aligning to a gap before sequence 2. 33 | * end_gap2 Penalty for aligning to a gap after sequence 2. 34 | * open_gap2 Penalty for opening a gap within sequence 2. 35 | * extend_gap2 Penalty for extending a gap within sequence 2. 36 | */ 37 | PairAlign_Py(bp::list& subMat, bp::list& gapPenalties, bool lowmem); 38 | 39 | /** Align two sequences. 40 | * @param[in] sequence1 First sequence of states. 41 | * @param[in] sequence2 Second sequence of states. 
42 | * @return Tuple containing a list holding the resulting alignment 43 | * and an alignment score. This is not normalized. 44 | * 45 | * The alignment object will contain one entry per alignment position. Each entry 46 | * is a tuple containing the indexes of the two sequence elements that align to that 47 | * position. If one sequence has aligned to a gap at that position, the value for 48 | * the other sequence will be -1. 49 | * 50 | * Note that the two sequences must contain only values from 0 to n-1, where n is the 51 | * size of the nxn substitution matrix. 52 | */ 53 | bp::tuple Align(bp::list& sequence1, bp::list& sequence2); 54 | }; 55 | 56 | 57 | 58 | /// Python class wrapper. 59 | BOOST_PYTHON_MODULE(pair_align) { 60 | bp::class_("Aligner", bp::init(bp::args("sub_matrix", "gap_penalties", "lowmem"))) 61 | .def("align", &PairAlign_Py::Align); 62 | } 63 | 64 | 65 | #endif /* PAIR_ALIGN_PY_H */ 66 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/viterbi_2d/viterbi_2d.h: -------------------------------------------------------------------------------- 1 | #ifndef VITERBI_2D_H 2 | #define VITERBI_2D_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace ublas = boost::numeric::ublas; 11 | 12 | static const int8_t MOVE_DIAG = 0; 13 | static const int8_t MOVE_RIGHT = 1; 14 | static const int8_t MOVE_UP = 2; 15 | static const int8_t MOVE_UNDEF = 3; 16 | static const int32_t ZERO_PROB_SCORE = -1000000000; 17 | static const double MIN_EMISSION_SCORE = -20.0; 18 | 19 | 20 | inline double square(double x) { 21 | return x * x; 22 | } 23 | 24 | inline int32_t prob2score(double x) { 25 | if (x < 0.0000000001) return -2400; 26 | return int32_t(100.0 * log(x)); 27 | } 28 | 29 | 30 | /// Helper class representing a node in the HMM. 31 | struct Node { 32 | int32_t index1; ///< Index of event from first sequence. 33 | int32_t index2; ///< Index of event from second sequence. 
34 | int32_t leftIndex; ///< Index of node to the left of this one. 35 | int32_t downIndex; ///< Index of node below this one. 36 | int32_t diagIndex; ///< Index of node diagonal to this one. 37 | std::vector statePointers; ///< Viterbi backtrace pointers. 38 | std::vector dirPointers; ///< NW alignment backtrace pointers. 39 | 40 | /** Initialize node. 41 | * @param[in] i Index of event from first sequence. 42 | * @param[in] j index of event from second sequence. 43 | * @param[in] left Index of node to the left of this one. 44 | * @param[in] down Index of node below this one. 45 | * @param[in] diag Index of node diagonal to this one. 46 | * @param[in] states Number of states in the HMM. 47 | */ 48 | void Init(int i, int j, int left, int down, int diag, int states) { 49 | index1 = i; 50 | index2 = j; 51 | leftIndex = left; 52 | downIndex = down; 53 | diagIndex = diag; 54 | statePointers.resize(states); 55 | dirPointers.resize(states); 56 | } 57 | }; 58 | 59 | 60 | /** Helper class for emission scores. 61 | * 62 | * This class provides normal level emissions and gamma distributed noise emissions. 63 | * Note that other emission objects can be substituted by changing the Emission typedef 64 | * immediately following this class definition. 65 | */ 66 | class DefaultEmission { 67 | private: 68 | std::vector levels; 69 | std::vector noises; 70 | std::vector logNoises; 71 | std::vector stayWeights; 72 | std::vector emWeights; 73 | std::vector modelLevels; 74 | std::vector modelNoises; 75 | std::vector offsets; 76 | std::vector levelScales; 77 | std::vector noiseScales; 78 | std::vector noiseShapes; 79 | int numEvents; 80 | int numStates; 81 | bool useNoise; 82 | 83 | public: 84 | /** Constructor. 85 | * @param[in] mdlLevels Model current levels. 86 | * @param[in] mdlLevelSpreads Spreads of model current levels. 87 | * @params[in] mdlNoises Model noise levels. 88 | * @param[in] mdlNoiseSpreads Spreads of model noise levels. 
89 | * @param[in] useSd Flag to specify whether to use noise levels in the basecall. 90 | */ 91 | DefaultEmission(const std::vector& mdlLevels, const std::vector& mdlLevelSpreads, 92 | const std::vector& mdlNoises, const std::vector& mdlNoiseSpreads, 93 | bool useSd); 94 | 95 | /** Assign events to the object with vectors. 96 | * @param[in] means Event current levels. 97 | * @param[in] stdvs Event noise levels. 98 | * @param[in] stayWts Event weights for modifying stay probabilities. 99 | * @param[in] emWts Event weights for modifying emission probabilities. 100 | */ 101 | void SetEvents(const std::vector& means, const std::vector& stdvs, 102 | const std::vector& stayWts, const std::vector& emWts); 103 | 104 | /// Set the number of events (for when SetEvents() will not be called. 105 | void SetNEvents(int n) {numEvents = n;} 106 | 107 | /// Returns the number of events. 108 | int NumEvents() const {return numEvents;} 109 | 110 | /// Returns the number of model states. 111 | int NumStates() const {return numStates;} 112 | 113 | /// Returns the model levels. 114 | const std::vector GetModelLevels() const {return modelLevels;} 115 | 116 | /// Returns the stay weights. 117 | const std::vector GetStayWeights() const {return stayWeights;} 118 | 119 | /// Returns the score for event i and state j. 120 | int32_t Score(int i, int j) const { 121 | double score = offsets[j] + levelScales[j] * square(levels[i] - modelLevels[j]); 122 | if (useNoise) score += (noiseShapes[j] - 1.0) * logNoises[i] - noiseScales[j] * noises[i]; 123 | return int32_t(emWeights[i] * std::max(MIN_EMISSION_SCORE, score)); 124 | } 125 | }; 126 | 127 | 128 | typedef DefaultEmission Emission; 129 | typedef std::vector > Alignment; 130 | 131 | 132 | /// Worker class for performing 2D Viterbi basecall. 133 | class Viterbi2D { 134 | private: 135 | std::vector nodes; // All HMM nodes, in the order they should be processed. 136 | int32_t baseStay[3]; // Stay scores for each direction. 
137 | int32_t baseStep[3]; // Step scores for each direction. 138 | int32_t baseSkip[3]; // Skip scores for each direction. 139 | ublas::matrix emScore1; // Pre-computed emissions for sequence 1. 140 | ublas::matrix emScore2; // Pre-computed emissions for sequence 2. 141 | ublas::matrix viterbiScore; // Viterbi scores. Length of sequence 1 by number of states. 142 | ublas::matrix lastScore; // Viterbi scores for previous event from sequence 2. 143 | int numStates; // Number of states in the HMM. 144 | int numNodes; // Total number of nodes to be processed. 145 | int numEvents1; // Number of events in sequence 1. 146 | int numEvents2; // Number of events in sequence 2. 147 | 148 | void initNodes(const std::vector& bandStarts, const std::vector& bandEnds); 149 | void processNodes(const std::vector& wts1, const std::vector& wts2); 150 | void backTrace(Alignment& alignment, std::vector& states); 151 | 152 | public: 153 | /** Constructor. 154 | * @param[in] maxNodes The maximum number of nodes to support. 155 | * @param[in] maxLen The maximum number of events to support for either sequence. 156 | * @param[in] states The number of states in the HMM. 157 | * @param[in] trans The six transition probabilities (stay1, step1, skip1, stay2, step2, skip2). 158 | */ 159 | Viterbi2D(int maxNodes, int maxLen, int states, const std::vector& trans); 160 | 161 | /** Perform the basecall with emission objects. 162 | * @param[in] data1 Emission object for sequence 1. 163 | * @param[in] data2 Emission object for sequence 2. 164 | * @param[in] bandStarts For each event in sequence 2, the first candidate position in sequence 1. 165 | * @param[in] bandEnds For each event in sequence 2, the last candidate position in sequence 1. 166 | * @param[in] priors The prior scores for the "before alignment" node. All zeros means no prior. 167 | * @param[out] alignment The final alignment of events. 168 | * @param[out] states The final basecalled states. 
169 | */ 170 | void Call(const Emission& data1, const Emission& data2, const std::vector& bandStarts, 171 | const std::vector& bandEnds, const std::vector& priors, 172 | Alignment& alignment, std::vector& states); 173 | 174 | /** Perform the basecall with precomputed emissions. 175 | * @param[in] data1 Precomputed emissions for sequence 1. 176 | * @param[in] data2 Precomputed emissions for sequence 2. 177 | * @param[in] stayWt1 Stay weights for sequence 1. 178 | * @param[in] stayWt2 Stay weights for sequence 2. 179 | * @param[in] bandStarts For each event in sequence 2, the first candidate position in sequence 1. 180 | * @param[in] bandEnds For each event in sequence 2, the last candidate position in sequence 1. 181 | * @param[in] priors The prior scores for the "before alignment" node. All zeros means no prior. 182 | * @param[out] alignment The final alignment of events. 183 | * @param[out] states The final basecalled states. 184 | */ 185 | void Call(const MatView& data1, const MatView& data2, 186 | const VecView& stayWt1, const VecView& stayWt2, 187 | const std::vector& bandStarts, const std::vector& bandEnds, 188 | const std::vector& priors, Alignment& alignment, std::vector& states); 189 | }; 190 | 191 | 192 | #endif /* VITERBI_2D */ 193 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/viterbi_2d/viterbi_2d_py.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | using namespace std; 6 | using ublas::matrix; 7 | 8 | 9 | Viterbi2D_Py::Viterbi2D_Py(bp::dict& stateInfo, bp::dict& params) { 10 | bandSize = bp::extract(params["band_size"]); 11 | int kmerLen = bp::extract(params["kmer_len"]); 12 | setupKmers(kmerLen); 13 | bool rc = bp::extract(params["seq2_is_rc"]); 14 | useNoise = bp::extract(params["use_sd"]); 15 | if (stateInfo.has_key(string("kmers"))) { 16 | bp::list kmers = bp::extract(stateInfo["kmers"]); 17 | emission1 = 
dummyEmission(kmers); 18 | emission2 = dummyEmission(kmers); 19 | } 20 | else { 21 | bp::dict model1 = bp::extract(stateInfo["model1"]); 22 | bp::dict model2 = bp::extract(stateInfo["model2"]); 23 | emission1 = makeEmission(model1, false); 24 | emission2 = makeEmission(model2, rc); 25 | } 26 | int numStates = emission1->NumStates(); 27 | int maxNodes = bp::extract(params["max_nodes"]); 28 | int maxLen = bp::extract(params["max_len"]); 29 | vector trans(6); 30 | trans[0] = bp::extract(params["stay1"]); 31 | trans[1] = bp::extract(params["step1"]); 32 | trans[2] = bp::extract(params["skip1"]); 33 | trans[3] = bp::extract(params["stay2"]); 34 | trans[4] = bp::extract(params["step2"]); 35 | trans[5] = bp::extract(params["skip2"]); 36 | viterbi = boost::shared_ptr(new Viterbi2D(maxNodes, maxLen, numStates, trans)); 37 | } 38 | 39 | 40 | bp::list Viterbi2D_Py::GetKmerList() const { 41 | bp::list kmerList; 42 | for (size_t i = 0; i < kmers.size(); ++i) { 43 | kmerList.append(kmers[i]); 44 | } 45 | return kmerList; 46 | } 47 | 48 | 49 | bp::numeric::array Viterbi2D_Py::GetModelLevels1() const { 50 | const vector levelVec = emission1->GetModelLevels(); 51 | bp::numeric::array result = new_numpy_1d(levelVec.size()); 52 | VecView data = view_1d_array(result); 53 | for (size_t i = 0; i < levelVec.size(); ++i) { 54 | data[i] = levelVec[i]; 55 | } 56 | return result; 57 | } 58 | 59 | 60 | bp::numeric::array Viterbi2D_Py::GetModelLevels2() const { 61 | const vector levelVec = emission2->GetModelLevels(); 62 | bp::numeric::array result = new_numpy_1d(levelVec.size()); 63 | VecView data = view_1d_array(result); 64 | for (size_t i = 0; i < levelVec.size(); ++i) { 65 | data[i] = levelVec[i]; 66 | } 67 | return result; 68 | } 69 | 70 | 71 | bp::dict Viterbi2D_Py::Call(bp::dict& events1, bp::dict& events2, bp::list& alignment, bp::object& prior) { 72 | vector means1, stdvs1, stwts1, emwts1, means2, stdvs2, stwts2, emwts2; 73 | getEvents(events1, means1, stdvs1, stwts1, emwts1); 74 | 
getEvents(events2, means2, stdvs2, stwts2, emwts2); 75 | emission1->SetEvents(means1, stdvs1, stwts1, emwts1); 76 | emission2->SetEvents(means2, stdvs2, stwts2, emwts2); 77 | Alignment alignIn; 78 | alignIn = list_to_pair_vector(alignment); 79 | vector bandStarts, bandEnds; 80 | makeBands(alignIn, bandStarts, bandEnds); 81 | vector priorScores(emission1->NumStates()); 82 | if (prior) { 83 | fill(priorScores.begin(), priorScores.end(), ZERO_PROB_SCORE); 84 | int state = states[bp::extract(prior)]; 85 | priorScores[state] = 0; 86 | } 87 | Alignment alignOut; 88 | vector statesOut; 89 | viterbi->Call(*emission1, *emission2, bandStarts, bandEnds, priorScores, alignOut, statesOut); 90 | return makeResult(alignOut, statesOut); 91 | } 92 | 93 | 94 | bp::dict Viterbi2D_Py::CallPost(bp::numeric::array& post1, bp::numeric::array& post2, 95 | bp::numeric::array& stayWt1, bp::numeric::array& stayWt2, 96 | bp::list& alignment, bp::object& prior) { 97 | MatView probs1 = view_2d_array(post1); 98 | MatView probs2 = view_2d_array(post2); 99 | VecView stayWeight1 = view_1d_array(stayWt1); 100 | VecView stayWeight2 = view_1d_array(stayWt2); 101 | int numStates = int(probs1.size2()); 102 | emission1->SetNEvents(int(probs1.size1())); 103 | emission2->SetNEvents(int(probs2.size1())); 104 | Alignment alignIn; 105 | alignIn = list_to_pair_vector(alignment); 106 | vector bandStarts, bandEnds; 107 | makeBands(alignIn, bandStarts, bandEnds); 108 | vector priorScores(numStates); 109 | if (prior) { 110 | fill(priorScores.begin(), priorScores.end(), ZERO_PROB_SCORE); 111 | int state = states[bp::extract(prior)]; 112 | priorScores[state] = 0; 113 | } 114 | Alignment alignOut; 115 | vector statesOut; 116 | viterbi->Call(probs1, probs2, stayWeight1, stayWeight2, bandStarts, bandEnds, priorScores, alignOut, statesOut); 117 | return makeResult(alignOut, statesOut); 118 | } 119 | 120 | 121 | void Viterbi2D_Py::setupKmers(int kmerLen) { 122 | int numKmers = 1 << (kmerLen << 1); 123 | const char 
letters[] = "ACGT"; 124 | kmers.resize(numKmers); 125 | states.clear(); 126 | vector pos(kmerLen); 127 | for (int i = 0; i < numKmers; ++i) { 128 | string kmer; 129 | for (int j = 0; j < kmerLen; ++j) { 130 | kmer += letters[pos[kmerLen - j - 1]]; 131 | } 132 | kmers[i] = kmer; 133 | states[kmer] = i; 134 | bool flag = true; 135 | int digit = 0; 136 | while (flag) { 137 | ++pos[digit]; 138 | if (pos[digit] == 4) { 139 | pos[digit] = 0; 140 | ++digit; 141 | if (digit == kmerLen) { 142 | flag = false; 143 | } 144 | } 145 | else { 146 | flag = false; 147 | } 148 | } 149 | } 150 | } 151 | 152 | 153 | boost::shared_ptr Viterbi2D_Py::makeEmission(bp::dict& model, bool rc) { 154 | bp::numeric::array levelMean = bp::extract(model.get("level_mean")); 155 | bp::numeric::array levelStdv = bp::extract(model.get("level_stdv")); 156 | bp::numeric::array sdMean = bp::extract(model.get("sd_mean")); 157 | bp::numeric::array sdStdv = bp::extract(model.get("sd_stdv")); 158 | bp::list kmer = bp::extract(model.get("kmer")); 159 | VecView mean = view_1d_array(levelMean); 160 | VecView sigma = view_1d_array(levelStdv); 161 | VecView noise = view_1d_array(sdMean); 162 | VecView noiseSd = view_1d_array(sdStdv); 163 | int numStates = int(mean.size()); 164 | vector levels(numStates), levelSpreads(numStates), noises(numStates), noiseSpreads(numStates); 165 | copy(mean.begin(), mean.end(), levels.begin()); 166 | copy(sigma.begin(), sigma.end(), levelSpreads.begin()); 167 | copy(noise.begin(), noise.end(), noises.begin()); 168 | copy(noiseSd.begin(), noiseSd.end(), noiseSpreads.begin()); 169 | vector mdlKmers = list_to_vector(kmer); 170 | sortModel(levels, levelSpreads, noises, noiseSpreads, mdlKmers, rc); 171 | return boost::shared_ptr(new Emission(levels, levelSpreads, noises, noiseSpreads, useNoise)); 172 | } 173 | 174 | 175 | boost::shared_ptr Viterbi2D_Py::dummyEmission(bp::list& kmers) { 176 | int numStates = bp::len(kmers); 177 | vector levels(numStates), levelSpreads(numStates), 
noises(numStates), noiseSpreads(numStates); 178 | return boost::shared_ptr(new Emission(levels, levelSpreads, noises, noiseSpreads, useNoise)); 179 | } 180 | 181 | 182 | void Viterbi2D_Py::sortModel(vector& levels, vector& levelSpreads, vector& noises, 183 | vector& noiseSpreads, vector& mdlKmers, bool rc) { 184 | int numKmers = int(levels.size()); 185 | vector newLvl(numKmers), newLvlSprd(numKmers), newSd(numKmers), newSdSprd(numKmers); 186 | vector newKmer(numKmers); 187 | map rcMap; 188 | rcMap['A'] = 'T'; 189 | rcMap['C'] = 'G'; 190 | rcMap['G'] = 'C'; 191 | rcMap['T'] = 'A'; 192 | for (int i = 0; i < numKmers; ++i) { 193 | string kmer = mdlKmers[i]; 194 | if (rc) { 195 | reverse(kmer.begin(), kmer.end()); 196 | for (string::iterator p = kmer.begin(); p < kmer.end(); ++p) { 197 | *p = rcMap[*p]; 198 | } 199 | } 200 | int pos = states[kmer]; 201 | newLvl[pos] = levels[i]; 202 | newLvlSprd[pos] = levelSpreads[i]; 203 | newSd[pos] = noises[i]; 204 | newSdSprd[pos] = noiseSpreads[i]; 205 | newKmer[pos] = mdlKmers[i]; 206 | } 207 | levels.swap(newLvl); 208 | levelSpreads.swap(newLvlSprd); 209 | noises.swap(newSd); 210 | noiseSpreads.swap(newSdSprd); 211 | mdlKmers.swap(newKmer); 212 | } 213 | 214 | 215 | void Viterbi2D_Py::getEvents(bp::dict& events, vector& means, vector& stdvs, 216 | vector& stayWts, vector& emWts) { 217 | bp::numeric::array mean = bp::extract(events.get("mean")); 218 | bp::numeric::array stdv = bp::extract(events.get("stdv")); 219 | bp::numeric::array stayWeight = bp::extract(events.get("stay_weight")); 220 | bp::numeric::array emWeight = bp::extract(events.get("em_weight")); 221 | VecView meanV = view_1d_array(mean); 222 | VecView stdvV = view_1d_array(stdv); 223 | VecView stwtV = view_1d_array(stayWeight); 224 | VecView emwtV = view_1d_array(emWeight); 225 | int numEvents = int(meanV.size()); 226 | means.resize(numEvents); 227 | stdvs.resize(numEvents); 228 | stayWts.resize(numEvents); 229 | emWts.resize(numEvents); 230 | copy(meanV.begin(), 
meanV.end(), means.begin()); 231 | copy(stdvV.begin(), stdvV.end(), stdvs.begin()); 232 | copy(stwtV.begin(), stwtV.end(), stayWts.begin()); 233 | copy(emwtV.begin(), emwtV.end(), emWts.begin()); 234 | 235 | } 236 | 237 | 238 | void Viterbi2D_Py::makeBands(const Alignment& alignIn, vector& bandStarts, vector& bandEnds) { 239 | int numEvents1 = emission1->NumEvents(); 240 | int numEvents2 = emission2->NumEvents(); 241 | bandStarts.resize(numEvents2); 242 | bandEnds.resize(numEvents2); 243 | fill(bandStarts.begin(), bandStarts.end(), int32_t(numEvents1 - 1)); 244 | fill(bandEnds.begin(), bandEnds.end(), int32_t(0)); 245 | int lastX = 0, lastY = numEvents2 - 1; 246 | int nPos = int(alignIn.size()); 247 | for (int p = 0; p < nPos; ++p) { 248 | int x = (alignIn[p].first == -1) ? lastX : alignIn[p].first; 249 | int y = (alignIn[p].second == -1) ? lastY : alignIn[p].second; 250 | for (int k = y - bandSize; k <= y + bandSize; ++k) { 251 | if (k < 0 || k >= numEvents2) continue; 252 | int left = min((int32_t)(x - bandSize), bandStarts[k]); 253 | int right = max((int32_t)(x + bandSize), bandEnds[k]); 254 | left = max(0, left); 255 | right = min(numEvents1 - 1, right); 256 | bandStarts[k] = left; 257 | bandEnds[k] = right; 258 | } 259 | lastX = x; 260 | lastY = y; 261 | } 262 | } 263 | 264 | 265 | bp::dict Viterbi2D_Py::makeResult(const Alignment& alignOut, const vector& statesOut) { 266 | bp::list align; 267 | bp::list kmersOut; 268 | int count = int(alignOut.size()); 269 | for (int i = 0; i < count; ++i) { 270 | kmersOut.append(kmers[statesOut[i]]); 271 | bp::tuple data = bp::make_tuple(alignOut[i].first, alignOut[i].second); 272 | align.append(data); 273 | } 274 | bp::dict results; 275 | results["alignment"] = align; 276 | results["kmers"] = kmersOut; 277 | return results; 278 | } 279 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/viterbi_2d/viterbi_2d_py.h: 
-------------------------------------------------------------------------------- 1 | #ifndef VITERBI_2D_PY_H 2 | #define VITERBI_2D_PY_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | namespace bp = boost::python; 13 | 14 | 15 | /// Viterbi 2D basecaller python wrapper. 16 | class Viterbi2D_Py { 17 | private: 18 | boost::shared_ptr viterbi; 19 | boost::shared_ptr emission1; 20 | boost::shared_ptr emission2; 21 | std::vector kmers; 22 | std::map states; 23 | std::map parms; 24 | int bandSize; 25 | bool useNoise; 26 | 27 | void setupKmers(int kmerLen); 28 | boost::shared_ptr makeEmission(bp::dict& model, bool rc); 29 | boost::shared_ptr dummyEmission(bp::list& kmers); 30 | void sortModel(std::vector& levels, std::vector& levelSpreads, std::vector& noises, 31 | std::vector& noiseSpreads, std::vector& mdlKmers, bool rc); 32 | void getEvents(bp::dict& events, std::vector& means, std::vector& stdvs, 33 | std::vector& stayWts, std::vector& emWts); 34 | void makeBands(const Alignment& alignIn, std::vector& bandStarts, std::vector& bandEnds); 35 | bp::dict makeResult(const Alignment& alignOut, const std::vector& statesOut); 36 | 37 | public: 38 | /** Constructor. 39 | * @param[in] stateInfo Dictionary containing state information. 40 | * @param[in] params Dictionary of basecalling parameters. 41 | * 42 | * The state information should either contain 'model1' and 'model2' 43 | * fields, containing the models for the template and complement data, 44 | * or a 'kmers' field containing a list of the kmers (for posterior 45 | * calling). 46 | */ 47 | Viterbi2D_Py(bp::dict& stateInfo, bp::dict& params); 48 | 49 | /** Perform the basecall. 50 | * @param[in] data1 Event sequence 1. 51 | * @param[in] data2 Event sequence 2. 52 | * @param[in] alignment Estimated alignment of sequence 1 to sequence 2. 53 | * @param[in] prior The prior kmer for the "before alignment" node. None means no prior. 
54 | * @return Dictionary contain alignment and called kmers. 55 | */ 56 | bp::dict Call(bp::dict& events1, bp::dict& events2, bp::list& alignment, bp::object& prior); 57 | 58 | /** Perform the basecall using posteriors. 59 | * @param[in] post1 Posteriors for sequence 1. 60 | * @param[in] post2 Posteriors for sequence 2. 61 | * @param[in] stayWt1 Stay weights for sequence 1. 62 | * @param[in] stayWt2 Stay weights for sequence 2. 63 | * @param[in] alignment Estimated alignment of sequence 1 to sequence 2. 64 | * @param[in] prior The prior kmer for the "before alignment" node. None means no prior. 65 | * @return Dictionary contain alignment and called kmers. 66 | */ 67 | bp::dict CallPost(bp::numeric::array& post1, bp::numeric::array& post2, 68 | bp::numeric::array& stayWt1, bp::numeric::array& stayWt2, 69 | bp::list& alignment, bp::object& prior); 70 | 71 | /// Get a list of the base transition probabilities. 72 | bp::list GetTransitionProbs() const; 73 | 74 | /// Get a list of kmers in operational order. 75 | bp::list GetKmerList() const; 76 | 77 | /// Get a list of the model levels for the first sequence. 78 | bp::numeric::array GetModelLevels1() const; 79 | 80 | /// Get a list of the model levels for the second sequence. 81 | bp::numeric::array GetModelLevels2() const; 82 | }; 83 | 84 | /// Python class wrapper. 
85 | BOOST_PYTHON_MODULE(viterbi_2d) { 86 | import_array(); 87 | bp::numeric::array::set_module_and_type("numpy", "ndarray"); 88 | bp::class_("Viterbi2D", bp::init()) 89 | .def("call", &Viterbi2D_Py::Call) 90 | .def("call_post", &Viterbi2D_Py::CallPost) 91 | .def("get_kmer_list", &Viterbi2D_Py::GetKmerList) 92 | .def("get_model_levels1", &Viterbi2D_Py::GetModelLevels1) 93 | .def("get_model_levels2", &Viterbi2D_Py::GetModelLevels2); 94 | bp::scope().attr("ZERO_PROB_SCORE") = bp::object(ZERO_PROB_SCORE); 95 | } 96 | 97 | 98 | #endif /* VITERBI_2D_PY_H */ 99 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/viterbi_2d_ocl/proxyCL.h: -------------------------------------------------------------------------------- 1 | #ifndef PROXY_CL_H 2 | #define PROXY_CL_H 3 | 4 | #include 5 | #ifdef max // it is defined in cl.hpp 6 | #undef max 7 | #endif //max 8 | 9 | #include 10 | #include 11 | 12 | enum vendor 13 | { 14 | amd, 15 | intel, 16 | nvidia, 17 | apple, 18 | other 19 | }; 20 | 21 | enum device_type 22 | { 23 | cpu, 24 | gpu, 25 | all, 26 | undefined 27 | }; 28 | 29 | struct device_info 30 | { 31 | size_t id; 32 | std::string name; 33 | device_type type; 34 | 35 | bool operator==(const device_info &data) 36 | { 37 | return id == data.id && name == data.name && type == data.type; 38 | } 39 | }; 40 | 41 | struct device_info_ex 42 | { 43 | size_t id; 44 | std::string name; 45 | device_type type; 46 | size_t max_compute_units; 47 | size_t max_work_item_dimensions; 48 | size_t max_work_group_size; 49 | size_t max_work_items_sizes_x; 50 | size_t max_work_items_sizes_y; 51 | size_t max_work_items_sizes_z; 52 | size_t max_clock_frequency; 53 | size_t max_parameter_size; 54 | size_t global_mem_cache_type; 55 | size_t global_mem_cacheline_size; 56 | size_t global_mem_cache_size; 57 | size_t global_mem_size; 58 | size_t max_constant_buffer_size; 59 | size_t local_mem_type; 60 | size_t local_mem_size; 61 | size_t 
preferred_vector_width_char; 62 | size_t preferred_vector_width_short; 63 | size_t preferred_vector_width_int; 64 | size_t preferred_vector_width_long; 65 | size_t preferred_vector_width_float; 66 | }; 67 | 68 | class proxyCL 69 | { 70 | public: 71 | proxyCL(){}; 72 | ~proxyCL(){}; 73 | 74 | bool profiling_enabled() const { return enable_profiling_; } 75 | void enable_cuda_build_cache(bool enable) const; 76 | 77 | size_t get_max_global_mem_size() const { return max_global_mem_size_; } 78 | size_t get_max_local_mem_size() const { return max_local_mem_size_; } 79 | size_t get_max_work_group_size() const { return max_work_group_size_; } 80 | size_t get_work_group_size() const { return work_group_size_; } 81 | void set_work_group_size(size_t value) { work_group_size_ = value; } 82 | 83 | std::vector available_vendors_str(std::string &error) const; 84 | std::vector available_vendors_str_ex(std::string &error) const; 85 | std::vector available_vendors(std::string &error) const; 86 | bool select_vendor(const std::string &vendor, std::string &error); 87 | bool select_vendor(vendor v, std::string &error); 88 | vendor get_selected_vendor() const { return active_vendor_; } 89 | 90 | bool create_context(device_type type, std::string &error); 91 | bool create_context(std::string &error); 92 | 93 | std::vector available_devices(std::string &error) const; 94 | bool select_device(size_t id, std::string &error); 95 | device_info_ex get_device_info_ex(size_t id, std::string &error) const; 96 | std::string get_device_info(size_t id, std::string &error) const; 97 | 98 | std::string get_device_extensions(std::string &error) const; 99 | bool fp64_extension_support(std::string &error) const; 100 | 101 | bool double_fp_support(std::string &error) const; 102 | 103 | bool load_kernel_from_source_file(const std::string &file_path, std::string &error); 104 | bool load_kernel_from_binary_file(const std::string &file_path, const std::string &build_options, std::string &error); 105 | bool 
load_kernel_from_source(const std::string &src, std::string &error); 106 | bool build_kernel(const std::string &build_options, std::string &error); 107 | bool output_binary(const std::string &path, const std::string &build_options, std::string &error); 108 | 109 | bool create_command_queue(bool enable_profiling, bool enable_out_of_order_exec_mode, std::string &error); 110 | 111 | cl::Program& get_program() { return program_; } 112 | cl::Context& get_context() { return context_; } 113 | cl::CommandQueue& get_command_queue() { return queue_; } 114 | 115 | const char* ocl_error_to_string(cl_int err) const; 116 | 117 | private: 118 | cl::Platform platform_; 119 | cl::Context context_; 120 | cl::Device device_; 121 | cl::Program program_; 122 | cl::CommandQueue queue_; 123 | size_t max_global_mem_size_{}; 124 | size_t max_local_mem_size_{}; 125 | size_t max_work_group_size_{}; 126 | size_t work_group_size_{}; 127 | bool enable_profiling_{}; 128 | vendor active_vendor_ = other; 129 | 130 | }; 131 | 132 | #endif // PROXY_CL_H 133 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/viterbi_2d_ocl/viterbi_2d.cl: -------------------------------------------------------------------------------- 1 | #ifdef ENABLE_FP64 2 | #if __OPENCL_VERSION__ <= CL_VERSION_1_1 3 | #pragma OPENCL EXTENSION cl_khr_fp64: enable 4 | #endif 5 | #endif 6 | 7 | #define MOVE_DIAG 0 8 | #define MOVE_RIGHT 1 9 | #define MOVE_UP 2 10 | #define MOVE_UNDEF 3 11 | 12 | #define ZERO_PROB_SCORE -1000000000 13 | 14 | 15 | __kernel __attribute__((reqd_work_group_size(WORK_ITEMS, 1, 1))) 16 | void ProcessNodes( 17 | int firstNode, 18 | int firstOffset, 19 | int wrapAround, 20 | int sliceSize, 21 | int index1First, 22 | int firstLeftValid, 23 | int lastDownValid, 24 | int firstDiagonalValid, 25 | int lastDiagonalValid, 26 | __global int* restrict viterbiScore, // maxSliceSize * 2 * numStates 27 | __global int* restrict transitions, // 9 (3 Stay, 3 Step, 3 
Skip) 28 | __global int* restrict stayBuf, // maxSliceSize * 3 * numStates 29 | __global short* restrict ptrs, // maxSliceSize * 3 * numStates 30 | __global int* restrict emScore1, // maxLen * numStates 31 | __global int* restrict emScore2, // maxLen * numStates 32 | #ifdef ENABLE_FP64 33 | __global double* restrict weights // 3 * numNodes 34 | #else 35 | __global int* restrict weights_stay // 3 * numNodes 36 | #endif 37 | ) 38 | { 39 | int localId = get_local_id(0); 40 | int groupId = get_group_id(0); 41 | int slicePos = groupId / 3; 42 | int nodeIndex = firstNode + slicePos; 43 | int dir = groupId % 3; 44 | 45 | int pos = firstOffset + slicePos * 2; 46 | if (dir == 1) { --pos; } 47 | else if (dir == 2) { ++pos; } 48 | pos = pos % wrapAround; 49 | if (pos < 0) pos += wrapAround; 50 | viterbiScore += pos * NUM_STATES; 51 | 52 | stayBuf += NUM_STATES * groupId; 53 | ptrs += NUM_STATES * groupId; 54 | #ifdef ENABLE_FP64 55 | int weight_stay = (int)(weights[3 * nodeIndex + dir] * transitions[dir]); 56 | #else 57 | int weight_stay = weights_stay[3 * nodeIndex + dir]; 58 | #endif 59 | int step = transitions[3 + dir]; 60 | int skip = transitions[6 + dir]; 61 | 62 | // Fill in scores from previous nodes. 63 | if ((slicePos == 0 && ((dir == MOVE_RIGHT && firstLeftValid == 0) || 64 | (dir == MOVE_DIAG && firstDiagonalValid == 0))) || 65 | (slicePos == sliceSize - 1 && ((dir == MOVE_UP && lastDownValid == 0) || 66 | (dir == MOVE_DIAG && lastDiagonalValid == 0)))) 67 | { 68 | for (int x = 0; x < NUM_STATES; x += WORK_ITEMS) { 69 | stayBuf[x + localId] = ZERO_PROB_SCORE; 70 | } 71 | return; 72 | } 73 | 74 | int index1 = index1First + slicePos; 75 | int index2 = index1First - firstOffset - slicePos; 76 | emScore1 += NUM_STATES * index1; 77 | emScore2 += NUM_STATES * index2; 78 | 79 | for (int x = 0; x < NUM_STATES; x += WORK_ITEMS) { 80 | // Add transitions scores. 
81 | int state = x + localId; 82 | int score = viterbiScore[x + localId] + weight_stay; 83 | 84 | // Set pointers for stay movement. Scores are already stay scores. 85 | int ptr = state; 86 | 87 | // Find maxima for each direction. 88 | for (int from = 0; from < NUM_STATES; from += NUM_STATES/4) { 89 | 90 | // Check the step movement scores. Update as needed. 91 | int buf = viterbiScore[from + (state / 4)] + step; 92 | if (buf > score) { 93 | score = buf; 94 | ptr = from + (state / 4); 95 | } 96 | 97 | // Check the skip movement scores. Update as needed. 98 | #pragma unroll 99 | for (int y = 0; y < 4; ++y) { 100 | int fromState = from + (y * NUM_STATES / 16) + (state / 16); 101 | int buf = viterbiScore[fromState] + skip; 102 | if (buf > score) { 103 | score = buf; 104 | ptr = fromState; 105 | } 106 | } 107 | } 108 | 109 | // Apply emission scores, depending on direction 110 | if (dir < 2) { score += emScore1[state]; } 111 | if (dir != 1) { score += emScore2[state]; } 112 | 113 | // Write result 114 | stayBuf[state] = score; 115 | ptrs[state] = ptr; 116 | } 117 | } 118 | 119 | 120 | __kernel __attribute__((reqd_work_group_size(WORK_ITEMS, 1, 1))) 121 | void PickBest( 122 | int firstNode, 123 | int firstOffset, 124 | int wrapAround, 125 | __global int* restrict stayBuf_tab, // maxSliceSize * 3 * numStates 126 | __global short* restrict ptrs_tab, // maxSliceSize * 3 * numStates 127 | __global short* restrict statePointers, // numNodes * numStates 128 | __global char* restrict dirPointers, // numNodes * numStates 129 | __global int* restrict viterbiScore // maxSliceSize * 2 * numStates 130 | ) 131 | { 132 | int localId = get_local_id(0); 133 | int groupId = get_group_id(0); 134 | int nodeIndex = firstNode + groupId; 135 | __global int *stayBuf = &stayBuf_tab[3 * NUM_STATES * groupId]; 136 | __global short *ptrs = &ptrs_tab[3 * NUM_STATES * groupId]; 137 | 138 | // Since firstOffset varies by +/-1 per slice we alternate between writing even and odd buffers 139 | int pos 
= (firstOffset + groupId * 2) % wrapAround; 140 | if (pos < 0) pos += wrapAround; 141 | 142 | // Pick the best of the three for each state. 143 | for (int j = 0; j < NUM_STATES; j += WORK_ITEMS) { 144 | int state = j + localId; 145 | char dir = MOVE_UP; 146 | int score0 = stayBuf[state]; 147 | int score1 = stayBuf[NUM_STATES + state]; 148 | int score = stayBuf[2*NUM_STATES + state]; 149 | 150 | if (score0 > score1 && score0 > score) { 151 | dir = MOVE_DIAG; 152 | score = score0; 153 | } else if (score1 > score) { 154 | dir = MOVE_RIGHT; 155 | score = score1; 156 | } 157 | viterbiScore[pos * NUM_STATES + state] = score; 158 | statePointers[nodeIndex * NUM_STATES + state] = ptrs[dir * NUM_STATES + state]; 159 | dirPointers[nodeIndex * NUM_STATES + state] = dir; 160 | } 161 | }; 162 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/viterbi_2d_ocl/viterbi_2d_ocl.h: -------------------------------------------------------------------------------- 1 | #ifndef VITERBI_2D_OCL_H 2 | #define VITERBI_2D_OCL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "proxyCL.h" 11 | 12 | namespace ublas = boost::numeric::ublas; 13 | 14 | static const int8_t MOVE_DIAG = 0; 15 | static const int8_t MOVE_RIGHT = 1; 16 | static const int8_t MOVE_UP = 2; 17 | static const int8_t MOVE_UNDEF = 3; 18 | static const int32_t ZERO_PROB_SCORE = -1000000000; 19 | static const double MIN_EMISSION_SCORE = -20.0; 20 | 21 | 22 | inline double square(double x) { 23 | return x * x; 24 | } 25 | 26 | inline int32_t prob2score(double x) { 27 | if (x < 0.0000000001) return -2400; 28 | return int32_t(100.0 * log(x)); 29 | } 30 | 31 | /// Struct representing all nodes in the HMM 32 | struct HmmNodesData { 33 | /* The NodeSlice struct represents a diagonal slice through the nodes which can be 34 | * scheduled simultaneously as they only have data dependencies on the previous two 35 | * slices (the left and down 
nodes are in the previous slice, the down-left diagonal 36 | * node is in the slice before that) and none on nodes within the current slice. 37 | * 38 | * The diagram below shows the nodes within a narrow band around an alignment of 39 | * two base sequences. The first 10 diagonals of nodes represented by the digits 0-9, 40 | * further diagonals are indicated by backslashes. 41 | * For example, slice 6 is represented as follows: 42 | * - It contains 5 nodes (.numNodes = 5) 43 | * - The first (top-left) node is at position (2, 4) (.index1 = 2, .index2 = 4) 44 | * - The top-left node is node #18 (.firstNode = 18) as the previous 45 | * slices have 1, 2, 3, 4, 4, and 4 nodes, respectively. 46 | * - The first node has no valid left node but a valid SW diagonal node 47 | * (.firstLeftValid = false, .firstDiagonalValid = true) 48 | * - The last node has no valid diagonal nor down nodes 49 | * (.lastDownValid = false, .lastDiagonalValid = false) 50 | * 51 | * ^ 52 | * | \\\\\\\\\ 53 | * s | 9\\\\\\\\\ 54 | * e 4| 6789\\\\\\ 55 | * q |3456789\\\\\ 56 | * 2 |23456789\\ 57 | * |12345678 58 | * 0|0123456 59 | * *-------------------> 60 | * 0 2 61 | * sequence1 62 | */ 63 | struct NodeSlice { 64 | int32_t numNodes; // Number of nodes in this slice 65 | int32_t firstNode; // Index of first node in slice 66 | int32_t index1; // Position of first node in slice along sequence 1 67 | int32_t index2; // Position of first node in slice along sequence 2 68 | bool firstLeftValid; // Does the first node in this slice have a left node? 69 | bool lastDownValid; // Does the last node in this slice have a down node? 70 | bool firstDiagonalValid; // Does the first node in this slice have a diagonal node? 71 | bool lastDiagonalValid; // Does the last node in this slice have a diagonal node? 72 | }; 73 | int32_t maxSliceSize; // Size of largest slice 74 | std::vector slices; // List of slices to be scheduled separately 75 | ublas::matrix statePointers; // Viterbi backtrace pointers. 
76 | ublas::matrix dirPointers; // NW alignment backtrace pointers. 77 | }; 78 | 79 | 80 | /** Helper class for emission scores. 81 | * 82 | * This class provides normal level emissions and gamma distributed noise emissions. 83 | * Note that other emission objects can be substituted by changing the Emission typedef 84 | * immediately following this class definition. 85 | */ 86 | class DefaultEmission { 87 | private: 88 | std::vector levels; 89 | std::vector noises; 90 | std::vector logNoises; 91 | std::vector stayWeights; 92 | std::vector emWeights; 93 | std::vector modelLevels; 94 | std::vector modelNoises; 95 | std::vector offsets; 96 | std::vector levelScales; 97 | std::vector noiseScales; 98 | std::vector noiseShapes; 99 | int numEvents; 100 | int numStates; 101 | bool useNoise; 102 | 103 | public: 104 | /** Constructor. 105 | * @param[in] mdlLevels Model current levels. 106 | * @param[in] mdlLevelSpreads Spreads of model current levels. 107 | * @params[in] mdlNoises Model noise levels. 108 | * @param[in] mdlNoiseSpreads Spreads of model noise levels. 109 | * @param[in] useSd Flag to specify whether to use noise levels in the basecall. 110 | */ 111 | DefaultEmission(const std::vector& mdlLevels, const std::vector& mdlLevelSpreads, 112 | const std::vector& mdlNoises, const std::vector& mdlNoiseSpreads, 113 | bool useSd); 114 | 115 | /** Assign events to the object with vectors. 116 | * @param[in] means Event current levels. 117 | * @param[in] stdvs Event noise levels. 118 | * @param[in] stayWts Event weights for modifying stay probabilities. 119 | * @param[in] emWts Event weights for modifying emission probabilities. 120 | */ 121 | void SetEvents(const std::vector& means, const std::vector& stdvs, 122 | const std::vector& stayWts, const std::vector& emWts); 123 | 124 | /// Set the number of events (for when SetEvents() will not be called. 125 | void SetNEvents(int n) {numEvents = n;} 126 | 127 | /// Returns the number of events. 
128 | int NumEvents() const {return numEvents;} 129 | 130 | /// Returns the number of model states. 131 | int NumStates() const {return numStates;} 132 | 133 | /// Returns the model levels. 134 | const std::vector GetModelLevels() const { return modelLevels; } 135 | 136 | /// Returns the stay weights. 137 | const std::vector GetStayWeights() const { return stayWeights; } 138 | 139 | /// Returns the score for event i and state j. 140 | int32_t Score(int i, int j) const { 141 | double score = offsets[j] + levelScales[j] * square(levels[i] - modelLevels[j]); 142 | if (useNoise) score += (noiseShapes[j] - 1.0) * logNoises[i] - noiseScales[j] * noises[i]; 143 | return int32_t(emWeights[i] * std::max(MIN_EMISSION_SCORE, score)); 144 | } 145 | }; 146 | 147 | 148 | typedef DefaultEmission Emission; 149 | typedef std::vector > Alignment; 150 | 151 | 152 | /// Worker class for performing 2D Viterbi basecall. 153 | class Viterbi2Docl { 154 | private: 155 | proxyCL &proxy_cl_; 156 | HmmNodesData nodes; // All HMM nodes, in the order they should be processed. 157 | std::vector transProbs; // Nine transition probabilities (stay * dir, step * dir, skip * dir). 158 | ublas::matrix emScore1; // Pre-computed emissions for sequence 1. 159 | ublas::matrix emScore2; // Pre-computed emissions for sequence 2. 160 | std::vector viterbiScore; // Viterbi scores for last node. 161 | int numStates; // Number of states in the HMM. 162 | int numNodes; // Total number of nodes to be processed. 163 | int numEvents1; // Number of events in sequence 1. 164 | int numEvents2; // Number of events in sequence 2. 
165 | bool enable_fp64_; // Whether to use double floating point 166 | cl::Kernel kernelProcessNodes; // OpenCL kernel objects 167 | cl::Kernel kernelPickBest; 168 | 169 | void initNodes(const std::vector& bandStarts, const std::vector& bandEnds); 170 | void processNodes(const std::vector& wts1, const std::vector& wts2, 171 | const std::vector& priors); 172 | void backTrace(Alignment& alignment, std::vector& states); 173 | 174 | public: 175 | /** Constructor. 176 | */ 177 | Viterbi2Docl(proxyCL& proxy_cl); 178 | 179 | /** Perform the basecall with emission objects. 180 | * @param[in] data1 Emission object for sequence 1. 181 | * @param[in] data2 Emission object for sequence 2. 182 | * @param[in] bandStarts For each event in sequence 2, the first candidate position in sequence 1. 183 | * @param[in] bandEnds For each event in sequence 2, the last candidate position in sequence 1. 184 | * @param[in] priors The prior scores for the "before alignment" node. All zeros means no prior. 185 | * @param[out] alignment The final alignment of events. 186 | * @param[out] states The final basecalled states. 187 | */ 188 | void Call(const Emission& data1, const Emission& data2, const std::vector& bandStarts, 189 | const std::vector& bandEnds, const std::vector& priors, 190 | Alignment& alignment, std::vector& states); 191 | 192 | /** Perform the basecall with precomputed emissions. 193 | * @param[in] data1 Precomputed emissions for sequence 1. 194 | * @param[in] data2 Precomputed emissions for sequence 2. 195 | * @param[in] stayWt1 Stay weights for sequence 1. 196 | * @param[in] stayWt2 Stay weights for sequence 2. 197 | * @param[in] bandStarts For each event in sequence 2, the first candidate position in sequence 1. 198 | * @param[in] bandEnds For each event in sequence 2, the last candidate position in sequence 1. 199 | * @param[in] priors The prior scores for the "before alignment" node. All zeros means no prior. 200 | * @param[out] alignment The final alignment of events. 
201 | * @param[out] states The final basecalled states. 202 | */ 203 | void Call(const MatView& data1, const MatView& data2, 204 | const VecView& stayWt1, const VecView& stayWt2, 205 | const std::vector& bandStarts, const std::vector& bandEnds, 206 | const std::vector& priors, Alignment& alignment, std::vector& states); 207 | 208 | /* Set default transition values and allocate memory 209 | * @param[in] len The maximum number of events to support for either sequence. 210 | * @param[in] states The number of states in the HMM. 211 | * @param[in] trans The six transition probabilities (stay1, step1, skip1, stay2, step2, skip2). 212 | */ 213 | void InitData(int len, int states, const std::vector& trans); 214 | 215 | bool InitCL(const std::string& srcKernelDir, const std::string& binKernelDir, 216 | std::string &error, bool enable_fp64, size_t num_states, size_t work_group_size = 0); 217 | }; 218 | 219 | #endif /* VITERBI_2D_OCL_H */ 220 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/caller_2d/viterbi_2d_ocl/viterbi_2d_ocl_py.h: -------------------------------------------------------------------------------- 1 | #ifndef VITERBI_2D_OCL_PY_H 2 | #define VITERBI_2D_OCL_PY_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "proxyCL.h" 14 | #include "viterbi_2d_ocl.h" 15 | 16 | 17 | namespace bp = boost::python; 18 | 19 | /// proxyCL python wrapper. 
20 | class proxyCL_Py : public proxyCL 21 | { 22 | public: 23 | bp::tuple available_vendors() const 24 | { 25 | std::string error; 26 | std::vector vendors = proxyCL::available_vendors(error); 27 | return bp::make_tuple(vendors, error); 28 | } 29 | 30 | bp::tuple available_vendors_str() const 31 | { 32 | std::string error; 33 | std::vector vendors = proxyCL::available_vendors_str(error); 34 | return bp::make_tuple(vendors, error); 35 | } 36 | 37 | bp::tuple available_vendors_str_ex() const 38 | { 39 | std::string error; 40 | std::vector vendors = proxyCL::available_vendors_str_ex(error); 41 | return bp::make_tuple(vendors, error); 42 | } 43 | 44 | bp::tuple select_vendor(vendor v) 45 | { 46 | std::string error; 47 | bool ret = proxyCL::select_vendor(v, error); 48 | return bp::make_tuple(ret, error); 49 | } 50 | 51 | bp::tuple select_vendor_str(const std::string &vendor) 52 | { 53 | std::string error; 54 | bool ret = proxyCL::select_vendor(vendor, error); 55 | return bp::make_tuple(ret, error); 56 | } 57 | 58 | bp::tuple create_context(device_type type = undefined) 59 | { 60 | bool ret; 61 | std::string error; 62 | if (type == undefined) 63 | { 64 | ret = proxyCL::create_context(error); 65 | } 66 | else 67 | { 68 | ret = proxyCL::create_context(type, error); 69 | } 70 | return bp::make_tuple(ret, error); 71 | } 72 | 73 | bp::tuple available_devices() const 74 | { 75 | std::string error; 76 | std::vector devices = proxyCL::available_devices(error); 77 | return bp::make_tuple(devices, error); 78 | } 79 | 80 | bp::tuple select_device(size_t id) 81 | { 82 | std::string error; 83 | bool ret = proxyCL::select_device(id, error); 84 | return bp::make_tuple(ret, error); 85 | } 86 | 87 | bp::tuple get_device_info(size_t id) const 88 | { 89 | std::string error; 90 | std::string info = proxyCL::get_device_info(id, error); 91 | return bp::make_tuple(info, error); 92 | } 93 | }; 94 | 95 | BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(create_context_overloads, create_context, 0, 1); 96 | 
97 | /// Viterbi 2D basecaller python wrapper. 98 | class Viterbi2Docl_Py { 99 | private: 100 | boost::shared_ptr viterbi; 101 | boost::shared_ptr emission1; 102 | boost::shared_ptr emission2; 103 | std::vector kmers; 104 | std::map states; 105 | std::map parms; 106 | int bandSize; 107 | bool useNoise; 108 | 109 | void setupKmers(int kmerLen); 110 | boost::shared_ptr makeEmission(bp::dict& model, bool rc); 111 | boost::shared_ptr dummyEmission(bp::list& kmers); 112 | void sortModel(std::vector& levels, std::vector& levelSpreads, std::vector& noises, 113 | std::vector& noiseSpreads, std::vector& mdlKmers, bool rc); 114 | void getEvents(bp::dict& events, std::vector& means, std::vector& stdvs, 115 | std::vector& stayWts, std::vector& emWts); 116 | void makeBands(const Alignment& alignIn, std::vector& bandStarts, std::vector& bandEnds); 117 | bp::dict makeResult(const Alignment& alignOut, const std::vector& statesOut); 118 | 119 | public: 120 | /** Constructor. 121 | * @param[in] proxy_cl Initialised proxyCL object. 122 | */ 123 | Viterbi2Docl_Py(proxyCL_Py& proxy_cl); 124 | 125 | /** Perform the basecall. 126 | * @param[in] data1 Event sequence 1. 127 | * @param[in] data2 Event sequence 2. 128 | * @param[in] alignment Estimated alignment of sequence 1 to sequence 2. 129 | * @param[in] prior The prior kmer for the "before alignment" node. None means no prior. 130 | * @return Dictionary contain alignment and called kmers. 131 | */ 132 | bp::dict Call(bp::dict& events1, bp::dict& events2, bp::list& alignment, bp::object& prior); 133 | 134 | /** Perform the basecall using posteriors. 135 | * @param[in] post1 Posteriors for sequence 1. 136 | * @param[in] post2 Posteriors for sequence 2. 137 | * @param[in] stayWt1 Stay weights for sequence 1. 138 | * @param[in] stayWt2 Stay weights for sequence 2. 139 | * @param[in] alignment Estimated alignment of sequence 1 to sequence 2. 140 | * @param[in] prior The prior kmer for the "before alignment" node. None means no prior. 
141 | * @return Dictionary contain alignment and called kmers. 142 | */ 143 | bp::dict CallPost(bp::numeric::array& post1, bp::numeric::array& post2, 144 | bp::numeric::array& stayWt1, bp::numeric::array& stayWt2, 145 | bp::list& alignment, bp::object& prior); 146 | 147 | /** Initialize OpenCL kernel and command queue. This can also be used to just build the binary kernel file. 148 | * @param[in] model1 Model to use for first sequence of events. 149 | * @param[in] model2 Model to use for second sequence of events. 150 | * @param[in] params Dictionary of basecalling parameters. 151 | */ 152 | bp::tuple InitCL(const std::string& srcKernelDir, const std::string& binKernelDir, 153 | bool enable_fp64, size_t num_states, size_t work_group_size); 154 | 155 | /** Initialize model data and basecalling parameters. This is not necessary when just creating the binary kernel file. 156 | * @param[in] stateInfo Dictionary containing state information. 157 | * @param[in] params Dictionary of basecalling parameters. 158 | * 159 | * The state information should either contain 'model1' and 'model2' 160 | * fields, containing the models for the template and complement data, 161 | * or a 'kmers' field containing a list of the kmers (for posterior 162 | * calling). 163 | */ 164 | void InitData(bp::dict& stateInfo, bp::dict& params); 165 | 166 | /// Get a list of kmers in operational order. 167 | bp::list GetKmerList() const; 168 | 169 | /// Get a list of the model levels for the first sequence. 170 | bp::numeric::array GetModelLevels1() const; 171 | 172 | /// Get a list of the model levels for the second sequence. 173 | bp::numeric::array GetModelLevels2() const; 174 | }; 175 | 176 | /// Python class wrapper. 
177 | BOOST_PYTHON_MODULE(viterbi_2d_ocl) { 178 | import_array(); 179 | bp::numeric::array::set_module_and_type("numpy", "ndarray"); 180 | 181 | bp::scope().attr("ZERO_PROB_SCORE") = bp::object(ZERO_PROB_SCORE); 182 | 183 | bp::enum_("vendor") 184 | .value("amd", vendor::amd) 185 | .value("intel", vendor::intel) 186 | .value("nvidia", vendor::nvidia) 187 | .value("apple", vendor::apple) 188 | .value("other", vendor::other) 189 | ; 190 | 191 | bp::enum_("device_type") 192 | .value("cpu", device_type::cpu) 193 | .value("gpu", device_type::gpu) 194 | .value("all", device_type::all) 195 | ; 196 | 197 | bp::class_("device_info", bp::no_init) 198 | .def_readonly("id", &device_info::id) 199 | .def_readonly("name", &device_info::name) 200 | .def_readonly("type", &device_info::type) 201 | ; 202 | 203 | bp::class_ >("vendor_vec") 204 | .def(bp::vector_indexing_suite >()) 205 | ; 206 | 207 | bp::class_ >("device_info_vec") 208 | .def(bp::vector_indexing_suite >()) 209 | ; 210 | 211 | bp::class_("proxyCL", bp::init<>()) 212 | .def("available_vendors", &proxyCL_Py::available_vendors) 213 | .def("available_vendors_str", &proxyCL_Py::available_vendors_str) 214 | .def("available_vendors_str_ex",&proxyCL_Py::available_vendors_str_ex) 215 | .def("enable_cuda_build_cache", &proxyCL_Py::enable_cuda_build_cache) 216 | .def("select_vendor", &proxyCL_Py::select_vendor) 217 | .def("select_vendor_str", &proxyCL_Py::select_vendor_str) 218 | .def("create_context", &proxyCL_Py::create_context, create_context_overloads()) 219 | .def("available_devices", &proxyCL_Py::available_devices) 220 | .def("get_device_info", &proxyCL_Py::get_device_info) 221 | .def("select_device", &proxyCL_Py::select_device) 222 | ; 223 | 224 | bp::class_("Viterbi2Docl", bp::init()) 225 | .def("call", &Viterbi2Docl_Py::Call) 226 | .def("call_post", &Viterbi2Docl_Py::CallPost) 227 | .def("init_cl", &Viterbi2Docl_Py::InitCL) 228 | .def("init_data", &Viterbi2Docl_Py::InitData) 229 | .def("get_kmer_list", 
&Viterbi2Docl_Py::GetKmerList) 230 | .def("get_model_levels1", &Viterbi2Docl_Py::GetModelLevels1) 231 | .def("get_model_levels2", &Viterbi2Docl_Py::GetModelLevels2) 232 | ; 233 | } 234 | 235 | 236 | #endif /* VITERBI_2D_OCL_PY_H */ 237 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/cmdargs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import multiprocessing 4 | 5 | 6 | class FileExist(argparse.Action): 7 | """Check if the input file exist.""" 8 | def __call__(self, parser, namespace, values, option_string=None): 9 | if values is not None and not os.path.exists(values): 10 | raise RuntimeError("File/path for '{}' does not exist, {}".format(self.dest, values)) 11 | setattr(namespace, self.dest, values) 12 | 13 | 14 | class CheckCPU(argparse.Action): 15 | """Make sure people do not overload the machine""" 16 | def __call__(self, parser, namespace, values, option_string=None): 17 | num_cpu = multiprocessing.cpu_count() 18 | if int(values) <= 0 or int(values) > num_cpu: 19 | raise RuntimeError('Number of jobs can only be in the range of {} and {}'.format(1, num_cpu)) 20 | setattr(namespace, self.dest, values) 21 | 22 | 23 | class AutoBool(argparse.Action): 24 | def __init__(self, option_strings, dest, default=None, required=False, help=None): 25 | """Automagically create --foo / --no-foo argument pairs""" 26 | 27 | if default is None: 28 | raise ValueError('You must provide a default with AutoBool action') 29 | if len(option_strings)!=1: 30 | raise ValueError('Only single argument is allowed with AutoBool action') 31 | opt = option_strings[0] 32 | if not opt.startswith('--'): 33 | raise ValueError('AutoBool arguments must be prefixed with --') 34 | 35 | opt = opt[2:] 36 | opts = ['--' + opt, '--no-' + opt] 37 | if default: 38 | default_opt = opts[0] 39 | else: 40 | default_opt = opts[1] 41 | super(AutoBool, self).__init__(opts, dest, nargs=0, 
const=None, 42 | default=default, required=required, 43 | help='{} (Default: {})'.format(help, default_opt)) 44 | def __call__(self, parser, namespace, values, option_strings=None): 45 | if option_strings.startswith('--no-'): 46 | setattr(namespace, self.dest, False) 47 | else: 48 | setattr(namespace, self.dest, True) 49 | 50 | class ParseTransitions(argparse.Action): 51 | """Handle list of exactly 3 values, check values can be coerced to float and 52 | normalise so that they sum to 1. 53 | """ 54 | def __init__(self, **kwdargs): 55 | kwdargs['metavar'] = ('stay', 'step', 'skip') 56 | super(ParseTransitions, self).__init__(**kwdargs) 57 | 58 | def __call__(self, parser, namespace, values, option_string=None): 59 | # locally import these 60 | import numpy as np 61 | from dragonet.util.assertions import checkTransitionProbabilities 62 | try: 63 | values = np.array(values, dtype='float') 64 | except: 65 | raise ValueError('Illegal value for {} ({})'.format(option_string, values)) 66 | values = values / np.sum(values) 67 | checkTransitionProbabilities(values) 68 | setattr(namespace, self.dest, values) 69 | 70 | 71 | class ParseToNamedTuple(argparse.Action): 72 | """Parse to a namedtuple 73 | """ 74 | def __init__(self, **kwdargs): 75 | assert 'metavar' in kwdargs, "Argument 'metavar' must be defined" 76 | assert 'type' in kwdargs, "Argument 'type' must be defined" 77 | assert len(kwdargs['metavar']) == kwdargs['nargs'], 'Number of arguments and descriptions inconstistent' 78 | assert len(kwdargs['type']) == kwdargs['nargs'], 'Number of arguments and types inconstistent' 79 | self._types = kwdargs['type'] 80 | kwdargs['type'] = str 81 | self.Values = namedtuple('Values', ' '.join(kwdargs['metavar'])) 82 | super(ParseToNamedTuple, self).__init__(**kwdargs) 83 | self.default = self.Values(*self.default) if self.default is not None else None 84 | 85 | def __call__(self, parser, namespace, values, option_string=None): 86 | value_dict = self.Values(*[ f(v) for f, v in 
zip(self._types, values)]) 87 | setattr(namespace, self.dest, value_dict) 88 | 89 | def TypeOrNone(mytype): 90 | """Create an argparse argument type that accepts either given type or 'None' 91 | 92 | :param mytype: Type function for type to accept, e.g. `int` or `float` 93 | """ 94 | def f(y): 95 | try: 96 | if y == 'None': 97 | res = None 98 | else: 99 | res = mytype(y) 100 | except: 101 | raise argparse.ArgumentTypeError('Argument must be None or {}'.format(mytype)) 102 | return res 103 | return f 104 | 105 | 106 | def NonNegative(mytype): 107 | """Create an argparse argument type that accepts only non-negative values 108 | 109 | :param mytype: Type function for type to accept, e.g. `int` or `float` 110 | """ 111 | def f(y): 112 | yt = mytype(y) 113 | if yt < 0: 114 | raise argparse.ArgumentTypeError('Argument must be non-negative') 115 | return yt 116 | return f 117 | 118 | 119 | def Positive(mytype): 120 | """Create an argparse argument type that accepts only positive values 121 | 122 | :param mytype: Type function for type to accept, e.g. 
`int` or `float` 123 | """ 124 | def f(y): 125 | yt = mytype(y) 126 | if yt <= 0: 127 | raise argparse.ArgumentTypeError('Argument must be positive') 128 | return yt 129 | return f 130 | 131 | 132 | def Vector(mytype): 133 | """Return an argparse.Action that will convert a list of values into a numpy 134 | array of given type 135 | """ 136 | 137 | class MyNumpyAction(argparse.Action): 138 | """Parse a list of values into numpy array""" 139 | def __call__(self, parser, namespace, values, option_string=None): 140 | import tang.numpty as np 141 | try: 142 | setattr(namespace, self.dest, np.array(values, dtype=mytype)) 143 | except: 144 | raise argparse.ArgumentTypeError('Cannot convert {} to array of {}'.format(values, mytype)) 145 | @staticmethod 146 | def value_as_string(value): 147 | return ' '.join(str(x) for x in value) 148 | return MyNumpyAction 149 | 150 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/currennt_to_pickle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import json 4 | import sys 5 | 6 | import numpy as np 7 | from RUBRIC.nanonet import nn 8 | from RUBRIC.nanonet import all_nmers 9 | from RUBRIC.nanonet.cmdargs import FileExist 10 | 11 | def get_parser(): 12 | parser = argparse.ArgumentParser( 13 | description='Convert currennt json network file into pickle. 
Makes assumptions about meta data.', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter 15 | ) 16 | parser.add_argument('input', action=FileExist, 17 | help='File containing current network') 18 | parser.add_argument('output', help='Output pickle file') 19 | 20 | parser.add_argument("--kmer_length", type=int, default=5, 21 | help="Length of kmers to learn.") 22 | parser.add_argument("--bases", type=str, default='ACGT', 23 | help="Alphabet of kmers to learn.") 24 | parser.add_argument("--window", type=int, nargs='+', default=[-1, 0, 1], 25 | help="The detailed list of the entire window.") 26 | parser.add_argument("--section", type=str, default='template', 27 | help="Section of read which network is trained against.") 28 | return parser 29 | 30 | 31 | def toarray(x): 32 | return np.ascontiguousarray(np.array(x, order='C', dtype=nn.dtype)) 33 | 34 | 35 | def parse_layer_input(size, weights): 36 | return None 37 | 38 | 39 | def parse_layer_feedforward(size, weights, fun): 40 | M = toarray(weights['input']) 41 | M = M.reshape((size, -1)).transpose() 42 | b = toarray(weights['bias']) 43 | return nn.FeedForward(M, b, fun) 44 | 45 | 46 | def parse_layer_feedforward_tanh(size, weights): 47 | return parse_layer_feedforward(size, weights, nn.tanh) 48 | 49 | 50 | def parse_layer_feedforward_sigmoid(size, weights): 51 | return parse_layer_feedforward(size, weights, nn.sigmoid) 52 | 53 | 54 | def parse_layer_feedforward_linear(size, weights): 55 | return parse_layer_feedforward(size, weights, nn.linear) 56 | 57 | 58 | def parse_layer_softmax(size, weights): 59 | M = toarray(weights['input']) 60 | M = M.reshape((size, -1)).transpose() 61 | b = toarray(weights['bias']) 62 | return nn.SoftMax(M, b) 63 | 64 | 65 | def parse_layer_multiclass(size, weights): 66 | return None 67 | 68 | 69 | def parse_layer_blstm(size, weights): 70 | size = size / 2 71 | wgts_input = toarray(weights['input']).reshape((4, 2, size, -1)).transpose((0, 1, 3, 2)) 72 | wgts_bias = 
toarray(weights['bias']).reshape((4, 2, -1)) 73 | wgts_internalMat = toarray(weights['internal'][: 4 * size * size * 2]).reshape((4, 2, size, size)).transpose((0, 1, 3, 2)) 74 | wgts_internalPeep = toarray(weights['internal'][4 * size * size * 2 :]).reshape((3, 2, size)) 75 | 76 | iM1 = wgts_input[:, 0, :, :] 77 | bM1 = wgts_bias[:, 0, :] 78 | lM1 = wgts_internalMat[:, 0, :, :] 79 | pM1 = wgts_internalPeep[:, 0, :] 80 | layer1 = nn.LSTM(iM1, lM1, bM1, pM1) 81 | 82 | iM2 = wgts_input[:, 1, :, :] 83 | bM2 = wgts_bias[:, 1, :] 84 | lM2 = wgts_internalMat[:, 1, :, :] 85 | pM2 = wgts_internalPeep[:, 1, :] 86 | layer2 = nn.LSTM(iM2, lM2, bM2, pM2) 87 | return nn.BiRNN(layer1, layer2) 88 | 89 | 90 | def parse_layer_lstm(size, weights): 91 | iM = toarray(weights['input']).reshape((4, size, -1)).transpose((0, 2, 1)) 92 | bM = toarray(weights['bias']).reshape((4, size)) 93 | lM = toarray(weights['internal'][ : 4 * size * size]).reshape((4, size, size)).transpose((0, 2, 1)) 94 | pM = toarray(weights['internal'][4 * size * size : ]).reshape((3, size)) 95 | return nn.LSTM(iM, lM, bM, pM) 96 | 97 | 98 | LAYER_DICT = {'input' : parse_layer_input, 99 | 'blstm' : parse_layer_blstm, 100 | 'feedforward_tanh' : parse_layer_feedforward_tanh, 101 | 'feedforward_logistic' : parse_layer_feedforward_sigmoid, 102 | 'feedforward_identity' : parse_layer_feedforward_linear, 103 | 'lstm' : parse_layer_lstm, 104 | 'blstm' : parse_layer_blstm, 105 | 'softmax' : parse_layer_softmax, 106 | 'multiclass_classification' : parse_layer_multiclass} 107 | 108 | 109 | def parse_layer(layer_type, size, weights): 110 | if not layer_type in LAYER_DICT: 111 | sys.stderr.write('Unsupported layer type {}.\n'.format(layer_type)) 112 | exit(1) 113 | return LAYER_DICT[layer_type](size, weights) 114 | 115 | 116 | def network_to_numpy(in_network): 117 | """Transform a json representation of a network into a numpy 118 | representation. 
119 | """ 120 | 121 | layers = list() 122 | for layer in in_network['layers']: 123 | wgts = in_network['weights'][layer['name']] if layer['name'] in in_network['weights'] else None 124 | layers.append(parse_layer(layer['type'], layer['size'], wgts)) 125 | layers = filter(lambda x: x is not None, layers) 126 | 127 | meta = None 128 | if 'meta' in in_network: 129 | meta = in_network['meta'] 130 | network = nn.Serial(layers) 131 | network.meta = meta 132 | return network 133 | 134 | 135 | if __name__ == '__main__': 136 | args = get_parser().parse_args() 137 | 138 | try: 139 | with open(args.input, 'r') as fh: 140 | in_network = json.load(fh) 141 | except: 142 | sys.stderr.write('Failed to read from {}.\n'.format(args.input)) 143 | exit(1) 144 | 145 | if not 'layers' in in_network: 146 | sys.stderr.write('Could not find any layers in {} -- is it a network file?\n'.format(args.network)) 147 | exit(1) 148 | if not 'weights' in in_network: 149 | sys.stderr.write('Could not find any weights in {} -- is network trained?\n'.format(args.network)) 150 | exit(1) 151 | 152 | # Build meta, taking some guesses 153 | kmers = all_nmers(args.kmer_length, alpha=args.bases) 154 | kmers.append('X'*args.kmer_length) 155 | in_network['meta'] = { 156 | 'window':args.window, 157 | 'n_features':in_network['layers'][0]['size'], 158 | 'kmers':kmers, 159 | 'section':args.section 160 | } 161 | 162 | network = network_to_numpy(in_network) 163 | np.save(args.output, network) 164 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/data/default_complement.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/RUBRIC/2895b15ded82a85142f4d68e4feb5d5526aebb09/RUBRIC/nanonet/data/default_complement.npy -------------------------------------------------------------------------------- /RUBRIC/nanonet/data/default_model.tmpl: 
-------------------------------------------------------------------------------- 1 | { 2 | "meta": {"section": "
"}, 3 | "layers": [ 4 | { 5 | "size": , 6 | "name": "input", 7 | "type": "input" 8 | }, 9 | { 10 | "size": 128, 11 | "name": "blstm_level_0", 12 | "bias": 1.0, 13 | "type": "blstm" 14 | }, 15 | { 16 | "size": 64, 17 | "name": "subsample_level_1", 18 | "bias": 1.0, 19 | "type": "feedforward_tanh" 20 | }, 21 | { 22 | "size": 128, 23 | "name": "blstm_level_1", 24 | "bias": 1.0, 25 | "type": "blstm" 26 | }, 27 | { 28 | "size": 64, 29 | "name": "subsample_level_2", 30 | "bias": 1.0, 31 | "type": "feedforward_tanh" 32 | }, 33 | { 34 | "size": , 35 | "name": "output", 36 | "bias": 1.0, 37 | "type": "softmax" 38 | }, 39 | { 40 | "size": , 41 | "name": "postoutput", 42 | "type": "multiclass_classification" 43 | } 44 | ] 45 | } 46 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/data/default_template.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/RUBRIC/2895b15ded82a85142f4d68e4feb5d5526aebb09/RUBRIC/nanonet/data/default_template.npy -------------------------------------------------------------------------------- /RUBRIC/nanonet/data/r9.4_template.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/RUBRIC/2895b15ded82a85142f4d68e4feb5d5526aebb09/RUBRIC/nanonet/data/r9.4_template.npy -------------------------------------------------------------------------------- /RUBRIC/nanonet/data/r9_complement.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/RUBRIC/2895b15ded82a85142f4d68e4feb5d5526aebb09/RUBRIC/nanonet/data/r9_complement.npy -------------------------------------------------------------------------------- /RUBRIC/nanonet/data/r9_template.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sandialabs/RUBRIC/2895b15ded82a85142f4d68e4feb5d5526aebb09/RUBRIC/nanonet/data/r9_template.npy -------------------------------------------------------------------------------- /RUBRIC/nanonet/decoding.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define MODULE_API_EXPORTS 9 | #include "module.h" 10 | #include "stdint.h" 11 | 12 | #include 13 | 14 | typedef float ftype; 15 | using namespace std; 16 | 17 | 18 | static PyMethodDef DecodeMethods[] = { 19 | {NULL, NULL, 0, NULL} /* Sentinel */ 20 | }; 21 | 22 | PyMODINIT_FUNC initnanonetdecode(void) { 23 | (void) Py_InitModule("nanonetdecode", DecodeMethods); 24 | } 25 | 26 | 27 | extern "C" void viterbi_update( 28 | ftype* vit_last, ftype* vit_curr, int32_t* max_idx, 29 | const size_t num_bases, const size_t num_kmers, 30 | const ftype stay, const ftype step, const ftype skip, const ftype slip 31 | ){ 32 | 33 | for ( size_t kmer=0 ; kmer::infinity(); 36 | } 37 | 38 | // Stay 39 | for ( size_t kmer=0 ; kmervit_curr[kmer]){ 41 | vit_curr[kmer] = vit_last[kmer]+stay; 42 | max_idx[kmer] = kmer; 43 | } 44 | } 45 | // Step 46 | for ( size_t kmer=0 ; kmervit_curr[idx+i]){ 50 | vit_curr[idx+i] = vit_last[kmer]+step; 51 | max_idx[idx+i] = kmer; 52 | } 53 | } 54 | } 55 | // Skip 56 | for ( size_t kmer=0 ; kmervit_curr[idx+i]){ 60 | vit_curr[idx+i] = vit_last[kmer]+skip; 61 | max_idx[idx+i] = kmer; 62 | } 63 | } 64 | } 65 | // Slip 66 | if (slip > -std::numeric_limits::infinity()){ 67 | ftype slip_max = -std::numeric_limits::infinity(); 68 | size_t slip_idx = 0; 69 | for ( size_t kmer=0 ; kmerslip_max){ 71 | slip_max = vit_last[kmer]+slip; 72 | slip_idx = kmer; 73 | } 74 | } 75 | for ( size_t kmer=0 ; kmervit_curr[kmer]){ 77 | vit_curr[kmer] = slip_max; 78 | max_idx[kmer] = slip_idx; 79 | } 80 | } 81 | } 82 | } 83 | 84 | 85 | extern "C" MODULE_API ftype decode_path(ftype * 
logpost, const size_t num_events, const size_t num_bases, const size_t num_kmers){ 86 | assert(NULL!=logpost); 87 | assert(num_events>0); 88 | assert(num_bases>0); 89 | assert(num_kmers>0); 90 | 91 | std::vector max_idx(num_kmers); 92 | std::vector vit_last(num_kmers); 93 | std::vector vit_curr(num_kmers); 94 | 95 | // Treat all movement types equally, disallow slip (allowing slip 96 | // would simply give kmer with maximum posterioir) 97 | ftype stay = 0.0; 98 | ftype step = 0.0; 99 | ftype skip = 0.0; 100 | ftype slip = -std::numeric_limits::infinity(); 101 | 102 | // Initial values 103 | for ( size_t kmer=0 ; kmer::infinity(); 133 | int max_kmer = -1; 134 | for ( size_t kmer=0 ; kmermax_val){ 136 | max_val = vit_last[kmer]; 137 | max_kmer = kmer; 138 | } 139 | } 140 | logpost[idx] = max_kmer; 141 | // Other states by traceback 142 | for ( size_t ev=(num_events-1) ; ev>0 ; ev--){ 143 | const size_t idx = (ev-1)*num_kmers; 144 | logpost[idx] = logpost[idx+(int)logpost[idx+num_kmers]]; 145 | } 146 | 147 | return max_val; 148 | } 149 | 150 | 151 | extern "C" MODULE_API void estimate_transitions(ftype* post, ftype* trans, const size_t num_events, const size_t num_bases, const size_t num_kmers){ 152 | assert(NULL!=post); 153 | assert(num_events>0); 154 | assert(num_bases>0); 155 | assert(num_kmers>0); 156 | const size_t num_bases_sq = num_bases * num_bases; 157 | 158 | for (size_t ev = 1; ev < num_events; ++ev) { 159 | ftype stay_sum = 0.f; 160 | ftype step_sum = 0.f; 161 | ftype skip_sum = 0.f; 162 | const size_t idx1 = ev * num_kmers; 163 | const size_t idx0 = idx1 - num_kmers; 164 | for (size_t i = 0; i < num_kmers / num_bases_sq; ++i) { 165 | ftype sum16 = 0.f; 166 | for (size_t j = 0; j < num_bases; ++j) { 167 | ftype sum4 = 0.f; 168 | for (size_t k = 0; k < num_bases; ++k) { 169 | size_t kmer = i * num_bases_sq + j * num_bases + k; 170 | ftype p = post[idx1 + kmer]; 171 | stay_sum += post[idx0 + kmer] * p; 172 | sum4 += p; 173 | } 174 | for (size_t step_from = 
num_bases * i + j; step_from < num_kmers; step_from += num_kmers / num_bases) { 175 | step_sum += sum4 * post[idx0 + step_from]; 176 | } 177 | sum16 += sum4; 178 | } 179 | for (size_t skip_from = i; skip_from < num_kmers; skip_from += num_kmers / num_bases_sq) { 180 | skip_sum += sum16 * post[idx0 + skip_from]; 181 | } 182 | } 183 | step_sum *= 0.25f; 184 | skip_sum *= 0.0625f; 185 | trans[(ev-1) * 3] = stay_sum; 186 | trans[(ev-1) * 3 + 1] = step_sum; 187 | trans[(ev-1) * 3 + 2] = skip_sum; 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/eventdetection/filters.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define MODULE_API_EXPORTS 13 | #include "module.h" 14 | #include "filters.h" 15 | 16 | 17 | /** 18 | * setuptools install command doesn't play nice. We'll make this module 19 | * importable as a python module but not export anything. Importing the 20 | * module as: 21 | * import nanonetfilters 22 | * will at least allow us to find the file and continue to import it as 23 | * a CDLL and wrap with ctypes. That's fine because it means we can pass 24 | * numpy arrays as pointers and not worry about writing real python 25 | * extensions. 
26 | **/ 27 | static PyMethodDef FilterMethods[] = { 28 | {NULL, NULL, 0, NULL} /* Sentinel */ 29 | }; 30 | 31 | PyMODINIT_FUNC initnanonetfilters(void) { 32 | (void) Py_InitModule("nanonetfilters", FilterMethods); 33 | } 34 | 35 | 36 | /** 37 | * Compute cumulative sum and sum of squares for a vector of data 38 | * data double[d_length] Data to be summed over (in) 39 | * sum double[d_length] Vector to store sum (out) 40 | * sumsq double[d_length] Vector to store sum of squares (out) 41 | * d_length Length of data vector 42 | **/ 43 | MODULE_API void compute_sum_sumsq(const double * restrict data, double* restrict sum, double* restrict sumsq, size_t d_length) { 44 | size_t i; 45 | 46 | // Basic contracts 47 | assert(NULL!=data); 48 | assert(NULL!=sum); 49 | assert(NULL!=sumsq); 50 | assert(d_length>0); 51 | 52 | sum[0] = data[0]; 53 | sumsq[0] = data[0]*data[0]; 54 | for (i = 1; i < d_length; ++i) { 55 | sum[i] = sum[i - 1] + data[i]; 56 | sumsq[i] = sumsq[i - 1] + data[i]*data[i]; 57 | } 58 | } 59 | 60 | /** 61 | * Compute moving average over window, output centred on current coordinate 62 | * sum double[d_length] Input data, cumulative sum (in) 63 | * out double[d_length] Ouput data (out) 64 | * d_length Length of data vector 65 | * w_length Length of window to compute mave over. Made odd if not. 
66 | **/ 67 | MODULE_API void compute_mave(const double* restrict sum, double* restrict mave, size_t d_length, size_t w_length) { 68 | size_t i; 69 | size_t h_length; 70 | size_t ip; 71 | 72 | // Simple contracts 73 | assert(NULL!=sum); 74 | assert(NULL!=mave); 75 | assert(d_length>0); 76 | assert(w_length>0); 77 | // make window length odd 78 | if(w_length % 2 == 0){ 79 | w_length -= 1; 80 | } 81 | 82 | // quick return 83 | if (d_length < w_length || w_length < 2) { 84 | mave[0] = sum[0]; 85 | for(i = 1; i < d_length; ++i) 86 | mave[i] = sum[i] - sum[i-1]; 87 | return; 88 | } 89 | 90 | h_length = w_length/2; 91 | // fudge boundaries 92 | for(i = 0; i < h_length; ++i) { 93 | mave[i] = (sum[i+h_length]) / (i+1+h_length); 94 | ip = d_length - 1 - i; 95 | mave[ip] = (sum[d_length - 1] - sum[ip-h_length-1]) / (i+1+h_length); 96 | } 97 | // most of the data 98 | for(i = h_length; i < d_length - h_length ; ++i) { 99 | mave[i] = (sum[i+h_length] - sum[i-h_length-1]) / (w_length); 100 | } 101 | return; 102 | } 103 | 104 | 105 | /** 106 | * Compute windowed t-statistic from summary information 107 | * sum double[d_length] Cumulative sums of data (in) 108 | * sumsq double[d_length] Cumulative sum of squares of data (in) 109 | * tstat double[d_length] T-statistic (out) 110 | * d_length Length of data vector 111 | * w_length Window length to calculate t-statistic over 112 | **/ 113 | MODULE_API void compute_tstat(const double* restrict sum, const double* restrict sumsq, double* restrict tstat, size_t d_length, size_t w_length, bool pooled) { 114 | size_t i; 115 | const double eta = 1e-100; 116 | 117 | // Simple contracts 118 | assert(NULL!=sum); 119 | assert(NULL!=sumsq); 120 | assert(NULL!=tstat); 121 | 122 | // Quick return: 123 | // t-test not defined for number of points less than 2 124 | // need at least as many points as twice the window length 125 | if (d_length < 2*w_length || w_length < 2) { 126 | for(i = 0; i < d_length; ++i){ 127 | tstat[i] = 0.0; 128 | } 129 | 
return; 130 | } 131 | 132 | // fudge boundaries 133 | for (i = 0; i < w_length; ++i) { 134 | tstat[i] = 0; 135 | tstat[d_length - i - 1] = 0; 136 | } 137 | 138 | // get to work on the rest 139 | { 140 | double sum1, sum2, sumsq1, sumsq2, mean1, mean2, var1, var2; 141 | 142 | for (i = w_length; i <= d_length - w_length; ++i) { 143 | sum1 = sum[i - 1]; 144 | sumsq1 = sumsq[i - 1]; 145 | if (i > w_length) { 146 | sum1 -= sum[i - w_length - 1]; 147 | sumsq1 -= sumsq[i - w_length - 1]; 148 | } 149 | sum2 = sum[i + w_length - 1] - sum[i - 1]; 150 | sumsq2 = sumsq[i + w_length - 1] - sumsq[i - 1]; 151 | mean1 = sum1 / w_length; 152 | mean2 = sum2 / w_length; 153 | var1 = sumsq1 / w_length - mean1*mean1; 154 | var2 = sumsq2 / w_length - mean2*mean2; 155 | if(pooled){ 156 | var1 = ( var1 + var2 ) / 2.0; 157 | var2 = var1; 158 | } 159 | // Prevent problem due to very small variances 160 | var1 = fmax(var1, eta); 161 | var2 = fmax(var2, eta); 162 | 163 | //t-stat 164 | // Formula is a simplified version of Student's t-statistic for the 165 | // special case where there are two samples of equal size with 166 | // differing variance 167 | { 168 | const double delta = mean2 - mean1; 169 | const double totvar = var1 / w_length + var2 / w_length; 170 | tstat[i] = fabs(delta / sqrt(totvar)); 171 | } 172 | } 173 | } 174 | } 175 | 176 | 177 | /** 178 | * Compute windowed deltamean value from summary information 179 | * sum double[d_length] Cumulative sums of data (in) 180 | * sumsq double[d_length] Cumulative sum of squares of data (in) 181 | * deltamean double[d_length] deltamean (out) 182 | * d_length Length of data vector 183 | * w_length Window length to calculate t-statistic over 184 | **/ 185 | 186 | MODULE_API void compute_deltamean(const double* restrict sum, const double* restrict sumsq, double* restrict deltamean, size_t d_length, size_t w_length) { 187 | size_t i; 188 | double sum1, sum2, mean1, mean2; 189 | 190 | // Set boundaries to 0. 
191 | for (i = 0; i < w_length; ++i) { 192 | deltamean[i] = 0; 193 | deltamean[d_length - i - 1] = 0; 194 | } 195 | 196 | // compute deltamean for non-boundary data 197 | for (i = w_length; i <= d_length - w_length; ++i) { 198 | sum1 = sum[i - 1]; 199 | if (i > w_length) { 200 | sum1 -= sum[i - w_length - 1]; 201 | } 202 | sum2 = sum[i + w_length - 1] - sum[i - 1]; 203 | mean1 = sum1 / w_length; 204 | mean2 = sum2 / w_length; 205 | 206 | 207 | // assume variance of 1.0 - approximately correct and avoids extra division 208 | { 209 | const double delta = mean2 - mean1; 210 | deltamean[i] = fabs(delta); 211 | } 212 | } 213 | } 214 | 215 | 216 | MODULE_API void short_long_peak_detector(DetectorPtr short_detector, DetectorPtr long_detector, const double peak_height, size_t * peaks){ 217 | size_t i, k; 218 | size_t peak_count = 0; 219 | DetectorPtr detector; 220 | DetectorPtr detectors[2] = {short_detector, long_detector}; 221 | double current_value; 222 | 223 | assert(short_detector->signal_length == long_detector->signal_length); 224 | assert(NULL!=peaks); 225 | 226 | 227 | for(i=0; isignal_length; i++){ 228 | for(k=0; k<2; k++){ 229 | detector = detectors[k]; 230 | //Carry on if we've been masked out 231 | if (detector->masked_to >= i){ 232 | continue; 233 | } 234 | 235 | current_value = detector->signal[i]; 236 | 237 | if (detector->peak_pos == detector->DEF_PEAK_POS){ 238 | //CASE 1: We've not yet recorded a maximum 239 | if (current_value < detector->peak_value){ 240 | //Either record a deeper minimum... 
241 | detector->peak_value = current_value; 242 | } 243 | else if (current_value - detector->peak_value > peak_height){ 244 | // ...or we've seen a qualifying maximum 245 | detector->peak_value = current_value; 246 | detector->peak_pos = i; 247 | //otherwise, wait to rise high enough to be considered a peak 248 | } 249 | } 250 | else { 251 | //CASE 2: In an existing peak, waiting to see if it is good 252 | if (current_value > detector->peak_value){ 253 | //Update the peak 254 | detector->peak_value = current_value; 255 | detector->peak_pos = i; 256 | } 257 | 258 | //Dominate other tstat signals if we're going to fire at some point 259 | if (detector == short_detector){ 260 | if (detector->peak_value > detector->threshold){ 261 | long_detector->masked_to = detector->peak_pos + detector->window_length; 262 | long_detector->peak_pos = long_detector->DEF_PEAK_POS; 263 | long_detector->peak_value = long_detector->DEF_PEAK_VAL; 264 | long_detector->valid_peak = false; 265 | } 266 | } 267 | 268 | //Have we convinced ourselves we've seen a peak 269 | if (detector->peak_value - current_value > peak_height && detector->peak_value > detector->threshold){ 270 | detector->valid_peak = true; 271 | } 272 | 273 | //Finally, check the distance if this is a good peak 274 | if (detector->valid_peak && (i - detector->peak_pos) > detector->window_length / 2){ 275 | //Emit the boundary and reset 276 | peaks[peak_count] = detector->peak_pos; 277 | peak_count++; 278 | detector->peak_pos = detector->DEF_PEAK_POS; 279 | detector->peak_value = current_value; 280 | detector->valid_peak = false; 281 | } 282 | } 283 | } 284 | } 285 | } 286 | 287 | 288 | 289 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/eventdetection/filters.h: -------------------------------------------------------------------------------- 1 | #ifndef FILTERS_H 2 | #define FILTERS_H 3 | 4 | #include 5 | #include 6 | 7 | #if defined(_MSC_VER) 8 | # define false 0 9 | # define 
true 1 10 | # define bool int 11 | # define _Bool int 12 | # define fmax max 13 | # define fmin min 14 | #else 15 | # include 16 | #endif 17 | 18 | 19 | 20 | typedef struct { 21 | int DEF_PEAK_POS; 22 | double DEF_PEAK_VAL; 23 | double * signal; 24 | size_t signal_length; 25 | double threshold; 26 | size_t window_length; 27 | size_t masked_to; 28 | int peak_pos; 29 | double peak_value; 30 | _Bool valid_peak; 31 | } Detector; 32 | typedef Detector * DetectorPtr; 33 | 34 | 35 | MODULE_API void short_long_peak_detector( 36 | DetectorPtr short_detector, 37 | DetectorPtr long_detector, 38 | const double peak_height, 39 | size_t * peaks); 40 | 41 | 42 | #endif /* FILTERS_H */ 43 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/include/module.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #ifdef _WIN32 6 | # ifdef MODULE_API_EXPORTS 7 | # define MODULE_API __declspec(dllexport) 8 | # define restrict __restrict 9 | # else 10 | # define MODULE_API __declspec(dllimport) 11 | # endif 12 | #else 13 | # define MODULE_API 14 | #endif 15 | 16 | MODULE_API int module_init(); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/jobqueue.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | from time import sleep 3 | import os 4 | from multiprocessing import Process 5 | import Queue 6 | from functools import partial 7 | 8 | from myriad.components import MyriadServer 9 | from myriad.managers import make_client 10 | 11 | from RUBRIC.nanonet import stderr_redirected 12 | 13 | __timeout__ = 0.5 14 | __worker_startup_sleep__ = 2 15 | 16 | class JobQueue(object): 17 | 18 | def __init__(self, jobs, functors): 19 | """A simple job queue which can be processed by various functors. 
20 | 21 | :param jobs: iterable of job items. 22 | :param functions: tuples of the form (function, n_items), if n_itmes 23 | is None then the function should accept a single job items and 24 | process it to produce a single result. if n_items >= 2, then 25 | function should process a list of items, returning a list of 26 | results. 27 | """ 28 | self.jobs = jobs 29 | self.functors = functors 30 | 31 | def __iter__(self): 32 | self.start_server() 33 | workers = [Process(target=partial(worker, f[0], f[1], self.port, self.authkey)) for f in self.functors] 34 | 35 | try: 36 | for w in workers: 37 | w.start() 38 | 39 | for result in self.server.imap_unordered(self.jobs, timeout=__timeout__): 40 | yield result 41 | 42 | for w in workers: 43 | w.terminate() 44 | except KeyboardInterrupt: 45 | for w in workers: 46 | w.terminate() 47 | self.server.manager.join() 48 | self.server.manager.shutdown() 49 | 50 | def start_server(self, ports=(5000,6000)): 51 | self.authkey = str(uuid4()) 52 | 53 | server = None 54 | for port in xrange(*ports): 55 | try: 56 | with stderr_redirected(os.devnull): 57 | server = MyriadServer(None, port, self.authkey) 58 | except EOFError: 59 | pass 60 | else: 61 | break 62 | if server is None: 63 | raise RuntimeError("Could not start myriad server.") 64 | 65 | self.server = server 66 | self.port = port 67 | 68 | 69 | # On *nix the following could be part of the class above, but not on windows: 70 | # https://docs.python.org/2/library/multiprocessing.html#windows 71 | 72 | def worker(function, take_n, port, authkey, timeout=__timeout__): 73 | """Worker function for JobQueue. Dispatches to singleton_worker or 74 | multi_worker as appropriate. 75 | 76 | :param function: function to apply in job items. 77 | :param take_n: number of items to process, should be None or >=2. Special 78 | case of None indicates function takes a single item to produce a single 79 | result. 
80 | """ 81 | sleep(__worker_startup_sleep__) # nasty, allows all workers to come up before iteration begins 82 | manager = make_client('localhost', port, authkey) 83 | job_q = manager.get_job_q() 84 | job_q_closed = manager.q_closed() 85 | result_q = manager.get_result_q() 86 | 87 | if take_n is None: 88 | _singleton_worker(function, job_q, job_q_closed, result_q, timeout=timeout) 89 | else: 90 | _multi_worker(function, take_n, job_q, job_q_closed, result_q, timeout=timeout) 91 | 92 | 93 | def _singleton_worker(function, job_q, job_q_closed, result_q, timeout=__timeout__): 94 | while True: 95 | try: 96 | job = job_q.get_nowait() 97 | result = function(job) 98 | result_q.put(result) 99 | except Queue.Empty: 100 | if job_q_closed._getvalue().value: 101 | break 102 | sleep(timeout) 103 | 104 | 105 | def _multi_worker(function, take_n, job_q, job_q_closed, result_q, timeout=__timeout__): 106 | while True: 107 | jobs = [] 108 | try: 109 | for _ in xrange(take_n): 110 | job = job_q.get_nowait() 111 | jobs.append(job) 112 | except Queue.Empty: 113 | if job_q_closed._getvalue().value: 114 | break 115 | else: 116 | for i, res in enumerate(function(jobs)): 117 | result_q.put(res) 118 | sleep(timeout) 119 | if len(jobs) > 0: 120 | for i, res in enumerate(function(jobs)): 121 | result_q.put(res) 122 | 123 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/latency_test/latency_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | from RUBRIC.read_until import ReadUntil 4 | import time 5 | import errno 6 | from socket import error as socket_error 7 | import csv 8 | 9 | class MessageStats: 10 | def __init__(self, pre_encode_time, received_time, total_channel_count, total_event_count): 11 | self.pre_encode_time = pre_encode_time 12 | self.received_time = received_time 13 | self.total_channel_count = total_channel_count 14 | self.total_event_count = 
total_event_count 15 | self.time_diff = received_time - pre_encode_time 16 | self.events_per_channel = total_event_count / total_channel_count 17 | 18 | def __repr__(self): 19 | return "MessageStats(pre_encode_time={0}, received_time={1}, total_channel_count={2}, total_event_count={3})".format( 20 | self.pre_encode_time, 21 | self.received_time, 22 | self.total_channel_count, 23 | self.total_event_count) 24 | 25 | class LatencyTestReadUntil(ReadUntil): 26 | 27 | def __init__(self, **kwargs): 28 | self.message_stats = [] 29 | super(LatencyTestReadUntil, self).__init__(**kwargs) 30 | 31 | def received_server_message(self, msg): 32 | if super(LatencyTestReadUntil, self).received_server_message(msg): 33 | print "Messages received: ", len(self.message_stats) 34 | self.message_stats.append(MessageStats(msg.pre_encode_time, 35 | int(time.time()), 36 | len(msg.channels_update), 37 | sum([len(d.events) for d in msg.channels_update.values()]))) 38 | 39 | 40 | class RunningState: 41 | def __init__(self): 42 | self.keep_running=True 43 | 44 | def closed(self, *args): 45 | self.keep_running=False 46 | 47 | def run_latency_test(): 48 | """Runs ReadUntil with particular setup conditions for a given duration, 49 | then moves onto running another set of setup conditions for a given 50 | duration. All the time accumulates statistics about each message coming 51 | back. 
When the test finishes, when all configurations have been run, the 52 | statistics are written to csv files ready for analysis/plotting.""" 53 | host = "ws://localhost:9200" 54 | 55 | 56 | time_and_setup_conditions = [ 57 | (120, {"ignore_first_events": 0, "padding_length_events": 0, "events_length": 100, "repetitions": 1}), 58 | (120, {"ignore_first_events": 0, "padding_length_events": 0, "events_length": 200, "repetitions": 1}), 59 | (120, {"ignore_first_events": 0, "padding_length_events": 0, "events_length": 500, "repetitions": 1}), 60 | (120, {"ignore_first_events": 0, "padding_length_events": 0, "events_length": 800, "repetitions": 1}), 61 | (120, {"ignore_first_events": 0, "padding_length_events": 0, "events_length": 1000, "repetitions": 1}), 62 | (240, {"ignore_first_events": 0, "padding_length_events": 0, "events_length": 1200, "repetitions": 1}), 63 | ] 64 | time_and_setup_iter = iter(time_and_setup_conditions) 65 | total_run_time = sum([x[0] for x in time_and_setup_conditions]) 66 | 67 | state=RunningState() 68 | duration, setup_conditions = time_and_setup_iter.next() 69 | with LatencyTestReadUntil(host=host, 70 | setup_conditions=setup_conditions, 71 | connection_closed=state.closed) as my_client: 72 | # Start sending stuff to our analyser 73 | my_client.start() 74 | change_time = time.time() + duration 75 | print "Client connection started. 
Will run for {0} seconds".format(total_run_time) 76 | while state.keep_running: 77 | time_now = time.time() 78 | if (time_now > change_time): 79 | try: 80 | duration, setup_conditions = time_and_setup_iter.next() 81 | except StopIteration: 82 | my_client.stop() 83 | break 84 | print "Changing to new conditions:", setup_conditions 85 | my_client.update_conditions(setup_conditions) 86 | change_time = time_now + duration 87 | make_report(my_client.message_stats) 88 | 89 | def make_report(ms): 90 | series={} 91 | for m in ms: 92 | series.setdefault(m.events_per_channel, []).append((m.time_diff, m.total_event_count)) 93 | for s, rows in series.items(): 94 | f = open("{0}_events_per_channel.csv".format(s), "ab") 95 | wr = csv.writer(f, delimiter=' ') 96 | for row in rows: 97 | wr.writerow(row) 98 | 99 | 100 | if __name__ == "__main__": 101 | try: 102 | run_latency_test() 103 | except socket_error as serr: 104 | if serr.errno != errno.ECONNREFUSED: 105 | raise serr 106 | print "Server not started?" 
# ---------------------------------------------------------------------------
# /RUBRIC/nanonet/latency_test/run_gnuplot_on_csv_files.py
import subprocess
import glob
import sys

# Sort the report files numerically by their leading events-per-channel prefix.
files = sorted(glob.glob("*_events_per_channel.csv"),
               cmp=lambda x, y: cmp(int(x.split("_")[0]), int(y.split("_")[0])))

gnuplot = subprocess.Popen(["gnuplot"], stdin=subprocess.PIPE)

def plot_args(f):
    # One plot clause per csv file: column 2 (event count) vs column 1 (time).
    return "\"{0}\" using 2:1 title '{0}' with points".format(f)

title = "Time taken in seconds to send different sized messages"
# Assemble the whole gnuplot session up front, then stream it to the process.
commands = [
    "set term dumb 72 40\n",
    "set xlabel 'message size/events'\n",
    "set ylabel 'time/s'\n",
    "set title '{0}'\n".format(title),
    "plot ",
    ", \\\n".join([plot_args(f) for f in files]),
    "\n",
    "ex\n",
]
for command in commands:
    gnuplot.stdin.write(command)
gnuplot.stdin.flush()
# ---------------------------------------------------------------------------
# /RUBRIC/nanonet/nanonettrain.py
#!/usr/bin/env python

import argparse
import json
import os
import sys
import pkg_resources
import tempfile
import numpy as np

from RUBRIC.nanonet import run_currennt_noisy
from RUBRIC.nanonet.cmdargs import FileExist, AutoBool
from RUBRIC.nanonet import iterate_fast5
from RUBRIC.nanonet import make_currennt_training_input_multi
from RUBRIC.nanonet import random_string, conf_line, tang_imap
from RUBRIC.nanonet import network_to_numpy


def get_parser():
    parser = argparse.ArgumentParser(
        description="A simple ANN training wrapper.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
parser.add_argument("--train", action=FileExist, 26 | help="Input training data, either a path to fast5 files or a single netcdf file", required=True) 27 | parser.add_argument("--train_list", action=FileExist, default=None, 28 | help="Strand list constaining training set") 29 | parser.add_argument("--section", default='template', choices=('template', 'complement'), 30 | help="Section of reads to train") 31 | 32 | parser.add_argument("--val", action=FileExist, 33 | help="Input validation data, either a path to fast5 files or a single netcdf file", required=True) 34 | parser.add_argument("--val_list", action=FileExist, default=None, 35 | help="Strand list constaining validation set") 36 | parser.add_argument("--workspace", default=tempfile.gettempdir(), 37 | help="Path for storing training and validation NetCDF files, if not specified a temporary file is used.") 38 | 39 | parser.add_argument("--output", help="Output prefix", required=True) 40 | 41 | parser.add_argument("--model", action=FileExist, 42 | default=pkg_resources.resource_filename('nanonet', 'data/default_model.tmpl'), 43 | help="ANN configuration file") 44 | parser.add_argument("--kmer_length", type=int, default=5, 45 | help="Length of kmers to learn.") 46 | parser.add_argument("--bases", type=str, default='ACGT', 47 | help="Alphabet of kmers to learn.") 48 | 49 | parser.add_argument("--device", type=int, default=0, 50 | help="ID of CUDA device to use.") 51 | parser.add_argument("--cuda", default=False, action=AutoBool, 52 | help="Use CUDA acceleration.") 53 | parser.add_argument("--window", type=int, nargs='+', default=[-1, 0, 1], 54 | help="The detailed list of the entire window.") 55 | 56 | training_parameter_group = parser.add_argument_group("Training Parameters.") 57 | training_parameter_group.add_argument("--max_epochs", type=int, default=500, 58 | help="Max training epocs, default 500") 59 | training_parameter_group.add_argument("--max_epochs_no_best", type=int, default=50, 60 | help="Stop training 
when no improvment for number of epocs, default 50" ) 61 | training_parameter_group.add_argument("--validate_every", type=int, default=5, 62 | help="Run validation data set every number of epocs.") 63 | training_parameter_group.add_argument("--parallel_sequences", type=int, default=125, 64 | help="Number of sequences in a min-batch") 65 | training_parameter_group.add_argument("--learning_rate", type=float, default=1e-5, 66 | help="Learning rate parameters of SGD." ) 67 | training_parameter_group.add_argument("--momentum", type=float, default=0.9, 68 | help="Momentum parameter of SGD." ) 69 | training_parameter_group.add_argument("--cache_path", default=tempfile.gettempdir(), 70 | help="Path for currennt temporary files.") 71 | 72 | return parser 73 | 74 | 75 | def prepare_input_file(in_out, **kwargs): 76 | path, in_list, output = in_out 77 | 78 | print "Creating training data NetCDF: {}".format(output) 79 | fast5_files = list(iterate_fast5(path, paths=True, strand_list=in_list)) 80 | return make_currennt_training_input_multi( 81 | fast5_files=fast5_files, 82 | netcdf_file=output, 83 | **kwargs 84 | ) 85 | 86 | 87 | def main(): 88 | if len(sys.argv) == 1: 89 | sys.argv.append("-h") 90 | args = get_parser().parse_args() 91 | 92 | if not args.cuda: 93 | args.nseqs = 1 94 | 95 | if not os.path.exists(args.workspace): 96 | os.makedirs(args.workspace) 97 | 98 | # file names for training 99 | tag = random_string() 100 | modelfile = os.path.abspath(args.model) 101 | outputfile = os.path.abspath(args.output) 102 | temp_name = os.path.abspath(os.path.join( 103 | args.workspace, 'nn_data_{}_'.format(tag) 104 | )) 105 | config_name = os.path.abspath(os.path.join( 106 | args.workspace, 'nn_{}.cfg'.format(tag) 107 | )) 108 | 109 | # Create currennt training input files 110 | trainfile = '{}{}'.format(temp_name, 'train.netcdf') 111 | valfile = '{}{}'.format(temp_name, 'validation.netcdf') 112 | inputs = ( 113 | (args.train, args.train_list, trainfile), 114 | (args.val, 
args.val_list, valfile), 115 | ) 116 | fix_kwargs = { 117 | 'window':args.window, 118 | 'kmer_len':args.kmer_length, 119 | 'alphabet':args.bases, 120 | 'callback_kwargs':{'section':args.section, 'kmer_len':args.kmer_length} 121 | } 122 | for results in tang_imap(prepare_input_file, inputs, fix_kwargs=fix_kwargs, threads=2): 123 | n_chunks, n_features, out_kmers = results 124 | if n_chunks == 0: 125 | raise RuntimeError("No training data written.") 126 | 127 | 128 | # fill-in templated items in model 129 | n_states = len(out_kmers) 130 | with open(modelfile, 'r') as model: 131 | mod = model.read() 132 | mod = mod.replace('
', args.section) 133 | mod = mod.replace('', str(n_features)) 134 | mod = mod.replace('', str(n_states)) 135 | try: 136 | mod_meta = json.loads(mod)['meta'] 137 | except Exception as e: 138 | mod_meta = dict() 139 | mod_meta['n_features'] = n_features 140 | mod_meta['kmers'] = out_kmers 141 | mod_meta['window'] = args.window 142 | 143 | modelfile = os.path.abspath(os.path.join( 144 | args.workspace, 'input_model.jsn' 145 | )) 146 | with open(modelfile, 'w') as model: 147 | model.write(mod) 148 | final_network = "{}_final.jsn".format(outputfile) 149 | best_network_prefix = "{}_auto".format(outputfile) 150 | # currennt appends some bits here 151 | 152 | # currennt cfg files 153 | with open(config_name, 'w') as currennt_cfg: 154 | if not args.cuda: 155 | currennt_cfg.write(conf_line('cuda', 'false')) 156 | # IO 157 | currennt_cfg.write(conf_line("cache_path", args.cache_path)) 158 | currennt_cfg.write(conf_line("network", modelfile)) 159 | currennt_cfg.write(conf_line("train_file", trainfile)) 160 | currennt_cfg.write(conf_line("val_file", valfile)) 161 | currennt_cfg.write(conf_line("save_network", final_network)) 162 | currennt_cfg.write(conf_line("autosave_prefix", best_network_prefix)) 163 | # Tunable parameters 164 | currennt_cfg.write(conf_line("max_epochs", args.max_epochs)) 165 | currennt_cfg.write(conf_line("max_epochs_no_best", args.max_epochs_no_best)) 166 | currennt_cfg.write(conf_line("validate_every", args.validate_every)) 167 | currennt_cfg.write(conf_line("parallel_sequences", args.parallel_sequences)) 168 | currennt_cfg.write(conf_line("learning_rate", args.learning_rate)) 169 | currennt_cfg.write(conf_line("momentum", args.momentum)) 170 | # Fixed parameters 171 | currennt_cfg.write(conf_line("train", "true")) 172 | currennt_cfg.write(conf_line("weights_dist", "normal")) 173 | currennt_cfg.write(conf_line("weights_normal_sigma", "0.1")) 174 | currennt_cfg.write(conf_line("weights_normal_mean", "0")) 175 | currennt_cfg.write(conf_line("stochastic", 
"true")) 176 | currennt_cfg.write(conf_line("input_noise_sigma", "0.0")) 177 | currennt_cfg.write(conf_line("shuffle_fractions", "false")) 178 | currennt_cfg.write(conf_line("shuffle_sequences", "true")) 179 | currennt_cfg.write(conf_line("autosave_best", "true")) 180 | 181 | # run currennt 182 | print "\n\nRunning currennt with: {}".format(config_name) 183 | run_currennt_noisy(config_name, device=args.device) 184 | 185 | # Currennt won't pass through our meta in the model, amend the output 186 | # and write out a numpy version of the network 187 | best_network = "{}.best.jsn".format(best_network_prefix) 188 | best_network_numpy = "{}_best.npy".format(outputfile) 189 | 190 | print "Adding model meta to currennt best network: {}".format(best_network) 191 | mod = json.load(open(best_network, 'r')) 192 | mod['meta'] = mod_meta 193 | json.dump(mod, open(best_network, 'w')) 194 | print "Transforming network to numpy pickle: {}".format(best_network_numpy) 195 | mod = network_to_numpy(mod) 196 | np.save(best_network_numpy, mod) 197 | 198 | 199 | 200 | if __name__ == '__main__': 201 | main() 202 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/resolve.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from datetime import datetime 3 | import glob 4 | import os 5 | import re 6 | import sys 7 | 8 | reads_dir = sys.argv[1] 9 | 10 | sampler_reads = {str(n+1): {} for n in range(512)} 11 | with open('read_log.csv', 'rb') as log_file: 12 | reader = csv.reader(log_file) 13 | reader.next() # skip header 14 | i = 0 15 | for row in reader: 16 | [date_str, channel_name, read_number, delay] = row 17 | date = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S.%f') 18 | sampler_reads[channel_name][read_number] = date 19 | 20 | with open('missing_files.csv', 'wb') as missing_f, open('matched_files.csv', 'wb') as matched_f: 21 | missing = csv.writer(missing_f) 22 | 
missing.writerow(["Channel", "Read", "File"]) 23 | matched = csv.writer(matched_f) 24 | matched.writerow(["Channel", "Read", "Time from seen to written", "File"]) 25 | 26 | regex = re.compile('.*_ch([0-9]+)_read([0-9]+)_.*\\.fast5') 27 | for (dirpath, dirnames, filenames) in os.walk(reads_dir): 28 | for filename in filenames: 29 | match = regex.match(filename) 30 | if match: 31 | channel_name = match.group(1) 32 | read_number = match.group(2) 33 | pretty_path = os.path.join(os.path.basename(dirpath), filename) 34 | try: 35 | date = sampler_reads[channel_name][read_number] 36 | except KeyError: 37 | missing.writerow([channel_name, read_number, pretty_path]) 38 | else: 39 | path = os.path.join(dirpath, filename) 40 | last_mod = datetime.fromtimestamp(os.path.getmtime(path)) 41 | matched.writerow([channel_name, read_number, str(last_mod - date), pretty_path]) 42 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/sample_data/904896_ch170_read104_strand.fast5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/RUBRIC/2895b15ded82a85142f4d68e4feb5d5526aebb09/RUBRIC/nanonet/sample_data/904896_ch170_read104_strand.fast5 -------------------------------------------------------------------------------- /RUBRIC/nanonet/sample_data/904896_ch170_read105_strand.fast5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/RUBRIC/2895b15ded82a85142f4d68e4feb5d5526aebb09/RUBRIC/nanonet/sample_data/904896_ch170_read105_strand.fast5 -------------------------------------------------------------------------------- /RUBRIC/nanonet/sample_data/904896_ch170_read108_strand.fast5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sandialabs/RUBRIC/2895b15ded82a85142f4d68e4feb5d5526aebb09/RUBRIC/nanonet/sample_data/904896_ch170_read108_strand.fast5 -------------------------------------------------------------------------------- /RUBRIC/nanonet/sample_data/904896_ch170_read111_strand.fast5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/RUBRIC/2895b15ded82a85142f4d68e4feb5d5526aebb09/RUBRIC/nanonet/sample_data/904896_ch170_read111_strand.fast5 -------------------------------------------------------------------------------- /RUBRIC/nanonet/sample_data/904896_ch170_read114_strand.fast5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/RUBRIC/2895b15ded82a85142f4d68e4feb5d5526aebb09/RUBRIC/nanonet/sample_data/904896_ch170_read114_strand.fast5 -------------------------------------------------------------------------------- /RUBRIC/nanonet/test/test_nn.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from RUBRIC import nanonet as nn 4 | import numpy as np 5 | 6 | class ANNTest(unittest.TestCase): 7 | @classmethod 8 | def setUpClass(self): 9 | print '* ANN' 10 | np.random.seed(0xdeadbeef) 11 | self.prec = 6 12 | self._NSTEP = 10 13 | self._NFEATURES = 4 14 | self._SIZE = 5 15 | 16 | self.W = np.random.normal(size=(self._NFEATURES, self._SIZE)) 17 | self.b = np.random.normal(size=self._SIZE) 18 | self.x = np.random.normal(size=(self._NSTEP, self._NFEATURES)) 19 | self.res = (self.x.dot(self.W) + self.b).astype(nn.dtype) 20 | 21 | def test_000_single_layer_linear(self): 22 | network = nn.FeedForward(self.W, self.b, nn.linear) 23 | self.assertEqual(network.in_size, self._NFEATURES) 24 | self.assertEqual(network.out_size, self._SIZE) 25 | np.testing.assert_array_almost_equal(network.run(self.x), self.res) 26 | 27 | def test_001_single_layer_tanh(self): 
28 | network = nn.FeedForward(self.W, self.b, nn.tanh) 29 | self.assertEqual(network.in_size, self._NFEATURES) 30 | self.assertEqual(network.out_size, self._SIZE) 31 | np.testing.assert_array_almost_equal(network.run(self.x), np.tanh(self.res)) 32 | 33 | def test_002_parallel_layers(self): 34 | l1 = nn.FeedForward(self.W, self.b, nn.tanh) 35 | l2 = nn.FeedForward(self.W, self.b, nn.tanh) 36 | network = nn.Parallel([l1, l2]) 37 | self.assertEqual(network.in_size, self._NFEATURES) 38 | self.assertEqual(network.out_size, 2 * self._SIZE) 39 | 40 | res = network.run(self.x) 41 | np.testing.assert_array_equal(res[:,:self._SIZE], res[:,self._SIZE:]) 42 | 43 | def test_003_simple_serial(self): 44 | W2 = np.random.normal(size=(self._SIZE, self._SIZE)) 45 | res = self.x.dot(self.W).dot(W2) 46 | 47 | l1 = nn.FeedForward(self.W, fun=nn.linear) 48 | l2 = nn.FeedForward(W2, fun=nn.linear) 49 | network = nn.Serial([l1, l2]) 50 | self.assertEqual(network.in_size, self._NFEATURES) 51 | self.assertEqual(network.out_size, self._SIZE) 52 | 53 | np.testing.assert_array_almost_equal(network.run(self.x), res) 54 | 55 | def test_004_reverse(self): 56 | network1 = nn.FeedForward(self.W, self.b, nn.tanh) 57 | res1 = network1.run(self.x) 58 | network2 = nn.Reverse(network1) 59 | res2 = network2.run(self.x) 60 | self.assertEqual(network1.in_size, network2.in_size,) 61 | self.assertEqual(network1.out_size, network2.out_size) 62 | 63 | np.testing.assert_array_equal(res1, res2, self.prec) 64 | 65 | def test_005_poormans_birnn(self): 66 | layer1 = nn.FeedForward(self.W, self.b, nn.tanh) 67 | layer2 = nn.FeedForward(self.W, self.b, nn.tanh) 68 | network = nn.BiRNN(layer1, layer2) 69 | 70 | res = network.run(self.x) 71 | np.testing.assert_array_equal(res[:,:self._SIZE], res[:,self._SIZE:], self.prec) 72 | 73 | def test_006_softmax(self): 74 | network = nn.SoftMax(self.W, self.b) 75 | 76 | res = network.run(self.x) 77 | res_sum = res.sum(axis=1) 78 | self.assertTrue(np.allclose(res_sum, 1.0)) 79 | 
80 | def test_007_rnn_no_state(self): 81 | W1 = np.vstack((np.zeros((self._SIZE, self._SIZE)), self.W)) 82 | network = nn.SimpleRNN(W1, b=self.b, fun=nn.linear) 83 | 84 | res = network.run(self.x) 85 | np.testing.assert_almost_equal(res, self.res, self.prec) 86 | 87 | def test_008_rnn_no_input(self): 88 | W1 = np.random.normal(size=(self._SIZE, self._SIZE)) 89 | W2 = np.vstack((W1, np.zeros((self._NFEATURES, self._SIZE)))) 90 | network = nn.SimpleRNN(W2, fun=nn.linear) 91 | 92 | res = network.run(self.x) 93 | np.testing.assert_almost_equal(res, 0.0, self.prec) 94 | 95 | def test_009_rnn_no_input_with_bias(self): 96 | W1 = np.random.normal(size=(self._SIZE, self._SIZE)) 97 | W2 = np.vstack((W1, np.zeros((self._NFEATURES, self._SIZE)))) 98 | network = nn.SimpleRNN(W2, b=self.b, fun=nn.linear) 99 | 100 | res = network.run(self.x) 101 | res2 = np.zeros(self._SIZE, dtype=nn.dtype) 102 | for i in xrange(self._NSTEP): 103 | res2 = res2.dot(W1) + self.b 104 | np.testing.assert_allclose(res[i], res2, self.prec) 105 | 106 | def test_010_birnn_no_input_with_bias(self): 107 | W1 = np.random.normal(size=(self._SIZE, self._SIZE)) 108 | W2 = np.vstack((W1, np.zeros((self._NFEATURES, self._SIZE)))) 109 | layer1 = nn.SimpleRNN(W2, b=self.b) 110 | layer2 = nn.SimpleRNN(W2, b=self.b) 111 | network = nn.BiRNN(layer1, layer2) 112 | 113 | res = network.run(self.x) 114 | np.testing.assert_almost_equal(res[:,:self._SIZE], res[::-1,self._SIZE:], self.prec) 115 | -------------------------------------------------------------------------------- /RUBRIC/nanonet/watcher.py: -------------------------------------------------------------------------------- 1 | import time 2 | from multiprocessing import Process, Queue 3 | 4 | try: 5 | from watchdog.observers import Observer 6 | from watchdog.events import RegexMatchingEventHandler 7 | except ImportError: 8 | raise ImportError('Nanonet component error: cannot import optional watchdog module. 
Install with pip.') 9 | 10 | 11 | class Fast5Watcher(object): 12 | 13 | def __init__(self, path, timeout=10, regex='.*\.fast5$', initial_jobs=None): 14 | """Watch a path and yield modified files 15 | 16 | :param path: path to watch for files. 17 | :param timeout: timeout period for newly modified files. 18 | :param regex: regex filter for files to consifer. 19 | :param initial_jobs: pre-existing files to process. 20 | """ 21 | self.path = path 22 | self.timeout = timeout 23 | self.regex = regex 24 | self.initial_jobs = initial_jobs 25 | self.q = Queue() 26 | self.watcher = Process(target=self._watcher) 27 | self.yielded = set() 28 | 29 | def _watcher(self): 30 | handler = RegexMatchingEventHandler(regexes=[self.regex], ignore_directories=True) 31 | handler.on_modified = lambda x: self.q.put(x.src_path) 32 | observer = Observer() 33 | observer.schedule(handler, self.path) 34 | observer.start() 35 | try: 36 | while True: 37 | time.sleep(1) 38 | except KeyboardInterrupt: 39 | observer.stop() 40 | observer.join() 41 | 42 | def __iter__(self): 43 | self.watcher.start() 44 | 45 | if self.initial_jobs is not None: 46 | for item in self.initial_jobs: 47 | if item not in self.yielded: 48 | yield item 49 | self.yielded.add(item) 50 | 51 | while True: 52 | try: 53 | item = self.q.get(True, self.timeout) 54 | except: 55 | break 56 | else: 57 | if item not in self.yielded: 58 | yield item 59 | self.yielded.add(item) 60 | self.watcher.terminate() 61 | -------------------------------------------------------------------------------- /RUBRIC/setup_nanonet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import re 4 | import sys 5 | 6 | import numpy 7 | from setuptools import setup, find_packages, Extension 8 | 9 | print """ 10 | ******************************************************************* 11 | This Source Code Form is subject to the terms of the Mozilla Public 12 | License, v. 2.0. 
If a copy of the MPL was not distributed with this 13 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 14 | 15 | (c) 2016 Oxford Nanopore Technologies Ltd. 16 | ******************************************************************* 17 | """ 18 | 19 | # Get the version number from __init__.py 20 | pkg_name = 'nanonet' 21 | pkg_path = os.path.join(os.path.dirname(__file__), pkg_name) 22 | verstrline = open(os.path.join(pkg_path, '__init__.py'), 'r').read() 23 | vsre = r"^__version__ = ['\"]([^'\"]*)['\"]" 24 | mo = re.search(vsre, verstrline, re.M) 25 | if mo: 26 | version = mo.group(1) 27 | else: 28 | raise RuntimeError('Unable to find version string in "{}/__init__.py".'.format(pkg_name)) 29 | 30 | system = platform.system() 31 | print "System is {}".format(system) 32 | print "By default the 2D basecaller (standard and OpenCL) are not built." 33 | print "To enable these use 'with2d' and 'opencl2d' command line options." 34 | print 35 | 36 | with_2d = True if 'with2d' in sys.argv else False 37 | if with_2d: 38 | sys.argv.remove('with2d') 39 | 40 | opencl_2d = True if 'opencl2d' in sys.argv else False 41 | if opencl_2d: 42 | with_2d = True 43 | sys.argv.remove('opencl2d') 44 | 45 | mingw = True if "mingw" in sys.argv else False 46 | if mingw: 47 | sys.argv.remove('mingw') 48 | # patch distutils to force our compiler class. With distutils build 49 | # command we can use the commandline option to set compiler but 50 | # develop command does accept this. C++ extensions also aren't 51 | # recognised as being unchanged meaning they get built twice with 52 | # build --compiler=mingw32 develop 53 | # all very annoying. 
54 | import distutils.cygwinccompiler 55 | from nanoccompiler import Mingw64CCompiler 56 | 57 | distutils.cygwinccompiler.Mingw32CCompiler = Mingw64CCompiler 58 | distutils.ccompiler.get_default_compiler = lambda x: 'mingw32' 59 | 60 | main_include = os.path.join(os.path.dirname(__file__), 'nanonet', 'include') 61 | include_dirs = [main_include] 62 | event_detect_include = [] 63 | boost_inc = [] 64 | boost_lib_path = [] 65 | boost_libs = [] 66 | opencl_include = [] 67 | opencl_lib_path = [] 68 | opencl_libs = [] 69 | 70 | c_compile_args = ['-pedantic', '-Wall', '-std=c99'] 71 | cpp_compile_args = [] 72 | optimisation = ['-DNDEBUG'] 73 | 74 | if system == 'Darwin': 75 | print "Adding OSX compile/link options" 76 | optimisation.extend(['-O3', '-fstrict-aliasing']) 77 | cpp_compile_args.extend(['-std=c++0x', '-Wno-unused-local-typedefs']) 78 | # may wish to edit - required for 2D 79 | boost_inc = ['/opt/local/include/'] 80 | boost_libs.append('boost_python-mt') 81 | elif system == 'Windows': 82 | event_detect_include.append(os.path.join(pkg_path, 'eventdetection')) 83 | if not mingw: 84 | print "Adding windows (MSVC) compile/link options" 85 | optimisation = ['/O2', '/Gs-'] 86 | c_compile_args = ['/wd4820'] 87 | cpp_compile_args.extend(['/EHsc', '/wd4996']) 88 | include_dirs.append(os.path.join(main_include, 'extras')) 89 | boost_location = os.path.join('c:', os.sep, 'local', 'boost_1_55_0') 90 | boost_lib_name = 'lib64-msvc-9.0' 91 | if opencl_2d: 92 | raise NotImplementedError('OpenCL 2D caller not currently supported on Windows with MSVC.') 93 | else: 94 | print "Adding windows (mingw64) compile/link options" 95 | optimisation.extend(['-O3', '-fstrict-aliasing']) 96 | c_compile_args.extend(['-DMS_WIN64', '-D_hypot=hypot']) 97 | cpp_compile_args.extend(['-DMS_WIN64', '-D_hypot=hypot', '-Wno-unused-local-typedefs']) 98 | boost_location = os.environ.get( 99 | 'BOOST_ROOT', os.path.join('c:', os.sep, 'local', 'boost_1_55_0')) 100 | boost_lib_name = os.environ.get( 101 
| 'BOOST_LIB', os.path.join('stage', 'lib')) 102 | boost_libs.append( 103 | os.environ.get('BOOST_PYTHON', 'boost_python-mgw48-mt-1_55')) 104 | # may wish to edit - required for OpenCL 2D, this will compile 105 | # but likely die at runtime. 106 | if opencl_2d: 107 | raise NotImplementedError('OpenCL 2D caller not currently supported on Windows with mingw64.') 108 | # nvidia_opencl = os.path.join('c:', os.sep, 109 | # 'Program Files', 'NVIDIA GPU Computing Toolkit', 'CUDA', 'v7.5') 110 | # opencl_include = [os.environ.get('OPENCL_INC', os.path.join(nvidia_opencl, 'include'))] 111 | # opencl_lib_path = [os.environ.get('OPENCL_LIB', os.path.join(nvidia_opencl, 'lib', 'x64'))] 112 | # opencl_libs.append('OpenCL') 113 | boost_lib_path = [os.path.join(boost_location, boost_lib_name)] 114 | boost_inc = [boost_location] 115 | else: 116 | print "Adding Linux(?) compile/link options" 117 | optimisation.extend(['-O3', '-fstrict-aliasing']) 118 | cpp_compile_args.extend(['-std=c++0x', '-Wno-unused-local-typedefs']) 119 | boost_libs.append('boost_python') 120 | # may wish to edit - required for OpenCL 2D 121 | opencl_include = [os.environ.get('OPENCL_INC')] 122 | opencl_lib_path = [os.environ.get('OPENCL_LIB', os.path.join(os.sep, 'opt', 'intel', 'opencl'))] 123 | opencl_libs.append('OpenCL') 124 | c_compile_args.extend(optimisation) 125 | cpp_compile_args.extend(optimisation) 126 | 127 | extensions = [] 128 | 129 | extensions.append(Extension( 130 | 'nanonetfilters', 131 | sources=[os.path.join(pkg_path, 'eventdetection', 'filters.c')], 132 | include_dirs=include_dirs + event_detect_include, 133 | extra_compile_args=c_compile_args 134 | )) 135 | 136 | extensions.append(Extension( 137 | 'nanonetdecode', 138 | sources=[os.path.join(pkg_path, 'decoding.cpp')], 139 | include_dirs=include_dirs, 140 | extra_compile_args=cpp_compile_args 141 | )) 142 | 143 | if with_2d: 144 | caller_2d_path = os.path.join('nanonet', 'caller_2d') 145 | extensions.append(Extension( 146 | 
'nanonet.caller_2d.viterbi_2d.viterbi_2d', 147 | include_dirs=[os.path.join(caller_2d_path, x) for x in 148 | ('viterbi_2d', 'common')] + 149 | [numpy.get_include()] + boost_inc + include_dirs, 150 | sources=[os.path.join(caller_2d_path, 'viterbi_2d', x) for x in 151 | ('viterbi_2d_py.cpp', 'viterbi_2d.cpp')], 152 | depends=[os.path.join(caller_2d_path, x) for x in 153 | ('viterbi_2d_py.h', 'viterbi_2d.h')] + 154 | [os.path.join(caller_2d_path, 'common', x) for x in 155 | ('bp_tools.h', 'data_view.h', 'utils.h', 'view_numpy_arrays.h')], 156 | extra_compile_args=cpp_compile_args, 157 | library_dirs=boost_lib_path, 158 | libraries=boost_libs 159 | )) 160 | 161 | extensions.append(Extension( 162 | 'nanonet.caller_2d.pair_align.pair_align', 163 | include_dirs=[os.path.join(caller_2d_path, 'pair_align')] + 164 | boost_inc + include_dirs, 165 | sources=[os.path.join(caller_2d_path, 'pair_align', x) for x in 166 | ('pair_align_py.cpp', 'nw_align.cpp', 'mm_align.cpp')], 167 | depends=[os.path.join(caller_2d_path, 'pair_align', x) for x in 168 | ('pair_align_py.h', 'pair_align.h', 'nw_align.h', 'mm_align.h')], 169 | extra_compile_args=cpp_compile_args, 170 | library_dirs=boost_lib_path, 171 | libraries=boost_libs 172 | )) 173 | 174 | extensions.append(Extension( 175 | 'nanonet.caller_2d.common.stub', 176 | include_dirs=[os.path.join(caller_2d_path, 'common')] + 177 | [numpy.get_include()] + boost_inc + include_dirs, 178 | sources=[os.path.join(caller_2d_path, 'common', 'stub_py.cpp')], 179 | depends=[os.path.join(caller_2d_path, 'common', x) for x in 180 | ('bp_tools.h', 'data_view.h', 'utils.h', 'view_numpy_arrays.h')], 181 | extra_compile_args=cpp_compile_args, 182 | library_dirs=boost_lib_path, 183 | libraries=boost_libs 184 | )) 185 | 186 | if opencl_2d: 187 | print "Setting up OpenCL 2D basecall extension, this may need some tinkering" 188 | extensions.append(Extension( 189 | 'nanonet.caller_2d.viterbi_2d_ocl.viterbi_2d_ocl', 190 | 
include_dirs=[os.path.join(caller_2d_path, x) for x in 191 | ('viterbi_2d_ocl', 'common')] + 192 | [numpy.get_include()] + boost_inc + include_dirs + opencl_include, 193 | sources=[os.path.join(caller_2d_path, 'viterbi_2d_ocl', x) for x in 194 | ('viterbi_2d_ocl_py.cpp', 'viterbi_2d_ocl.cpp', 'proxyCL.cpp')], 195 | depends=[os.path.join(caller_2d_path, 'viterbi_2d_ocl', x) for x in 196 | ('viterbi_2d_ocl.py.h', 'viterbi_2d_ocl.h', 'proxyCL.h')] + 197 | [os.path.join(caller_2d_path, 'common', x) for x in 198 | ('bp_tools.h', 'data_view.h', 'utils.h', 'view_numpy_arrays.h')], 199 | extra_compile_args=cpp_compile_args, 200 | library_dirs=boost_lib_path + opencl_lib_path, 201 | libraries=boost_libs + opencl_libs 202 | )) 203 | 204 | requires = [ 205 | 'h5py', 206 | 'myriad >=0.1.2', 207 | 'numpy', 208 | ] 209 | extra_requires = { 210 | 'currennt': ['netCDF4'], 211 | 'watcher': ['watchdog'], 212 | 'opencl': ['pyopencl'], 213 | 'simulate': ['biopython'], 214 | } 215 | 216 | # Making a whl for windows 217 | bdist_args = dict() 218 | if system == 'Windows' and "bdist_wheel" in sys.argv: 219 | from setuptools import Distribution 220 | from distutils.spawn import find_executable 221 | from glob import glob 222 | 223 | 224 | class BinaryDistribution(Distribution): 225 | def is_pure(self): 226 | return False 227 | 228 | def has_ext_modules(self): 229 | return True 230 | 231 | 232 | blibs = [os.path.join(boost_location, boost_lib_name, 'lib{}.dll'.format(x)) for x in boost_libs] 233 | mingwdir = os.path.dirname(find_executable('gcc')) 234 | mingwlibs = glob(os.path.join(mingwdir, '*.dll')) 235 | mingwlibs = [os.path.join(mingwdir, x) for x in mingwlibs] 236 | dlls = [os.path.relpath(x) for x in blibs + mingwlibs] 237 | bdist_args = { 238 | 'scripts': dlls, 239 | 'distclass': BinaryDistribution 240 | } 241 | 242 | setup( 243 | name='nanonet', 244 | version=version, 245 | description='A simple recurrent neural network based basecaller nanopore data.', 246 | maintainer='Chris 
Wright', 247 | maintainer_email='chris.wright@nanoporetech.com', 248 | url='http://www.nanoporetech.com', 249 | packages=find_packages(exclude=["*.test", "*.test.*", "test.*", "test"]), 250 | package_data={'nanonet.data': ['nanonet/data/*']}, 251 | include_package_data=True, 252 | tests_require=requires, 253 | install_requires=requires, 254 | extras_require=extra_requires, 255 | dependency_links=[], 256 | zip_safe=True, 257 | ext_modules=extensions, 258 | test_suite='discover_tests', 259 | entry_points={ 260 | 'console_scripts': [ 261 | 'nanonetcall = nanonet.nanonetcall:main', 262 | 'nanonet2d = nanonet.nanonetcall_2d:main', 263 | 'nanonettrain = nanonet.nanonettrain:main', 264 | 'simulate_minion = nanonet.simulate.simulate_minion:main', 265 | ] 266 | }, 267 | **bdist_args 268 | ) 269 | 270 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import setup 4 | 5 | setup( 6 | name='RUBRIC', 7 | version='1.0', 8 | packages=['RUBRIC'], 9 | url='https://github.com/ragak/RUBRIC', 10 | license='MPL 2.0', 11 | author='Harrison Edwards, Raga Krishnakumar, and Michael Bartsch', 12 | author_email='harrison.edwards@mail.utoronto.ca', 13 | description='Read-Until with Basecalling and Reference-Informed Criteria (RUBRIC)', 14 | install_requires=['configargparse', 'h5py', 'numpy', 'termcolor', 'thrift==0.9.2', 'ws4py', 'biopython', 'psutil'] 15 | ) 16 | 17 | # some extra lines to run the nanonet setup file... 18 | cwd = os.getcwd() 19 | cmd_string = os.path.join(cwd, 'RUBRIC', 'setup_nanonet.py') 20 | cmd_string = 'python ' + cmd_string + ' develop' 21 | os.system(cmd_string) 22 | 23 | 24 | 25 | --------------------------------------------------------------------------------