├── .gitignore
├── LICENSE.md
├── Makefile
├── README.md
├── docs
│   ├── conf.py
│   └── index.rst
├── read_until.gif
├── read_until
│   ├── __init__.py
│   ├── base.py
│   ├── identification.py
│   ├── jsonrpc.py
│   └── simple.py
├── requirements.txt
└── setup.py

/.gitignore:
--------------------------------------------------------------------------------
*.pyd
*.pyc
*.so
*#
*~
*.swp
.#*
*.egg-info
build
dist
deb_dist
tmp/
*.gz
*__pycache__*
*.fna

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.

(c) 2016 Oxford Nanopore Technologies Ltd.


Mozilla Public License Version 2.0
==================================

### 1. Definitions

**1.1. “Contributor”**
means each individual or legal entity that creates, contributes to
the creation of, or owns Covered Software.

**1.2. “Contributor Version”**
means the combination of the Contributions of others (if any) used
by a Contributor and that particular Contributor's Contribution.

**1.3. “Contribution”**
means Covered Software of a particular Contributor.

**1.4. “Covered Software”**
means Source Code Form to which the initial Contributor has attached
the notice in Exhibit A, the Executable Form of such Source Code
Form, and Modifications of such Source Code Form, in each case
including portions thereof.

**1.5. “Incompatible With Secondary Licenses”**
means

* **(a)** that the initial Contributor has attached the notice described
  in Exhibit B to the Covered Software; or
* **(b)** that the Covered Software was made available under the terms of
  version 1.1 or earlier of the License, but not also under the
  terms of a Secondary License.

**1.6. “Executable Form”**
means any form of the work other than Source Code Form.

**1.7. “Larger Work”**
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.

**1.8. “License”**
means this document.

**1.9. “Licensable”**
means having the right to grant, to the maximum extent possible,
whether at the time of the initial grant or subsequently, any and
all of the rights conveyed by this License.

**1.10. “Modifications”**
means any of the following:

* **(a)** any file in Source Code Form that results from an addition to,
  deletion from, or modification of the contents of Covered
  Software; or
* **(b)** any new file in Source Code Form that contains any Covered
  Software.

**1.11. “Patent Claims” of a Contributor**
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the
License, by the making, using, selling, offering for sale, having
made, import, or transfer of either its Contributions or its
Contributor Version.

**1.12.
“Secondary License”** 72 | means either the GNU General Public License, Version 2.0, the GNU 73 | Lesser General Public License, Version 2.1, the GNU Affero General 74 | Public License, Version 3.0, or any later versions of those 75 | licenses. 76 | 77 | **1.13. “Source Code Form”** 78 | means the form of the work preferred for making modifications. 79 | 80 | **1.14. “You” (or “Your”)** 81 | means an individual or a legal entity exercising rights under this 82 | License. For legal entities, “You” includes any entity that 83 | controls, is controlled by, or is under common control with You. For 84 | purposes of this definition, “control” means **(a)** the power, direct 85 | or indirect, to cause the direction or management of such entity, 86 | whether by contract or otherwise, or **(b)** ownership of more than 87 | fifty percent (50%) of the outstanding shares or beneficial 88 | ownership of such entity. 89 | 90 | 91 | ### 2. License Grants and Conditions 92 | 93 | #### 2.1. Grants 94 | 95 | Each Contributor hereby grants You a world-wide, royalty-free, 96 | non-exclusive license: 97 | 98 | * **(a)** under intellectual property rights (other than patent or trademark) 99 | Licensable by such Contributor to use, reproduce, make available, 100 | modify, display, perform, distribute, and otherwise exploit its 101 | Contributions, either on an unmodified basis, with Modifications, or 102 | as part of a Larger Work; and 103 | * **(b)** under Patent Claims of such Contributor to make, use, sell, offer 104 | for sale, have made, import, and otherwise transfer either its 105 | Contributions or its Contributor Version. 106 | 107 | #### 2.2. Effective Date 108 | 109 | The licenses granted in Section 2.1 with respect to any Contribution 110 | become effective for each Contribution on the date the Contributor first 111 | distributes such Contribution. 112 | 113 | #### 2.3. Limitations on Grant Scope 114 | 115 | The licenses granted in this Section 2 are the only rights granted under 116 | this License. No additional rights or licenses will be implied from the 117 | distribution or licensing of Covered Software under this License. 118 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 119 | Contributor: 120 | 121 | * **(a)** for any code that a Contributor has removed from Covered Software; 122 | or 123 | * **(b)** for infringements caused by: **(i)** Your and any other third party's 124 | modifications of Covered Software, or **(ii)** the combination of its 125 | Contributions with other software (except as part of its Contributor 126 | Version); or 127 | * **(c)** under Patent Claims infringed by Covered Software in the absence of 128 | its Contributions. 129 | 130 | This License does not grant any rights in the trademarks, service marks, 131 | or logos of any Contributor (except as may be necessary to comply with 132 | the notice requirements in Section 3.4). 133 | 134 | #### 2.4. Subsequent Licenses 135 | 136 | No Contributor makes additional grants as a result of Your choice to 137 | distribute the Covered Software under a subsequent version of this 138 | License (see Section 10.2) or under the terms of a Secondary License (if 139 | permitted under the terms of Section 3.3). 140 | 141 | #### 2.5. Representation 142 | 143 | Each Contributor represents that the Contributor believes its 144 | Contributions are its original creation(s) or it has sufficient rights 145 | to grant the rights to its Contributions conveyed by this License. 146 | 147 | #### 2.6. 
Fair Use 148 | 149 | This License is not intended to limit any rights You have under 150 | applicable copyright doctrines of fair use, fair dealing, or other 151 | equivalents. 152 | 153 | #### 2.7. Conditions 154 | 155 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 156 | in Section 2.1. 157 | 158 | 159 | ### 3. Responsibilities 160 | 161 | #### 3.1. Distribution of Source Form 162 | 163 | All distribution of Covered Software in Source Code Form, including any 164 | Modifications that You create or to which You contribute, must be under 165 | the terms of this License. You must inform recipients that the Source 166 | Code Form of the Covered Software is governed by the terms of this 167 | License, and how they can obtain a copy of this License. You may not 168 | attempt to alter or restrict the recipients' rights in the Source Code 169 | Form. 170 | 171 | #### 3.2. Distribution of Executable Form 172 | 173 | If You distribute Covered Software in Executable Form then: 174 | 175 | * **(a)** such Covered Software must also be made available in Source Code 176 | Form, as described in Section 3.1, and You must inform recipients of 177 | the Executable Form how they can obtain a copy of such Source Code 178 | Form by reasonable means in a timely manner, at a charge no more 179 | than the cost of distribution to the recipient; and 180 | 181 | * **(b)** You may distribute such Executable Form under the terms of this 182 | License, or sublicense it under different terms, provided that the 183 | license for the Executable Form does not attempt to limit or alter 184 | the recipients' rights in the Source Code Form under this License. 185 | 186 | #### 3.3. Distribution of a Larger Work 187 | 188 | You may create and distribute a Larger Work under terms of Your choice, 189 | provided that You also comply with the requirements of this License for 190 | the Covered Software. If the Larger Work is a combination of Covered 191 | Software with a work governed by one or more Secondary Licenses, and the 192 | Covered Software is not Incompatible With Secondary Licenses, this 193 | License permits You to additionally distribute such Covered Software 194 | under the terms of such Secondary License(s), so that the recipient of 195 | the Larger Work may, at their option, further distribute the Covered 196 | Software under the terms of either this License or such Secondary 197 | License(s). 198 | 199 | #### 3.4. Notices 200 | 201 | You may not remove or alter the substance of any license notices 202 | (including copyright notices, patent notices, disclaimers of warranty, 203 | or limitations of liability) contained within the Source Code Form of 204 | the Covered Software, except that You may alter any license notices to 205 | the extent required to remedy known factual inaccuracies. 206 | 207 | #### 3.5. Application of Additional Terms 208 | 209 | You may choose to offer, and to charge a fee for, warranty, support, 210 | indemnity or liability obligations to one or more recipients of Covered 211 | Software. However, You may do so only on Your own behalf, and not on 212 | behalf of any Contributor. You must make it absolutely clear that any 213 | such warranty, support, indemnity, or liability obligation is offered by 214 | You alone, and You hereby agree to indemnify every Contributor for any 215 | liability incurred by such Contributor as a result of warranty, support, 216 | indemnity or liability terms You offer. 
You may include additional 217 | disclaimers of warranty and limitations of liability specific to any 218 | jurisdiction. 219 | 220 | 221 | ### 4. Inability to Comply Due to Statute or Regulation 222 | 223 | If it is impossible for You to comply with any of the terms of this 224 | License with respect to some or all of the Covered Software due to 225 | statute, judicial order, or regulation then You must: **(a)** comply with 226 | the terms of this License to the maximum extent possible; and **(b)** 227 | describe the limitations and the code they affect. Such description must 228 | be placed in a text file included with all distributions of the Covered 229 | Software under this License. Except to the extent prohibited by statute 230 | or regulation, such description must be sufficiently detailed for a 231 | recipient of ordinary skill to be able to understand it. 232 | 233 | 234 | ### 5. Termination 235 | 236 | **5.1.** The rights granted under this License will terminate automatically 237 | if You fail to comply with any of its terms. However, if You become 238 | compliant, then the rights granted under this License from a particular 239 | Contributor are reinstated **(a)** provisionally, unless and until such 240 | Contributor explicitly and finally terminates Your grants, and **(b)** on an 241 | ongoing basis, if such Contributor fails to notify You of the 242 | non-compliance by some reasonable means prior to 60 days after You have 243 | come back into compliance. Moreover, Your grants from a particular 244 | Contributor are reinstated on an ongoing basis if such Contributor 245 | notifies You of the non-compliance by some reasonable means, this is the 246 | first time You have received notice of non-compliance with this License 247 | from such Contributor, and You become compliant prior to 30 days after 248 | Your receipt of the notice. 249 | 250 | **5.2.** If You initiate litigation against any entity by asserting a patent 251 | infringement claim (excluding declaratory judgment actions, 252 | counter-claims, and cross-claims) alleging that a Contributor Version 253 | directly or indirectly infringes any patent, then the rights granted to 254 | You by any and all Contributors for the Covered Software under Section 255 | 2.1 of this License shall terminate. 256 | 257 | **5.3.** In the event of termination under Sections 5.1 or 5.2 above, all 258 | end user license agreements (excluding distributors and resellers) which 259 | have been validly granted by You or Your distributors under this License 260 | prior to termination shall survive termination. 261 | 262 | 263 | ### 6. Disclaimer of Warranty 264 | 265 | > Covered Software is provided under this License on an “as is” 266 | > basis, without warranty of any kind, either expressed, implied, or 267 | > statutory, including, without limitation, warranties that the 268 | > Covered Software is free of defects, merchantable, fit for a 269 | > particular purpose or non-infringing. The entire risk as to the 270 | > quality and performance of the Covered Software is with You. 271 | > Should any Covered Software prove defective in any respect, You 272 | > (not any Contributor) assume the cost of any necessary servicing, 273 | > repair, or correction. This disclaimer of warranty constitutes an 274 | > essential part of this License. No use of any Covered Software is 275 | > authorized under this License except under this disclaimer. 276 | 277 | ### 7. 
Limitation of Liability 278 | 279 | > Under no circumstances and under no legal theory, whether tort 280 | > (including negligence), contract, or otherwise, shall any 281 | > Contributor, or anyone who distributes Covered Software as 282 | > permitted above, be liable to You for any direct, indirect, 283 | > special, incidental, or consequential damages of any character 284 | > including, without limitation, damages for lost profits, loss of 285 | > goodwill, work stoppage, computer failure or malfunction, or any 286 | > and all other commercial damages or losses, even if such party 287 | > shall have been informed of the possibility of such damages. This 288 | > limitation of liability shall not apply to liability for death or 289 | > personal injury resulting from such party's negligence to the 290 | > extent applicable law prohibits such limitation. Some 291 | > jurisdictions do not allow the exclusion or limitation of 292 | > incidental or consequential damages, so this exclusion and 293 | > limitation may not apply to You. 294 | 295 | 296 | ### 8. Litigation 297 | 298 | Any litigation relating to this License may be brought only in the 299 | courts of a jurisdiction where the defendant maintains its principal 300 | place of business and such litigation shall be governed by laws of that 301 | jurisdiction, without reference to its conflict-of-law provisions. 302 | Nothing in this Section shall prevent a party's ability to bring 303 | cross-claims or counter-claims. 304 | 305 | 306 | ### 9. Miscellaneous 307 | 308 | This License represents the complete agreement concerning the subject 309 | matter hereof. If any provision of this License is held to be 310 | unenforceable, such provision shall be reformed only to the extent 311 | necessary to make it enforceable. Any law or regulation which provides 312 | that the language of a contract shall be construed against the drafter 313 | shall not be used to construe this License against a Contributor. 314 | 315 | 316 | ### 10. Versions of the License 317 | 318 | #### 10.1. New Versions 319 | 320 | Mozilla Foundation is the license steward. Except as provided in Section 321 | 10.3, no one other than the license steward has the right to modify or 322 | publish new versions of this License. Each version will be given a 323 | distinguishing version number. 324 | 325 | #### 10.2. Effect of New Versions 326 | 327 | You may distribute the Covered Software under the terms of the version 328 | of the License under which You originally received the Covered Software, 329 | or under the terms of any subsequent version published by the license 330 | steward. 331 | 332 | #### 10.3. Modified Versions 333 | 334 | If you create software not governed by this License, and you want to 335 | create a new license for such software, you may create and use a 336 | modified version of this License if you rename the license and remove 337 | any references to the name of the license steward (except to note that 338 | such modified license differs from this License). 339 | 340 | #### 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses 341 | 342 | If You choose to distribute Source Code Form that is Incompatible With 343 | Secondary Licenses under the terms of this version of the License, the 344 | notice described in Exhibit B of this License must be attached. 345 | 346 | ## Exhibit A - Source Code Form License Notice 347 | 348 | This Source Code Form is subject to the terms of the Mozilla Public 349 | License, v. 2.0. 
If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.

If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.

You may add additional accurate notices of copyright ownership.

## Exhibit B - “Incompatible With Secondary Licenses” Notice

This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
.PHONY: install docs


venv: venv/bin/activate
IN_VENV=. ./venv/bin/activate

venv/bin/activate:
	test -d venv || virtualenv venv --python=python3
	${IN_VENV} && pip install pip --upgrade
	${IN_VENV} && pip install numpy  # needs to get done before other things
	${IN_VENV} && pip install -r requirements.txt


# You can set these variables from the command line.
SPHINXOPTS  =
SPHINXBUILD = sphinx-build
PAPER       =
BUILDDIR    = _build

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

DOCSRC = docs

docs: venv
	${IN_VENV} && pip install sphinx sphinx_rtd_theme sphinx-argparse
	${IN_VENV} && cd $(DOCSRC) && $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(DOCSRC)/$(BUILDDIR)/html."
	touch $(DOCSRC)/$(BUILDDIR)/html/.nojekyll
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Read Until
==========

Adaptive sampling enables a large number of applications, traditionally
associated with complex molecular biology methods, to be carried out by the
sequencer itself. It enables the following:

**Enrichment**: Users can ask the system to enrich for strands that contain a
target region of interest, a haplotype of choice, or an organism of interest
against a complex background.

**Depletion**: Users can reject strands from an organism which is of no
interest (e.g. host depletion). In the case of pathogen detection or
microbiome applications in human health, this could be enabled as a
"human filter", ensuring that this sensitive, confidential data is never
committed to disk.

**Balancing**: Users can use adaptive sampling to balance their barcodes,
ensuring they achieve target depths for each barcode, and to even out coverage
across a genome by rejecting strands representing regions of the genome
already at their target depth in favour of regions that have lower coverage.

The read until API is provided "as is" as a research tool. Issue reporting has
been disabled on the GitHub website; users with questions should go to the
Nanopore community and post comments
[here](https://community.nanoporetech.com/posts/adaptive-sampling-release).
Usage currently requires some advanced programming capability.
Efforts are ongoing by the Oxford Nanopore team to release simpler versions of
this tool, enabling more users to deploy it successfully.

Please add new feature requests to the feature request pinboard under the tag
"Adaptive Sampling" (link).

The Read Until API provides a mechanism for an application to connect to a
MinKNOW server to obtain read data in real-time. The data can be analysed in
the way most fit for purpose, and a return call can be made to the server to
unblock the read in progress.


![Read Until Example](https://github.com/nanoporetech/read_until_api/raw/dev/read_until.gif)

Installation
------------

The client requires MinKNOW for MinION 20.06 or later (MinKNOW-Core 4.04).

The package can be installed into a python3 virtual environment. For example,
on Ubuntu:

```bash
python3 -m venv read_until_env
source read_until_env/bin/activate
pip install --upgrade pip
# Install from github:
pip install git+https://github.com/nanoporetech/read_until_api
# Or from a local clone
python setup.py install
```

Two demonstration programs are provided (and are installed into
MinKNOW/ont-python/bin/):

1. `read_until_simple`: this serves as a simple test, and the code
   (module `read_until.simple`) demonstrates use of basic functionality
   for developers.
2. `read_until_ident`: this is a rather more fully featured example, using
   the API to identify reads via basecalling and alignment. Running it
   requires the optional dependencies scrappy and mappy. To use the
   `scrappy` basecaller efficiently it is important to set
   the blas library to be single threaded; this is ordinarily done with:

       export OPENBLAS_NUM_THREADS=1

   or similar.


Client Overview
---------------

The python Read Until package provides a high level interface to requisite parts
of MinKNOW's [gRPC](https://grpc.io/) interface. Developers can focus on
creating rich analyses, rather than the lower level details of handling the data
that MinKNOW provides. The purpose of the read until functionality is to
selectively, based on any conceivable analysis, "unblock" sequencing channels
to increase the time spent sequencing analytes of interest. MinKNOW can be
requested to send a continuous stream of "read chunks" (of a configurable
minimum size), which the client can analyse.

The main client code is located in the `read_until.base.ReadUntilClient` class,
which can be imported as simply:

    from read_until import ReadUntilClient

The interface to this class is thoroughly documented, with additional comments
throughout for developers who wish to develop their own custom client from the
gRPC stream. Developers are encouraged to read the code and inline documentation
(an HTML version of which can be built using the `docs` make target).

The gRPC stream managed by the client is bidirectional: it carries both raw data
"read chunks" to the client and "action responses" to MinKNOW. The client
implements two queues. The first is the `.action_queue` and is fairly
straightforward: requests to MinKNOW to unblock channels are temporarily stored
here, bundled together and then dispatched.

The second queue is more elaborate: it is implemented in
`read_until.base.ReadCache`. The client stores read chunks here in preparation
for analysis. The queue is additionally keyed on channel such that it only ever
stores a single chunk from each sequencer channel, thereby protecting consumers
of the client from reads which have already ended. A restriction of this
approach is that consumers cannot combine data from multiple chunks of the same
read. If this behaviour is required, a client can be constructed with an
alternative implementation of a `ReadCache` (passed as a parameter on
construction of the `ReadUntilClient` instance). However, since the
effectiveness of a read until application depends crucially on the latency of
analysis, it is recommended to design analyses which require as little data as
possible and to set the received chunk size accordingly.

For many developers the details of these queues may be unimportant, at least in
getting started. Of more immediate importance are several methods of the
`ReadUntilClient` class:

*`.run()`*
instruct the class to start retrieving read chunks from MinKNOW.

*`.get_read_chunks()`*
obtain the most recent data retrieved from MinKNOW.

*`.unblock_read()`*
request that a read be ejected from a channel.

*`.stop_receiving_read()`*
request that no more data for a read be sent to the client by MinKNOW. It is not
guaranteed that further data will not be sent, and in the general case the
client does not filter subsequent data from its consumers (although when the
client is created with the `one_chunk` option, the client will provide
additional filtering of the data received from MinKNOW).

Examples of use of the client are given in the codebase, but most simply can be
reduced to:

    from concurrent.futures import ThreadPoolExecutor
    import numpy
    from read_until import ReadUntilClient

    def analysis(client, *args, **kwargs):
        while client.is_running:
            for channel, read in client.get_read_chunks():
                raw_data = numpy.frombuffer(read.raw_data, client.signal_dtype)
                # do something with raw data... and maybe call:
                #    client.stop_receiving_read(channel, read.number)
                #    client.unblock_read(channel, read.number)

    read_until_client = ReadUntilClient()
    read_until_client.run()
    with ThreadPoolExecutor() as executor:
        executor.submit(analysis, read_until_client)
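Building on this skeleton, the sketch below shows one hypothetical decision
policy: reject any read whose mean calibrated current falls below an arbitrary
threshold, and otherwise ask MinKNOW to stop streaming further chunks for that
read. The threshold, batch size, throttle value and function name here are
invented purely for illustration; a real application would base its decision on
basecalling, alignment or similar:

    import time
    from concurrent.futures import ThreadPoolExecutor
    import numpy
    from read_until import ReadUntilClient

    def thresholding_analysis(client, threshold=100.0, batch_size=10, throttle=0.1):
        """Illustrative only: unblock reads with mean signal below `threshold`."""
        while client.is_running:
            t0 = time.time()
            for channel, read in client.get_read_chunks(batch_size=batch_size, last=True):
                raw = numpy.frombuffer(read.raw_data, client.signal_dtype)
                if raw.mean() < threshold:
                    # eject the strand from the pore
                    client.unblock_read(channel, read.number)
                else:
                    # keep sequencing it, but stop streaming chunks to us
                    client.stop_receiving_read(channel, read.number)
            # avoid spinning when the cache is empty
            if time.time() - t0 < throttle:
                time.sleep(throttle)

    read_until_client = ReadUntilClient()
    read_until_client.run()
    with ThreadPoolExecutor() as executor:
        executor.submit(thresholding_analysis, read_until_client)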

Extending the client
--------------------

The `ReadUntilClient` class has been implemented to provide an abstraction which
does not require an in-depth knowledge of the MinKNOW gRPC interface. To extend
the client, however, some knowledge of the messages passed between MinKNOW and a
client is required. Whilst the provided client shows how to construct and decode
basic messages, the following (an extract from the Protocol Buffers definition
files) serves as a more complete reference.

**Messages sent from a client to MinKNOW**

    message GetLiveReadsRequest {
        enum RawDataType {
            // Don't change the previously specified setting for raw data sent
            // with live reads. Note: if sent when there is no last setting,
            // NONE is assumed.
            KEEP_LAST = 0;
            // No raw data required for live reads
            NONE = 1;
            // Calibrated raw data should be sent to the user with each read
            CALIBRATED = 2;
            // Uncalibrated data should be sent to the user with each read
            UNCALIBRATED = 3;
        }

        message UnblockAction {
            // Duration of unblock in seconds.
            double duration = 1;
        }

        message StopFurtherData {}

        message Action {
            string action_id = 1;

            // Channel name to unblock
            uint32 channel = 2;

            // Identifier for the read to act on. If the read requested is no
            // longer in progress, the action fails.
            oneof read { string id = 3; uint32 number = 4; }

            oneof action {
                // Unblock a read and skip further data from this read.
                UnblockAction unblock = 5;

                // Skip further data from this read; doesn't affect the read
                // data.
                StopFurtherData stop_further_data = 6;
            }
        }

        message StreamSetup {
            // The first channel (inclusive) for which to return data. Note
            // that channel numbering starts at 1.
            uint32 first_channel = 1;

            // The last channel (inclusive) for which to return data.
            uint32 last_channel = 2;

            // Specify the type of raw data to retrieve
            RawDataType raw_data_type = 3;

            // Minimum chunk size read data is returned in.
            uint64 sample_minimum_chunk_size = 4;
        }

        message Actions { repeated Action actions = 2; }

        oneof request {
            // Read setup request, initialises channel numbers and type of data
            // returned. Must be specified in the first message sent to MinKNOW.
            // Once MinKNOW has the first setup message reads are sent to the
            // caller as requested. The user can then resend a setup message as
            // frequently as they need to in order to reconfigure live reads -
            // for example by changing if raw data is sent with reads or not.
            StreamSetup setup = 1;

            // Actions to take given data returned to the user - can only be
            // sent once the setup message above has been sent.
            Actions actions = 2;
        }
    }


**Messages received by a client from MinKNOW**

    message GetLiveReadsResponse {
        message ReadData {
            // The id of this read, this id is unique for every read ever
            // produced.
            string id = 1;

            // The MinKNOW assigned number of this read. Read numbers always
            // increment throughout the experiment, and are unique per channel,
            // however they are not necessarily contiguous.
            uint32 number = 2;

            // Absolute start point of this read
            uint64 start_sample = 3;

            // Absolute start point through the experiment of this chunk
            uint64 chunk_start_sample = 4;

            // Length of the chunk in samples
            uint64 chunk_length = 5;

            // All Classifications given to intermediate chunks by analysis
            repeated int32 chunk_classifications = 6;

            // Any raw data selected by the request. The type of the elements
            // will depend on whether calibrated data was chosen. The
            // get_data_types() RPC call should be used to determine the
            // precise format of the data, but in general terms, uncalibrated
            // data will be signed integers and calibrated data will be
            // floating-point numbers.
            bytes raw_data = 7;

            // The median of the read previous to this read. Intended to allow
            // querying of the approximate level of this read, compared to the
            // last. For example, a user could try to verify this is a strand by
            // ensuring the median of the current read is lower than the
            // median_before level.
            float median_before = 8;

            // The median pA level of this read from all aggregated read chunks
            // so far.
            float median = 9;
        };

        message ActionResponse {
            string action_id = 1;
            enum Response { SUCCESS = 0; FAILED_READ_FINISHED = 1; }
            Response response = 2;
        }

        // The number of samples collected before the first sample included in
        // this response. This gives the position of the first data point on
        // each channel in the overall stream of data being acquired from the
        // device (since this period of data acquisition was started).
        uint64 samples_since_start = 1;

        // The number of seconds elapsed since data acquisition started.
        // This is the same as ``samples_since_start``, but expressed in
        // seconds.
        double seconds_since_start = 2;

        // In progress reads for the requested channels. Sparsely populated as
        // not all channels have new/incomplete reads.
        map<uint32, ReadData> channels = 4;

        // List of responses to requested actions, informing the caller of
        // results to requested unblocks or discards of data.
        repeated ActionResponse action_responses = 5;
    }
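For orientation, the following sketch shows how such messages might be
constructed from Python. The generated protobuf module is reached here via
`client.msgs` (an attribute set up by `ReadUntilClient`); the channel, read
number, duration and function name are invented for the example, and this is
broadly similar to, not identical with, what the client does internally:

    import uuid

    def unblock_request(msgs, channel, read_number, duration=0.1):
        # one Action addressing a read by channel and number, using the
        # `unblock` arm of the action oneof
        action = msgs.GetLiveReadsRequest.Action(
            action_id=str(uuid.uuid4()),
            channel=channel,
            number=read_number,
            unblock=msgs.GetLiveReadsRequest.UnblockAction(duration=duration),
        )
        # actions are wrapped in an Actions message before being streamed
        return msgs.GetLiveReadsRequest(
            actions=msgs.GetLiveReadsRequest.Actions(actions=[action]))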
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys, os, re, subprocess
import sphinx_rtd_theme


# General information about the project.
__pkg_name__ = u'read_until'
project = __pkg_name__.capitalize()
copyright = u'2017, Oxford Nanopore Technologies'


# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('..'))

# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx',
    'sphinx.ext.mathjax', 'sphinxarg.ext']
mathjax_path = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'


# Generate API documentation:
if subprocess.call(['sphinx-apidoc', '-o', './', "../{}".format(__pkg_name__)]) != 0:
    sys.stderr.write('Failed to generate API documentation!\n')

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#

# Get the version number from __init__.py
verstrline = open(os.path.join('..', __pkg_name__, '__init__.py'), 'r').read()
vsre = r"^__version__ = ['\"]([^'\"]*)['\"]"
mo = re.search(vsre, verstrline, re.M)
if mo:
    __version__ = mo.group(1)
else:
    raise RuntimeError('Unable to find version string in "{}/__init__.py".'.format(__pkg_name__))

# The short X.Y version.
version = __version__
# The full version, including alpha/beta/rc tags.
release = __version__

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '*test*']

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []


# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
#html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = '{}doc'.format(project)


# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
    ('index', '{}.tex'.format(project), u'{} Documentation'.format(project),
     u'Oxford Nanopore Technologies', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', project, u'{} Documentation'.format(project),
     [u'Oxford Nanopore Technologies'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    ('index', project, u'{} Documentation'.format(project),
     u'Oxford Nanopore Technologies', project, 'One line description of project.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'


# -- Options for Epub output ---------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project
epub_author = u'Oxford Nanopore Technologies'
epub_publisher = u'Oxford Nanopore Technologies'
epub_copyright = u'2017, Oxford Nanopore Technologies'

# The language of the text. It defaults to the language option
# or en if the language is not set.
#epub_language = ''

# The scheme of the identifier. Typical schemes are ISBN or URL.
#epub_scheme = ''

# The unique identifier of the text. This can be an ISBN number
# or the project homepage.
#epub_identifier = ''

# A unique identification for the text.
#epub_uid = ''

# A tuple containing the cover image and cover page html template filenames.
#epub_cover = ()

# HTML files that should be inserted before the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_pre_files = []

# HTML files that should be inserted after the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_post_files = []

# A list of files that should not be packed into the epub file.
#epub_exclude_files = []

# The depth of the table of contents in toc.ncx.
#epub_tocdepth = 3

# Allow duplicate toc entries.
#epub_tocdup = True
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
Welcome to the Read Until API
=============================

The Read Until API provides a mechanism for a client script to connect to a
MinKNOW server. The server can be asked to push raw data to the client
script in real-time. The data can be analysed in the way most fit for purpose,
and a return call can be made to the server to unblock the read in progress.

Installation
------------

The package can be installed into MinKNOW's python environment using the
python interpreter in the MinKNOW root directory. For example, on Ubuntu:

.. code-block:: bash

    sudo /opt/ONT/MinKNOW/ont-python/bin/python setup.py install


Two demonstration programs are provided (and are installed into
MinKNOW/ont-python/bin/):

i)  read_until_simple: this serves as a simple test, and the code
    demonstrates use of basic functionality for developers
    (read_until.simple).
ii) read_until_ident: this is a rather more fully featured example of use
    of the API to identify reads via basecalling and alignment. To run it
    requires the optional dependencies of scrappy and mappy. These can be
    installed via `ont-python/bin/python -m pip install mappy scrappie`.
    To use the `scrappy` basecaller efficiently it is important to set
    the blas library to be single threaded; this is ordinarily done with:

    .. code-block:: bash

        export OPENBLAS_NUM_THREADS=1


Full API reference
------------------

.. toctree::
    :maxdepth: 3

    read_until

Indices and tables
------------------

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

--------------------------------------------------------------------------------
/read_until.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nanoporetech/read_until_api/501450835bec121629ec9b51e10b708ad4b67947/read_until.gif
--------------------------------------------------------------------------------
/read_until/__init__.py:
--------------------------------------------------------------------------------
__version__ = '2.1.0'

from read_until.base import *
--------------------------------------------------------------------------------
/read_until/base.py:
--------------------------------------------------------------------------------
from collections import Counter, defaultdict, OrderedDict
from itertools import count as _count
from threading import Event, Lock, Thread
import logging
import sys
import time
import uuid


try:
    import queue
except ImportError:
    import Queue as queue

import numpy

import minknow_api


if sys.version_info[0] < 3:
    NullRaw = ''
else:
    NullRaw = bytes('', 'utf8')


__all__ = ['ReadCache', 'ReadUntilClient', 'NullRaw']

# This replaces the results of an old call to MinKNOW's
# jsonRPC interface. That interface does not respond
# correctly when a run has been configured using the
# newer gRPC interface. This information is not currently
# available with the gRPC interface so, as a temporary
# measure, we list a standard set of values here.
CLASS_MAP = {
    'read_classification_map': {
        '83': 'strand',
        '67': 'strand1',
        '77': 'multiple',
        '90': 'zero',
        '65': 'adapter',
        '66': 'mux_uncertain',
        '70': 'user2',
        '68': 'user1',
        '69': 'event',
        '80': 'pore',
        '85': 'unavailable',
        '84': 'transition',
        '78': 'unclassed',
    }
}
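# Editorial note: `chunk_classifications` on a streamed read chunk is a list
# of integers (see the GetLiveReadsResponse definition in the README); they
# can be decoded against this map, for example:
#
#     names = [CLASS_MAP['read_classification_map'][str(c)]
#              for c in read.chunk_classifications]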


class ReadCache(object):
    def __init__(self, size=100):
        """An ordered and keyed queue of a maximum size to store read chunks.

        :param size: maximum number of entries; when more entries are added
            the oldest current entries will be removed.

        The attributes .missed and .replaced count the total number of reads
        never popped, and the number of read chunks replaced by a chunk from
        the same read.

        """

        if size < 1:
            raise AttributeError("'size' must be >= 1.")
        self.size = size
        self.dict = OrderedDict()
        self.lock = Lock()
        self.missed = 0
        self.replaced = 0


    def __getitem__(self, key):
        with self.lock:
            return self.dict[key]


    def __setitem__(self, key, value):
        with self.lock:
            counted = False
            while len(self.dict) >= self.size:
                counted = True
                k, v = self.dict.popitem(last=False)
                if k == key and v.number == value.number:
                    self.replaced += 1
                else:
                    self.missed += 1
            if key in self.dict:
                if not counted:
                    if self.dict[key].number == value.number:
                        self.replaced += 1
                    else:
                        self.missed += 1
                del self.dict[key]
            self.dict[key] = value


    def __delitem__(self, key):
        with self.lock:
            del self.dict[key]


    def __len__(self):
        return len(self.dict)


    def popitem(self, last=True):
        """Return the newest (or oldest) entry.

        :param last: if `True` return the newest entry, else the oldest.

        """
        with self.lock:
            return self.dict.popitem(last=last)


    def popitems(self, items, last=True):
        """Return a list of the newest (or oldest) entries.

        :param items: maximum number of items to return; zero items may
            be returned (i.e. an empty list).
        :param last: if `True` return the newest entries, else the oldest.

        """
        with self.lock:
            data = list()
            for _ in range(items):
                try:
                    item = self.dict.popitem(last=last)
                except KeyError:
                    pass
                else:
                    data.append(item)
            return data
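
# --- Editorial sketch (not part of the original module) ----------------------
# The README notes that consumers needing more than one chunk per read can pass
# an alternative cache via the `cache_type` parameter of `ReadUntilClient`. A
# minimal, hypothetical variant might concatenate successive chunks of the same
# read rather than replacing them; the class name and details are illustrative:
#
#     class AccumulatingCache(ReadCache):
#         def __setitem__(self, key, value):
#             with self.lock:
#                 if key in self.dict and self.dict[key].number == value.number:
#                     # same read on this channel: prepend the cached signal
#                     value.raw_data = self.dict[key].raw_data + value.raw_data
#                     del self.dict[key]
#                     self.replaced += 1
#                 while len(self.dict) >= self.size:
#                     self.dict.popitem(last=False)
#                     self.missed += 1
#                 self.dict[key] = value
# ------------------------------------------------------------------------------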


def _format_iter(data):
    # make a nice text string from an iterable
    data = list(data)
    result = ''
    if len(data) == 1:
        result = data[0]
    elif len(data) == 2:
        result = ' and '.join(data)
    else:
        result = ', '.join(data[:-1])
        result += ', and {}'.format(data[-1])
    return result


# Helper to generate new thread names
_counter = _count()
next(_counter)
def _new_thread_name(template="read_until-%d"):
    return template % next(_counter)


# The maximum allowed minimum read chunk size. Filtering of small read chunks
# from the gRPC stream is buggy. The value 0 effectively disables the
# filtering functionality.
ALLOWED_MIN_CHUNK_SIZE = 0


class ReadUntilClient(object):

    def __init__(self, mk_host='127.0.0.1', mk_port=8000, cache_size=512, cache_type=ReadCache,
                 filter_strands=True, one_chunk=True, prefilter_classes={'strand', 'adapter'}):
        """A basic Read Until client. The class handles basic interaction
        with the MinKNOW gRPC stream and provides a thread-safe queue
        containing the most recent read data on each channel.

        :param mk_host: MinKNOW gRPC host for the sequencing device.
        :param mk_port: MinKNOW gRPC port for the sequencing device.
        :param cache_size: maximum number of read chunks to cache from
            gRPC stream. Setting this to the number of device channels
            will allow caching of the most recent data per channel.
        :param cache_type: a type derived from `ReadCache` for managing
            incoming read chunks.
        :param filter_strands: pre-filter stream to keep only strand-like reads.
        :param one_chunk: attempt to receive only one chunk per read. When
            enabled a request to stop receiving more data for a read is
            immediately staged when the first chunk is cached.
        :param prefilter_classes: a set of read classes to accept through
            prefilter. Ignored if filter_strands is `False`.

        To set up and use a client:

        >>> read_until_client = ReadUntilClient()

        This creates an initial connection to a MinKNOW instance in
        preparation for setting up the live reads stream. To initiate the
        stream:

        >>> read_until_client.run()

        The client is now receiving data. Calls to methods of
        `read_until_client` can then be made in a separate thread. For
        example, a continually running analysis function can be submitted
        to an executor as:

        >>> def analysis(client, *args, **kwargs):
        ...     while client.is_running:
        ...         for channel, read in client.get_read_chunks():
        ...             raw_data = numpy.frombuffer(read.raw_data, client.signal_dtype)
        ...             # do something with raw data... and maybe call:
        ...             #    client.stop_receiving_read(channel, read.number)
        ...             #    client.unblock_read(channel, read.number)
        >>> with ThreadPoolExecutor() as executor:
        ...     executor.submit(analysis, read_until_client)

        To stop processing the gRPC read stream:

        >>> read_until_client.reset()

        If an analysis function is set up as above in response to
        `client.is_running`, this call will cause the analysis function
        to return.

        """
        self.logger = logging.getLogger('ReadUntil')

        self.mk_host = mk_host
        self.mk_grpc_port = mk_port
        self.cache_size = cache_size
        self.CacheType = cache_type
        self.filter_strands = filter_strands
        self.one_chunk = one_chunk
        self.prefilter_classes = prefilter_classes

        client_type = 'single chunk' if self.one_chunk else 'many chunk'
        filters = ' '.join(self.prefilter_classes)
        filter_to = 'without prefilter'
        if self.filter_strands:
            if len(self.prefilter_classes) == 0:
                raise ValueError('Read filtering set but no filter classes given.')
            classes = _format_iter(self.prefilter_classes)
            filter_to = 'filtering to {} read chunks'.format(classes)
        self.logger.info('Creating {} client with {} data queue {}.'.format(
            client_type, self.CacheType.__name__, filter_to))

        self.logger.warn("Using pre-defined read classification map.")
        class_map = CLASS_MAP
        self.read_classes = {
            int(k): v for k, v in
            class_map['read_classification_map'].items()
        }
        self.strand_classes = set()
        for key, value in self.read_classes.items():
            if value in self.prefilter_classes:
                self.strand_classes.add(key)
        self.logger.debug('Strand-like classes are {}.'.format(self.strand_classes))

        self.grpc_port = self.mk_grpc_port
        self.logger.info('Creating rpc connection on port {}.'.format(self.grpc_port))
        self.connection = minknow_api.Connection(host=self.mk_host, port=self.grpc_port)
        self.logger.info('Got rpc connection.')
        self.msgs = self.connection.data._pb

        self.signal_dtype = minknow_api.data.get_numpy_types(self.connection).calibrated_signal

        # setup the queues and running status
        self._process_thread = None
        self.reset()
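
    # Editorial example (values are hypothetical): a client for a 512-channel
    # flow cell, caching at most one chunk per channel and allowing multiple
    # chunks per read to reach the analysis code:
    #
    #     client = ReadUntilClient(mk_host='127.0.0.1', mk_port=8000,
    #                              cache_size=512, one_chunk=False)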

    def run(self, **kwargs):
        """Run Read Until analysis.

        :param **kwargs: keyword args for gRPC stream setup. Valid keys are:
            `first_channel`, `last_channel`, `raw_data_type`, and
            `sample_minimum_chunk_size`.
        """
        self._process_thread = Thread(
            target=self._run,
            name=_new_thread_name(),
            kwargs=kwargs
        )
        self._process_thread.start()
        self.logger.info("Processing started")


    def reset(self, timeout=5):
        """Reset the state of the client to an initial (not running) state with
        no data or requests in queues.

        """
        # self._process_reads is blocking => it runs in a thread.
        if self._process_thread is not None:
            self.logger.info("Reset request received, shutting down...")
            self.running.clear()
            self._process_thread.join()  # block, try hard for .cancel() on stream
            if self._process_thread.is_alive():
                self.logger.warn("Stream handler did not finish correctly.")
            else:
                self.logger.info("Stream handler exited successfully.")
        self._process_thread = None

        # a flag to indicate whether gRPC stream is being processed. Any
        # running ._runner() will respond to this.
        self.running = Event()
        # the action_queue is used to store unblock/stop_receiving_data
        # requests before they are put on the gRPC stream.
        self.action_queue = queue.Queue()
        # the data_queue is used to store the latest chunk per channel
        self.data_queue = self.CacheType(size=self.cache_size)
        # stores all sent action ids -> unblock/stop
        self.sent_actions = dict()


    @property
    def aquisition_progress(self):
        """Get MinKNOW data acquisition progress.

        :returns: a structure with attributes .acquired and .processed.

        """
        return self.connection.acquisition.get_progress().raw_per_channel


    @property
    def queue_length(self):
        """The length of the read queue."""
        return len(self.data_queue)


    @property
    def missed_reads(self):
        """Number of reads ejected from the queue (i.e. reads that had one or
        more chunks enter the analysis queue but were replaced by a distinct
        read before being pulled from the queue).

        """
        return self.data_queue.missed


    @property
    def missed_chunks(self):
        """Number of read chunks replaced in the queue by a chunk from the same
        read (a single read may have its queued chunk replaced more than once).

        """
        return self.data_queue.replaced


    @property
    def is_running(self):
        """The processing status of the gRPC stream."""
        return self.running.is_set()


    def get_read_chunks(self, batch_size=1, last=True):
        """Get read chunks, removing them from the queue.

        :param batch_size: maximum number of reads.
        :param last: if `True` get the most recent entries, else the oldest.

        """
        return self.data_queue.popitems(items=batch_size, last=last)


    def unblock_read(self, read_channel, read_number, duration=0.1):
        """Request that a read be unblocked.

        :param read_channel: a read's channel number.
        :param read_number: a read's read number (the nth read per channel).
        :param duration: time in seconds to apply unblock voltage.
365 | 
366 |         """
367 |         self._put_action(read_channel, read_number, 'unblock', duration=duration)
368 | 
369 | 
370 |     def stop_receiving_read(self, read_channel, read_number):
371 |         """Request to receive no more data for a read.
372 | 
373 |         :param read_channel: a read's channel number.
374 |         :param read_number: a read's read number (the nth read per channel).
375 | 
376 |         """
377 |         self._put_action(read_channel, read_number, 'stop_further_data')
378 | 
379 | 
380 |     def _run(self, **kwargs):
381 |         self.running.set()
382 |         # .get_live_reads() takes an iterable of requests and generates
383 |         # raw data chunks and responses to our requests: the iterable
384 |         # thereby controls the lifetime of the stream. ._runner() as
385 |         # implemented below initialises the stream then transfers
386 |         # action requests from the action_queue to the stream.
387 |         reads = self.connection.data.get_live_reads(
388 |             self._runner(**kwargs)
389 |         )
390 | 
391 |         # ._process_reads() as implemented below is responsible for caching
392 |         # read chunks on the data queue and logging action responses.
393 |         # We really do want reads.cancel() below to run, so catch
394 |         # anything and everything.
395 |         try:
396 |             self._process_reads(reads)
397 |         except Exception as e:
398 |             self.logger.info(e)
399 | 
400 |         # signal to the server that we are done with the stream.
401 |         reads.cancel()
402 | 
403 | 
404 |     def _runner(self, first_channel=1, last_channel=512, min_chunk_size=ALLOWED_MIN_CHUNK_SIZE, action_batch=1000, action_throttle=0.001):
405 |         """Yield the stream initializer request followed by action requests
406 |         placed into the action_queue.
407 | 
408 |         :param first_channel: lowest channel for which to receive raw data.
409 |         :param last_channel: highest channel (inclusive) for which to receive data.
410 |         :param min_chunk_size: minimum number of raw samples in a raw data chunk.
411 |         :param action_batch: maximum number of actions to batch in a single request.
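        :param action_throttle: minimum time, in seconds, between yielded
            action batches.

        The first request yielded configures the stream (channel range, raw
        data type and chunk size); every subsequent request simply carries a
        batch of actions drained from the action_queue.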
412 | 
413 |         """
414 |         # see note at top of this module
415 |         if min_chunk_size > ALLOWED_MIN_CHUNK_SIZE:
416 |             self.logger.warning("Reducing min_chunk_size to {}".format(ALLOWED_MIN_CHUNK_SIZE))
417 |             min_chunk_size = ALLOWED_MIN_CHUNK_SIZE
418 | 
419 |         self.logger.info(
420 |             "Sending init command, channels:{}-{}, min_chunk:{}".format(
421 |                 first_channel, last_channel, min_chunk_size)
422 |         )
423 |         yield self.msgs.GetLiveReadsRequest(
424 |             setup=self.msgs.GetLiveReadsRequest.StreamSetup(
425 |                 first_channel=first_channel,
426 |                 last_channel=last_channel,
427 |                 raw_data_type=self.msgs.GetLiveReadsRequest.CALIBRATED,
428 |                 sample_minimum_chunk_size=min_chunk_size
429 |             )
430 |         )
431 | 
432 | 
433 |         while self.is_running:
434 |             t0 = time.time()
435 |             # get as many items as we can up to the maximum, without blocking
436 |             actions = list()
437 |             for _ in range(action_batch):
438 |                 try:
439 |                     action = self.action_queue.get_nowait()
440 |                 except queue.Empty:
441 |                     break
442 |                 else:
443 |                     actions.append(action)
444 | 
445 |             n_actions = len(actions)
446 |             if n_actions > 0:
447 |                 self.logger.debug('Sending {} actions.'.format(n_actions))
448 |                 action_group = self.msgs.GetLiveReadsRequest(
449 |                     actions=self.msgs.GetLiveReadsRequest.Actions(actions=actions)
450 |                 )
451 |                 yield action_group
452 | 
453 |             # limit the rate at which action batches are sent
454 |             t1 = time.time()
455 |             if t0 + action_throttle > t1:
456 |                 time.sleep(action_throttle + t0 - t1)
457 |         else:
458 |             self.logger.info("Reset signal received by action handler.")
459 | 
460 | 
461 |     def _process_reads(self, reads):
462 |         """Process the gRPC stream data, storing read chunks in the data_queue.
463 | 
464 |         :param reads: gRPC data stream iterable as produced by get_live_reads().
465 | 
466 |         """
467 |         response_counter = defaultdict(Counter)
468 | 
469 |         unique_reads = set()
470 | 
471 |         read_count = 0
472 |         samples_behind = 0
473 |         raw_data_bytes = 0
474 |         last_msg_time = time.time()
475 |         for reads_chunk in reads:
476 |             if not self.is_running:
477 |                 self.logger.info('Stopping processing of reads due to reset.')
478 |                 break
479 |             # In each iteration, we get:
480 |             #   i) responses to our previous actions (success/fail)
481 |             #  ii) raw data for current reads
482 | 
483 |             # record a count of success and fails
484 |             if len(reads_chunk.action_responses):
485 |                 for response in reads_chunk.action_responses:
486 |                     action_type = self.sent_actions[response.action_id]
487 |                     response_counter[action_type][response.response] += 1
488 | 
489 |             progress = self.aquisition_progress
490 |             for read_channel in reads_chunk.channels:
491 |                 read_count += 1
492 |                 read = reads_chunk.channels[read_channel]
493 |                 if self.one_chunk:
494 |                     if read.id in unique_reads:
495 |                         # The previous stop request was not enacted in time.
496 |                         # Don't re-queue the read: it may already have been
497 |                         # popped from the queue, in which case we would be
498 |                         # reinserting stale data.
499 |                         self.logger.debug(
500 |                             'Rereceived {}:{} after stop request.'.format(
501 |                                 read_channel, read.number
502 |                         ))
503 |                         continue
504 |                     self.stop_receiving_read(read_channel, read.number)
505 |                     unique_reads.add(read.id)
506 |                 read_samples_behind = progress.acquired - read.chunk_start_sample
507 |                 samples_behind += read_samples_behind
508 |                 raw_data_bytes += len(read.raw_data)
509 | 
510 |                 strand_like = any(x in self.strand_classes for x in read.chunk_classifications)
511 |                 if not self.filter_strands or strand_like:
512 |                     self.data_queue[read_channel] = read
513 | 
514 |             now = time.time()
515 |             if last_msg_time + 1 < now:
516 |                 self.logger.info(
517 |                     "Interval update: {} read sections, {} unique reads (ever), "
518 |                     "average {:.0f} samples behind. {:.2f} MB raw data, "
519 |                     "{} reads in queue, {} reads missed, {} chunks replaced."
520 |                     .format(
521 |                         read_count, len(unique_reads),
522 |                         samples_behind / max(1, read_count), raw_data_bytes / 1024 / 1024,
523 |                         self.queue_length, self.missed_reads, self.missed_chunks
524 |                     )
525 |                 )
526 |                 self.logger.info("Response summary: {}".format(response_counter))
527 | 
528 |                 read_count = 0
529 |                 samples_behind = 0
530 |                 raw_data_bytes = 0
531 |                 last_msg_time = now
532 | 
533 | 
534 |     def _put_action(self, read_channel, read_number, action, **params):
535 |         """Store an action request on the queue, ready to be placed on the
536 |         gRPC stream.
537 | 
538 |         :param read_channel: a read's channel number.
539 |         :param read_number: a read's read number (the nth read per channel).
540 |         :param action: either 'stop_further_data' or 'unblock'.
541 |         :param params: dictionary of parameters for action. Allowed values
542 |             are: 'duration' for `action='unblock'`.
543 | 
544 |         """
545 |         action_id = str(uuid.uuid4())
546 |         action_kwargs = {
547 |             'action_id': action_id,
548 |             'channel': read_channel,
549 |             'number': read_number,
550 |         }
551 |         self.sent_actions[action_id] = action
552 |         if action == 'stop_further_data':
553 |             action_kwargs[action] = self.msgs.GetLiveReadsRequest.StopFurtherData()
554 |         elif action == 'unblock':
555 |             action_kwargs[action] = self.msgs.GetLiveReadsRequest.UnblockAction()
556 |             if 'duration' in params:
557 |                 action_kwargs[action].duration = params['duration']
558 |         else:
559 |             raise ValueError("'action' parameter must be 'stop_further_data' or 'unblock'.")
560 | 
561 |         action_request = self.msgs.GetLiveReadsRequest.Action(**action_kwargs)
562 |         self.action_queue.put(action_request)
563 |         self.logger.debug('Action {} on channel {}, read {} : {}'.format(
564 |             action_id, read_channel, read_number, action
565 |         ))
566 | 
567 | 
--------------------------------------------------------------------------------
/read_until/identification.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict, Counter
2 | import concurrent
3 | import functools
4 | import logging
5 | import os
6 | import random
7 | import sys
8 | import time
9 | from uuid import uuid4
10 | 
11 | import numpy
12 | 
13 | try:
14 |     import mappy
15 |     import scrappy
16 | except ImportError:
17 |     raise ImportError("'mappy' and 'scrappy' must be installed to use this functionality.")
18 | 
19 | import read_until
20 | import read_until.simple as read_until_extras
21 | 
22 | 
23 | def basecall_data(raw):
24 |     seq, score, pos, start, end, base_probs = scrappy.basecall_raw(raw)
25 |     if sys.version_info[0] < 3:
26 |         seq = seq.encode()
27 |     return seq, score
28 | 
29 | 
30 | def divide_analysis(client, map_index, genome_cut=2200000,
                    batch_size=10, delay=1, throttle=0.1, unblock_duration=0.1):
31 |     """Analysis using scrappy and mappy to accept/reject reads based on
32 |     channel and identity as determined by alignment of basecall to
33 |     reference. Channels are split into three groups (by channel number
34 |     modulo 3): the first group is left to proceed "naturally"; for the
35 |     second (third) group, attempts are made to sequence only reads from
36 |     before (after) a reference locus.
37 | 
38 |     :param client: an instance of a `ReadUntilClient` object.
39 |     :param map_index: a minimap2 index file.
40 |     :param genome_cut: reference locus for determining read acceptance
41 |         in the two filtered channel groups.
42 |     :param batch_size: number of reads to pull from `client` at a time.
43 |     :param delay: number of seconds to wait before starting analysis.
44 |     :param throttle: minimum interval between requests to `client`.
45 |     :param unblock_duration: time in seconds to apply unblock voltage.
46 | 
47 |     :returns: a dictionary of Counters of actions taken per channel group.
48 | 
49 |     """
50 |     logger = logging.getLogger('Analysis')
51 |     logger.info('Starting analysis of reads in {}s.'.format(delay))
52 |     time.sleep(delay)
53 | 
54 |     logger.info('Loading index')
55 |     mapper = mappy.Aligner(map_index, preset='map_ont')
56 | 
57 |     action_counters = defaultdict(Counter)
58 |     max_pos = 0
59 |     while client.is_running:
60 |         t0 = time.time()
61 |         read_batch = client.get_read_chunks(batch_size=batch_size, last=True)
62 |         for channel, read in read_batch:
63 |             channel_group = (channel % 3)
64 |             if channel_group == 0:
65 |                 # leave these channels alone
66 |                 logger.debug('Skipping channel {}({}).'.format(channel, channel_group))
67 |                 action_counters[channel_group]['skipped'] += 1
68 |                 client.stop_receiving_read(channel, read.number)
69 |             else:
70 |                 # convert the read data into a numpy array of correct type
71 |                 raw_data = numpy.frombuffer(read.raw_data, client.signal_dtype)
72 |                 read.raw_data = read_until.NullRaw
73 |                 basecall, score = basecall_data(raw_data)
74 |                 aligns = list(mapper.map(basecall))
75 |                 if len(aligns) == 0:
76 |                     # Defer decision for another time
77 |                     action_counters[channel_group]['unaligned'] += 1
78 |                     logger.debug("read_{}_{} doesn't align.".format(channel, read.number))
79 |                 else:
80 |                     # choose a random alignment as a surrogate for the best
81 |                     align = random.choice(aligns)
82 |                     logger.debug('{}:{}-{}, read_{}_{}:{}-{}, blen:{}, class:{}'.format(
83 |                         align.ctg, align.r_st, align.r_en, channel, read.number, align.q_st, align.q_en, align.blen,
84 |                         [client.read_classes[x] for x in read.chunk_classifications]
85 |                     ))
86 |                     first_half = align.r_st < genome_cut
87 |                     action_counters[channel_group]['section_{}'.format(int(first_half))] += 1
88 |                     unblock = (
89 |                         (channel_group == 1 and first_half) or
90 |                         (channel_group != 1 and not first_half)
91 |                     )
92 |                     if unblock:
93 |                         # Bad read for channel
94 |                         action_counters[channel_group]['unblock'] += 1
95 |                         logger.debug('Unblocking channel {}({}) ref:{}.'.format(channel, channel_group, align.r_st))
96 |                         client.unblock_read(channel, read.number, duration=unblock_duration)
97 |                     else:
98 |                         # Good read for channel
99 |                         action_counters[channel_group]['stop'] += 1
100 |                         logger.debug('Good channel {}({}) ref:{}.'.format(channel, channel_group, align.r_st))
101 |                         if not client.one_chunk:
102 |                             client.stop_receiving_read(channel, read.number)
103 | 
104 |         t1 = time.time()
105 |         if t0 + throttle > t1:
106 |             time.sleep(throttle + t0 - t1)
107 | 
108 |     # end while loop
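    # client.is_running has cleared: a reset was requested or the run ended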
109 |     logger.info('Received client stop signal.')
110 | 
111 |     return action_counters
112 | 
113 | 
114 | def filter_targets(client, mapper, targets, batch_size=10, delay=1, throttle=0.1, control_group=16, unblock_unknown=False, basecalls_output=None, unblock_duration=0.1):
115 |     """Analysis using scrappy and mappy to accept/reject reads based on
116 |     channel and identity as determined by alignment of basecall to
117 |     reference. Channels are split into two groups (by channel number
118 |     modulo `control_group`): the first group is left to proceed
119 |     "naturally", the second rejects reads not aligning to target sequences.
120 | 
121 |     :param client: an instance of a `ReadUntilClient` object.
122 |     :param mapper: an instance of `mappy.Aligner`.
123 |     :param targets: a list of acceptable reference targets (chr, start, end).
124 |     :param batch_size: number of reads to pull from `client` at a time.
125 |     :param delay: number of seconds to wait before starting analysis.
126 |     :param throttle: minimum interval between requests to `client`.
127 |     :param control_group: channels for which (channel % control_group) == 0
128 |         will form the control group.
129 |     :param unblock_unknown: whether or not to unblock reads which cannot be
130 |         positively identified (i.e. show no alignment to the reference,
131 |         whether on or off target).
132 |     :param basecalls_output: filename prefix for writing basecalls.
133 |     :param unblock_duration: time in seconds to apply unblock voltage.
134 | 
135 |     :returns: a dictionary of Counters of actions taken per channel group.
136 | 
137 |     """
138 |     logger = logging.getLogger('Analysis')
139 |     logger.info('Starting analysis of reads in {}s.'.format(delay))
140 |     time.sleep(delay)
141 |     thread_id = str(uuid4())
142 |     if basecalls_output is None:
143 |         basecalls_output = os.devnull
144 |     else:
145 |         basecalls_output = '{}_{}.fa'.format(basecalls_output, thread_id)
146 | 
147 |     with open(basecalls_output, 'w') as fasta:
148 |         action_counters = defaultdict(Counter)
149 |         max_pos = 0
150 |         while client.is_running:
151 |             t0 = time.time()
152 |             read_batch = client.get_read_chunks(batch_size=batch_size, last=True)
153 |             for channel, read in read_batch:
154 |                 channel_group = 'test' if (channel % control_group) else 'control'
155 |                 if channel_group == 'control':
156 |                     # leave these channels alone
157 |                     logger.debug('Skipping channel {}({}).'.format(channel, channel_group))
158 |                     action_counters[channel_group]['skipped'] += 1
159 |                     client.stop_receiving_read(channel, read.number)
160 |                 else:
161 |                     # convert the read data into a numpy array of correct type
162 |                     raw_data = numpy.frombuffer(read.raw_data, client.signal_dtype)
163 |                     read.raw_data = read_until.NullRaw
164 |                     basecall, score = basecall_data(raw_data)
165 |                     aligns = list(mapper.map(basecall))
166 |                     fasta_action = ''
167 |                     if len(aligns) == 0:
168 |                         action_counters[channel_group]['unaligned'] += 1
169 |                         if unblock_unknown:
170 |                             logger.debug('Unblocking unidentified channel {}:{}:{}.'.format(
171 |                                 channel, read.number, read.chunk_start_sample))
172 |                             client.unblock_read(channel, read.number, duration=unblock_duration)
173 |                             fasta_action = 'unaligned/unblocked'
174 |                         else:
175 |                             # Defer decision for another time (if the client is
176 |                             # set up to show us more).
177 |                             logger.debug("Leaving unidentified channel {}:{}:{}".format(
178 |                                 channel, read.number, read.chunk_start_sample))
179 |                             fasta_action = 'unaligned/left'
180 |                     else:
181 |                         # choose a random alignment as a surrogate for the best
182 |                         align = random.choice(aligns)
183 |                         logger.debug('{}:{}-{}, read_{}_{}:{}-{}, blen:{}, class:{}'.format(
184 |                             align.ctg, align.r_st, align.r_en,
185 |                             channel, read.number, align.q_st, align.q_en, align.blen,
186 |                             [client.read_classes[x] for x in read.chunk_classifications]
187 |                         ))
188 |                         unblock = True
189 |                         hit = 'off_target'
190 |                         for target in targets:
191 |                             if align.ctg == target[0]:
192 |                                 # This could be a little more permissive
193 |                                 if (align.r_st > target[1] and align.r_st < target[2]) or \
194 |                                         (align.r_en > target[1] and align.r_en < target[2]):
195 |                                     unblock = False
196 |                                     hit = '{}:{}-{}'.format(*target)
197 | 
198 |                         # record the hit (on or off target)
199 |                         action_counters[channel_group][hit] += 1
200 |                         if unblock:
201 |                             logger.debug('Unblocking channel {}:{}:{}.'.format(channel, read.number, read.chunk_start_sample))
202 |                             client.unblock_read(channel, read.number, duration=unblock_duration)
203 |                             fasta_action = '{}/unblocked'.format(hit)
204 |                         else:
205 |                             logger.debug('Good channel {}:{}:{}, aligns to {}.'.format(channel, read.number, read.chunk_start_sample, hit))
206 |                             if not client.one_chunk:
207 |                                 client.stop_receiving_read(channel, read.number)
208 |                             fasta_action = '{}/stopped'.format(hit)
209 |                         fasta_action += ' {}:{}-{}'.format(align.ctg, align.r_st, align.r_en)
210 | 
211 |                     fasta.write('>{} {} {} {} {}\n{}\n'.format(
212 |                         read.id, score, channel, read.number, fasta_action, basecall
213 |                     ))
214 | 
215 |             t1 = time.time()
216 |             if t0 + throttle > t1:
217 |                 time.sleep(throttle + t0 - t1)
218 | 
219 |         # end while loop
220 |         logger.info('Received client stop signal.')
221 | 
222 |     return action_counters
223 | 
224 | 
225 | def main():
226 |     parser = read_until_extras._get_parser()
227 |     parser.description = 'Read until with basecall-alignment filter.'
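    # options specific to this demo, added on top of the shared read_until parser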
228 |     parser.add_argument('map_index', help='minimap alignment index.')
229 |     parser.add_argument('--targets', default=None, nargs='+',
230 |         help='list of target regions chr:start-end.')
231 |     parser.add_argument('--control_group', default=16, type=int,
232 |         help='One in this many channels is used as a control.')
233 |     parser.add_argument('--unblock_unknown', default=False,
234 |         action='store_true',
235 |         help='Unblock reads which cannot be positively identified.')
236 |     parser.add_argument('--basecalls_output',
237 |         help='Filename prefix for on-the-fly basecalls.')
238 |     args = parser.parse_args()
239 | 
240 |     logging.basicConfig(format='[%(asctime)s - %(name)s] %(message)s',
241 |         datefmt='%H:%M:%S', level=args.log_level)
242 |     logger = logging.getLogger('Manager')
243 | 
244 |     read_until_client = read_until.ReadUntilClient(
245 |         mk_host=args.host, mk_port=args.port,
246 |         one_chunk=args.one_chunk, filter_strands=True)
247 | 
248 |     if args.targets is None:
249 |         analysis_function = functools.partial(
250 |             divide_analysis, read_until_client, args.map_index,
251 |             delay=args.analysis_delay,
252 |             unblock_duration=args.unblock_duration,
253 |         )
254 |     else:
255 |         logger.info('Loading index')
256 |         mapper = mappy.Aligner(args.map_index, preset='map_ont')
257 |         regions = list()
258 |         for target in args.targets:
259 |             ref, coords = target.split(':')
260 |             start, stop = (int(x) for x in coords.split('-'))
261 |             regions.append((ref, start, stop))
262 |         analysis_function = functools.partial(
263 |             filter_targets, read_until_client, mapper, regions,
264 |             delay=args.analysis_delay, control_group=args.control_group,
265 |             unblock_unknown=args.unblock_unknown, basecalls_output=args.basecalls_output,
266 |             unblock_duration=args.unblock_duration,
267 |         )
268 | 
269 |     # run read until, and capture statistics
270 |     action_counters = read_until_extras.run_workflow(
271 |         read_until_client, analysis_function, args.workers, args.run_time,
272 |         runner_kwargs={
273 |             'min_chunk_size': args.min_chunk_size
274 |         }
275 |     )
276 | 
277 |     # summarise statistics
278 |     total_counters = defaultdict(Counter)
279 |     for worker_counts in action_counters:
280 |         if worker_counts is None:
281 |             logger.warning('A worker failed to return data.')
282 |         else:
283 |             all_keys = set(total_counters.keys()) | set(worker_counts.keys())
284 |             for key in all_keys:
285 |                 total_counters[key] += worker_counts[key]
286 | 
287 |     groups = list(total_counters.keys())
288 |     actions = set()
289 |     for group in groups:
290 |         actions |= set(total_counters[group].keys())
291 | 
292 |     msg = ['Action summary:', '\t'.join(('group', 'action'.ljust(9), 'count'))]
293 |     for group in groups:
294 |         for action in actions:
295 |             msg.append(
296 |                 '\t'.join((str(x) for x in (
297 |                     group, str(action).ljust(9), total_counters[group][action]
298 |                 )))
299 |             )
300 |     msg = '\n'.join(msg)
301 |     logger.info(msg)
302 | 
--------------------------------------------------------------------------------
/read_until/jsonrpc.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2014 Giuseppe Ciotta
2 | # All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions
6 | # are met:
7 | # 1. Redistributions of source code must retain the above copyright
8 | #    notice, this list of conditions and the following disclaimer.
9 | # 2.
Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # 3. The name of the author may not be used to endorse or promote products 13 | # derived from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 | # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 | # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 | # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | 26 | 27 | # This has been modified from the original to work with MinKNOW's "jsonrpc" 28 | # responses which do not entirely fulfill the jsonrpc specification. 29 | 30 | import random 31 | import sys 32 | import json 33 | import functools 34 | import collections 35 | 36 | import requests 37 | 38 | 39 | class JSONRPCError(Exception): 40 | """Root exception for all errors related to this library""" 41 | 42 | 43 | class TransportError(JSONRPCError): 44 | """An error occurred while performing a connection to the server""" 45 | 46 | def __init__(self, message, cause=None, server_response=None): 47 | self.message = message 48 | self.cause = cause 49 | self.server_response = server_response 50 | 51 | def __str__(self): 52 | return self.message 53 | 54 | 55 | class ProtocolError(JSONRPCError): 56 | """An error occurred while dealing with the JSON-RPC protocol""" 57 | 58 | def __init__(self, message, server_data=None, server_response=None): 59 | self.message = message 60 | self.server_data = server_data # the deserialized server data 61 | self.server_response = server_response 62 | 63 | def __str__(self): 64 | return self.message 65 | 66 | 67 | class Client(object): 68 | """A connection to a HTTP JSON-RPC server, backed by requests""" 69 | 70 | def __init__(self, url, session=None, **requests_kwargs): 71 | self.session = session or requests.Session() 72 | self.session.headers.update({ 73 | 'Content-Type': 'application/json', 74 | 'Accept': 'application/json-rpc', 75 | }) 76 | self.request = functools.partial(self.session.post, url, **requests_kwargs) 77 | 78 | def send_request(self, method_name, is_notification, params): 79 | """Issue the HTTP request to the server and return the method result (if not a notification)""" 80 | request_body = self.serialize(method_name, params, is_notification) 81 | try: 82 | response = self.request(data=request_body) 83 | except requests.RequestException as requests_exception: 84 | raise TransportError('Error calling method %r' % method_name, cause=requests_exception) 85 | 86 | if response.status_code != requests.codes.ok: 87 | raise TransportError('Got non-200 response from server, status code: %s' % response.status_code, 88 | server_response=response) 89 | 90 | if not is_notification: 91 | return self.parse_response(response) 92 | 93 | @staticmethod 94 | def parse_response(response): 95 | """Parse the data returned by the 
server according to the JSON-RPC spec. Try to be liberal in what we accept."""
96 |         try:
97 |             server_data = response.json()
98 |         except ValueError as value_error:
99 |             raise ProtocolError('Cannot deserialize response body: %s' % value_error, server_response=response)
100 | 
101 |         if not isinstance(server_data, dict):
102 |             raise ProtocolError('Response is not a dictionary', server_response=response, server_data=server_data)
103 | 
104 |         # the jsonrpc spec says 'error' should only be present if there was an
105 |         # error, but MinKNOW returns '0' when no error is present. It also does not
106 |         # return a structure with 'code' and 'message', just a bare message.
107 |         error = server_data.get('error')
108 |         if error and error != '0':
109 |             code = -32000
110 |             message = error
111 |             raise ProtocolError('Error: %s %s' % (code, message), server_response=response, server_data=server_data)
112 |         elif 'result' not in server_data:
113 |             raise ProtocolError('Response without a result field', server_response=response, server_data=server_data)
114 |         else:
115 |             return server_data['result']
116 | 
117 |     @staticmethod
118 |     def dumps(data):
119 |         """Override this method to customize the serialization process (eg. datetime handling)"""
120 |         return json.dumps(data)
121 | 
122 |     def serialize(self, method_name, params, is_notification):
123 |         """Generate the raw JSON message to be sent to the server"""
124 |         data = {'jsonrpc': '2.0', 'method': method_name}
125 |         if params:
126 |             data['params'] = params
127 |         if not is_notification:
128 |             # some JSON-RPC servers complain when receiving str(uuid.uuid4()). Let's pick something simpler.
129 |             data['id'] = random.randint(1, sys.maxsize)
130 |         return self.dumps(data)
131 | 
132 |     def __getattr__(self, method_name):
133 |         return Method(self.__request, method_name)
134 | 
135 |     def __request(self, method_name, args=None, kwargs=None):
136 |         """Perform the actual RPC call. If _notification=True, send a notification and don't wait for a response"""
137 |         is_notification = kwargs.pop('_notification', False)
138 |         if args and kwargs:
139 |             raise ProtocolError('JSON-RPC spec forbids mixing arguments and keyword arguments')
140 | 
141 |         # from the specs:
142 |         # "If present, parameters for the rpc call MUST be provided as a Structured value.
143 |         # Either by-position through an Array or by-name through an Object."
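        # normalise a single mapping positional argument into by-name parameters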
144 |         if args and len(args) == 1 and isinstance(args[0], collections.Mapping):
145 |             args = dict(args[0])
146 | 
147 |         return self.send_request(method_name, is_notification, args or kwargs)
148 | 
149 | 
150 | class Method(object):
151 |     def __init__(self, request_method, method_name):
152 |         if method_name.startswith("_"):  # prevent rpc-calls for private methods
153 |             raise AttributeError("invalid attribute '%s'" % method_name)
154 |         self.__request_method = request_method
155 |         self.__method_name = method_name
156 | 
157 |     def __getattr__(self, method_name):
158 |         if method_name.startswith("_"):  # prevent rpc-calls for private methods
159 |             raise AttributeError("invalid attribute '%s'" % method_name)
160 |         return Method(self.__request_method, "%s.%s" % (self.__method_name, method_name))
161 | 
162 |     def __call__(self, *args, **kwargs):
163 |         return self.__request_method(self.__method_name, args, kwargs)
164 | 
--------------------------------------------------------------------------------
/read_until/simple.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import concurrent.futures
3 | import functools
4 | import logging
5 | from multiprocessing.pool import ThreadPool
6 | from multiprocessing import TimeoutError
7 | import signal
8 | import sys
9 | import traceback
10 | import time
11 | 
12 | import numpy
13 | 
14 | import read_until
15 | 
16 | class ThreadPoolExecutorStackTraced(concurrent.futures.ThreadPoolExecutor):
17 |     """ThreadPoolExecutor records only the text of an exception;
18 |     this class will give back a bit more."""
19 | 
20 | 
21 |     def submit(self, fn, *args, **kwargs):
22 |         """Submits the wrapped function instead of `fn`"""
23 |         return super(ThreadPoolExecutorStackTraced, self).submit(
24 |             self._function_wrapper, fn, *args, **kwargs)
25 | 
26 | 
27 |     def _function_wrapper(self, fn, *args, **kwargs):
28 |         """Wraps `fn` in order to preserve the traceback of any kind of
29 |         raised exception
30 | 
31 |         """
32 |         try:
33 |             return fn(*args, **kwargs)
34 |         except Exception:
35 |             raise sys.exc_info()[0](traceback.format_exc())
36 | 
37 | 
38 | def ignore_sigint():
39 |     signal.signal(signal.SIGINT, signal.SIG_IGN)
40 | 
41 | 
42 | def _get_parser():
43 |     parser = argparse.ArgumentParser(description='Read Until API demonstration.')
44 |     parser.add_argument('--host', default='127.0.0.1',
45 |         help='MinKNOW server host.')
46 |     parser.add_argument('--port', type=int, default=8000,
47 |         help='MinKNOW gRPC server port.')
48 |     parser.add_argument('--workers', default=1, type=int,
49 |         help='worker threads.')
50 |     parser.add_argument('--analysis_delay', type=int, default=1,
51 |         help='Period to wait before starting analysis.')
52 |     parser.add_argument('--run_time', type=int, default=30,
53 |         help='Period to run the analysis.')
54 |     parser.add_argument('--unblock_duration', type=float, default=0.1,
55 |         help='Time (in seconds) to apply unblock voltage.')
56 |     parser.add_argument('--one_chunk', default=False, action='store_true',
57 |         help='Attempt to receive only one chunk per read.')
58 |     parser.add_argument('--min_chunk_size', type=int, default=2000,
59 |         help='Minimum read chunk size to receive. NOTE: this functionality '
60 |             'is currently disabled; read chunks received will be unfiltered.')
61 |     parser.add_argument(
62 |         '--debug', help="Print all debugging information",
63 |         action="store_const", dest="log_level",
64 |         const=logging.DEBUG, default=logging.WARNING,
65 |     )
66 |     parser.add_argument(
67 |         '--verbose', help="Print verbose messaging.",
68 |         action="store_const", dest="log_level",
69 |         const=logging.INFO,
70 |     )
71 |     return parser
72 | 
73 | 
74 | def simple_analysis(client, batch_size=10, delay=1, throttle=0.1, unblock_duration=0.1):
75 |     """A simple demo analysis leveraging a `ReadUntilClient` to manage
76 |     queuing and expiry of read data.
77 | 
78 |     :param client: an instance of a `ReadUntilClient` object.
79 |     :param batch_size: number of reads to pull from `client` at a time.
80 |     :param delay: number of seconds to wait before starting analysis.
81 |     :param throttle: minimum interval between requests to `client`.
82 |     :param unblock_duration: time in seconds to apply unblock voltage.
83 | 
84 |     """
85 | 
86 |     logger = logging.getLogger('Analysis')
87 |     logger.warning(
88 |         'Initialising simple analysis. '
89 |         'This will likely not achieve anything useful. '
90 |         'Enable --verbose or --debug logging to see more.'
91 |     )
92 |     # sleep a little simply to ensure the client has finished initialising
93 |     logger.info('Starting analysis of reads in {}s.'.format(delay))
94 |     time.sleep(delay)
95 | 
96 |     while client.is_running:
97 |         t0 = time.time()
98 |         # get the most recent read chunks from the client
99 |         read_batch = client.get_read_chunks(batch_size=batch_size, last=True)
100 |         for channel, read in read_batch:
101 |             # convert the read data into a numpy array of correct type
102 |             raw_data = numpy.frombuffer(read.raw_data, client.signal_dtype)
103 |             read.raw_data = read_until.NullRaw
104 | 
105 |             # decide whether the read looks good and we need no more data...
106 |             if read.median_before > read.median and \
107 |                     read.median_before - read.median > 60:
108 |                 client.stop_receiving_read(channel, read.number)
109 |             else:  # ...otherwise eject reads we don't like
110 |                 client.unblock_read(channel, read.number, duration=unblock_duration)
111 | 
112 |         # limit the rate at which we make requests
113 |         t1 = time.time()
114 |         if t0 + throttle > t1:
115 |             time.sleep(throttle + t0 - t1)
116 |     else:
117 |         logger.info('Finished analysis of reads as client stopped.')
118 | 
119 | 
120 | def run_workflow(client, analysis_worker, n_workers, run_time,
121 |                  runner_kwargs=dict()):
122 |     """Run an analysis function against a ReadUntilClient.
123 | 
124 |     :param client: `ReadUntilClient` instance.
125 |     :param analysis_worker: a function to process reads. It should exit in
126 |         response to `client.is_running == False`.
127 |     :param n_workers: number of incarnations of `analysis_worker` to run.
128 |     :param run_time: time (in seconds) to run workflow.
129 |     :param runner_kwargs: keyword arguments for `client.run()`.
130 | 
131 |     :returns: a list of results, one item per worker.
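
    A typical invocation, with illustrative values (compare `main()` below):

    >>> # worker = functools.partial(simple_analysis, client, delay=1)
    >>> # results = run_workflow(client, worker, n_workers=2, run_time=30)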
132 | 
133 |     """
134 |     logger = logging.getLogger('Manager')
135 | 
136 |     results = []
137 |     pool = ThreadPool(n_workers)  # initializer=ignore_sigint)
138 |     logger.info("Creating {} workers".format(n_workers))
139 |     try:
140 |         # start the client
141 |         client.run(**runner_kwargs)
142 |         # start a pool of workers
143 |         for _ in range(n_workers):
144 |             results.append(pool.apply_async(analysis_worker))
145 |         pool.close()
146 |         # wait a bit before closing down
147 |         time.sleep(run_time)
148 |         logger.info("Sending reset")
149 |         client.reset()
150 |         pool.join()
151 |     except KeyboardInterrupt:
152 |         logger.info("Caught ctrl-c, terminating workflow.")
153 |         client.reset()
154 | 
155 |     # collect results (if any)
156 |     collected = []
157 |     for result in results:
158 |         try:
159 |             res = result.get(3)
160 |         except TimeoutError:
161 |             logger.warning("Worker function did not exit successfully.")
162 |             collected.append(None)
163 |         except Exception as e:
164 |             logger.warning("Worker raised exception: {}".format(repr(e)))
165 |         else:
166 |             logger.info("Worker exited successfully.")
167 |             collected.append(res)
168 |     pool.terminate()
169 |     return collected
170 | 
171 | 
172 | def main():
173 |     args = _get_parser().parse_args()
174 | 
175 |     logging.basicConfig(format='[%(asctime)s - %(name)s] %(message)s',
176 |         datefmt='%H:%M:%S', level=args.log_level)
177 | 
178 |     read_until_client = read_until.ReadUntilClient(
179 |         mk_host=args.host, mk_port=args.port,
180 |         one_chunk=args.one_chunk, filter_strands=True)
181 | 
182 |     analysis_worker = functools.partial(
183 |         simple_analysis, read_until_client, delay=args.analysis_delay,
184 |         unblock_duration=args.unblock_duration)
185 | 
186 |     results = run_workflow(
187 |         read_until_client, analysis_worker, args.workers, args.run_time,
188 |         runner_kwargs={
189 |             'min_chunk_size': args.min_chunk_size
190 |         }
191 |     )
192 |     # simple analysis doesn't return results
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | grpcio
2 | requests
3 | minknow-api
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import shutil
4 | import re
6 | import platform
7 | from glob import glob
8 | from setuptools import setup, find_packages, Extension
9 | from setuptools import Distribution, Command
10 | from setuptools.command.install import install
11 | import pkg_resources
12 | 
13 | 
14 | __pkg_name__ = 'read_until'
15 | __author__ = 'cwright'
16 | __description__ = 'Read Until API'
17 | 
18 | 
19 | __path__ = os.path.dirname(__file__)
20 | __pkg_path__ = os.path.join(__path__, __pkg_name__)
21 | 
22 | # Get the version number from __init__.py
23 | verstrline = open(os.path.join(__pkg_name__, '__init__.py'), 'r').read()
24 | vsre = r"^__version__ = ['\"]([^'\"]*)['\"]"
25 | mo = re.search(vsre, verstrline, re.M)
26 | if mo:
27 |     __version__ = mo.group(1)
28 | else:
29 |     raise RuntimeError('Unable to find version string in "{}/__init__.py".'.format(__pkg_name__))
30 | 
31 | 
32 | # Get requirements from file; we prefer these to have been
33 | # preinstalled with pip to make use of wheels.
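# In the loop below, inline '#' comments are stripped from each requirement
# and any 'git+https' URL is reduced to a bare package name.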
34 | dir_path = os.path.dirname(__file__)
35 | install_requires = []
36 | with open(os.path.join(dir_path, 'requirements.txt')) as fh:
37 |     reqs = (
38 |         r.split('#')[0].strip()
39 |         for r in fh.read().splitlines() if not r.startswith('#')
40 |     )
41 |     for req in reqs:
42 |         if req == '':
43 |             continue
44 |         if req.startswith('git+https'):
45 |             req = req.split('/')[-1].split('@')[0]
46 |         install_requires.append(req)
47 | 
48 | extra_requires = {
49 |     'identification': ['scrappy', 'mappy']
50 | }
51 | extensions = []
52 | 
53 | 
54 | setup(
55 |     name=__pkg_name__,
56 |     version=__version__,
57 |     url='https://github.com/nanoporetech/{}'.format(__pkg_name__),
58 |     author=__author__,
59 |     author_email='{}@nanoporetech.com'.format(__author__),
60 |     description=__description__,
61 |     dependency_links=[],
62 |     ext_modules=extensions,
63 |     install_requires=install_requires,
64 |     tests_require=list(install_requires),
65 |     extras_require=extra_requires,
66 |     # don't include any testing subpackages in dist
67 |     packages=find_packages(exclude=['*.test', '*.test.*', 'test.*', 'test']),
68 |     package_data={},
69 |     zip_safe=False,
70 |     entry_points={
71 |         'console_scripts': [
72 |             'read_until_simple = {}.simple:main'.format(__pkg_name__),
73 |             'read_until_ident = {}.identification:main'.format(__pkg_name__)
74 |         ]
75 |     },
76 | )
77 | 
--------------------------------------------------------------------------------