├── .gitignore ├── LICENSE ├── README.md ├── read_until_api_v2 ├── __init__.py ├── _version.py ├── load_minknow_rpc.py ├── main.py ├── read_cache.py └── utils.py ├── requirements.txt ├── requirements_dev.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # pycharm 105 | 
.idea 106 | 107 | # No Nanopore code 108 | rpc/ 109 | minknow/ 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. 
"Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. 
Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. 
Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. 
Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. 
Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. 
However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. 
Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. 
This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. 
Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 
374 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | read_until_api_v2 2 | === 3 | 4 | Python3 implementation of the read_until_api 5 | 6 | --- 7 | 8 | Table of Contents 9 | === 10 | - [Features]() 11 | - [Installation]() 12 | 13 | Features 14 | === 15 | 16 | Setup and Installation 17 | === 18 | 19 | Installation 20 | ------------ 21 | 22 | ```bash 23 | # Clone repositories 24 | git clone https://github.com/looselab/read_until_api_v2.git 25 | git clone https://github.com/looselab/ru.git 26 | 27 | # Build ru code 28 | cd ru 29 | python3 -m venv venv3 30 | source ./venv3/bin/activate 31 | pip install --upgrade pip -r requirements.txt 32 | python setup.py develop 33 | 34 | # Build read until api 35 | cd ../read_until_api_v2 36 | pip install -r requirements.txt 37 | python setup.py develop 38 | ``` 39 | -------------------------------------------------------------------------------- /read_until_api_v2/__init__.py: -------------------------------------------------------------------------------- 1 | from read_until_api_v2.main import * 2 | from read_until_api_v2._version import __version__ 3 | -------------------------------------------------------------------------------- /read_until_api_v2/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "3.0.0" 2 | -------------------------------------------------------------------------------- /read_until_api_v2/load_minknow_rpc.py: -------------------------------------------------------------------------------- 1 | """load_minknow_rpc.py 2 | 3 | This is how we get around having to use python2 and install inside the MinKNOW 4 | directory. 
We simply: 5 | - Get the default MinKNOW installation directory 6 | - Gently reach in and rip out its beating heart (the minknow.rpc module) 7 | - Make a copy here, substituting import statements on the fly to allow imports 8 | - Patch the __init__.py to account for finding certificates 9 | """ 10 | import fileinput 11 | import fnmatch 12 | import os 13 | import platform 14 | import shutil 15 | import sys 16 | 17 | from google.protobuf.json_format import MessageToDict 18 | 19 | PATCH_INIT = """ 20 | import platform 21 | import os 22 | def _minknow_path(operating_system=platform.system()): 23 | return { 24 | "Darwin": os.path.join(os.sep, "Applications", "MinKNOW.app", "Contents", "Resources"), 25 | "Linux": os.path.join(os.sep, "opt", "ont", "minknow"), 26 | "Windows": os.path.join(os.sep, "C:\\\Program Files", "OxfordNanopore", "MinKNOW"), 27 | }.get(operating_system, None) 28 | """ 29 | 30 | READ_UNTIL_DIR = os.path.dirname(os.path.realpath(__file__)) 31 | OPER = platform.system() 32 | MK_PATH = os.path.join("ont-python", "lib", "python2.7", "site-packages", "minknow") 33 | 34 | 35 | def _minknow_path(operating_system=OPER): 36 | """Return default MinKNOW path.""" 37 | return { 38 | "Darwin": os.path.join( 39 | os.sep, "Applications", "MinKNOW.app", "Contents", "Resources" 40 | ), 41 | "Linux": os.path.join(os.sep, "opt", "ont", "minknow"), 42 | "Windows": os.path.join( 43 | os.sep, "C:\\\Program Files", "OxfordNanopore", "MinKNOW" 44 | ), 45 | }.get(operating_system, None) 46 | 47 | 48 | def _mk_module_path(module, operating_system=OPER): 49 | """Return MinKNOW site-package dir relative to ont-python dir.""" 50 | mk_m_path = { 51 | "Windows": os.path.join("ont-python", "Lib", "site-packages", "minknow") 52 | }.get(operating_system, MK_PATH) 53 | return os.path.join(mk_m_path, module) 54 | 55 | 56 | def copy_files( 57 | source_dir, 58 | destination_dir, 59 | file_pattern, 60 | target_module, 61 | current_package=vars(sys.modules[__name__])["__package__"], 62 | ): 
63 | """Copy a module from another location 64 | 65 | Parameters 66 | ---------- 67 | source_dir : str 68 | Source directory to copy from 69 | destination_dir : str 70 | Destination directory, where files are copied to 71 | file_pattern : str 72 | File pattern to copy, eg '*.py' 73 | target_module : str 74 | Module to copy from the source_dir 75 | current_package : str 76 | Current package, this is automatically detected, but may not work 77 | for nested modules 78 | 79 | Returns 80 | ------- 81 | None 82 | """ 83 | 84 | def failed(exc): 85 | raise exc 86 | 87 | destination_dir = os.path.join(READ_UNTIL_DIR, destination_dir) 88 | for dir_path, dirs, files in os.walk(source_dir, topdown=True, onerror=failed): 89 | for file in fnmatch.filter(files, file_pattern): 90 | shutil.copy2(os.path.join(dir_path, file), destination_dir) 91 | edit_file( 92 | os.path.join(destination_dir, file), 93 | "minknow.{}".format(target_module), 94 | "{}.{}".format(current_package, target_module), 95 | ) 96 | edit_file( 97 | os.path.join(destination_dir, file), 98 | "minknow.paths.minknow_base_dir()", 99 | "_minknow_path()", 100 | ) 101 | edit_file(os.path.join(destination_dir, file), "import minknow.paths", "") 102 | if file == "__init__.py": 103 | with open(os.path.join(destination_dir, file), "a") as out: 104 | out.write(PATCH_INIT) 105 | 106 | break # no recursion 107 | 108 | 109 | def edit_file(filename, text_to_search, replacement_text): 110 | """Edit file inplace replacing matching text.""" 111 | with fileinput.FileInput(filename, inplace=True) as file: 112 | for line in file: 113 | print(line.replace(text_to_search, replacement_text), end="") 114 | 115 | 116 | def load_rpc(always_reload=False): 117 | """Load the minknow.rpc module""" 118 | destination_rpc = ("rpc",) 119 | 120 | for module in destination_rpc: 121 | # Remove current RPC 122 | if always_reload and os.path.exists(os.path.join(READ_UNTIL_DIR, module)): 123 | shutil.rmtree(os.path.join(READ_UNTIL_DIR, module)) 124 | 125 
| # Make new RPC directory 126 | if not os.path.exists(os.path.join(READ_UNTIL_DIR, module)): 127 | os.makedirs(os.path.join(READ_UNTIL_DIR, module)) 128 | 129 | # Copy files from MinKNOW 130 | if not os.path.isfile(os.path.join(READ_UNTIL_DIR, module, "__init__.py")): 131 | if _minknow_path() is not None: 132 | source_rpc = os.path.join(_minknow_path(), _mk_module_path(module)) 133 | else: 134 | raise NotImplementedError("MinKNOW not found on this platform") 135 | 136 | if os.path.exists(source_rpc): 137 | copy_files(source_rpc, module, "*.py", module) 138 | else: 139 | raise ValueError("MinKNOW not found on this computer") 140 | 141 | sys.path.insert(0, os.path.join(READ_UNTIL_DIR, module)) 142 | 143 | 144 | def get_rpc_connection(target_device, host="127.0.0.1", port=9501, reload=False): 145 | """Return gRPC connection and message port for a target device 146 | 147 | Parameters 148 | ---------- 149 | target_device : str 150 | ... 151 | host : str 152 | ... 153 | port : int 154 | ... 155 | reload : bool 156 | ... 157 | 158 | Returns 159 | ------- 160 | connection 161 | message_port 162 | """ 163 | load_rpc(reload) 164 | import grpc 165 | from . 
import rpc 166 | 167 | rpc._load() 168 | from .rpc import manager_pb2 as manager 169 | from .rpc import manager_pb2_grpc as manager_grpc 170 | 171 | channel = grpc.insecure_channel("{}:{}".format(host, port)) 172 | stub = manager_grpc.ManagerServiceStub(channel) 173 | list_request = manager.ListDevicesRequest() 174 | response = stub.list_devices(list_request) 175 | for device in response.active: 176 | if device.name == target_device: 177 | rpc_connection = rpc.Connection(host=host,port=device.ports.insecure_grpc) 178 | message_port = device.ports.jsonrpc 179 | return rpc_connection, message_port 180 | 181 | raise ValueError("Device not recognised.") 182 | 183 | 184 | def parse_message(message): 185 | """Parse gRPC message to dict.""" 186 | return MessageToDict( 187 | message, preserving_proto_field_name=True, including_default_value_fields=True, 188 | ) 189 | -------------------------------------------------------------------------------- /read_until_api_v2/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Main module. 4 | 5 | This is a python3 implementation of the read_until_api found at 6 | https://github.com/nanoporetech/read_until_api/ 7 | 8 | It enables the user to run Read Until in a virtual environment and doesn't 9 | require installing within the MinKNOW directory. 
10 | 11 | """ 12 | # Core lib 13 | import logging 14 | import queue 15 | import sys 16 | import time 17 | import uuid 18 | from collections import defaultdict, Counter 19 | from pathlib import Path 20 | from threading import Event, Thread 21 | 22 | # Pypi packages 23 | import numpy as np 24 | 25 | # Internal modules 26 | from read_until_api_v2.load_minknow_rpc import get_rpc_connection, parse_message 27 | from read_until_api_v2.utils import nice_join, _import, setup_logger, new_thread_name 28 | 29 | 30 | 31 | __all__ = ["ReadUntilClient"] 32 | 33 | # This replaces the results of an old call to MinKNOWs 34 | # jsonRPC interface. That interface does not respond 35 | # correctly when a run has been configured using the 36 | # newer gRPC interface. This information is not currently 37 | # available with the gRPC interface so as a temporary 38 | # measure we list a standard set of values here. 39 | CLASS_MAP = { 40 | "read_classification_map": { 41 | 83: "strand", 42 | 67: "strand1", 43 | 77: "multiple", 44 | 90: "zero", 45 | 65: "adapter", 46 | 66: "mux_uncertain", 47 | 70: "user2", 48 | 68: "user1", 49 | 69: "event", 50 | 80: "pore", 51 | 85: "unavailable", 52 | 84: "transition", 53 | 78: "unclassed", 54 | } 55 | } 56 | 57 | 58 | # The maximum allowed minimum read chunk size. Filtering of small read chunks 59 | # from the gRPC stream is buggy. The value 0 effectively disables the filtering 60 | # functionality. 
61 | ALLOWED_MIN_CHUNK_SIZE = 0 62 | 63 | 64 | class ReadUntilClient: 65 | """A Read Until Client for interacting with MinKNOW 66 | 67 | The class handles basic interactions with MinKNOW's gRPC stream and provides 68 | a thread-safe queue, ReadCache, containing the latest data from each channel 69 | 70 | Attributes 71 | ---------- 72 | is_running 73 | missed_chunks 74 | missed_reads 75 | queue_length 76 | acquisition_progress 77 | connection : read_until.rpc.Connection 78 | The gRPC connection to MinKNOW 79 | 80 | Methods 81 | ------- 82 | run() 83 | reset() 84 | get_read_chunks() 85 | unblock_read() 86 | stop_receiving_read() 87 | 88 | """ 89 | 90 | def __init__( 91 | self, 92 | mk_host="127.0.0.1", 93 | mk_port=9501, 94 | device=None, 95 | cache_size=512, 96 | cache_type="ReadCache", 97 | signal_calibration=False, 98 | filter_strands=True, 99 | one_chunk=False, 100 | pre_filter_classes=None, 101 | reload_rpc=True, 102 | log_file=None, 103 | ): 104 | """A basic Read Until client. 105 | 106 | This class handles the interactions with the MinKNOW gRPC stream. 107 | It requires a thread-safe queue/cache to operate. There are two 108 | provided in `read_cache.py`. 109 | 110 | Parameters 111 | ---------- 112 | mk_host : str 113 | The host to connect to MinKNOW on, default: "127.0.0.1" 114 | mk_port : int 115 | The insecure channel port for MinKNOW, default: 9501 116 | device : str 117 | The device to get the connection for. E.G MinION: MN18458, GridION: 118 | GA10000, PromethION: 1-A1-D1 119 | cache_size : int 120 | The maximum size of the read cache, default: 512 121 | cache_type : str or class 122 | The cache for managing incoming data from the gRPC. 123 | If a string is provided that cache will be loaded from read_cache.py. 124 | Otherwise, if a class is provided it will be used as the cache. See 125 | read_cache.py for descriptions and requirements of the cache. 
126 | signal_calibration : bool 127 | Request calibrated or uncalibrated signal from the gRPC, default: False 128 | filter_strands : bool 129 | Filter incoming data for only strand like classifications. If True 130 | strand classes must be provided in pre_filter_classes, default: True 131 | one_chunk : bool 132 | default: False 133 | pre_filter_classes : set (or iterable) 134 | Classes to filter reads by. Ignored if `filter_strands` is False, 135 | default: {'strand', 'adapter'} 136 | reload_rpc : bool 137 | Reload the RPC when initiating the client, default: True 138 | log_file : str 139 | Filepath to log messages to if not provided use console, default: None 140 | 141 | Examples 142 | -------- 143 | 144 | To set up and use a client: 145 | 146 | >>> read_until_client = ReadUntilClient() 147 | 148 | This creates an initial connection to a MinKNOW instance in 149 | preparation for setting up live reads stream. To initiate the stream: 150 | 151 | >>> read_until_client.run() 152 | 153 | The client is now receiving data, and calls to methods 154 | of `read_until_client` can then be made in a separate thread. 155 | For example a continually running analysis function can be 156 | submitted to the executor as: 157 | 158 | >>> def analysis(client, *args, **kwargs): 159 | ... while client.is_running: 160 | ... for channel, read in client.get_read_chunks(): 161 | ... raw_data = np.fromstring(read.raw_data, client.signal_dtype) 162 | ... # do something with raw data... and maybe call: 163 | ... # client.stop_receiving_read(channel, read.number) 164 | ... # client.unblock_read(channel, read.number) 165 | >>> with ThreadPoolExecutor() as executor: 166 | ... 
executor.submit(analysis_function, read_until_client) 167 | 168 | To stop processing the gRPC read stream: 169 | 170 | >>> read_until_client.reset() 171 | 172 | If an analysis function is set up as above in response to 173 | `client.is_running`, calling the above call will cause the 174 | analysis function to return. 175 | 176 | """ 177 | # TODO: infer flowcell size from device we get back for cache size 178 | # eg: cache_size becomes "infer" or int 179 | # c = self.connection 180 | # len(parse_message(c.device.get_channels_layout())['channel_records']) 181 | if pre_filter_classes is None: 182 | pre_filter_classes = {"strand", "adapter"} 183 | 184 | self.logger = setup_logger( 185 | __name__, 186 | # "ReadUntilClient_v2", 187 | log_file=log_file, 188 | log_format="%(asctime)s %(name)s %(message)s", 189 | level=logging.INFO, 190 | ) 191 | self.device = device 192 | self.mk_host = mk_host 193 | self.mk_port = mk_port 194 | self.reload_rpc = reload_rpc 195 | self.cache_size = cache_size 196 | 197 | # Alternatively, check that cache is sub of BaseCache 198 | if isinstance(cache_type, str): 199 | current_package = vars(sys.modules[__name__])["__package__"] 200 | self.CacheType = _import( 201 | "{}.read_cache.{}".format(current_package, cache_type) 202 | ) 203 | else: 204 | self.CacheType = cache_type 205 | 206 | self.filter_strands = filter_strands 207 | self.one_chunk = one_chunk 208 | self.pre_filter_classes = pre_filter_classes 209 | 210 | if self.filter_strands and not self.pre_filter_classes: 211 | raise ValueError("Read filtering set but no filter classes given.") 212 | 213 | self.logger.info( 214 | "Client type: {} chunk".format("single" if self.one_chunk else "many") 215 | ) 216 | self.logger.info("Cache type: {}".format(self.CacheType.__name__)) 217 | 218 | pre_filter_classes_str = "no filter" 219 | if self.pre_filter_classes: 220 | pre_filter_classes_str = nice_join(self.pre_filter_classes, " ", "and") 221 | 222 | self.logger.info("Filter for classes: 
{}".format(pre_filter_classes_str)) 223 | 224 | self.strand_classes = set( 225 | int(k) 226 | for k, v in CLASS_MAP["read_classification_map"].items() 227 | if v in self.pre_filter_classes 228 | ) 229 | self.logger.debug("Strand-like classes are: {}.".format(self.strand_classes)) 230 | self.logger.info("Creating rpc connection for device {}.".format(self.device)) 231 | 232 | # try: 233 | # from . import rpc 234 | # 235 | # rpc._load() 236 | # except Exception as e: 237 | # self.logger.warning("RPC module not found\n{}".format(e)) 238 | # self.logger.info("Attempting to load RPC") 239 | 240 | self.connection, self.message_port = get_rpc_connection( 241 | target_device=self.device, 242 | host=self.mk_host, 243 | port=self.mk_port, 244 | reload=self.reload_rpc, 245 | ) 246 | 247 | self.logger.info("Loaded RPC") 248 | self.msgs = self.connection.data._pb 249 | 250 | log_waiting = True 251 | while parse_message(self.connection.acquisition.current_status())["status"] != "PROCESSING": 252 | if log_waiting: 253 | self.logger.info("Waiting for device to start processing") 254 | log_waiting = False 255 | 256 | self.mk_run_dir = Path(parse_message( 257 | self.connection.protocol.get_current_protocol_run() 258 | )["output_path"]) 259 | 260 | # Create the output dir if it doesn't already exist 261 | # Sometimes we are faster than MinKNOW, this isn't a problem on OS X 262 | 263 | if self.mk_host in ("127.0.0.1","localhost"): 264 | #we are running locally so: 265 | self.mk_run_dir.mkdir(parents=True, exist_ok=True) 266 | self.unblock_logger = setup_logger( 267 | # Necessary to use a str of the Path for 3.5 compatibility 268 | "unblocks", 269 | log_file=str(self.mk_run_dir / "unblocked_read_ids.txt"), 270 | ) 271 | else: 272 | #we are running remotely: 273 | self.mk_run_dir=Path(".") 274 | self.unblock_logger = setup_logger( 275 | # Necessary to use a str of the Path for 3.5 compatibility 276 | "unblocks", 277 | log_file="unblocked_read_ids.txt", 278 | ) 279 | 280 | # Get signal 
calibrations 281 | self.calibration, self.calibration_dtype = { 282 | True: (self.msgs.GetLiveReadsRequest.CALIBRATED, "calibrated_signal",), 283 | False: (self.msgs.GetLiveReadsRequest.UNCALIBRATED, "uncalibrated_signal",), 284 | }.get(signal_calibration) 285 | 286 | _data_types = parse_message(self.connection.data.get_data_types())[ 287 | self.calibration_dtype 288 | ] 289 | 290 | _signal_dtype = { 291 | "FLOATING_POINT": {2: "float16", 4: "float32"}, 292 | "SIGNED_INTEGER": {2: "int16", 4: "int32"}, 293 | "UNSIGNED_INTEGER": {2: "uint16", 4: "uint32"}, 294 | }.get(_data_types["type"], {}).get(_data_types["size"], None) 295 | 296 | if _signal_dtype is not None: 297 | self.signal_dtype = np.dtype(_signal_dtype) 298 | else: 299 | raise NotImplementedError("Unrecognized signal dtype") 300 | 301 | self.logger.info("Signal data-type: {}".format(self.signal_dtype)) 302 | # setup the queues and running status 303 | self._process_thread = None 304 | self.reset() 305 | 306 | def run(self, **kwargs): 307 | """Start the ReadUntilClient to get data from gRPC stream 308 | 309 | Other Parameters 310 | ---------------- 311 | first_channel : int 312 | last_channel : int 313 | raw_data_type : np.dtype 314 | sample_minimum_chunk_size : int 315 | """ 316 | self._process_thread = Thread( 317 | target=self._run, name=new_thread_name(), kwargs=kwargs 318 | ) 319 | self._process_thread.start() 320 | self.logger.info("Processing started") 321 | 322 | def reset(self, timeout=5): 323 | """Reset the state of the client to an initial (not running) 324 | state with no data or requests in queues. 325 | 326 | """ 327 | # self._process_reads is blocking => it runs in a thread. 
328 | if self._process_thread is not None: 329 | self.logger.info("Reset request received, shutting down...") 330 | self.running.clear() 331 | # block, try hard for .cancel() on stream 332 | self._process_thread.join() 333 | if self._process_thread.is_alive(): 334 | self.logger.warning("Stream handler did not finish correctly.") 335 | else: 336 | self.logger.info("Stream handler exited successfully.") 337 | self._process_thread = None 338 | 339 | # a flag to indicate whether gRPC stream is being processed. Any 340 | # running ._runner() will respond to this. 341 | self.running = Event() 342 | # the action_queue is used to store unblock/stop_receiving_data 343 | # requests before they are put on the gRPC stream. 344 | self.action_queue = queue.Queue() 345 | # the data_queue is used to store the latest chunk per channel 346 | self.data_queue = self.CacheType(size=self.cache_size) 347 | # stores all sent action ids -> unblock/stop 348 | self.sent_actions = dict() 349 | 350 | @property 351 | def acquisition_progress(self): 352 | """Get MinKNOW data acquisition progress. 353 | 354 | Returns 355 | ------- 356 | An object with attributes .acquired and .processed 357 | 358 | """ 359 | return self.connection.acquisition.get_progress().raw_per_channel 360 | 361 | @property 362 | def queue_length(self): 363 | """The length of the read queue.""" 364 | return len(self.data_queue) 365 | 366 | @property 367 | def missed_reads(self): 368 | """Number of reads ejected from queue (i.e reads had one or more chunks 369 | enter into the analysis queue but were replaced with a distinct read 370 | before being pulled from the queue.""" 371 | return self.data_queue.missed 372 | 373 | @property 374 | def missed_chunks(self): 375 | """Number of read chunks replaced in queue by a chunk from the same 376 | read (a single read may have its queued chunk replaced more than once). 
377 | 378 | """ 379 | return self.data_queue.replaced 380 | 381 | @property 382 | def is_running(self): 383 | """The processing status of the gRPC stream.""" 384 | return self.running.is_set() 385 | 386 | def get_read_chunks(self, batch_size=1, last=True): 387 | """Get read chunks, removing them from the queue. 388 | 389 | Parameters 390 | ---------- 391 | batch_size : int 392 | The maximum number of reads to retrieve from the ReadCache 393 | last : bool 394 | If True return most recently added reads, otherwise oldest 395 | 396 | Returns 397 | ------- 398 | list 399 | List of tuples as (channel_number, raw_read) 400 | """ 401 | return self.data_queue.popitems(n=batch_size, last=last) 402 | 403 | def unblock_read(self, read_channel, read_number, duration=0.1, read_id=None): 404 | """Request that a read be unblocked. 405 | 406 | Parameters 407 | ---------- 408 | read_channel : int 409 | The channel number for the read to be unblocked 410 | read_number : int 411 | The read number for the read to be unblocked 412 | duration : float 413 | The time, in seconds, to apply the unblock voltage 414 | read_id : str 415 | The read id (UUID4) of the read to be unblocked 416 | 417 | Returns 418 | ------- 419 | None 420 | """ 421 | if read_id is not None: 422 | self.unblock_logger.debug(read_id) 423 | self._put_action(read_channel, read_number, "unblock", duration=duration) 424 | 425 | def stop_receiving_read(self, read_channel, read_number): 426 | """Request to receive no more data for a read. 
427 | 428 | Parameters 429 | ---------- 430 | read_channel : int 431 | The channel number for the read to be unblocked 432 | read_number : int 433 | The read number for the read to be unblocked 434 | 435 | Returns 436 | ------- 437 | None 438 | """ 439 | self._put_action(read_channel, read_number, "stop_further_data") 440 | 441 | def _run(self, **kwargs): 442 | """Manage conversion and processing of read chunks""" 443 | self.running.set() 444 | # .get_live_reads() takes an iterable of requests and generates 445 | # raw data chunks and responses to our requests: the iterable 446 | # thereby controls the lifetime of the stream. ._runner() as 447 | # implemented below initialises the stream then transfers 448 | # action requests from the action_queue to the stream. 449 | reads = self.connection.data.get_live_reads(self._runner(**kwargs)) 450 | 451 | # ._process_reads() as implemented below is responsible for 452 | # placing action requests on the queue and logging the responses. 453 | # We really want to be calling reads.cancel() below so catch 454 | # everything and anything. 455 | try: 456 | self._process_reads(reads) 457 | except Exception as e: 458 | self.logger.info(e) 459 | self.logger.info("MinKNOW may have finished acquisition, press Ctrl + C to exit ") 460 | # TODO: Catch the RPC error here to handle nicely? 461 | 462 | # signal to the server that we are done with the stream. 463 | reads.cancel() 464 | 465 | def _runner( 466 | self, 467 | first_channel=1, 468 | last_channel=512, 469 | min_chunk_size=ALLOWED_MIN_CHUNK_SIZE, 470 | action_batch=1000, 471 | action_throttle=0.1, 472 | ): 473 | """Yield the stream initializer request followed by action requests 474 | placed into the action_queue. 475 | 476 | Parameters 477 | ---------- 478 | first_channel : int 479 | Lowest channel for which to receive raw data. 480 | last_channel : int 481 | Highest channel (inclusive) for which to receive data. 
482 | min_chunk_size : int 483 | Minimum number of raw samples in a raw data chunk. 484 | action_batch : int 485 | Maximum number of actions to batch in a single response. 486 | action_throttle : float 487 | Time in seconds to wait between sending action batches 488 | 489 | Yields 490 | ------ 491 | read_chunks 492 | From rpc.connection.data._pb.GetLiveReadRequest.StreamSetup 493 | """ 494 | # see note at top of this module 495 | if min_chunk_size > ALLOWED_MIN_CHUNK_SIZE: 496 | self.logger.warning( 497 | "Reducing min_chunk_size to {}".format(ALLOWED_MIN_CHUNK_SIZE) 498 | ) 499 | min_chunk_size = ALLOWED_MIN_CHUNK_SIZE 500 | 501 | self.logger.info( 502 | "Sending init command, channels:{}-{}, min_chunk:{}".format( 503 | first_channel, last_channel, min_chunk_size 504 | ) 505 | ) 506 | yield self.msgs.GetLiveReadsRequest( 507 | setup=self.msgs.GetLiveReadsRequest.StreamSetup( 508 | first_channel=first_channel, 509 | last_channel=last_channel, 510 | raw_data_type=self.calibration, 511 | sample_minimum_chunk_size=min_chunk_size, 512 | ) 513 | ) 514 | 515 | while self.is_running: 516 | t0 = time.time() 517 | # get as many items as we can up to the maximum, without blocking 518 | actions = list() 519 | for _ in range(action_batch): 520 | try: 521 | action = self.action_queue.get_nowait() 522 | except queue.Empty: 523 | break 524 | else: 525 | actions.append(action) 526 | 527 | n_actions = len(actions) 528 | if n_actions > 0: 529 | self.logger.debug("Sending {} actions.".format(n_actions)) 530 | action_group = self.msgs.GetLiveReadsRequest( 531 | actions=self.msgs.GetLiveReadsRequest.Actions(actions=actions) 532 | ) 533 | yield action_group 534 | 535 | # limit response interval 536 | t1 = time.time() 537 | if t0 + action_throttle > t1: 538 | time.sleep(action_throttle + t0 - t1) 539 | else: 540 | self.logger.info("Reset signal received by action handler.") 541 | 542 | def _process_reads(self, reads): 543 | """Process the gRPC stream data, storing read chunks in the 
data_queue. 544 | 545 | Parameters 546 | ---------- 547 | reads : iterable 548 | Iterable of gRPC data stream as produced by get_live_reads() 549 | 550 | Returns 551 | ------- 552 | None 553 | """ 554 | response_counter = defaultdict(Counter) 555 | 556 | unique_reads = set() 557 | 558 | read_count = 0 559 | samples_behind = 0 560 | raw_data_bytes = 0 561 | for reads_chunk in reads: 562 | if not self.is_running: 563 | self.logger.info("Stopping processing of reads due to reset.") 564 | break 565 | # In each iteration, we get: 566 | # i) responses to our previous actions (success/fail) 567 | # ii) raw data for current reads 568 | 569 | # record a count of success and fails 570 | if len(reads_chunk.action_reponses): 571 | for response in reads_chunk.action_reponses: 572 | action_type = self.sent_actions[response.action_id] 573 | response_counter[action_type][response.response] += 1 574 | 575 | progress = self.acquisition_progress 576 | for read_channel in reads_chunk.channels: 577 | read_count += 1 578 | read = reads_chunk.channels[read_channel] 579 | if self.one_chunk: 580 | if read.id in unique_reads: 581 | # previous stop request wasn't enacted in time, don't 582 | # put the read back in the queue to avoid situation 583 | # where read has been popped from queue already and 584 | # we reinsert. 
585 | self.logger.debug( 586 | "Rereceived {}:{} after stop request.".format( 587 | read_channel, read.number 588 | ) 589 | ) 590 | continue 591 | self.stop_receiving_read(read_channel, read.number) 592 | unique_reads.add(read.id) 593 | read_samples_behind = progress.acquired - read.chunk_start_sample 594 | samples_behind += read_samples_behind 595 | raw_data_bytes += len(read.raw_data) 596 | 597 | self.logger.debug("Classification: {}, {}, {}".format(read_channel,read.number,read.chunk_classifications)) 598 | # Removed list constructor around generator statement 599 | strand_like = any( 600 | x in self.strand_classes for x in read.chunk_classifications 601 | ) 602 | if not self.filter_strands or strand_like: 603 | self.data_queue[read_channel] = read 604 | 605 | def _put_action(self, read_channel, read_number, action, **params): 606 | """Stores an action requests on the queue ready to be placed on the 607 | gRPC stream. 608 | 609 | Parameters 610 | ---------- 611 | read_channel : int 612 | A read's channel number 613 | read_number : int 614 | A read's read number (the nth read per channel) 615 | action : str 616 | One of either 'stop_further_data' or 'unblock' 617 | 618 | Other Parameters 619 | ---------------- 620 | duration : float 621 | Allowed when action is 'unblock' 622 | 623 | Returns 624 | ------- 625 | None 626 | 627 | Raises 628 | ------ 629 | ValueError 630 | If action is not one of 'stop_further_data' or 'unblock' 631 | """ 632 | action_id = str(uuid.uuid4()) 633 | action_kwargs = { 634 | "action_id": action_id, 635 | "channel": read_channel, 636 | "number": read_number, 637 | } 638 | self.sent_actions[action_id] = action 639 | if action == "stop_further_data": 640 | action_kwargs[action] = self.msgs.GetLiveReadsRequest.StopFurtherData() 641 | elif action == "unblock": 642 | action_kwargs[action] = self.msgs.GetLiveReadsRequest.UnblockAction() 643 | if "duration" in params: 644 | action_kwargs[action].duration = params["duration"] 645 | else: 646 | 
raise ValueError("action parameter must be stop_further_data or unblock") 647 | 648 | action_request = self.msgs.GetLiveReadsRequest.Action(**action_kwargs) 649 | self.action_queue.put(action_request) 650 | -------------------------------------------------------------------------------- /read_until_api_v2/read_cache.py: -------------------------------------------------------------------------------- 1 | """read_cache.py 2 | 3 | This module contains the read cache classes that are used to hold reads as they 4 | are streamed from MinKNOW to the ReadUntil API. 5 | 6 | We provide a BaseCache class that implements the required methods for retrieving 7 | reads from the queue in a thread safe manner. It is an ordered and keyed queue, 8 | of a maximum size, based on collections.OrderedDict. 9 | 10 | See BaseCache for details on implementing your own cache. 11 | """ 12 | from collections import OrderedDict 13 | from collections.abc import MutableMapping 14 | from threading import RLock 15 | 16 | 17 | __all__ = ["AccumulatingCache", "ReadCache"] 18 | 19 | 20 | class BaseCache(MutableMapping): 21 | """A thread-safe dict-like container with a maximum size 22 | 23 | This BaseCache contains all the required methods for working as an ordered 24 | cache with a max size except for __setitem__, which should be user defined. 
25 | 26 | Parameters 27 | ---------- 28 | size : int 29 | The maximum number of items to hold 30 | 31 | Attributes 32 | ---------- 33 | size : int 34 | The maximum size of the cache 35 | missed : int 36 | The number items never removed from the queue 37 | replaced : int 38 | The number of items replaced by a newer item (reads chunks replaced by a 39 | chunk from the same read) 40 | _container : OrderedDict 41 | An instance of an OrderedDict that forms the read cache 42 | lock : threading.Rlock 43 | The instance of the lock used to make the cache thread-safe 44 | 45 | Examples 46 | -------- 47 | 48 | When inheriting from BaseCache only the __setitem__ method needs to be 49 | included. The attribute `_container` is an instance of OrderedDict that 50 | forms the cache so this is the object that must be updated. 51 | 52 | This example is not likely to be a good cache. 53 | 54 | >>> class DerivedCache(BaseCache): 55 | ... def __setitem__(self, key, value): 56 | ... # The lock is required to maintain thread-safety 57 | ... with self.lock: 58 | ... # Logic to apply when adding items to the cache 59 | ... 
self._container[key] = value 60 | """ 61 | 62 | def __init__(self, size): 63 | # Using this test instead of @abc.abstractmethod so 64 | # that sub-classes don't require a __init__ method 65 | if self.__class__ == BaseCache: 66 | raise TypeError( 67 | "Can't instantiate abstract class {}".format(BaseCache.__name__) 68 | ) 69 | 70 | if size < 1: 71 | raise ValueError("size must be > 1") 72 | 73 | self.size = size 74 | self.replaced = 0 75 | self.missed = 0 76 | self._container = OrderedDict() 77 | self.lock = RLock() 78 | 79 | def __getitem__(self, key): 80 | """Delegate with lock.""" 81 | with self.lock: 82 | return self._container.get(key) 83 | 84 | def __setitem__(self, k, v): 85 | """Raise NotImplementedError if not overridden.""" 86 | raise NotImplementedError("__setitem__ should be overridden by your cache.") 87 | 88 | def __delitem__(self, key): 89 | """Delegate with lock.""" 90 | with self.lock: 91 | self._container.__delitem__(key) 92 | 93 | def __len__(self): 94 | """Delegate with lock.""" 95 | with self.lock: 96 | return len(self._container) 97 | 98 | def __iter__(self): 99 | """Raise NotImplementedError as unlikely to be thread-safe.""" 100 | raise NotImplementedError("Iteration is unlikely to be thread-safe.") 101 | 102 | def __contains__(self, item): 103 | """Delegate with lock.""" 104 | with self.lock: 105 | return self._container.__contains__(item) 106 | 107 | def popitem(self, last=True): 108 | """Delegate with lock.""" 109 | with self.lock: 110 | return self._container.popitem(last=last) 111 | 112 | def popitems(self, n=1, last=True): 113 | """Return a list of popped items from the cache. 
114 | 115 | Parameters 116 | ---------- 117 | n : int 118 | The maximum number of items to return 119 | last : bool 120 | If True, return the newest entries (LIFO); if False oldest entries (FIFO) 121 | 122 | Returns 123 | ------- 124 | list 125 | Output list of upto n (key, value) pairs from the cache 126 | """ 127 | if n > self.size: 128 | n = self.size 129 | 130 | with self.lock: 131 | data = [] 132 | while len(self._container) > self.size - n: 133 | data.append(self._container.popitem(last=last)) 134 | 135 | return data 136 | 137 | def keys(self): 138 | """Return a list of keys currently in the cache.""" 139 | with self.lock: 140 | return list(self._container.keys()) 141 | 142 | 143 | class ReadCache(BaseCache): 144 | def __setitem__(self, key, value): 145 | with self.lock: 146 | already_replaced = False 147 | while len(self._container) >= self.size: 148 | already_replaced = True 149 | k, v = self._container.popitem(last=False) 150 | if k == key and v.number == value.number: 151 | self.replaced += 1 152 | else: 153 | self.missed += 1 154 | 155 | if key in self._container: 156 | if not already_replaced: 157 | if self._container[key].number == value.number: 158 | self.replaced += 1 159 | else: 160 | self.missed += 1 161 | 162 | self._container.__delitem__(key) 163 | self._container.__setitem__(key, value) 164 | 165 | 166 | class AccumulatingCache(BaseCache): 167 | def __setitem__(self, key, value): 168 | with self.lock: 169 | if key not in self._container: 170 | # Key not in dict 171 | self._container.__setitem__(key, value) 172 | else: 173 | # Key exists 174 | if self[key].number == value.number: 175 | # Same read, so update raw data 176 | self[key].raw_data += value.raw_data 177 | self.missed += 1 178 | else: 179 | # New read 180 | self._container.__setitem__(key, value) 181 | self.replaced += 1 182 | # Move most recently updated keys to right 183 | # TODO: test moving the other way e.g left, 184 | # shouldn't make too much of a difference. 
import time
import logging
from itertools import count as _count
from multiprocessing import TimeoutError
from multiprocessing.pool import ThreadPool

# Module-level monotonically increasing counter used by new_thread_name().
_counter = _count()


def new_thread_name(template="read_until-{:04d}"):
    """Helper to generate new, unique thread names"""
    return template.format(next(_counter))


def setup_logger(name, log_format="%(message)s", log_file=None, mode="a", level=logging.DEBUG):
    """Setup loggers

    Parameters
    ----------
    name : str
        Name to give the logger
    log_format : str
        logging format string using % formatting
    log_file : str
        File to record logs to, sys.stderr if not set
    mode : str
        If log_file is specified, open the file in this mode. Defaults to 'a'
    level : logging.LEVEL
        Where logging.LEVEL is one of (DEBUG, INFO, WARNING, ERROR, CRITICAL)

    Returns
    -------
    logger

    Notes
    -----
    Repeated calls with the same ``name`` return the same logger object and
    do not attach an additional handler.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.propagate = False

    # Only attach a handler the first time this named logger is configured.
    # Previously a new handler was added on every call, so calling
    # setup_logger twice with the same name duplicated every log record.
    if not logger.handlers:
        formatter = logging.Formatter(log_format)
        if log_file is not None:
            handler = logging.FileHandler(log_file, mode=mode)
        else:
            handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger


def nice_join(seq, sep=", ", conjunction="or"):
    """Join lists nicely, e.g. "a, b or c" """
    seq = [str(x) for x in seq]

    if len(seq) <= 1 or conjunction is None:
        return sep.join(seq)
    else:
        return "{} {} {}".format(sep.join(seq[:-1]), conjunction, seq[-1])


def _import(name):
    """Dynamically import modules and classes, used to get the ReadCache

    https://stackoverflow.com/a/547867/3279716
    https://docs.python.org/2.4/lib/built-in-funcs.html

    Parameters
    ----------
    name : str
        The module/class path. E.g: "read_until.read_cache.{}".format("ReadCache")

    Returns
    -------
    module
    """
    components = name.split('.')
    mod = __import__(components[0])
    for comp in components[1:]:
        mod = getattr(mod, comp)
    return mod


def run_workflow(client, partial_analysis_func, n_workers, run_time, runner_kwargs=None):
    """Run an analysis function against a ReadUntilClient

    Parameters
    ----------
    client : read_until.ReadUntilClient
        An instance of the ReadUntilClient object
    partial_analysis_func : partial function
        Analysis function to process reads, should
        exit when client.is_running == False
    n_workers : int
        Number of analysis worker functions to run
    run_time : int
        Time, in seconds, to run the analysis for
    runner_kwargs : dict
        Keyword arguments to pass to client.run()

    Returns
    -------
    list
        Results from the analysis function; workers that timed out or raised
        contribute no entry, so the list may be shorter than n_workers.
    """
    if runner_kwargs is None:
        runner_kwargs = dict()

    logger = logging.getLogger("Manager")

    results = []
    pool = ThreadPool(n_workers)
    logger.info("Creating {} workers".format(n_workers))
    try:
        # start the client
        client.run(**runner_kwargs)
        # start a pool of workers
        for _ in range(n_workers):
            results.append(pool.apply_async(partial_analysis_func))
        pool.close()
        # wait a bit before closing down
        time.sleep(run_time)
        logger.info("Sending reset")
        client.reset()
        pool.join()
    except KeyboardInterrupt:
        logger.info("Caught ctrl-c, terminating workflow.")
        client.reset()
    except Exception:
        client.reset()
        raise

    # collect results (if any)
    collected = []
    for result in results:
        try:
            res = result.get(5)
        except TimeoutError:
            logger.warning("Worker function did not exit successfully.")
            # NOTE(review): the append below was deliberately(?) commented
            # out upstream, so timed-out workers leave no placeholder entry.
            # collected.append(None)
        except Exception as e:
            logger.exception("EXCEPT", exc_info=e)
            # logger.warning("Worker raise exception: {}".format(repr(e)))
        else:
            logger.info("Worker exited successfully.")
            collected.append(res)
    pool.terminate()
    return collected
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""The setup script."""
from setuptools import setup, find_packages

PKG = "read_until_api_v2"

# Define an empty version that is overwritten by executing _version.py below.
# A context manager is used so the file handle is closed (the previous bare
# exec(open(...).read()) leaked it).
__version__ = ""
with open("{}/_version.py".format(PKG)) as version_file:
    exec(version_file.read())

# Explicit encoding: without it open() uses the platform default codec, which
# fails on e.g. Windows (cp1252) if the README contains non-ASCII characters.
with open("README.md", encoding="utf-8") as readme_file:
    readme = readme_file.read()

setup_requirements = []
test_requirements = []

with open("requirements.txt", encoding="utf-8") as f:
    requirements = f.read().splitlines()

setup(
    author="Alex Payne",
    author_email="alexander.payne@nottingham.ac.uk",
    python_requires=">=3.5, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=2.*",
    classifiers=[
        "Development Status :: 2 - Pre-Alpha",
        "Intended Audience :: Developers",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
    ],
    description="Python3 implementation of the read_until_api",
    install_requires=requirements,
    long_description=readme,
    include_package_data=True,
    keywords="read_until_api_v2",
    name="read_until_api_v2",
    packages=find_packages(include=["read_until_api_v2", "read_until_api_v2.*"]),
    setup_requires=setup_requirements,
    test_suite="tests",
    tests_require=test_requirements,
    url="https://github.com/LooseLab/read_until_api_v2",
    version=__version__,
    zip_safe=False,
)