├── .gitignore ├── LICENSE ├── README.md ├── cobolt ├── __init__.py ├── model │ ├── __init__.py │ ├── clustering.py │ ├── cobolt.py │ └── coboltmodel.py ├── tests │ ├── __init__.py │ ├── test_clustering.py │ ├── test_data │ │ ├── joint_a │ │ │ ├── barcodes.tsv │ │ │ ├── counts.mtx │ │ │ └── features.tsv │ │ ├── joint_b │ │ │ ├── barcodes.tsv │ │ │ ├── counts.mtx │ │ │ └── features.tsv │ │ ├── single_a │ │ │ ├── barcodes.tsv │ │ │ ├── counts.mtx │ │ │ └── features.tsv │ │ └── single_b │ │ │ ├── barcodes.tsv │ │ │ ├── counts.mtx │ │ │ └── features.tsv │ ├── test_load_data.py │ └── test_model.py └── utils │ ├── __init__.py │ ├── data.py │ └── dataset.py ├── docs ├── README.md ├── multiome10X.py ├── tutorial.ipynb └── tutorial.py ├── setup.py └── summary.png /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea/ 3 | *.pyc 4 | .pytest_cache/ 5 | *_cache/ 6 | */*/__pycache__/* 7 | */__pycache__/* 8 | venv/ 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cobolt: Joint analysis of multimodal single-cell sequencing data 2 | 3 | Cobolt is a Python package for 4 | 5 | 1. the analysis of the single-cell data from joint-modality platforms; 6 | 2. the integration of joint-modality single-cell datasets with single-omic data sets. 7 | 8 | ## Introduction 9 | 10 | ![summary](summary.png) 11 | 12 | Cobolt is a method developed for modeling multi-omic single-cell sequencing datasets. It provides an integrated analysis of multi-omic data by learning a latent representation shared by the different data modalities. Cobolt jointly models these multiple modalities via a novel application of Multimodal Variational Autoencoder to a hierarchical generative model. It uses a probablity model that is robust to sparse counts and high numbers of features, therefore works for omics such as chromatin accessibility or methylation status. 13 | 14 | Details of the method can be found in the manuscript on [bioRxiv](https://www.biorxiv.org/content/10.1101/2021.04.03.438329v1). 15 | 16 | ## Installation 17 | 18 | Cobolt requires Python v3.8 or higher. To install Cobolt from GitHub, run the following: 19 | 20 | ```bash 21 | pip install git+https://github.com/epurdom/cobolt.git#egg=cobolt 22 | ``` 23 | 24 | ## Tutorials 25 | 26 | - [Integrating multi-modality platforms with single-modality platforms](https://github.com/epurdom/cobolt/blob/master/docs/tutorial.ipynb) 27 | 28 | ## Bug Reports 29 | 30 | Cobolt is in active development. Submit bug reports and make feature requests by opening a issue [here](https://github.com/epurdom/cobolt/issues). 31 | -------------------------------------------------------------------------------- /cobolt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epurdom/cobolt/cea9a5c6297326aca00a10aeaa198d21b07e4889/cobolt/__init__.py -------------------------------------------------------------------------------- /cobolt/model/__init__.py: -------------------------------------------------------------------------------- 1 | from cobolt.model.coboltmodel import CoboltModel 2 | from cobolt.model.cobolt import Cobolt 3 | 4 | __all__ = ['CoboltModel', 'Cobolt'] 5 | -------------------------------------------------------------------------------- /cobolt/model/clustering.py: -------------------------------------------------------------------------------- 1 | 2 | from sklearn.neighbors import kneighbors_graph 3 | import igraph as ig 4 | import leidenalg 5 | import numpy as np 6 | 7 | 8 | class ClusterUtil: 9 | 10 | def __init__(self, k=20, key=0): 11 | self.key=key 12 | self.k=k 13 | 14 | def check_version(self, k, key): 15 | return self.k == k and self.key == key 16 | 17 | def fit(self, latent): 18 | self.latent = latent 19 | self.snn_mat = snn_from_data(latent, self.k) 20 | self.graph = graph_from_snn(self.snn_mat) 21 | self.cluster = {} 22 | 23 | def run_louvain(self, overwrite=False): 24 | """To test !!!!!!!!!!!!!!!!!!!!""" 25 | key = 'louvain' 26 | if key in self.cluster and not overwrite: 27 | print("Clustering results already exist. To rerun, set overwrite to True.") 28 | else: 29 | print("Running Louvain clustering algorithm.") 30 | undirected = self.graph.copy() 31 | undirected.to_undirected(combine_edges='sum') 32 | res = undirected.community_multilevel( 33 | weights=self.graph.es['weight'], 34 | return_levels=False 35 | ) 36 | self.cluster[key] = np.array(res.membership) 37 | 38 | def run_leiden(self, resolution=1, seed=0, overwrite=False): 39 | key = 'leiden_{:.3f}'.format(resolution) 40 | if key in self.cluster and not overwrite: 41 | print("Clustering results already exist. To rerun, set overwrite to True.") 42 | else: 43 | print("Running Leiden clustering algorithm with resolution {:.3f}.".format(resolution)) 44 | kwargs = {'weights': np.array(self.graph.es['weight']).astype(np.float64), 45 | 'resolution_parameter': resolution} 46 | partition = leidenalg.find_partition( 47 | self.graph, 48 | partition_type=leidenalg.RBConfigurationVertexPartition, 49 | seed=seed, 50 | **kwargs 51 | ) 52 | self.cluster[key] = np.array(partition.membership) 53 | 54 | def get_clusters(self, algo="leiden", resolution=1): 55 | if algo == "leiden": 56 | key = '{}_{:.3f}'.format(algo, resolution) 57 | elif algo == "louvain": 58 | key = "louvain" 59 | else: 60 | raise ValueError("Clustering algorithm must be leiden or louvain.") 61 | if key not in self.cluster: 62 | if algo == "leiden": 63 | self.run_leiden(resolution) 64 | elif algo == "louvain": 65 | self.run_louvain() 66 | return self.cluster[key] 67 | 68 | 69 | def snn_from_data(latent, k): 70 | knn_mat = kneighbors_graph(latent, k, mode='connectivity', include_self=False) 71 | snn_mat = knn_mat.dot(knn_mat.T) 72 | snn_mat.data[:] = snn_mat.data / (k + k - snn_mat.data) 73 | snn_mat.setdiag(0) 74 | return snn_mat 75 | 76 | 77 | def graph_from_snn(snn_mat): 78 | snn_mat.eliminate_zeros() 79 | graph = ig.Graph(n=snn_mat.shape[0], 80 | edges=list(zip(*snn_mat.nonzero())), 81 | edge_attrs={'weight': snn_mat.data}, 82 | directed=True) 83 | return graph 84 | -------------------------------------------------------------------------------- /cobolt/model/cobolt.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import os 4 | from scipy import sparse 5 | from tqdm import tqdm 6 | from sklearn.manifold import TSNE 7 | import random 8 | import itertools 9 | from xgboost import XGBRegressor 10 | import numpy as np 11 | from cobolt.model.coboltmodel import CoboltModel 12 | from cobolt.model.clustering import ClusterUtil 13 | from cobolt.utils import MultiomicDataset 14 | import umap 15 | import torch 16 | from torch.utils.data import DataLoader, Subset, SubsetRandomSampler 17 | import matplotlib.pyplot as plt 18 | from matplotlib import cm 19 | from typing import List 20 | 21 | 22 | class Cobolt: 23 | """ 24 | A Cobolt model 25 | 26 | Parameters 27 | ---------- 28 | dataset 29 | A MultiomicDataset object. 30 | n_latent 31 | Number of latent variables used in the Cobolt model. 32 | device 33 | The device on which the model will be trained, such as 'cpu' or 'cuda'. 34 | If not specified, the device will be set to 'cuda' if available. 35 | lr 36 | Learning rate for the Adam optimizer. 37 | annealing_epochs 38 | Number of annealing epochs for the cost annealing scheme. 39 | alpha 40 | Parameter of the Dirichlet prior distribution. 41 | hidden_dims 42 | A list of integers indicating the number of hidden dimensions to use for 43 | the encoder neural networks. The number of fully connected layers are 44 | determined by the length of the list. 45 | intercept_adj 46 | Whether to use the intercept term for batch correction. 47 | slope_adj 48 | Whether to use the slope term for batch correction. 49 | train_prop 50 | The proportion of random samples to use for training. 51 | """ 52 | def __init__(self, 53 | dataset: MultiomicDataset, 54 | n_latent: int, 55 | device: str = None, 56 | lr: float = 0.005, 57 | annealing_epochs: int = 30, 58 | batch_size: int = 128, 59 | alpha: float = None, 60 | hidden_dims: List = None, 61 | intercept_adj: bool = True, 62 | slope_adj: bool = True, 63 | train_prop: float = 1): 64 | if device is None: 65 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 66 | else: 67 | self.device = device 68 | self.lr = lr 69 | self.epoch = 0 70 | self.annealing_epochs = annealing_epochs 71 | self.batch_size = batch_size 72 | self.history = {"loss": []} 73 | 74 | self.dataset = dataset 75 | self.n_latent = n_latent 76 | self.alpha = 50.0 / n_latent if alpha is None else alpha 77 | self.hidden_dims = [128, 64] if hidden_dims is None else hidden_dims 78 | self.model = CoboltModel( 79 | in_channels=dataset.get_feature_shape(), 80 | hidden_dims=hidden_dims, 81 | n_dataset=dataset.n_dataset, 82 | latent_dim=n_latent, 83 | intercept_adj=intercept_adj, 84 | slope_adj=slope_adj, 85 | log=True 86 | ).to(self.device) 87 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr) 88 | 89 | self.test_train_split(train_prop) 90 | self.train_omic = self.get_train_omic() 91 | 92 | self.latent_raw = {} 93 | self.latent = {} 94 | self.reduction_raw = {} 95 | self.reduction = {} 96 | self.cluster_model = None 97 | 98 | def train(self, 99 | num_epochs: int = 100): 100 | """ 101 | Function for training the Cobolt model. 102 | 103 | Parameters 104 | ---------- 105 | num_epochs 106 | Number of epochs/iterations. 107 | """ 108 | for epoch in tqdm(range(1, num_epochs + 1)): 109 | if self.epoch < self.annealing_epochs: 110 | annealing_factor = float(self.epoch) / float(self.annealing_epochs) 111 | else: 112 | annealing_factor = 1.0 113 | 114 | this_loss = [] 115 | for omics in self.train_omic: 116 | this_idx = np.intersect1d(self.dataset.get_comb_idx(omics), self.train_idx) 117 | if len(this_idx) == 0: 118 | continue 119 | dt_loader = DataLoader( 120 | dataset=self.dataset, 121 | batch_size=self.batch_size, 122 | collate_fn=lambda x: collate_wrapper(x, omics), 123 | sampler=SubsetRandomSampler(this_idx)) 124 | this_size = len(this_idx) 125 | for x in dt_loader: 126 | # Forward pass 127 | x = [[x_i.to(self.device) if x_i is not None else None for x_i in y] for y in x] 128 | latent_loss, recon_loss = self.model(x, elbo_combn=[omics]) 129 | # Backprop and optimize 130 | loss = annealing_factor * latent_loss + recon_loss 131 | self.optimizer.zero_grad() 132 | loss.backward() 133 | self.optimizer.step() 134 | 135 | this_loss.append((latent_loss.item() + recon_loss.item())/this_size) 136 | 137 | self.history['loss'].append(sum(this_loss)) 138 | self.epoch += 1 139 | 140 | if np.isnan(self.history['loss'][-1]): 141 | raise ValueError("DIVERGED. Try a smaller learning rate.") 142 | 143 | def get_latent(self, omic_combn, data="train", return_barcode=False): 144 | return self._get_latent_helper( 145 | omic_combn, data, what="latent", return_barcode=return_barcode 146 | ) 147 | 148 | def get_topic_prop(self, omic_combn, data="train", return_barcode=False): 149 | return self._get_latent_helper( 150 | omic_combn, data, what="topic_prop", return_barcode=return_barcode 151 | ) 152 | 153 | def _get_latent_helper(self, 154 | omic_combn, 155 | data="train", 156 | what="latent", 157 | return_barcode=False): 158 | if data == "train": 159 | sample_idx = self.train_idx 160 | elif data == "test": 161 | sample_idx = self.test_idx 162 | else: 163 | raise ValueError 164 | 165 | if self.epoch == 0: 166 | raise Exception("Model haven't been trained yet.") 167 | 168 | sample_idx = np.intersect1d(self.dataset.get_comb_idx(omic_combn), sample_idx) 169 | dl = DataLoader( 170 | dataset=Subset(self.dataset, sample_idx), 171 | batch_size=128, 172 | collate_fn=lambda x: collate_wrapper(x, omic_combn), 173 | shuffle=False 174 | ) 175 | latent = [] 176 | for i, x in enumerate(dl): 177 | x = [[x_i.to(self.device) if x_i is not None else None for x_i in y] for y in x] 178 | if what == "latent": 179 | latent += [self.model.get_latent(x, elbo_bool=omic_combn)] 180 | elif what == "topic_prop": 181 | latent += [self.model.get_topic_prop(x, elbo_bool=omic_combn)] 182 | res = np.concatenate(latent) 183 | if return_barcode: 184 | return res, self.dataset.get_barcode()[sample_idx] 185 | return res 186 | 187 | def calc_all_latent(self, 188 | target: List[bool] = None): 189 | """ 190 | Calculate the latent variable estimation. 191 | 192 | Parameters 193 | ---------- 194 | target 195 | A list of boolean indicating which posterior distribution is used 196 | as benchmark for correction. 197 | """ 198 | n_modality = len(self.dataset.omic) 199 | if target is None: 200 | target = [True] * n_modality 201 | target_dt, target_barcode = self.get_latent(target, return_barcode=True) 202 | dt_corrected = [target_dt] 203 | barcode_corrected = target_barcode 204 | for i, x in enumerate(self.dataset.omic): 205 | om_combn = [False] * n_modality 206 | om_combn[i] = True 207 | raw_dt, raw_barcode = self.get_latent(om_combn, return_barcode=True) 208 | bool_train = np.isin(raw_barcode, target_barcode) 209 | bool_test = ~np.isin(raw_barcode, barcode_corrected) 210 | if sum(bool_test) != 0: 211 | raw_dt_train = raw_dt[bool_train, ] 212 | raw_dt_test = raw_dt[bool_test] 213 | raw_bc_train = raw_barcode[bool_train] 214 | raw_bc_test = raw_barcode[bool_test] 215 | barcode_dict = {x: i for i, x in enumerate(raw_bc_train)} 216 | reorder = [barcode_dict[i] for i in target_barcode] 217 | raw_dt_train = raw_dt_train[reorder, ] 218 | this_predicted = [] 219 | for i in range(self.n_latent): 220 | xgb_model = XGBRegressor() 221 | xgb_model.fit(X=raw_dt_train, y=target_dt[:, i].copy()) 222 | this_predicted.append(xgb_model.predict(raw_dt_test)) 223 | dt_corrected.append(np.asarray(this_predicted).T) 224 | barcode_corrected = np.concatenate((barcode_corrected, raw_bc_test)) 225 | dt_corrected = np.vstack(dt_corrected) 226 | dt_corrected = (dt_corrected.T - np.mean(dt_corrected, axis=1)).T 227 | self.latent = { 228 | "latent": dt_corrected, 229 | "barcode": barcode_corrected, 230 | "epoch": self.epoch 231 | } 232 | 233 | def calc_all_latent_raw(self): 234 | n_modality = len(self.dataset.omic) 235 | dt, barcode = self.get_latent([True] * n_modality, return_barcode=True) 236 | posterior = ["joint"] * len(barcode) 237 | for i, x in enumerate(self.dataset.omic): 238 | om_combn = [False] * n_modality 239 | om_combn[i] = True 240 | raw_dt, raw_barcode = self.get_latent(om_combn, return_barcode=True) 241 | dt = np.vstack((dt, raw_dt)) 242 | barcode = np.concatenate((barcode, raw_barcode)) 243 | posterior.extend([x] * len(raw_barcode)) 244 | self.latent_raw = { 245 | "latent": dt, 246 | "barcode": barcode, 247 | "posterior": np.asarray(posterior), 248 | "epoch": self.epoch 249 | } 250 | 251 | def get_all_latent(self, correction=True): 252 | """ 253 | Return the latent variable estimation. 254 | 255 | Parameters 256 | ---------- 257 | correction 258 | Whether to return the corrected latent variable estimation. 259 | 260 | Returns 261 | ------- 262 | latent 263 | Latent variable estimation. 264 | barcode 265 | Corresponding cell barcode of the latent variables. 266 | posterior 267 | Which posterior distribution is used for latent variable 268 | estimation. Only provided if correction is set to `False`. 269 | """ 270 | if correction: 271 | if not self.latent or self.latent["epoch"] != self.epoch: 272 | self.calc_all_latent() 273 | return self.latent["latent"], self.latent["barcode"] 274 | else: 275 | if not self.latent_raw or self.latent["epoch"] != self.epoch: 276 | self.calc_all_latent_raw() 277 | return self.latent_raw["latent"], self.latent_raw["barcode"], self.latent_raw["posterior"] 278 | 279 | def run_UMAP(self, 280 | correction=True, 281 | n_components=2, 282 | n_neighbors=30, 283 | min_dist=0.1, 284 | metric='euclidean'): 285 | """ 286 | Run UMAP on the latent variable estimation. 287 | 288 | Parameters 289 | ---------- 290 | correction 291 | Whether to use corrected latent variables. 292 | n_components 293 | The dimension of the space to embed into, which is usually set to 294 | 2 or 3. 295 | n_neighbors 296 | The size of the neighborhood for UMAP. 297 | min_dist: 298 | The effective minimum distance between embeded points. 299 | metric: 300 | The metric to use to compute distances for UMAP. 301 | 302 | Notes 303 | ----- 304 | `n_components`, `n_neighbors`, `min_dist`, and `metric` are UMAP 305 | parameters. We direct users to python package `umap` for additional 306 | details. 307 | """ 308 | print("Running UMAP {} latent variable correction.".format("with" if correction else "without")) 309 | dt = self.get_all_latent(correction=correction) 310 | latent = dt[0] 311 | reducer = umap.UMAP( 312 | n_neighbors=n_neighbors, 313 | min_dist=min_dist, 314 | n_components=n_components, 315 | metric=metric 316 | ) 317 | embedding = reducer.fit_transform(latent) 318 | if correction: 319 | self.reduction["UMAP" + str(n_components)] = { 320 | "embedding": embedding, 321 | "barcode": dt[1], 322 | "epoch": self.epoch 323 | } 324 | else: 325 | self.reduction_raw["UMAP" + str(n_components)] = { 326 | "embedding": embedding, 327 | "barcode": dt[1], 328 | "posterior": dt[2], 329 | "epoch": self.epoch 330 | } 331 | 332 | def run_tSNE(self, correction=True, perplexity=30): 333 | print("Running tSNE {} latent variable correction.".format("with" if correction else "without")) 334 | dt = self.get_all_latent(correction=correction) 335 | latent = dt[0] 336 | embedding = TSNE(n_components=2, perplexity=perplexity).fit_transform(latent) 337 | if correction: 338 | self.reduction["tSNE"] = { 339 | "embedding": embedding, 340 | "barcode": dt[1], 341 | "epoch": self.epoch 342 | } 343 | else: 344 | self.reduction_raw["tSNE"] = { 345 | "embedding": embedding, 346 | "barcode": dt[1], 347 | "posterior": dt[2], 348 | "epoch": self.epoch 349 | } 350 | 351 | def clustering(self, k=20, algo="leiden", resolution=1, seed=0, overwrite=False): 352 | """ 353 | Run clustering on the corrected latent variables. 354 | 355 | Parameters 356 | ---------- 357 | k 358 | Number of nearest neighbors to use in the KNN graph construction. 359 | algo 360 | Clustering algorithm to use. Available options are "leiden" or 361 | "louvain". 362 | resolution 363 | Clustering resolution to use for leiden clustering. Not used if algo 364 | is set to "louvain". 365 | seed 366 | Random seed to use for leiden clustering. Not used if algo is set 367 | to "louvain". 368 | overwrite 369 | Whether to overwrite previous results with the same clustering 370 | parameters. 371 | """ 372 | if not self.cluster_model or not self.cluster_model.check_version(k, self.epoch): 373 | self.cluster_model = ClusterUtil(k=k, key=self.epoch) 374 | dt = self.get_all_latent(correction=True) 375 | latent = dt[0] 376 | self.cluster_model.fit(latent) 377 | if algo == "leiden": 378 | self.cluster_model.run_leiden(resolution=resolution, seed=seed, overwrite=overwrite) 379 | elif algo == "louvain": 380 | self.cluster_model.run_louvain(overwrite=overwrite) 381 | else: 382 | raise ValueError("Clustering algorithm not supported.") 383 | 384 | def get_clusters(self, algo="leiden", resolution=1, return_barcode=False): 385 | """ 386 | Return the clustering results. 387 | 388 | Parameters 389 | ---------- 390 | algo 391 | Clustering algorithm to use. Available options are "leiden" or 392 | "louvain". 393 | resolution 394 | Clustering resolution to use for leiden clustering. Not used if algo 395 | is set to "louvain". 396 | return_barcode 397 | Whether to return the cells barcode. 398 | 399 | Returns 400 | ------- 401 | clusters 402 | An integer array indicating the clustering results. 403 | barcode 404 | An array of cell barcode. Only provided if `return_barcode` is set 405 | to `True`. 406 | """ 407 | if not self.cluster_model: 408 | print("Clustering has not been run yet. Call `clustering` function first.") 409 | else: 410 | if return_barcode: 411 | latent, barcode = self.get_all_latent(correction=True) 412 | return self.cluster_model.get_clusters(algo, resolution), barcode 413 | else: 414 | return self.cluster_model.get_clusters(algo, resolution) 415 | 416 | def scatter_plot(self, 417 | reduc="UMAP", 418 | algo="leiden", 419 | resolution=1, 420 | correction=True, 421 | annotation=None, 422 | s=1, 423 | figsize=(10, 5)): 424 | if correction: 425 | use_reduc = self.reduction 426 | else: 427 | use_reduc = self.reduction_raw 428 | 429 | if reduc == "UMAP": 430 | if "UMAP2" not in use_reduc or use_reduc["UMAP2"]["epoch"] != self.epoch: 431 | self.run_UMAP(correction=correction) 432 | dt = use_reduc["UMAP2"]["embedding"] 433 | barcode = use_reduc["UMAP2"]["barcode"] 434 | elif reduc == "tSNE": 435 | if "tSNE" not in use_reduc or use_reduc["tSNE"]["epoch"] != self.epoch: 436 | self.run_tSNE(correction=correction) 437 | dt = use_reduc["tSNE"]["embedding"] 438 | barcode = use_reduc["tSNE"]["barcode"] 439 | else: 440 | raise ValueError("Reduction must be UMAP or tSNE") 441 | 442 | if annotation is None: 443 | annotation = self.get_clusters(algo, resolution) 444 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize) 445 | scatter1 = ax1.scatter(dt[:, 0], dt[:, 1], c=annotation, s=s, cmap=cm.rainbow) 446 | ax1.legend(*scatter1.legend_elements(), loc="upper left", title="Cluster") 447 | datasource = np.array([self.dataset.dataset[b] for b in barcode]) 448 | for i in np.unique(datasource): 449 | mask = datasource == i 450 | ax2.scatter(dt[mask, 0], dt[mask, 1], label=i, s=s) 451 | ax2.legend(loc="upper left", title="Dataset") 452 | fig.show() 453 | else: 454 | raise NotImplementedError 455 | 456 | def get_train_omic(self, sample=5): 457 | n_omic = len(self.dataset.omic) 458 | if n_omic + 1 + sample < 2 ** n_omic - 1: 459 | train_omic = [[False] * n_omic for i in range(n_omic)] 460 | for i in range(n_omic): 461 | train_omic[i][i] = True 462 | train_omic.append([True] * n_omic) 463 | i = 0 464 | while i < sample: 465 | s = random.sample(range(n_omic), random.choice(range(1, n_omic))) 466 | s = [True if i in s else False for i in range(n_omic)] 467 | if not s in train_omic: 468 | train_omic.append(s) 469 | i += 1 470 | else: 471 | train_omic = [list(i) for i in itertools.product([False, True], repeat=n_omic)] 472 | train_omic = [i for i in train_omic if any(i)] 473 | return train_omic 474 | 475 | def test_train_split(self, train_prop): 476 | n_samples = len(self.dataset) 477 | permuted_idx = np.random.permutation(n_samples) 478 | self.train_idx = permuted_idx[:int(n_samples * train_prop)] 479 | self.test_idx = permuted_idx[int(n_samples * train_prop):] 480 | self.barcode = self.dataset.get_barcode() 481 | self.train_barcode = self.barcode[self.train_idx] 482 | 483 | 484 | def collate_wrapper(batch, omic_combn): 485 | dataset = [x[1] for x in batch] 486 | batch = [x[0] for x in batch] 487 | dataset = [torch.tensor(list(x)) if include else None 488 | for x, include in zip(zip(*dataset), omic_combn)] 489 | batch = [torch.from_numpy(sparse.vstack(x).toarray()).float() if include else None 490 | for x, include in zip(zip(*batch), omic_combn)] 491 | return batch, dataset 492 | 493 | 494 | def shuffle_dataloaders(dt_loader, dt_type): 495 | dt_idx = [[i] * len(x) for i, x in enumerate(dt_loader)] 496 | dt_idx = sum(dt_idx, []) 497 | random.Random(4).shuffle(dt_idx) 498 | dt_iter = [iter(x) for x in dt_loader] 499 | i = 0 500 | while i < len(dt_idx): 501 | yield next(dt_iter[dt_idx[i]]), dt_type[dt_idx[i]] 502 | i += 1 503 | -------------------------------------------------------------------------------- /cobolt/model/coboltmodel.py: -------------------------------------------------------------------------------- 1 | # code reference: 2 | # https://github.com/AntixK/PyTorch-VAE 3 | # https://github.com/mhw32/multimodal-vae-public 4 | 5 | import itertools 6 | import torch 7 | from torch import nn 8 | import numpy as np 9 | from typing import List 10 | import matplotlib.pyplot as plt 11 | import seaborn as sns 12 | from torch import logsumexp 13 | from torch.distributions import Normal 14 | 15 | 16 | def xavier_init(fan_in, fan_out, constant=1): 17 | low = -constant * np.sqrt(6.0 / (fan_in + fan_out)) 18 | high = constant * np.sqrt(6.0 / (fan_in + fan_out)) 19 | return (low - high) * torch.rand(fan_in, fan_out) + high 20 | 21 | 22 | class CoboltModel(nn.Module): 23 | 24 | def __init__(self, 25 | in_channels: List, 26 | latent_dim: int, 27 | n_dataset: List, 28 | hidden_dims: List = None, 29 | alpha: float = None, 30 | max_capacity: int = 25, 31 | capacity_max_iter: int = 1e5, 32 | loss_type: str = 'B', 33 | intercept_adj: bool = True, 34 | slope_adj: bool = True, 35 | log: bool = True): 36 | super(CoboltModel, self).__init__() 37 | 38 | self.latent_dim = latent_dim 39 | if alpha is None: 40 | self.alpha = 50.0 / latent_dim 41 | else: 42 | self.alpha = alpha 43 | self.loss_type = loss_type 44 | self.C_max = torch.Tensor([max_capacity]) 45 | self.C_stop_iter = capacity_max_iter 46 | self.device = "cuda" if torch.cuda.is_available() else "cpu" 47 | self.experts = ProductOfExperts() 48 | self.n_dataset = n_dataset 49 | self.intercept_adj = intercept_adj 50 | self.slope_adj = slope_adj 51 | self.log = log 52 | 53 | self.beta = nn.ParameterList() 54 | for in_ch in in_channels: 55 | self.beta.append(torch.nn.Parameter(xavier_init(latent_dim, in_ch), requires_grad=True)) 56 | self.beta_dataset = nn.ParameterList() 57 | for in_ch, n_d in zip(in_channels, n_dataset): 58 | self.beta_dataset.append(torch.nn.Parameter(xavier_init(n_d, in_ch), requires_grad=True)) 59 | self.beta_dataset_mtp = nn.ParameterList() 60 | for in_ch, n_d in zip(in_channels, n_dataset): 61 | self.beta_dataset_mtp.append(torch.nn.Parameter(torch.rand(n_d, in_ch), requires_grad=True)) 62 | 63 | if hidden_dims is None: 64 | hidden_dims = [128, 64] 65 | 66 | # Constructing Laplace Approximation to Dirichlet Prior 67 | # The greater the alpha, the higher the mode. That is, the probs will 68 | # be more centered around (1/latent_dim, ..., 1/latent_dim) 69 | self.a = self.alpha * torch.ones(1, self.latent_dim) 70 | self.mu2 = (torch.log(self.a) - torch.mean(torch.log(self.a), 1)).to(device=self.device) 71 | self.var2 = (((1 / self.a) * (1 - (2.0 / self.latent_dim))) + 72 | (1.0 / (self.latent_dim * self.latent_dim)) * torch.sum(1 / self.a, 1)).to(device=self.device) 73 | 74 | self.encoder = nn.ModuleList() 75 | self.fc_mu = nn.ModuleList() 76 | self.fc_var = nn.ModuleList() 77 | for in_ch in in_channels: 78 | # Build Encoder 79 | modules = [] 80 | current_in = in_ch 81 | for h_dim in hidden_dims: 82 | modules.append( 83 | nn.Sequential( 84 | nn.Linear(current_in, h_dim), 85 | nn.BatchNorm1d(h_dim), 86 | nn.LeakyReLU()) 87 | ) 88 | current_in = h_dim 89 | self.encoder.append(nn.Sequential(*modules)) 90 | self.fc_mu.append(nn.Linear(hidden_dims[-1], latent_dim)) 91 | self.fc_var.append(nn.Linear(hidden_dims[-1], latent_dim)) 92 | 93 | def reparameterize(self, mu, var): 94 | # std = torch.exp(0.5 * log_var) 95 | std = var.sqrt() 96 | # torch.randn_like 97 | # Returns a tensor with the same size as input that is filled with 98 | # random numbers from a normal distribution with mean 0 and variance 1. 99 | eps = torch.randn_like(std) 100 | return eps * std + mu 101 | 102 | def encode(self, x: List): 103 | batch_size = [x_i.size(0) for x_i in x if x_i is not None][0] 104 | qz_m, qz_logv = prior_expert(self.mu2, self.var2, batch_size) 105 | qz_m = qz_m.to(self.device) 106 | qz_logv = qz_logv.to(self.device) 107 | mu = [qz_m] 108 | log_var = [qz_logv] 109 | for x_i, encoder, fc_mu, fc_var in zip(x, self.encoder, self.fc_mu, self.fc_var): 110 | if x_i is not None: 111 | if self.log: 112 | x_i = torch.log(x_i + 1) 113 | result = encoder(x_i) 114 | mu += [fc_mu(result).unsqueeze(0)] 115 | log_var += [fc_var(result).unsqueeze(0)] 116 | else: 117 | mu += [qz_m] # this is a placeholder 118 | log_var += [qz_logv] # won't be used b.c. elbo_combn 119 | mu = torch.cat(mu, dim=0) 120 | log_var = torch.cat(log_var, dim=0) 121 | return mu, log_var 122 | 123 | def forward(self, x: List, elbo_combn=None): 124 | x, dataset = x 125 | n_modality = len(x) 126 | if elbo_combn is None: 127 | elbo_combn = \ 128 | [list(i) 129 | for i in itertools.product([False, True], repeat=n_modality) 130 | if sum(i) != 0] 131 | 132 | mu, log_var = self.encode(x) 133 | recon_loss = 0 134 | latent_loss = 0 135 | for elbo_bool in elbo_combn: 136 | mu_subset, var = self.experts(mu[[True] + elbo_bool], 137 | log_var[[True] + elbo_bool]) 138 | z = self.reparameterize(mu_subset, var) 139 | beta_subset = [self.beta[i] for i, j in enumerate(elbo_bool) if j] 140 | x_subset = [x[i] for i, j in enumerate(elbo_bool) if j] 141 | beta_dataset_subset = [self.beta_dataset[i] for i, j in enumerate(elbo_bool) if j] 142 | beta_dataset_mtp_subset = [self.beta_dataset_mtp[i] for i, j in enumerate(elbo_bool) if j] 143 | dataset_subset = [dataset[i] for i, j in enumerate(elbo_bool) if j] 144 | n_fac = [self.n_dataset[i] for i, j in enumerate(elbo_bool) if j] 145 | for beta_i, x_i, beta_dt_i, beta_dt_mtp_i, dt_i, n_f in \ 146 | zip(beta_subset, x_subset, beta_dataset_subset, beta_dataset_mtp_subset, dataset_subset, n_fac): 147 | slope_adj = torch.matmul(fac_to_mat(dt_i, n_f), beta_dt_mtp_i) if self.intercept_adj else 0 148 | intercept_adj = torch.matmul(fac_to_mat(dt_i, n_f), beta_dt_i) if self.slope_adj else 0 149 | recon_loss += - torch.sum( 150 | x_i * torch.log(torch.softmax( 151 | torch.matmul(torch.softmax(z, dim=1), beta_i) * (slope_adj + 1) + intercept_adj, 152 | dim=1))) 153 | latent_loss += self.latent_loss(mu_subset, var, self.mu2, self.var2) 154 | 155 | return latent_loss, recon_loss 156 | 157 | def latent_loss(self, mu0, var0, mu1, var1): 158 | latent_loss = 0.5 * torch.sum( # minimize risk, maximize ELBO 159 | var0 / var1 + (mu1 - mu0) / var1 * (mu1 - mu0) - self.latent_dim + \ 160 | torch.log(var1) - torch.log(var0)) 161 | return latent_loss 162 | 163 | @torch.no_grad() 164 | def get_beta(self): 165 | return [beta.cpu().numpy().T for beta in self.beta] 166 | 167 | @torch.no_grad() 168 | def get_topic_prop(self, x, elbo_bool=None): 169 | x, dataset = x 170 | # TODO: This is not the posterior mean of \theta 171 | mu, log_var = self.encode(x) 172 | if elbo_bool is None: 173 | elbo_bool = [True]*len(x) 174 | mu_subset, var = self.experts(mu[[True] + elbo_bool], 175 | log_var[[True] + elbo_bool]) 176 | return torch.softmax(mu_subset, dim=1).cpu().numpy() 177 | 178 | @torch.no_grad() 179 | def get_latent(self, x, elbo_bool=None): 180 | x, dataset = x 181 | mu, log_var = self.encode(x) 182 | if elbo_bool is None: 183 | elbo_bool = [True]*len(x) 184 | mu_subset, var = self.experts(mu[[True] + elbo_bool], 185 | log_var[[True] + elbo_bool]) 186 | return mu_subset.cpu().numpy() 187 | 188 | @torch.no_grad() 189 | def get_posterior(self, x, elbo_bool=None): 190 | mu, log_var = self.encode(x) 191 | if elbo_bool is None: 192 | elbo_bool = [True]*len(x) 193 | mu_subset, var = self.experts(mu[[True] + elbo_bool], 194 | log_var[[True] + elbo_bool]) 195 | return mu_subset, var 196 | 197 | @torch.no_grad() 198 | def get_marginal_likelihood(self, x, elbo_bool=None, rep=100): 199 | """ 200 | Reference scVI 201 | """ 202 | if elbo_bool is None: 203 | elbo_bool = [True]*len(x) 204 | 205 | mu, log_var = self.encode(x) 206 | mu_subset, var = self.experts(mu[[True] + elbo_bool], 207 | log_var[[True] + elbo_bool]) 208 | 209 | beta_subset = [self.beta[i] for i, j in enumerate(elbo_bool) if j] 210 | x_subset = [x[i] for i, j in enumerate(elbo_bool) if j] 211 | l_sum = torch.zeros(x[0].size()[0], rep) 212 | for i in range(rep): 213 | z = self.reparameterize(mu_subset, var) 214 | z1 = self.reparameterize(mu_subset, var) 215 | recon_loss = torch.zeros(x[0].size()[0]).to(device=self.device) 216 | for beta_i, x_i in zip(beta_subset, x_subset): 217 | recon_loss += - torch.sum( 218 | x_i * torch.log(torch.softmax( 219 | torch.matmul(torch.softmax(z1, dim=1), beta_i), 220 | dim=1)), dim=1) 221 | p_z = Normal(torch.zeros_like(self.mu2), torch.ones_like(self.var2).sqrt()).log_prob(z).sum(dim=-1) 222 | q_z_x = Normal(mu_subset, var.sqrt()).log_prob(z).sum(dim=-1) 223 | p_x_z = - recon_loss 224 | l_sum[:, i] += (p_z + p_x_z - q_z_x).cpu().numpy() 225 | batch_log_lkl = logsumexp(l_sum, dim=-1) - np.log(rep) 226 | return - torch.sum(batch_log_lkl).item() 227 | 228 | @torch.no_grad() 229 | def plot_beta(self, path=None): 230 | dt = self.get_beta() 231 | for i, x in enumerate(dt): 232 | fig = plt.figure() 233 | sns.heatmap(x, cmap="Blues") 234 | if path is not None: 235 | fig.savefig(path + 'beta' + str(i) + '.png') 236 | plt.close() 237 | 238 | 239 | class ProductOfExperts(nn.Module): 240 | def forward(self, mu, log_var): 241 | """Return parameters for product of independent experts. 242 | See https://arxiv.org/pdf/1410.7827.pdf for equations. 243 | 244 | @param mu: M x D for M experts 245 | @param log_var: M x D for M experts 246 | """ 247 | # precision of i-th Gaussian expert at point x 248 | var = torch.exp(log_var) 249 | T = 1. / var # the variance matrices are diagonal 250 | pd_mu = torch.sum(mu * T, dim=0) / torch.sum(T, dim=0) 251 | pd_var = 1. / torch.sum(T, dim=0) 252 | return pd_mu, pd_var 253 | 254 | 255 | def prior_expert(mu, var, batch_size): 256 | mu = mu.repeat((batch_size, 1)).unsqueeze(0) 257 | log_var = torch.log(var.repeat((batch_size, 1)).unsqueeze(0)) 258 | return mu, log_var 259 | 260 | 261 | def fac_to_mat(fac, n_f): 262 | return torch.stack([(fac == i).float() for i in range(n_f)]).T 263 | -------------------------------------------------------------------------------- /cobolt/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epurdom/cobolt/cea9a5c6297326aca00a10aeaa198d21b07e4889/cobolt/tests/__init__.py -------------------------------------------------------------------------------- /cobolt/tests/test_clustering.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | import numpy as np 4 | from cobolt.model.clustering import ClusterUtil 5 | 6 | 7 | class TestClusterUtil: 8 | def test_check_version(self): 9 | obj = ClusterUtil(20, 30) 10 | assert not obj.check_version(10, 30) 11 | assert not obj.check_version(20, 10) 12 | assert not obj.check_version(0, 0) 13 | 14 | def test_fit(self): 15 | dt = np.random.sample((100, 200)) 16 | obj = ClusterUtil(20, 30) 17 | obj.fit(dt) 18 | assert obj.snn_mat.max() <= 1 19 | assert obj.snn_mat.min() >= 0 20 | assert obj.snn_mat.shape == (100, 100) 21 | assert obj.latent.shape == (100, 200) 22 | assert len(obj.graph.vs) == 100 23 | assert obj.graph.es['weight'] is not None 24 | 25 | def test_run_louvain(self, capfd): 26 | dt = np.random.sample((100, 200)) 27 | obj = ClusterUtil(20, 30) 28 | obj.fit(dt) 29 | res1 = obj.get_clusters("louvain") 30 | out, err = capfd.readouterr() 31 | assert out.startswith("Running") 32 | assert len(res1) == 100 33 | obj.run_louvain() 34 | out, err = capfd.readouterr() 35 | assert out.startswith("Clustering results") 36 | res2 = obj.get_clusters("louvain") 37 | assert res1 is res2 38 | obj.run_louvain(overwrite=True) 39 | out, err = capfd.readouterr() 40 | assert out.startswith("Running") 41 | res3 = obj.get_clusters("louvain") 42 | assert res1 is not res3 43 | 44 | def test_run_leiden(self, capfd): 45 | dt = np.random.sample((100, 200)) 46 | obj = ClusterUtil(20, 30) 47 | obj.fit(dt) 48 | res1 = obj.get_clusters("leiden", resolution=1) 49 | out, err = capfd.readouterr() 50 | assert out.startswith("Running") 51 | assert len(res1) == 100 52 | res2 = obj.get_clusters("leiden", resolution=0.5) 53 | out, err = capfd.readouterr() 54 | assert out.startswith("Running") 55 | assert res1 is not res2 56 | obj.run_leiden(resolution=1, overwrite=False) 57 | out, err = capfd.readouterr() 58 | assert out.startswith("Clustering results") 59 | res3 = obj.get_clusters("leiden", resolution=1) 60 | assert res1 is res3 61 | obj.run_leiden(resolution=1, overwrite=True) 62 | out, err = capfd.readouterr() 63 | assert out.startswith("Running") 64 | res4 = obj.get_clusters("leiden", resolution=1) 65 | assert res1 is not res4 66 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/joint_a/barcodes.tsv: -------------------------------------------------------------------------------- 1 | 09A_CAGCCCCGCCTT 2 | 09A_CGCCTACCATGA 3 | 09A_GATGCGCGGCTA 4 | 09A_GGTCCGAGTCCT 5 | 09A_TCTCCCGGCACC 6 | 09A_ACCAAATCTTGT 7 | 09A_CTGCTCGAGCAT 8 | 09A_CTCCCCCTAGCC 9 | 09A_CACGGAATTAGG 10 | 09A_ACGGACTACGCT 11 | 09A_TTTCCCCATGGC 12 | 09A_GCTGCAAGGGAT 13 | 09A_TAAGGAAGAGGA 14 | 09A_CAGACCTACTTT 15 | 09A_AACGGCGTCGGT 16 | 09A_CATGTGTCACCC 17 | 09A_GACAAGAAGATC 18 | 09A_TCTCTTTAGTGC 19 | 09A_GACACCCGCACC 20 | 09A_CAGAACTGTCAT 21 | 09A_AACCCAGGGAGC 22 | 09A_AAATTCTTTAGC 23 | 09A_GCCCTTGGTCAA 24 | 09A_ATTAATCGAGTA 25 | 09A_CGGTCCGACTTA 26 | 09A_CATGTTCAATAA 27 | 09A_TCATGGACGAGG 28 | 09A_TATCCTTACCAG 29 | 09A_TTTATGTCAACT 30 | 09A_GATCTCACGCCC 31 | 09A_CTCTCGCTGTAG 32 | 09A_TGCACCTGGGGG 33 | 09A_AGGTCTCATCAC 34 | 09A_AGTAGTGACTAC 35 | 09A_GACCTTGGAATA 36 | 09A_GCTCAGCATTTA 37 | 09A_ATATCTCCGAGC 38 | 09A_CTCAACGCAGAA 39 | 09A_GCTATGCTCCCG 40 | 09A_GCTACAGCGATG 41 | 09A_ATATGGGCCGCT 42 | 09A_GCTCCAAAGCGG 43 | 09A_GGTATCGAAATA 44 | 09A_CTCGGGTTTAGC 45 | 09A_GCCAGTACCACA 46 | 09A_GAGCCATCAATT 47 | 09A_GTCTTCTTTCTG 48 | 09A_ACGTGGTGACTT 49 | 09A_TCGAATCCCGAA 50 | 09A_CTGCGACGTGAC 51 | 09A_CAGTAGCCCGTT 52 | 09A_GAGTAGCACTTC 53 | 09A_GAGGTAACCCGA 54 | 09A_TGCATAGGTTCC 55 | 09A_TCACATTGTGTC 56 | 09A_CGTTGTGCCACT 57 | 09A_GCGAAAGCCTGC 58 | 09A_GTCCAATTCCCA 59 | 09A_GATAATATCGCC 60 | 09A_CCGGTACTGCCT 61 | 09A_ATCAGAAACTAG 62 | 09A_AATGGATGGCAC 63 | 09A_CGAACATCCTTG 64 | 09A_TTGACTGTCACA 65 | 09A_TAAATTCCACGG 66 | 09A_TGCCGAACAGTT 67 | 09A_TCTCGTTAACCA 68 | 09A_CTCAGGTATCCT 69 | 09A_ATCTCATCCCTT 70 | 09A_AGTCCGCGATTC 71 | 09A_CCCTTTAACCCC 72 | 09A_GCTACTATACCA 73 | 09A_CCTCAGGACCGG 74 | 09A_AACCGACAATGC 75 | 09A_CGTCCACTAATC 76 | 09A_GAGTGTCAGTTT 77 | 09A_CCACGTCTAGCT 78 | 09A_GAATGTGTATTA 79 | 09A_CTGTTTCGTTGC 80 | 09A_CCAGGCGCCTTT 81 | 09A_TATCCAGGTATT 82 | 09A_CATCTCAATCTT 83 | 09A_TACTGCCTGAGA 84 | 09A_ACCAATGTTTGA 85 | 09A_AGCCCACCAGTC 86 | 09A_AGCTTGCACTGG 87 | 09A_GCAATCAGCTAC 88 | 09A_TATGTTTACCAC 89 | 09A_CGTCTGCATCCA 90 | 09A_TTTAGCCTTCAA 91 | 09A_AAGAACCTAGGT 92 | 09A_CCACGCCAAATA 93 | 09A_AAGGTTGTGAAT 94 | 09A_AGACTGAGCAAT 95 | 09A_CAATATGCTGAG 96 | 09A_CGCCTTCCGGGT 97 | 09A_CGAGAAATGGCT 98 | 09A_CCGGCCCTCCGT 99 | 09A_ACTCATGGGAAC 100 | 09A_CTCCATTAATAG 101 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/joint_a/counts.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate integer general 2 | % 3 | 100 100 436 4 | 3 1 1 5 | 13 1 1 6 | 17 1 1 7 | 45 1 1 8 | 60 1 1 9 | 65 1 1 10 | 76 1 1 11 | 92 1 1 12 | 4 2 3 13 | 5 2 1 14 | 10 2 2 15 | 22 2 1 16 | 29 2 1 17 | 45 2 1 18 | 49 2 1 19 | 60 2 1 20 | 65 2 2 21 | 6 3 1 22 | 7 3 2 23 | 29 3 1 24 | 41 3 2 25 | 45 3 3 26 | 59 3 1 27 | 5 4 1 28 | 7 4 1 29 | 34 4 1 30 | 37 4 1 31 | 45 4 3 32 | 64 4 2 33 | 76 4 1 34 | 7 5 1 35 | 45 5 4 36 | 58 5 1 37 | 70 5 1 38 | 81 5 1 39 | 7 6 1 40 | 10 6 1 41 | 16 6 1 42 | 29 6 1 43 | 42 6 1 44 | 45 6 1 45 | 60 6 2 46 | 75 6 1 47 | 81 6 1 48 | 96 6 1 49 | 23 7 1 50 | 38 7 1 51 | 39 7 1 52 | 41 7 1 53 | 60 7 1 54 | 38 8 1 55 | 60 8 1 56 | 45 9 2 57 | 57 9 1 58 | 60 9 1 59 | 76 9 1 60 | 4 10 1 61 | 29 10 1 62 | 46 10 1 63 | 60 10 1 64 | 71 10 1 65 | 6 11 1 66 | 10 11 1 67 | 16 11 1 68 | 33 11 1 69 | 60 11 1 70 | 63 11 1 71 | 65 11 1 72 | 72 11 1 73 | 7 12 1 74 | 10 12 1 75 | 23 12 1 76 | 31 12 1 77 | 60 12 3 78 | 31 13 1 79 | 56 13 2 80 | 60 13 2 81 | 63 13 1 82 | 92 13 1 83 | 17 14 1 84 | 65 14 2 85 | 29 15 2 86 | 45 15 1 87 | 60 15 1 88 | 67 15 1 89 | 76 15 1 90 | 91 15 1 91 | 19 16 2 92 | 50 16 1 93 | 74 16 1 94 | 6 17 1 95 | 45 17 1 96 | 67 17 1 97 | 99 17 1 98 | 7 18 2 99 | 33 18 1 100 | 42 18 1 101 | 43 18 1 102 | 45 18 1 103 | 54 18 1 104 | 10 19 1 105 | 29 19 4 106 | 45 19 1 107 | 51 19 1 108 | 54 19 1 109 | 81 19 1 110 | 4 20 1 111 | 8 20 1 112 | 17 20 1 113 | 23 20 1 114 | 31 20 1 115 | 41 20 1 116 | 45 20 1 117 | 70 20 2 118 | 8 21 1 119 | 10 21 1 120 | 31 21 1 121 | 41 21 2 122 | 60 21 2 123 | 16 22 1 124 | 29 22 1 125 | 45 22 1 126 | 47 22 2 127 | 49 22 1 128 | 7 23 2 129 | 15 23 1 130 | 29 23 1 131 | 32 23 1 132 | 38 23 1 133 | 44 23 1 134 | 37 24 1 135 | 45 24 1 136 | 46 24 1 137 | 62 24 1 138 | 73 24 1 139 | 7 25 2 140 | 29 25 1 141 | 41 25 1 142 | 46 25 1 143 | 60 25 1 144 | 15 26 1 145 | 29 26 1 146 | 46 26 2 147 | 63 26 1 148 | 4 27 1 149 | 7 27 1 150 | 10 27 1 151 | 23 27 1 152 | 27 27 1 153 | 29 27 1 154 | 45 27 1 155 | 49 27 1 156 | 60 27 2 157 | 77 27 1 158 | 85 27 1 159 | 23 28 1 160 | 29 28 2 161 | 41 28 1 162 | 56 28 1 163 | 45 29 2 164 | 23 30 1 165 | 33 30 2 166 | 37 30 1 167 | 41 30 1 168 | 42 30 1 169 | 100 30 1 170 | 28 31 1 171 | 100 31 1 172 | 39 32 1 173 | 45 32 1 174 | 24 33 1 175 | 38 33 1 176 | 60 33 1 177 | 71 33 1 178 | 79 33 1 179 | 10 34 1 180 | 29 34 1 181 | 41 34 1 182 | 42 34 2 183 | 81 34 1 184 | 7 35 1 185 | 33 35 1 186 | 45 35 1 187 | 60 35 1 188 | 76 35 1 189 | 10 36 1 190 | 22 36 1 191 | 37 36 1 192 | 60 36 1 193 | 92 36 1 194 | 45 37 1 195 | 65 37 1 196 | 6 38 1 197 | 7 38 1 198 | 43 38 1 199 | 65 38 1 200 | 17 39 1 201 | 37 39 3 202 | 46 39 1 203 | 63 39 1 204 | 74 39 1 205 | 78 39 1 206 | 17 40 2 207 | 42 40 1 208 | 21 41 1 209 | 33 41 1 210 | 41 41 1 211 | 56 41 2 212 | 23 42 1 213 | 25 42 1 214 | 56 42 1 215 | 79 42 1 216 | 4 43 1 217 | 29 43 2 218 | 37 43 1 219 | 45 43 2 220 | 60 44 3 221 | 66 44 1 222 | 92 44 1 223 | 10 45 1 224 | 41 45 1 225 | 45 45 1 226 | 75 45 1 227 | 15 46 1 228 | 23 46 2 229 | 29 46 2 230 | 47 46 1 231 | 51 46 1 232 | 63 46 1 233 | 92 46 1 234 | 29 47 1 235 | 41 47 1 236 | 45 47 1 237 | 71 47 1 238 | 91 47 1 239 | 7 48 2 240 | 16 48 1 241 | 4 49 1 242 | 10 49 2 243 | 17 49 1 244 | 25 49 1 245 | 37 49 4 246 | 47 49 1 247 | 54 49 1 248 | 45 50 2 249 | 7 51 1 250 | 10 51 1 251 | 60 51 1 252 | 91 51 1 253 | 42 52 1 254 | 54 52 1 255 | 61 52 1 256 | 7 53 1 257 | 25 53 2 258 | 41 53 1 259 | 73 53 1 260 | 45 54 2 261 | 67 54 1 262 | 7 55 1 263 | 45 55 1 264 | 60 55 1 265 | 81 55 1 266 | 85 55 1 267 | 7 56 1 268 | 43 56 1 269 | 71 56 2 270 | 8 57 1 271 | 10 57 1 272 | 45 57 2 273 | 60 57 1 274 | 10 58 3 275 | 45 58 1 276 | 65 58 1 277 | 14 59 1 278 | 29 59 1 279 | 77 59 1 280 | 10 60 1 281 | 45 60 1 282 | 81 60 1 283 | 31 61 1 284 | 37 61 1 285 | 49 61 1 286 | 78 61 1 287 | 6 62 1 288 | 7 62 1 289 | 29 62 2 290 | 33 62 1 291 | 63 62 1 292 | 8 63 1 293 | 18 63 1 294 | 28 63 3 295 | 29 63 2 296 | 54 63 1 297 | 7 64 1 298 | 29 64 2 299 | 45 64 1 300 | 51 64 1 301 | 65 64 1 302 | 37 65 1 303 | 45 65 1 304 | 65 65 1 305 | 2 66 1 306 | 6 66 2 307 | 10 66 1 308 | 45 66 1 309 | 60 66 1 310 | 67 66 1 311 | 71 66 1 312 | 10 67 1 313 | 20 67 1 314 | 41 67 1 315 | 42 67 2 316 | 49 67 2 317 | 75 67 1 318 | 81 67 1 319 | 2 68 1 320 | 4 68 1 321 | 7 68 1 322 | 23 68 2 323 | 29 68 1 324 | 54 68 2 325 | 92 68 1 326 | 7 69 2 327 | 31 69 1 328 | 56 69 1 329 | 42 70 2 330 | 45 71 1 331 | 65 71 1 332 | 3 72 1 333 | 29 72 2 334 | 45 72 2 335 | 78 72 1 336 | 7 73 1 337 | 31 73 1 338 | 41 73 1 339 | 81 73 1 340 | 10 74 1 341 | 25 74 1 342 | 38 74 1 343 | 41 74 1 344 | 44 74 1 345 | 46 74 1 346 | 57 74 1 347 | 63 74 1 348 | 65 74 1 349 | 92 74 1 350 | 23 75 1 351 | 29 75 1 352 | 45 75 1 353 | 66 75 1 354 | 92 75 2 355 | 7 76 1 356 | 23 76 1 357 | 41 76 1 358 | 81 76 2 359 | 8 77 1 360 | 25 77 1 361 | 63 77 1 362 | 10 78 1 363 | 63 78 1 364 | 10 79 1 365 | 32 79 1 366 | 33 79 1 367 | 45 79 1 368 | 65 79 1 369 | 41 80 1 370 | 37 81 2 371 | 45 81 3 372 | 60 81 1 373 | 63 81 1 374 | 29 82 1 375 | 46 82 1 376 | 10 83 1 377 | 15 83 1 378 | 29 84 1 379 | 31 84 1 380 | 41 84 1 381 | 60 84 3 382 | 44 85 1 383 | 46 85 1 384 | 56 85 1 385 | 62 85 1 386 | 7 86 1 387 | 29 87 1 388 | 8 88 1 389 | 16 88 1 390 | 17 88 1 391 | 33 88 1 392 | 37 88 1 393 | 63 88 1 394 | 7 89 1 395 | 10 89 1 396 | 22 89 1 397 | 45 89 1 398 | 56 89 1 399 | 71 89 1 400 | 75 89 1 401 | 100 89 1 402 | 2 90 1 403 | 23 90 1 404 | 28 90 1 405 | 29 90 1 406 | 45 90 3 407 | 60 90 1 408 | 10 91 1 409 | 45 91 1 410 | 10 92 2 411 | 29 92 1 412 | 33 92 1 413 | 7 93 1 414 | 10 93 1 415 | 45 93 1 416 | 29 94 1 417 | 34 94 1 418 | 62 94 1 419 | 63 94 1 420 | 8 95 1 421 | 29 95 1 422 | 41 95 1 423 | 22 96 2 424 | 45 96 2 425 | 63 96 1 426 | 7 97 1 427 | 67 97 1 428 | 81 97 1 429 | 10 98 1 430 | 39 98 1 431 | 45 98 1 432 | 77 98 1 433 | 5 99 1 434 | 43 99 2 435 | 45 99 1 436 | 20 100 1 437 | 22 100 2 438 | 60 100 1 439 | 75 100 1 440 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/joint_a/features.tsv: -------------------------------------------------------------------------------- 1 | 0610005C13Rik 2 | 0610007P14Rik 3 | 0610009B22Rik 4 | 0610009E02Rik 5 | 0610009L18Rik 6 | 0610009O20Rik 7 | 0610010F05Rik 8 | 0610030E20Rik 9 | 0610031O16Rik 10 | 0610037L13Rik 11 | 0610038B21Rik 12 | 0610039K10Rik 13 | 0610040B10Rik 14 | 0610040F04Rik 15 | 0610040J01Rik 16 | 0610043K17Rik 17 | 1110001J03Rik 18 | 1110002E22Rik 19 | 1110002J07Rik 20 | 1110002L01Rik 21 | 1110002O04Rik 22 | 1110004E09Rik 23 | 1110004F10Rik 24 | 1110006O24Rik 25 | 1110007C09Rik 26 | 1110008E08Rik 27 | 1110008F13Rik 28 | 1110008L16Rik 29 | 1110008P14Rik 30 | 1110015O18Rik 31 | 1110017D15Rik 32 | 1110018N20Rik 33 | 1110019D14Rik 34 | 1110020A21Rik 35 | 1110025M09Rik 36 | 1110028F18Rik 37 | 1110032A03Rik 38 | 1110032F04Rik 39 | 1110034G24Rik 40 | 1110035H17Rik 41 | 1110037F02Rik 42 | 1110038B12Rik 43 | 1110038F14Rik 44 | 1110046J04Rik 45 | 1110051M20Rik 46 | 1110059E24Rik 47 | 1110059G10Rik 48 | 1110060G06Rik 49 | 1110065P20Rik 50 | 1190002F15Rik 51 | 1190002N15Rik 52 | 1190003K10Rik 53 | 1190005I06Rik 54 | 1190007I07Rik 55 | 1200007C13Rik 56 | 1200014J11Rik 57 | 1300002E11Rik 58 | 1300017J02Rik 59 | 1500002C15Rik 60 | 1500004A13Rik 61 | 1500009C09Rik 62 | 1500009L16Rik 63 | 1500011B03Rik 64 | 1500011K16Rik 65 | 1500012F01Rik 66 | 1500012K07Rik 67 | 1500015A07Rik 68 | 1500015L24Rik 69 | 1500015O10Rik 70 | 1500017E21Rik 71 | 1500026H17Rik 72 | 1500035N22Rik 73 | 1520401A03Rik 74 | 1600002D24Rik 75 | 1600002H07Rik 76 | 1600002K03Rik 77 | 1600010M07Rik 78 | 1600012H06Rik 79 | 1600014C10Rik 80 | 1600014C23Rik 81 | 1600020E01Rik 82 | 1600023N17Rik 83 | 1600029O15Rik 84 | 1700001C19Rik 85 | 1700001G01Rik 86 | 1700001G11Rik 87 | 1700001J03Rik 88 | 1700001J11Rik 89 | 1700001K19Rik 90 | 1700001L05Rik 91 | 1700001L19Rik 92 | 1700001O22Rik 93 | 1700001P01Rik 94 | 1700003D09Rik 95 | 1700003E16Rik 96 | 1700003F12Rik 97 | 1700003G18Rik 98 | 1700003H04Rik 99 | 1700003L19Rik 100 | 1700003M02Rik 101 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/joint_b/barcodes.tsv: -------------------------------------------------------------------------------- 1 | 09A_CAGCCCCGCCTT 2 | 09A_CGCCTACCATGA 3 | 09A_GATGCGCGGCTA 4 | 09A_GGTCCGAGTCCT 5 | 09A_TCTCCCGGCACC 6 | 09A_ACCAAATCTTGT 7 | 09A_CTGCTCGAGCAT 8 | 09A_CTCCCCCTAGCC 9 | 09A_CACGGAATTAGG 10 | 09A_ACGGACTACGCT 11 | 09A_TTTCCCCATGGC 12 | 09A_GCTGCAAGGGAT 13 | 09A_TAAGGAAGAGGA 14 | 09A_CAGACCTACTTT 15 | 09A_AACGGCGTCGGT 16 | 09A_CATGTGTCACCC 17 | 09A_GACAAGAAGATC 18 | 09A_TCTCTTTAGTGC 19 | 09A_GACACCCGCACC 20 | 09A_CAGAACTGTCAT 21 | 09A_AACCCAGGGAGC 22 | 09A_AAATTCTTTAGC 23 | 09A_GCCCTTGGTCAA 24 | 09A_ATTAATCGAGTA 25 | 09A_CGGTCCGACTTA 26 | 09A_CATGTTCAATAA 27 | 09A_TCATGGACGAGG 28 | 09A_TATCCTTACCAG 29 | 09A_TTTATGTCAACT 30 | 09A_GATCTCACGCCC 31 | 09A_CTCTCGCTGTAG 32 | 09A_TGCACCTGGGGG 33 | 09A_AGGTCTCATCAC 34 | 09A_AGTAGTGACTAC 35 | 09A_GACCTTGGAATA 36 | 09A_GCTCAGCATTTA 37 | 09A_ATATCTCCGAGC 38 | 09A_CTCAACGCAGAA 39 | 09A_GCTATGCTCCCG 40 | 09A_GCTACAGCGATG 41 | 09A_ATATGGGCCGCT 42 | 09A_GCTCCAAAGCGG 43 | 09A_GGTATCGAAATA 44 | 09A_CTCGGGTTTAGC 45 | 09A_GCCAGTACCACA 46 | 09A_GAGCCATCAATT 47 | 09A_GTCTTCTTTCTG 48 | 09A_ACGTGGTGACTT 49 | 09A_TCGAATCCCGAA 50 | 09A_CTGCGACGTGAC 51 | 09A_CAGTAGCCCGTT 52 | 09A_GAGTAGCACTTC 53 | 09A_GAGGTAACCCGA 54 | 09A_TGCATAGGTTCC 55 | 09A_TCACATTGTGTC 56 | 09A_CGTTGTGCCACT 57 | 09A_GCGAAAGCCTGC 58 | 09A_GTCCAATTCCCA 59 | 09A_GATAATATCGCC 60 | 09A_CCGGTACTGCCT 61 | 09A_ATCAGAAACTAG 62 | 09A_AATGGATGGCAC 63 | 09A_CGAACATCCTTG 64 | 09A_TTGACTGTCACA 65 | 09A_TAAATTCCACGG 66 | 09A_TGCCGAACAGTT 67 | 09A_TCTCGTTAACCA 68 | 09A_CTCAGGTATCCT 69 | 09A_ATCTCATCCCTT 70 | 09A_AGTCCGCGATTC 71 | 09A_CCCTTTAACCCC 72 | 09A_GCTACTATACCA 73 | 09A_CCTCAGGACCGG 74 | 09A_AACCGACAATGC 75 | 09A_CGTCCACTAATC 76 | 09A_GAGTGTCAGTTT 77 | 09A_CCACGTCTAGCT 78 | 09A_GAATGTGTATTA 79 | 09A_CTGTTTCGTTGC 80 | 09A_CCAGGCGCCTTT 81 | 09A_TATCCAGGTATT 82 | 09A_CATCTCAATCTT 83 | 09A_TACTGCCTGAGA 84 | 09A_ACCAATGTTTGA 85 | 09A_AGCCCACCAGTC 86 | 09A_AGCTTGCACTGG 87 | 09A_GCAATCAGCTAC 88 | 09A_TATGTTTACCAC 89 | 09A_CGTCTGCATCCA 90 | 09A_TTTAGCCTTCAA 91 | 09A_AAGAACCTAGGT 92 | 09A_CCACGCCAAATA 93 | 09A_AAGGTTGTGAAT 94 | 09A_AGACTGAGCAAT 95 | 09A_CAATATGCTGAG 96 | 09A_CGCCTTCCGGGT 97 | 09A_CGAGAAATGGCT 98 | 09A_CCGGCCCTCCGT 99 | 09A_ACTCATGGGAAC 100 | 09A_CTCCATTAATAG 101 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/joint_b/counts.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate integer general 2 | % 3 | 500 100 336 4 | 310 1 1 5 | 322 1 1 6 | 327 1 1 7 | 334 1 1 8 | 386 1 1 9 | 88 2 1 10 | 94 2 1 11 | 164 2 1 12 | 165 2 1 13 | 167 2 1 14 | 188 2 1 15 | 222 2 1 16 | 327 2 1 17 | 338 2 1 18 | 341 2 1 19 | 400 2 1 20 | 434 2 1 21 | 54 3 1 22 | 106 3 1 23 | 184 3 1 24 | 334 3 1 25 | 366 3 1 26 | 386 3 1 27 | 48 4 1 28 | 88 4 1 29 | 94 4 1 30 | 216 4 1 31 | 218 4 1 32 | 268 4 1 33 | 309 4 1 34 | 327 4 1 35 | 36 5 1 36 | 310 5 1 37 | 347 5 1 38 | 432 5 1 39 | 112 6 1 40 | 178 6 1 41 | 437 6 1 42 | 216 7 1 43 | 317 7 1 44 | 334 7 1 45 | 350 7 1 46 | 360 8 1 47 | 84 9 1 48 | 235 9 1 49 | 194 10 1 50 | 319 10 1 51 | 85 11 1 52 | 168 11 1 53 | 17 12 1 54 | 114 12 1 55 | 54 13 1 56 | 309 13 1 57 | 88 14 1 58 | 213 14 1 59 | 304 14 1 60 | 75 15 1 61 | 164 15 1 62 | 387 17 1 63 | 488 17 1 64 | 3 18 1 65 | 4 18 1 66 | 343 18 1 67 | 48 19 1 68 | 105 19 1 69 | 138 19 1 70 | 194 19 1 71 | 235 19 1 72 | 334 19 1 73 | 120 20 1 74 | 48 21 1 75 | 235 21 1 76 | 309 21 1 77 | 310 21 1 78 | 333 21 1 79 | 454 21 1 80 | 498 21 1 81 | 35 22 1 82 | 222 22 1 83 | 241 22 1 84 | 334 22 1 85 | 338 22 1 86 | 263 23 1 87 | 472 23 1 88 | 488 23 1 89 | 235 24 2 90 | 333 24 1 91 | 454 24 1 92 | 32 25 1 93 | 216 25 1 94 | 232 25 1 95 | 270 25 1 96 | 295 25 1 97 | 454 25 1 98 | 235 26 1 99 | 356 26 1 100 | 260 27 1 101 | 325 27 1 102 | 356 27 1 103 | 440 27 1 104 | 17 28 1 105 | 107 28 1 106 | 152 28 1 107 | 156 28 1 108 | 165 28 1 109 | 277 28 1 110 | 310 28 1 111 | 334 28 1 112 | 454 28 2 113 | 456 28 1 114 | 39 29 1 115 | 218 29 1 116 | 235 29 1 117 | 160 30 1 118 | 216 30 1 119 | 217 30 1 120 | 223 30 1 121 | 308 30 1 122 | 313 30 1 123 | 334 30 1 124 | 441 30 1 125 | 448 30 1 126 | 48 31 1 127 | 48 32 1 128 | 150 32 1 129 | 164 32 1 130 | 295 32 1 131 | 345 32 1 132 | 386 32 1 133 | 107 33 1 134 | 156 33 1 135 | 168 33 1 136 | 334 33 1 137 | 386 33 1 138 | 88 34 1 139 | 399 34 1 140 | 85 35 1 141 | 310 35 1 142 | 140 36 1 143 | 222 36 1 144 | 308 36 1 145 | 317 36 1 146 | 394 36 1 147 | 454 36 1 148 | 202 37 1 149 | 216 37 1 150 | 235 37 1 151 | 241 37 1 152 | 276 37 1 153 | 360 37 1 154 | 155 38 1 155 | 222 38 1 156 | 234 38 1 157 | 3 39 1 158 | 33 39 1 159 | 67 39 1 160 | 135 39 1 161 | 380 39 1 162 | 386 39 1 163 | 455 39 1 164 | 72 40 1 165 | 85 40 1 166 | 88 40 1 167 | 162 40 1 168 | 277 40 1 169 | 327 40 1 170 | 380 41 1 171 | 405 41 1 172 | 438 41 1 173 | 468 41 1 174 | 97 42 1 175 | 308 42 1 176 | 386 42 1 177 | 48 43 1 178 | 162 43 1 179 | 17 44 1 180 | 72 44 1 181 | 112 44 1 182 | 158 44 1 183 | 410 45 1 184 | 33 47 1 185 | 107 48 1 186 | 216 48 1 187 | 245 48 1 188 | 400 48 1 189 | 406 48 1 190 | 48 49 1 191 | 92 49 1 192 | 298 49 1 193 | 308 49 1 194 | 309 49 1 195 | 481 49 1 196 | 235 50 1 197 | 104 51 1 198 | 167 51 1 199 | 228 51 1 200 | 235 51 1 201 | 326 51 1 202 | 356 51 2 203 | 196 52 1 204 | 362 52 1 205 | 4 53 1 206 | 164 55 1 207 | 194 55 1 208 | 308 56 1 209 | 400 56 1 210 | 442 56 1 211 | 488 56 1 212 | 339 58 1 213 | 432 58 1 214 | 440 58 1 215 | 105 59 1 216 | 356 59 1 217 | 386 59 1 218 | 70 60 1 219 | 91 60 1 220 | 316 60 1 221 | 363 61 1 222 | 467 61 1 223 | 111 62 1 224 | 308 62 1 225 | 314 62 1 226 | 454 62 1 227 | 64 63 1 228 | 226 63 1 229 | 495 63 1 230 | 16 64 1 231 | 235 64 2 232 | 430 64 1 233 | 187 65 1 234 | 235 65 1 235 | 217 66 1 236 | 235 66 1 237 | 400 66 1 238 | 92 67 1 239 | 88 68 1 240 | 105 68 1 241 | 216 68 1 242 | 222 68 2 243 | 327 68 1 244 | 334 68 1 245 | 56 69 1 246 | 64 69 1 247 | 75 69 1 248 | 216 69 2 249 | 164 70 1 250 | 270 70 1 251 | 315 70 1 252 | 368 70 1 253 | 113 71 1 254 | 164 71 1 255 | 171 71 1 256 | 329 71 1 257 | 216 72 1 258 | 317 72 1 259 | 362 73 1 260 | 339 74 1 261 | 395 74 1 262 | 34 75 1 263 | 48 75 1 264 | 60 75 1 265 | 88 75 1 266 | 116 75 1 267 | 171 75 1 268 | 217 75 1 269 | 222 75 1 270 | 268 75 1 271 | 393 75 1 272 | 34 76 1 273 | 325 76 1 274 | 100 77 1 275 | 138 77 1 276 | 85 78 1 277 | 88 78 1 278 | 217 78 1 279 | 315 78 1 280 | 333 78 1 281 | 235 79 1 282 | 488 79 1 283 | 96 80 1 284 | 216 80 1 285 | 186 81 1 286 | 216 81 1 287 | 334 81 1 288 | 341 81 1 289 | 360 81 1 290 | 48 83 1 291 | 60 83 1 292 | 72 83 1 293 | 85 83 2 294 | 88 83 1 295 | 104 83 1 296 | 216 83 1 297 | 305 83 1 298 | 310 83 1 299 | 327 83 1 300 | 310 84 1 301 | 456 84 1 302 | 216 85 1 303 | 318 85 1 304 | 482 85 1 305 | 216 86 1 306 | 222 86 1 307 | 84 87 1 308 | 88 87 1 309 | 168 87 1 310 | 327 87 1 311 | 365 87 1 312 | 385 87 1 313 | 48 88 1 314 | 84 88 1 315 | 85 88 1 316 | 347 88 1 317 | 48 89 1 318 | 184 89 1 319 | 240 89 1 320 | 88 90 1 321 | 216 90 2 322 | 440 90 1 323 | 49 91 1 324 | 150 91 1 325 | 252 91 1 326 | 441 91 1 327 | 234 92 1 328 | 356 93 1 329 | 41 94 1 330 | 478 94 1 331 | 48 95 1 332 | 149 95 1 333 | 216 95 1 334 | 310 95 1 335 | 315 95 1 336 | 486 95 1 337 | 454 96 1 338 | 164 97 1 339 | 120 100 1 340 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/joint_b/features.tsv: -------------------------------------------------------------------------------- 1 | chr1:3094893-3095434 2 | chr1:3113818-3113891 3 | chr1:3119627-3120693 4 | chr1:3121450-3121584 5 | chr1:3167222-3167372 6 | chr1:3181216-3181468 7 | chr1:3204882-3205120 8 | chr1:3213076-3213149 9 | chr1:3217219-3217452 10 | chr1:3228244-3228369 11 | chr1:3235591-3235664 12 | chr1:3265307-3265524 13 | chr1:3292493-3293009 14 | chr1:3297644-3297768 15 | chr1:3299709-3299943 16 | chr1:3309994-3310393 17 | chr1:3322375-3322835 18 | chr1:3324306-3324445 19 | chr1:3330267-3330729 20 | chr1:3333790-3333981 21 | chr1:3361072-3361157 22 | chr1:3366586-3366762 23 | chr1:3368397-3368605 24 | chr1:3369541-3369868 25 | chr1:3379678-3379806 26 | chr1:3386269-3386365 27 | chr1:3388538-3388771 28 | chr1:3391939-3392441 29 | chr1:3399197-3399270 30 | chr1:3399930-3400273 31 | chr1:3406182-3406487 32 | chr1:3411913-3411986 33 | chr1:3433002-3433079 34 | chr1:3433835-3434474 35 | chr1:3455434-3455607 36 | chr1:3457839-3458105 37 | chr1:3466526-3466685 38 | chr1:3497132-3497205 39 | chr1:3515011-3515084 40 | chr1:3532659-3532742 41 | chr1:3552656-3552748 42 | chr1:3576399-3576487 43 | chr1:3616536-3616711 44 | chr1:3659452-3659606 45 | chr1:3662461-3662558 46 | chr1:3662896-3663029 47 | chr1:3670579-3670780 48 | chr1:3670975-3672068 49 | chr1:3672252-3672343 50 | chr1:3710086-3710492 51 | chr1:3742354-3742449 52 | chr1:3797469-3797542 53 | chr1:3853824-3854030 54 | chr1:3900396-3900673 55 | chr1:3903555-3903761 56 | chr1:3909423-3909635 57 | chr1:3919408-3919531 58 | chr1:3932512-3932830 59 | chr1:3979169-3979242 60 | chr1:3990484-3990719 61 | chr1:3994861-3995063 62 | chr1:4089610-4090069 63 | chr1:4098064-4098390 64 | chr1:4139144-4139373 65 | chr1:4142662-4142784 66 | chr1:4146159-4146348 67 | chr1:4186795-4187219 68 | chr1:4273254-4273426 69 | chr1:4332441-4332725 70 | chr1:4392710-4392913 71 | chr1:4412645-4412790 72 | chr1:4414272-4415158 73 | chr1:4426978-4427051 74 | chr1:4491904-4492152 75 | chr1:4571436-4572174 76 | chr1:4611716-4612065 77 | chr1:4703195-4703853 78 | chr1:4722686-4723002 79 | chr1:4747479-4747571 80 | chr1:4748208-4748647 81 | chr1:4766800-4767048 82 | chr1:4775425-4775498 83 | chr1:4780276-4780357 84 | chr1:4785481-4786089 85 | chr1:4807495-4808228 86 | chr1:4820354-4820465 87 | chr1:4833747-4833866 88 | chr1:4857460-4858697 89 | chr1:4868989-4869069 90 | chr1:4873469-4873623 91 | chr1:4881497-4881689 92 | chr1:4907984-4908521 93 | chr1:4914570-4914685 94 | chr1:4917513-4917696 95 | chr1:4918809-4918882 96 | chr1:4923129-4923323 97 | chr1:4926453-4926617 98 | chr1:4927073-4927230 99 | chr1:4934774-4934864 100 | chr1:4935062-4935319 101 | chr1:4969904-4970129 102 | chr1:4971296-4971444 103 | chr1:5018054-5018163 104 | chr1:5018692-5018966 105 | chr1:5019221-5019674 106 | chr1:5020635-5020970 107 | chr1:5022737-5023330 108 | chr1:5033227-5033621 109 | chr1:5045909-5046074 110 | chr1:5070187-5070589 111 | chr1:5071202-5071553 112 | chr1:5082863-5083591 113 | chr1:5103291-5103487 114 | chr1:5106684-5106874 115 | chr1:5128868-5129069 116 | chr1:5130154-5130717 117 | chr1:5134092-5134371 118 | chr1:5137928-5138561 119 | chr1:5150671-5150797 120 | chr1:5176982-5177759 121 | chr1:5183539-5183815 122 | chr1:5207970-5208480 123 | chr1:5213909-5213982 124 | chr1:5220832-5221027 125 | chr1:5228732-5229204 126 | chr1:5232304-5232493 127 | chr1:5249376-5249731 128 | chr1:5257919-5257992 129 | chr1:5279329-5279434 130 | chr1:5288166-5288246 131 | chr1:5297376-5297589 132 | chr1:5297794-5297975 133 | chr1:5315506-5316189 134 | chr1:5316394-5316581 135 | chr1:5334507-5335021 136 | chr1:5336641-5336783 137 | chr1:5346169-5346523 138 | chr1:5382346-5382738 139 | chr1:5382868-5382972 140 | chr1:5394645-5394835 141 | chr1:5401082-5401215 142 | chr1:5416969-5417286 143 | chr1:5588271-5588551 144 | chr1:5588770-5588843 145 | chr1:5642384-5642564 146 | chr1:5689443-5689623 147 | chr1:5753564-5753680 148 | chr1:5894576-5894694 149 | chr1:5905017-5905104 150 | chr1:5915481-5915685 151 | chr1:5916929-5917275 152 | chr1:5917433-5917579 153 | chr1:5926109-5926336 154 | chr1:5955574-5955695 155 | chr1:5955938-5956153 156 | chr1:6039871-6040018 157 | chr1:6049826-6049909 158 | chr1:6094286-6094552 159 | chr1:6132028-6132203 160 | chr1:6135138-6135352 161 | chr1:6152617-6152715 162 | chr1:6178764-6179041 163 | chr1:6188680-6188814 164 | chr1:6214268-6215317 165 | chr1:6215459-6215610 166 | chr1:6219800-6219977 167 | chr1:6224174-6224431 168 | chr1:6272374-6272571 169 | chr1:6283091-6283283 170 | chr1:6290372-6290475 171 | chr1:6306556-6307040 172 | chr1:6320669-6320867 173 | chr1:6330854-6330927 174 | chr1:6359418-6359491 175 | chr1:6362041-6362257 176 | chr1:6364062-6364140 177 | chr1:6370527-6370632 178 | chr1:6382681-6383119 179 | chr1:6401695-6401780 180 | chr1:6406321-6406694 181 | chr1:6410288-6410397 182 | chr1:6441302-6441672 183 | chr1:6478054-6478168 184 | chr1:6482940-6484001 185 | chr1:6487143-6487393 186 | chr1:6499697-6499803 187 | chr1:6547223-6547466 188 | chr1:6594536-6594806 189 | chr1:6658271-6658344 190 | chr1:6658401-6658529 191 | chr1:6717690-6717986 192 | chr1:6722890-6722963 193 | chr1:6729545-6729636 194 | chr1:6729748-6730057 195 | chr1:6757712-6757852 196 | chr1:6760548-6760856 197 | chr1:6761410-6761498 198 | chr1:6770059-6770287 199 | chr1:6776556-6776629 200 | chr1:6805146-6805219 201 | chr1:6805362-6805563 202 | chr1:6810151-6810666 203 | chr1:6824269-6824733 204 | chr1:6829862-6829947 205 | chr1:6858065-6858158 206 | chr1:6859178-6859328 207 | chr1:6866880-6867269 208 | chr1:6907640-6908023 209 | chr1:6928028-6928182 210 | chr1:6928625-6929204 211 | chr1:6973107-6973219 212 | chr1:7000920-7001007 213 | chr1:7010317-7010482 214 | chr1:7040703-7040850 215 | chr1:7062657-7062866 216 | chr1:7088511-7089531 217 | chr1:7105056-7105656 218 | chr1:7113023-7113595 219 | chr1:7118015-7118418 220 | chr1:7131425-7131580 221 | chr1:7138221-7138330 222 | chr1:7138548-7139484 223 | chr1:7148130-7148654 224 | chr1:7155170-7155598 225 | chr1:7161153-7161343 226 | chr1:7170918-7171316 227 | chr1:7199682-7199759 228 | chr1:7204728-7205055 229 | chr1:7206367-7206946 230 | chr1:7219073-7219265 231 | chr1:7250060-7250346 232 | chr1:7292984-7293380 233 | chr1:7347997-7348105 234 | chr1:7360019-7360400 235 | chr1:7397426-7398548 236 | chr1:7447812-7447885 237 | chr1:7448093-7448181 238 | chr1:7495274-7495403 239 | chr1:7498172-7498404 240 | chr1:7523516-7523778 241 | chr1:7534995-7535637 242 | chr1:7543468-7543809 243 | chr1:7641273-7641669 244 | chr1:7731157-7731635 245 | chr1:7750777-7751065 246 | chr1:7758682-7758951 247 | chr1:7768778-7768928 248 | chr1:7769093-7769225 249 | chr1:7891680-7891847 250 | chr1:8063470-8063823 251 | chr1:8133001-8133174 252 | chr1:8135031-8135397 253 | chr1:8171624-8171779 254 | chr1:8410580-8410751 255 | chr1:8468831-8469143 256 | chr1:8570861-8571014 257 | chr1:8583020-8583458 258 | chr1:8584533-8584657 259 | chr1:8587455-8587584 260 | chr1:8595824-8595906 261 | chr1:8595980-8596099 262 | chr1:8644590-8644814 263 | chr1:8678918-8679074 264 | chr1:8736378-8736465 265 | chr1:8744569-8744677 266 | chr1:8745251-8745324 267 | chr1:8747483-8747599 268 | chr1:8781078-8781371 269 | chr1:8781952-8782025 270 | chr1:8790916-8791317 271 | chr1:8805172-8805245 272 | chr1:8817160-8817243 273 | chr1:8842871-8843027 274 | chr1:8847891-8848103 275 | chr1:8865003-8865139 276 | chr1:8866526-8866910 277 | chr1:8867784-8868122 278 | chr1:8921744-8922168 279 | chr1:8940901-8941010 280 | chr1:8944849-8945284 281 | chr1:8947644-8947997 282 | chr1:8952234-8952327 283 | chr1:8979325-8979398 284 | chr1:8982808-8982899 285 | chr1:8998849-8998922 286 | chr1:8999215-8999288 287 | chr1:9024399-9024736 288 | chr1:9032856-9033034 289 | chr1:9033216-9033300 290 | chr1:9090908-9090981 291 | chr1:9091034-9091266 292 | chr1:9091273-9091346 293 | chr1:9101012-9101096 294 | chr1:9125402-9126092 295 | chr1:9127548-9127799 296 | chr1:9140929-9141124 297 | chr1:9168442-9168634 298 | chr1:9171852-9172667 299 | chr1:9203267-9203374 300 | chr1:9207783-9207912 301 | chr1:9209015-9209113 302 | chr1:9210320-9210502 303 | chr1:9248698-9248833 304 | chr1:9254196-9254342 305 | chr1:9258132-9258350 306 | chr1:9289344-9289964 307 | chr1:9298547-9298753 308 | chr1:9298870-9300082 309 | chr1:9541183-9541570 310 | chr1:9545100-9545567 311 | chr1:9548005-9548260 312 | chr1:9564583-9564870 313 | chr1:9570173-9570651 314 | chr1:9573234-9573551 315 | chr1:9577654-9578176 316 | chr1:9591675-9592255 317 | chr1:9600975-9601811 318 | chr1:9602496-9602659 319 | chr1:9609366-9609638 320 | chr1:9611687-9612120 321 | chr1:9616238-9616593 322 | chr1:9619485-9619676 323 | chr1:9619842-9619948 324 | chr1:9621577-9621703 325 | chr1:9622537-9622761 326 | chr1:9627252-9627959 327 | chr1:9629801-9630629 328 | chr1:9645158-9645383 329 | chr1:9645771-9646116 330 | chr1:9649379-9649554 331 | chr1:9659031-9659104 332 | chr1:9673467-9673570 333 | chr1:9700230-9700839 334 | chr1:9747666-9748720 335 | chr1:9772910-9773089 336 | chr1:9784128-9784440 337 | chr1:9790242-9790586 338 | chr1:9797934-9798724 339 | chr1:9804380-9804865 340 | chr1:9812316-9812608 341 | chr1:9815742-9816153 342 | chr1:9816423-9816572 343 | chr1:9823541-9824013 344 | chr1:9829563-9829636 345 | chr1:9847819-9848560 346 | chr1:9851551-9851768 347 | chr1:9859382-9859455 348 | chr1:9862582-9862743 349 | chr1:9876897-9877061 350 | chr1:9891970-9892176 351 | chr1:9908587-9908935 352 | chr1:9932085-9932171 353 | chr1:9936107-9936283 354 | chr1:9936414-9936487 355 | chr1:9937834-9937907 356 | chr1:9943725-9944334 357 | chr1:9953087-9953162 358 | chr1:9968015-9968088 359 | chr1:9991855-9992117 360 | chr1:9993877-9994142 361 | chr1:10007402-10007546 362 | chr1:10008766-10009488 363 | chr1:10012881-10013056 364 | chr1:10014561-10014761 365 | chr1:10037907-10038328 366 | chr1:10039522-10040328 367 | chr1:10056524-10056662 368 | chr1:10057885-10058649 369 | chr1:10076301-10076485 370 | chr1:10087573-10087656 371 | chr1:10089567-10089848 372 | chr1:10093808-10093881 373 | chr1:10102220-10102515 374 | chr1:10116723-10116814 375 | chr1:10127391-10127582 376 | chr1:10133272-10133426 377 | chr1:10139511-10139656 378 | chr1:10141269-10141781 379 | chr1:10142073-10142192 380 | chr1:10157222-10157652 381 | chr1:10164504-10164686 382 | chr1:10165054-10165283 383 | chr1:10171607-10171680 384 | chr1:10193286-10193490 385 | chr1:10229130-10229298 386 | chr1:10232233-10233438 387 | chr1:10247288-10247987 388 | chr1:10279358-10279470 389 | chr1:10286919-10287196 390 | chr1:10321850-10321977 391 | chr1:10323167-10323313 392 | chr1:10430073-10430198 393 | chr1:10493602-10493746 394 | chr1:10535577-10535650 395 | chr1:10570691-10571275 396 | chr1:10596095-10596750 397 | chr1:10607044-10607194 398 | chr1:10636690-10637002 399 | chr1:10637565-10637656 400 | chr1:10699925-10700416 401 | chr1:10701031-10701243 402 | chr1:10708699-10708775 403 | chr1:10719425-10719498 404 | chr1:10719663-10720447 405 | chr1:10720685-10721292 406 | chr1:10759741-10759866 407 | chr1:10764157-10764269 408 | chr1:10771946-10772134 409 | chr1:10777848-10778127 410 | chr1:10780912-10781297 411 | chr1:10781468-10781740 412 | chr1:10794242-10794391 413 | chr1:10794440-10794541 414 | chr1:10835825-10835997 415 | chr1:10837352-10837498 416 | chr1:10861501-10861650 417 | chr1:10871333-10871795 418 | chr1:10877556-10877846 419 | chr1:10880486-10880646 420 | chr1:10899434-10899682 421 | chr1:10943016-10943235 422 | chr1:10955295-10955374 423 | chr1:10990862-10991366 424 | chr1:10993212-10994172 425 | chr1:11026724-11026851 426 | chr1:11045843-11045916 427 | chr1:11065355-11065806 428 | chr1:11066086-11066495 429 | chr1:11090918-11091161 430 | chr1:11097414-11097576 431 | chr1:11099697-11100203 432 | chr1:11117143-11117532 433 | chr1:11120540-11120729 434 | chr1:11139919-11140245 435 | chr1:11151758-11151831 436 | chr1:11169027-11169478 437 | chr1:11169614-11169996 438 | chr1:11177722-11178198 439 | chr1:11178355-11178428 440 | chr1:11179782-11180389 441 | chr1:11181143-11181333 442 | chr1:11202664-11202792 443 | chr1:11217491-11217742 444 | chr1:11217817-11217999 445 | chr1:11231059-11231145 446 | chr1:11239743-11239816 447 | chr1:11247215-11247400 448 | chr1:11268334-11268569 449 | chr1:11320481-11320588 450 | chr1:11321179-11321462 451 | chr1:11329647-11329900 452 | chr1:11357340-11357499 453 | chr1:11407149-11407454 454 | chr1:11413706-11414552 455 | chr1:11427303-11428058 456 | chr1:11433026-11433135 457 | chr1:11434203-11434644 458 | chr1:11442958-11443182 459 | chr1:11447357-11447430 460 | chr1:11451407-11451480 461 | chr1:11458251-11458728 462 | chr1:11458853-11458939 463 | chr1:11478544-11478655 464 | chr1:11507790-11507908 465 | chr1:11516928-11517019 466 | chr1:11517404-11517565 467 | chr1:11549757-11550196 468 | chr1:11551302-11551541 469 | chr1:11565036-11565275 470 | chr1:11566068-11566359 471 | chr1:11566884-11567103 472 | chr1:11573603-11574092 473 | chr1:11578209-11578409 474 | chr1:11580009-11580496 475 | chr1:11580744-11580988 476 | chr1:11585397-11585505 477 | chr1:11588132-11588655 478 | chr1:11600555-11600688 479 | chr1:11638560-11638811 480 | chr1:11650183-11650461 481 | chr1:11652784-11653130 482 | chr1:11657891-11658055 483 | chr1:11659711-11659784 484 | chr1:11660495-11660619 485 | chr1:11731356-11731492 486 | chr1:11739495-11739695 487 | chr1:11742446-11742863 488 | chr1:11750576-11751042 489 | chr1:11765361-11765536 490 | chr1:11784093-11784177 491 | chr1:11788408-11788573 492 | chr1:11795823-11795985 493 | chr1:11842706-11842867 494 | chr1:11895157-11895304 495 | chr1:11895850-11896171 496 | chr1:11911007-11911278 497 | chr1:11931458-11931631 498 | chr1:11937005-11937394 499 | chr1:11978107-11978237 500 | chr1:11979314-11979607 501 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/single_a/barcodes.tsv: -------------------------------------------------------------------------------- 1 | AAACCCAAGACCATGG-1L8TX_181211_01_A02 2 | AAACCCAAGAGCTGAC-1L8TX_181211_01_A02 3 | AAACCCAAGCTCTTCC-1L8TX_181211_01_A02 4 | AAACCCAAGGCGCTTC-1L8TX_181211_01_A02 5 | AAACCCAAGTCGAATA-1L8TX_181211_01_A02 6 | AAACCCACAACCCTAA-1L8TX_181211_01_A02 7 | AAACCCACAAGCAATA-1L8TX_181211_01_A02 8 | AAACCCACAATCCTTT-1L8TX_181211_01_A02 9 | AAACCCACACACAGCC-1L8TX_181211_01_A02 10 | AAACCCACACACGGTC-1L8TX_181211_01_A02 11 | AAACCCACACATATGC-1L8TX_181211_01_A02 12 | AAACCCACACCAGTAT-1L8TX_181211_01_A02 13 | AAACCCACACGCACCA-1L8TX_181211_01_A02 14 | AAACCCACAGTCGCTG-1L8TX_181211_01_A02 15 | AAACCCACATAACAGA-1L8TX_181211_01_A02 16 | AAACCCACATATTCGG-1L8TX_181211_01_A02 17 | AAACCCAGTACTTCCC-1L8TX_181211_01_A02 18 | AAACCCAGTAGGCAGT-1L8TX_181211_01_A02 19 | AAACCCAGTATAGCTC-1L8TX_181211_01_A02 20 | AAACCCAGTCAGACTT-1L8TX_181211_01_A02 21 | AAACCCAGTGTGCTTA-1L8TX_181211_01_A02 22 | AAACCCAGTTCTCCCA-1L8TX_181211_01_A02 23 | AAACCCATCAAGCCAT-1L8TX_181211_01_A02 24 | AAACCCATCACCTTGC-1L8TX_181211_01_A02 25 | AAACCCATCAGACCCG-1L8TX_181211_01_A02 26 | AAACCCATCCAAGAGG-1L8TX_181211_01_A02 27 | AAACCCATCCAAGCAT-1L8TX_181211_01_A02 28 | AAACCCATCCGAAATC-1L8TX_181211_01_A02 29 | AAACCCATCCGAGTGC-1L8TX_181211_01_A02 30 | AAACCCATCTAAGAAG-1L8TX_181211_01_A02 31 | AAACCCATCTAGTGAC-1L8TX_181211_01_A02 32 | AAACCCATCTTTACAC-1L8TX_181211_01_A02 33 | AAACGAAAGAAGTGTT-1L8TX_181211_01_A02 34 | AAACGAAAGAGAGGGC-1L8TX_181211_01_A02 35 | AAACGAAAGCTTTCTT-1L8TX_181211_01_A02 36 | AAACGAAAGGCTCCCA-1L8TX_181211_01_A02 37 | AAACGAAAGGTAATCA-1L8TX_181211_01_A02 38 | AAACGAAAGTAATACG-1L8TX_181211_01_A02 39 | AAACGAACAAAGGCTG-1L8TX_181211_01_A02 40 | AAACGAACAATTCACG-1L8TX_181211_01_A02 41 | AAACGAACAATTCTTC-1L8TX_181211_01_A02 42 | AAACGAACACGGTGAA-1L8TX_181211_01_A02 43 | AAACGAACAGAACGCA-1L8TX_181211_01_A02 44 | AAACGAACATCAGCAT-1L8TX_181211_01_A02 45 | AAACGAAGTAACAGGC-1L8TX_181211_01_A02 46 | AAACGAAGTAGCGCTC-1L8TX_181211_01_A02 47 | AAACGAAGTCCGAAGA-1L8TX_181211_01_A02 48 | AAACGAAGTGAGCTCC-1L8TX_181211_01_A02 49 | AAACGAAGTGCATGAG-1L8TX_181211_01_A02 50 | AAACGAAGTGTCCTAA-1L8TX_181211_01_A02 51 | AAACGAATCATCGCTC-1L8TX_181211_01_A02 52 | AAACGAATCGATAACC-1L8TX_181211_01_A02 53 | AAACGAATCTAAACGC-1L8TX_181211_01_A02 54 | AAACGAATCTATTGTC-1L8TX_181211_01_A02 55 | AAACGAATCTGGACCG-1L8TX_181211_01_A02 56 | AAACGAATCTGTGCTC-1L8TX_181211_01_A02 57 | AAACGAATCTTTCCAA-1L8TX_181211_01_A02 58 | AAACGCTAGAAATTCG-1L8TX_181211_01_A02 59 | AAACGCTAGACGCATG-1L8TX_181211_01_A02 60 | AAACGCTAGATGGCGT-1L8TX_181211_01_A02 61 | AAACGCTAGCGACATG-1L8TX_181211_01_A02 62 | AAACGCTAGCGTACAG-1L8TX_181211_01_A02 63 | AAACGCTAGTCATGGG-1L8TX_181211_01_A02 64 | AAACGCTCAATGAACA-1L8TX_181211_01_A02 65 | AAACGCTCACACTTAG-1L8TX_181211_01_A02 66 | AAACGCTCACCAAATC-1L8TX_181211_01_A02 67 | AAACGCTCACCCTATC-1L8TX_181211_01_A02 68 | AAACGCTCACCCTTAC-1L8TX_181211_01_A02 69 | AAACGCTGTATACAGA-1L8TX_181211_01_A02 70 | AAACGCTGTATCATGC-1L8TX_181211_01_A02 71 | AAACGCTGTCCTATAG-1L8TX_181211_01_A02 72 | AAACGCTGTCGTTGCG-1L8TX_181211_01_A02 73 | AAACGCTGTGTTACAC-1L8TX_181211_01_A02 74 | AAACGCTGTGTTTACG-1L8TX_181211_01_A02 75 | AAACGCTGTTAAGAAC-1L8TX_181211_01_A02 76 | AAACGCTGTTCACGAT-1L8TX_181211_01_A02 77 | AAACGCTGTTTACGAC-1L8TX_181211_01_A02 78 | AAACGCTTCGTAACTG-1L8TX_181211_01_A02 79 | AAAGAACAGAGTCACG-1L8TX_181211_01_A02 80 | AAAGAACAGCCAACCC-1L8TX_181211_01_A02 81 | AAAGAACAGGGAGGCA-1L8TX_181211_01_A02 82 | AAAGAACAGTTAGTGA-1L8TX_181211_01_A02 83 | AAAGAACCAACAAAGT-1L8TX_181211_01_A02 84 | AAAGAACCAACCCGCA-1L8TX_181211_01_A02 85 | AAAGAACCACAATTCG-1L8TX_181211_01_A02 86 | AAAGAACCAGGACAGT-1L8TX_181211_01_A02 87 | AAAGAACCATCTCATT-1L8TX_181211_01_A02 88 | AAAGAACGTACGATTC-1L8TX_181211_01_A02 89 | AAAGAACGTGCCTTCT-1L8TX_181211_01_A02 90 | AAAGAACGTTTGAACC-1L8TX_181211_01_A02 91 | AAAGAACTCACTCCGT-1L8TX_181211_01_A02 92 | AAAGAACTCAGCCTCT-1L8TX_181211_01_A02 93 | AAAGAACTCGGAGTAG-1L8TX_181211_01_A02 94 | AAAGGATAGATTAGAC-1L8TX_181211_01_A02 95 | AAAGGATAGCAAGCCA-1L8TX_181211_01_A02 96 | AAAGGATAGGCCTGAA-1L8TX_181211_01_A02 97 | AAAGGATAGGGTATAT-1L8TX_181211_01_A02 98 | AAAGGATAGTCAGCCC-1L8TX_181211_01_A02 99 | AAAGGATAGTTGCGAG-1L8TX_181211_01_A02 100 | AAAGGATCAAGTCCCG-1L8TX_181211_01_A02 101 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/single_a/counts.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate integer general 2 | % 3 | 103 100 771 4 | 52 1 1 5 | 1 3 42 6 | 2 3 9 7 | 7 3 2 8 | 13 3 1 9 | 16 3 3 10 | 20 3 1 11 | 23 3 31 12 | 26 3 1 13 | 29 3 1 14 | 33 3 1 15 | 35 3 2 16 | 37 3 5 17 | 38 3 2 18 | 48 3 1 19 | 49 3 2 20 | 56 3 1 21 | 61 3 2 22 | 66 3 4 23 | 67 3 2 24 | 71 3 1 25 | 78 3 2 26 | 79 3 4 27 | 35 4 1 28 | 1 5 14 29 | 11 5 5 30 | 13 5 7 31 | 16 5 12 32 | 20 5 4 33 | 23 5 6 34 | 29 5 4 35 | 34 5 1 36 | 35 5 1 37 | 61 5 1 38 | 67 5 2 39 | 70 5 1 40 | 79 5 3 41 | 87 5 1 42 | 1 6 14 43 | 2 6 1 44 | 13 6 2 45 | 16 6 2 46 | 20 6 4 47 | 21 6 1 48 | 23 6 31 49 | 29 6 2 50 | 35 6 2 51 | 61 6 2 52 | 79 6 2 53 | 84 6 2 54 | 20 7 2 55 | 23 7 1 56 | 87 7 2 57 | 34 8 2 58 | 17 9 2 59 | 23 9 1 60 | 68 10 2 61 | 2 11 2 62 | 31 12 1 63 | 17 13 2 64 | 26 13 2 65 | 66 13 2 66 | 1 14 1 67 | 61 14 1 68 | 23 15 2 69 | 101 15 2 70 | 1 16 20 71 | 2 16 1 72 | 7 16 1 73 | 13 16 3 74 | 16 16 7 75 | 17 16 2 76 | 20 16 8 77 | 23 16 20 78 | 35 16 6 79 | 48 16 2 80 | 53 16 2 81 | 61 16 1 82 | 66 16 1 83 | 75 16 2 84 | 79 16 8 85 | 82 16 2 86 | 85 16 1 87 | 86 16 2 88 | 101 16 2 89 | 34 18 1 90 | 1 19 10 91 | 16 19 7 92 | 20 19 4 93 | 23 19 16 94 | 34 19 1 95 | 35 19 2 96 | 39 19 2 97 | 48 19 2 98 | 75 19 3 99 | 76 19 2 100 | 79 19 4 101 | 82 19 2 102 | 1 20 12 103 | 11 20 5 104 | 13 20 8 105 | 17 20 3 106 | 20 20 4 107 | 23 20 19 108 | 35 20 2 109 | 47 20 2 110 | 48 20 3 111 | 64 20 1 112 | 66 20 6 113 | 67 20 5 114 | 76 20 4 115 | 79 20 2 116 | 85 20 2 117 | 87 20 2 118 | 100 20 3 119 | 1 21 28 120 | 2 21 4 121 | 11 21 2 122 | 13 21 1 123 | 16 21 3 124 | 17 21 2 125 | 20 21 4 126 | 23 21 23 127 | 34 21 2 128 | 35 21 6 129 | 36 21 1 130 | 48 21 4 131 | 53 21 1 132 | 56 21 3 133 | 58 21 6 134 | 59 21 3 135 | 66 21 6 136 | 75 21 1 137 | 79 21 2 138 | 82 21 2 139 | 85 21 1 140 | 86 21 1 141 | 13 22 1 142 | 23 22 2 143 | 34 22 2 144 | 93 22 1 145 | 1 23 16 146 | 7 23 2 147 | 10 23 2 148 | 13 23 1 149 | 16 23 13 150 | 20 23 4 151 | 21 23 3 152 | 23 23 32 153 | 34 23 1 154 | 35 23 2 155 | 48 23 2 156 | 66 23 3 157 | 79 23 4 158 | 94 23 2 159 | 23 24 1 160 | 79 24 2 161 | 2 26 1 162 | 34 27 1 163 | 69 27 1 164 | 23 28 1 165 | 59 28 1 166 | 1 29 12 167 | 2 29 2 168 | 7 29 1 169 | 11 29 5 170 | 20 29 2 171 | 23 29 23 172 | 34 29 1 173 | 47 29 4 174 | 61 29 1 175 | 79 29 3 176 | 1 30 3 177 | 7 30 3 178 | 13 30 2 179 | 16 30 2 180 | 20 30 4 181 | 23 30 2 182 | 29 30 1 183 | 37 30 3 184 | 39 30 1 185 | 66 30 2 186 | 75 30 1 187 | 79 30 1 188 | 47 32 1 189 | 1 33 10 190 | 2 33 1 191 | 9 33 1 192 | 10 33 1 193 | 13 33 2 194 | 16 33 4 195 | 20 33 2 196 | 23 33 22 197 | 34 33 2 198 | 35 33 5 199 | 48 33 1 200 | 59 33 3 201 | 66 33 2 202 | 75 33 1 203 | 79 33 4 204 | 1 34 1 205 | 13 34 1 206 | 49 34 1 207 | 70 34 1 208 | 84 34 4 209 | 1 35 1 210 | 87 35 2 211 | 20 36 2 212 | 23 36 4 213 | 30 36 1 214 | 1 37 16 215 | 2 37 2 216 | 16 37 7 217 | 30 37 1 218 | 34 37 1 219 | 35 37 4 220 | 48 37 2 221 | 66 37 2 222 | 67 37 4 223 | 79 37 5 224 | 87 37 3 225 | 79 38 1 226 | 23 39 3 227 | 1 40 6 228 | 14 40 1 229 | 23 40 13 230 | 39 40 2 231 | 48 40 3 232 | 69 40 1 233 | 79 40 1 234 | 93 40 2 235 | 1 41 1 236 | 23 41 3 237 | 1 42 25 238 | 2 42 2 239 | 7 42 1 240 | 10 42 2 241 | 11 42 3 242 | 13 42 5 243 | 16 42 5 244 | 20 42 6 245 | 23 42 27 246 | 29 42 1 247 | 30 42 2 248 | 35 42 7 249 | 38 42 2 250 | 39 42 1 251 | 47 42 2 252 | 48 42 2 253 | 59 42 2 254 | 61 42 2 255 | 67 42 2 256 | 23 43 2 257 | 1 44 53 258 | 2 44 8 259 | 23 44 18 260 | 35 44 3 261 | 45 44 2 262 | 82 44 2 263 | 94 44 2 264 | 1 45 61 265 | 2 45 6 266 | 13 45 5 267 | 16 45 15 268 | 17 45 2 269 | 20 45 2 270 | 21 45 2 271 | 23 45 22 272 | 26 45 2 273 | 29 45 2 274 | 34 45 1 275 | 35 45 14 276 | 37 45 1 277 | 48 45 5 278 | 49 45 4 279 | 52 45 2 280 | 53 45 4 281 | 58 45 2 282 | 59 45 4 283 | 60 45 1 284 | 64 45 2 285 | 66 45 5 286 | 67 45 2 287 | 79 45 10 288 | 87 45 2 289 | 103 45 2 290 | 23 46 2 291 | 35 46 2 292 | 1 47 9 293 | 11 47 1 294 | 13 47 3 295 | 23 47 13 296 | 29 47 1 297 | 35 47 5 298 | 37 47 2 299 | 39 47 2 300 | 61 47 2 301 | 75 47 3 302 | 77 47 1 303 | 78 47 1 304 | 85 47 2 305 | 1 48 4 306 | 11 48 8 307 | 13 48 9 308 | 16 48 7 309 | 20 48 3 310 | 23 48 44 311 | 29 48 3 312 | 35 48 6 313 | 36 48 2 314 | 39 48 2 315 | 48 48 1 316 | 66 48 1 317 | 67 48 1 318 | 69 48 1 319 | 71 48 1 320 | 79 48 4 321 | 80 48 1 322 | 93 48 4 323 | 100 48 2 324 | 23 49 1 325 | 48 49 1 326 | 49 49 2 327 | 1 50 7 328 | 2 50 2 329 | 16 50 6 330 | 20 50 5 331 | 23 50 9 332 | 35 50 5 333 | 37 50 1 334 | 39 50 2 335 | 52 50 1 336 | 64 50 1 337 | 66 50 2 338 | 69 50 1 339 | 75 50 2 340 | 79 50 8 341 | 10 51 1 342 | 13 51 4 343 | 14 52 1 344 | 1 53 12 345 | 13 53 1 346 | 16 53 4 347 | 20 53 1 348 | 23 53 13 349 | 34 53 2 350 | 35 53 4 351 | 48 53 3 352 | 50 53 2 353 | 53 53 2 354 | 66 53 3 355 | 69 53 2 356 | 71 53 2 357 | 75 53 4 358 | 76 53 2 359 | 82 53 2 360 | 84 53 2 361 | 1 54 9 362 | 16 54 2 363 | 20 54 2 364 | 23 54 18 365 | 29 54 1 366 | 34 54 1 367 | 35 54 4 368 | 38 54 1 369 | 39 54 1 370 | 52 54 4 371 | 58 54 2 372 | 61 54 1 373 | 66 54 2 374 | 69 54 1 375 | 79 54 4 376 | 100 54 2 377 | 36 55 1 378 | 1 56 2 379 | 29 56 1 380 | 1 58 10 381 | 2 58 2 382 | 13 58 2 383 | 16 58 6 384 | 23 58 42 385 | 35 58 7 386 | 48 58 2 387 | 59 58 4 388 | 75 58 1 389 | 79 58 5 390 | 84 58 3 391 | 1 59 15 392 | 10 59 2 393 | 13 59 2 394 | 16 59 3 395 | 20 59 5 396 | 21 59 2 397 | 23 59 19 398 | 29 59 2 399 | 30 59 1 400 | 34 59 1 401 | 35 59 3 402 | 38 59 2 403 | 39 59 1 404 | 49 59 2 405 | 61 59 4 406 | 79 59 5 407 | 88 59 1 408 | 93 59 2 409 | 103 59 1 410 | 1 60 14 411 | 2 60 2 412 | 16 60 2 413 | 23 60 15 414 | 35 60 3 415 | 38 60 2 416 | 58 60 1 417 | 79 60 1 418 | 1 61 19 419 | 10 61 2 420 | 11 61 2 421 | 13 61 4 422 | 16 61 2 423 | 23 61 24 424 | 26 61 2 425 | 35 61 2 426 | 61 61 2 427 | 66 61 1 428 | 69 61 1 429 | 75 61 2 430 | 87 61 2 431 | 103 61 2 432 | 1 62 33 433 | 11 62 1 434 | 13 62 2 435 | 16 62 9 436 | 23 62 19 437 | 26 62 2 438 | 35 62 2 439 | 39 62 2 440 | 48 62 2 441 | 59 62 2 442 | 63 62 2 443 | 69 62 2 444 | 82 62 2 445 | 90 62 2 446 | 93 62 2 447 | 100 62 2 448 | 103 62 2 449 | 1 63 2 450 | 13 63 2 451 | 23 63 1 452 | 52 63 2 453 | 101 63 1 454 | 1 64 18 455 | 7 64 2 456 | 13 64 2 457 | 16 64 7 458 | 20 64 2 459 | 21 64 2 460 | 23 64 29 461 | 26 64 1 462 | 35 64 7 463 | 38 64 1 464 | 48 64 1 465 | 49 64 6 466 | 52 64 1 467 | 54 64 3 468 | 62 64 2 469 | 79 64 4 470 | 11 65 2 471 | 31 65 1 472 | 1 66 2 473 | 17 66 1 474 | 1 68 2 475 | 1 69 1 476 | 49 69 2 477 | 1 70 38 478 | 2 70 3 479 | 9 70 1 480 | 10 70 1 481 | 13 70 5 482 | 16 70 8 483 | 17 70 2 484 | 20 70 3 485 | 23 70 22 486 | 26 70 2 487 | 29 70 2 488 | 35 70 9 489 | 37 70 4 490 | 38 70 1 491 | 48 70 2 492 | 49 70 3 493 | 51 70 1 494 | 56 70 1 495 | 59 70 2 496 | 61 70 2 497 | 66 70 1 498 | 67 70 2 499 | 75 70 2 500 | 79 70 10 501 | 80 70 1 502 | 82 70 3 503 | 93 70 4 504 | 103 70 2 505 | 1 71 7 506 | 7 71 2 507 | 11 71 6 508 | 13 71 2 509 | 16 71 6 510 | 20 71 7 511 | 23 71 20 512 | 29 71 2 513 | 37 71 2 514 | 48 71 2 515 | 66 71 2 516 | 75 71 1 517 | 77 71 1 518 | 82 71 2 519 | 84 71 1 520 | 103 71 2 521 | 34 72 3 522 | 70 72 2 523 | 39 73 2 524 | 23 74 1 525 | 1 75 12 526 | 2 75 3 527 | 4 75 1 528 | 10 75 2 529 | 11 75 2 530 | 13 75 2 531 | 16 75 5 532 | 20 75 6 533 | 23 75 19 534 | 26 75 1 535 | 29 75 2 536 | 35 75 14 537 | 37 75 2 538 | 48 75 2 539 | 58 75 1 540 | 61 75 3 541 | 66 75 2 542 | 67 75 2 543 | 71 75 5 544 | 79 75 2 545 | 80 75 2 546 | 87 75 1 547 | 93 75 2 548 | 94 75 2 549 | 21 76 2 550 | 34 76 1 551 | 84 76 1 552 | 103 76 1 553 | 1 78 11 554 | 7 78 2 555 | 11 78 5 556 | 13 78 7 557 | 16 78 7 558 | 20 78 3 559 | 23 78 21 560 | 29 78 3 561 | 35 78 7 562 | 36 78 1 563 | 58 78 1 564 | 59 78 3 565 | 62 78 2 566 | 66 78 2 567 | 77 78 3 568 | 79 78 2 569 | 84 78 2 570 | 103 78 1 571 | 13 79 2 572 | 1 80 3 573 | 11 80 2 574 | 13 80 1 575 | 16 80 2 576 | 20 80 2 577 | 23 80 14 578 | 35 80 1 579 | 36 80 1 580 | 39 80 2 581 | 50 80 2 582 | 66 80 3 583 | 79 80 2 584 | 85 80 4 585 | 2 82 1 586 | 50 82 1 587 | 64 82 1 588 | 1 83 1 589 | 34 83 1 590 | 1 84 24 591 | 2 84 2 592 | 9 84 1 593 | 13 84 4 594 | 16 84 4 595 | 17 84 6 596 | 20 84 5 597 | 23 84 24 598 | 34 84 1 599 | 35 84 9 600 | 37 84 2 601 | 49 84 2 602 | 58 84 4 603 | 66 84 4 604 | 67 84 2 605 | 76 84 4 606 | 79 84 8 607 | 83 84 1 608 | 84 84 2 609 | 85 84 5 610 | 87 84 3 611 | 94 84 2 612 | 1 86 2 613 | 20 86 2 614 | 1 87 38 615 | 13 87 2 616 | 16 87 4 617 | 23 87 9 618 | 26 87 1 619 | 30 87 1 620 | 35 87 2 621 | 36 87 2 622 | 37 87 1 623 | 52 87 2 624 | 54 87 1 625 | 59 87 2 626 | 64 87 2 627 | 66 87 2 628 | 67 87 1 629 | 75 87 2 630 | 79 87 3 631 | 85 87 1 632 | 87 87 2 633 | 94 87 1 634 | 13 88 2 635 | 35 88 1 636 | 1 89 48 637 | 2 89 3 638 | 3 89 1 639 | 11 89 4 640 | 13 89 10 641 | 16 89 9 642 | 20 89 8 643 | 23 89 56 644 | 26 89 4 645 | 29 89 5 646 | 31 89 2 647 | 34 89 3 648 | 35 89 8 649 | 38 89 1 650 | 39 89 4 651 | 47 89 2 652 | 48 89 7 653 | 52 89 1 654 | 58 89 7 655 | 59 89 6 656 | 61 89 3 657 | 66 89 4 658 | 67 89 4 659 | 69 89 1 660 | 76 89 2 661 | 79 89 11 662 | 80 89 1 663 | 82 89 4 664 | 85 89 4 665 | 87 89 2 666 | 88 89 1 667 | 1 90 7 668 | 2 90 1 669 | 10 90 1 670 | 11 90 2 671 | 13 90 8 672 | 16 90 2 673 | 17 90 2 674 | 20 90 7 675 | 23 90 19 676 | 26 90 2 677 | 34 90 3 678 | 35 90 8 679 | 50 90 2 680 | 58 90 2 681 | 59 90 2 682 | 61 90 1 683 | 66 90 1 684 | 76 90 2 685 | 79 90 1 686 | 82 90 1 687 | 87 90 2 688 | 11 91 2 689 | 49 91 1 690 | 59 91 2 691 | 76 91 1 692 | 10 92 1 693 | 49 92 2 694 | 79 92 1 695 | 23 94 1 696 | 26 94 2 697 | 34 94 2 698 | 1 95 14 699 | 2 95 3 700 | 7 95 1 701 | 13 95 2 702 | 16 95 4 703 | 20 95 2 704 | 23 95 15 705 | 58 95 3 706 | 61 95 2 707 | 67 95 4 708 | 79 95 2 709 | 103 95 1 710 | 1 96 9 711 | 2 96 2 712 | 7 96 1 713 | 9 96 1 714 | 10 96 1 715 | 13 96 3 716 | 16 96 4 717 | 20 96 4 718 | 23 96 20 719 | 35 96 4 720 | 37 96 1 721 | 38 96 2 722 | 49 96 1 723 | 58 96 1 724 | 66 96 3 725 | 71 96 2 726 | 75 96 2 727 | 79 96 2 728 | 82 96 2 729 | 87 96 1 730 | 93 96 1 731 | 1 97 2 732 | 16 97 1 733 | 20 97 2 734 | 23 97 13 735 | 29 97 1 736 | 48 97 2 737 | 49 97 1 738 | 52 97 5 739 | 59 97 2 740 | 61 97 4 741 | 64 97 1 742 | 72 97 2 743 | 75 97 2 744 | 79 97 6 745 | 82 97 3 746 | 87 97 2 747 | 103 97 1 748 | 1 99 55 749 | 2 99 5 750 | 4 99 4 751 | 16 99 5 752 | 23 99 9 753 | 29 99 1 754 | 35 99 5 755 | 66 99 3 756 | 67 99 2 757 | 71 99 2 758 | 100 99 2 759 | 1 100 82 760 | 2 100 18 761 | 11 100 2 762 | 13 100 2 763 | 16 100 10 764 | 20 100 3 765 | 23 100 27 766 | 35 100 1 767 | 37 100 1 768 | 58 100 2 769 | 59 100 2 770 | 66 100 6 771 | 67 100 3 772 | 75 100 1 773 | 79 100 2 774 | 103 100 3 775 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/single_a/features.tsv: -------------------------------------------------------------------------------- 1 | Xkr4 2 | Gm1992 3 | Gm37381 4 | Rp1 5 | Sox17 6 | Gm37323 7 | Mrpl15 8 | Lypla1 9 | Gm37988 10 | Tcea1 11 | Rgs20 12 | Gm16041 13 | Atp6v1h 14 | Oprk1 15 | Npbwr1 16 | Rb1cc1 17 | 4732440D04Rik 18 | Alkal1 19 | St18 20 | Pcmtd1 21 | Gm26901 22 | Gm30414 23 | Sntg1 24 | Rrs1 25 | Adhfe1 26 | 3110035E14Rik 27 | Gm29520 28 | Mybl1 29 | Vcpip1 30 | 1700034P13Rik 31 | 1110002O04Rik 32 | 1500015O10Rik 33 | 1700001O22Rik 34 | 1110008P14Rik 35 | 1110051M20Rik 36 | 1500011K16Rik 37 | 1110034G24Rik 38 | 1700003F12Rik 39 | 1110008F13Rik 40 | 0610039K10Rik 41 | 1200007C13Rik 42 | 1110015O18Rik 43 | 1110032F04Rik 44 | 1700003H04Rik 45 | 1110002E22Rik 46 | 0610031O16Rik 47 | 1110017D15Rik 48 | 0610043K17Rik 49 | 0610037L13Rik 50 | 1110065P20Rik 51 | 1500002C15Rik 52 | 1500035N22Rik 53 | 0610040J01Rik 54 | 1500011B03Rik 55 | 1110006O24Rik 56 | 0610040B10Rik 57 | 1700001J03Rik 58 | 1110019D14Rik 59 | 0610030E20Rik 60 | 1700003E16Rik 61 | 1600020E01Rik 62 | 0610040F04Rik 63 | 1110035H17Rik 64 | 1600014C10Rik 65 | 0610005C13Rik 66 | 1600010M07Rik 67 | 1110004F10Rik 68 | 1110002J07Rik 69 | 1600002K03Rik 70 | 1190007I07Rik 71 | 1500009L16Rik 72 | 0610038B21Rik 73 | 1110028F18Rik 74 | 1190005I06Rik 75 | 1110032A03Rik 76 | 1190002N15Rik 77 | 1300017J02Rik 78 | 1110059G10Rik 79 | 0610010F05Rik 80 | 0610009B22Rik 81 | 1700001P01Rik 82 | 1110046J04Rik 83 | 1700001L19Rik 84 | 1110002L01Rik 85 | 1110008L16Rik 86 | 1700001K19Rik 87 | 1110038F14Rik 88 | 1500009C09Rik 89 | 1700001L05Rik 90 | 1700003L19Rik 91 | 1300002E11Rik 92 | 1110008E08Rik 93 | 1110004E09Rik 94 | 1600012H06Rik 95 | 1520401A03Rik 96 | 1600014C23Rik 97 | 1700001C19Rik 98 | 1110020A21Rik 99 | 1700001G01Rik 100 | 0610009O20Rik 101 | 1500015A07Rik 102 | 1500015L24Rik 103 | 1110059E24Rik 104 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/single_b/barcodes.tsv: -------------------------------------------------------------------------------- 1 | AGCGATAGAACCAGGTAAGAGATGATAGAGGC 2 | AGCGATAGAACCAGGTAAGAGATGTATAGCCT 3 | AGCGATAGAACCAGGTAATGACGTCAGGACGT 4 | AGCGATAGAACCAGGTAGGATAACATAGAGGC 5 | AGCGATAGAACCAGGTATAGCCTTAGGCGAAG 6 | AGCGATAGAACCAGGTATAGCCTTATAGAGGC 7 | AGCGATAGAACCAGGTATAGCCTTGGCTCTGA 8 | AGCGATAGAACCAGGTATAGCCTTGTACTGAC 9 | AGCGATAGAACCAGGTATTCGTTGTAATCTTA 10 | AGCGATAGAACCAGGTATTCGTTGTATAGCCT 11 | AGCGATAGAACCAGGTGAAGTATGAGGCGAAG 12 | AGCGATAGAACCAGGTGAAGTATGTATAGCCT 13 | AGCGATAGAACCAGGTGGATACTACAGGACGT 14 | AGCGATAGAACCAGGTTAAGATCCATAGAGGC 15 | AGCGATAGAACCAGGTTAAGATCCGGCTCTGA 16 | AGCGATAGAACCAGGTTTACGACCCCTATCCT 17 | AGCGATAGAACCAGGTTTACGACCTATAGCCT 18 | AGCGATAGAACCAGGTTTCATCCATAATCTTA 19 | AGCGATAGAAGAGGCAAAGAGATGTATAGCCT 20 | AGCGATAGAAGAGGCAAAGGAGTAAGGCGAAG 21 | AGCGATAGAAGAGGCAAAGGCTATATAGAGGC 22 | AGCGATAGAAGAGGCAAAGGCTATCCTATCCT 23 | AGCGATAGAAGAGGCAAAGGCTATTAATCTTA 24 | AGCGATAGAAGAGGCAAAGGCTATTATAGCCT 25 | AGCGATAGAAGAGGCAAATGACGTCAGGACGT 26 | AGCGATAGAAGAGGCAACTGCATAAGGCGAAG 27 | AGCGATAGAAGAGGCAACTGCATAATAGAGGC 28 | AGCGATAGAAGAGGCAATCTGAGTATAGAGGC 29 | AGCGATAGAAGAGGCACGTCTAATATAGAGGC 30 | AGCGATAGAAGAGGCACGTCTAATCCTATCCT 31 | AGCGATAGAAGAGGCACTCTCTATCCTATCCT 32 | AGCGATAGAAGAGGCACTCTCTATGGCTCTGA 33 | AGCGATAGAAGAGGCACTCTCTATGTACTGAC 34 | AGCGATAGAAGAGGCAGAGCCTTAATAGAGGC 35 | AGCGATAGAAGAGGCAGAGCCTTACAGGACGT 36 | AGCGATAGAAGAGGCAGCGTAAGAGGCTCTGA 37 | AGCGATAGAAGAGGCAGTAAGGAGCCTATCCT 38 | AGCGATAGAAGAGGCAGTAAGGAGGGCTCTGA 39 | AGCGATAGAAGAGGCAGTAAGGAGGTACTGAC 40 | AGCGATAGAAGAGGCATCGACTAGAGGCGAAG 41 | AGCGATAGAAGAGGCATCGACTAGATAGAGGC 42 | AGCGATAGAAGAGGCATCTCTCCGAGGCGAAG 43 | AGCGATAGAAGAGGCATCTCTCCGTATAGCCT 44 | AGCGATAGAAGAGGCATTATGCGACCTATCCT 45 | AGCGATAGAAGAGGCATTATGCGAGTACTGAC 46 | AGCGATAGAAGCAACGAACGAACGCCTATCCT 47 | AGCGATAGAAGCAACGAAGAGATGCAGGACGT 48 | AGCGATAGAAGCAACGAAGAGATGGTACTGAC 49 | AGCGATAGAAGCAACGAATGACGTAGGCGAAG 50 | AGCGATAGAAGCAACGAGGATAACATAGAGGC 51 | AGCGATAGAAGCAACGAGGATAACGGCTCTGA 52 | AGCGATAGAAGCAACGATAGCCTTCAGGACGT 53 | AGCGATAGAAGCAACGATAGCCTTCCTATCCT 54 | AGCGATAGAAGCAACGGAAGTATGGGCTCTGA 55 | AGCGATAGAAGCAACGGGTTAGACGGCTCTGA 56 | AGCGATAGAAGCAACGGGTTAGACTAATCTTA 57 | AGCGATAGAAGCAACGTCCGGTAACCTATCCT 58 | AGCGATAGAAGCAACGTCCGGTAAGGCTCTGA 59 | AGCGATAGAAGCAACGTCCGGTAAGTACTGAC 60 | AGCGATAGAAGCAACGTCCGGTAATATAGCCT 61 | AGCGATAGAAGCAACGTGCCTTACGGCTCTGA 62 | AGCGATAGAAGCAACGTTACGACCCAGGACGT 63 | AGCGATAGAAGCAACGTTACGACCCCTATCCT 64 | AGCGATAGAAGCAACGTTACGACCTATAGCCT 65 | AGCGATAGAAGCAACGTTCATCCAATAGAGGC 66 | AGCGATAGAAGCAACGTTGGAAGTTAATCTTA 67 | AGCGATAGACGAATCTAAGAGATGGTACTGAC 68 | AGCGATAGACGAATCTAGGATAACGTACTGAC 69 | AGCGATAGACGAATCTATTCGTTGATAGAGGC 70 | AGCGATAGACGAATCTATTCGTTGCAGGACGT 71 | AGCGATAGACGAATCTATTCGTTGGTACTGAC 72 | AGCGATAGACGAATCTGAAGTATGATAGAGGC 73 | AGCGATAGACGAATCTTAAGATCCTAATCTTA 74 | AGCGATAGACGAATCTTCCGGTAACCTATCCT 75 | AGCGATAGACGAATCTTGCCTTACGTACTGAC 76 | AGCGATAGACGAATCTTGCCTTACTATAGCCT 77 | AGCGATAGACGAATCTTTCATCCACCTATCCT 78 | AGCGATAGACGAATCTTTGGAAGTCAGGACGT 79 | AGCGATAGACGTTCGAAACGAACGCCTATCCT 80 | AGCGATAGACGTTCGAAACGAACGGGCTCTGA 81 | AGCGATAGACGTTCGAAAGAGATGATAGAGGC 82 | AGCGATAGACGTTCGAAAGAGATGCCTATCCT 83 | AGCGATAGACGTTCGAAAGAGATGTAATCTTA 84 | AGCGATAGACGTTCGAAAGAGATGTATAGCCT 85 | AGCGATAGACGTTCGAAATGACGTATAGAGGC 86 | AGCGATAGACGTTCGAAATGACGTCAGGACGT 87 | AGCGATAGACGTTCGAAATGACGTCCTATCCT 88 | AGCGATAGACGTTCGAAATGACGTGTACTGAC 89 | AGCGATAGACGTTCGAAATGACGTTATAGCCT 90 | AGCGATAGACGTTCGAAGGATAACAGGCGAAG 91 | AGCGATAGACGTTCGAAGGATAACATAGAGGC 92 | AGCGATAGACGTTCGAAGGATAACGGCTCTGA 93 | AGCGATAGACGTTCGAAGGATAACGTACTGAC 94 | AGCGATAGACGTTCGAATAGCCTTCCTATCCT 95 | AGCGATAGACGTTCGAATTCGTTGATAGAGGC 96 | AGCGATAGACGTTCGAATTCGTTGCCTATCCT 97 | AGCGATAGACGTTCGAATTCGTTGGGCTCTGA 98 | AGCGATAGACGTTCGAATTCGTTGTAATCTTA 99 | AGCGATAGACGTTCGAATTCGTTGTATAGCCT 100 | AGCGATAGACGTTCGAGAAGTATGGGCTCTGA 101 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/single_b/counts.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate integer general 2 | % 3 | 510 100 1019 4 | 3 2 1 5 | 4 2 1 6 | 17 2 3 7 | 23 2 1 8 | 28 2 1 9 | 32 2 1 10 | 34 2 1 11 | 44 2 1 12 | 49 2 1 13 | 54 2 1 14 | 55 2 2 15 | 68 2 1 16 | 84 2 2 17 | 112 2 1 18 | 116 2 1 19 | 120 2 1 20 | 135 2 1 21 | 138 2 2 22 | 139 2 2 23 | 141 2 1 24 | 159 2 1 25 | 160 2 1 26 | 182 2 4 27 | 187 2 1 28 | 209 2 1 29 | 210 2 1 30 | 216 2 1 31 | 230 2 1 32 | 244 2 1 33 | 269 2 1 34 | 276 2 1 35 | 278 2 1 36 | 308 2 1 37 | 326 2 1 38 | 327 2 2 39 | 334 2 2 40 | 345 2 1 41 | 359 2 1 42 | 366 2 1 43 | 375 2 1 44 | 386 2 1 45 | 395 2 2 46 | 424 2 1 47 | 448 2 1 48 | 454 2 3 49 | 472 2 2 50 | 473 2 1 51 | 474 2 1 52 | 481 2 1 53 | 1 3 2 54 | 4 3 1 55 | 13 3 1 56 | 34 3 1 57 | 36 3 1 58 | 44 3 1 59 | 48 3 1 60 | 62 3 1 61 | 72 3 1 62 | 83 3 3 63 | 84 3 1 64 | 85 3 1 65 | 87 3 1 66 | 88 3 1 67 | 107 3 1 68 | 120 3 2 69 | 150 3 1 70 | 216 3 4 71 | 218 3 1 72 | 222 3 1 73 | 223 3 1 74 | 228 3 1 75 | 235 3 4 76 | 250 3 1 77 | 277 3 1 78 | 279 3 1 79 | 282 3 2 80 | 308 3 1 81 | 310 3 1 82 | 327 3 3 83 | 333 3 2 84 | 335 3 1 85 | 339 3 1 86 | 341 3 1 87 | 343 3 1 88 | 348 3 1 89 | 356 3 1 90 | 357 3 1 91 | 359 3 1 92 | 362 3 2 93 | 365 3 2 94 | 384 3 1 95 | 385 3 1 96 | 424 3 1 97 | 454 3 3 98 | 469 3 1 99 | 489 3 1 100 | 490 3 1 101 | 503 3 2 102 | 93 4 1 103 | 107 4 1 104 | 112 4 1 105 | 216 4 2 106 | 310 4 1 107 | 366 4 1 108 | 164 5 1 109 | 165 5 1 110 | 184 5 1 111 | 206 5 1 112 | 222 5 1 113 | 310 5 1 114 | 326 5 1 115 | 334 5 1 116 | 17 6 2 117 | 22 6 1 118 | 27 6 1 119 | 30 6 1 120 | 35 6 1 121 | 36 6 1 122 | 48 6 1 123 | 88 6 1 124 | 110 6 1 125 | 216 6 1 126 | 225 6 1 127 | 232 6 1 128 | 308 6 1 129 | 310 6 1 130 | 317 6 1 131 | 322 6 1 132 | 341 6 1 133 | 343 6 1 134 | 380 6 2 135 | 386 6 1 136 | 480 6 1 137 | 88 7 1 138 | 16 8 1 139 | 25 8 1 140 | 28 8 1 141 | 47 8 1 142 | 48 8 2 143 | 72 8 3 144 | 75 8 1 145 | 87 8 1 146 | 88 8 2 147 | 91 8 1 148 | 92 8 1 149 | 105 8 1 150 | 111 8 1 151 | 112 8 1 152 | 164 8 1 153 | 168 8 1 154 | 172 8 1 155 | 184 8 1 156 | 219 8 2 157 | 228 8 1 158 | 257 8 1 159 | 308 8 1 160 | 333 8 1 161 | 338 8 2 162 | 343 8 1 163 | 360 8 1 164 | 365 8 2 165 | 368 8 1 166 | 378 8 1 167 | 380 8 1 168 | 385 8 1 169 | 386 8 1 170 | 395 8 2 171 | 398 8 1 172 | 408 8 1 173 | 430 8 2 174 | 457 8 1 175 | 488 8 1 176 | 496 8 1 177 | 505 8 1 178 | 509 8 1 179 | 164 9 1 180 | 386 9 1 181 | 3 10 1 182 | 10 10 1 183 | 16 10 1 184 | 28 10 1 185 | 48 10 1 186 | 80 10 1 187 | 120 10 1 188 | 171 10 1 189 | 217 10 1 190 | 281 10 1 191 | 308 10 1 192 | 310 10 1 193 | 333 10 1 194 | 338 10 1 195 | 356 10 1 196 | 386 10 1 197 | 484 10 1 198 | 506 10 1 199 | 88 11 2 200 | 120 11 1 201 | 133 11 1 202 | 138 11 1 203 | 164 11 1 204 | 235 11 2 205 | 365 11 1 206 | 454 11 1 207 | 90 12 1 208 | 164 12 1 209 | 166 12 1 210 | 211 12 1 211 | 216 12 1 212 | 308 12 1 213 | 368 12 1 214 | 385 12 1 215 | 386 12 3 216 | 396 12 1 217 | 444 12 1 218 | 454 12 2 219 | 456 12 1 220 | 36 13 1 221 | 48 13 1 222 | 84 13 1 223 | 164 13 1 224 | 235 13 1 225 | 302 13 1 226 | 440 13 1 227 | 504 13 1 228 | 88 14 1 229 | 48 15 1 230 | 37 16 1 231 | 42 16 1 232 | 48 16 3 233 | 49 16 1 234 | 54 16 1 235 | 62 16 1 236 | 81 16 1 237 | 85 16 1 238 | 100 16 1 239 | 106 16 1 240 | 107 16 2 241 | 116 16 1 242 | 117 16 1 243 | 120 16 1 244 | 137 16 1 245 | 140 16 1 246 | 164 16 2 247 | 167 16 1 248 | 171 16 1 249 | 194 16 1 250 | 203 16 1 251 | 217 16 2 252 | 224 16 1 253 | 235 16 1 254 | 239 16 1 255 | 278 16 1 256 | 308 16 1 257 | 309 16 1 258 | 360 16 1 259 | 361 16 1 260 | 365 16 1 261 | 366 16 2 262 | 386 16 2 263 | 442 16 1 264 | 454 16 1 265 | 33 18 1 266 | 48 18 1 267 | 88 18 1 268 | 91 18 1 269 | 134 18 1 270 | 138 18 1 271 | 142 18 1 272 | 150 18 1 273 | 213 18 1 274 | 216 18 1 275 | 276 18 1 276 | 295 18 1 277 | 308 18 2 278 | 316 18 2 279 | 338 18 1 280 | 454 18 1 281 | 477 18 1 282 | 510 18 1 283 | 92 20 1 284 | 308 20 1 285 | 371 20 1 286 | 13 21 1 287 | 17 21 1 288 | 100 21 1 289 | 105 21 1 290 | 120 21 1 291 | 122 21 1 292 | 127 21 1 293 | 156 21 1 294 | 164 21 1 295 | 180 21 1 296 | 200 21 1 297 | 207 21 1 298 | 217 21 1 299 | 221 21 1 300 | 235 21 1 301 | 292 21 1 302 | 308 21 1 303 | 315 21 2 304 | 319 21 1 305 | 326 21 1 306 | 334 21 1 307 | 386 21 1 308 | 471 21 1 309 | 64 22 1 310 | 84 22 1 311 | 112 22 1 312 | 164 22 1 313 | 210 22 1 314 | 216 22 1 315 | 235 22 1 316 | 277 22 1 317 | 280 22 1 318 | 298 22 1 319 | 306 22 1 320 | 321 22 1 321 | 327 22 3 322 | 334 22 1 323 | 343 22 1 324 | 356 22 1 325 | 366 22 1 326 | 378 22 1 327 | 396 22 2 328 | 405 22 1 329 | 406 22 1 330 | 440 22 1 331 | 472 22 1 332 | 3 26 1 333 | 77 26 1 334 | 80 26 1 335 | 84 26 1 336 | 85 26 1 337 | 186 26 1 338 | 222 26 1 339 | 235 26 1 340 | 308 26 1 341 | 326 26 2 342 | 327 26 1 343 | 338 26 1 344 | 368 26 1 345 | 4 28 1 346 | 14 28 1 347 | 30 28 1 348 | 133 28 1 349 | 210 28 2 350 | 235 28 1 351 | 297 28 1 352 | 308 28 1 353 | 310 28 1 354 | 322 28 1 355 | 326 28 1 356 | 327 28 1 357 | 338 28 1 358 | 371 28 1 359 | 386 28 1 360 | 407 28 1 361 | 424 28 1 362 | 468 28 1 363 | 326 29 1 364 | 3 30 2 365 | 28 30 1 366 | 34 30 1 367 | 36 30 1 368 | 74 30 1 369 | 85 30 1 370 | 102 30 1 371 | 104 30 1 372 | 105 30 2 373 | 235 30 1 374 | 250 30 1 375 | 298 30 1 376 | 327 30 2 377 | 345 30 1 378 | 356 30 1 379 | 361 30 1 380 | 371 30 1 381 | 427 30 1 382 | 454 30 1 383 | 107 33 1 384 | 311 33 1 385 | 31 35 1 386 | 48 35 1 387 | 107 35 1 388 | 216 35 2 389 | 320 35 1 390 | 454 35 1 391 | 333 36 1 392 | 48 37 1 393 | 186 37 1 394 | 222 37 1 395 | 235 37 1 396 | 356 37 1 397 | 365 37 1 398 | 404 37 1 399 | 467 37 1 400 | 178 41 1 401 | 13 43 1 402 | 17 43 1 403 | 37 43 1 404 | 60 43 1 405 | 88 43 2 406 | 102 43 1 407 | 222 43 1 408 | 244 43 1 409 | 276 43 2 410 | 301 43 1 411 | 306 43 1 412 | 309 43 1 413 | 327 43 1 414 | 334 43 1 415 | 360 43 1 416 | 385 43 1 417 | 386 43 1 418 | 404 43 1 419 | 474 43 1 420 | 72 44 1 421 | 107 44 1 422 | 216 44 1 423 | 235 44 1 424 | 164 45 1 425 | 107 46 1 426 | 184 46 1 427 | 310 46 1 428 | 318 46 1 429 | 333 46 1 430 | 338 46 1 431 | 343 46 1 432 | 356 46 1 433 | 376 46 1 434 | 440 46 1 435 | 447 46 1 436 | 3 47 1 437 | 17 47 2 438 | 31 47 1 439 | 47 47 1 440 | 48 47 1 441 | 184 47 1 442 | 201 47 1 443 | 213 47 1 444 | 222 47 1 445 | 235 47 1 446 | 298 47 1 447 | 334 47 1 448 | 356 47 1 449 | 385 47 1 450 | 387 47 1 451 | 434 47 1 452 | 440 47 1 453 | 457 47 1 454 | 48 48 1 455 | 54 48 1 456 | 62 48 1 457 | 65 48 1 458 | 72 48 1 459 | 92 48 2 460 | 95 48 1 461 | 99 48 1 462 | 107 48 1 463 | 111 48 1 464 | 132 48 1 465 | 164 48 1 466 | 182 48 1 467 | 187 48 3 468 | 216 48 2 469 | 222 48 1 470 | 235 48 1 471 | 254 48 1 472 | 263 48 1 473 | 270 48 1 474 | 306 48 1 475 | 308 48 1 476 | 317 48 1 477 | 318 48 1 478 | 322 48 1 479 | 326 48 2 480 | 327 48 2 481 | 356 48 1 482 | 360 48 1 483 | 365 48 1 484 | 366 48 1 485 | 368 48 1 486 | 386 48 1 487 | 454 48 2 488 | 467 48 1 489 | 470 48 1 490 | 473 48 1 491 | 488 48 2 492 | 112 49 1 493 | 110 50 1 494 | 208 50 1 495 | 257 50 1 496 | 334 50 1 497 | 440 50 1 498 | 454 50 1 499 | 473 50 1 500 | 73 52 1 501 | 84 52 1 502 | 103 52 1 503 | 105 52 1 504 | 108 52 1 505 | 216 52 1 506 | 333 52 2 507 | 334 52 1 508 | 338 52 1 509 | 356 52 1 510 | 366 52 1 511 | 423 52 1 512 | 424 52 1 513 | 437 52 1 514 | 88 53 1 515 | 216 53 2 516 | 334 53 1 517 | 339 53 1 518 | 365 53 1 519 | 366 53 1 520 | 72 55 1 521 | 77 55 1 522 | 112 55 2 523 | 164 55 1 524 | 216 55 2 525 | 235 55 1 526 | 241 55 1 527 | 327 55 1 528 | 340 55 1 529 | 386 55 1 530 | 216 56 1 531 | 310 56 1 532 | 339 56 1 533 | 3 57 1 534 | 28 57 1 535 | 74 57 1 536 | 85 57 1 537 | 92 57 1 538 | 105 57 1 539 | 164 57 1 540 | 208 57 2 541 | 216 57 2 542 | 235 57 2 543 | 277 57 2 544 | 298 57 1 545 | 308 57 1 546 | 310 57 1 547 | 316 57 1 548 | 326 57 2 549 | 327 57 1 550 | 329 57 1 551 | 333 57 1 552 | 334 57 2 553 | 365 57 1 554 | 386 57 3 555 | 396 57 1 556 | 414 57 1 557 | 427 57 1 558 | 450 57 1 559 | 454 57 1 560 | 474 57 1 561 | 487 57 1 562 | 498 57 1 563 | 508 57 1 564 | 75 58 1 565 | 334 58 1 566 | 48 59 1 567 | 75 59 1 568 | 88 59 1 569 | 164 59 1 570 | 185 59 3 571 | 216 59 4 572 | 235 59 2 573 | 310 59 2 574 | 333 59 3 575 | 376 59 1 576 | 403 59 1 577 | 72 62 1 578 | 80 62 1 579 | 84 62 1 580 | 85 62 1 581 | 235 62 1 582 | 241 62 1 583 | 280 62 1 584 | 305 62 1 585 | 310 62 1 586 | 317 62 2 587 | 326 62 1 588 | 356 62 1 589 | 373 62 1 590 | 19 63 1 591 | 46 63 1 592 | 48 63 2 593 | 60 63 1 594 | 88 63 2 595 | 120 63 1 596 | 125 63 1 597 | 135 63 1 598 | 185 63 2 599 | 188 63 1 600 | 203 63 1 601 | 216 63 3 602 | 222 63 1 603 | 235 63 2 604 | 303 63 1 605 | 308 63 2 606 | 320 63 1 607 | 326 63 2 608 | 327 63 1 609 | 329 63 1 610 | 333 63 3 611 | 334 63 1 612 | 356 63 1 613 | 366 63 2 614 | 386 63 1 615 | 395 63 1 616 | 404 63 2 617 | 405 63 1 618 | 436 63 1 619 | 437 63 1 620 | 92 64 1 621 | 107 64 1 622 | 114 64 1 623 | 133 64 1 624 | 184 64 3 625 | 279 64 1 626 | 308 64 1 627 | 310 64 1 628 | 333 64 1 629 | 334 64 1 630 | 335 64 1 631 | 341 64 1 632 | 350 64 1 633 | 92 65 1 634 | 222 65 1 635 | 472 65 2 636 | 505 65 1 637 | 34 66 2 638 | 53 66 1 639 | 74 66 1 640 | 203 66 1 641 | 235 66 2 642 | 241 66 1 643 | 257 66 1 644 | 277 66 1 645 | 325 66 1 646 | 327 66 1 647 | 343 66 1 648 | 345 66 2 649 | 379 66 1 650 | 386 66 1 651 | 389 66 3 652 | 395 66 1 653 | 405 66 1 654 | 424 66 1 655 | 455 66 1 656 | 466 66 1 657 | 467 66 1 658 | 471 66 1 659 | 473 66 1 660 | 475 66 1 661 | 4 67 1 662 | 29 67 1 663 | 88 67 1 664 | 91 67 1 665 | 164 67 5 666 | 165 67 1 667 | 206 67 1 668 | 207 67 1 669 | 216 67 2 670 | 281 67 1 671 | 298 67 1 672 | 302 67 1 673 | 308 67 2 674 | 345 67 1 675 | 362 67 1 676 | 386 67 1 677 | 404 67 1 678 | 454 67 1 679 | 456 67 2 680 | 474 67 2 681 | 487 67 1 682 | 27 68 1 683 | 48 68 1 684 | 85 68 1 685 | 88 68 1 686 | 120 68 1 687 | 164 68 1 688 | 184 68 1 689 | 217 68 1 690 | 229 68 1 691 | 235 68 2 692 | 242 68 1 693 | 280 68 1 694 | 310 68 3 695 | 314 68 1 696 | 317 68 1 697 | 326 68 2 698 | 366 68 1 699 | 377 68 1 700 | 386 68 1 701 | 400 68 1 702 | 405 68 1 703 | 454 68 1 704 | 474 68 1 705 | 105 69 1 706 | 235 69 1 707 | 241 69 1 708 | 384 69 1 709 | 386 69 1 710 | 442 69 1 711 | 454 69 2 712 | 480 69 1 713 | 506 69 1 714 | 164 70 1 715 | 80 71 1 716 | 84 71 3 717 | 85 71 1 718 | 88 71 2 719 | 92 71 1 720 | 105 71 3 721 | 118 71 1 722 | 137 71 1 723 | 165 71 1 724 | 169 71 3 725 | 184 71 1 726 | 216 71 3 727 | 222 71 1 728 | 233 71 1 729 | 235 71 1 730 | 244 71 2 731 | 268 71 2 732 | 270 71 1 733 | 305 71 1 734 | 307 71 1 735 | 308 71 1 736 | 311 71 1 737 | 315 71 1 738 | 316 71 1 739 | 326 71 1 740 | 333 71 1 741 | 345 71 1 742 | 353 71 1 743 | 366 71 1 744 | 377 71 1 745 | 380 71 1 746 | 382 71 3 747 | 387 71 1 748 | 393 71 1 749 | 421 71 1 750 | 428 71 1 751 | 443 71 2 752 | 453 71 1 753 | 454 71 3 754 | 472 71 1 755 | 3 72 1 756 | 14 72 1 757 | 16 72 1 758 | 30 72 1 759 | 48 72 1 760 | 53 72 1 761 | 84 72 1 762 | 92 72 1 763 | 93 72 1 764 | 105 72 1 765 | 115 72 1 766 | 120 72 1 767 | 133 72 1 768 | 142 72 1 769 | 143 72 1 770 | 184 72 1 771 | 196 72 1 772 | 235 72 1 773 | 268 72 1 774 | 298 72 1 775 | 310 72 1 776 | 317 72 1 777 | 322 72 1 778 | 327 72 1 779 | 329 72 1 780 | 333 72 1 781 | 334 72 2 782 | 335 72 1 783 | 343 72 1 784 | 366 72 3 785 | 395 72 1 786 | 396 72 1 787 | 405 72 1 788 | 417 72 1 789 | 454 72 2 790 | 491 72 1 791 | 28 73 1 792 | 45 73 1 793 | 48 73 2 794 | 75 73 1 795 | 138 73 1 796 | 164 73 1 797 | 171 73 1 798 | 184 73 1 799 | 210 73 1 800 | 216 73 1 801 | 307 73 1 802 | 308 73 1 803 | 334 73 1 804 | 356 73 1 805 | 366 73 1 806 | 386 73 1 807 | 395 73 1 808 | 396 73 1 809 | 403 73 1 810 | 424 73 1 811 | 431 73 1 812 | 438 73 1 813 | 443 73 1 814 | 488 73 1 815 | 333 74 1 816 | 32 76 1 817 | 47 76 1 818 | 235 76 1 819 | 386 76 1 820 | 393 76 1 821 | 417 76 1 822 | 480 76 1 823 | 112 77 2 824 | 216 77 2 825 | 235 77 1 826 | 84 78 2 827 | 107 78 2 828 | 150 78 1 829 | 201 78 1 830 | 216 78 4 831 | 230 78 1 832 | 235 78 1 833 | 241 78 1 834 | 281 78 1 835 | 306 78 1 836 | 308 78 1 837 | 309 78 1 838 | 311 78 1 839 | 318 78 1 840 | 334 78 1 841 | 341 78 1 842 | 420 78 1 843 | 434 78 1 844 | 436 78 1 845 | 447 78 1 846 | 488 78 1 847 | 48 80 1 848 | 310 80 1 849 | 216 82 1 850 | 3 84 1 851 | 9 84 2 852 | 13 84 1 853 | 16 84 1 854 | 35 84 1 855 | 48 84 1 856 | 55 84 1 857 | 62 84 1 858 | 107 84 1 859 | 112 84 1 860 | 114 84 1 861 | 120 84 1 862 | 127 84 1 863 | 135 84 1 864 | 150 84 2 865 | 164 84 2 866 | 171 84 2 867 | 198 84 1 868 | 210 84 3 869 | 216 84 2 870 | 232 84 1 871 | 241 84 1 872 | 255 84 1 873 | 270 84 1 874 | 275 84 1 875 | 277 84 1 876 | 280 84 1 877 | 301 84 1 878 | 302 84 1 879 | 308 84 2 880 | 309 84 1 881 | 310 84 1 882 | 319 84 1 883 | 326 84 2 884 | 338 84 1 885 | 341 84 2 886 | 356 84 3 887 | 365 84 1 888 | 382 84 2 889 | 386 84 2 890 | 395 84 2 891 | 396 84 1 892 | 399 84 1 893 | 400 84 1 894 | 402 84 1 895 | 434 84 1 896 | 442 84 1 897 | 454 84 1 898 | 470 84 2 899 | 475 84 1 900 | 477 84 2 901 | 480 84 2 902 | 488 84 1 903 | 508 84 1 904 | 34 86 1 905 | 228 86 1 906 | 273 86 1 907 | 314 86 1 908 | 327 86 1 909 | 366 86 1 910 | 506 86 1 911 | 164 88 1 912 | 48 89 1 913 | 267 89 1 914 | 412 89 1 915 | 50 90 1 916 | 321 90 1 917 | 470 90 1 918 | 366 91 1 919 | 378 91 1 920 | 104 92 1 921 | 120 92 1 922 | 167 92 1 923 | 216 92 1 924 | 235 92 1 925 | 334 92 1 926 | 335 92 1 927 | 366 92 1 928 | 1 93 1 929 | 3 93 1 930 | 12 93 1 931 | 30 93 1 932 | 37 93 1 933 | 45 93 2 934 | 48 93 3 935 | 69 93 1 936 | 72 93 1 937 | 83 93 1 938 | 88 93 3 939 | 102 93 1 940 | 106 93 1 941 | 116 93 1 942 | 152 93 4 943 | 164 93 3 944 | 178 93 1 945 | 184 93 1 946 | 207 93 1 947 | 216 93 2 948 | 217 93 1 949 | 222 93 2 950 | 237 93 1 951 | 240 93 1 952 | 278 93 1 953 | 280 93 1 954 | 308 93 1 955 | 309 93 1 956 | 310 93 1 957 | 316 93 1 958 | 325 93 1 959 | 326 93 2 960 | 327 93 1 961 | 334 93 1 962 | 338 93 1 963 | 343 93 1 964 | 356 93 2 965 | 358 93 1 966 | 359 93 1 967 | 360 93 3 968 | 362 93 1 969 | 365 93 1 970 | 371 93 1 971 | 378 93 1 972 | 385 93 1 973 | 386 93 1 974 | 387 93 1 975 | 395 93 1 976 | 398 93 1 977 | 404 93 1 978 | 453 93 1 979 | 454 93 2 980 | 455 93 1 981 | 467 93 3 982 | 472 93 1 983 | 477 93 1 984 | 480 93 1 985 | 481 93 1 986 | 488 93 2 987 | 505 93 2 988 | 1 96 1 989 | 9 96 1 990 | 16 96 1 991 | 28 96 1 992 | 88 96 1 993 | 92 96 1 994 | 123 96 1 995 | 133 96 1 996 | 144 96 1 997 | 160 96 1 998 | 175 96 1 999 | 181 96 1 1000 | 208 96 1 1001 | 216 96 2 1002 | 218 96 1 1003 | 219 96 1 1004 | 230 96 1 1005 | 232 96 1 1006 | 235 96 1 1007 | 280 96 1 1008 | 308 96 1 1009 | 323 96 1 1010 | 325 96 1 1011 | 326 96 1 1012 | 333 96 2 1013 | 339 96 1 1014 | 380 96 1 1015 | 404 96 1 1016 | 432 96 1 1017 | 443 96 1 1018 | 444 96 1 1019 | 455 96 1 1020 | 235 98 2 1021 | 333 98 1 1022 | 386 100 1 1023 | -------------------------------------------------------------------------------- /cobolt/tests/test_data/single_b/features.tsv: -------------------------------------------------------------------------------- 1 | chr1:3094893-3095434 2 | chr1:3113818-3113891 3 | chr1:3119627-3120693 4 | chr1:3121450-3121584 5 | chr1:3167222-3167372 6 | chr1:3181216-3181468 7 | chr1:3204882-3205120 8 | chr1:3213076-3213149 9 | chr1:3217219-3217452 10 | chr1:3228244-3228369 11 | chr1:3235591-3235664 12 | chr1:3265307-3265524 13 | chr1:3292493-3293009 14 | chr1:3297644-3297768 15 | chr1:3299709-3299943 16 | chr1:3309994-3310393 17 | chr1:3322375-3322835 18 | chr1:3324306-3324445 19 | chr1:3330267-3330729 20 | chr1:3333790-3333981 21 | chr1:3361072-3361157 22 | chr1:3366586-3366762 23 | chr1:3368397-3368605 24 | chr1:3369541-3369868 25 | chr1:3379678-3379806 26 | chr1:3386269-3386365 27 | chr1:3388538-3388771 28 | chr1:3391939-3392441 29 | chr1:3399197-3399270 30 | chr1:3399930-3400273 31 | chr1:3406182-3406487 32 | chr1:3411913-3411986 33 | chr1:3433002-3433079 34 | chr1:3433835-3434474 35 | chr1:3455434-3455607 36 | chr1:3457839-3458105 37 | chr1:3466526-3466685 38 | chr1:3497132-3497205 39 | chr1:3515011-3515084 40 | chr1:3532659-3532742 41 | chr1:3552656-3552748 42 | chr1:3576399-3576487 43 | chr1:3616536-3616711 44 | chr1:3659452-3659606 45 | chr1:3662461-3662558 46 | chr1:3662896-3663029 47 | chr1:3670579-3670780 48 | chr1:3670975-3672068 49 | chr1:3672252-3672343 50 | chr1:3710086-3710492 51 | chr1:3742354-3742449 52 | chr1:3797469-3797542 53 | chr1:3853824-3854030 54 | chr1:3900396-3900673 55 | chr1:3903555-3903761 56 | chr1:3909423-3909635 57 | chr1:3919408-3919531 58 | chr1:3932512-3932830 59 | chr1:3979169-3979242 60 | chr1:3990484-3990719 61 | chr1:3994861-3995063 62 | chr1:4089610-4090069 63 | chr1:4098064-4098390 64 | chr1:4139144-4139373 65 | chr1:4142662-4142784 66 | chr1:4146159-4146348 67 | chr1:4186795-4187219 68 | chr1:4273254-4273426 69 | chr1:4332441-4332725 70 | chr1:4392710-4392913 71 | chr1:4412645-4412790 72 | chr1:4414272-4415158 73 | chr1:4426978-4427051 74 | chr1:4491904-4492152 75 | chr1:4571436-4572174 76 | chr1:4611716-4612065 77 | chr1:4703195-4703853 78 | chr1:4722686-4723002 79 | chr1:4747479-4747571 80 | chr1:4748208-4748647 81 | chr1:4766800-4767048 82 | chr1:4775425-4775498 83 | chr1:4780276-4780357 84 | chr1:4785481-4786089 85 | chr1:4807495-4808228 86 | chr1:4820354-4820465 87 | chr1:4833747-4833866 88 | chr1:4857460-4858697 89 | chr1:4868989-4869069 90 | chr1:4873469-4873623 91 | chr1:4881497-4881689 92 | chr1:4907984-4908521 93 | chr1:4914570-4914685 94 | chr1:4917513-4917696 95 | chr1:4918809-4918882 96 | chr1:4923129-4923323 97 | chr1:4926453-4926617 98 | chr1:4927073-4927230 99 | chr1:4934774-4934864 100 | chr1:4935062-4935319 101 | chr1:4969904-4970129 102 | chr1:4971296-4971444 103 | chr1:5018054-5018163 104 | chr1:5018692-5018966 105 | chr1:5019221-5019674 106 | chr1:5020635-5020970 107 | chr1:5022737-5023330 108 | chr1:5033227-5033621 109 | chr1:5045909-5046074 110 | chr1:5070187-5070589 111 | chr1:5071202-5071553 112 | chr1:5082863-5083591 113 | chr1:5103291-5103487 114 | chr1:5106684-5106874 115 | chr1:5128868-5129069 116 | chr1:5130154-5130717 117 | chr1:5134092-5134371 118 | chr1:5137928-5138561 119 | chr1:5150671-5150797 120 | chr1:5176982-5177759 121 | chr1:5183539-5183815 122 | chr1:5207970-5208480 123 | chr1:5213909-5213982 124 | chr1:5220832-5221027 125 | chr1:5228732-5229204 126 | chr1:5232304-5232493 127 | chr1:5249376-5249731 128 | chr1:5257919-5257992 129 | chr1:5279329-5279434 130 | chr1:5288166-5288246 131 | chr1:5297376-5297589 132 | chr1:5297794-5297975 133 | chr1:5315506-5316189 134 | chr1:5316394-5316581 135 | chr1:5334507-5335021 136 | chr1:5336641-5336783 137 | chr1:5346169-5346523 138 | chr1:5382346-5382738 139 | chr1:5382868-5382972 140 | chr1:5394645-5394835 141 | chr1:5401082-5401215 142 | chr1:5416969-5417286 143 | chr1:5588271-5588551 144 | chr1:5588770-5588843 145 | chr1:5642384-5642564 146 | chr1:5689443-5689623 147 | chr1:5753564-5753680 148 | chr1:5894576-5894694 149 | chr1:5905017-5905104 150 | chr1:5915481-5915685 151 | chr1:5916929-5917275 152 | chr1:5917433-5917579 153 | chr1:5926109-5926336 154 | chr1:5955574-5955695 155 | chr1:5955938-5956153 156 | chr1:6039871-6040018 157 | chr1:6049826-6049909 158 | chr1:6094286-6094552 159 | chr1:6132028-6132203 160 | chr1:6135138-6135352 161 | chr1:6152617-6152715 162 | chr1:6178764-6179041 163 | chr1:6188680-6188814 164 | chr1:6214268-6215317 165 | chr1:6215459-6215610 166 | chr1:6219800-6219977 167 | chr1:6224174-6224431 168 | chr1:6272374-6272571 169 | chr1:6283091-6283283 170 | chr1:6290372-6290475 171 | chr1:6306556-6307040 172 | chr1:6320669-6320867 173 | chr1:6330854-6330927 174 | chr1:6359418-6359491 175 | chr1:6362041-6362257 176 | chr1:6364062-6364140 177 | chr1:6370527-6370632 178 | chr1:6382681-6383119 179 | chr1:6401695-6401780 180 | chr1:6406321-6406694 181 | chr1:6410288-6410397 182 | chr1:6441302-6441672 183 | chr1:6478054-6478168 184 | chr1:6482940-6484001 185 | chr1:6487143-6487393 186 | chr1:6499697-6499803 187 | chr1:6547223-6547466 188 | chr1:6594536-6594806 189 | chr1:6658271-6658344 190 | chr1:6658401-6658529 191 | chr1:6717690-6717986 192 | chr1:6722890-6722963 193 | chr1:6729545-6729636 194 | chr1:6729748-6730057 195 | chr1:6757712-6757852 196 | chr1:6760548-6760856 197 | chr1:6761410-6761498 198 | chr1:6770059-6770287 199 | chr1:6776556-6776629 200 | chr1:6805146-6805219 201 | chr1:6805362-6805563 202 | chr1:6810151-6810666 203 | chr1:6824269-6824733 204 | chr1:6829862-6829947 205 | chr1:6858065-6858158 206 | chr1:6859178-6859328 207 | chr1:6866880-6867269 208 | chr1:6907640-6908023 209 | chr1:6928028-6928182 210 | chr1:6928625-6929204 211 | chr1:6973107-6973219 212 | chr1:7000920-7001007 213 | chr1:7010317-7010482 214 | chr1:7040703-7040850 215 | chr1:7062657-7062866 216 | chr1:7088511-7089531 217 | chr1:7105056-7105656 218 | chr1:7113023-7113595 219 | chr1:7118015-7118418 220 | chr1:7131425-7131580 221 | chr1:7138221-7138330 222 | chr1:7138548-7139484 223 | chr1:7148130-7148654 224 | chr1:7155170-7155598 225 | chr1:7161153-7161343 226 | chr1:7170918-7171316 227 | chr1:7199682-7199759 228 | chr1:7204728-7205055 229 | chr1:7206367-7206946 230 | chr1:7219073-7219265 231 | chr1:7250060-7250346 232 | chr1:7292984-7293380 233 | chr1:7347997-7348105 234 | chr1:7360019-7360400 235 | chr1:7397426-7398548 236 | chr1:7447812-7447885 237 | chr1:7448093-7448181 238 | chr1:7495274-7495403 239 | chr1:7498172-7498404 240 | chr1:7523516-7523778 241 | chr1:7534995-7535637 242 | chr1:7543468-7543809 243 | chr1:7641273-7641669 244 | chr1:7731157-7731635 245 | chr1:7750777-7751065 246 | chr1:7758682-7758951 247 | chr1:7768778-7768928 248 | chr1:7769093-7769225 249 | chr1:7891680-7891847 250 | chr1:8063470-8063823 251 | chr1:8133001-8133174 252 | chr1:8135031-8135397 253 | chr1:8171624-8171779 254 | chr1:8410580-8410751 255 | chr1:8468831-8469143 256 | chr1:8570861-8571014 257 | chr1:8583020-8583458 258 | chr1:8584533-8584657 259 | chr1:8587455-8587584 260 | chr1:8595824-8595906 261 | chr1:8595980-8596099 262 | chr1:8644590-8644814 263 | chr1:8678918-8679074 264 | chr1:8736378-8736465 265 | chr1:8744569-8744677 266 | chr1:8745251-8745324 267 | chr1:8747483-8747599 268 | chr1:8781078-8781371 269 | chr1:8781952-8782025 270 | chr1:8790916-8791317 271 | chr1:8805172-8805245 272 | chr1:8817160-8817243 273 | chr1:8842871-8843027 274 | chr1:8847891-8848103 275 | chr1:8865003-8865139 276 | chr1:8866526-8866910 277 | chr1:8867784-8868122 278 | chr1:8921744-8922168 279 | chr1:8940901-8941010 280 | chr1:8944849-8945284 281 | chr1:8947644-8947997 282 | chr1:8952234-8952327 283 | chr1:8979325-8979398 284 | chr1:8982808-8982899 285 | chr1:8998849-8998922 286 | chr1:8999215-8999288 287 | chr1:9024399-9024736 288 | chr1:9032856-9033034 289 | chr1:9033216-9033300 290 | chr1:9090908-9090981 291 | chr1:9091034-9091266 292 | chr1:9091273-9091346 293 | chr1:9101012-9101096 294 | chr1:9125402-9126092 295 | chr1:9127548-9127799 296 | chr1:9140929-9141124 297 | chr1:9168442-9168634 298 | chr1:9171852-9172667 299 | chr1:9203267-9203374 300 | chr1:9207783-9207912 301 | chr1:9209015-9209113 302 | chr1:9210320-9210502 303 | chr1:9248698-9248833 304 | chr1:9254196-9254342 305 | chr1:9258132-9258350 306 | chr1:9289344-9289964 307 | chr1:9298547-9298753 308 | chr1:9298870-9300082 309 | chr1:9541183-9541570 310 | chr1:9545100-9545567 311 | chr1:9548005-9548260 312 | chr1:9564583-9564870 313 | chr1:9570173-9570651 314 | chr1:9573234-9573551 315 | chr1:9577654-9578176 316 | chr1:9591675-9592255 317 | chr1:9600975-9601811 318 | chr1:9602496-9602659 319 | chr1:9609366-9609638 320 | chr1:9611687-9612120 321 | chr1:9616238-9616593 322 | chr1:9619485-9619676 323 | chr1:9619842-9619948 324 | chr1:9621577-9621703 325 | chr1:9622537-9622761 326 | chr1:9627252-9627959 327 | chr1:9629801-9630629 328 | chr1:9645158-9645383 329 | chr1:9645771-9646116 330 | chr1:9649379-9649554 331 | chr1:9659031-9659104 332 | chr1:9673467-9673570 333 | chr1:9700230-9700839 334 | chr1:9747666-9748720 335 | chr1:9772910-9773089 336 | chr1:9784128-9784440 337 | chr1:9790242-9790586 338 | chr1:9797934-9798724 339 | chr1:9804380-9804865 340 | chr1:9812316-9812608 341 | chr1:9815742-9816153 342 | chr1:9816423-9816572 343 | chr1:9823541-9824013 344 | chr1:9829563-9829636 345 | chr1:9847819-9848560 346 | chr1:9851551-9851768 347 | chr1:9859382-9859455 348 | chr1:9862582-9862743 349 | chr1:9876897-9877061 350 | chr1:9891970-9892176 351 | chr1:9908587-9908935 352 | chr1:9932085-9932171 353 | chr1:9936107-9936283 354 | chr1:9936414-9936487 355 | chr1:9937834-9937907 356 | chr1:9943725-9944334 357 | chr1:9953087-9953162 358 | chr1:9968015-9968088 359 | chr1:9991855-9992117 360 | chr1:9993877-9994142 361 | chr1:10007402-10007546 362 | chr1:10008766-10009488 363 | chr1:10012881-10013056 364 | chr1:10014561-10014761 365 | chr1:10037907-10038328 366 | chr1:10039522-10040328 367 | chr1:10056524-10056662 368 | chr1:10057885-10058649 369 | chr1:10076301-10076485 370 | chr1:10087573-10087656 371 | chr1:10089567-10089848 372 | chr1:10093808-10093881 373 | chr1:10102220-10102515 374 | chr1:10116723-10116814 375 | chr1:10127391-10127582 376 | chr1:10133272-10133426 377 | chr1:10139511-10139656 378 | chr1:10141269-10141781 379 | chr1:10142073-10142192 380 | chr1:10157222-10157652 381 | chr1:10164504-10164686 382 | chr1:10165054-10165283 383 | chr1:10171607-10171680 384 | chr1:10193286-10193490 385 | chr1:10229130-10229298 386 | chr1:10232233-10233438 387 | chr1:10247288-10247987 388 | chr1:10279358-10279470 389 | chr1:10286919-10287196 390 | chr1:10321850-10321977 391 | chr1:10323167-10323313 392 | chr1:10430073-10430198 393 | chr1:10493602-10493746 394 | chr1:10535577-10535650 395 | chr1:10570691-10571275 396 | chr1:10596095-10596750 397 | chr1:10607044-10607194 398 | chr1:10636690-10637002 399 | chr1:10637565-10637656 400 | chr1:10699925-10700416 401 | chr1:10701031-10701243 402 | chr1:10708699-10708775 403 | chr1:10719425-10719498 404 | chr1:10719663-10720447 405 | chr1:10720685-10721292 406 | chr1:10759741-10759866 407 | chr1:10764157-10764269 408 | chr1:10771946-10772134 409 | chr1:10777848-10778127 410 | chr1:10780912-10781297 411 | chr1:10781468-10781740 412 | chr1:10794242-10794391 413 | chr1:10794440-10794541 414 | chr1:10835825-10835997 415 | chr1:10837352-10837498 416 | chr1:10861501-10861650 417 | chr1:10871333-10871795 418 | chr1:10877556-10877846 419 | chr1:10880486-10880646 420 | chr1:10899434-10899682 421 | chr1:10943016-10943235 422 | chr1:10955295-10955374 423 | chr1:10990862-10991366 424 | chr1:10993212-10994172 425 | chr1:11026724-11026851 426 | chr1:11045843-11045916 427 | chr1:11065355-11065806 428 | chr1:11066086-11066495 429 | chr1:11090918-11091161 430 | chr1:11097414-11097576 431 | chr1:11099697-11100203 432 | chr1:11117143-11117532 433 | chr1:11120540-11120729 434 | chr1:11139919-11140245 435 | chr1:11151758-11151831 436 | chr1:11169027-11169478 437 | chr1:11169614-11169996 438 | chr1:11177722-11178198 439 | chr1:11178355-11178428 440 | chr1:11179782-11180389 441 | chr1:11181143-11181333 442 | chr1:11202664-11202792 443 | chr1:11217491-11217742 444 | chr1:11217817-11217999 445 | chr1:11231059-11231145 446 | chr1:11239743-11239816 447 | chr1:11247215-11247400 448 | chr1:11268334-11268569 449 | chr1:11320481-11320588 450 | chr1:11321179-11321462 451 | chr1:11329647-11329900 452 | chr1:11357340-11357499 453 | chr1:11407149-11407454 454 | chr1:11413706-11414552 455 | chr1:11427303-11428058 456 | chr1:11433026-11433135 457 | chr1:11434203-11434644 458 | chr1:11442958-11443182 459 | chr1:11447357-11447430 460 | chr1:11451407-11451480 461 | chr1:11458251-11458728 462 | chr1:11458853-11458939 463 | chr1:11478544-11478655 464 | chr1:11507790-11507908 465 | chr1:11516928-11517019 466 | chr1:11517404-11517565 467 | chr1:11549757-11550196 468 | chr1:11551302-11551541 469 | chr1:11565036-11565275 470 | chr1:11566068-11566359 471 | chr1:11566884-11567103 472 | chr1:11573603-11574092 473 | chr1:11578209-11578409 474 | chr1:11580009-11580496 475 | chr1:11580744-11580988 476 | chr1:11585397-11585505 477 | chr1:11588132-11588655 478 | chr1:11600555-11600688 479 | chr1:11638560-11638811 480 | chr1:11650183-11650461 481 | chr1:11652784-11653130 482 | chr1:11657891-11658055 483 | chr1:11659711-11659784 484 | chr1:11660495-11660619 485 | chr1:11731356-11731492 486 | chr1:11739495-11739695 487 | chr1:11742446-11742863 488 | chr1:11750576-11751042 489 | chr1:11765361-11765536 490 | chr1:11784093-11784177 491 | chr1:11788408-11788573 492 | chr1:11795823-11795985 493 | chr1:11842706-11842867 494 | chr1:11895157-11895304 495 | chr1:11895850-11896171 496 | chr1:11911007-11911278 497 | chr1:11931458-11931631 498 | chr1:11937005-11937394 499 | chr1:11978107-11978237 500 | chr1:11979314-11979607 501 | chr1:137255151-137255224 502 | chr1:137278769-137278992 503 | chr1:137289503-137289637 504 | chr1:137290815-137290888 505 | chr1:137298236-137298523 506 | chr1:137302913-137303368 507 | chr1:137312974-137313104 508 | chr1:137324061-137324261 509 | chr1:137325103-137325353 510 | chr1:137330827-137330940 511 | -------------------------------------------------------------------------------- /cobolt/tests/test_load_data.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | import os 4 | import numpy as np 5 | import scipy 6 | from cobolt.utils import SingleData, MultiData, MultiomicDataset 7 | 8 | THIS_DIR = os.path.dirname(os.path.abspath(__file__)) 9 | 10 | 11 | def load_test_data(): 12 | ja = SingleData.from_file( 13 | path=os.path.join(THIS_DIR, "test_data", "joint_a"), 14 | dataset_name="joint", feature_name="a") 15 | jb = SingleData.from_file( 16 | path=os.path.join(THIS_DIR, "test_data", "joint_b"), 17 | dataset_name="joint", feature_name="b") 18 | sa = SingleData.from_file( 19 | path=os.path.join(THIS_DIR, "test_data", "single_a"), 20 | dataset_name="single_a", feature_name="a") 21 | sb = SingleData.from_file( 22 | path=os.path.join(THIS_DIR, "test_data", "single_b"), 23 | dataset_name="single_b", feature_name="b") 24 | return ja, jb, sa, sb 25 | 26 | 27 | class TestSingleData: 28 | def test_construction(self): 29 | feature_name = "a" 30 | ja, jb, sa, sb = load_test_data() 31 | count, feature, barcode = ja.get_data() 32 | assert ja.get_dataset_name() == "joint" 33 | assert count[feature_name].shape == (100, 100) 34 | assert feature[feature_name].shape == (100, ) 35 | assert barcode.shape == (100, ) 36 | assert isinstance(feature[feature_name], np.ndarray) 37 | assert isinstance(barcode, np.ndarray) 38 | assert isinstance(count[feature_name], scipy.sparse.csr.csr_matrix) 39 | 40 | def test_filter_features(self): 41 | feature_name = "a" 42 | ja, jb, sa, sb = load_test_data() 43 | ja.filter_features(min_count=2, min_cell=1) 44 | count, feature, barcode = ja.get_data() 45 | assert count[feature_name].shape == (100, 46) 46 | assert (count[feature_name].sum(axis=0) > 2).all() 47 | assert ((count[feature_name] != 0).sum(axis=0) > 1).all() 48 | assert feature[feature_name].shape == (46, ) 49 | assert barcode.shape == (100, ) 50 | 51 | def test_filter_cells(self): 52 | feature_name = "a" 53 | ja, jb, sa, sb = load_test_data() 54 | ja.filter_cells(min_count=2, min_feature=1) 55 | count, feature, barcode = ja.get_data() 56 | assert count[feature_name].shape == (85, 100) 57 | assert (count[feature_name].sum(axis=1) > 2).all() 58 | assert ((count[feature_name] != 0).sum(axis=1) > 1).all() 59 | assert feature[feature_name].shape == (100, ) 60 | assert barcode.shape == (85, ) 61 | 62 | def test_filter_barcode(self): 63 | feature_name = "a" 64 | ja, jb, sa, sb = load_test_data() 65 | ja.filter_barcode(cells=[ 66 | 'joint~09A_CAGCCCCGCCTT', 67 | 'joint~09A_CGCCTACCATGA' 68 | ]) 69 | count, feature, barcode = ja.get_data() 70 | assert count[feature_name].shape == (2, 100) 71 | assert feature[feature_name].shape == (100, ) 72 | assert barcode.shape == (2, ) 73 | 74 | 75 | class TestMultiData: 76 | def test_construction(self): 77 | ja, jb, sa, sb = load_test_data() 78 | multi = MultiData(ja, jb, sa, sb).get_data() 79 | assert list(multi.keys()) == ['a', 'b'] 80 | assert len(multi['a']['feature']) == 73 81 | assert len(multi['a']['barcode']) == 200 82 | assert len(multi['a']['dataset']) == 200 83 | assert multi['a']['dataset_name'] == ['joint', 'single_a'] 84 | assert multi['a']['counts'].shape == (200, 73) 85 | assert sum(multi['a']['dataset'] == 0) == 100 86 | assert sum(multi['a']['dataset'] == 1) == 100 87 | assert len(multi['b']['feature']) == 500 88 | assert len(multi['b']['barcode']) == 200 89 | assert len(multi['b']['dataset']) == 200 90 | assert multi['b']['dataset_name'] == ['joint', 'single_b'] 91 | assert multi['b']['counts'].shape == (200, 500) 92 | assert sum(multi['b']['dataset'] == 0) == 100 93 | assert sum(multi['b']['dataset'] == 1) == 100 94 | 95 | 96 | class TestDataset: 97 | def test_construction(self): 98 | ja, jb, sa, sb = load_test_data() 99 | multi = MultiomicDataset.from_singledata(ja, jb, sa, sb) 100 | assert len(multi) == 300 101 | assert multi.get_feature_shape() == [73, 500] 102 | assert multi.get_barcode().shape == (300,) 103 | assert multi.get_comb_idx([True, True]).shape == (100,) 104 | assert multi.get_comb_idx([True, False]).shape == (200,) 105 | assert multi.get_comb_idx([False, True]).shape == (200,) 106 | with pytest.raises(ValueError): 107 | multi.get_comb_idx([False, False]) 108 | with pytest.raises(ValueError): 109 | multi.get_comb_idx([False, True, True]) 110 | -------------------------------------------------------------------------------- /cobolt/tests/test_model.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | import os 4 | from cobolt.utils import MultiomicDataset 5 | from cobolt.model import Cobolt 6 | from cobolt.tests.test_load_data import load_test_data 7 | 8 | THIS_DIR = os.path.dirname(os.path.abspath(__file__)) 9 | 10 | 11 | class TestModel: 12 | def test_model(self): 13 | ja, jb, sa, sb = load_test_data() 14 | multi = MultiomicDataset.from_singledata(ja, jb, sa, sb) 15 | model = Cobolt(dataset=multi, n_latent=10) 16 | model.train(num_epochs=1) 17 | latent, barcode = model.get_all_latent(correction=True) 18 | assert latent.shape == (300, 10) 19 | assert barcode.shape == (300,) 20 | model.clustering() 21 | clusters = model.get_clusters() 22 | assert clusters.shape == (300,) 23 | clusters, bcd = model.get_clusters(return_barcode=True) 24 | assert (barcode == bcd).all() 25 | -------------------------------------------------------------------------------- /cobolt/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from cobolt.utils.dataset import MultiomicDataset 2 | from cobolt.utils.data import SingleData, MultiData 3 | 4 | __all__ = ['MultiomicDataset', 'SingleData', 'MultiData'] 5 | -------------------------------------------------------------------------------- /cobolt/utils/data.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from scipy import io, sparse 4 | import os 5 | import pandas as pd 6 | import numpy as np 7 | 8 | 9 | class SingleData(object): 10 | """ 11 | A single modality of a single dataset 12 | 13 | Parameters 14 | ---------- 15 | feature_name 16 | Name of the modality 17 | dataset_name 18 | Name of the dataset 19 | feature 20 | Array of length F containing feature names 21 | count 22 | Matrix of dimension BxF containing data counts 23 | barcode 24 | Array of length B containing cell barcode 25 | """ 26 | def __init__(self, 27 | feature_name: str, 28 | dataset_name: str, 29 | feature: np.ndarray, 30 | count: sparse.csr.csr_matrix, 31 | barcode: np.ndarray): 32 | self.feature_name = feature_name 33 | self.dataset_name = dataset_name 34 | unique_feature, feature_idx = np.unique(feature, return_index=True) 35 | if len(feature) != len(unique_feature): 36 | print("Removing duplicated features.") 37 | feature = unique_feature 38 | count = count[:, feature_idx] 39 | self.feature = feature 40 | self.barcode = np.array([dataset_name + "~" + x for x in barcode]) 41 | self.count = count 42 | self.is_valid() 43 | 44 | @classmethod 45 | def from_file(cls, 46 | path: str, 47 | feature_name: str, 48 | dataset_name: str, 49 | feature_file: str = "features.tsv", 50 | count_file: str = "counts.mtx", 51 | barcode_file: str = "barcodes.tsv", 52 | feature_header=None, 53 | barcode_header=None, 54 | feature_column: int = 0, 55 | barcode_column: int = 0): 56 | """ 57 | Read single modality of a single dataset from files. By default, 58 | the files for feature names, cell barcodes, and the count 59 | matrix are `features.tsv`, `counts.mtx`, and `barcodes.tsv`. 60 | 61 | Parameters 62 | ---------- 63 | path 64 | The path to the directory with data files 65 | feature_name 66 | Name of the modality 67 | dataset_name 68 | Name of the dataset 69 | feature_file 70 | Name of a tab-separated file of the feature names 71 | count_file 72 | Name of the file of the count matrix 73 | barcode_file 74 | Name of a tab-separated file of the barcode 75 | feature_header 76 | Row number to use as column names for the feature file. If `None`, 77 | no column names will be used. 78 | barcode_header 79 | Row number to use as column names for the barcode file. If `None`, 80 | no column names will be used. 81 | feature_column 82 | The name or the index of the column stores the feature name 83 | information in the feature file 84 | barcode_column 85 | The name or the index of the column stores the barcoe name 86 | information in the barcode file 87 | 88 | Returns 89 | ------- 90 | A SingleOmic object 91 | """ 92 | count = io.mmread(os.path.join(path, count_file)).T.tocsr().astype(float) 93 | feature = pd.read_csv( 94 | os.path.join(path, feature_file), 95 | header=feature_header, usecols=[feature_column] 96 | )[0].values.astype('str') 97 | barcode = pd.read_csv( 98 | os.path.join(path, barcode_file), 99 | header=barcode_header, usecols=[barcode_column] 100 | )[0].values.astype('str') 101 | return cls(feature_name, dataset_name, feature, count, barcode) 102 | 103 | def __getitem__(self, items): 104 | x, y = items 105 | return SingleData(self.feature_name, self.dataset_name, self.feature[x], 106 | self.count[x, y], self.barcode[y]) 107 | 108 | def __str__(self): 109 | return "A SingleData object.\n" + \ 110 | "Dataset name: {}. Feature name: {}.\n".format( 111 | self.dataset_name, self.feature_name) + \ 112 | "Number of features: {}. Number of cells {}.".format( 113 | str(len(self.feature)), str(len(self.barcode))) 114 | 115 | def filter_features(self, min_count=10, min_cell=5, upper_quantile=1, lower_quantile=0): 116 | feature_count = np.sum(self.count, axis=0) 117 | feature_n = np.sum(self.count != 0, axis=0) 118 | bool_quality = np.array( 119 | (feature_n > min_cell) & (feature_count > min_count) & 120 | (feature_count >= np.quantile(feature_count, lower_quantile)) & 121 | (feature_count <= np.quantile(feature_count, upper_quantile)) 122 | ).flatten() 123 | self.feature = self.feature[bool_quality] 124 | self.count = self.count[:, bool_quality] 125 | 126 | def filter_cells(self, min_count=10, min_feature=5, upper_quantile=1, lower_quantile=0): 127 | feature_count = np.sum(self.count, axis=1) 128 | feature_n = np.sum(self.count != 0, axis=1) 129 | bool_quality = np.array( 130 | (feature_n > min_feature) & (feature_count > min_count) & 131 | (feature_count >= np.quantile(feature_count, lower_quantile)) & 132 | (feature_count <= np.quantile(feature_count, upper_quantile)) 133 | ).flatten() 134 | self.barcode = self.barcode[bool_quality] 135 | self.count = self.count[bool_quality, :] 136 | 137 | def filter_barcode(self, cells): 138 | bool_cells = np.isin(self.barcode, cells) 139 | self.count = self.count[bool_cells, :] 140 | self.barcode = self.barcode[bool_cells] 141 | 142 | def subset_features(self, feature): 143 | bool_features = np.isin(self.feature, feature) 144 | self.count = self.count[:, bool_features] 145 | self.feature = self.feature[bool_features] 146 | 147 | def rename_features(self, feature): 148 | unique_feature, feature_idx = np.unique(feature, return_index=True) 149 | if len(feature) != len(unique_feature): 150 | print("Removing duplicated features.") 151 | feature = unique_feature 152 | self.count = self.count[:, feature_idx] 153 | self.feature = np.array(feature) 154 | 155 | def get_data(self): 156 | return {self.feature_name: self.count}, {self.feature_name: self.feature}, self.barcode 157 | 158 | def get_dataset_name(self): 159 | return self.dataset_name 160 | 161 | def is_valid(self): 162 | if self.count.shape[0] != self.barcode.shape[0]: 163 | raise ValueError("The dimensions of the count matrix and the barcode array are not consistent.") 164 | if self.count.shape[1] != self.feature.shape[0]: 165 | raise ValueError("The dimensions of the count matrix and the barcode array are not consistent.") 166 | 167 | 168 | class MultiData(object): 169 | 170 | def __init__(self, *single_data): 171 | self.data = {} 172 | for dt in single_data: 173 | ct, ft, bc = dt.get_data() 174 | for mod in ct.keys(): 175 | if mod not in self.data.keys(): 176 | self.data[mod] = { 177 | 'feature': [ft[mod]], 178 | 'barcode': [bc], 179 | 'counts': [ct[mod]], 180 | 'dataset': [dt.get_dataset_name()] 181 | } 182 | else: 183 | self.data[mod]['feature'].append(ft[mod]) 184 | self.data[mod]['barcode'].append(bc) 185 | self.data[mod]['counts'].append(ct[mod]) 186 | self.data[mod]['dataset'].append(dt.get_dataset_name()) 187 | for mod in self.data.keys(): 188 | self.data[mod] = merge_modality(self.data[mod]) 189 | 190 | def get_data(self): 191 | return self.data 192 | 193 | 194 | def merge_modality(dt): 195 | batch = [np.zeros(x.shape) + i for i, x in enumerate(dt['barcode'])] 196 | batch = np.concatenate(batch) 197 | barcode = np.concatenate(dt['barcode']) 198 | 199 | feature = dt['feature'][0] 200 | for f in dt['feature'][1:]: 201 | feature = np.intersect1d(feature, f) 202 | 203 | counts = [] 204 | for i in range(len(dt['counts'])): 205 | common = np.intersect1d(feature, dt['feature'][i], return_indices=True) 206 | counts += [dt['counts'][i][:, common[2]]] 207 | counts = sparse.vstack(counts) 208 | 209 | return { 210 | 'feature': feature, 211 | 'counts': counts, 212 | 'barcode': barcode, 213 | 'dataset': batch, 214 | 'dataset_name': dt['dataset'] 215 | } 216 | -------------------------------------------------------------------------------- /cobolt/utils/dataset.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from scipy import sparse 4 | import random 5 | import itertools 6 | import torch 7 | from torch.utils.data import DataLoader 8 | from cobolt.utils.data import MultiData 9 | 10 | 11 | class MultiomicDataset(torch.utils.data.Dataset): 12 | def __init__(self, dt: MultiData): 13 | self.dt = dt.get_data() 14 | self.omic = list(self.dt.keys()) 15 | self.barcode = self._get_unique_barcode() 16 | self.dataset = self._get_dataset() 17 | self.n_dataset = [np.unique(self.dt[om]['dataset']).shape[0] for om in self.omic] 18 | b_dict = {om: {b: i for i, b in enumerate(self.dt[om]['barcode'])} for om in self.omic} 19 | self.barcode_in_om = {om: {b: (b_dict[om][b] if b in b_dict[om] else None) for b in self.barcode} 20 | for om in self.omic} 21 | 22 | def __len__(self): 23 | """Number of samples in the data""" 24 | return self.barcode.shape[0] 25 | 26 | def __getitem__(self, index: int): 27 | """Generates one sample of data""" 28 | b = self.barcode[index] 29 | dat = [self.dt[om]['counts'][self.barcode_in_om[om][b]] if self.barcode_in_om[om][b] is not None else None 30 | for om in self.omic] 31 | dataset = [self.dt[om]['dataset'][self.barcode_in_om[om][b]] if self.barcode_in_om[om][b] is not None else None 32 | for om in self.omic] 33 | return dat, dataset 34 | 35 | def __str__(self): 36 | n_modality = len(self.omic) 37 | s1 = "A MultiomicDataset object with {} omics:\n".format(n_modality) 38 | s2 = "".join(["- {}: {} features, {} cells, {} batches.\n".format( 39 | om, len(self.dt[om]['feature']), len(self.dt[om]['barcode']), self.n_dataset[i]) 40 | for i, om in enumerate(self.omic)]) 41 | s3 = "Joint cells:\n" 42 | joint_omic = [list(i) for i in itertools.product([False, True], repeat=n_modality) if sum(i) > 1] 43 | s4 = "\n".join(["- {}: {} cells.".format( 44 | ", ".join([om for i, om in enumerate(self.omic) if om_combn[i]]), 45 | len(self.get_comb_idx(om_combn))) 46 | for om_combn in joint_omic]) 47 | return s1 + s2 + s3 + s4 48 | 49 | @classmethod 50 | def from_singledata(cls, *single_data): 51 | return cls(MultiData(*single_data)) 52 | 53 | def _get_unique_barcode(self): 54 | barcode = np.concatenate([self.dt[om]['barcode'] for om in self.omic]) 55 | return np.unique(barcode) 56 | 57 | def _get_dataset(self): 58 | dt_dict = {} 59 | for om in self.omic: 60 | dataset_names = [self.dt[om]['dataset_name'][int(i)] for i in self.dt[om]['dataset']] 61 | for b, d in zip(self.dt[om]['barcode'], dataset_names): 62 | if b not in dt_dict: 63 | dt_dict[b] = d 64 | elif d != dt_dict[b]: 65 | raise ValueError("Duplicate barcode found: {}".format(b)) 66 | return dt_dict 67 | 68 | def get_barcode(self): 69 | return self.barcode 70 | 71 | def get_comb_idx(self, omic_combn): 72 | if not any(omic_combn): 73 | raise ValueError("Omics combination can not be all False.") 74 | if len(omic_combn) != len(self.get_feature_shape()): 75 | raise ValueError( 76 | "omic_combn should be a boolean list of length {}".format( 77 | len(self.get_feature_shape()))) 78 | 79 | bl = [self.dt[om]['barcode'] for om, include in zip(self.omic, omic_combn) if include] 80 | b = bl[0] 81 | for x in bl[1:]: 82 | b = np.intersect1d(b, x) 83 | return np.where(np.isin(self.barcode, b))[0] 84 | 85 | def get_feature_shape(self): 86 | return [self.dt[om]['feature'].shape[0] for om in self.omic] 87 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epurdom/cobolt/cea9a5c6297326aca00a10aeaa198d21b07e4889/docs/README.md -------------------------------------------------------------------------------- /docs/multiome10X.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epurdom/cobolt/cea9a5c6297326aca00a10aeaa198d21b07e4889/docs/multiome10X.py -------------------------------------------------------------------------------- /docs/tutorial.py: -------------------------------------------------------------------------------- 1 | 2 | from cobolt.utils import SingleData, MultiomicDataset 3 | from cobolt.model import Cobolt 4 | import os 5 | 6 | dat_dir = "../../example_data" 7 | 8 | snare_mrna = SingleData.from_file(path=os.path.join(dat_dir, "snare"), 9 | dataset_name="SNARE-seq", 10 | feature_name="GeneExpr", 11 | count_file="gene_counts.mtx", 12 | feature_file="genes.tsv") 13 | snare_mrna.filter_features(upper_quantile=0.99, lower_quantile=0.7) 14 | snare_atac = SingleData.from_file(path=os.path.join(dat_dir, "snare"), 15 | dataset_name="SNARE-seq", 16 | feature_name="ChromAccess", 17 | count_file="peak_counts.mtx", 18 | feature_file="peaks.tsv") 19 | snare_atac.filter_features(upper_quantile=0.99, lower_quantile=0.7) 20 | 21 | 22 | mop_mrna = SingleData.from_file(path=os.path.join(dat_dir, "mrna"), 23 | dataset_name="mRNA", 24 | feature_name="GeneExpr", 25 | feature_file="genes.tsv") 26 | mop_mrna.filter_features(upper_quantile=0.99, lower_quantile=0.7) 27 | 28 | mop_atac = SingleData.from_file(path=os.path.join(dat_dir, "atac"), 29 | dataset_name="ATAC", 30 | feature_name="ChromAccess", 31 | feature_file="peaks.tsv") 32 | mop_atac.filter_features(upper_quantile=0.99, lower_quantile=0.7) 33 | 34 | multi_dt = MultiomicDataset.from_singledata( 35 | mop_mrna, mop_atac, snare_atac, snare_mrna) 36 | print(multi_dt) 37 | 38 | model = Cobolt(dataset=multi_dt, n_latent=10) 39 | model.train(num_epochs=5) 40 | 41 | model.calc_all_latent() 42 | 43 | model.clustering(algo="louvain") 44 | c1 = model.get_clusters("louvain") 45 | model.clustering(algo="leiden", resolution=0.5) 46 | c2 = model.get_clusters("leiden", 0.5) 47 | 48 | model.scatter_plot(reduc="UMAP", correction=True) 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="cobolt", # Replace with your own username 8 | version="0.0.1", 9 | author="boyinggong", 10 | author_email="boyinggong@berkeley.edu", 11 | description="A package for joint analysis of multimodal single-cell sequencing data", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/boyinggong/cobolt", 15 | project_urls={ 16 | "Bug Tracker": "https://github.com/boyinggong/cobolt/issues", 17 | }, 18 | classifiers=[ 19 | "Programming Language :: Python :: 3", 20 | "License :: OSI Approved :: MIT License", 21 | "Operating System :: OS Independent", 22 | ], 23 | install_requires=[ 24 | 'numpy', 25 | 'scipy', 26 | 'torch', 27 | 'umap-learn', 28 | 'python-igraph', 29 | 'scikit-learn', 30 | 'xgboost', 31 | 'pandas', 32 | 'seaborn', 33 | 'leidenalg' 34 | ], 35 | packages=setuptools.find_packages(exclude=['cobolt.tests']), 36 | python_requires=">=3.7", 37 | ) 38 | -------------------------------------------------------------------------------- /summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epurdom/cobolt/cea9a5c6297326aca00a10aeaa198d21b07e4889/summary.png --------------------------------------------------------------------------------