├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Pipfile ├── Pipfile.lock ├── README.rst ├── _readme.md ├── astir ├── __init__.py ├── astir.py ├── data │ ├── __init__.py │ ├── data_readers.py │ ├── rds_reader.R │ ├── scdataset.py │ └── test_rds.csv └── models │ ├── __init__.py │ ├── abstract.py │ ├── cellstate.py │ ├── cellstate_recognet.py │ ├── celltype.py │ └── celltype_recognet.py ├── bin └── astir ├── docs ├── Makefile ├── astir.data.rst ├── astir.models.rst ├── astir.rst ├── conf.py ├── development.rst ├── index.rst ├── installation.rst ├── make.bat ├── requirements.txt ├── source │ └── _static │ │ └── figs │ │ └── astir.png └── tutorials │ ├── index.rst │ └── notebooks │ ├── data │ ├── cell-states.csv │ ├── cell-types.csv │ └── sample_data.csv │ ├── data_loading.ipynb │ ├── getting_started.ipynb │ └── img │ ├── celltype_protein_cluster.png │ ├── hdf5_schematics.png │ └── hierarchical_celltype_cluster.png ├── envs └── imc.yml ├── mkdocs.yml ├── mypy.ini ├── pytest.ini ├── requirements.txt ├── scripts ├── build-docs.sh ├── docs-live.sh ├── format.sh ├── lint.sh └── test.sh ├── setup.py └── tests ├── __init__.py ├── conftest.py ├── models ├── __init__.py ├── test_cellstate.py ├── test_celltype.py └── test_scdataset.py ├── output ├── test-data ├── adata_small.h5ad ├── bad_data.csv ├── bad_marker.yml ├── basel_100.loom ├── design.csv ├── jackson-2020-markers.yml ├── sce.csv ├── test-dir-read │ ├── one.csv │ └── two.csv ├── test_data.csv ├── test_rds.csv ├── test_rds.rds ├── test_rds_design.csv └── test_rds_marker.yml ├── test_astir.py ├── test_bin_astir.py ├── test_code_smell.py └── test_notebooks.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Output files of the models 2 | # *.loom 3 | # *.png 4 | # *.hdf5 5 | # *.csv 6 | # *.pt 7 | cellstate_summary.hdf5 8 | celltype_summary.hdf5 9 | docs/tutorials/notebooks/data/a_summary.hdf5 10 | 
docs/tutorials/notebooks/data/astir_summary.hdf5 11 | docs/tutorials/notebooks/data/test_data_type_assignments.csv 12 | 13 | 14 | 15 | # Compiled python modules. 16 | *.pyc 17 | 18 | # Setuptools distribution folder. 19 | /dist/ 20 | 21 | # Python egg metadata, regenerated from source files by setuptools. 22 | /*.egg-info 23 | 24 | .Rproj.user 25 | .Rhistory 26 | .RData 27 | .Ruserdata 28 | 29 | _build 30 | _static 31 | _templates 32 | 33 | docs/_build/* 34 | 35 | # Byte-compiled / optimized / DLL files 36 | __pycache__/ 37 | *.py[cod] 38 | *$py.class 39 | 40 | # C extensions 41 | *.so 42 | 43 | # Distribution / packaging 44 | .Python 45 | build/ 46 | develop-eggs/ 47 | dist/ 48 | downloads/ 49 | eggs/ 50 | .eggs/ 51 | lib/ 52 | lib64/ 53 | parts/ 54 | sdist/ 55 | var/ 56 | wheels/ 57 | pip-wheel-metadata/ 58 | share/python-wheels/ 59 | *.egg-info/ 60 | .installed.cfg 61 | *.egg 62 | MANIFEST 63 | 64 | # PyInstaller 65 | # Usually these files are written by a python script from a template 66 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
67 | *.manifest 68 | *.spec 69 | 70 | # Installer logs 71 | pip-log.txt 72 | pip-delete-this-directory.txt 73 | 74 | # Unit test / coverage reports 75 | htmlcov/ 76 | .tox/ 77 | .nox/ 78 | .coverage 79 | .coverage.* 80 | .cache 81 | nosetests.xml 82 | coverage.xml 83 | *.cover 84 | *.py,cover 85 | .hypothesis/ 86 | .pytest_cache/ 87 | 88 | # Translations 89 | *.mo 90 | *.pot 91 | 92 | # Django stuff: 93 | *.log 94 | local_settings.py 95 | db.sqlite3 96 | db.sqlite3-journal 97 | 98 | # Flask stuff: 99 | instance/ 100 | .webassets-cache 101 | 102 | # Scrapy stuff: 103 | .scrapy 104 | 105 | # Sphinx documentation 106 | docs/_build/ 107 | 108 | # PyBuilder 109 | target/ 110 | 111 | # Jupyter Notebook 112 | .ipynb_checkpoints 113 | 114 | # IPython 115 | profile_default/ 116 | ipython_config.py 117 | 118 | # pyenv 119 | .python-version 120 | 121 | # pipenv 122 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 123 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 124 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 125 | # install all needed dependencies. 126 | #Pipfile.lock 127 | 128 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 129 | __pypackages__/ 130 | 131 | # Celery stuff 132 | celerybeat-schedule 133 | celerybeat.pid 134 | 135 | # SageMath parsed files 136 | *.sage.py 137 | 138 | # Environments 139 | .env 140 | .venv 141 | env/ 142 | venv/ 143 | ENV/ 144 | env.bak/ 145 | venv.bak/ 146 | 147 | # Spyder project settings 148 | .spyderproject 149 | .spyproject 150 | 151 | # Rope project settings 152 | .ropeproject 153 | 154 | # mkdocs documentation 155 | /site 156 | 157 | # mypy 158 | .mypy_cache/ 159 | .dmypy.json 160 | dmypy.json 161 | 162 | # Pyre type checker 163 | .pyre/ 164 | /test.py 165 | /test.sh 166 | /docs/tutorials/notebooks/cell-states.csv 167 | /docs/tutorials/notebooks/cell-types.csv 168 | /astir/tests/celltype_training_summary.hdf5 169 | /astir/tests/output 170 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | build: 9 | image: latest 10 | 11 | # Build documentation in the docs/ directory with Sphinx 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | # Build documentation with MkDocs 16 | #mkdocs: 17 | # configuration: mkdocs.yml 18 | 19 | # Optionally build your docs in additional formats such as PDF 20 | formats: 21 | - pdf 22 | 23 | # Optionally set the version of Python and requirements required to build your docs 24 | python: 25 | version: 3.7 26 | install: 27 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: focal 2 | 3 | language: python 4 | 5 | cache: pip 6 | 7 | python: 8 | # - "3.6" 9 | # - "3.7" 10 | - "3.8" 11 | # - "nightly" 12 | 13 | # 
matrix: 14 | # allow_failures: 15 | # - python: "nightly" 16 | 17 | install: 18 | - pip install pipenv --upgrade-strategy=only-if-needed 19 | - pipenv install --dev 20 | - pip install -r requirements.txt 21 | 22 | script: 23 | - bash scripts/test.sh 24 | 25 | after_script: 26 | - bash <(curl -s https://codecov.io/bash) -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ## [0.0.11] - 2020-08-22 11 | 12 | - Conception! 13 | 14 | [Unreleased]: https://github.com/camlab-bioml/astir/compare/v0.0.11...HEAD 15 | [0.0.11]: https://github.com/camlab-bioml/astir/releases/tag/v0.0.11 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via issue, 4 | email, or any other method with the owners of this repository before making a change. 5 | 6 | Please note that we have a code of conduct; please follow it in all your interactions with the project.
7 | 8 | ## First Steps to fix an issue or bug 9 | 10 | - Read the documentation (working on adding more) 11 | - create the minimally reproducible issue 12 | - try to edit the relevant code and see if it fixes it 13 | - submit the fix to the problem as a pull request 14 | - include an explanation of what you did and why 15 | 16 | ## First steps to contribute new features 17 | 18 | - Create an issue to discuss the feature's scope and its fit for this package 19 | - run pytest to ensure your local version of code passes all unit tests 20 | - try to edit the relevant code and implement your new feature in a backwards compatible manner 21 | - create new tests as you go, and run the test suite as you go 22 | - update the documentation as you go 23 | 24 | ### Please format and lint as you go with the following scripts 25 | 26 | ```bash 27 | scripts/lint.sh 28 | scripts/format.sh 29 | ``` 30 | 31 | ### Requirements to merge code (Pull Request Process) 32 | 33 | - you must include test coverage 34 | - you must update the documentation 35 | - you must run the above scripts to format and lint 36 | 37 | ## Pull Request Process 38 | 39 | 1. Ensure you include test coverage for all changes 40 | 2. Ensure your code is formatted properly via the scripts above 41 | 3. Update the documentation and the README.rst with details of changes to the interface; this includes new environment 42 | variables, function names, decorators, etc. 43 | 4. Increase the version numbers in any example files and the README.rst to the new version that this 44 | Pull Request would represent. The versioning scheme we use is [SemVer](http://semver.org/). 45 | 5. You may merge the Pull Request in once you have the sign-off of another developer, or if you 46 | do not have permission to do that, you may request the reviewer to merge it for you.
47 | 48 | --- 49 | 50 | ## Code of Conduct 51 | 52 | ### Our Pledge 53 | 54 | In the interest of fostering an open and welcoming environment, we as 55 | contributors and maintainers pledge to making participation in our project and 56 | our community a harassment-free experience for everyone, regardless of age, body 57 | size, disability, ethnicity, gender identity and expression, level of experience, 58 | nationality, personal appearance, race, religion, or sexual identity and 59 | orientation. 60 | 61 | ### Our Standards 62 | 63 | Examples of behavior that contributes to creating a positive environment 64 | include: 65 | 66 | - Using welcoming and inclusive language 67 | - Being respectful of differing viewpoints and experiences 68 | - Gracefully accepting constructive criticism 69 | - Focusing on what is best for the community 70 | - Showing empathy towards other community members 71 | 72 | Examples of unacceptable behavior by participants include: 73 | 74 | - The use of sexualized language or imagery and unwelcome sexual attention or 75 | advances 76 | - Trolling, insulting/derogatory comments, and personal or political attacks 77 | - Public or private harassment 78 | - Publishing others' private information, such as a physical or electronic 79 | address, without explicit permission 80 | - Other conduct which could reasonably be considered inappropriate in a 81 | professional setting 82 | 83 | ### Our Responsibilities 84 | 85 | Project maintainers are responsible for clarifying the standards of acceptable 86 | behavior and are expected to take appropriate and fair corrective action in 87 | response to any instances of unacceptable behavior. 
88 | 89 | Project maintainers have the right and responsibility to remove, edit, or 90 | reject comments, commits, code, wiki edits, issues, and other contributions 91 | that are not aligned to this Code of Conduct, or to ban temporarily or 92 | permanently any contributor for other behaviors that they deem inappropriate, 93 | threatening, offensive, or harmful. 94 | 95 | ### Scope 96 | 97 | This Code of Conduct applies both within project spaces and in public spaces 98 | when an individual is representing the project or its community. Examples of 99 | representing a project or community include using an official project e-mail 100 | address, posting via an official social media account, or acting as an appointed 101 | representative at an online or offline event. Representation of a project may be 102 | further defined and clarified by project maintainers. 103 | 104 | ### Enforcement 105 | 106 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 107 | reported by contacting the project team at kieranrcampbell@pm.me. All 108 | complaints will be reviewed and investigated and will result in a response that 109 | is deemed necessary and appropriate to the circumstances. The project team is 110 | obligated to maintain confidentiality with regard to the reporter of an incident. 111 | Further details of specific enforcement policies may be posted separately. 112 | 113 | Project maintainers who do not follow or enforce the Code of Conduct in good 114 | faith may face temporary or permanent repercussions as determined by other 115 | members of the project's leadership. 
116 | 117 | ### Attribution 118 | 119 | This Code of Conduct is adapted from the [PurpleBooth's Contributing Template][contributing-template-url] 120 | 121 | [contributing-template-url]: https://gist.github.com/PurpleBooth/b24679402957c63ec426/5c4f62c1e50c1e6654e76e873aba3df2b0cdeea2 122 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 
30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details.
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License.
340 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | tests/test-data/jackson-2020-markers.yml 2 | tests/test-data/test_data.csv 3 | README.rst -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | sphinx = "*" 8 | sphinx-autodoc-typehints = "*" 9 | sphinx-rtd-theme = "*" 10 | autodocsumm = "*" 11 | nbsphinx = "*" 12 | nbsphinx-link = "*" 13 | pytest = "*" 14 | rinohtype = "*" 15 | black = "*" 16 | pytest-cov = "*" 17 | mypy = "*" 18 | isort = "*" 19 | 20 | [packages] 21 | torch = "*" 22 | pandas = "*" 23 | numpy = "*" 24 | nbformat = "*" 25 | pyyaml = "*" 26 | sklearn = "*" 27 | argparse = "*" 28 | matplotlib = "*" 29 | loompy = "*" 30 | tqdm = "*" 31 | anndata = "*" 32 | rootpath = "*" 33 | nbconvert = "*" 34 | seaborn = "*" 35 | jupyter_client = "*" 36 | ipykernel = "*" 37 | ipython = "*" 38 | h5py = "*" 39 | tbb = "*" 40 | FlowCytometryTools = "*" 41 | fastcluster = "*" 42 | tables = "*" 43 | typeguard = "*" 44 | 45 | 46 | 47 | [requires] 48 | python_version = "3.8" 49 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =================================================================================== 2 | astir - Automated cell identity from single-cell multiplexed imaging and proteomics 3 | =================================================================================== 4 | 5 | |Build Status| |PyPI| |Code Style| 6 | 7 | .. |Build Status| image:: https://travis-ci.com/camlab-bioml/astir.svg?branch=master 8 | :target: https://travis-ci.org/camlab-bioml/astir 9 | .. 
|Code Style| image:: https://img.shields.io/badge/code%20style-black-black 10 | :target: https://github.com/python/black 11 | .. |PyPI| image:: https://img.shields.io/badge/pypi-v2.1-orange 12 | :target: https://pypi.org/project/pypi/ 13 | 14 | 15 | ``astir`` is a modelling framework for the assignment of cell type across a range of single-cell technologies such as Imaging Mass Cytometry (IMC). ``astir`` is built using `pytorch `_ and uses recognition networks for fast minibatch stochastic variational inference. 16 | 17 | Key applications: 18 | 19 | - Automated assignment of cell type and state from highly multiplexed imaging and proteomic data 20 | - Diagnostic measures to check quality of resulting type and state inferences 21 | - Ability to map new data to cell types and states trained on existing data using recognition neural networks 22 | - A range of plotting and data loading utilities 23 | 24 | 25 | .. image:: https://www.camlab.ca/img/astir.png 26 | :align: center 27 | :alt: automated single-cell pathology 28 | 29 | Getting started 30 | --------------------- 31 | 32 | Launch the interactive tutorial: |in collab| |on github| 33 | 34 | .. |in collab| image:: https://camo.githubusercontent.com/52feade06f2fecbf006889a904d221e6a730c194/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 35 | :target: https://colab.research.google.com/github/camlab-bioml/Astir-Vignette/blob/main/astir_tutorial.ipynb 36 | .. |on github| image:: https://img.shields.io/badge/on-github-black 37 | :target: https://github.com/camlab-bioml/Astir-Vignette 38 | 39 | 40 | See the full `documentation `_ and check out the `tutorials `_. 
41 | 42 | 43 | Authors 44 | --------------------- 45 | 46 | | Jinyu Hou, Sunyun Lee, Michael Geuenich, Kieran Campbell 47 | | Lunenfeld-Tanenbaum Research Institute & University of Toronto 48 | -------------------------------------------------------------------------------- /_readme.md: -------------------------------------------------------------------------------- 1 | # astir 2 | 3 | ![astir](https://github.com/camlab-bioml/astir/workflows/astir/badge.svg) 4 | 5 | astir: automated assignment of cell type and state (phenotype) from single-cell proteomic data, with a focus on imaging mass cytometry (IMC). 6 | 7 | ![Image of astir](docs/source/_static/figs/astir.png) 8 | 9 | ## Installation 10 | 11 | ```bash 12 | git clone https://github.com/camlab-bioml/astir.git 13 | cd astir 14 | pip install -e . 15 | ``` 16 | 17 | ## Basic usage 18 | 19 | ### Input 20 | 21 | `astir` takes as input a cell-by-gene expression matrix, which can optionally be read in from a csv: 22 | 23 | ```csv 24 | "","EGFR","Ruthenium_1","Ruthenium_2","Ruthenium_3","Ruthenium_4" 25 | "BaselTMA_SP41_186_X5Y4_3679",0.346787047240784,0.962155972321163,0.330768187877474,1.21347557766054,1.26704845953417 26 | "BaselTMA_SP41_153_X7Y5_246",0.833751713754184,1.07555159349581,0.419977137830632,1.36904891724053,1.38510442154998 27 | "BaselTMA_SP41_20_X12Y5_197",0.110005567928629,0.908453513733461,0.301166333489085,1.28738891851379,1.30072755877247 28 | "BaselTMA_SP41_14_X1Y8_84",0.282666026986334,0.865982850277527,0.35037342731126,1.24080330000694,1.26476734524879 29 | ``` 30 | 31 | and a python dictionary or equivalently `yaml` file relating markers to cell types and cell states: 32 | 33 | ```yaml 34 | cell_states: 35 | RTK_signalling: 36 | - Her2 37 | - EGFR 38 | ... 39 | cell_types: 40 | Epithelial (basal): 41 | - E-Cadherin 42 | - pan Cytokeratin 43 | - Cytokeratin 5 44 | - Cytokeratin 14 45 | - Her2 46 | B cells: 47 | - CD45 48 | - CD20 49 | ... 
50 | ``` 51 | 52 | 53 | ### Running 54 | 55 | From the command line: 56 | 57 | ```bash 58 | astir expression_mat.csv markers.yml output_assignments.csv 59 | ``` 60 | 61 | From within Python with existing data: 62 | 63 | ```python 64 | from astir import Astir 65 | 66 | a = Astir(expr_df, marker_dict) 67 | a.fit() 68 | 69 | cell_probabilities = a.get_assignments() 70 | ``` 71 | 72 | From within Python reading from csv and yaml: 73 | 74 | ```python 75 | from astir import from_csv_yaml 76 | 77 | a = from_csv_yaml('expression_mat.csv', 'markers.yml') 78 | 79 | a.fit() 80 | 81 | cell_probabilities = a.get_assignments() 82 | ``` -------------------------------------------------------------------------------- /astir/__init__.py: -------------------------------------------------------------------------------- 1 | from .astir import Astir 2 | from .data.data_readers import ( 3 | from_anndata_yaml, 4 | from_csv_dir_yaml, 5 | from_csv_yaml, 6 | from_loompy_yaml, 7 | ) 8 | from .data.scdataset import SCDataset 9 | from .models.cellstate import CellStateModel 10 | from .models.celltype import CellTypeModel 11 | 12 | __all__ = ["Astir", "NotClassifiableError"] 13 | -------------------------------------------------------------------------------- /astir/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_readers import ( 2 | from_anndata_yaml, 3 | from_csv_dir_yaml, 4 | from_csv_yaml, 5 | from_loompy_yaml, 6 | ) 7 | from .scdataset import SCDataset 8 | 9 | __all__ = [ 10 | "from_csv_yaml", 11 | "from_csv_dir_yaml", 12 | "from_loompy_yaml", 13 | "from_anndata_yaml", 14 | "SCDataset", 15 | ] 16 | -------------------------------------------------------------------------------- /astir/data/data_readers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from typing import Any 4 | 5 | import anndata 6 | import loompy 7 | import matplotlib 8 |
from matplotlib import MatplotlibDeprecationWarning 9 | import numpy as np 10 | import pandas as pd 11 | import torch 12 | import yaml 13 | from sklearn.preprocessing import OneHotEncoder 14 | 15 | warnings.simplefilter(action="ignore", category=FutureWarning) 16 | warnings.filterwarnings("ignore", category=MatplotlibDeprecationWarning) 17 | 18 | 19 | def from_csv_yaml( 20 | csv_input: str, 21 | marker_yaml: str, 22 | design_csv: str = None, 23 | create_design_mat: bool = True, 24 | random_seed: int = 1234, 25 | dtype: torch.dtype = torch.float64, 26 | sep: str = ',', 27 | usecols: list = None 28 | ) -> Any: 29 | """Create an Astir object from an expression CSV and marker YAML 30 | 31 | :param csv_input: Path to input csv containing expression for cells (rows) by proteins (columns). First column is 32 | cell identifier, and additional column names are gene identifiers. 33 | :param marker_yaml: Path to input YAML file containing marker gene information. Should include cell_type and cell_state 34 | entries. See documentation. 35 | :param design_csv: Path to design matrix as a CSV. Rows should be cells, and columns covariates. First column is cell 36 | identifier, and additional column names are covariate identifiers. 37 | :param create_design_mat: Determines whether a design matrix is created. Defaults to True.
38 | :param random_seed: The random seed to be used to initialize variables, 39 | defaults to 1234 40 | :param dtype: datatype of the model parameters, defaults to torch.float64 41 | """ 42 | df_gex = pd.read_csv(csv_input, sep=sep, usecols=usecols, index_col=0) 43 | 44 | design = None 45 | if design_csv is not None and create_design_mat == True: 46 | design = pd.read_csv(design_csv, index_col=0) 47 | with open(marker_yaml, "r") as stream: 48 | marker_dict = yaml.safe_load(stream) 49 | from astir.astir import Astir 50 | 51 | return Astir(df_gex, marker_dict, design, random_seed, dtype=dtype) 52 | 53 | 54 | def from_csv_dir_yaml( 55 | input_dir: str, 56 | marker_yaml: str, 57 | create_design_mat: bool = True, 58 | random_seed: int = 1234, 59 | dtype: torch.dtype = torch.float64, 60 | ) -> Any: 61 | """Create an Astir object from a directory containing multiple csv files 62 | 63 | :param input_dir: Path to a directory containing multiple CSV files, each in the format expected by 64 | `from_csv_yaml` 65 | :param marker_yaml: Path to input YAML file containing marker gene information. Should include cell_type and cell_state 66 | entries. See documentation. 67 | :param design_csv: Path to design matrix as a CSV. Rows should be cells, and columns covariates. First column is cell 68 | identifier, and additional column names are covariate identifiers 69 | :param create_design_mat: Determines whether a design matrix is created. Defaults to True.
70 | :param random_seed: The random seed to be used to initialize variables, 71 | defaults to 1234 72 | :param dtype: datatype of the model parameters, defaults to torch.float64 73 | """ 74 | # TODO: add text explaining concatenation 75 | # Parse the input directory 76 | csv_files = [ 77 | os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith("csv") 78 | ] 79 | 80 | # Read to gene expression df and parse 81 | dfs = [pd.read_csv(f, index_col=0) for f in csv_files] 82 | df_gex = pd.concat(dfs, axis=0) 83 | 84 | design = None 85 | if create_design_mat == True: 86 | # Construct a sample specific design matrix 87 | design_list = [np.repeat(str(i), dfs[i].shape[0]) for i in range(len(dfs))] 88 | design = ( 89 | OneHotEncoder() 90 | .fit_transform(np.concatenate(design_list, axis=0).reshape(-1, 1)) 91 | .todense() 92 | ) 93 | design = design[:, :-1] # remove final column 94 | design = np.concatenate( 95 | [np.ones((design.shape[0], 1)), design], axis=1 96 | ) # add in intercept! 97 | 98 | with open(marker_yaml, "r") as stream: 99 | marker_dict = yaml.safe_load(stream) 100 | from astir.astir import Astir 101 | 102 | return Astir(df_gex, marker_dict, design, random_seed, dtype) 103 | 104 | 105 | def from_loompy_yaml( 106 | loom_file: str, 107 | marker_yaml: str, 108 | protein_name_attr: str = "protein", 109 | cell_name_attr: str = "cell_name", 110 | batch_name_attr: str = "batch", 111 | create_design_mat: bool = True, 112 | random_seed: int = 1234, 113 | dtype: torch.dtype = torch.float64, 114 | ) -> Any: 115 | """Create an Astir object from a loom file and a marker yaml 116 | 117 | :param loom_file: Path to a loom file, where rows correspond to proteins and columns to cells 118 | :param marker_yaml: Path to input YAML file containing marker gene information. Should include cell_type and cell_state 119 | entries. See documentation.
120 | :param protein_name_attr: The attribute (key) in the row attributes that identifies the protein names 121 | (required to match with the marker gene information), defaults to 122 | protein 123 | :param cell_name_attr: The attribute (key) in the column attributes that 124 | identifies the name of each cell, defaults to cell_name 125 | :param batch_name_attr: The attribute (key) in the column attributes that identifies the batch. A design matrix 126 | will be built using this (if present) using a one-hot encoding to 127 | control for batch, defaults to batch 128 | :param create_design_mat: Determines whether a design matrix is created. Defaults to True. 129 | :param random_seed: The random seed to be used to initialize variables, 130 | defaults to 1234 131 | :param dtype: datatype of the model parameters, defaults to torch.float64 132 | :return: An object of class `astir_bash.py.Astir` using data imported from the loom files 133 | """ 134 | # TODO: This function is memory inefficient and goes against the philosophy of loom files. 
Should be improved 135 | batch_list = None 136 | with loompy.connect(loom_file) as ds: 137 | df_gex = pd.DataFrame(ds[:, :].T) 138 | df_gex.columns = ds.ra[protein_name_attr] 139 | 140 | if cell_name_attr in ds.ca.keys(): 141 | df_gex.index = ds.ca[cell_name_attr] 142 | 143 | if batch_name_attr in ds.ca.keys(): 144 | batch_list = ds.ca[batch_name_attr] 145 | 146 | design = None 147 | 148 | if batch_list is not None and create_design_mat == True: 149 | design = OneHotEncoder().fit_transform(batch_list.reshape(-1, 1)).todense() 150 | design = design[:, :-1] # remove final column 151 | design = np.concatenate([np.ones((design.shape[0], 1)), design], axis=1) 152 | 153 | with open(marker_yaml, "r") as stream: 154 | marker_dict = yaml.safe_load(stream) 155 | from astir.astir import Astir 156 | 157 | return Astir(df_gex, marker_dict, design, random_seed, dtype) 158 | 159 | 160 | def from_anndata_yaml( 161 | anndata_file: str, 162 | marker_yaml: str, 163 | protein_name: str = None, 164 | cell_name: str = None, 165 | batch_name: str = "batch", 166 | create_design_mat: bool = True, 167 | random_seed: int = 1234, 168 | dtype: torch.dtype = torch.float64, 169 | ) -> Any: 170 | """Create an Astir object from an :class:`anndata.Anndata` file and a 171 | marker yaml 172 | 173 | :param anndata_file: Path to an :class:`anndata.Anndata` `h5py` file 174 | :param marker_yaml: Path to input YAML file containing marker gene information. Should include cell_type and cell_state 175 | entries. See documentation. 176 | :param protein_name: The column of `adata.var` containing protein names. If this is none, defaults to `adata.var_names` 177 | :param cell_name: The column of `adata.obs` containing cell names. If this is none, defaults to `adata.obs_names` 178 | :param batch_name: The column of `adata.obs` containing batch names.
A design matrix 179 | will be built using this (if present) using a one-hot encoding to 180 | control for batch, defaults to 'batch' 181 | :param create_design_mat: Determines whether a design matrix is created. Defaults to True. 182 | :param random_seed: The random seed to be used to initialize variables, 183 | defaults to 1234 184 | :param dtype: datatype of the model parameters, defaults to torch.float64 185 | :return: An object of class `astir_bash.py.Astir` using data imported from the anndata file 186 | """ 187 | # TODO: This function is memory inefficient and goes against the philosophy of loom files. Should be improved 188 | batch_list = None 189 | 190 | ad = anndata.read_h5ad(anndata_file) 191 | 192 | df_gex = pd.DataFrame(ad.X) 193 | 194 | if protein_name is not None: 195 | df_gex.columns = ad.var[protein_name] 196 | else: 197 | df_gex.columns = ad.var_names 198 | 199 | if cell_name is not None: 200 | df_gex.index = ad.obs[cell_name] 201 | else: 202 | df_gex.index = ad.obs_names 203 | 204 | if batch_name is not None: 205 | batch_list = ad.obs[batch_name] 206 | 207 | design = None 208 | 209 | if batch_list is not None and create_design_mat == True: 210 | design = ( 211 | OneHotEncoder() 212 | .fit_transform(batch_list.to_numpy().reshape(-1, 1)) 213 | .todense() 214 | ) 215 | design = design[:, :-1] # remove final column 216 | design = np.concatenate([np.ones((design.shape[0], 1)), design], axis=1) 217 | 218 | with open(marker_yaml, "r") as stream: 219 | marker_dict = yaml.safe_load(stream) 220 | from astir.astir import Astir 221 | 222 | return Astir(df_gex, marker_dict, design, random_seed, dtype) 223 | -------------------------------------------------------------------------------- /astir/data/rds_reader.R: -------------------------------------------------------------------------------- 1 | local({r <- getOption("repos") 2 | r["CRAN"] <- "http://cran.r-project.org" 3 | options(repos=r) 4 | }) 5 | if("argparse" %in% rownames(installed.packages()) == FALSE)
{install.packages("argparse")} 6 | if("SingleCellExperiment" %in% rownames(installed.packages()) == FALSE) { 7 | if("BiocManager" %in% rownames(installed.packages()) == FALSE) {install.packages("BiocManager")} 8 | BiocManager::install("SingleCellExperiment") 9 | } 10 | 11 | suppressPackageStartupMessages({ 12 | library(SingleCellExperiment) 13 | library(argparse) 14 | }) 15 | options(warn=-1) 16 | 17 | parser <- ArgumentParser(description='Input .rds file to be processed') 18 | parser$add_argument('rds_in', type='character', help='input rds file') 19 | parser$add_argument('csv_out', type='character', help='output csv file') 20 | parser$add_argument('--assay', type='character', help='assay') 21 | parser$add_argument('--design_col', type='character', help='design column') 22 | parser$add_argument('--design_csv', type='character', help='output design csv file') 23 | parser$add_argument('--winsorize', type='character', help='the winsorize limit will be c(, 1-)') 24 | args <- parser$parse_args() 25 | 26 | sce <- readRDS(args$rds_in) 27 | 28 | winsorize_one <- function(y, 29 | w_limits) { 30 | q <- quantile(y, p = w_limits) 31 | y[y < q[1]] <- q[1] 32 | y[y > q[2]] <- q[2] 33 | y 34 | } 35 | 36 | winsorize <- function(sce, 37 | exprs_values = "logcounts", 38 | w_limits = c(0.05, 0.95)) { 39 | ## Save unwinsorized expression values 40 | assay(sce, paste0(exprs_values, "_unwinsorized")) <- assay(sce, exprs_values) 41 | 42 | assay(sce, exprs_values) <- t(apply(assay(sce, exprs_values), 43 | 1, 44 | winsorize_one, 45 | w_limits)) 46 | sce 47 | } 48 | 49 | win = as.numeric(args$winsorize) 50 | for (channel in rownames(sce)) { 51 | sce[channel, ] = winsorize(sce[channel, ], exprs_values=args$assay, w_limits=c(win, 1-win)) 52 | } 53 | 54 | # print(sce) 55 | sce_df <- t(data.frame(assay(sce, args$assay))) 56 | write.csv(sce_df, args$csv_out, row.names=TRUE) 57 | 58 | design = args$design_col 59 | if (design != "") { 60 | mm <- as.data.frame(colData(sce)) 61 | ind = 
which(colnames(mm) == design) 62 | colnames(mm)[ind] <- 'target_col' 63 | # print(head(mm)) 64 | design_mm <- model.matrix(~target_col, mm) 65 | if (args$design_csv != "") { 66 | write.csv(design_mm, args$design_csv, row.names=TRUE) 67 | } 68 | } 69 | 70 | # Rscript rds_reader.R ../../tests/test-data/test_rds.rds ../../tests/test-data/test_rds.csv --assay logcounts --winsorize 0.005 71 | -------------------------------------------------------------------------------- /astir/data/scdataset.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Any, Dict, List, Optional, Tuple, Union 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from sklearn.preprocessing import StandardScaler 8 | from torch.utils.data import DataLoader, Dataset 9 | 10 | 11 | class SCDataset(Dataset): 12 | """Container for single-cell proteomic data in the form of 13 | a pytorch dataset 14 | 15 | :param expr_input: Input expression data. See details :`expr_input` is either a `pd.DataFrame` 16 | or a three-element `tuple`. When it is `pd.DataFrame`, its index and column should indicate the cell 17 | name and feature name of the dataset; when it is a three-element `tuple`, it should be in the form 18 | of `Tuple[Union[np.array, torch.Tensor], List[str], List[str]]` and its first element should 19 | be the actual dataset as either `np.array` or `torch.tensor`, the second element should be 20 | a list containing the name of the columns or the names of features, the third element should be a 21 | list containing the name of the indices or the names of the cells.: 22 | :param marker_dict: Marker dictionary containing cell type and 23 | information. See details :The dictionary maps the name of cell type/state to protein features. : 24 | :param design: A design matrix 25 | :param include_other_column: Should an additional 'other' column be included? 
26 | :param dtype: torch datatype of the model 27 | """ 28 | 29 | def __init__( 30 | self, 31 | expr_input: Union[ 32 | pd.DataFrame, Tuple[Union[np.array, torch.Tensor], List[str], List[str]] 33 | ], 34 | marker_dict: Dict[str, List[str]], 35 | include_other_column: bool, 36 | design: Optional[Union[np.array, pd.DataFrame]] = None, 37 | dtype: torch.dtype = torch.float64, 38 | device: torch.device = torch.device("cpu"), 39 | ) -> None: 40 | """Initialize an SCDataset object.""" 41 | self._dtype = dtype 42 | self._marker_dict = marker_dict 43 | self._m_features = sorted( 44 | list(set([l for s in marker_dict.values() for l in s])) 45 | ) 46 | self._classes = list(marker_dict.keys()) 47 | 48 | self._device = device 49 | ## sanitize features 50 | if len(self._classes) <= 1 and include_other_column: 51 | raise NotClassifiableError( 52 | "Classification failed. There " 53 | + "should be at least two cell classes to classify the data into." 54 | ) 55 | self._marker_mat = self._construct_marker_mat( 56 | include_other_column=include_other_column 57 | ) 58 | 59 | if isinstance(expr_input, pd.DataFrame): 60 | self._exprs = self._process_df_input(expr_input) 61 | self._expr_features = list(expr_input.columns) 62 | self._cell_names = list(expr_input.index) 63 | elif isinstance(expr_input, tuple): 64 | self._expr_features = expr_input[1] 65 | self._cell_names = expr_input[2] 66 | self._exprs = self._process_tp_input(expr_input[0]) 67 | self._design = self._fix_design(design) 68 | ## sanitize df 69 | if self._exprs.shape[0] <= 0: 70 | raise NotClassifiableError( 71 | "Classification failed. There " 72 | + "should be at least one row of data to be classified." 
73 | ) 74 | # adding epsilon to avoid NaN's down the line, which arise due to normalization when the stdev is 0 75 | self._exprs_mean = self._exprs.mean(0) + 1e-6 76 | self._exprs_std = self._exprs.std(0) + 1e-6 77 | 78 | def _process_df_input(self, df_input: pd.DataFrame) -> torch.Tensor: 79 | """Processes input as pd.DataFrame and convert it into torch.Tensor 80 | 81 | :param df_input: the input 82 | :raises NotClassifiableError: raised when there is no overlap between the 83 | data and the marker 84 | :return: the processed input as a torch.Tensor 85 | """ 86 | try: 87 | Y_np = df_input[self._m_features].values 88 | return torch.from_numpy(Y_np).to(device=self._device, dtype=self._dtype) 89 | except (KeyError): 90 | raise NotClassifiableError( 91 | "Classification failed. There's no " 92 | + "overlap between marked features and expression features for " 93 | + "the classification of cell type/state." 94 | ) 95 | 96 | def _process_tp_input(self, in_data: Union[torch.Tensor, np.array]) -> torch.Tensor: 97 | """Process the input as Tuple[np.array, np.array, np.array] and convert it 98 | to torch.Tensor. 99 | 100 | :param in_data: input as a np.array or torch.tensor 101 | :raises NotClassifiableError: raised when there is no overlap between marked 102 | features and expression feature. 103 | :return: the processed input as a torch.Tensor 104 | """ 105 | ind = [ 106 | self._expr_features.index(name) 107 | for name in self._m_features 108 | if name in self._expr_features 109 | ] 110 | if len(ind) <= 0: 111 | raise NotClassifiableError( 112 | "Classification failed. There's no " 113 | + "overlap between marked features and expression features for " 114 | + "the classification of cell type/state." 
115 | ) 116 | if len(ind) < len(self._m_features): 117 | warnings.warn("Classified features are less than marked features.") 118 | Y_np = in_data[:, ind] 119 | if torch.is_tensor(Y_np): 120 | return Y_np.to(device=self._device, dtype=self._dtype) 121 | return torch.from_numpy(Y_np).to(device=self._device, dtype=self._dtype) 122 | 123 | def _construct_marker_mat(self, include_other_column: bool) -> torch.Tensor: 124 | """Construct a marker matrix. 125 | 126 | :param include_other_column: indicates whether or not to include an additional 'other' column. 127 | :return: A marker matrix. The rows are features and the columns are 128 | the corresponding classes (type/state). 129 | """ 130 | G = self.get_n_features() 131 | C = self.get_n_classes() 132 | 133 | marker_mat = torch.zeros( 134 | (G, C + 1 if include_other_column else C), dtype=self._dtype 135 | ).to(self._device) 136 | for g, feature in enumerate(self._m_features): 137 | for c, cell_class in enumerate(self._classes): 138 | if feature in self._marker_dict[cell_class]: 139 | marker_mat[g, c] = 1.0 140 | return marker_mat 141 | 142 | def __len__(self) -> int: 143 | """Length of the input file 144 | 145 | :return: total number of cells 146 | """ 147 | # N 148 | return self._exprs.shape[0] 149 | 150 | def __getitem__( 151 | self, idx: Union[slice, int] 152 | ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 153 | """Returns the protein expression of the indexed cell on the SCDataset 154 | object 155 | 156 | :param idx: the index of the cell 157 | :return: raw protein expression, normalized protein expression, 158 | sanitized design matrix of the cell at index 159 | """ 160 | y = self._exprs[idx, :] 161 | x = (y - self._exprs_mean) / self._exprs_std 162 | return y, x, self._design[idx, :] 163 | 164 | def _fix_design(self, design: Union[np.array, pd.DataFrame]) -> torch.Tensor: 165 | """Sanitize the design matrix.
166 | 167 | :param design: the unsanitized design matrix 168 | :raises NotClassifiableError: raised when the design matrix has 169 | different number of rows from the expression data 170 | :return: the sanitized design matrix 171 | """ 172 | d = None 173 | if design is None: 174 | d = torch.ones((self._exprs.shape[0], 1)).to( 175 | device=self._device, dtype=self._dtype 176 | ) 177 | else: 178 | if isinstance(design, pd.DataFrame): 179 | design = design.values 180 | d = torch.from_numpy(design).to(device=self._device, dtype=self._dtype) 181 | 182 | if d.shape[0] != self._exprs.shape[0]: 183 | raise NotClassifiableError( 184 | "Number of rows of design matrix " 185 | + "must equal number of rows of expression data" 186 | ) 187 | return d 188 | 189 | def rescale(self) -> None: 190 | """Normalize the expression data.""" 191 | self._exprs = self._exprs / (self.get_sigma()) 192 | 193 | def get_dtype(self) -> torch.dtype: 194 | """Get the dtype of the `SCDataset`. 195 | 196 | :return: `self._dtype` 197 | :rtype: torch.dtype 198 | """ 199 | return self._dtype 200 | 201 | def get_exprs(self) -> torch.Tensor: 202 | """Return the expression data as a :class:`torch.Tensor`.""" 203 | return self._exprs 204 | 205 | def get_exprs_df(self) -> pd.DataFrame: 206 | """Return the expression data as a :class:`pandas.DataFrame`.""" 207 | df = pd.DataFrame(self._exprs.detach().cpu().numpy()) 208 | df.index = self.get_cell_names() 209 | df.columns = self.get_features() 210 | return df 211 | 212 | def get_marker_mat(self) -> torch.Tensor: 213 | """Return the marker matrix as a :class:`torch.Tensor`.""" 214 | return self._marker_mat 215 | 216 | def get_mu(self) -> torch.Tensor: 217 | """Get the mean expression of each protein as a :class:`torch.Tensor`.""" 218 | return self._exprs_mean 219 | 220 | def get_sigma(self) -> torch.Tensor: 221 | """Get the standard deviation of each protein 222 | 223 | :return: standard deviation of each protein 224 | """ 225 | return self._exprs_std 226 | 227 | 
def get_n_classes(self) -> int: 228 | """Get the number of 'classes': either the number of cell types or cell states.""" 229 | return len(self._classes) 230 | 231 | def get_n_cells(self) -> int: 232 | """Get the number of cells (the number of cell names in the dataset).""" 233 | return len(self.get_cell_names()) 234 | 235 | def get_n_features(self) -> int: 236 | """Get the number of features (proteins).""" 237 | return len(self._m_features) 238 | 239 | def get_features(self) -> List[str]: 240 | """Get the features (proteins). 241 | 242 | :return: return self._m_features 243 | :rtype: List[str] 244 | """ 245 | return self._m_features 246 | 247 | def get_cell_names(self) -> List[str]: 248 | """Get the cell names. 249 | 250 | :return: return self._cell_names 251 | :rtype: List[str] 252 | """ 253 | return self._cell_names 254 | 255 | def get_classes(self) -> List[str]: 256 | """Get the cell types/states. 257 | 258 | :return: return self._classes 259 | :rtype: List[str] 260 | """ 261 | return self._classes 262 | 263 | def get_design(self) -> torch.Tensor: 264 | """Get the design matrix. 265 | 266 | :return: return self._design 267 | :rtype: torch.Tensor 268 | """ 269 | return self._design 270 | 271 | def normalize( 272 | self, 273 | percentile_lower: float = 0, 274 | percentile_upper: float = 99.9, 275 | cofactor: float = 5.0, 276 | ) -> None: 277 | """Normalize the expression data 278 | 279 | This performs a two-step normalization: 280 | 1. An `arcsinh(x / cofactor)` transformation to the data 281 | 2. 
Winsorizes to (`percentile_lower`, `percentile_upper`) 282 | 283 | :param percentile_lower: the lower bound percentile for 284 | winsorization, defaults to 0 285 | :param percentile_upper: the upper bound percentile for winsorization, 286 | defaults to 99.9 287 | :param cofactor: the constant the data is divided by before the arcsinh transformation, defaults to 5.0 288 | """ 289 | 290 | with torch.no_grad(): 291 | exprs = self.get_exprs().cpu().numpy() 292 | exprs = np.arcsinh(exprs / cofactor) 293 | q_low = np.percentile(exprs, (percentile_lower), axis=0) 294 | q_high = np.percentile(exprs, (percentile_upper), axis=0) 295 | 296 | for g in range(exprs.shape[1]): 297 | exprs[:, g][exprs[:, g] < q_low[g]] = q_low[g] 298 | exprs[:, g][exprs[:, g] > q_high[g]] = q_high[g] 299 | 300 | self._exprs = torch.tensor(exprs) 301 | 302 | def get_mu_init(self, n_putative_cells: int = 10) -> np.ndarray: 303 | """Intelligent initialization for mu parameters 304 | 305 | See manuscript for details 306 | 307 | :param n_putative_cells: Number of cells to guess as given cell type 308 | """ 309 | df_exprs = self.get_exprs_df() 310 | 311 | df_scaled = df_exprs.copy() 312 | scaler = StandardScaler() 313 | df_scaled[df_scaled.columns] = scaler.fit_transform( 314 | df_scaled[df_scaled.columns] 315 | ) 316 | df_scaled.head() 317 | 318 | putative_cells = {} 319 | 320 | for cell_type, type_markers in self._marker_dict.items(): 321 | scored_by_marker = df_scaled[type_markers].mean(1) 322 | putative_cells[cell_type] = scored_by_marker.nlargest( 323 | n_putative_cells 324 | ).index 325 | 326 | mean_inits = {} 327 | 328 | for feature in self._m_features: 329 | ## List of cell types that do not contain feature 330 | celltypes_no_feature = [ 331 | c for c in self._classes if not feature in self._marker_dict[c] 332 | ] 333 | indices_to_use = [putative_cells[ct] for ct in celltypes_no_feature] 334 | idx = indices_to_use[0] 335 | for i in range(1, len(indices_to_use)): 336 | idx = idx.union(indices_to_use[i]) 337 | mean_inits[feature] = 
df_exprs.loc[idx][feature].mean() 338 | 339 | # adding an epsilon to avoid NaN's when logarithmizing during model initialization 340 | mean_init = pd.Series(mean_inits).to_numpy() + 1e-6 341 | 342 | return mean_init 343 | 344 | 345 | class NotClassifiableError(RuntimeError): 346 | pass 347 | -------------------------------------------------------------------------------- /astir/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .abstract import AstirModel 2 | from .cellstate import CellStateModel 3 | from .cellstate_recognet import StateRecognitionNet 4 | from .celltype import CellTypeModel 5 | from .celltype_recognet import TypeRecognitionNet 6 | 7 | __all__ = [ 8 | "CellTypeModel", 9 | "CellStateModel", 10 | "AstirModel", 11 | "TypeRecognitionNet", 12 | "StateRecognitionNet", 13 | ] 14 | -------------------------------------------------------------------------------- /astir/models/abstract.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Dict, List, Optional, Tuple, Union 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | 8 | from astir.data import SCDataset 9 | 10 | 11 | class AstirModel: 12 | """Abstract class to perform statistical inference to assign. This module is the super class of 13 | `CellTypeModel` and `CellStateModel` and is not supposed to be instantiated. 14 | """ 15 | 16 | def __init__( 17 | self, 18 | dset: Optional[SCDataset], 19 | random_seed: int, 20 | dtype: torch.dtype, 21 | device: torch.device = torch.device("cpu"), 22 | ) -> None: 23 | 24 | if not isinstance(random_seed, int): 25 | raise NotClassifiableError("Random seed is expected to be an integer.") 26 | torch.manual_seed(random_seed) 27 | np.random.seed(random_seed) 28 | 29 | if dtype != torch.float32 and dtype != torch.float64: 30 | raise NotClassifiableError( 31 | "dtype must be one of torch.float32 and torch.float64." 
32 | ) 33 | elif dset is not None and dtype != dset.get_dtype(): 34 | raise NotClassifiableError("dtype must be the same as `dset`.") 35 | self._dtype: torch.dtype = dtype 36 | self._data: Dict[str, torch.Tensor] = {} 37 | self._variables: Dict[str, torch.Tensor] = {} 38 | self._losses: torch.Tensor = torch.tensor([], dtype=self._dtype) 39 | self._assignment: pd.DataFrame = pd.DataFrame() 40 | 41 | self._dset = dset 42 | # self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 43 | self._device = device 44 | self._is_converged = False 45 | 46 | def get_losses(self) -> torch.Tensor: 47 | """Getter for losses. 48 | 49 | :return: self.losses 50 | """ 51 | if len(self._losses) == 0: 52 | raise Exception("The model has not been trained yet") 53 | return self._losses 54 | 55 | def get_scdataset(self) -> SCDataset: 56 | """Getter for the `SCDataset`. 57 | 58 | :return: `self._dset` 59 | """ 60 | if self._dset is None: 61 | raise Exception("the dataset is not provided") 62 | return self._dset 63 | 64 | def get_data(self) -> Dict[str, torch.Tensor]: 65 | """Get model data 66 | 67 | :return: data 68 | """ 69 | if self._data == {}: 70 | raise Exception("The model has not been initialized yet") 71 | return self._data 72 | 73 | def get_variables(self) -> Dict[str, torch.Tensor]: 74 | """Returns all variables 75 | 76 | :return: self._variables 77 | """ 78 | if self._variables == {}: 79 | raise Exception("The model has not been initialized yet") 80 | return self._variables 81 | 82 | def is_converged(self) -> bool: 83 | """Returns True if the model converged 84 | 85 | :return: self._is_converged 86 | """ 87 | return self._is_converged 88 | 89 | def get_assignment(self) -> pd.DataFrame: 90 | """Get the final assignment of the dataset. 
91 | 92 | :return: the final assignment of the dataset 93 | """ 94 | if self._assignment.shape == (0, 0): 95 | raise Exception("The model has not been trained yet") 96 | return self._assignment 97 | 98 | def _param_init(self) -> None: 99 | """Initializes parameters and design matrices.""" 100 | raise NotImplementedError("AbstractModel is not supposed to be instantiated.") 101 | 102 | def _forward( 103 | self, Y: torch.Tensor, X: torch.Tensor, design: torch.Tensor 104 | ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]: 105 | """One forward pass""" 106 | raise NotImplementedError("AbstractModel is not supposed to be instantiated.") 107 | 108 | def fit( 109 | self, 110 | max_epochs: int, 111 | learning_rate: float, 112 | batch_size: int, 113 | delta_loss: float, 114 | delta_loss_batch: int, 115 | msg: str, 116 | ) -> None: 117 | """Runs train loops until the convergence reaches delta_loss for 118 | delta_loss_batch sizes or for max_epochs number of times 119 | """ 120 | raise NotImplementedError("AbstractModel is not supposed to be instantiated.") 121 | 122 | 123 | class NotClassifiableError(RuntimeError): 124 | """Raised when the input data is not classifiable.""" 125 | 126 | pass 127 | -------------------------------------------------------------------------------- /astir/models/cellstate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Cell State Model 3 | """ 4 | import warnings 5 | from collections import OrderedDict 6 | from typing import Dict, Generator, List, Optional, Tuple, Union 7 | 8 | import h5py 9 | import numpy as np 10 | import pandas as pd 11 | import torch 12 | from torch.utils.data import DataLoader 13 | from tqdm import trange 14 | 15 | from astir.data import SCDataset 16 | 17 | from .abstract import AstirModel 18 | from .cellstate_recognet import StateRecognitionNet 19 | 20 | 21 | class CellStateModel(AstirModel): 22 | """Class to perform statistical inference on the 
activation 23 | of states (pathways) across cells 24 | 25 | :param dset: the input gene expression dataset, defaults to None 26 | :param const: See parameter ``const`` in 27 | :func:`astir.models.StateRecognitionNet`, defaults to 2 28 | :param dropout_rate: See parameter ``dropout_rate`` in 29 | :func:`astir.models.StateRecognitionNet`, defaults to 0 30 | :param batch_norm: See parameter ``batch_norm`` in 31 | :func:`astir.models.StateRecognitionNet`, defaults to False 32 | :param random_seed: the random seed number to reproduce results, defaults to 42 33 | :param dtype: torch datatype to use in the model, defaults to torch.float64 34 | :param device: torch.device's cpu or gpu, defaults to torch.device("cpu") 35 | """ 36 | 37 | def __init__( 38 | self, 39 | dset: SCDataset = None, 40 | const: int = 2, 41 | dropout_rate: float = 0, 42 | batch_norm: bool = False, 43 | random_seed: int = 42, 44 | dtype: torch.dtype = torch.float64, 45 | device: torch.device = torch.device("cpu"), 46 | ) -> None: 47 | super().__init__(dset, random_seed, dtype, device) 48 | 49 | # Setting random seeds 50 | self.random_seed = random_seed 51 | torch.manual_seed(self.random_seed) 52 | np.random.seed(self.random_seed) 53 | 54 | self._optimizer: Optional[torch.optim.Adam] = None 55 | self.const, self.dropout_rate, self.batch_norm = const, dropout_rate, batch_norm 56 | if self._dset is not None: 57 | self._param_init() 58 | 59 | # Convergence flag 60 | self._is_converged = False 61 | 62 | def _param_init(self) -> None: 63 | """Initializes sets of parameters""" 64 | if self._dset is None: 65 | raise Exception("the dataset is not provided") 66 | N = len(self._dset) 67 | C = self._dset.get_n_classes() 68 | G = self._dset.get_n_features() 69 | 70 | initializations = { 71 | "log_sigma": torch.log(self._dset.get_sigma().mean()), 72 | "mu": torch.reshape(self._dset.get_mu(), (1, -1)), 73 | } 74 | 75 | # Include beta or not 76 | d = torch.distributions.Uniform( 77 | torch.tensor(0.0, 
dtype=self._dtype), torch.tensor(1.5, dtype=self._dtype) 78 | ) 79 | initializations["log_w"] = torch.log(d.sample((C, G))) 80 | 81 | self._variables = { 82 | n: i.to(self._device).detach().clone().requires_grad_() 83 | for (n, i) in initializations.items() 84 | } 85 | 86 | self._data = { 87 | "rho": self._dset.get_marker_mat().T.to(self._device), 88 | } 89 | 90 | self._recog = StateRecognitionNet( 91 | C, 92 | G, 93 | const=self.const, 94 | dropout_rate=self.dropout_rate, 95 | batch_norm=self.batch_norm, 96 | ).to(device=self._device, dtype=self._dtype) 97 | 98 | def load_hdf5(self, hdf5_name: str) -> None: 99 | """Initializes Cell State Model from a hdf5 file type 100 | 101 | :param hdf5_name: file path 102 | """ 103 | self._assignment = pd.read_hdf( 104 | hdf5_name, "cellstate_model/cellstate_assignments" 105 | ) 106 | with h5py.File(hdf5_name, "r") as f: 107 | grp = f["cellstate_model"] 108 | param = grp["parameters"] 109 | self._variables = { 110 | "mu": torch.tensor(np.array(param["mu"])), 111 | "log_sigma": torch.tensor(np.array(param["log_sigma"])), 112 | "log_w": torch.tensor(np.array(param["log_w"])), 113 | } 114 | self._data = {"rho": torch.tensor(np.array(param["rho"]))} 115 | self._losses = torch.tensor(np.array(grp["losses"]["losses"])) 116 | 117 | rec = grp["recog_net"] 118 | hidden1_W = torch.tensor(np.array(rec["linear1.weight"])) 119 | hidden2_W = torch.tensor(np.array(rec["linear2.weight"])) 120 | hidden3_mu_W = torch.tensor(np.array(rec["linear3_mu.weight"])) 121 | hidden3_std_W = torch.tensor(np.array(rec["linear3_std.weight"])) 122 | state_dict = { 123 | "linear1.weight": hidden1_W, 124 | "linear1.bias": torch.tensor(np.array(rec["linear1.bias"])), 125 | "linear2.weight": hidden2_W, 126 | "linear2.bias": torch.tensor(np.array(rec["linear2.bias"])), 127 | "linear3_mu.weight": hidden3_mu_W, 128 | "linear3_mu.bias": torch.tensor(np.array(rec["linear3_mu.bias"])), 129 | "linear3_std.weight": hidden3_std_W, 130 | "linear3_std.bias": 
torch.tensor(np.array(rec["linear3_std.bias"])), 131 | } 132 | state_dict = OrderedDict(state_dict) 133 | self._recog = StateRecognitionNet( 134 | hidden3_mu_W.shape[0], 135 | hidden1_W.shape[1], 136 | const=self.const, 137 | dropout_rate=self.dropout_rate, 138 | batch_norm=self.batch_norm, 139 | ).to(device=self._device, dtype=self._dtype) 140 | self._recog.load_state_dict(state_dict) 141 | self._recog.eval() 142 | 143 | def _loss_fn( 144 | self, 145 | mu_z: torch.Tensor, 146 | std_z: torch.Tensor, 147 | z_sample: torch.Tensor, 148 | y_in: torch.Tensor, 149 | ) -> torch.Tensor: 150 | """Returns the calculated loss 151 | 152 | :param mu_z: the predicted mean of z 153 | :param std_z: the predicted standard deviation of z 154 | :param z_sample: the sampled z values 155 | :param y_in: the input data 156 | :return: the loss 157 | """ 158 | S = y_in.shape[0] 159 | 160 | # log posterior q(z) approx p(z|y) 161 | q_z_dist = torch.distributions.Normal(loc=mu_z, scale=torch.exp(std_z)) 162 | log_q_z = q_z_dist.log_prob(z_sample) 163 | 164 | # log likelihood p(y|z) 165 | rho_w = torch.mul(self._data["rho"], torch.exp(self._variables["log_w"])) 166 | mean = self._variables["mu"] + torch.matmul(z_sample, rho_w) 167 | std = torch.exp(self._variables["log_sigma"]).reshape(1, -1) 168 | p_y_given_z_dist = torch.distributions.Normal(loc=mean, scale=std) 169 | log_p_y_given_z = p_y_given_z_dist.log_prob(y_in) 170 | 171 | # log prior p(z) 172 | p_z_dist = torch.distributions.Normal(0, 1) 173 | log_p_z = p_z_dist.log_prob(z_sample) 174 | 175 | loss = (1 / S) * ( 176 | torch.sum(log_q_z) - torch.sum(log_p_y_given_z) - torch.sum(log_p_z) 177 | ) 178 | 179 | return loss 180 | 181 | def _forward( 182 | self, 183 | Y: Optional[torch.Tensor], 184 | X: Optional[torch.Tensor] = None, 185 | design: Optional[torch.Tensor] = None, 186 | ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 187 | """One forward pass 188 | 189 | :param Y: dataset to do forward pass on 190 | :return: mu_z, std_z, 
z_sample 191 | """ 192 | mu_z, std_z = self._recog(Y) 193 | 194 | std = torch.exp(std_z) 195 | eps = torch.randn_like(std) 196 | z_sample = eps * std + mu_z 197 | 198 | return mu_z, std_z, z_sample 199 | 200 | def fit( 201 | self, 202 | max_epochs: int = 50, 203 | learning_rate: float = 1e-3, 204 | batch_size: int = 128, 205 | delta_loss: float = 1e-3, 206 | delta_loss_batch: int = 10, 207 | msg: str = "", 208 | ) -> None: 209 | """ Runs train loops until the convergence reaches delta_loss for\ 210 | delta_loss_batch sizes or for max_epochs number of times 211 | 212 | :param max_epochs: number of train loop iterations, defaults to 50 213 | :param learning_rate: the learning rate, defaults to 0.01 214 | :param batch_size: the batch size, defaults to 128 215 | :param delta_loss: stops iteration once the loss rate reaches\ 216 | delta_loss, defaults to 0.001 217 | :param delta_loss_batch: the batch size to consider delta loss,\ 218 | defaults to 10 219 | :param msg: iterator bar message, defaults to empty string 220 | """ 221 | if self._dset is None: 222 | raise Exception("the dataset is not provided") 223 | 224 | # Returns early if the model has already converged 225 | if self._is_converged: 226 | return 227 | 228 | # Create an optimizer if there is no optimizer 229 | if self._optimizer is None: 230 | opt_params = list(self._recog.parameters()) 231 | opt_params += list(self._variables.values()) # type: ignore 232 | self._optimizer = torch.optim.Adam(opt_params, lr=learning_rate) 233 | 234 | iterator = trange( 235 | max_epochs, 236 | desc="training restart" + msg, 237 | unit="epochs", 238 | bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{rate_fmt}{postfix}]", 239 | ) 240 | train_iterator = DataLoader( 241 | self._dset, batch_size=min(batch_size, len(self._dset)) 242 | ) 243 | for ep in iterator: 244 | for i, (y_in, x_in, _) in enumerate(train_iterator): 245 | self._optimizer.zero_grad() 246 | 247 | mu_z, std_z, z_samples = self._forward( 248 | 
x_in.type(self._dtype).to(self._device) 249 | ) 250 | 251 | loss = self._loss_fn( 252 | mu_z, std_z, z_samples, x_in.type(self._dtype).to(self._device) 253 | ) 254 | 255 | loss.backward() 256 | 257 | self._optimizer.step() 258 | 259 | loss_detached = loss.cpu().detach().item() 260 | 261 | self._losses = torch.cat( 262 | (self._losses, torch.tensor([loss_detached], dtype=self._dtype)) 263 | ) 264 | 265 | if len(self._losses) > delta_loss_batch: 266 | curr_mean = torch.mean(self._losses[-delta_loss_batch:]) 267 | prev_mean = torch.mean(self._losses[-delta_loss_batch - 1 : -1]) 268 | curr_delta_loss = (prev_mean - curr_mean) / prev_mean 269 | delta_cond_met = 0 <= curr_delta_loss.item() < delta_loss 270 | else: 271 | delta_cond_met = False 272 | 273 | iterator.set_postfix_str("current loss: " + str(round(loss_detached, 1))) 274 | 275 | if delta_cond_met: 276 | self._is_converged = True 277 | iterator.close() 278 | break 279 | 280 | g = self.get_final_mu_z().detach().cpu().numpy() 281 | self._assignment = pd.DataFrame(g) 282 | self._assignment.columns = self._dset.get_classes() 283 | self._assignment.index = self._dset.get_cell_names() 284 | 285 | def get_recognet(self) -> StateRecognitionNet: 286 | """Getter for the recognition net 287 | 288 | :return: the recognition net 289 | """ 290 | return self._recog 291 | 292 | def get_final_mu_z(self, new_dset: Optional[SCDataset] = None) -> torch.Tensor: 293 | """Returns the mean of the predicted z values for each core 294 | 295 | :param new_dset: returns the predicted z values of this dataset on 296 | the existing model. 
If None, it predicts using the existing 297 | dataset, defaults to None 298 | :return: the mean of the predicted z values for each core 299 | """ 300 | if self._dset is None: 301 | raise Exception("the dataset is not provided") 302 | if new_dset is None: 303 | _, x_in, _ = self._dset[:] # should be the scaled 304 | # one 305 | else: 306 | _, x_in, _ = new_dset[:] 307 | final_mu_z, _, _ = self._forward(x_in.type(self._dtype).to(self._device)) 308 | 309 | return final_mu_z 310 | 311 | def get_correlations(self) -> np.array: 312 | """Returns a C (# of pathways) X G (# of proteins) matrix 313 | where each element represents the correlation value of the pathway 314 | and the protein 315 | 316 | :return: matrix of correlation between all pathway and protein pairs. 317 | """ 318 | if self._dset is None: 319 | raise Exception("No dataset input to the model") 320 | 321 | state_assignment = self.get_final_mu_z().detach().cpu().numpy() 322 | y_in = self._dset.get_exprs() 323 | 324 | feature_names = self._dset.get_features() 325 | state_names = self._dset.get_classes() 326 | G = self._dset.get_n_features() 327 | C = self._dset.get_n_classes() 328 | corr_mat = np.zeros((C, G)) 329 | # Make a matrix of correlations between all states and proteins 330 | for c, state in enumerate(state_names): 331 | for g, feature in enumerate(feature_names): 332 | states = state_assignment[:, c] 333 | protein = y_in[:, g].cpu() 334 | corr_mat[c, g] = np.corrcoef(protein, states)[0, 1] 335 | 336 | return corr_mat 337 | 338 | def diagnostics(self) -> pd.DataFrame: 339 | """Run diagnostics on cell state assignments 340 | 341 | See :meth:`astir.Astir.diagnostics_cellstate` for full documentation 342 | """ 343 | if self._dset is None: 344 | raise Exception("the dataset is not provided") 345 | feature_names = self._dset.get_features() 346 | state_names = self._dset.get_classes() 347 | 348 | corr_mat = self.get_correlations() 349 | 350 | # Correlation values of all marker proteins 351 | marker_mat = 
self._dset.get_marker_mat().T.cpu().numpy() 352 | marker_corr = marker_mat * corr_mat 353 | marker_corr[marker_mat == 0] = np.inf 354 | 355 | # Smallest correlation values for each pathway 356 | min_marker_corr = np.min(marker_corr, axis=1).reshape(-1, 1) 357 | min_marker_proteins = np.take(feature_names, np.argmin(marker_corr, axis=1)) 358 | 359 | # Correlation values of all non marker proteins 360 | non_marker_mat = 1 - self._dset.get_marker_mat().T.cpu().numpy() 361 | non_marker_corr = non_marker_mat * corr_mat 362 | non_marker_corr[non_marker_mat == 0] = -np.inf 363 | 364 | # Any correlation values where non marker proteins is greater than 365 | # the smallest correlation values of marker proteins 366 | bad_corr_marker = np.array(non_marker_corr > min_marker_corr, dtype=np.int32) 367 | 368 | # Problem summary 369 | indices = np.argwhere(bad_corr_marker > 0) 370 | 371 | col_names = [ 372 | "pathway", 373 | "protein A", 374 | "correlation of protein A", 375 | "protein B", 376 | "correlation of protein B", 377 | "note", 378 | ] 379 | 380 | problems = [] 381 | for index in indices: 382 | state_index = index[0] 383 | protein_index = index[1] 384 | state = state_names[index[0]] 385 | marker_protein = min_marker_proteins[state_index] 386 | non_marker_protein = feature_names[protein_index] 387 | problem = { 388 | "pathway": state, 389 | "marker_protein": marker_protein, 390 | "corr_of_marker_protein": min_marker_corr[state_index][0], 391 | "non_marker_protein": non_marker_protein, 392 | "corr_of_non_marker_protein": non_marker_corr[ 393 | state_index, protein_index 394 | ], 395 | "msg": "{} is marker for {} but {} isn't".format( 396 | marker_protein, state, non_marker_protein 397 | ), 398 | } 399 | problems.append(problem) 400 | 401 | if len(problems) > 0: 402 | df_issues = pd.DataFrame(problems) 403 | df_issues.columns = col_names 404 | else: 405 | df_issues = pd.DataFrame(columns=col_names) 406 | 407 | return df_issues 408 | 409 | 410 | class 
NotClassifiableError(RuntimeError): 411 | """Raised when the input data is not classifiable.""" 412 | 413 | pass 414 | -------------------------------------------------------------------------------- /astir/models/cellstate_recognet.py: -------------------------------------------------------------------------------- 1 | """ 2 | State Recognition Neural Network Model 3 | """ 4 | 5 | import math 6 | from typing import Tuple 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | # The recognition net 14 | class StateRecognitionNet(nn.Module): 15 | """State Recognition Neural Network to get mean of z and standard 16 | deviation of z. The neural network architecture looks like this: G -> 17 | const * C -> const * C -> C (for mu) or -> C (for std). Optional batch 18 | normalization layers follow each linear layer, and dropout is 19 | applied after each layer 20 | 21 | :param C: the number of pathways 22 | :param G: the number of proteins 23 | :param const: the size of each hidden layer is ceil(const * C), 24 | defaults to 2 25 | :param dropout_rate: the dropout rate, defaults to 0 26 | :param batch_norm: apply batch normalization layers if True, defaults to False 27 | """ 28 | 29 | def __init__( 30 | self, 31 | C: int, 32 | G: int, 33 | const: int = 2, 34 | dropout_rate: float = 0, 35 | batch_norm: bool = False, 36 | ) -> None: 37 | super(StateRecognitionNet, self).__init__() 38 | self.batch_norm = batch_norm 39 | 40 | hidden_layer_size = math.ceil(const * C) 41 | # First hidden layer 42 | self.linear1 = nn.Linear(G, hidden_layer_size).float() 43 | self.dropout1 = nn.Dropout(dropout_rate) 44 | 45 | # Second hidden layer 46 | self.linear2 = nn.Linear(hidden_layer_size, hidden_layer_size).float() 47 | self.dropout2 = nn.Dropout(dropout_rate) 48 | 49 | # Output layer for mu 50 | self.linear3_mu = nn.Linear(hidden_layer_size, C).float() 51 | self.dropout_mu = nn.Dropout(dropout_rate) 52 | 53 | # Output layer for std 54 | 
self.linear3_std = nn.Linear(hidden_layer_size, C).float() 55 | self.dropout_std = nn.Dropout(dropout_rate) 56 | 57 | # Batch normal layers 58 | if self.batch_norm: 59 | self.bn1 = nn.BatchNorm1d(num_features=hidden_layer_size).float() 60 | self.bn2 = nn.BatchNorm1d(num_features=hidden_layer_size).float() 61 | self.bn_out_mu = nn.BatchNorm1d(num_features=C).float() 62 | self.bn_out_std = nn.BatchNorm1d(num_features=C).float() 63 | 64 | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 65 | """One forward pass of the StateRecognitionNet 66 | 67 | :param x: the input to the recognition network model 68 | :return: the value from the output layer of the network 69 | """ 70 | # Input --linear1--> Hidden1 71 | x = self.linear1(x) 72 | if self.batch_norm: 73 | x = self.bn1(x) 74 | x = F.relu(x) 75 | x = self.dropout1(x) 76 | 77 | # Hidden1 --linear2--> Hidden2 78 | x = self.linear2(x) 79 | if self.batch_norm: 80 | x = self.bn2(x) 81 | x = F.relu(x) 82 | x = self.dropout2(x) 83 | 84 | # Hidden2 --linear3_mu--> mu 85 | mu_z = self.linear3_mu(x) 86 | if self.batch_norm: 87 | mu_z = self.bn_out_mu(mu_z) 88 | mu_z = self.dropout_mu(mu_z) 89 | 90 | # Hidden2 --linear3_std--> std 91 | std_z = self.linear3_std(x) 92 | if self.batch_norm: 93 | std_z = self.bn_out_std(std_z) 94 | std_z = self.dropout_std(std_z) 95 | 96 | return mu_z, std_z 97 | -------------------------------------------------------------------------------- /astir/models/celltype_recognet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class TypeRecognitionNet(nn.Module): 7 | """Type Recognition Neural Network. 
8 | 9 | :param C: number of classes 10 | :param G: number of features 11 | :param hidden_size: size of hidden layers, defaults to 20 12 | """ 13 | 14 | def __init__(self, C: int, G: int, hidden_size: int = 20) -> None: 15 | super(TypeRecognitionNet, self).__init__() 16 | self.hidden_1 = nn.Linear(G, hidden_size) 17 | self.hidden_2 = nn.Linear(hidden_size, C + 1) 18 | 19 | def forward(self, x: torch.Tensor) -> torch.Tensor: 20 | """One forward pass. 21 | 22 | :param x: the input vector 23 | :return: the softmax and the log-softmax (over dim 1) of the output layer 24 | """ 25 | x = self.hidden_1(x) 26 | x = F.leaky_relu(x) 27 | x = self.hidden_2(x) 28 | 29 | return F.softmax(x, dim=1), F.log_softmax(x, dim=1) 30 | -------------------------------------------------------------------------------- /bin/astir: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import torch 5 | import pandas as pd 6 | import yaml 7 | import torch 8 | 9 | import os 10 | import sys 11 | import rootpath 12 | import subprocess 13 | 14 | module_path = rootpath.detect() 15 | if module_path not in sys.path: 16 | sys.path.append(module_path) 17 | 18 | module_path = os.path.join(rootpath.detect(), "astir") 19 | if module_path not in sys.path: 20 | sys.path.append(module_path) 21 | 22 | 23 | from astir import Astir 24 | from astir.data import from_csv_yaml 25 | 26 | 27 | parser = argparse.ArgumentParser(description='Run astir') 28 | # parser.add_argument("expr_csv", help="Path to CSV expression matrix with cells as rows and proteins as columns. First column is cell ID") 29 | 30 | subparsers = parser.add_subparsers(title="Criterion", help="specify if the cells are to be classified by types or states or if the user wants convert a SingleCellExperiment data.") 31 | type_parser = subparsers.add_parser("type") 32 | state_parser = subparsers.add_parser("state") 33 | 34 | type_parser.add_argument("--prob_max", "-pm", help="For cell type only. 
Classifies celltypes according to the max probability values", default=False, action="store_true") 35 | 36 | 37 | def parser_setup(parser): 38 | parser.add_argument("expr_csv", help="Path to CSV expression matrix with cells as rows and proteins as columns. First column is cell ID") 39 | parser.add_argument("marker_yaml", help="Path to YAML file of cell markers") 40 | parser.add_argument("output_csv", help="Output CSV of cell assignments") 41 | parser.add_argument("--design", "-d", 42 | help="Path to design matrix CSV, default to None", 43 | type=str, 44 | default="None") 45 | parser.add_argument("--random_seed", "-s", 46 | help="random seed for variable initialization, default to 1234", 47 | type=int, 48 | default=1234) 49 | parser.add_argument("--dtype", "-t", 50 | help="dtype of torch tensors, valid input include torch.float32 or torch.float64, default to torch.float32", 51 | type=str, 52 | default="torch.float64") 53 | parser.add_argument("--max_epochs", "-m", 54 | help="Number of training epochs, default to 50", 55 | type=int, 56 | default=50) 57 | parser.add_argument("--learning_rate", "-r", 58 | help="learning rate, default to 1e-2", 59 | type=float, 60 | default=1e-2) 61 | parser.add_argument("--batch_size", "-b", 62 | help="batch size, default to 1024", 63 | type=int, 64 | default=1024) 65 | parser.add_argument("--delta_loss", "-l", 66 | type=float, 67 | help="the model will stop if delta loss between epochs is reached, default to 1e-3", 68 | default=1e-3) 69 | parser.add_argument("--n_init", "-n", 70 | help="number of models to initialize before choosing the best one to finish the training, default to 3", 71 | type=int, 72 | default=3) 73 | parser.add_argument("--n_init_epochs", "-i", 74 | help="number of initial epochs before the actual training, default to 5", 75 | type=int, 76 | default=5) 77 | 78 | parser_setup(type_parser) 79 | 80 | parser_setup(state_parser) 81 | state_parser.add_argument("--delta_loss_batch", 82 | help="the batch size to consider 
def _normalize_common_args(args):
    """Translate the string sentinels shared by ``type`` and ``state`` in place.

    * ``args.design == "None"`` (the parser default) becomes ``None``.
    * ``args.dtype`` given as ``"torch.float32"`` / ``"torch.float64"`` becomes
      the matching ``torch`` dtype. A non-string value is left untouched.

    :param args: parsed namespace carrying ``design`` and ``dtype``.
    :return: the same ``args`` object, for convenience.
    :raises ValueError: if ``args.dtype`` is a string other than the two
        supported names (previously such a value was passed on unchanged).
    """
    if args.design == "None":
        args.design = None

    if isinstance(args.dtype, str):
        supported = {
            "torch.float32": torch.float32,
            "torch.float64": torch.float64,
        }
        try:
            args.dtype = supported[args.dtype]
        except KeyError:
            raise ValueError(
                "Unsupported dtype %r; expected one of: %s"
                % (args.dtype, ", ".join(sorted(supported)))
            ) from None
    return args


def run_type(args):
    """Callback for the ``type`` sub-command.

    Normalises ``args``, builds the model object with ``from_csv_yaml``, calls
    ``fit_type`` with the training options, writes the assignments to
    ``args.output_csv`` and the probabilities returned by
    ``get_celltype_probabilities()`` to ``<output stem>.probabilities.csv``.

    :param args: namespace produced by the ``type`` sub-parser.
    :raises ValueError: if ``args.dtype`` is an unsupported string.
    """
    _normalize_common_args(args)

    a = from_csv_yaml(
        args.expr_csv,
        args.marker_yaml,
        design_csv=args.design,
        random_seed=args.random_seed,
        dtype=args.dtype,
    )
    a.fit_type(
        max_epochs=args.max_epochs,
        learning_rate=args.learning_rate,
        batch_size=args.batch_size,
        delta_loss=args.delta_loss,
        n_init=args.n_init,
        n_init_epochs=args.n_init_epochs,
    )

    # --prob_max switches from threshold-based to max-probability assignment.
    assignment_type = "max" if args.prob_max else "threshold"
    a.type_to_csv(args.output_csv, assignment_type=assignment_type)

    # The probabilities file sits next to the output CSV, sharing its stem.
    prob_fn = os.path.splitext(args.output_csv)[0] + ".probabilities.csv"
    a.get_celltype_probabilities().to_csv(prob_fn)


def run_state(args):
    """Callback for the ``state`` sub-command.

    Normalises ``args``, builds the model object with ``from_csv_yaml``, calls
    ``fit_state`` with the shared and state-specific training options, and
    writes the result with ``state_to_csv(args.output_csv)``.

    NOTE(review): ``--batch_norm`` is declared on the state parser with
    ``type=bool``; argparse then applies ``bool()`` to the raw string, so any
    non-empty value (including "False") arrives here as ``True``. That has to
    be fixed where the option is declared, not in this callback.

    :param args: namespace produced by the ``state`` sub-parser.
    :raises ValueError: if ``args.dtype`` is an unsupported string.
    """
    _normalize_common_args(args)

    a = from_csv_yaml(
        args.expr_csv,
        args.marker_yaml,
        design_csv=args.design,
        random_seed=args.random_seed,
        dtype=args.dtype,
    )
    a.fit_state(
        max_epochs=args.max_epochs,
        learning_rate=args.learning_rate,
        batch_size=args.batch_size,
        delta_loss=args.delta_loss,
        n_init=args.n_init,
        n_init_epochs=args.n_init_epochs,
        delta_loss_batch=args.delta_loss_batch,
        const=args.const,
        dropout_rate=args.dropout_rate,
        batch_norm=args.batch_norm,
    )
    a.state_to_csv(args.output_csv)


def convert_rds(args):
    """Callback for the ``convert`` sub-command.

    Runs ``Rscript`` on ``../astir/data/rds_reader.R`` (resolved relative to
    the directory of this script) with the input/output paths and the
    ``--assay``, ``--design_col``, ``--design_csv`` and ``--winsorize`` values
    taken from ``args``.

    :param args: namespace produced by the ``convert`` sub-parser.
    :raises SystemExit: with the ``Rscript`` exit status when it is non-zero,
        so a failed conversion is no longer reported as success.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    reader = os.path.join(script_dir, "..", "astir", "data", "rds_reader.R")
    cmd = [
        "Rscript", reader, args.in_rds, args.out_csv,
        "--assay", args.assay,
        "--design_col", args.design_col,
        "--design_csv", args.design_csv,
        "--winsorize", str(args.winsorize),
    ]
    returncode = subprocess.call(cmd, cwd=os.getcwd())
    if returncode != 0:
        # Propagate the failure instead of silently dropping the exit status.
        raise SystemExit(returncode)
| # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/astir.data.rst: -------------------------------------------------------------------------------- 1 | astir.data package 2 | ==================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: astir.data 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: -------------------------------------------------------------------------------- /docs/astir.models.rst: -------------------------------------------------------------------------------- 1 | astir.models package 2 | ==================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: astir.models 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/astir.rst: -------------------------------------------------------------------------------- 1 | astir package 2 | ==================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. autoclass:: astir.Astir 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath("..")) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = "astir" 22 | copyright = "2020, Jinyu Hou, Sunyun Lee, Michael Geuenich, Kieran Campbell" 23 | author = "Jinyu Hou, Sunyun Lee, Michael Geuenich, Kieran Campbell" 24 | 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | "sphinx.ext.autodoc", 33 | "sphinx.ext.doctest", 34 | "sphinx.ext.viewcode", 35 | "sphinx_autodoc_typehints", 36 | "sphinx.ext.mathjax", 37 | "sphinx_rtd_theme", 38 | "sphinx.ext.intersphinx", 39 | "autodocsumm", 40 | "nbsphinx", 41 | "nbsphinx_link", 42 | ] 43 | 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ["_templates"] 47 | 48 | # List of patterns, relative to source directory, that match files and 49 | # directories to ignore when looking for source files. 50 | # This pattern also affects html_static_path and html_extra_path. 
51 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 52 | 53 | autodoc_default_options = {"autosummary": True} 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | html_theme = "sphinx_rtd_theme" 61 | 62 | # Add any paths that contain custom static files (such as style sheets) here, 63 | # relative to this directory. They are copied after the builtin static files, 64 | # so a file named "default.css" will overwrite the builtin "default.css". 65 | html_static_path = [] 66 | 67 | # The name of the Pygments (syntax highlighting) style to use. 68 | pygments_style = "sphinx" 69 | -------------------------------------------------------------------------------- /docs/development.rst: -------------------------------------------------------------------------------- 1 | Development 2 | ----------- 3 | 4 | How to render the docs 5 | ~~~~~~~~~~~~~~~~~~~~~~ 6 | Install Sphinx 7 | 8 | .. code:: 9 | 10 | $ pip install sphinx 11 | 12 | From the main project directory `cd` into docs directory 13 | 14 | .. code:: 15 | 16 | $ cd docs 17 | 18 | Build the existing reStructuredText files 19 | 20 | .. code:: 21 | 22 | $ make html 23 | 24 | If the above command fails with "Could not import extension <extension-name>", 25 | pip install the missing extensions until the build succeeds. 26 | 27 | Open ``astir/docs/_build/html/index.html`` in your favourite browser by copying 28 | its absolute path into your browser URL bar. 29 | If you are using the `PyCharm` editor, you can 30 | 31 | right click on ``index.html`` in the file browser -> `Open in Browser` -> 32 | select your favourite browser 33 | 34 | 35 | 36 | How to run nosetests and add a test 37 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 38 | 39 | Running nosetests 40 | ################# 41 | 42 | Method 1 43 | ******** 44 | Run one test module at a time 45 | 46 | .. 
code:: 47 | 48 | $ nosetests astir/tests/test_astir.py 49 | $ nosetests astir/tests/models/test_cellstate.py 50 | 51 | Method 2 52 | ******** 53 | Run all test modules at once 54 | 55 | .. code:: 56 | 57 | $ nosetests 58 | 59 | in any project module directory. You might need to install the nose package. 60 | 61 | Adding a unittest 62 | ################# 63 | 64 | 65 | Best git practices 66 | ~~~~~~~~~~~~~~~~~~ 67 | 68 | The best git practice is to start your own local branch, and commit to your local branch's 69 | remote branch once in a while. Once your branch is ready to merge into the 70 | origin master repo, you want to git merge, pull, and push. 71 | 72 | 73 | Git clone and start a new branch 74 | ################################ 75 | 76 | This is the first step you want to take and won't have to repeat unless you want 77 | to clone on another machine or create a new branch. 78 | 79 | .. code:: 80 | 81 | $ git clone https://github.com/camlab-bioml/astir.git 82 | $ git checkout -b <branch-name> 83 | 84 | 85 | Update your copy in the repo (git add, commit, push) 86 | #################################################### 87 | 88 | You might want to do git commits once in a while to save your work or create a new checkpoint. 89 | 90 | .. code:: 91 | 92 | $ git add ... 93 | $ git commit -m "<commit message>" 94 | 95 | Additionally push your work in local branch to its remote branch 96 | 97 | .. code:: 98 | 99 | $ git push origin <branch-name> 100 | 101 | or 102 | 103 | .. code:: 104 | 105 | $ git push 106 | 107 | If you are using the second command make sure that your local branch, called `branch-name`, 108 | is pushing to its remote branch, called `origin/branch-name` 109 | 110 | 111 | Update origin/master (git merge, pull) 112 | ###################################### 113 | 114 | To update Master remote branch 115 | 116 | First, commit and push all your current work to your remote branch 117 | 118 | Second, checkout master 119 | 120 | .. 
code:: 121 | 122 | $ git checkout master 123 | 124 | This changes your working branch to `local master`. 125 | 126 | You can view your current working branch with the following command 127 | 128 | .. code:: 129 | 130 | $ git branch 131 | 132 | .. code:: 133 | 134 | $ git merge <branch-name> 135 | 136 | Resolve any merge conflicts you get. Once the merge is complete and 137 | all conflicts are resolved, 138 | 139 | update the local master branch by 140 | 141 | .. code:: 142 | 143 | $ git pull origin master 144 | 145 | or depending on your setup you may even be able to run 146 | 147 | .. code:: 148 | 149 | $ git pull 150 | 151 | This merges the remote master branch into the current one. 152 | Again, resolve any conflicts. 153 | 154 | Update remote master by following the steps outlined in 155 | `Update your copy in the repo` -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. include:: ../README.rst 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | :hidden: 8 | 9 | installation 10 | tutorials/index 11 | development 12 | 13 | .. toctree:: 14 | :maxdepth: 1 15 | :caption: Package Reference 16 | :hidden: 17 | 18 | astir 19 | astir.data 20 | astir.models 21 | 22 | 23 | Indices and tables 24 | ================== 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ------------ 3 | 4 | Prerequisites 5 | ~~~~~~~~~~~~~~ 6 | 7 | Install Python 3.7. 8 | Astir uses Python 3.* 9 | 10 | 11 | Astir installation 12 | ~~~~~~~~~~~~~~~~~~ 13 | PyPI 14 | #### 15 | 16 | .. code:: 17 | 18 | pip3 install astir 19 | 20 | 21 | Dev 22 | ### 23 | Clone this repo and run 24 | 25 | .. 
code:: 26 | 27 | git clone https://github.com/camlab-bioml/astir.git 28 | cd astir 29 | pip install -e . 30 | 31 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | pandas 3 | numpy 4 | nbformat 5 | pyyaml 6 | sklearn 7 | argparse 8 | matplotlib 9 | loompy 10 | tqdm 11 | anndata 12 | rootpath 13 | nbconvert 14 | nbformat 15 | seaborn 16 | jupyter_client 17 | ipykernel 18 | ipython 19 | h5py 20 | tbb 21 | FlowCytometryTools 22 | fastcluster 23 | sphinx>=3.0 24 | sphinx-autodoc-typehints 25 | sphinx_rtd_theme>=0.3.1 26 | autodocsumm 27 | nbsphinx 28 | nbsphinx-link 29 | tables 30 | typeguard 31 | mypy -------------------------------------------------------------------------------- /docs/source/_static/figs/astir.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/camlab-bioml/astir/61fce25cff8baefcd4102d4f7bd990064740b0a6/docs/source/_static/figs/astir.png -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ==================== 3 | 4 | .. toctree:: 5 | :caption: Available tutorials 6 | :maxdepth: 1 7 | 8 | notebooks/getting_started.ipynb 9 | notebooks/data_loading.ipynb -------------------------------------------------------------------------------- /docs/tutorials/notebooks/data/cell-states.csv: -------------------------------------------------------------------------------- 1 | ,RTK_signalling,proliferation,mTOR_signalling,apoptosis 2 | BaselTMA_SP41_186_X5Y4_3679,0.5683861877028941,0.9008611646823064,0.656078413193121,0.5481102160359178 3 | BaselTMA_SP41_153_X7Y5_246,0.42135746208636826,0.0,0.5248592191573491,0.8209456743672587 4 | BaselTMA_SP41_20_X12Y5_197,0.9448294958510268,0.5611590125046978,0.9792766571467469,0.7787873053589222 5 | BaselTMA_SP41_14_X1Y8_84,0.8584260864711544,0.7057874727678192,0.9380682885345356,0.6973194282038573 6 | BaselTMA_SP41_166_X15Y4_266,0.9336716085602808,0.5740305924835645,0.9805683620417673,0.7649673316695325 7 | BaselTMA_SP41_117_X13Y3_4413,0.9165273797565595,0.5190921054609731,0.9634443093655509,0.6829250738143054 8 | BaselTMA_SP41_133_X3Y6_1249,0.8784402258793473,0.5106262743544459,0.947323839676601,0.7679660150043578 9 | BaselTMA_SP41_206_X2Y2_140,0.9448144374018663,0.5611763837173367,0.9792784004044072,0.7787686542141508 10 | BaselTMA_SP41_112_X5Y8_690,0.909172886097597,0.6063945375512645,0.9812380411002166,0.7376820562225291 11 | BaselTMA_SP41_134_X3Y8_449,0.9364515293792278,0.5708237153908312,0.9802465415002943,0.7684104953774414 12 | 
BaselTMA_SP41_133_X3Y6_1318,0.8296926689978149,0.7448375371412519,0.8876450109533947,0.6624633849940122 13 | BaselTMA_SP41_117_X13Y3_2100,0.5883466657351801,0.7843375778335371,0.7683718381788094,0.692656601258574 14 | BaselTMA_SP41_211_X4Y6_2201,0.8673760515114027,0.6674996766099005,0.968481991997465,0.6966913264794624 15 | BaselTMA_SP41_203_X8Y8_1760,0.9329213155943029,0.5748961197709008,0.980655220518789,0.7640380312761016 16 | BaselTMA_SP41_141_X11Y2_5273,0.8132161021410352,0.5832067855531516,0.8145415330364812,0.7702550510482213 17 | BaselTMA_SP41_14_X1Y8_930,0.613207498314766,0.7372233874007071,0.7433014583163639,0.8043511347153588 18 | BaselTMA_SP41_191_X15Y7_1306,0.8197467646255996,0.8082792363283259,0.9506648177424831,0.7165168964663032 19 | BaselTMA_SP41_186_X5Y4_1592,0.8227321647892625,0.688208760637938,0.889146405570119,0.6914900584251994 20 | BaselTMA_SP41_141_X11Y2_5215,0.9630460687418108,0.5305704528800943,0.975010256531057,0.7936639177436892 21 | BaselTMA_SP41_206_X1Y2_1390,0.8740477966545119,0.6375826141755034,0.9835778092729073,0.6913027459975284 22 | BaselTMA_SP41_133_X3Y6_2298,0.8977672477883155,0.6339746950029904,0.9774666091381966,0.7360528968958097 23 | BaselTMA_SP41_134_X3Y8_1196,0.8150595056511724,0.7867417930482816,0.8937710469779542,0.675873654559721 24 | BaselTMA_SP41_11_X13Y7_3460,0.9639256105961279,0.5283094217503538,0.9744269457642891,0.7936797460454461 25 | BaselTMA_SP41_117_X13Y3_660,0.6929122083179012,0.7678609863042838,0.8413514044225394,0.7131695289898465 26 | BaselTMA_SP41_159_X12Y4_2161,1.0,0.4355732807376605,0.9505024677641046,0.7943289434865678 27 | BaselTMA_SP41_134_X3Y8_176,0.9684840304576476,0.509142043533747,0.9667865834909903,0.7867131425630083 28 | BaselTMA_SP41_204_X16Y4_237_233,0.8413612604166607,0.7235045050522202,0.8018374590345302,0.6565492240150937 29 | BaselTMA_SP41_11_X13Y7_3290,0.9028288609935102,0.5991236964321753,0.990644141296899,0.7269380142152048 30 | 
BaselTMA_SP41_204_X16Y4_237_8,0.6274522437762514,0.5999156777230648,0.6702779859899637,0.6891782253746888 31 | BaselTMA_SP41_134_X3Y8_234,0.4933364031951663,0.9214781462869337,0.6233128444024684,0.5612948239965897 32 | BaselTMA_SP41_11_X13Y7_1243,0.7093485652795622,0.6179435071132653,0.7992886383979854,0.7862472108645938 33 | BaselTMA_SP41_135_X8Y5_3127,0.8261351283923714,0.7297916839400705,0.85764896730103,0.6330945930104982 34 | BaselTMA_SP41_11_X13Y7_3217,0.9334666222972604,0.5711085747600408,0.9843471260233847,0.7638020328901459 35 | BaselTMA_SP41_126_X14Y7_1746,0.9713240543491568,0.5092903022449871,0.9695203095680865,0.7938128889989964 36 | BaselTMA_SP41_18_X13Y5_762,0.9437276618670937,0.5621023834419077,0.9797937921742678,0.7773280348535863 37 | BaselTMA_SP41_203_X8Y8_527,0.8610991541402861,0.35395403285158245,0.8621955080812572,0.8358712524463917 38 | BaselTMA_SP41_134_X3Y8_880,0.7749596510379675,0.8395675083095964,0.8350608766036457,0.612416246610902 39 | BaselTMA_SP41_14_X1Y8_2568,0.9047780764139626,0.6505566720463135,0.9483150102471798,0.7457623042284587 40 | BaselTMA_SP41_18_X13Y5_1219,0.9403854325953545,0.5662856206282599,0.9797911289380057,0.7732829624966033 41 | BaselTMA_SP41_11_X13Y7_2886,0.9464318580923868,0.523924468744974,0.9674894493583532,0.7510450716088619 42 | BaselTMA_SP41_112_X5Y8_298,0.9493661783546589,0.5556216410810064,0.9791127837351369,0.7843186666535382 43 | BaselTMA_SP41_206_X1Y2_3026,0.9349917881554249,0.5463770947101666,0.9767292674308604,0.7464257122956492 44 | BaselTMA_SP41_135_X8Y5_430,0.9532774703180191,0.5509606302494978,0.978837130671838,0.7891201327060118 45 | BaselTMA_SP41_100_X15Y5_2042,0.9403117827302105,0.5663705820645004,0.9797996550943329,0.7731917409981252 46 | BaselTMA_SP41_204_X16Y4_237_30,0.8670915212412714,0.7222734953719226,0.92714365227904,0.730658873001723 47 | BaselTMA_SP41_134_X3Y8_1748,0.8662435753032766,0.6555144576982328,0.8542324924830527,0.6617317488506399 48 | BaselTMA_SP41_186_X5Y4_1179,0.0,1.0,0.0,0.0 
49 | BaselTMA_SP41_159_X12Y4_1391,0.5119090911396756,0.8797231848366114,0.620562695610923,0.5660433074053394 50 | BaselTMA_SP41_135_X8Y5_2977,0.9401257379206864,0.5426375317926229,0.9752604355445853,0.7540401010407788 51 | BaselTMA_SP41_165_X5Y6_308,0.9315982859235313,0.574333712028669,0.9832915011637139,0.7617966564284824 52 | BaselTMA_SP41_117_X13Y3_4099,0.5532031314329302,0.7948063868844072,0.7375400413799149,0.6865074931630626 53 | BaselTMA_SP41_117_X13Y3_1932,0.37408086803722784,0.7291052607443487,0.5471294184983165,0.7001065762902585 54 | BaselTMA_SP41_14_X1Y8_3119,0.9016882426424039,0.6173901755540867,0.9819959315950801,0.7383644238146597 55 | BaselTMA_SP41_165_X5Y6_1554,0.9510592064218791,0.5539725070517864,0.9785554679619805,0.7865033212979875 56 | BaselTMA_SP41_134_X3Y8_1549,0.9595003357226713,0.34762974333199126,0.7922051925818899,0.6150737744884648 57 | BaselTMA_SP41_177_X16Y5_130,0.9475763269380485,0.5579903072557786,0.9789586672738932,0.7821894846750118 58 | BaselTMA_SP41_126_X14Y7_1090,0.9399217711854111,0.566820493831913,0.9798448052028084,0.7727086791829946 59 | BaselTMA_SP41_203_X8Y8_2049,0.9231667984531386,0.5861487919180602,0.98178446275,0.7519562485218166 60 | BaselTMA_SP41_204_X16Y4_238_343,0.48411244490527455,0.7641365605096178,0.48340653299525516,0.4840349738901043 61 | BaselTMA_SP41_206_X1Y2_3145,0.9423150242146562,0.5640596711983,0.9795677476763837,0.7756729225952445 62 | BaselTMA_SP41_141_X11Y2_848,0.9416201625415418,0.5648612537350907,0.9796481890898104,0.7748122784795639 63 | BaselTMA_SP41_11_X13Y7_4100,0.9226986748757543,0.5858778655904018,0.9828027669757085,0.7511424333131026 64 | BaselTMA_SP41_101_X10Y8_1777,0.7572473323788173,0.7782294013641524,0.8921011628001956,0.7037733019938961 65 | BaselTMA_SP41_186_X5Y4_783,0.317496400415106,0.9496248223519934,0.4071641065889076,0.3923494916279402 66 | BaselTMA_SP41_186_X5Y4_2069,0.6444420908360678,0.6343234832843391,0.6468149142661326,0.6330790130403744 67 | 
BaselTMA_SP41_141_X11Y2_2159,0.48167205849320194,0.8423292999858946,0.6090247432206038,0.5908965400960932 68 | BaselTMA_SP41_129_X7Y8_1460,0.789037760261192,0.778308755473205,0.9130462756594768,0.6215828044285233 69 | BaselTMA_SP41_11_X13Y7_4434,0.9408081453775343,0.5045657700076324,0.9639889317809095,0.7239387020312057 70 | BaselTMA_SP41_203_X8Y8_1690,0.928492852115993,0.5800047322133148,0.9811678863849423,0.7585530100386647 71 | BaselTMA_SP41_191_X15Y7_3242,0.8428501285502802,0.7685351460653259,0.9450636805064839,0.7253147568194419 72 | BaselTMA_SP41_106_X2Y9_1894,0.8659078057497382,0.6895785113455724,0.962581425319511,0.7046860785229926 73 | BaselTMA_SP41_112_X5Y8_1944,0.9593329043309012,0.5338063567738868,0.9735619553063308,0.7876267835912836 74 | BaselTMA_SP41_11_X13Y7_2486,0.8408027916193499,0.738815060118734,0.9583858859679801,0.7134461970401588 75 | BaselTMA_SP41_100_X15Y5_2376,0.9216199309220999,0.5877263844750931,0.9822094576620096,0.7499806354020282 76 | BaselTMA_SP41_134_X3Y8_1484,0.9071021047767458,0.612097574742227,0.9812624883537395,0.7386492761454231 77 | BaselTMA_SP41_14_X1Y8_978,0.7999464718828199,0.8110307752336916,0.7950614850674159,0.5872780162692106 78 | BaselTMA_SP41_203_X8Y8_2532,0.9085244516887326,0.631336742857399,0.9676686007181949,0.7511455100009246 79 | BaselTMA_SP41_186_X5Y4_1448,0.73022780966513,0.8401935563522124,0.8303268945979548,0.6473252488075487 80 | BaselTMA_SP41_133_X3Y6_3169,0.9417875947051213,0.5083866868015429,1.0,0.804412496828526 81 | BaselTMA_SP41_126_X14Y7_637,0.9535342656249489,0.542822704553951,0.9964665482125974,0.792939912592704 82 | BaselTMA_SP41_153_X7Y5_316,0.6475168372299157,0.8163887779051545,0.644122165900938,0.5251679875876055 83 | BaselTMA_SP41_100_X15Y5_3182,0.9562162752640873,0.475267510991313,0.9471400850438012,0.7292380695724001 84 | BaselTMA_SP41_104_X8Y4_175_1418,0.9539498854103686,0.3682129365144746,0.9782905647564721,0.8937668875996561 85 | 
BaselTMA_SP41_191_X15Y7_4,0.8845976630140503,0.6586378494769886,0.9837276610024772,0.7335352932845282 86 | BaselTMA_SP41_126_X14Y7_390,0.9860303172361092,0.46640912056147205,0.9566208973936159,0.7892745042321492 87 | BaselTMA_SP41_135_X8Y5_15,0.848395086846682,0.6635741715469687,0.9363894257521937,0.6453337664855221 88 | BaselTMA_SP41_135_X8Y5_3269,0.9326720530291621,0.5332976234294362,0.9711516955222596,0.7300693203953456 89 | BaselTMA_SP41_186_X5Y4_2055,0.79439828246075,0.7763597680412304,0.8180027031760888,0.6273177255531899 90 | BaselTMA_SP41_166_X15Y4_2203,0.8679087898414615,0.7232129520382664,0.9598491753381967,0.7453549987330189 91 | BaselTMA_SP41_100_X15Y5_1530,0.9882947948504436,0.4566752219525483,0.9526939065073442,0.7856129665204523 92 | BaselTMA_SP41_186_X6Y4_501,0.9564710849291278,0.5271768877528183,0.9690818292581777,0.7751739527063678 93 | BaselTMA_SP41_133_X3Y6_3842,0.8699284167151282,0.411558582549592,0.9160328052134746,0.7857598477320293 94 | BaselTMA_SP41_14_X1Y8_294,0.8639389339490104,0.6542170614268386,0.840618935203876,0.6535802190420854 95 | BaselTMA_SP41_101_X10Y8_1109,0.8895013444787155,0.6568051455469311,0.9625414501094288,0.7325145158209928 96 | BaselTMA_SP41_165_X5Y6_151,0.9481249077408093,0.5452837952820326,0.9806407264661297,0.7748002888850297 97 | BaselTMA_SP41_114_X13Y4_1057,0.881551432927461,0.4470017473540504,0.8990080030828952,1.0 98 | BaselTMA_SP41_141_X11Y2_2596,0.767853353934209,0.6848471476026506,0.8567732652719334,0.7220459645355986 99 | BaselTMA_SP41_100_X15Y5_170,0.9529773290758339,0.548220158898451,0.9778985866220806,0.7861694006359575 100 | BaselTMA_SP41_14_X1Y8_2604,0.8362411994298711,0.6926172166191192,0.9082564852125949,0.7008674704600535 101 | BaselTMA_SP41_186_X5Y4_81,0.6916978133183866,0.7191789182185242,0.7050115316884292,0.6600712745008946 102 | -------------------------------------------------------------------------------- /docs/tutorials/notebooks/data/cell-types.csv: 
-------------------------------------------------------------------------------- 1 | ,cell_type 2 | BaselTMA_SP41_186_X5Y4_3679,Unknown 3 | BaselTMA_SP41_153_X7Y5_246,Unknown 4 | BaselTMA_SP41_20_X12Y5_197,Unknown 5 | BaselTMA_SP41_14_X1Y8_84,Unknown 6 | BaselTMA_SP41_166_X15Y4_266,Unknown 7 | BaselTMA_SP41_117_X13Y3_4413,Unknown 8 | BaselTMA_SP41_133_X3Y6_1249,Unknown 9 | BaselTMA_SP41_206_X2Y2_140,Unknown 10 | BaselTMA_SP41_112_X5Y8_690,Unknown 11 | BaselTMA_SP41_134_X3Y8_449,Unknown 12 | BaselTMA_SP41_133_X3Y6_1318,Unknown 13 | BaselTMA_SP41_117_X13Y3_2100,Unknown 14 | BaselTMA_SP41_211_X4Y6_2201,Unknown 15 | BaselTMA_SP41_203_X8Y8_1760,Unknown 16 | BaselTMA_SP41_141_X11Y2_5273,Unknown 17 | BaselTMA_SP41_14_X1Y8_930,Unknown 18 | BaselTMA_SP41_191_X15Y7_1306,Unknown 19 | BaselTMA_SP41_186_X5Y4_1592,Unknown 20 | BaselTMA_SP41_141_X11Y2_5215,Unknown 21 | BaselTMA_SP41_206_X1Y2_1390,Unknown 22 | BaselTMA_SP41_133_X3Y6_2298,Unknown 23 | BaselTMA_SP41_134_X3Y8_1196,Unknown 24 | BaselTMA_SP41_11_X13Y7_3460,Unknown 25 | BaselTMA_SP41_117_X13Y3_660,Unknown 26 | BaselTMA_SP41_159_X12Y4_2161,Unknown 27 | BaselTMA_SP41_134_X3Y8_176,Unknown 28 | BaselTMA_SP41_204_X16Y4_237_233,Unknown 29 | BaselTMA_SP41_11_X13Y7_3290,Unknown 30 | BaselTMA_SP41_204_X16Y4_237_8,Unknown 31 | BaselTMA_SP41_134_X3Y8_234,Unknown 32 | BaselTMA_SP41_11_X13Y7_1243,Unknown 33 | BaselTMA_SP41_135_X8Y5_3127,Unknown 34 | BaselTMA_SP41_11_X13Y7_3217,Unknown 35 | BaselTMA_SP41_126_X14Y7_1746,Unknown 36 | BaselTMA_SP41_18_X13Y5_762,Unknown 37 | BaselTMA_SP41_203_X8Y8_527,Unknown 38 | BaselTMA_SP41_134_X3Y8_880,Unknown 39 | BaselTMA_SP41_14_X1Y8_2568,Unknown 40 | BaselTMA_SP41_18_X13Y5_1219,Unknown 41 | BaselTMA_SP41_11_X13Y7_2886,Unknown 42 | BaselTMA_SP41_112_X5Y8_298,Unknown 43 | BaselTMA_SP41_206_X1Y2_3026,Unknown 44 | BaselTMA_SP41_135_X8Y5_430,Unknown 45 | BaselTMA_SP41_100_X15Y5_2042,Unknown 46 | BaselTMA_SP41_204_X16Y4_237_30,Unknown 47 | BaselTMA_SP41_134_X3Y8_1748,Unknown 48 | 
BaselTMA_SP41_186_X5Y4_1179,Unknown 49 | BaselTMA_SP41_159_X12Y4_1391,Unknown 50 | BaselTMA_SP41_135_X8Y5_2977,Unknown 51 | BaselTMA_SP41_165_X5Y6_308,Unknown 52 | BaselTMA_SP41_117_X13Y3_4099,Unknown 53 | BaselTMA_SP41_117_X13Y3_1932,Unknown 54 | BaselTMA_SP41_14_X1Y8_3119,Unknown 55 | BaselTMA_SP41_165_X5Y6_1554,Unknown 56 | BaselTMA_SP41_134_X3Y8_1549,Unknown 57 | BaselTMA_SP41_177_X16Y5_130,Unknown 58 | BaselTMA_SP41_126_X14Y7_1090,Unknown 59 | BaselTMA_SP41_203_X8Y8_2049,Unknown 60 | BaselTMA_SP41_204_X16Y4_238_343,Unknown 61 | BaselTMA_SP41_206_X1Y2_3145,Unknown 62 | BaselTMA_SP41_141_X11Y2_848,Unknown 63 | BaselTMA_SP41_11_X13Y7_4100,Unknown 64 | BaselTMA_SP41_101_X10Y8_1777,Unknown 65 | BaselTMA_SP41_186_X5Y4_783,Unknown 66 | BaselTMA_SP41_186_X5Y4_2069,Unknown 67 | BaselTMA_SP41_141_X11Y2_2159,Unknown 68 | BaselTMA_SP41_129_X7Y8_1460,Unknown 69 | BaselTMA_SP41_11_X13Y7_4434,Unknown 70 | BaselTMA_SP41_203_X8Y8_1690,Unknown 71 | BaselTMA_SP41_191_X15Y7_3242,Unknown 72 | BaselTMA_SP41_106_X2Y9_1894,Unknown 73 | BaselTMA_SP41_112_X5Y8_1944,Unknown 74 | BaselTMA_SP41_11_X13Y7_2486,Unknown 75 | BaselTMA_SP41_100_X15Y5_2376,Unknown 76 | BaselTMA_SP41_134_X3Y8_1484,Unknown 77 | BaselTMA_SP41_14_X1Y8_978,Unknown 78 | BaselTMA_SP41_203_X8Y8_2532,Unknown 79 | BaselTMA_SP41_186_X5Y4_1448,Unknown 80 | BaselTMA_SP41_133_X3Y6_3169,Unknown 81 | BaselTMA_SP41_126_X14Y7_637,Unknown 82 | BaselTMA_SP41_153_X7Y5_316,Unknown 83 | BaselTMA_SP41_100_X15Y5_3182,Unknown 84 | BaselTMA_SP41_104_X8Y4_175_1418,Unknown 85 | BaselTMA_SP41_191_X15Y7_4,Unknown 86 | BaselTMA_SP41_126_X14Y7_390,Unknown 87 | BaselTMA_SP41_135_X8Y5_15,Unknown 88 | BaselTMA_SP41_135_X8Y5_3269,Unknown 89 | BaselTMA_SP41_186_X5Y4_2055,Unknown 90 | BaselTMA_SP41_166_X15Y4_2203,Unknown 91 | BaselTMA_SP41_100_X15Y5_1530,Unknown 92 | BaselTMA_SP41_186_X6Y4_501,Unknown 93 | BaselTMA_SP41_133_X3Y6_3842,Unknown 94 | BaselTMA_SP41_14_X1Y8_294,Unknown 95 | BaselTMA_SP41_101_X10Y8_1109,Unknown 96 | 
BaselTMA_SP41_165_X5Y6_151,Unknown 97 | BaselTMA_SP41_114_X13Y4_1057,Unknown 98 | BaselTMA_SP41_141_X11Y2_2596,Unknown 99 | BaselTMA_SP41_100_X15Y5_170,Unknown 100 | BaselTMA_SP41_14_X1Y8_2604,Unknown 101 | BaselTMA_SP41_186_X5Y4_81,Unknown 102 | -------------------------------------------------------------------------------- /docs/tutorials/notebooks/img/celltype_protein_cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camlab-bioml/astir/61fce25cff8baefcd4102d4f7bd990064740b0a6/docs/tutorials/notebooks/img/celltype_protein_cluster.png -------------------------------------------------------------------------------- /docs/tutorials/notebooks/img/hdf5_schematics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camlab-bioml/astir/61fce25cff8baefcd4102d4f7bd990064740b0a6/docs/tutorials/notebooks/img/hdf5_schematics.png -------------------------------------------------------------------------------- /docs/tutorials/notebooks/img/hierarchical_celltype_cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camlab-bioml/astir/61fce25cff8baefcd4102d4f7bd990064740b0a6/docs/tutorials/notebooks/img/hierarchical_celltype_cluster.png -------------------------------------------------------------------------------- /envs/imc.yml: -------------------------------------------------------------------------------- 1 | name: imc 2 | channels: 3 | - bioconda 4 | - pytorch 5 | - anaconda 6 | - conda-forge 7 | - defaults 8 | dependencies: 9 | - _libgcc_mutex=0.1=conda_forge 10 | - _openmp_mutex=4.5=1_llvm 11 | - aioeasywebdav=2.4.0=py38_1000 12 | - aiohttp=3.6.2=py38h516909a_0 13 | - appdirs=1.4.3=py_1 14 | - async-timeout=3.0.1=py_1000 15 | - attrs=19.3.0=py_0 16 | - backcall=0.1.0=py38_0 17 | - bcrypt=3.1.7=py38h1e0a361_1 18 | - blas=1.0=mkl 19 | - 
boto3=1.12.46=pyh9f0ad1d_0 20 | - botocore=1.15.46=pyh9f0ad1d_0 21 | - brotlipy=0.7.0=py38h1e0a361_1000 22 | - bzip2=1.0.8=h516909a_2 23 | - ca-certificates=2020.4.5.1=hecc5488_0 24 | - cachetools=3.1.1=py_0 25 | - cairo=1.16.0=hcf35c78_1003 26 | - certifi=2020.4.5.1=py38h32f6830_0 27 | - cffi=1.14.0=py38hd463f26_0 28 | - chardet=3.0.4=py38h32f6830_1006 29 | - configargparse=1.2.3=pyh9f0ad1d_0 30 | - crc32c=2.0=py38h516909a_0 31 | - cryptography=2.8=py38h766eaa4_2 32 | - cudatoolkit=10.2.89=hfd86e86_0 33 | - datrie=0.8.2=py38h1e0a361_0 34 | - decorator=4.4.2=py_0 35 | - docutils=0.15.2=py38_0 36 | - dropbox=9.4.0=py_0 37 | - expat=2.2.9=he1b5a44_2 38 | - fftw=3.3.8=mpi_mpich_h3f9e1be_1010 39 | - filechunkio=1.8=py_2 40 | - fontconfig=2.13.1=h86ecdb6_1001 41 | - freetype=2.10.1=he06d7ca_0 42 | - ftputil=3.4=py_0 43 | - gdk-pixbuf=2.36.12=h3f25603_1005 44 | - gettext=0.19.8.1=hc5be6a0_1002 45 | - ghostscript=9.22=hf484d3e_1001 46 | - giflib=5.1.7=h516909a_1 47 | - gitdb=4.0.4=py_0 48 | - gitpython=3.1.1=py_0 49 | - glib=2.64.2=h6f030ca_0 50 | - gobject-introspection=1.58.2=py38h03d966d_1004 51 | - google-api-core=1.17.0=py38h32f6830_0 52 | - google-auth=1.14.1=pyh9f0ad1d_0 53 | - google-cloud-core=1.3.0=py_0 54 | - google-cloud-storage=1.28.0=pyh9f0ad1d_0 55 | - google-resumable-media=0.5.0=py_1 56 | - googleapis-common-protos=1.51.0=py38h32f6830_2 57 | - graphite2=1.3.13=he1b5a44_1001 58 | - graphviz=2.38.0=hf68f40c_1011 59 | - harfbuzz=2.4.0=h9f30f68_3 60 | - icu=64.2=he1b5a44_1 61 | - idna=2.9=py_1 62 | - imagemagick=7.0.8_54=pl526h39023e4_0 63 | - importlib-metadata=1.6.0=py38h32f6830_0 64 | - importlib_metadata=1.6.0=0 65 | - ipython=7.13.0=py38h5ca1d4c_0 66 | - ipython_genutils=0.2.0=py_1 67 | - jbig=2.1=h516909a_2002 68 | - jedi=0.16.0=py38_1 69 | - jinja2=2.11.2=pyh9f0ad1d_0 70 | - jmespath=0.9.5=py_0 71 | - joblib=0.14.1=py_0 72 | - jpeg=9c=h14c3975_1001 73 | - jsonschema=3.2.0=py38h32f6830_1 74 | - jupyter_core=4.6.3=py38h32f6830_1 75 | - 
ld_impl_linux-64=2.34=h53a641e_0 76 | - libblas=3.8.0=14_mkl 77 | - libcblas=3.8.0=14_mkl 78 | - libcroco=0.6.13=h8d621e5_1 79 | - libffi=3.2.1=he1b5a44_1007 80 | - libgcc-ng=9.2.0=h24d8f2e_2 81 | - libgfortran-ng=7.3.0=hdf63c60_5 82 | - libiconv=1.15=h516909a_1006 83 | - liblapack=3.8.0=14_mkl 84 | - libpng=1.6.37=hed695b0_1 85 | - libprotobuf=3.11.4=h8b12597_0 86 | - librsvg=2.44.14=h11c8777_0 87 | - libstdcxx-ng=9.2.0=hdf63c60_2 88 | - libtiff=4.1.0=hc3755c2_3 89 | - libtool=2.4.6=h14c3975_1002 90 | - libuuid=2.32.1=h14c3975_1000 91 | - libwebp=1.0.2=hf4e8a37_4 92 | - libxcb=1.13=h14c3975_1002 93 | - libxml2=2.9.10=hee79883_0 94 | - llvm-openmp=10.0.0=hc9558a2_0 95 | - lz4-c=1.9.2=he1b5a44_0 96 | - markupsafe=1.1.1=py38h1e0a361_1 97 | - mkl=2019.5=281 98 | - mkl-service=2.3.0=py38h516909a_0 99 | - mkl_fft=1.1.0=py38hc1659b7_1 100 | - mkl_random=1.1.0=py38hb3f55d8_0 101 | - mpi=1.0=mpich 102 | - mpich=3.3.2=hc856adb_0 103 | - multidict=4.7.5=py38h516909a_0 104 | - nbformat=5.0.6=py_0 105 | - ncurses=6.1=hf484d3e_1002 106 | - networkx=2.4=py_1 107 | - ninja=1.10.0=hc9558a2_0 108 | - numpy=1.18.1=py38h4f9e942_0 109 | - numpy-base=1.18.1=py38hde5b4d6_1 110 | - openjpeg=2.3.1=h981e76c_3 111 | - openssl=1.1.1g=h516909a_0 112 | - pandas=1.0.3=py38hcb8c335_1 113 | - pango=1.40.14=he7ab937_1005 114 | - paramiko=2.7.1=py38_0 115 | - parso=0.6.2=py_0 116 | - pcre=8.44=he1b5a44_0 117 | - perl=5.26.2=h516909a_1006 118 | - pexpect=4.8.0=py38_0 119 | - pickleshare=0.7.5=py38_1000 120 | - pip=20.0.2=py_2 121 | - pixman=0.38.0=h516909a_1003 122 | - pkg-config=0.29.2=h516909a_1006 123 | - prettytable=0.7.2=py_3 124 | - prompt-toolkit=3.0.4=py_0 125 | - prompt_toolkit=3.0.4=0 126 | - protobuf=3.11.4=py38h950e882_1 127 | - psutil=5.7.0=py38h1e0a361_1 128 | - pthread-stubs=0.4=h14c3975_1001 129 | - ptyprocess=0.6.0=py38_0 130 | - pyasn1=0.4.8=py_0 131 | - pyasn1-modules=0.2.7=py_0 132 | - pycparser=2.20=py_0 133 | - pygments=2.6.1=py_0 134 | - pygraphviz=1.5=py38h1e0a361_1002 135 | 
- pynacl=1.3.0=py38h516909a_1001 136 | - pyopenssl=19.1.0=py_1 137 | - pyrsistent=0.16.0=py38h1e0a361_0 138 | - pysftp=0.2.9=py_1 139 | - pysocks=1.7.1=py38h32f6830_1 140 | - python=3.8.2=he5300dc_7_cpython 141 | - python-dateutil=2.8.1=py_0 142 | - python-irodsclient=0.8.2=py_0 143 | - python_abi=3.8=1_cp38 144 | - pytorch=1.5.0=py3.8_cuda10.2.89_cudnn7.6.5_0 145 | - pytz=2019.3=py_0 146 | - pyyaml=5.3.1=py38h1e0a361_0 147 | - ratelimiter=1.2.0=py38_1000 148 | - readline=8.0=hf8c457e_0 149 | - requests=2.23.0=pyh8c360ce_2 150 | - rsa=4.0=py_0 151 | - s3transfer=0.3.3=py38h32f6830_1 152 | - scikit-learn=0.22.2.post1=py38hcdab131_0 153 | - scipy=1.4.1=py38h18bccfc_3 154 | - setuptools=46.1.3=py38h32f6830_0 155 | - six=1.14.0=py_1 156 | - smmap=3.0.2=pyh9f0ad1d_0 157 | - snakemake=5.15.0=0 158 | - snakemake-minimal=5.15.0=py_0 159 | - sqlite=3.30.1=hcee41ef_0 160 | - tk=8.6.10=hed695b0_0 161 | - toposort=1.5=py_3 162 | - traitlets=4.3.3=py38h32f6830_1 163 | - urllib3=1.25.9=py_0 164 | - wcwidth=0.1.9=py_0 165 | - wheel=0.34.2=py_1 166 | - wrapt=1.12.1=py38h1e0a361_1 167 | - xmlrunner=1.7.7=py_0 168 | - xorg-kbproto=1.0.7=h14c3975_1002 169 | - xorg-libice=1.0.10=h516909a_0 170 | - xorg-libsm=1.2.3=h84519dc_1000 171 | - xorg-libx11=1.6.9=h516909a_0 172 | - xorg-libxau=1.0.9=h14c3975_0 173 | - xorg-libxdmcp=1.1.3=h516909a_0 174 | - xorg-libxext=1.3.4=h516909a_0 175 | - xorg-libxpm=3.5.13=h516909a_0 176 | - xorg-libxrender=0.9.10=h516909a_1002 177 | - xorg-libxt=1.1.5=h516909a_1003 178 | - xorg-renderproto=0.11.1=h14c3975_1002 179 | - xorg-xextproto=7.3.0=h14c3975_1002 180 | - xorg-xproto=7.0.31=h14c3975_1007 181 | - xz=5.2.5=h516909a_0 182 | - yaml=0.2.4=h516909a_0 183 | - yarl=1.3.0=py38h516909a_1000 184 | - zipp=3.1.0=py_0 185 | - zlib=1.2.11=h516909a_1006 186 | - zstd=1.4.4=h6597ccf_3 187 | # prefix: /home/ltri/campbell/kcampbel/.conda/envs/imc 188 | prefix: /home/ltri/campbell/kcampbel/.conda/envs/imc 189 | 190 | 
-------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: astir 2 | site_description: "``astir`` is a modelling framework for the assignment of cell type and cell state across a range of single-cell technologies such as Imaging Mass Cytometry (IMC). ``astir`` is built using `pytorch <https://pytorch.org/>`_ and uses recognition networks for fast minibatch stochastic variational inference." 3 | site_url: https://github.com/camlab-bioml/astir 4 | 5 | theme: 6 | name: 'material' 7 | 8 | repo_name: kieranrcampbell/astir 9 | repo_url: https://github.com/camlab-bioml/astir 10 | edit_uri: '' 11 | 12 | nav: 13 | - astir: 'index.md' 14 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | disallow_untyped_defs = True 3 | ignore_missing_imports = True 4 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | error::UserWarning 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | pandas 3 | numpy 4 | nbformat 5 | pyyaml 6 | scikit-learn 7 | argparse 8 | matplotlib 9 | loompy 10 | tqdm 11 | anndata 12 | rootpath 13 | nbconvert 14 | nbformat 15 | seaborn 16 | jupyter_client 17 | ipykernel 18 | ipython 19 | h5py 20 | tbb 21 | FlowCytometryTools 22 | fastcluster 23 | sphinx 24 | sphinx-autodoc-typehints 25 | sphinx-rtd-theme 26 | autodocsumm 27 | nbsphinx 28 | nbsphinx-link 29 | tables 30 | typeguard 31 | mypy 32 | scanpy 33 | 
-------------------------------------------------------------------------------- /scripts/build-docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | python -m mkdocs build 4 | # Keep the top-level README in sync with the docs landing page (only reached if the build succeeded). 5 | cp ./docs/index.rst ./README.rst 6 | -------------------------------------------------------------------------------- /scripts/docs-live.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # The shebang must be the very first line of the file to take effect. 3 | 4 | set -e 5 | # Serve the docs on all interfaces, port 8008. 6 | mkdocs serve --dev-addr 0.0.0.0:8008 7 | -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | set -x 3 | 4 | autoflake --remove-all-unused-imports --recursive --remove-unused-variables --in-place astir tests --exclude=__init__.py 5 | black astir tests 6 | isort --multi-line=3 --trailing-comma --force-grid-wrap=0 --combine-as --line-width 88 --recursive --thirdparty astir --apply astir tests 7 | -------------------------------------------------------------------------------- /scripts/lint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -x 5 | 6 | # mypy astir --disallow-untyped-defs 7 | # black astir tests --check 8 | black tests --check 9 | black astir/astir.py 10 | black astir tests --check 11 | # isort --multi-line=3 --trailing-comma --force-grid-wrap=0 --combine-as --line-width 88 --recursive --check-only --thirdparty astir astir tests 12 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -x 5 | # Forward caller-supplied arguments to pytest unchanged; quoting "$@" preserves arguments containing spaces. 6 | pytest --cov=astir --cov=tests --cov-report=term-missing "$@" 7 | bash ./scripts/lint.sh 8 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.rst", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="astir", 8 | version="0.1.2", 9 | author="Jinyu Hou, Sunyun Lee, Michael Geuenich, Kieran Campbell", 10 | author_email="kierancampbell@lunenfeld.ca", 11 | description=" ", 12 | long_description=long_description, 13 | # long_description_content_type="text/markdown", 14 | url="https://github.com/camlab-bioml/astir", 15 | packages=setuptools.find_packages(), 16 | classifiers=[ 17 | "Programming Language :: Python :: 3", 18 | "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", 19 | "Operating System :: OS Independent", 20 | "Topic :: Scientific/Engineering :: Bio-Informatics", 21 | ], 22 | license="GPLv2", 23 | install_requires=[ 24 | "torch", 25 | "pandas", 26 | "numpy", 27 | "nbformat", 28 | "pyyaml", 29 | "scikit-learn", 30 | "argparse", 31 | "matplotlib", 32 | "loompy", 33 | "tqdm", 34 | "anndata", 35 | "rootpath", 36 | "nbconvert", 37 | "nbformat", 38 | "seaborn", 39 | "jupyter_client", 40 | "ipykernel", 41 | "ipython", 42 | "h5py", 43 | "FlowCytometryTools", 44 | "fastcluster", 45 | "sphinx", 46 | "sphinx_autodoc_typehints", 47 | "sphinx_rtd_theme", 48 | "autodocsumm", 49 | "nbsphinx", 50 | "nbsphinx_link", 51 | "tables", 52 | "typeguard", 53 | "mypy", 54 | ], 55 | project_urls={ 56 | "Documentation": "https://astir.readthedocs.io/en/latest/", 57 | "Source Code": "https://github.com/camlab-bioml/astir", 58 | }, 59 | include_package_data=True, 60 | zip_safe=False, 61 | # test_suite="nose.collector", 62 | # test_require=["nose"], 63 | scripts=["bin/astir"], 64 | ) 65 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/camlab-bioml/astir/61fce25cff8baefcd4102d4f7bd990064740b0a6/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camlab-bioml/astir/61fce25cff8baefcd4102d4f7bd990064740b0a6/tests/conftest.py -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camlab-bioml/astir/61fce25cff8baefcd4102d4f7bd990064740b0a6/tests/models/__init__.py -------------------------------------------------------------------------------- /tests/models/test_cellstate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from unittest import TestCase 4 | 5 | import pandas as pd 6 | import torch 7 | import yaml 8 | 9 | from astir.data import SCDataset, from_csv_yaml 10 | from astir.models import CellStateModel 11 | 12 | 13 | class TestCellStateModel(TestCase): 14 | """Unit tests for the CellStateModel class. 15 | 16 | This class assumes that all data-initializing functions in the Astir class 17 | are working. 18 | """ 19 | 20 | def __init__(self, *args, **kwargs): 21 | super(TestCellStateModel, self).__init__(*args, **kwargs) 22 | # NOTE(review): presumably silences UserWarning because pytest.ini escalates it to an error - confirm. 23 | warnings.filterwarnings("ignore", category=UserWarning) 24 | 25 | self.expr_csv_file = os.path.join( 26 | os.path.dirname(__file__), "../test-data/sce.csv" 27 | ) 28 | self.marker_yaml_file = os.path.join( 29 | os.path.dirname(__file__), "../test-data/jackson-2020-markers.yml" 30 | ) 31 | 32 | input_expr = pd.read_csv(self.expr_csv_file) 33 | with open(self.marker_yaml_file, "r") as stream: 34 | marker_dict = yaml.safe_load(stream) 35 | 36 | state_dict = marker_dict["cell_states"] 37 | self._device = torch.device("cuda" if 
torch.cuda.is_available() else "cpu") 38 | 39 | self._dset = SCDataset( 40 | include_other_column=False, 41 | expr_input=input_expr, 42 | marker_dict=state_dict, 43 | design=None, 44 | dtype=torch.float32, 45 | device=self._device, 46 | ) 47 | 48 | self.model = CellStateModel( 49 | dset=self._dset, random_seed=42, dtype=torch.float32, device=self._device 50 | ) 51 | # Fit for a single epoch, then cache the model's data and variables for the tests below. 52 | self.model.fit(max_epochs=1) 53 | self.data = self.model.get_data() 54 | self.variables = self.model.get_variables() 55 | 56 | def test_basic_instance_creation(self): 57 | """Testing if the instance is created or not""" 58 | self.assertIsInstance(self.model, CellStateModel) 59 | 60 | def test_dtype(self): 61 | params = list(self.data.values()) + list(self.variables.values()) 62 | comp = [ss.dtype == torch.float32 for ss in params] 63 | self.assertTrue(all(comp)) 64 | -------------------------------------------------------------------------------- /tests/models/test_celltype.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from unittest import TestCase 4 | 5 | import pandas as pd 6 | import torch 7 | import yaml 8 | 9 | from astir.data import SCDataset, from_csv_yaml 10 | from astir.models import CellTypeModel 11 | 12 | 13 | class TestCellTypeModel(TestCase): 14 | """Unit tests for the CellTypeModel class. 15 | 16 | This class assumes that all data-initializing functions in the Astir class 17 | are working. 18 | """ 19 | 20 | def __init__(self, *args, **kwargs): 21 | super(TestCellTypeModel, self).__init__(*args, **kwargs) 22 | 23 | self.expr_csv_file = os.path.join( 24 | os.path.dirname(__file__), "../test-data/sce.csv" 25 | ) 26 | self.marker_yaml_file = os.path.join( 27 | os.path.dirname(__file__), "../test-data/jackson-2020-markers.yml" 28 | ) 29 | 30 | self.input_expr = pd.read_csv(self.expr_csv_file) 31 | with open(self.marker_yaml_file, "r") as stream: 32 | marker_dict = yaml.safe_load(stream) 33 | 34 | self.type_dict = 
marker_dict["cell_types"] 35 | self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 36 | 37 | self._dset = SCDataset( 38 | include_other_column=True, 39 | expr_input=self.input_expr, 40 | marker_dict=self.type_dict, 41 | design=None, 42 | device=self._device, 43 | ) 44 | 45 | self.model = CellTypeModel(dset=self._dset, random_seed=42, device=self._device) 46 | self.model.fit(max_epochs=1) 47 | 48 | def test_basic_instance_creation(self): 49 | """Testing if the instance is created or not""" 50 | self.assertIsInstance(self.model, CellTypeModel) 51 | 52 | def test_dtype32(self): 53 | ds = SCDataset( 54 | include_other_column=True, 55 | expr_input=self.input_expr, 56 | marker_dict=self.type_dict, 57 | design=None, 58 | dtype=torch.float32, 59 | ) 60 | m = CellTypeModel( 61 | dset=ds, random_seed=42, dtype=torch.float32, device=self._device 62 | ) 63 | m.fit(max_epochs=1) 64 | data = m.get_data() 65 | variables = m.get_variables() 66 | params = list(data.values()) + list(variables.values()) 67 | comp = [ss.dtype == torch.float32 for ss in params] 68 | self.assertTrue(all(comp)) 69 | 70 | def test_dtype64(self): 71 | ds = SCDataset( 72 | include_other_column=True, 73 | expr_input=self.input_expr, 74 | marker_dict=self.type_dict, 75 | design=None, 76 | dtype=torch.float64, 77 | ) 78 | m = CellTypeModel( 79 | dset=ds, random_seed=42, dtype=torch.float64, device=self._device 80 | ) 81 | m.fit(max_epochs=1) 82 | data = m.get_data() 83 | variables = m.get_variables() 84 | params = list(data.values()) + list(variables.values()) 85 | comp = [ss.dtype == torch.float64 for ss in params] 86 | self.assertTrue(all(comp)) 87 | 88 | def test_trainability(self): 89 | s = [param.requires_grad for param in self.model._recog.parameters()] 90 | self.assertTrue(all(s)) 91 | -------------------------------------------------------------------------------- /tests/models/test_scdataset.py: -------------------------------------------------------------------------------- 1 
| import os 2 | import unittest 3 | 4 | import pandas as pd 5 | import torch 6 | import yaml 7 | 8 | from astir.data import SCDataset 9 | 10 | 11 | class TestSCDataset(unittest.TestCase): 12 | def __init__(self, *args, **kwargs): 13 | super(TestSCDataset, self).__init__(*args, **kwargs) 14 | 15 | self.expr_csv_file = os.path.join( 16 | os.path.dirname(__file__), "../test-data/test_data.csv" 17 | ) 18 | 19 | self.marker_yaml_file = os.path.join( 20 | os.path.dirname(__file__), "../test-data/jackson-2020-markers.yml" 21 | ) 22 | 23 | self.design_file = os.path.join( 24 | os.path.dirname(__file__), "../test-data/design.csv" 25 | ) 26 | 27 | # Initializing expected values for unittesting 28 | # self._param_init_expr_pd() 29 | self.input_expr = pd.read_csv(self.expr_csv_file, index_col=0) 30 | with open(self.marker_yaml_file, "r") as stream: 31 | self.marker_dict = yaml.safe_load(stream) 32 | 33 | self.state_markers = self.marker_dict["cell_states"] 34 | 35 | self.marker_genes = list( 36 | set([l for s in self.state_markers.values() for l in s]) 37 | ) 38 | 39 | self.expr = self.input_expr[self.marker_genes] 40 | 41 | self.design = pd.read_csv(self.design_file, index_col=0) 42 | 43 | self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 44 | 45 | # Initializing the actual model 46 | self.ds = SCDataset( 47 | include_other_column=False, 48 | expr_input=self.input_expr, 49 | marker_dict=self.state_markers, 50 | design=None, 51 | dtype=torch.float32, 52 | device=self._device, 53 | ) 54 | 55 | def _expr_input_tuple(self): 56 | pass 57 | 58 | def test_basic_instance_creation(self): 59 | 60 | self.assertIsInstance(self.ds, SCDataset) 61 | 62 | def test_marker_genes(self): 63 | """Testing if _m_proteins field is declared correctly 64 | Also tests get_features() and get_n_features() methods 65 | """ 66 | expected_gene_count = len(self.marker_genes) 67 | actual_gene_count = self.ds.get_n_features() 68 | 69 | expected_gene_names = sorted(self.marker_genes) 
70 | actual_gene_names = sorted(self.ds.get_features()) 71 | 72 | self.assertEqual(expected_gene_count, actual_gene_count) 73 | 74 | self.assertEqual(expected_gene_names, actual_gene_names) 75 | 76 | def test_len_constant_N(self): 77 | self.assertEqual(self.expr.shape[0], len(self.ds)) 78 | 79 | def test_get_classes(self): 80 | """Testing if _classes field is declared correctly 81 | Also tests get_classes() and get_n_classes() methods 82 | """ 83 | expected_class_count = len(self.state_markers.keys()) 84 | actual_class_count = self.ds.get_n_classes() 85 | 86 | expected_classes = sorted(self.state_markers.keys()) 87 | actual_classes = sorted(self.ds.get_classes()) 88 | 89 | self.assertEqual(expected_class_count, actual_class_count) 90 | 91 | self.assertEqual(expected_classes, actual_classes) 92 | 93 | def test_marker_mat_not_include_other(self): 94 | """ 95 | Also tests constant G and C 96 | """ 97 | expected_G = len(self.marker_genes) 98 | # actual_G = self.ds.get_protein_amount() 99 | 100 | expected_C = len(self.state_markers) 101 | # actual_C = self.ds.get_class_amount() 102 | 103 | expected_marker_mat = torch.zeros((expected_G, expected_C)).to(self._device) 104 | actual_marker_mat = self.ds.get_marker_mat() 105 | 106 | for g, protein in enumerate(sorted(self.marker_genes)): 107 | for c, state in enumerate(self.state_markers): 108 | if protein in self.state_markers[state]: 109 | expected_marker_mat[g, c] = 1.0 110 | 111 | self.assertTrue( 112 | torch.all(torch.eq(expected_marker_mat, actual_marker_mat)).item() 113 | ) 114 | 115 | def test_cell_names(self): 116 | 117 | expected_cell_names = sorted(self.expr.index) 118 | actual_cell_names = sorted(self.ds.get_cell_names()) 119 | 120 | self.assertTrue(expected_cell_names, actual_cell_names) 121 | 122 | # To implement: significant 123 | def test_marker_mat_include_other(self): 124 | self.type_markers = self.marker_dict["cell_types"] 125 | self.marker_genes = list( 126 | set([l for s in self.type_markers.values() for 
l in s]) 127 | ) 128 | 129 | self.ds = SCDataset( 130 | include_other_column=True, 131 | expr_input=self.input_expr, 132 | marker_dict=self.type_markers, 133 | design=None, 134 | dtype=torch.float32, 135 | device=self._device, 136 | ) 137 | 138 | G = self.ds.get_n_features() 139 | C = self.ds.get_n_classes() 140 | 141 | expected_marker_mat = torch.zeros((G, C + 1)).to(self._device) 142 | actual_marker_mat = self.ds.get_marker_mat() 143 | for g, feature in enumerate(sorted(self.marker_genes)): 144 | for c, cell_class in enumerate(self.type_markers): 145 | if feature in self.type_markers[cell_class]: 146 | expected_marker_mat[g, c] = 1.0 147 | 148 | self.assertTrue( 149 | torch.all( 150 | torch.eq(expected_marker_mat, actual_marker_mat).to(self._device) 151 | ).item() 152 | ) 153 | 154 | def test_fix_design_none(self): 155 | 156 | expected_design = torch.ones((len(self.ds), 1)).to( 157 | device=self._device, dtype=torch.float64 158 | ) 159 | actual_design = self.ds.get_design() 160 | 161 | self.assertTrue( 162 | torch.all(torch.eq(expected_design, actual_design).to(self._device)).item() 163 | ) 164 | 165 | def test_fix_design_not_none(self): 166 | self.design = self.design.to_numpy() 167 | 168 | self.ds = SCDataset( 169 | include_other_column=False, 170 | expr_input=self.input_expr, 171 | marker_dict=self.state_markers, 172 | design=self.design, 173 | dtype=torch.float64, 174 | device=self._device, 175 | ) 176 | 177 | expected_design = torch.from_numpy(self.design).to( 178 | device=self._device, dtype=torch.float64 179 | ) 180 | actual_design = self.ds.get_design() 181 | 182 | self.assertTrue( 183 | torch.all(torch.eq(expected_design, actual_design).to(self._device)).item() 184 | ) 185 | 186 | def test_dtype(self): 187 | comp = [] 188 | comp.append(self.ds.get_exprs().dtype == torch.float32) 189 | comp.append(self.ds.get_design().dtype == torch.float32) 190 | comp.append(self.ds.get_mu().dtype == torch.float32) 191 | comp.append(self.ds.get_sigma().dtype == 
torch.float32) 192 | self.assertTrue(all(comp)) 193 | 194 | 195 | if __name__ == "__main__": 196 | unittest.main() 197 | -------------------------------------------------------------------------------- /tests/output: -------------------------------------------------------------------------------- 1 | ,RTK_signalling,proliferation,mTOR_signalling,apoptosis 2 | BaselTMA_SP41_126_X14Y7_1,0.13713952543366864,0.7431854267793516,0.6385685612712785,0.45868812958069766 3 | BaselTMA_SP41_126_X14Y7_2,0.20194524055993684,0.6728100274445307,0.6777237901975813,0.46859389894261283 4 | BaselTMA_SP41_126_X14Y7_3,0.35749183866205975,0.5053879581474894,0.7689809823932953,0.4905463079256117 5 | BaselTMA_SP41_126_X14Y7_4,0.38603729299672335,0.38299923158720317,0.9550026480907963,0.6088960260464755 6 | BaselTMA_SP41_126_X14Y7_5,0.17193790217650923,0.6078047273781626,0.8612526541333863,0.7027014308708499 7 | BaselTMA_SP41_126_X14Y7_6,0.7966653533215745,0.1979767103672266,0.7276919540663056,0.35355911086799013 8 | BaselTMA_SP41_126_X14Y7_7,0.12638002812597623,0.6618243013557779,0.93254462273618,1.0 9 | BaselTMA_SP41_126_X14Y7_8,0.14223221834019986,0.6141810204977661,0.927534169832808,0.9338706879847776 10 | BaselTMA_SP41_126_X14Y7_9,0.3982996698751394,0.40453404229264606,0.8710225054300793,0.7150574457285224 11 | BaselTMA_SP41_126_X14Y7_10,0.6715965727405956,0.2325866502034389,0.8387927535674671,0.46040092753236284 12 | BaselTMA_SP41_126_X14Y7_11,0.5655219792109464,0.4026595202086594,0.5922827749935371,0.7800396801337 13 | BaselTMA_SP41_126_X14Y7_12,0.19299595716465157,0.5388155151632528,0.924100338273469,0.7871460795082657 14 | BaselTMA_SP41_126_X14Y7_13,0.5231731011644818,0.31699570246539327,0.888101299084746,0.5302829354937058 15 | BaselTMA_SP41_126_X14Y7_14,0.48600694517846016,0.32133880446159435,0.9316549285935142,0.5689477602641079 16 | BaselTMA_SP41_126_X14Y7_15,0.5195523350739091,0.3884085727784476,0.7641425975206431,0.4488135887651087 17 | 
BaselTMA_SP41_126_X14Y7_16,0.29881890921449644,0.464997828145267,0.8918904083211739,0.7804480271998768 18 | BaselTMA_SP41_126_X14Y7_17,0.6568150227239202,0.23608211328426684,0.8530690846538946,0.47382377920738283 19 | BaselTMA_SP41_126_X14Y7_18,0.595886536874125,0.24249583159594218,0.9273076086914471,0.5398401018379366 20 | BaselTMA_SP41_126_X14Y7_19,0.7818320427700235,0.20670753980888396,0.6950746277191855,0.5805231551131481 21 | BaselTMA_SP41_126_X14Y7_20,1.0,0.0,0.8199158330132749,0.369319568525722 22 | BaselTMA_SP41_126_X14Y7_21,0.9770925014260556,0.07348678633887414,0.6941889986216969,0.39945664876659304 23 | BaselTMA_SP41_126_X14Y7_22,0.4225074564137305,0.43087205201666773,0.7755652102875286,0.7120276137572694 24 | BaselTMA_SP41_126_X14Y7_23,0.6507835993367126,0.2330822168870987,0.8674166474993803,0.48521853099328377 25 | BaselTMA_SP41_126_X14Y7_24,0.23570287378963614,0.7580086917575279,0.47584105711983365,0.32491659144533885 26 | BaselTMA_SP41_126_X14Y7_25,0.11344820250662127,0.9659352119946879,0.26486841459728766,0.2144230628204661 27 | BaselTMA_SP41_126_X14Y7_26,0.8547781348781726,0.10100711314560237,0.8318138594142072,0.412061700327928 28 | BaselTMA_SP41_126_X14Y7_27,0.3100146229634679,0.7229038909270428,0.43891376959066053,0.28211632237898376 29 | BaselTMA_SP41_126_X14Y7_28,0.3171023911038444,0.410509223731622,1.0,0.6565087695789374 30 | BaselTMA_SP41_126_X14Y7_29,0.13552675379539678,0.5593742673064103,0.935019662886387,0.6585201464678092 31 | BaselTMA_SP41_126_X14Y7_30,0.458806002331515,0.49762978006292086,0.5351307050657678,0.9388277237920843 32 | BaselTMA_SP41_126_X14Y7_31,0.30829016253800334,0.4651670033222812,0.8751297689672386,0.7864084254122327 33 | BaselTMA_SP41_126_X14Y7_32,0.14523750570761948,0.8548112442610558,0.42576587422206624,0.3150810814805807 34 | BaselTMA_SP41_126_X14Y7_33,0.3165940153760783,0.6294823327938511,0.5989245116962811,0.3869715322541496 35 | 
BaselTMA_SP41_126_X14Y7_34,0.4162033163623539,0.39020085914116676,0.8982663929109734,0.5623369735547743 36 | BaselTMA_SP41_126_X14Y7_35,0.42574362851847686,0.46705196826595624,0.7449494353909594,0.4572749812401152 37 | BaselTMA_SP41_126_X14Y7_36,0.319578329318082,0.4326038130644537,0.9539397877222642,0.6239374935428276 38 | BaselTMA_SP41_126_X14Y7_37,0.21453709493280992,0.9609150275924503,0.13486027649287047,0.10192460023777924 39 | BaselTMA_SP41_126_X14Y7_38,0.3108853831661479,0.44280768564973266,0.9436149597016474,0.6390591371421649 40 | BaselTMA_SP41_126_X14Y7_39,0.30930798182627045,0.4823277295927936,0.8355878112059111,0.7924255314847559 41 | BaselTMA_SP41_126_X14Y7_40,0.0,0.905435261296775,0.5314056323913237,0.42145198457824407 42 | BaselTMA_SP41_126_X14Y7_41,0.42552394751496486,0.42770611996996516,0.7544412204573957,0.8496403642489123 43 | BaselTMA_SP41_126_X14Y7_42,0.24666315191842458,0.5342364711629021,0.8699341717603589,0.5865129605066718 44 | BaselTMA_SP41_126_X14Y7_43,0.24863815058167063,0.5507176816473853,0.8381371085822537,0.5651884666052598 45 | BaselTMA_SP41_126_X14Y7_44,0.6975172046271325,0.24233641146291915,0.783183527483233,0.4156324571182694 46 | BaselTMA_SP41_126_X14Y7_45,0.43073704904383686,0.3838228691814483,0.8898923453440927,0.553071404493052 47 | BaselTMA_SP41_126_X14Y7_46,0.3775757053828658,0.5142066591512949,0.7252461715598448,0.4562062370518587 48 | BaselTMA_SP41_126_X14Y7_47,0.22243631604252012,0.6058557864925211,0.7716448759443886,0.5263252992485811 49 | BaselTMA_SP41_126_X14Y7_48,0.2607109487035584,1.0,0.0,0.0 50 | BaselTMA_SP41_126_X14Y7_49,0.3049109928278424,0.5321913712646148,0.7924753424413511,0.51951335256223 51 | -------------------------------------------------------------------------------- /tests/test-data/adata_small.h5ad: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/camlab-bioml/astir/61fce25cff8baefcd4102d4f7bd990064740b0a6/tests/test-data/adata_small.h5ad -------------------------------------------------------------------------------- /tests/test-data/bad_marker.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Cellstates: 5 | RTK_signalling: 6 | - DNA1 7 | - DNA2 8 | cell_growth: 9 | - Ki-67 10 | - phospho mTOR 11 | mTOR_signalling: 12 | - GATA3 13 | - phospho S6 14 | apoptosis: 15 | - cleaved PARP 16 | - Cleaved Caspase3 17 | 18 | 19 | -------------------------------------------------------------------------------- /tests/test-data/basel_100.loom: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camlab-bioml/astir/61fce25cff8baefcd4102d4f7bd990064740b0a6/tests/test-data/basel_100.loom -------------------------------------------------------------------------------- /tests/test-data/design.csv: -------------------------------------------------------------------------------- 1 | 
"","(Intercept)","sce$coreBaselTMA_SP41_101_X10Y8","sce$coreBaselTMA_SP41_104_X7Y4","sce$coreBaselTMA_SP41_104_X8Y4_175","sce$coreBaselTMA_SP41_104_X8Y4_233","sce$coreBaselTMA_SP41_106_X2Y9","sce$coreBaselTMA_SP41_11_X13Y7","sce$coreBaselTMA_SP41_112_X5Y8","sce$coreBaselTMA_SP41_114_X13Y4","sce$coreBaselTMA_SP41_114_X14Y4","sce$coreBaselTMA_SP41_117_X13Y3","sce$coreBaselTMA_SP41_117_X14Y3","sce$coreBaselTMA_SP41_126_X14Y7","sce$coreBaselTMA_SP41_129_X7Y8","sce$coreBaselTMA_SP41_133_X3Y6","sce$coreBaselTMA_SP41_134_X3Y8","sce$coreBaselTMA_SP41_135_X8Y5","sce$coreBaselTMA_SP41_14_X1Y8","sce$coreBaselTMA_SP41_141_X11Y2","sce$coreBaselTMA_SP41_141_X12Y2","sce$coreBaselTMA_SP41_153_X7Y5","sce$coreBaselTMA_SP41_159_X11Y4","sce$coreBaselTMA_SP41_159_X12Y4","sce$coreBaselTMA_SP41_165_X5Y6","sce$coreBaselTMA_SP41_166_X15Y4","sce$coreBaselTMA_SP41_177_X16Y5","sce$coreBaselTMA_SP41_18_X13Y5","sce$coreBaselTMA_SP41_186_X5Y4","sce$coreBaselTMA_SP41_186_X6Y4","sce$coreBaselTMA_SP41_187_X3Y2","sce$coreBaselTMA_SP41_187_X4Y2","sce$coreBaselTMA_SP41_191_X15Y7","sce$coreBaselTMA_SP41_196_X5Y7","sce$coreBaselTMA_SP41_20_X12Y5","sce$coreBaselTMA_SP41_203_X8Y8","sce$coreBaselTMA_SP41_204_X16Y4_237","sce$coreBaselTMA_SP41_204_X16Y4_238","sce$coreBaselTMA_SP41_206_X1Y2","sce$coreBaselTMA_SP41_206_X2Y2","sce$coreBaselTMA_SP41_211_X4Y6" 2 | "1",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0 3 | "2",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 4 | "3",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0 5 | "4",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 6 | "5",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 7 | "6",1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 8 | "7",1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 9 | 
"8",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 10 | "9",1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 11 | "10",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 12 | "11",1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 13 | "12",1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 14 | "13",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 15 | "14",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 16 | "15",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 17 | "16",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 18 | "17",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 19 | "18",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0 20 | "19",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 21 | "20",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 22 | "21",1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 23 | "22",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 24 | "23",1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 25 | "24",1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 26 | "25",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 27 | "26",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 28 | "27",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 29 | "28",1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 30 | "29",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 31 | 
"30",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 32 | "31",1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 33 | "32",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 34 | "33",1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 35 | "34",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 36 | "35",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 37 | "36",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 38 | "37",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 39 | "38",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 40 | "39",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 41 | "40",1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 42 | "41",1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 43 | "42",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 44 | "43",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 45 | "44",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 46 | "45",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 47 | "46",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 48 | "47",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0 49 | "48",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 50 | "49",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 51 | -------------------------------------------------------------------------------- /tests/test-data/jackson-2020-markers.yml: 
-------------------------------------------------------------------------------- 1 | 2 | cell_states: 3 | RTK_signalling: 4 | - Her2 5 | - EGFR 6 | proliferation: 7 | - Ki-67 8 | - phospho Histone 9 | mTOR_signalling: 10 | - phospho mTOR 11 | - phospho S6 12 | apoptosis: 13 | - cleaved PARP 14 | - Cleaved Caspase3 15 | 16 | cell_types: 17 | stromal: 18 | - Vimentin 19 | - Fibronectin 20 | B cells: 21 | - CD45 22 | - CD20 23 | T cells: 24 | - CD45 25 | - CD3 26 | macrophage: 27 | - CD45 28 | - CD68 29 | epithelial(basal): 30 | - E-Cadherin 31 | - pan Cytokeratin 32 | - Cytokeratin 5 33 | - Cytokeratin 14 34 | - Her2 35 | epithelial(luminal): 36 | - E-Cadherin 37 | - pan Cytokeratin 38 | - Cytokeratin 7 39 | - Cytokeratin 8/18 40 | - Cytokeratin 19 41 | - Cytokeratin 5 42 | - Her2 43 | 44 | hierarchy: 45 | epithelial_cells: 46 | - epithelial(luminal) 47 | - epithelial(basal) 48 | # - epithelial(other) 49 | immune cells: 50 | non-lymphocytes: 51 | - macrophage 52 | lymphocytes: 53 | - T cells 54 | - B cells 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /tests/test-data/test-dir-read/one.csv: -------------------------------------------------------------------------------- 1 | "","EGFR","Ruthenium_1","Ruthenium_2","Ruthenium_3","Ruthenium_4","Ruthenium_5","Ruthenium_6","Ruthenium_7","E-Cadherin","DNA1","DNA2","Rabbit IgG H L","GATA3","Histone H3 antibody 1","Ki-67","SMA","Vimentin","cleaved PARP","Cleaved Caspase3","Her2","p53","pan Cytokeratin","Cytokeratin 19","Progesterone Receptor A/B antibody 1","Progesterone Receptor A/B antibody 2","c-Myc","Fibronectin","Cytokeratin 14","Slug","CD20","vWF","CD31","Histone H3 antibody 2","Cytokeratin 5","CD44","CD45","CD68","CD3","Carbonic Anhydrase IX","Cytokeratin 8/18","Cytokeratin 7","Twist","phospho Histone","phospho mTOR","phospho S6" 2 | 
"BaselTMA_SP41_126_X14Y7_1",0.281752771745428,1.31958781810241,0.597380072662003,1.78286258012158,1.75782433204907,1.99185744340211,2.58056400999575,2.28716705396776,1.8143094838686,2.26163835871481,2.85745429700996,0.153254092409689,0.218156920379724,1.67452982248683,0.0415625002251949,0.0712772918353616,0.444139725057488,0.293131435903423,0.293131435903423,0.483006805598638,0,1.18751223171639,0.0799563776315769,0.122841765135889,0.122841765135889,0.243465420557963,1.03973438831891,0.134128155146678,0.246095547312871,0.207883573332857,0,0,0.49913252131538,0.178349656238537,3.80842587638293,0.0447332863514713,0.184805194048002,0,0.928929162714277,0.025525779641982,0.0434231703349215,0.209742440797169,0.137454250163813,0.572811188966434,0.215508336047807 3 | "BaselTMA_SP41_126_X14Y7_2",0.303016424879695,1.31958781810241,0.597380072662003,1.78286258012158,1.75782433204907,1.99185744340211,2.58056400999575,2.28716705396776,1.51768483657457,1.613059980226,1.93977298946158,0,0.104099530009894,1.30414953455425,0.258647854185918,0.179905035860049,0.270070346973278,0.269995556739188,0.269995556739188,0.513386095964132,0.124381164441986,0.749379200845966,0.0669218226368246,0.189137064182174,0.189137064182174,0.147830889471134,1.14764402398214,0.0269507329965758,0.117598975755152,0.0215064833013768,0,0,0.182321559277481,0.0811467853470927,3.37104347012115,0.0468019851802111,0.0804057172161878,0.110805851822127,0.752101441431739,0,0.032055860595542,0.108013160836267,0.0484275934259086,0.539647309997281,0.655731021108987 4 | 
"BaselTMA_SP41_126_X14Y7_3",0.252373591929453,1.31958781810241,0.597380072662003,1.78286258012158,1.75782433204907,1.99185744340211,2.58056400999575,2.28716705396776,1.24643337481527,2.13874433914606,2.75314619116723,0.0612817239818973,0.0994445250565181,1.45047307552165,0.081559900454294,0.166532782140787,0.233909456892897,0.253298162954306,0.253298162954306,0.633226423035353,0.0583059596412607,1.21652113650208,0.186294085811945,0.155385304368762,0.155385304368762,0.250264917703643,0.988906212339524,0.0235147624338673,0.108889136643846,0.00887829026542762,0.00700873857806561,0.00700873857806561,0.407286238353478,0.0761116154963944,3.28244078218969,0.0284985582815762,0.203248021274544,0.0206172021971442,0.740759201163638,0.0833109946128711,0.0815033267848188,0.119058055107812,0.0630969787835357,0.409734520196029,0.437844717513601 5 | "BaselTMA_SP41_126_X14Y7_4",0.397732141326238,1.30685199496848,0.534495793659021,1.67821741785772,1.75782433204907,1.9614302314342,2.52855073877333,2.18381402772155,1.83978462756571,1.81601501739998,2.33715322835657,0.0368180548997214,0.035844882193481,1.18364710505427,0.831303329536159,0.830879359804224,0.542361835868107,0.348377503475095,0.348377503475095,0.709272254696804,0.0861824834861172,1.35430267477242,0.346272876410815,0.241098688399882,0.241098688399882,0.165295478983948,0.842710446351632,0.114420092168108,0.0863638967806044,0.0530270737099543,0.0665261604158528,0.0665261604158528,0.258153476642932,0.164059309696648,3.72178336085638,0.0690533090513159,0.305199716511606,0.0602644884847739,1.0959675377634,0.184603159618805,0.131531313002247,0.160778092929216,0.0906664483203359,0.305717938537705,0.132236468695897 6 | 
"BaselTMA_SP41_126_X14Y7_5",0.426352363450394,1.17343905015891,0.597380072662003,1.58930314723782,1.38983901810637,1.78988748334799,2.34374306012192,2.12333449194922,1.61834734172774,1.35521448777051,1.81264185807688,0.038586450615388,0.0719628676148131,0.675414098389089,0.155552925832135,0.306354251115869,0.759944254743028,0.46805827165428,0.46805827165428,0.482230235295827,0,0.629398171772225,0.12440719285688,0.135211618881543,0.135211618881543,0.260330432633911,1.07335705762628,0.0553682124940572,0.0446588354307538,0.0191269324719215,0.0710114837824813,0.0710114837824813,0.219566808403905,0.0953228955472686,3.78193608766637,0.233776903387249,0.135083909465158,0.0571945265673294,1.42798304820572,0.0353712104042453,0.0384479631255821,0.0144342602153511,0.127032488088794,0.261205097984432,0.157785714279719 7 | "BaselTMA_SP41_126_X14Y7_6",0.609903609649237,1.31958781810241,0.372367846281158,1.78286258012158,1.50101098171655,1.91774736855616,2.57742682498001,2.28716705396776,1.9784894649437,1.74194650291225,2.20566406217482,0,0.276471473109737,1.63945084696482,0.542475932177256,1.81695533696929,1.20776421472246,0.31280033985516,0.31280033985516,0.915340793386204,0.155110899096183,1.31132500439165,0.33857250307486,0.392151184354905,0.392151184354905,0.0432483518904006,1.30384683749297,0,0.321245292642739,0.122130917971635,0.153152366032903,0.153152366032903,0.46163540261567,0.238640796201489,4.25353731506461,0.292395183840898,0.410143431345782,0.0403477664871872,1.42987285773009,0.169140917827723,0.125414606855289,0,0.0732652610050874,0.467445374083094,0.143676239430917 8 | 
"BaselTMA_SP41_126_X14Y7_7",0.378273994641866,1.02309345484598,0.508287981414855,1.6678490878493,1.57199137751667,1.79316832485582,2.45316771376931,2.16937958589658,1.95202912431456,2.26163835871481,2.84924235042934,0.0367073013762609,0.143904949371807,1.93552161238719,0.0974311915849248,0.646225993108243,0.666724442699186,0.499302204133687,0.499302204133687,0.538990574677631,0.123797597755547,1.19515337905834,0.145308818386877,0.358448007124586,0.358448007124586,0.258737238762547,1.47234737844586,0.135282533579763,0.073474673245632,0.0568982011248754,0.0604383190074023,0.0604383190074023,0.775791828442325,0.195056487433125,3.86719830765511,0.225182259834438,0.236481768486859,0.00653958867991183,1.15129950325041,0.0455341977110848,0.0974200795935418,0.172891676713793,0.109492404799103,0.425186253511379,0.132594197855554 9 | "BaselTMA_SP41_126_X14Y7_8",0.433318499070447,1.02144772702908,0.379583030368888,1.48233434475344,1.46248840039406,1.69173666753727,2.28494401869665,2.00368533559941,2.16328230764477,1.78611018785754,2.33096353152789,0.0415644016526728,0.0661214541572935,1.413064810116,0.167915238452389,0.31823853161661,0.479027926368563,0.435822538708403,0.435822538708403,0.599664499074605,0.0588073675249883,1.23269089469998,0.230692324129598,0.37271412016763,0.37271412016763,0.254613984278178,1.13562080829842,0.0465206421031809,0.161365246121161,0.106829515082629,0.0942926547603922,0.0942926547603922,0.497869574102834,0.139924037168949,3.84194937328078,0.146820805647934,0.138061178204614,0.00548440684875311,1.11870351689516,0.145760753051907,0.135387292236118,0.0623356846001655,0.0805003264295715,0.441512101894891,0.165296316182786 10 | 
"BaselTMA_SP41_126_X14Y7_9",0.554613318737411,1.20464396576545,0.577642773075184,1.67304306621983,1.56311983738021,1.72451674367243,2.46620106627875,2.20203835261983,2.14503544934655,1.5654611792378,1.98795262476879,0.107617950218012,0.112150935457405,1.37935198571939,0.294009768451207,0.946665099397284,1.02147974097619,0.421806244945565,0.421806244945565,0.792418752220183,0.106627993124653,1.63971011636084,0.301515601607164,0.380333597838085,0.380333597838085,0.501687379831502,1.09628041204639,0.0983596724637716,0.130034790876899,0.207883573332857,0.00761080638330121,0.00761080638330121,0.456610750890645,0.0896210293968119,4.04298577103731,0.0630212173416578,0.30240826572314,0.0373502981646327,0.826435124479329,0.121386468226999,0.227358881054528,0.152215241717528,0.126400073551901,0.420150652151168,0.156433841543841 11 | "BaselTMA_SP41_126_X14Y7_10",0.50913395949692,1.22058622932062,0.527537389178274,1.71558506204996,1.57943590200115,1.88985474424116,2.42839274009776,2.21243523037744,2.37355197431657,2.03232893709605,2.63087273640171,0.0327224360920273,0.325118021388983,2.38462296011013,0.265314858445117,1.18812379053559,1.05776762379102,0.387794912779111,0.387794912779111,1.13720884838478,0.0328188609439452,1.90993683854487,0.274004114584755,0.406666784475912,0.406666784475912,0.407673921593465,1.06827540001609,0.0950650365792463,0.245997127041375,0.0785149333702716,0.047644453635302,0.047644453635302,0.848886628160954,0.253882640339735,3.7418323410385,0.0490141226369128,0.237072417186584,0.0390821080535325,0.6743577041355,0.276578525276985,0.435942418245782,0.128295309976712,0.109421290304796,0.600302307273537,0.0731060951400031 12 | 
"BaselTMA_SP41_126_X14Y7_11",0.608830025730392,1.25152160427507,0.344457228255887,1.61965905012165,1.48286884452225,1.85917210483041,2.37363944950833,2.06779669468505,2.36021253092643,1.44066417134664,2.06677271166277,0.0404141929581712,0.12726467468555,1.54903783373294,0.0604751369610325,0.359222746493563,0.886523169811,0.485117986598223,0.485117986598223,0.947499053795506,0.0205949432778254,1.2777115832032,0.202603895993156,0.441209489241267,0.441209489241267,0.299101119377199,0.773251382580783,0.0803546502009576,0.182464201381651,0.0690267366753256,0.0996816852290423,0.0996816852290423,0.424364356848866,0.168878213446364,3.61808628369392,0.0279386882643336,0.210603545166566,0.0169132921706009,0.946982221000932,0.0742639503865018,0.295328260059555,0.114796803504975,0.166931321669432,0.570790542230526,0.157564936689404 13 | "BaselTMA_SP41_126_X14Y7_12",0.443653863521087,1.11521645906745,0.445161935258024,1.5231109526391,1.40970203769659,1.67320377816335,2.26292386076838,2.03608392257045,2.02076231395761,1.62376201592666,2.17061498093648,0.043196895302235,0.12520524042104,1.51594569721507,0.125256550479163,0.441748454204352,0.647734958851728,0.399569200378657,0.399569200378657,0.653227727606058,0.0882794039711532,1.33235521133485,0.231975257499933,0.375581552251365,0.375581552251365,0.254370880024891,0.690528596276582,0.10419856924114,0.149967869948951,0.0378098305449082,0.0218063018879478,0.0218063018879478,0.583722859182437,0.0881895216705521,3.38219175382825,0.0428244977856538,0.13869240665371,0.0599346395170729,0.722333100485728,0.177645906369811,0.219370995303025,0.120532691355742,0.116622398400743,0.462027722684399,0.187038336411999 14 | 
"BaselTMA_SP41_126_X14Y7_13",0.552635706478001,1.20432262239729,0.378310835130766,1.63510577444156,1.41690446008722,1.78127973683651,2.36680011720726,2.11435223899551,2.32381821311352,1.59479116588725,2.26517405121178,0.112480049481033,0.239245288121319,1.62108943421845,0.330806345240921,0.334645762029237,0.786412825935814,0.442779032506085,0.442779032506085,0.882964054846778,0,1.53173118681668,0.198599000667451,0.438773757840415,0.438773757840415,0.25995743013612,0.654536032837098,0.031200957917212,0.160227078670634,0.0447750862101473,0.0905552952119225,0.0905552952119225,0.45047031162161,0.226610389074651,3.62791246277059,0.166247674839988,0.146081438430856,0.107837712233924,0.893368117120901,0.176641581400065,0.223902521302041,0.128088530757384,0.212955779303418,0.456351211042372,0.295319378680642 15 | "BaselTMA_SP41_126_X14Y7_14",0.574417195982534,1.12990422914934,0.541641878717257,1.53591767840073,1.50188838637846,1.80739010009332,2.3196965921288,2.09829587005942,2.14607730737287,2.1405145142947,2.70459065116314,0.0506444960015956,0.193189193398403,1.51787972746613,0.724698593287406,0.310107592706018,0.65930033597213,0.393258086767271,0.393258086767271,0.711733164298637,0.103964801006653,1.57208373745252,0.339913262103744,0.326502017534756,0.326502017534756,0.244289594026535,0.908101147756714,0.148476691716738,0.146458306754174,0.0109124331535944,0.0394267490474644,0.0394267490474644,0.670860722215978,0.141035262478867,3.71785832258826,0.15741410650475,0.111172598652131,0.0263364768806063,0.984096349800148,0.106150595113424,0.185679853415902,0.115548268432496,0.117830779333529,0.490467269038208,0.112207669469491 16 | 
"BaselTMA_SP41_126_X14Y7_15",0.445141373351121,0.983549809032694,0.537630323548894,1.48867791273499,1.35947910529097,1.69340884048838,2.23931092928345,2.01271251277941,2.07140504464823,1.68988351768388,2.24375827665897,0.0239659793004077,0.0961201340299801,1.48551664789288,0.013582111402314,0.330363942281392,0.721158464025108,0.42904638938744,0.42904638938744,0.711109588015052,0.0853290689952641,1.44040195243329,0.356297107828498,0.287137656814369,0.287137656814369,0.229259377531225,0.401458022810847,0.14242762842043,0.114452093006097,0.0211902227314997,0.101271569675967,0.101271569675967,0.6780266285872,0.308925481282205,3.44972173256667,0.0791919849231345,0.159892290753897,0.0433953036656595,0.954703828926171,0.150253142016165,0.29956040263481,0.120342892891634,0.123618400948608,0.612554704071968,1.53077728953142 17 | "BaselTMA_SP41_126_X14Y7_16",0.423224124113962,0.986487058046945,0.361349919667782,1.42214013904286,1.38399461532553,1.64234047781656,2.17922874397926,1.91237774683103,2.18052005538034,1.77461076402444,2.36074129925337,0.0584691410635083,0.154197683858691,1.67281129272234,0.296603607824735,0.69280541135784,0.626312549782295,0.473788819528183,0.473788819528183,0.767828846237009,0.0510328736045171,1.4454307208539,0.302253906556529,0.282563968677143,0.282563968677143,0.28150367519358,0.606046190982351,0.0842701746540736,0.215652452975277,0.0516370026272807,0.0324127454550036,0.0324127454550036,0.729162644025991,0.289874636366653,3.44824549456649,0.0606348083608117,0.150566193834433,0.0183875461655388,1.53535223790493,0.156964149606285,0.231969500388552,0.0925539915963191,0.08672909528526,0.420317833648897,0.221002661580778 18 | 
"BaselTMA_SP41_126_X14Y7_17",0.498678062102147,1.17781379532244,0.522682949700635,1.51458531530078,1.40931511169072,1.74668912260261,2.30674389457336,1.9950499197128,2.20254759244282,1.93580948875749,2.53811296009035,0.0962792182483904,0.0873258979628482,1.5202803378887,0.507469063395971,0.483955276339326,0.740435581379996,0.486125527866747,0.486125527866747,0.984527287596058,0.0885008911311593,1.59905638571433,0.157846277987796,0.179280825975571,0.179280825975571,0.244728271936502,0.660520682149438,0.0590714514463765,0.288551575565284,0.032123799210221,0.131254415692856,0.131254415692856,0.50692263890735,0.225686237019723,3.48779953460387,0.05774164794599,0.115660518792748,0.0631778616343239,1.76746974870673,0.162186983688376,0.271038520848474,0.1180840476944,0.181912414195249,0.581563621271709,0.197118834439433 19 | "BaselTMA_SP41_126_X14Y7_18",0.551295539365493,1.10544722602193,0.323332706594479,1.64705154912145,1.54944374772256,1.91626417786334,2.39241224670681,2.10341448766267,2.13209703295856,1.91448956634526,2.54847390364756,0.0153521513906716,0.154836931864209,1.47523230693975,0.365308909023129,0.293447296150629,0.533473450322357,0.509065035505689,0.509065035505689,0.770741695920629,0.112149112021286,1.15747577746381,0.179127668579003,0.197534320541308,0.197534320541308,0.453477219832709,1.08040945487045,0.240022210168695,0.297897890813239,0.197467929723259,0.121092621027361,0.121092621027361,0.622657670813786,0.223826120537171,3.71997508326293,0.0933103259939839,0.563758929281138,0.0383864502156451,1.52655587223294,0.036484511329554,0.124137599062604,0.138446692919138,0.156746296737235,0.535765892451459,0.666997378692888 20 | 
"BaselTMA_SP41_126_X14Y7_19",0.566751369369273,1.04381736994478,0.597380072662003,1.69952350465586,1.61695685736026,1.77902779771975,2.370488975039,2.0766191568681,2.39643995938127,1.02094989843374,1.44499924627171,0.124155173315278,0.0827724742166557,1.01722024284261,0.335172377899056,0.261676850385538,0.909118854078818,0.563572034094173,0.563572034094173,0.987686382296719,0.0919551040440306,1.91586952770624,0.425899072028039,0.406057368348545,0.406057368348545,0.281018087465073,0.714751505032067,0.21776186811034,0.28163326936891,0.0938841368378904,0.0749973388020032,0.0749973388020032,0.390677068825128,0.295840321093668,4.08808532517434,0.0439164726716929,0.128377803572521,0.00257318735542014,2.02313136906579,0.31693994469329,0.299870074804783,0.145659902493801,0.0762762995139594,0.419048297486753,0.214751513476061 21 | -------------------------------------------------------------------------------- /tests/test-data/test-dir-read/two.csv: -------------------------------------------------------------------------------- 1 | "","EGFR","Ruthenium_1","Ruthenium_2","Ruthenium_3","Ruthenium_4","Ruthenium_5","Ruthenium_6","Ruthenium_7","E-Cadherin","DNA1","DNA2","Rabbit IgG H L","GATA3","Histone H3 antibody 1","Ki-67","SMA","Vimentin","cleaved PARP","Cleaved Caspase3","Her2","p53","pan Cytokeratin","Cytokeratin 19","Progesterone Receptor A/B antibody 1","Progesterone Receptor A/B antibody 2","c-Myc","Fibronectin","Cytokeratin 14","Slug","CD20","vWF","CD31","Histone H3 antibody 2","Cytokeratin 5","CD44","CD45","CD68","CD3","Carbonic Anhydrase IX","Cytokeratin 8/18","Cytokeratin 7","Twist","phospho Histone","phospho mTOR","phospho S6" 2 | 
"BaselTMA_SP41_126_X14Y7_1",0.281752771745428,1.31958781810241,0.597380072662003,1.78286258012158,1.75782433204907,1.99185744340211,2.58056400999575,2.28716705396776,1.8143094838686,2.26163835871481,2.85745429700996,0.153254092409689,0.218156920379724,1.67452982248683,0.0415625002251949,0.0712772918353616,0.444139725057488,0.293131435903423,0.293131435903423,0.483006805598638,0,1.18751223171639,0.0799563776315769,0.122841765135889,0.122841765135889,0.243465420557963,1.03973438831891,0.134128155146678,0.246095547312871,0.207883573332857,0,0,0.49913252131538,0.178349656238537,3.80842587638293,0.0447332863514713,0.184805194048002,0,0.928929162714277,0.025525779641982,0.0434231703349215,0.209742440797169,0.137454250163813,0.572811188966434,0.215508336047807 3 | "BaselTMA_SP41_126_X14Y7_2",0.303016424879695,1.31958781810241,0.597380072662003,1.78286258012158,1.75782433204907,1.99185744340211,2.58056400999575,2.28716705396776,1.51768483657457,1.613059980226,1.93977298946158,0,0.104099530009894,1.30414953455425,0.258647854185918,0.179905035860049,0.270070346973278,0.269995556739188,0.269995556739188,0.513386095964132,0.124381164441986,0.749379200845966,0.0669218226368246,0.189137064182174,0.189137064182174,0.147830889471134,1.14764402398214,0.0269507329965758,0.117598975755152,0.0215064833013768,0,0,0.182321559277481,0.0811467853470927,3.37104347012115,0.0468019851802111,0.0804057172161878,0.110805851822127,0.752101441431739,0,0.032055860595542,0.108013160836267,0.0484275934259086,0.539647309997281,0.655731021108987 4 | 
"BaselTMA_SP41_126_X14Y7_3",0.252373591929453,1.31958781810241,0.597380072662003,1.78286258012158,1.75782433204907,1.99185744340211,2.58056400999575,2.28716705396776,1.24643337481527,2.13874433914606,2.75314619116723,0.0612817239818973,0.0994445250565181,1.45047307552165,0.081559900454294,0.166532782140787,0.233909456892897,0.253298162954306,0.253298162954306,0.633226423035353,0.0583059596412607,1.21652113650208,0.186294085811945,0.155385304368762,0.155385304368762,0.250264917703643,0.988906212339524,0.0235147624338673,0.108889136643846,0.00887829026542762,0.00700873857806561,0.00700873857806561,0.407286238353478,0.0761116154963944,3.28244078218969,0.0284985582815762,0.203248021274544,0.0206172021971442,0.740759201163638,0.0833109946128711,0.0815033267848188,0.119058055107812,0.0630969787835357,0.409734520196029,0.437844717513601 5 | "BaselTMA_SP41_126_X14Y7_4",0.397732141326238,1.30685199496848,0.534495793659021,1.67821741785772,1.75782433204907,1.9614302314342,2.52855073877333,2.18381402772155,1.83978462756571,1.81601501739998,2.33715322835657,0.0368180548997214,0.035844882193481,1.18364710505427,0.831303329536159,0.830879359804224,0.542361835868107,0.348377503475095,0.348377503475095,0.709272254696804,0.0861824834861172,1.35430267477242,0.346272876410815,0.241098688399882,0.241098688399882,0.165295478983948,0.842710446351632,0.114420092168108,0.0863638967806044,0.0530270737099543,0.0665261604158528,0.0665261604158528,0.258153476642932,0.164059309696648,3.72178336085638,0.0690533090513159,0.305199716511606,0.0602644884847739,1.0959675377634,0.184603159618805,0.131531313002247,0.160778092929216,0.0906664483203359,0.305717938537705,0.132236468695897 6 | 
"BaselTMA_SP41_126_X14Y7_5",0.426352363450394,1.17343905015891,0.597380072662003,1.58930314723782,1.38983901810637,1.78988748334799,2.34374306012192,2.12333449194922,1.61834734172774,1.35521448777051,1.81264185807688,0.038586450615388,0.0719628676148131,0.675414098389089,0.155552925832135,0.306354251115869,0.759944254743028,0.46805827165428,0.46805827165428,0.482230235295827,0,0.629398171772225,0.12440719285688,0.135211618881543,0.135211618881543,0.260330432633911,1.07335705762628,0.0553682124940572,0.0446588354307538,0.0191269324719215,0.0710114837824813,0.0710114837824813,0.219566808403905,0.0953228955472686,3.78193608766637,0.233776903387249,0.135083909465158,0.0571945265673294,1.42798304820572,0.0353712104042453,0.0384479631255821,0.0144342602153511,0.127032488088794,0.261205097984432,0.157785714279719 7 | "BaselTMA_SP41_126_X14Y7_6",0.609903609649237,1.31958781810241,0.372367846281158,1.78286258012158,1.50101098171655,1.91774736855616,2.57742682498001,2.28716705396776,1.9784894649437,1.74194650291225,2.20566406217482,0,0.276471473109737,1.63945084696482,0.542475932177256,1.81695533696929,1.20776421472246,0.31280033985516,0.31280033985516,0.915340793386204,0.155110899096183,1.31132500439165,0.33857250307486,0.392151184354905,0.392151184354905,0.0432483518904006,1.30384683749297,0,0.321245292642739,0.122130917971635,0.153152366032903,0.153152366032903,0.46163540261567,0.238640796201489,4.25353731506461,0.292395183840898,0.410143431345782,0.0403477664871872,1.42987285773009,0.169140917827723,0.125414606855289,0,0.0732652610050874,0.467445374083094,0.143676239430917 8 | 
"BaselTMA_SP41_126_X14Y7_7",0.378273994641866,1.02309345484598,0.508287981414855,1.6678490878493,1.57199137751667,1.79316832485582,2.45316771376931,2.16937958589658,1.95202912431456,2.26163835871481,2.84924235042934,0.0367073013762609,0.143904949371807,1.93552161238719,0.0974311915849248,0.646225993108243,0.666724442699186,0.499302204133687,0.499302204133687,0.538990574677631,0.123797597755547,1.19515337905834,0.145308818386877,0.358448007124586,0.358448007124586,0.258737238762547,1.47234737844586,0.135282533579763,0.073474673245632,0.0568982011248754,0.0604383190074023,0.0604383190074023,0.775791828442325,0.195056487433125,3.86719830765511,0.225182259834438,0.236481768486859,0.00653958867991183,1.15129950325041,0.0455341977110848,0.0974200795935418,0.172891676713793,0.109492404799103,0.425186253511379,0.132594197855554 9 | "BaselTMA_SP41_126_X14Y7_8",0.433318499070447,1.02144772702908,0.379583030368888,1.48233434475344,1.46248840039406,1.69173666753727,2.28494401869665,2.00368533559941,2.16328230764477,1.78611018785754,2.33096353152789,0.0415644016526728,0.0661214541572935,1.413064810116,0.167915238452389,0.31823853161661,0.479027926368563,0.435822538708403,0.435822538708403,0.599664499074605,0.0588073675249883,1.23269089469998,0.230692324129598,0.37271412016763,0.37271412016763,0.254613984278178,1.13562080829842,0.0465206421031809,0.161365246121161,0.106829515082629,0.0942926547603922,0.0942926547603922,0.497869574102834,0.139924037168949,3.84194937328078,0.146820805647934,0.138061178204614,0.00548440684875311,1.11870351689516,0.145760753051907,0.135387292236118,0.0623356846001655,0.0805003264295715,0.441512101894891,0.165296316182786 10 | 
"BaselTMA_SP41_126_X14Y7_9",0.554613318737411,1.20464396576545,0.577642773075184,1.67304306621983,1.56311983738021,1.72451674367243,2.46620106627875,2.20203835261983,2.14503544934655,1.5654611792378,1.98795262476879,0.107617950218012,0.112150935457405,1.37935198571939,0.294009768451207,0.946665099397284,1.02147974097619,0.421806244945565,0.421806244945565,0.792418752220183,0.106627993124653,1.63971011636084,0.301515601607164,0.380333597838085,0.380333597838085,0.501687379831502,1.09628041204639,0.0983596724637716,0.130034790876899,0.207883573332857,0.00761080638330121,0.00761080638330121,0.456610750890645,0.0896210293968119,4.04298577103731,0.0630212173416578,0.30240826572314,0.0373502981646327,0.826435124479329,0.121386468226999,0.227358881054528,0.152215241717528,0.126400073551901,0.420150652151168,0.156433841543841 11 | "BaselTMA_SP41_126_X14Y7_10",0.50913395949692,1.22058622932062,0.527537389178274,1.71558506204996,1.57943590200115,1.88985474424116,2.42839274009776,2.21243523037744,2.37355197431657,2.03232893709605,2.63087273640171,0.0327224360920273,0.325118021388983,2.38462296011013,0.265314858445117,1.18812379053559,1.05776762379102,0.387794912779111,0.387794912779111,1.13720884838478,0.0328188609439452,1.90993683854487,0.274004114584755,0.406666784475912,0.406666784475912,0.407673921593465,1.06827540001609,0.0950650365792463,0.245997127041375,0.0785149333702716,0.047644453635302,0.047644453635302,0.848886628160954,0.253882640339735,3.7418323410385,0.0490141226369128,0.237072417186584,0.0390821080535325,0.6743577041355,0.276578525276985,0.435942418245782,0.128295309976712,0.109421290304796,0.600302307273537,0.0731060951400031 12 | 
"BaselTMA_SP41_126_X14Y7_11",0.608830025730392,1.25152160427507,0.344457228255887,1.61965905012165,1.48286884452225,1.85917210483041,2.37363944950833,2.06779669468505,2.36021253092643,1.44066417134664,2.06677271166277,0.0404141929581712,0.12726467468555,1.54903783373294,0.0604751369610325,0.359222746493563,0.886523169811,0.485117986598223,0.485117986598223,0.947499053795506,0.0205949432778254,1.2777115832032,0.202603895993156,0.441209489241267,0.441209489241267,0.299101119377199,0.773251382580783,0.0803546502009576,0.182464201381651,0.0690267366753256,0.0996816852290423,0.0996816852290423,0.424364356848866,0.168878213446364,3.61808628369392,0.0279386882643336,0.210603545166566,0.0169132921706009,0.946982221000932,0.0742639503865018,0.295328260059555,0.114796803504975,0.166931321669432,0.570790542230526,0.157564936689404 13 | "BaselTMA_SP41_126_X14Y7_12",0.443653863521087,1.11521645906745,0.445161935258024,1.5231109526391,1.40970203769659,1.67320377816335,2.26292386076838,2.03608392257045,2.02076231395761,1.62376201592666,2.17061498093648,0.043196895302235,0.12520524042104,1.51594569721507,0.125256550479163,0.441748454204352,0.647734958851728,0.399569200378657,0.399569200378657,0.653227727606058,0.0882794039711532,1.33235521133485,0.231975257499933,0.375581552251365,0.375581552251365,0.254370880024891,0.690528596276582,0.10419856924114,0.149967869948951,0.0378098305449082,0.0218063018879478,0.0218063018879478,0.583722859182437,0.0881895216705521,3.38219175382825,0.0428244977856538,0.13869240665371,0.0599346395170729,0.722333100485728,0.177645906369811,0.219370995303025,0.120532691355742,0.116622398400743,0.462027722684399,0.187038336411999 14 | 
"BaselTMA_SP41_126_X14Y7_13",0.552635706478001,1.20432262239729,0.378310835130766,1.63510577444156,1.41690446008722,1.78127973683651,2.36680011720726,2.11435223899551,2.32381821311352,1.59479116588725,2.26517405121178,0.112480049481033,0.239245288121319,1.62108943421845,0.330806345240921,0.334645762029237,0.786412825935814,0.442779032506085,0.442779032506085,0.882964054846778,0,1.53173118681668,0.198599000667451,0.438773757840415,0.438773757840415,0.25995743013612,0.654536032837098,0.031200957917212,0.160227078670634,0.0447750862101473,0.0905552952119225,0.0905552952119225,0.45047031162161,0.226610389074651,3.62791246277059,0.166247674839988,0.146081438430856,0.107837712233924,0.893368117120901,0.176641581400065,0.223902521302041,0.128088530757384,0.212955779303418,0.456351211042372,0.295319378680642 15 | "BaselTMA_SP41_126_X14Y7_14",0.574417195982534,1.12990422914934,0.541641878717257,1.53591767840073,1.50188838637846,1.80739010009332,2.3196965921288,2.09829587005942,2.14607730737287,2.1405145142947,2.70459065116314,0.0506444960015956,0.193189193398403,1.51787972746613,0.724698593287406,0.310107592706018,0.65930033597213,0.393258086767271,0.393258086767271,0.711733164298637,0.103964801006653,1.57208373745252,0.339913262103744,0.326502017534756,0.326502017534756,0.244289594026535,0.908101147756714,0.148476691716738,0.146458306754174,0.0109124331535944,0.0394267490474644,0.0394267490474644,0.670860722215978,0.141035262478867,3.71785832258826,0.15741410650475,0.111172598652131,0.0263364768806063,0.984096349800148,0.106150595113424,0.185679853415902,0.115548268432496,0.117830779333529,0.490467269038208,0.112207669469491 16 | 
"BaselTMA_SP41_126_X14Y7_15",0.445141373351121,0.983549809032694,0.537630323548894,1.48867791273499,1.35947910529097,1.69340884048838,2.23931092928345,2.01271251277941,2.07140504464823,1.68988351768388,2.24375827665897,0.0239659793004077,0.0961201340299801,1.48551664789288,0.013582111402314,0.330363942281392,0.721158464025108,0.42904638938744,0.42904638938744,0.711109588015052,0.0853290689952641,1.44040195243329,0.356297107828498,0.287137656814369,0.287137656814369,0.229259377531225,0.401458022810847,0.14242762842043,0.114452093006097,0.0211902227314997,0.101271569675967,0.101271569675967,0.6780266285872,0.308925481282205,3.44972173256667,0.0791919849231345,0.159892290753897,0.0433953036656595,0.954703828926171,0.150253142016165,0.29956040263481,0.120342892891634,0.123618400948608,0.612554704071968,1.53077728953142 17 | "BaselTMA_SP41_126_X14Y7_16",0.423224124113962,0.986487058046945,0.361349919667782,1.42214013904286,1.38399461532553,1.64234047781656,2.17922874397926,1.91237774683103,2.18052005538034,1.77461076402444,2.36074129925337,0.0584691410635083,0.154197683858691,1.67281129272234,0.296603607824735,0.69280541135784,0.626312549782295,0.473788819528183,0.473788819528183,0.767828846237009,0.0510328736045171,1.4454307208539,0.302253906556529,0.282563968677143,0.282563968677143,0.28150367519358,0.606046190982351,0.0842701746540736,0.215652452975277,0.0516370026272807,0.0324127454550036,0.0324127454550036,0.729162644025991,0.289874636366653,3.44824549456649,0.0606348083608117,0.150566193834433,0.0183875461655388,1.53535223790493,0.156964149606285,0.231969500388552,0.0925539915963191,0.08672909528526,0.420317833648897,0.221002661580778 18 | 
"BaselTMA_SP41_126_X14Y7_17",0.498678062102147,1.17781379532244,0.522682949700635,1.51458531530078,1.40931511169072,1.74668912260261,2.30674389457336,1.9950499197128,2.20254759244282,1.93580948875749,2.53811296009035,0.0962792182483904,0.0873258979628482,1.5202803378887,0.507469063395971,0.483955276339326,0.740435581379996,0.486125527866747,0.486125527866747,0.984527287596058,0.0885008911311593,1.59905638571433,0.157846277987796,0.179280825975571,0.179280825975571,0.244728271936502,0.660520682149438,0.0590714514463765,0.288551575565284,0.032123799210221,0.131254415692856,0.131254415692856,0.50692263890735,0.225686237019723,3.48779953460387,0.05774164794599,0.115660518792748,0.0631778616343239,1.76746974870673,0.162186983688376,0.271038520848474,0.1180840476944,0.181912414195249,0.581563621271709,0.197118834439433 19 | "BaselTMA_SP41_126_X14Y7_18",0.551295539365493,1.10544722602193,0.323332706594479,1.64705154912145,1.54944374772256,1.91626417786334,2.39241224670681,2.10341448766267,2.13209703295856,1.91448956634526,2.54847390364756,0.0153521513906716,0.154836931864209,1.47523230693975,0.365308909023129,0.293447296150629,0.533473450322357,0.509065035505689,0.509065035505689,0.770741695920629,0.112149112021286,1.15747577746381,0.179127668579003,0.197534320541308,0.197534320541308,0.453477219832709,1.08040945487045,0.240022210168695,0.297897890813239,0.197467929723259,0.121092621027361,0.121092621027361,0.622657670813786,0.223826120537171,3.71997508326293,0.0933103259939839,0.563758929281138,0.0383864502156451,1.52655587223294,0.036484511329554,0.124137599062604,0.138446692919138,0.156746296737235,0.535765892451459,0.666997378692888 20 | 
"BaselTMA_SP41_126_X14Y7_19",0.566751369369273,1.04381736994478,0.597380072662003,1.69952350465586,1.61695685736026,1.77902779771975,2.370488975039,2.0766191568681,2.39643995938127,1.02094989843374,1.44499924627171,0.124155173315278,0.0827724742166557,1.01722024284261,0.335172377899056,0.261676850385538,0.909118854078818,0.563572034094173,0.563572034094173,0.987686382296719,0.0919551040440306,1.91586952770624,0.425899072028039,0.406057368348545,0.406057368348545,0.281018087465073,0.714751505032067,0.21776186811034,0.28163326936891,0.0938841368378904,0.0749973388020032,0.0749973388020032,0.390677068825128,0.295840321093668,4.08808532517434,0.0439164726716929,0.128377803572521,0.00257318735542014,2.02313136906579,0.31693994469329,0.299870074804783,0.145659902493801,0.0762762995139594,0.419048297486753,0.214751513476061 21 | "BaselTMA_SP41_126_X14Y7_20",0.66216447205261,1.1427611101012,0.490070333357719,1.62368417169293,1.59785740644498,1.8401527504458,2.32389271944869,2.16085075584843,2.39643995938127,1.40970366801321,1.92997590130751,0.144374527614514,0.223263999536455,1.1968632350793,0.126631145552913,0.305905072125029,1.85360377737747,0.537817112447803,0.537817112447803,0.794852570414301,0.0916352917062696,1.52470417756647,0.307877295875169,0.420095498435575,0.420095498435575,0.413007222615918,1.43541192918613,0.107706760072633,0.197430435790434,0.0825285451229568,0.0671987386726368,0.0671987386726368,0.215556196177536,0.269852830634172,4.13241517362576,0.121888885437114,0.187165095535051,0.0133313540532762,1.90631147993183,0.135198919236669,0.413270320814482,0.0903610540119895,0.118018146697121,0.554547661556131,1.53077728953142 22 | 
"BaselTMA_SP41_126_X14Y7_21",0.66216447205261,1.19257303401254,0.577911685388714,1.75350788699315,1.59495334685388,1.90958686741675,2.54700273641141,2.22575680214045,2.39643995938127,1.82298955711786,2.37502666429573,0.0419339411977843,0.243867697719173,1.70523792026173,0.258877862483865,0.354543466923212,1.22474456543533,0.533824439689289,0.533824439689289,1.13720884838478,0,2.06030919677546,0.432965360289237,0.399308542075274,0.399308542075274,0.376220186482061,0.832276289654143,0.0850354663351016,0.235122656357751,0.0722896065411798,0.0446969733283998,0.0446969733283998,0.537523569205682,0.21128144308236,3.96540866078807,0.295401813144955,0.271044052875508,0.0373338464631744,2.20529157198546,0.27639138694643,0.502929413692908,0.132705797905321,0.143503918459105,0.60531233638389,0.160929108496762 23 | -------------------------------------------------------------------------------- /tests/test-data/test_rds.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camlab-bioml/astir/61fce25cff8baefcd4102d4f7bd990064740b0a6/tests/test-data/test_rds.rds -------------------------------------------------------------------------------- /tests/test-data/test_rds_design.csv: -------------------------------------------------------------------------------- 1 | "","(Intercept)","target_col" 2 | "1",1,30 3 | "2",1,6 4 | "3",1,22 5 | "4",1,27 6 | "5",1,18 7 | "6",1,37 8 | "7",1,11 9 | "8",1,3 10 | "9",1,32 11 | "10",1,15 12 | "11",1,11 13 | "12",1,6 14 | "13",1,6 15 | "14",1,39 16 | "15",1,15 17 | "16",1,39 18 | "17",1,3 19 | "18",1,1 20 | "19",1,6 21 | "20",1,16 22 | "21",1,3 23 | "22",1,3 24 | "23",1,38 25 | "24",1,18 26 | "25",1,15 27 | "26",1,6 28 | "27",1,16 29 | "28",1,6 30 | "29",1,3 31 | "30",1,6 32 | "31",1,3 33 | "32",1,11 34 | "33",1,18 35 | "34",1,15 36 | "35",1,3 37 | "36",1,29 38 | "37",1,3 39 | "38",1,15 40 | "39",1,14 41 | "40",1,3 42 | "41",1,3 43 | "42",1,11 44 | "43",1,4 45 | "44",1,3 46 | 
"45",1,6 47 | "46",1,16 48 | "47",1,22 49 | "48",1,15 50 | "49",1,37 51 | "50",1,5 52 | "51",1,15 53 | "52",1,3 54 | "53",1,35 55 | "54",1,3 56 | "55",1,15 57 | "56",1,15 58 | "57",1,6 59 | "58",1,3 60 | "59",1,6 61 | "60",1,6 62 | "61",1,10 63 | "62",1,6 64 | "63",1,6 65 | "64",1,3 66 | "65",1,19 67 | "66",1,6 68 | "67",1,16 69 | "68",1,16 70 | "69",1,15 71 | "70",1,15 72 | "71",1,6 73 | "72",1,3 74 | "73",1,3 75 | "74",1,17 76 | "75",1,15 77 | "76",1,9 78 | "77",1,6 79 | "78",1,18 80 | "79",1,3 81 | "80",1,16 82 | "81",1,6 83 | "82",1,6 84 | "83",1,11 85 | "84",1,11 86 | "85",1,15 87 | "86",1,11 88 | "87",1,16 89 | "88",1,15 90 | "89",1,4 91 | "90",1,16 92 | "91",1,6 93 | "92",1,18 94 | "93",1,6 95 | "94",1,35 96 | "95",1,12 97 | "96",1,7 98 | "97",1,5 99 | "98",1,15 100 | "99",1,8 101 | "100",1,6 102 | "101",1,16 103 | "102",1,3 104 | "103",1,37 105 | "104",1,11 106 | "105",1,8 107 | "106",1,18 108 | "107",1,3 109 | "108",1,4 110 | "109",1,18 111 | "110",1,4 112 | "111",1,15 113 | "112",1,11 114 | "113",1,3 115 | "114",1,32 116 | "115",1,4 117 | "116",1,18 118 | "117",1,39 119 | "118",1,5 120 | "119",1,16 121 | "120",1,6 122 | "121",1,9 123 | "122",1,29 124 | "123",1,6 125 | "124",1,6 126 | "125",1,11 127 | "126",1,3 128 | "127",1,15 129 | "128",1,20 130 | "129",1,4 131 | "130",1,2 132 | "131",1,6 133 | "132",1,3 134 | "133",1,16 135 | "134",1,18 136 | "135",1,6 137 | "136",1,18 138 | "137",1,30 139 | "138",1,3 140 | "139",1,3 141 | "140",1,6 142 | "141",1,15 143 | "142",1,6 144 | "143",1,17 145 | "144",1,6 146 | "145",1,18 147 | "146",1,5 148 | "147",1,15 149 | "148",1,6 150 | "149",1,11 151 | "150",1,23 152 | "151",1,9 153 | "152",1,35 154 | "153",1,15 155 | "154",1,6 156 | "155",1,18 157 | "156",1,3 158 | "157",1,18 159 | "158",1,11 160 | "159",1,35 161 | "160",1,31 162 | "161",1,6 163 | "162",1,4 164 | "163",1,3 165 | "164",1,12 166 | "165",1,3 167 | "166",1,3 168 | "167",1,29 169 | "168",1,18 170 | "169",1,3 171 | "170",1,8 172 | "171",1,8 173 | "172",1,6 
class TestBinAstir(unittest.TestCase):
    """End-to-end tests for the ``bin/astir`` command-line script.

    Every test runs the script in a subprocess against the bundled test data
    and then inspects the CSV file it writes to ``self.output_file``.
    """

    def __init__(self, *args, **kwargs):
        super(TestBinAstir, self).__init__(*args, **kwargs)
        test_dir = os.path.dirname(__file__)
        self.exec_path = os.path.join(rootpath.detect(), "bin/astir")
        self.expr_csv_file = os.path.join(test_dir, "test-data/test_data.csv")
        self.marker_yaml_file = os.path.join(
            test_dir, "test-data/jackson-2020-markers.yml"
        )
        self.output_file = os.path.join(test_dir, "output")

        # Loaded here so the tests can compare the script's output against
        # the inputs it was given.
        self.expr = pd.read_csv(self.expr_csv_file, index_col=0)
        with open(self.marker_yaml_file, "r") as stream:
            self.marker_dict = yaml.safe_load(stream)

    def _run_astir(self, extra_args=()):
        """Run the script with the ``state`` argument and require a clean exit.

        The command is passed as an argument list (not a split string) so
        paths containing whitespace survive, and it is launched with the
        interpreter that is running the tests.

        :param extra_args: extra command-line tokens appended after the
            positional arguments; each one is converted with ``str``
        """
        command = [
            sys.executable,
            "-W",
            "ignore",
            self.exec_path,
            "state",
            self.expr_csv_file,
            self.marker_yaml_file,
            self.output_file,
        ]
        command.extend(str(arg) for arg in extra_args)

        result = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        # stderr is only reported on failure: a successful run may still
        # print warnings there.
        self.assertEqual(
            result.returncode,
            0,
            "bin/astir exited with code {}:\n{}".format(
                result.returncode, result.stderr.decode("utf-8", errors="replace")
            ),
        )

    def test_basic_command(self):
        """The script writes one row per input cell and one column per state."""
        warnings.filterwarnings("ignore", category=UserWarning)

        self._run_astir()

        read_output = pd.read_csv(self.output_file, index_col=0)
        self.assertEqual(len(read_output), len(self.expr))

        states = self.marker_dict["cell_states"].keys()
        self.assertEqual(len(read_output.columns), len(states))

    def test_command_all_flags(self):
        """The script run with every flag agrees with ``Astir`` run in-process."""
        warnings.filterwarnings("ignore", category=UserWarning)
        design = None
        max_epochs = 2
        lr = 1e-1
        batch_size = 128
        random_seed = 1234
        n_init = 1
        n_init_epochs = 1
        dtype = torch.float64
        delta_loss = 1e-3
        delta_loss_batch = 10

        # Values are stringified by _run_astir, so ``design`` is passed to
        # the script as the literal text "None" and ``dtype`` as
        # "torch.float64".
        self._run_astir(
            [
                "--design", design,
                "--max_epochs", max_epochs,
                "--learning_rate", lr,
                "--batch_size", batch_size,
                "--random_seed", random_seed,
                "--n_init", n_init,
                "--n_init_epochs", n_init_epochs,
                "--dtype", dtype,
                "--delta_loss", delta_loss,
                "--delta_loss_batch", delta_loss_batch,
            ]
        )

        # Create Astir object to compare
        ast = Astir(
            input_expr=self.expr,
            marker_dict=self.marker_dict,
            design=design,
            random_seed=random_seed,
            dtype=dtype,
        )

        ast.fit_state(
            max_epochs=max_epochs,
            learning_rate=lr,
            batch_size=batch_size,
            delta_loss=delta_loss,
            n_init=n_init,
            n_init_epochs=n_init_epochs,
            delta_loss_batch=delta_loss_batch,
        )

        expected_assign = ast.get_cellstates()
        actual_assign = pd.read_csv(self.output_file, index_col=0)
        self.assertEqual(len(expected_assign), len(actual_assign))
        self.assertTrue((expected_assign.columns == actual_assign.columns).all())

        # Both runs use the same seed and settings; allow a small numerical
        # difference between the two results.
        self.assertTrue(
            (abs(actual_assign.to_numpy() - expected_assign.to_numpy()) < 0.01).all()
        )
def test_docstrings_exists_all_methods(self):
    """Require a docstring on every member defined directly on each class.

    For each class in ``self.classes`` the entries of the class ``__dict__``
    are inspected, except ``__module__`` and ``__init__``. Any entry whose
    ``__doc__`` is ``None`` is reported by name in the failure message.
    """
    for cl in self.classes:
        members = dict(cl.__dict__)

        # These two entries are exempt from the check. A default is given so
        # that a class which does not define its own ``__init__`` is still
        # checked instead of aborting the test with a KeyError.
        members.pop("__module__", None)
        members.pop("__init__", None)

        no_doc = [name for name, member in members.items() if member.__doc__ is None]

        err_msg = "{} has methods without docstring: {}".format(
            cl.__name__, ", ".join(no_doc)
        )
        self.assertEqual(no_doc, [], err_msg)
def test_correct_return_types_and_detect_code_smells(self):
    """Run mypy on every file in ``self.class_paths`` and require a clean report.

    A file fails when any line of mypy's standard output contains ``error:``
    or when mypy writes anything at all to standard error.
    """
    import os
    import subprocess

    import rootpath

    mypy_flags = ["--ignore-missing-imports", "--no-site-packages"]

    for cl_path in self.class_paths:
        target = os.path.join(rootpath.detect(), cl_path)

        finished = subprocess.run(
            ["mypy", target] + mypy_flags,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        report = finished.stdout.decode("utf-8")
        stderr = finished.stderr.decode("utf-8")

        # Keep only the report lines that mypy marks as errors.
        errors = [line for line in report.split("\n") if "error:" in line]

        self.assertTrue(
            errors == [],
            "Following errors were produced by MyPy: \n{}".format("\n".join(errors)),
        )
        self.assertTrue(
            stderr == "", "Following error were produced by MyPy: {}".format(stderr)
        )
def run_notebook(notebook_path):
    """Execute a notebook and collect the error outputs of its cells.

    The notebook is read as nbformat version 4 and executed with the
    ``python3`` kernel, using ``docs/tutorials/notebooks`` under the project
    root as the working directory. Execution continues past failing cells
    (``allow_errors``), so every error is gathered instead of stopping at the
    first one.

    :param notebook_path: path of the ``.ipynb`` file to execute
    :return: tuple ``(nb, errors)`` with the executed notebook and the list
        of cell outputs whose ``output_type`` is ``"error"``
    """
    # Read with an explicit encoding rather than the platform default.
    with open(notebook_path, encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)

    # Configure the notebook execution mode
    proc = ExecutePreprocessor(timeout=600, kernel_name="python3")
    proc.allow_errors = True

    # Run the notebook
    path = os.path.join(rootpath.detect(), "docs/tutorials/notebooks")
    proc.preprocess(nb, {"metadata": {"path": path}})

    # Collect all errors; cells without an "outputs" entry are skipped.
    errors = [
        output
        for cell in nb.cells
        if "outputs" in cell
        for output in cell["outputs"]
        if output.output_type == "error"
    ]

    return nb, errors
run_notebook(fn) 53 | self.assertEqual(errors, [], "Unexpected error in {}".format(fn)) 54 | 55 | 56 | if __name__ == "__main__": 57 | unittest.main() 58 | --------------------------------------------------------------------------------