├── .github └── workflows │ ├── python-package-release.yml │ └── python-package.yml ├── .gitignore ├── LICENSE ├── README.md ├── scanpy-scripts-tests.bats ├── scanpy_scripts ├── __init__.py ├── cli.py ├── click_utils.py ├── cmd_options.py ├── cmd_utils.py ├── cmds.py ├── lib │ ├── __init__.py │ ├── _bbknn.py │ ├── _combat.py │ ├── _diffexp.py │ ├── _diffmap.py │ ├── _dpt.py │ ├── _fdg.py │ ├── _filter.py │ ├── _hvg.py │ ├── _leiden.py │ ├── _louvain.py │ ├── _mnn.py │ ├── _neighbors.py │ ├── _norm.py │ ├── _paga.py │ ├── _pca.py │ ├── _read.py │ ├── _scrublet.py │ ├── _tsne.py │ └── _umap.py └── obj_utils.py ├── setup.py └── test-env.yaml /.github/workflows/python-package-release.yml: -------------------------------------------------------------------------------- 1 | name: Python package release 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Set up Python 3 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: '3.x' 20 | 21 | - name: Install dependencies 22 | run: | 23 | pip install -U setuptools>=40.1 24 | 25 | - name: Build dists 26 | run: | 27 | pip install wheel 28 | python setup.py sdist bdist_wheel 29 | 30 | - name: Publish to PyPI 31 | uses: pypa/gh-action-pypi-publish@master 32 | with: 33 | user: __token__ 34 | password: ${{ secrets.pypi_password }} 35 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: [pull_request] 4 | 5 | defaults: 6 | run: 7 | # for conda env activation 8 | shell: bash -l {0} 9 | 10 | jobs: 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ["3.8", "3.9"] 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | 21 | - name: Setup mamba 22 | uses: mamba-org/setup-micromamba@v2 23 
| with: 24 | environment-file: test-env.yaml 25 | cache-downloads: true 26 | condarc: | 27 | channels: 28 | - conda-forge 29 | - bioconda 30 | - defaults 31 | create-args: | 32 | python=${{ matrix.python-version }} 33 | 34 | - name: Run black manually 35 | run: | 36 | black --check --verbose ./ 37 | 38 | # - name: Install dependencies 39 | # run: | 40 | # sudo apt-get install libhdf5-dev 41 | # pip install -U setuptools>=40.1 wheel 'cmake<3.20' pytest 42 | # pip install $(pwd)/scanpy-scripts 43 | # # python -m pip install $(pwd)/scanpy --no-deps --ignore-installed -vv 44 | 45 | - name: Run unit tests 46 | run: | 47 | # needed for __version__ to be available 48 | pip install . --no-deps --ignore-installed 49 | pytest --doctest-modules -v ./ 50 | 51 | - name: Test with bats 52 | run: | 53 | ./scanpy-scripts-tests.bats 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea/ 2 | /build/ 3 | /dist/ 4 | /*.egg-info/ 5 | /post_install_tests/ 6 | *cache*/ 7 | *.pyc 8 | /.*history 9 | /.*swp 10 | data 11 | compressed 12 | uncompressed 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scanpy-scripts [![Anaconda-Server Badge](https://anaconda.org/bioconda/scanpy-scripts/badges/installer/conda.svg)](https://anaconda.org/bioconda/scanpy-scripts) 2 | 3 | A command-line interface for functions of the Scanpy suite, to facilitate flexible construction of workflows, for example in Galaxy, Nextflow, Snakemake etc. 4 | 5 | ## Install 6 | 7 | The recommended way of using this package is through the latest container produced by Bioconda [here](https://quay.io/repository/biocontainers/scanpy-scripts?tab=tags). If you must, one can install scanpy-scripts via conda: 8 | 9 | ```bash 10 | conda install scanpy-scripts 11 | ``` 12 | 13 | pip installation is also possible, however the version of mnnpy is not patched as in the conda version, and so the `integrate` command will not work. 14 | 15 | ```bash 16 | pip install scanpy-scripts 17 | ``` 18 | 19 | For development installation, we suggest following the github actions python-package.yml file. 
20 | 21 | Currently, tests run on python 3.9, so those are the recommended versions if not installing via conda. BKNN doesn't currently install on Python 3.10 due to a skip in Bioconda. 22 | 23 | ## Test installation 24 | 25 | There is an example script included: 26 | 27 | ```bash 28 | scanpy-scripts-tests.bats 29 | ``` 30 | 31 | This requires the [bats](https://github.com/sstephenson/bats) testing framework to run. The script downloads [a well-known test 10X dataset]('https://s3-us-west-2.amazonaws.com/10x.files/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz) and executes all of the commands described below. 32 | 33 | ## Commands 34 | 35 | Available commands are described below. Each has usage instructions available via `--help`, consult function documentation in scanpy for further details. 36 | 37 | ``` 38 | Usage: scanpy-cli [OPTIONS] COMMAND [ARGS]... 39 | 40 | Command line interface to [scanpy](https://github.com/theislab/scanpy) 41 | 42 | Options: 43 | --debug Print debug information 44 | --verbosity INTEGER Set scanpy verbosity 45 | --njobs INTEGER Set scanpy default number of jobs/CPUs, defaults to 1 46 | --version Show the version and exit. 47 | --help Show this message and exit. 48 | 49 | Commands: 50 | read Read 10x data and save in specified format. 51 | filter Filter data based on specified conditions. 52 | norm Normalise data per cell. 53 | hvg Find highly variable genes. 54 | scale Scale data per gene. 55 | regress Regress-out observation variables. 56 | pca Dimensionality reduction by PCA. 57 | neighbor Compute a neighbourhood graph of observations. 58 | embed Embed cells into two-dimensional space. 59 | cluster Cluster cells into sub-populations. 60 | diffexp Find markers for each clusters. 61 | paga Trajectory inference by abstract graph analysis. 62 | dpt Calculate diffusion pseudotime relative to the root cells. 63 | integrate Integrate cells from different experimental batches. 64 | multiplet Execute methods for multiplet removal. 
65 | plot Visualise data. 66 | ``` 67 | 68 | ## Versioning 69 | 70 | Major and major versions will follow the scanpy versions. The first digit of the patch should follow the scanpy patch version as well, subsequent digits in the patch are reserved for changes in this repository. 71 | -------------------------------------------------------------------------------- /scanpy-scripts-tests.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | # Extract the test data 4 | setup() { 5 | scanpy="scanpy-cli" 6 | test_dir="post_install_tests" 7 | data_dir="${test_dir}/data" 8 | output_dir="${test_dir}/outputs" 9 | raw_matrix="${data_dir}/matrix.mtx" 10 | raw_matrix_from_raw="${data_dir}/raw/matrix.mtx" 11 | raw_matrix_from_layer="${data_dir}/layer/matrix.mtx" 12 | singlet_obs="${data_dir}/singlet_obs.txt" 13 | batch_obs="${data_dir}/batch_obs.txt" 14 | read_opt="-x $data_dir --show-obj stdout" 15 | read_obj="${output_dir}/read.h5ad" 16 | filter_opt="--save-raw -p n_genes 200 2500 -p c:n_counts 0 50000 -p n_cells 3 inf -p pct_counts_mito 0 0.2 -c mito '!True' --show-obj stdout --export-mtx ${output_dir}/filtered --mtx-compression gzip" 17 | filter_obj="${output_dir}/filter.h5ad" 18 | filter_mtx_gz="${output_dir}/filtered_matrix.mtx.gz" 19 | test_clustering='louvain_k10_r0_5' 20 | scrublet_tsv="${output_dir}/scrublet.tsv" 21 | scrublet_png="${output_dir}/scrublet.png" 22 | scrublet_obj="${output_dir}/scrublet.h5ad" 23 | scrublet_batched_obj="${output_dir}/scrublet_batched.h5ad" 24 | scrublet_simulate_obj="${output_dir}/scrublet_simulate.h5ad" 25 | scrublet_opt="--input-obj-sim ${scrublet_simulate_obj} --filter --export-table ${scrublet_tsv}" 26 | scrublet_batched_opt="--filter --batch-key batch" 27 | norm_mtx="${output_dir}/norm" 28 | norm_opt="--save-layer filtered -t 10000 -l all -n after -X ${norm_mtx} --show-obj stdout" 29 | norm_obj="${output_dir}/norm.h5ad" 30 | hvg_opt="-m 0.0125 3 -d 0.5 inf -s --show-obj 
stdout" 31 | always_hvg="${data_dir}/always_hvg.txt" 32 | never_hvg="${data_dir}/never_hvg.txt" 33 | hvg_opt_always_never="--always-hv-genes-file ${always_hvg} --never-hv-genes-file ${never_hvg}" 34 | hvg_obj="${output_dir}/hvg.h5ad" 35 | hvg_obj_on_off="${output_dir}/hvg_on_off.h5ad" 36 | regress_opt="-k n_counts --show-obj stdout" 37 | regress_obj="${output_dir}/regress.h5ad" 38 | scale_opt="--save-layer normalised -m 10 --show-obj stdout" 39 | scale_obj="${output_dir}/scale.h5ad" 40 | pca_embed="${output_dir}/pca.tsv" 41 | pca_opt="--n-comps 50 -V auto --show-obj stdout -E ${pca_embed}" 42 | pca_obj="${output_dir}/pca.h5ad" 43 | neighbor_opt="-k 5,10,20 -n 25 -m umap -t euclidean --show-obj stdout" 44 | neighbor_obj="${output_dir}/neighbor.h5ad" 45 | tsne_embed="${output_dir}/tsne.tsv" 46 | tsne_opt="-n 25 --use-rep X_pca --learning-rate 200 -E ${tsne_embed}" 47 | tsne_obj="${output_dir}/tsne.h5ad" 48 | umap_embed="${output_dir}/umap.tsv" 49 | umap_opt="--neighbors-key k10 --min-dist 0.75 --alpha 1 --gamma 1 -E ${umap_embed}" 50 | umap_obj="${output_dir}/umap.h5ad" 51 | fdg_embed="${output_dir}/fdg.tsv" 52 | fdg_opt="--neighbors-key k10 --layout fr -E ${fdg_embed} --init-pos paga" 53 | fdg_obj="${output_dir}/fdg.h5ad" 54 | louvain_tsv="${output_dir}/louvain.tsv" 55 | louvain_opt="-r 0.1,0.5,1 --neighbors-key k10 --key-added k10 --export-cluster ${louvain_tsv}" 56 | louvain_obj="${output_dir}/louvain.h5ad" 57 | leiden_tsv="${output_dir}/leiden.tsv" 58 | leiden_opt="-r 0.3,0.7 --neighbors-key k10 --key-added k10 -F loom --loom-write-obsm-varm --export-cluster ${leiden_tsv}" 59 | leiden_obj="${output_dir}/leiden.loom" 60 | diffexp_tsv="${output_dir}/diffexp.tsv" 61 | diffexp_opt="-g ${test_clustering} --reference rest --filter-params min_in_group_fraction:0.25,min_fold_change:1.5 --save ${diffexp_tsv}" 62 | diffexp_obj="${output_dir}/diffexp.h5ad" 63 | test_singlet_clustering='groupby_with_singlet' 64 | diffexp_singlet_tsv="${output_dir}/diffexp_singlet.tsv" 65 | 
diffexp_singlet_opt="-g ${test_singlet_clustering} --reference rest --filter-params min_in_group_fraction:0.25,min_fold_change:1.5 --save ${diffexp_singlet_tsv}" 66 | diffexp_singlet_obj="${output_dir}/diffexp_singlet.h5ad" 67 | paga_opt="--neighbors-key k10 --key-added ${test_clustering} --groups ${test_clustering} --model v1.2" 68 | paga_obj="${output_dir}/paga.h5ad" 69 | diffmap_embed="${output_dir}/diffmap.tsv" 70 | diffmap_opt="--neighbors-key k10 --n-comps 10 -E ${diffmap_embed}" 71 | diffmap_obj="${output_dir}/diffmap.h5ad" 72 | dpt_opt="--neighbors-key k10 --key-added k10 --n-dcs 10 --disallow-kendall-tau-shift --root ${test_clustering} 0" 73 | dpt_obj="${output_dir}/dpt.h5ad" 74 | plt_embed_opt="--projection 2d --color ${test_clustering} --title test" 75 | plt_embed_pdf="${output_dir}/umap_${test_clustering}.pdf" 76 | plt_paga_pdf="${output_dir}/paga_k10_r0_7.pdf" 77 | plt_paga_obj="${output_dir}/paga_k10_r0_7.h5ad" 78 | plt_paga_opt="--use-key paga_${test_clustering} --node-size-scale 2 --edge-width-scale 0.5 --basis diffmap --color dpt_pseudotime_k10 --frameoff --output-obj $plt_paga_obj" 79 | test_markers='LDHB,CD3D,CD3E' 80 | diffexp_plot_opt="--var-names $test_markers --use-raw --dendrogram --groupby ${test_clustering}" 81 | plt_stacked_violin_opt="${diffexp_plot_opt} --no-jitter --swap-axes" 82 | plt_stacked_violin_pdf="${output_dir}/sviolin_${test_clustering}_LDHB_CD3D_CD3E.pdf" 83 | plt_dotplot_pdf="${output_dir}/dot_${test_clustering}_LDHB_CD3D_CD3E.pdf" 84 | plt_matrixplot_pdf="${output_dir}/matrix_${test_clustering}_LDHB_CD3D_CD3E.pdf" 85 | plt_heatmap_pdf="${output_dir}/heatmap_${test_clustering}_LDHB_CD3D_CD3E.pdf" 86 | plt_rank_genes_groups_opt="--rgg --groups 3,4" 87 | plt_rank_genes_groups_singlet_opt="--rgg" 88 | plt_rank_genes_groups_stacked_violin_pdf="${output_dir}/rggsviolin_${test_clustering}.pdf" 89 | plt_rank_genes_groups_matrix_pdf="${output_dir}/rggmatrix_${test_clustering}.pdf" 90 | 
plt_rank_genes_groups_dot_pdf="${output_dir}/rggdot_${test_clustering}.pdf" 91 | plt_rank_genes_groups_dot_singlet_pdf="${output_dir}/rggdot_${test_singlet_clustering}.pdf" 92 | plt_rank_genes_groups_heatmap_pdf="${output_dir}/rggheatmap_${test_clustering}.pdf" 93 | harmony_integrate_obj="${output_dir}/harmony_integrate.h5ad" 94 | harmony_integrate_opt="--batch-key ${test_clustering}" 95 | harmony_plt_embed_opt="--projection 2d --color ${test_clustering} --title 'PCA embeddings after harmony' --basis 'X_pca_harmony'" 96 | noharmony_plt_embed_opt="--projection 2d --color ${test_clustering} --title 'PCA embeddings before harmony' --basis 'X_pca'" 97 | harmony_integrated_pca_pdf="${output_dir}/harmony_pca_${test_clustering}.pdf" 98 | noharmony_integrated_pca_pdf="${output_dir}/pca_${test_clustering}.pdf" 99 | bbknn_obj="${output_dir}/bbknn.h5ad" 100 | bbknn_opt="--batch-key ${test_clustering} --key-added bbknn" 101 | mnn_obj="${output_dir}/mnn.h5ad" 102 | mnn_opt="--save-layer uncorrected --batch-key ${test_clustering}" 103 | combat_obj="${output_dir}/combat.h5ad" 104 | combat_opt="--batch-key ${test_clustering}" 105 | 106 | 107 | if [ ! -d "$data_dir" ]; then 108 | mkdir -p $data_dir 109 | fi 110 | 111 | if [ ! 
-d "$output_dir" ]; then 112 | mkdir -p $output_dir 113 | fi 114 | } 115 | 116 | @test "Extract test data from Scanpy" { 117 | if [ "$resume" = 'true' ] && [ -f "$raw_matrix" ]; then 118 | skip "$raw_matrix exists" 119 | fi 120 | 121 | run rm -rf ${data_dir}/* && eval "echo -e \"import scanpy as sc\nfrom scanpy_scripts.cmd_utils import write_mtx\nimport os\nos.makedirs('$data_dir', exist_ok=True)\nwrite_mtx(sc.datasets.pbmc3k(), '$data_dir/')\" | python" 122 | 123 | [ "$status" -eq 0 ] 124 | [ -f "$raw_matrix" ] 125 | } 126 | 127 | @test "Test MTX write from .raw" { 128 | if [ "$resume" = 'true' ] && [ -f "$raw_matrix_from_raw" ]; then 129 | skip "$raw_matrix exists" 130 | fi 131 | 132 | run rm -rf ${data_dir}/raw/* && eval "echo -e \"import scanpy as sc\nfrom scanpy_scripts.cmd_utils import write_mtx\nimport os\nos.makedirs('$data_dir/raw', exist_ok=True)\nadata=sc.datasets.pbmc3k();adata.raw=adata\nwrite_mtx(adata, '$data_dir/raw/', use_raw=True)\" | python" 133 | 134 | [ "$status" -eq 0 ] 135 | [ -f "$raw_matrix_from_raw" ] 136 | } 137 | 138 | @test "Add genes to be considered HVGs" { 139 | if [ "$resume" = 'true' ] && [ -f "$always_hvg" ]; then 140 | skip "$always_hvg exists" 141 | fi 142 | 143 | run eval "echo -e 'MIR1302-10\nFAM138A' > $always_hvg" 144 | } 145 | 146 | @test "Add genes not to be considered HVGs" { 147 | if [ "$resume" = 'true' ] && [ -f "$never_hvg" ]; then 148 | skip "$never_hvg exists" 149 | fi 150 | 151 | run eval "echo -e 'ISG15\nTNFRSF4' > $never_hvg" 152 | } 153 | 154 | @test "Test MTX write from layers" { 155 | if [ "$resume" = 'true' ] && [ -f "$raw_matrix_from_layer" ]; then 156 | skip "$raw_matrix exists" 157 | fi 158 | 159 | run rm -rf ${data_dir}/layer/* && eval "echo -e \"import scanpy as sc\nfrom scanpy_scripts.cmd_utils import write_mtx\nimport os\nos.makedirs('$data_dir/layer', exist_ok=True)\nadata=sc.datasets.pbmc3k();adata.layers['test']=adata.X\nwrite_mtx(adata, '$data_dir/layer/', use_layer='test')\" | python" 160 | 161 | 
[ "$status" -eq 0 ] 162 | [ -f "$raw_matrix_from_layer" ] 163 | } 164 | 165 | @test "Make .obs with a singlet cell group" { 166 | 167 | if [ "$resume" = 'true' ] && [ -f "$singlet_obs" ]; then 168 | skip "$singlet_obs exists" 169 | fi 170 | 171 | run rm -rf $singlet_obs && eval "echo -e \"index\tgroupby_with_singlet\" > $singlet_obs && head -n 1 $data_dir/barcodes.tsv | awk -v cluster='cluster1' '{print \$1\"\t\"cluster}' >> $singlet_obs && sed -n '2,100p;101q' $data_dir/barcodes.tsv | awk -v cluster='cluster3' '{print \$1\"\t\"cluster}' >> $singlet_obs && tail -n +101 $data_dir/barcodes.tsv | awk -v cluster='cluster2' '{print \$1\"\t\"cluster}' >> $singlet_obs" 172 | 173 | [ "$status" -eq 0 ] 174 | [ -f "$singlet_obs" ] 175 | } 176 | 177 | @test "Make a batch variable" { 178 | 179 | if [ "$resume" = 'true' ] && [ -f "$batch_obs" ]; then 180 | skip "$singlet_obs exists" 181 | fi 182 | 183 | run rm -rf $batch_obs && echo -e "batch\n$(printf "%0.sbatch1\n" {1..1350})\n$(printf "%0.sbatch2\n" {1..1350})" > $batch_obs 184 | 185 | [ "$status" -eq 0 ] 186 | [ -f "$batch_obs" ] 187 | } 188 | 189 | # Read 10x dataset 190 | 191 | @test "Scanpy object creation from 10x" { 192 | if [ "$resume" = 'true' ] && [ -f "$read_obj" ]; then 193 | skip "$read_obj exists and resume is set to 'true'" 194 | fi 195 | 196 | run rm -f $read_obj && eval "paste -d $'\t' $singlet_obs $batch_obs > obs.txt && $scanpy read --extra-obs obs.txt $read_opt $read_obj" 197 | 198 | [ "$status" -eq 0 ] 199 | [ -f "$read_obj" ] 200 | } 201 | 202 | # Filter 203 | 204 | @test "Filter cells and genes from a raw object" { 205 | if [ "$resume" = 'true' ] && [ -f "$filter_obj" ]; then 206 | skip "$filter_obj exists and resume is set to 'true'" 207 | fi 208 | 209 | run rm -f $filter_obj && eval "$scanpy filter $filter_opt $read_obj $filter_obj" 210 | 211 | [ "$status" -eq 0 ] 212 | [ -f "$filter_obj" ] 213 | [ -f "$filter_mtx_gz" ] 214 | } 215 | 216 | # Normalise 217 | 218 | @test "Normalise expression values per 
cell" { 219 | if [ "$resume" = 'true' ] && [ -f "$norm_obj" ]; then 220 | skip "$norm_obj exists and resume is set to 'true'" 221 | fi 222 | 223 | run rm -f $norm_obj && eval "$scanpy norm $norm_opt $filter_obj $norm_obj" 224 | 225 | [ "$status" -eq 0 ] 226 | [ -f "$norm_obj" ] && [ -f "${norm_mtx}_matrix.mtx" ] 227 | } 228 | 229 | # Find variable genes 230 | 231 | @test "Find variable genes" { 232 | if [ "$resume" = 'true' ] && [ -f "$hvg_obj" ]; then 233 | skip "$hvg_obj exists and resume is set to 'true'" 234 | fi 235 | 236 | run rm -f $hvg_obj $hvg_obj && eval "$scanpy hvg $hvg_opt $norm_obj $hvg_obj" 237 | 238 | [ "$status" -eq 0 ] 239 | [ -f "$hvg_obj" ] 240 | } 241 | 242 | @test "Find variable genes with optional turn on/off lists" { 243 | if [ "$resume" = 'true' ] && [ -f "$hvg_obj_on_off" ]; then 244 | skip "$hvg_obj_on_off exists and resume is set to 'true'" 245 | fi 246 | 247 | run rm -f $hvg_obj_on_off && eval "$scanpy hvg $hvg_opt_always_never $norm_obj $hvg_obj_on_off" 248 | } 249 | 250 | # Do separate doublet simulation step (normally we'd just let the main scrublet 251 | # process do this). 
252 | 253 | @test "Run Scrublet doublet simulation" { 254 | if [ "$resume" = 'true' ] && [ -f "$scrublet_simulate_obj" ]; then 255 | skip "$scrublet_simulate_obj exists and resume is set to 'true'" 256 | fi 257 | 258 | run rm -f $scrublet_simulate_obj && eval "$scanpy multiplet scrublet_simulate_doublets $hvg_obj $scrublet_simulate_obj" 259 | 260 | [ "$status" -eq 0 ] 261 | [ -f "$scrublet_simulate_obj" ] 262 | } 263 | 264 | # Detect multiplets with Scrublet 265 | 266 | @test "Run Scrublet for multiplet detection" { 267 | if [ "$resume" = 'true' ] && [ -f "$scrublet_obj" ]; then 268 | skip "$scrublet_obj exists and resume is set to 'true'" 269 | fi 270 | 271 | run rm -f $scrublet_obj && eval "$scanpy multiplet scrublet $scrublet_opt $hvg_obj $scrublet_obj" 272 | 273 | [ "$status" -eq 0 ] 274 | [ -f "$scrublet_obj" ] && [ -f "$scrublet_tsv" ] 275 | } 276 | 277 | # Run the doublet plot from Scrublet 278 | 279 | @test "Run Scrublet score distribution plot" { 280 | if [ "$resume" = 'true' ] && [ -f "$scrublet_png" ]; then 281 | skip "$scrublet_png exists and resume is set to 'true'" 282 | fi 283 | 284 | run rm -f $scrublet_png && eval "$scanpy plot scrublet $scrublet_obj $scrublet_png" 285 | 286 | [ "$status" -eq 0 ] 287 | [ -f "$scrublet_png" ] 288 | } 289 | 290 | # Detect multiplets with Scrublet (batched) 291 | 292 | @test "Run Scrublet for multiplet detection (batched)" { 293 | if [ "$resume" = 'true' ] && [ -f "$scrublet_batched_obj" ]; then 294 | skip "$scrublet_batched_obj exists and resume is set to 'true'" 295 | fi 296 | 297 | run rm -f $scrublet_batched_obj && eval "$scanpy multiplet scrublet $scrublet_batched_opt $read_obj $scrublet_batched_obj" 298 | 299 | [ "$status" -eq 0 ] 300 | [ -f "$scrublet_batched_obj" ] 301 | } 302 | 303 | 304 | # Regress out variables 305 | 306 | @test "Regress out unwanted variable" { 307 | if [ "$resume" = 'true' ] && [ -f "$regress_obj" ]; then 308 | skip "$regress_obj exists and resume is set to 'true'" 309 | fi 310 | 311 | run 
rm -f $regress_obj && eval "$scanpy regress $regress_opt $hvg_obj $regress_obj" 312 | 313 | [ "$status" -eq 0 ] 314 | [ -f "$regress_obj" ] 315 | } 316 | 317 | # Scale expression values 318 | 319 | @test "Scale expression values" { 320 | if [ "$resume" = 'true' ] && [ -f "$scale_obj" ]; then 321 | skip "$scale_obj exists and resume is set to 'true'" 322 | fi 323 | 324 | run rm -f $scale_obj && eval "$scanpy scale $scale_opt $hvg_obj $scale_obj" 325 | 326 | [ "$status" -eq 0 ] 327 | [ -f "$scale_obj" ] 328 | } 329 | 330 | # Run PCA 331 | 332 | @test "Run principal component analysis" { 333 | if [ "$resume" = 'true' ] && [ -f "$pca_obj" ]; then 334 | skip "$pca_obj exists and resume is set to 'true'" 335 | fi 336 | 337 | run rm -f $pca_obj && eval "$scanpy pca $pca_opt $scale_obj $pca_obj" 338 | 339 | [ "$status" -eq 0 ] 340 | [ -f "$pca_obj" ] 341 | } 342 | 343 | # Compute graph 344 | 345 | @test "Run compute neighbor graph" { 346 | if [ "$resume" = 'true' ] && [ -f "$neighbor_obj" ]; then 347 | skip "$neighbor_obj exists and resume is set to 'true'" 348 | fi 349 | 350 | run rm -f $neighbor_obj && eval "$scanpy neighbor $neighbor_opt $pca_obj $neighbor_obj" 351 | 352 | [ "$status" -eq 0 ] 353 | [ -f "$neighbor_obj" ] 354 | } 355 | 356 | # Run TSNE 357 | 358 | @test "Run TSNE analysis" { 359 | if [ "$resume" = 'true' ] && [ -f "$tsne_obj" ]; then 360 | skip "$tsne_obj exists and resume is set to 'true'" 361 | fi 362 | 363 | run rm -f $tsne_obj && eval "$scanpy embed tsne $tsne_opt $pca_obj $tsne_obj" 364 | 365 | [ "$status" -eq 0 ] 366 | [ -f "$tsne_obj" ] && [ -f "$tsne_embed" ] 367 | } 368 | 369 | # Run UMAP 370 | 371 | @test "Run UMAP analysis" { 372 | if [ "$resume" = 'true' ] && [ -f "$umap_obj" ]; then 373 | skip "$umap_obj exists and resume is set to 'true'" 374 | fi 375 | 376 | run rm -f $umap_obj && eval "$scanpy embed umap $umap_opt $neighbor_obj $umap_obj" 377 | 378 | [ "$status" -eq 0 ] 379 | [ -f "$umap_obj" ] && [ -f "$umap_embed" ] 380 | } 381 | 382 | 
# Find clusters Louvain 383 | 384 | @test "Run find cluster (louvain)" { 385 | if [ "$resume" = 'true' ] && [ -f "$louvain_obj" ]; then 386 | skip "$louvain_obj exists and resume is set to 'true'" 387 | fi 388 | 389 | run rm -f $louvain_obj && eval "$scanpy cluster louvain $louvain_opt $umap_obj $louvain_obj" 390 | 391 | [ "$status" -eq 0 ] 392 | [ -f "$louvain_obj" ] && [ -f "$louvain_tsv" ] 393 | } 394 | 395 | # Find clusters Leiden 396 | 397 | @test "Run find cluster (leiden)" { 398 | if [ "$resume" = 'true' ] && [ -f "$leiden_obj" ]; then 399 | skip "$leiden_obj exists and resume is set to 'true'" 400 | fi 401 | 402 | run rm -f $leiden_obj && eval "$scanpy cluster leiden $leiden_opt $umap_obj $leiden_obj" 403 | 404 | [ "$status" -eq 0 ] 405 | [ -f "$leiden_obj" ] && [ -f "$leiden_tsv" ] 406 | } 407 | 408 | # Find markers 409 | 410 | @test "Run find markers" { 411 | if [ "$resume" = 'true' ] && [ -f "$diffexp_obj" ]; then 412 | skip "$diffexp_obj exists and resume is set to 'true'" 413 | fi 414 | 415 | run rm -f $diffexp_obj $diffexp_tsv && eval "$scanpy diffexp $diffexp_opt $louvain_obj $diffexp_obj" 416 | 417 | [ "$status" -eq 0 ] 418 | [ -f "$diffexp_obj" ] && [ -f "$diffexp_tsv" ] 419 | } 420 | 421 | # Find markers, with singlet group 422 | 423 | @test "Run find markers, with singlet group ignored" { 424 | if [ "$resume" = 'true' ] && [ -f "$diffexp_singlet_obj" ]; then 425 | skip "$diffexp_singlet_obj exists and resume is set to 'true'" 426 | fi 427 | 428 | run rm -f $diffexp_singlet_obj $diffexp_singlet_tsv && eval "$scanpy diffexp $diffexp_singlet_opt $louvain_obj $diffexp_singlet_obj" 429 | 430 | [ "$status" -eq 0 ] 431 | [ -f "$diffexp_singlet_obj" ] && [ -f "$diffexp_singlet_tsv" ] 432 | } 433 | 434 | # Run PAGA 435 | 436 | @test "Run PAGA" { 437 | if [ "$resume" = 'true' ] && [ -f "$paga_obj" ]; then 438 | skip "$paga_obj exists and resume is set to 'true'" 439 | fi 440 | 441 | run rm -f $paga_obj && eval "$scanpy paga $paga_opt $louvain_obj 
$paga_obj" 442 | 443 | [ "$status" -eq 0 ] 444 | [ -f "$paga_obj" ] 445 | } 446 | 447 | # Run Diffmap 448 | 449 | @test "Run Diffmap" { 450 | if [ "$resume" = 'true' ] && [ -f "$diffmap_obj" ]; then 451 | skip "$diffmap_obj exists and resume is set to 'true'" 452 | fi 453 | 454 | run rm -f $diffmap_obj && eval "$scanpy embed diffmap $diffmap_opt $paga_obj $diffmap_obj" 455 | 456 | [ "$status" -eq 0 ] 457 | [ -f "$diffmap_obj" ] && [ -f "$diffmap_embed" ] 458 | } 459 | 460 | # Run DPT 461 | 462 | @test "Run DPT" { 463 | if [ "$resume" = 'true' ] && [ -f "$dpt_obj" ]; then 464 | skip "$dpt_obj exists and resume is set to 'true'" 465 | fi 466 | 467 | run rm -f $dpt_obj && eval "$scanpy dpt $dpt_opt $diffmap_obj $dpt_obj" 468 | 469 | [ "$status" -eq 0 ] 470 | [ -f "$dpt_obj" ] 471 | } 472 | 473 | # Run Plot embedding 474 | 475 | @test "Run Plot embedding" { 476 | if [ "$resume" = 'true' ] && [ -f "$plt_embed_pdf" ]; then 477 | skip "$plt_embed_pdf exists and resume is set to 'true'" 478 | fi 479 | 480 | run rm -f $plt_embed_pdf && eval "$scanpy plot embed $plt_embed_opt $louvain_obj $plt_embed_pdf" 481 | 482 | [ "$status" -eq 0 ] 483 | [ -f "$plt_embed_pdf" ] 484 | } 485 | 486 | # Run Plot paga 487 | 488 | @test "Run Plot trajectory" { 489 | if [ "$resume" = 'true' ] && [ -f "$plt_paga_pdf" ]; then 490 | skip "$plt_paga_pdf exists and resume is set to 'true'" 491 | fi 492 | 493 | run rm -f $plt_paga_pdf && eval "$scanpy plot paga $plt_paga_opt $dpt_obj $plt_paga_pdf" 494 | 495 | [ "$status" -eq 0 ] 496 | [ -f "$plt_paga_pdf" ] && [ -f "$plt_paga_obj" ] 497 | } 498 | 499 | # Run FDG, with initial coordinates from paga plotting 500 | 501 | @test "Run FDG analysis" { 502 | if [ "$resume" = 'true' ] && [ -f "$fdg_obj" ]; then 503 | skip "$fdg_obj exists and resume is set to 'true'" 504 | fi 505 | 506 | run rm -f $fdg_obj && eval "$scanpy embed fdg $fdg_opt $plt_paga_obj $fdg_obj" 507 | 508 | [ "$status" -eq 0 ] 509 | [ -f "$fdg_obj" ] && [ -f "$fdg_embed" ] 510 | } 511 | 
512 | 513 | # Plot a stacked violin plot for markers 514 | 515 | @test "Run Plot stacked violins" { 516 | if [ "$resume" = 'true' ] && [ -f "$plt_stacked_violin_pdf" ]; then 517 | skip "$plt_stacked_violin_pdf exists and resume is set to 'true'" 518 | fi 519 | 520 | run rm -f $plt_stacked_violin_pdf && eval "$scanpy plot sviol $plt_stacked_violin_opt $diffexp_obj $plt_stacked_violin_pdf" 521 | 522 | [ "$status" -eq 0 ] 523 | [ -f "$plt_stacked_violin_pdf" ] 524 | } 525 | 526 | # Plot ranking of genes using a stacked violin plot for markers 527 | 528 | @test "Run Plot ranking of genes using stacked_violin plot" { 529 | if [ "$resume" = 'true' ] && [ -f "$plt_rank_genes_groups_stacked_violin_pdf" ]; then 530 | skip "$plt_rank_genes_groups_stacked_violin_pdf exists and resume is set to 'true'" 531 | fi 532 | 533 | run rm -f $plt_rank_genes_groups_stacked_violin_pdf && eval "$scanpy plot sviol $plt_rank_genes_groups_opt $diffexp_obj $plt_rank_genes_groups_stacked_violin_pdf" 534 | 535 | [ "$status" -eq 0 ] 536 | [ -f "$plt_rank_genes_groups_stacked_violin_pdf" ] 537 | } 538 | 539 | # Plot a dot plot for markers 540 | 541 | @test "Run Plot dotplot" { 542 | if [ "$resume" = 'true' ] && [ -f "$plt_dotplot_pdf" ]; then 543 | skip "$plt_dotplot_pdf exists and resume is set to 'true'" 544 | fi 545 | 546 | run rm -f $plt_dotplot_pdf && eval "$scanpy plot dot $diffexp_plot_opt $diffexp_obj $plt_dotplot_pdf" 547 | 548 | [ "$status" -eq 0 ] 549 | [ -f "$plt_dotplot_pdf" ] 550 | } 551 | 552 | # Plot ranking of genes using a dot plot for markers 553 | 554 | @test "Run Plot ranking of genes using a dot plot" { 555 | if [ "$resume" = 'true' ] && [ -f "$plt_rank_genes_groups_dot_pdf" ]; then 556 | skip "$plt_rank_genes_groups_dot_pdf exists and resume is set to 'true'" 557 | fi 558 | 559 | run rm -f $plt_rank_genes_groups_dot_pdf && eval "$scanpy plot dot $plt_rank_genes_groups_opt $diffexp_obj $plt_rank_genes_groups_dot_pdf" 560 | 561 | [ "$status" -eq 0 ] 562 | [ -f 
"$plt_rank_genes_groups_dot_pdf" ] 563 | } 564 | 565 | # Plot ranking of genes using a dot plot for markers, high resolution clustering 566 | 567 | @test "Run Plot ranking of genes using a dot plot, high resolution clustering" { 568 | if [ "$resume" = 'true' ] && [ -f "$plt_rank_genes_groups_dot_singlet_pdf" ]; then 569 | skip "$plt_rank_genes_groups_dot_singlet_pdf exists and resume is set to 'true'" 570 | fi 571 | 572 | run rm -f $plt_rank_genes_groups_dot_singlet_pdf && eval "$scanpy plot dot $plt_rank_genes_groups_singlet_opt $diffexp_singlet_obj $plt_rank_genes_groups_dot_singlet_pdf" 573 | 574 | [ "$status" -eq 0 ] 575 | [ -f "$plt_rank_genes_groups_dot_singlet_pdf" ] 576 | } 577 | 578 | # Plot a matrix plot for markers 579 | 580 | @test "Run Plot matrix" { 581 | if [ "$resume" = 'true' ] && [ -f "$plt_matrixplot_pdf" ]; then 582 | skip "$plt_matrixplot_pdf exists and resume is set to 'true'" 583 | fi 584 | 585 | run rm -f $plt_matrixplot_pdf && eval "$scanpy plot matrix $diffexp_plot_opt $diffexp_obj $plt_matrixplot_pdf" 586 | 587 | [ "$status" -eq 0 ] 588 | [ -f "$plt_matrixplot_pdf" ] 589 | } 590 | 591 | # Plot ranking of genes using a matrix plot for markers 592 | 593 | @test "Run Plot ranking of genes using a matrix plot" { 594 | if [ "$resume" = 'true' ] && [ -f "$plt_rank_genes_groups_matrix_pdf" ]; then 595 | skip "$plt_rank_genes_groups_matrix_pdf exists and resume is set to 'true'" 596 | fi 597 | 598 | run rm -f $plt_rank_genes_groups_matrix_pdf && eval "$scanpy plot matrix $plt_rank_genes_groups_opt $diffexp_obj $plt_rank_genes_groups_matrix_pdf" 599 | 600 | [ "$status" -eq 0 ] 601 | [ -f "$plt_rank_genes_groups_matrix_pdf" ] 602 | } 603 | 604 | # Plot a matrix plot for markers 605 | 606 | @test "Run Heatmap" { 607 | if [ "$resume" = 'true' ] && [ -f "$plt_heatmap_pdf" ]; then 608 | skip "$plt_matrixplot_pdf exists and resume is set to 'true'" 609 | fi 610 | 611 | run rm -f $plt_heatmap_pdf && eval "$scanpy plot heat $diffexp_plot_opt $diffexp_obj 
$plt_heatmap_pdf" 612 | 613 | [ "$status" -eq 0 ] 614 | [ -f "$plt_heatmap_pdf" ] 615 | } 616 | 617 | # Plot ranking of genes using a matrix plot for markers 618 | 619 | @test "Run Plot ranking of genes using a heatmap" { 620 | if [ "$resume" = 'true' ] && [ -f "$plt_rank_genes_groups_heatmap_pdf" ]; then 621 | skip "$plt_rank_genes_groups_heatmap_pdf exists and resume is set to 'true'" 622 | fi 623 | 624 | run rm -f $plt_rank_genes_groups_heatmap_pdf && eval "$scanpy plot heat $plt_rank_genes_groups_opt $diffexp_obj $plt_rank_genes_groups_heatmap_pdf" 625 | 626 | [ "$status" -eq 0 ] 627 | [ -f "$plt_rank_genes_groups_matrix_pdf" ] 628 | } 629 | 630 | # Do harmony batch correction, using clustering as batch (just for test purposes) 631 | 632 | @test "Run Harmony batch integration using clustering as batch" { 633 | if [ "$resume" = 'true' ] && [ -f "$harmony_integrate_obj" ]; then 634 | skip "$harmony_integrate_obj exists and resume is set to 'true'" 635 | fi 636 | 637 | run rm -f $harmony_integrate_obj && eval "$scanpy integrate harmony $harmony_integrate_opt $louvain_obj $harmony_integrate_obj" 638 | 639 | [ "$status" -eq 0 ] 640 | [ -f "$plt_rank_genes_groups_matrix_pdf" ] 641 | 642 | } 643 | 644 | # Run Plot PCA embedding before harmony 645 | 646 | @test "Run Plot PCA embedding before Harmony" { 647 | if [ "$resume" = 'true' ] && [ -f "$noharmony_integrated_pca_pdf" ]; then 648 | skip "$noharmony_integrated_pca_pdf exists and resume is set to 'true'" 649 | fi 650 | 651 | run rm -f $noharmony_integrated_pca_pdf && eval "$scanpy plot embed $noharmony_plt_embed_opt $louvain_obj $noharmony_integrated_pca_pdf" 652 | 653 | [ "$status" -eq 0 ] 654 | [ -f "$noharmony_integrated_pca_pdf" ] 655 | } 656 | 657 | # Run Plot PCA embedding after harmony 658 | 659 | @test "Run Plot PCA embedding after Harmony" { 660 | if [ "$resume" = 'true' ] && [ -f "$harmony_integrated_pca_pdf" ]; then 661 | skip "$harmony_integrated_pca_pdf exists and resume is set to 'true'" 662 | fi 663 | 
664 | run rm -f $harmony_integrated_pca_pdf && eval "$scanpy plot embed $harmony_plt_embed_opt $harmony_integrate_obj $harmony_integrated_pca_pdf" 665 | 666 | [ "$status" -eq 0 ] 667 | [ -f "$harmony_integrated_pca_pdf" ] 668 | } 669 | 670 | # Do bbknn batch correction, using clustering as batch (just for test purposes) 671 | 672 | @test "Run BBKNN batch integration using clustering as batch" { 673 | if [ "$resume" = 'true' ] && [ -f "$bbknn_obj" ]; then 674 | skip "$bbknn_obj exists and resume is set to 'true'" 675 | fi 676 | 677 | run rm -f $bbknn_obj && eval "$scanpy integrate bbknn $bbknn_opt $louvain_obj $bbknn_obj" 678 | 679 | [ "$status" -eq 0 ] 680 | [ -f "$bbknn_obj" ] 681 | } 682 | 683 | # Do MNN batch correction, using clustering as batch (just for test purposes) 684 | # Commented as it fails with scanpy 1.9.1 685 | # 686 | # @test "Run MNN batch integration using clustering as batch" { 687 | # if [ "$resume" = 'true' ] && [ -f "$mnn_obj" ]; then 688 | # skip "$mnn_obj exists and resume is set to 'true'" 689 | # fi 690 | # 691 | # run rm -f $mnn_obj && eval "$scanpy integrate mnn $mnn_opt $louvain_obj $mnn_obj" 692 | # 693 | # [ "$status" -eq 0 ] 694 | # [ -f "$mnn_obj" ] 695 | #} 696 | 697 | # Do ComBat batch correction, using clustering as batch (just for test purposes) 698 | 699 | @test "Run Combat batch integration using clustering as batch" { 700 | if [ "$resume" = 'true' ] && [ -f "$combat_obj" ]; then 701 | skip "$combat_obj exists and resume is set to 'true'" 702 | fi 703 | 704 | run rm -f $combat_obj && eval "$scanpy integrate combat $combat_opt $louvain_obj $combat_obj" 705 | 706 | [ "$status" -eq 0 ] 707 | [ -f "$combat_obj" ] 708 | } 709 | 710 | # Local Variables: 711 | # mode: sh 712 | # End: 713 | -------------------------------------------------------------------------------- /scanpy_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provides version, 
author and exports 3 | """ 4 | import importlib.metadata 5 | 6 | __version__ = importlib.metadata.version("scanpy-scripts") 7 | 8 | __author__ = ", ".join( 9 | [ 10 | "Ni Huang", 11 | "Pablo Moreno", 12 | "Jonathan Manning", 13 | "Philipp Angerer", 14 | ] 15 | ) 16 | 17 | from . import lib 18 | -------------------------------------------------------------------------------- /scanpy_scripts/cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy 3 | """ 4 | 5 | import logging 6 | import click 7 | import scanpy as sc 8 | from .click_utils import NaturalOrderGroup 9 | from .cmds import ( 10 | READ_CMD, 11 | FILTER_CMD, 12 | NORM_CMD, 13 | HVG_CMD, 14 | SCALE_CMD, 15 | REGRESS_CMD, 16 | PCA_CMD, 17 | NEIGHBOR_CMD, 18 | UMAP_CMD, 19 | TSNE_CMD, 20 | FDG_CMD, 21 | LOUVAIN_CMD, 22 | LEIDEN_CMD, 23 | DIFFEXP_CMD, 24 | PAGA_CMD, 25 | DIFFMAP_CMD, 26 | DPT_CMD, 27 | PLOT_EMBED_CMD, 28 | PLOT_PAGA_CMD, 29 | PLOT_STACKED_VIOLIN_CMD, 30 | PLOT_DOT_CMD, 31 | PLOT_MATRIX_CMD, 32 | PLOT_HEATMAP_CMD, 33 | HARMONY_INTEGRATE_CMD, 34 | SCRUBLET_MULTIPLET_CMD, 35 | SCRUBLET_MULTIPLET_SIMULATE_CMD, 36 | SCRUBLET_MULTIPLET_PLOT_CMD, 37 | BBKNN_CMD, 38 | MNN_CORRECT_CMD, 39 | COMBAT_CMD, 40 | ) 41 | 42 | 43 | @click.group(cls=NaturalOrderGroup) 44 | @click.option( 45 | "--debug", 46 | is_flag=True, 47 | default=False, 48 | help="Print debug information", 49 | ) 50 | @click.option( 51 | "--verbosity", 52 | type=click.INT, 53 | default=3, 54 | help="Set scanpy verbosity", 55 | ) 56 | @click.option( 57 | "--njobs", 58 | type=click.INT, 59 | default=1, 60 | help="Set scanpy default number of jobs/CPUs, defaults 1", 61 | ) 62 | @click.version_option( 63 | version="0.2.0", 64 | prog_name="scanpy", 65 | ) 66 | def cli(debug=False, verbosity=3, njobs=1): 67 | """ 68 | Command line interface to [scanpy](https://github.com/theislab/scanpy) 69 | """ 70 | log_level = logging.DEBUG if debug else logging.INFO 71 | logging.basicConfig( 72 | 
level=log_level, 73 | format=( 74 | "%(asctime)s; %(levelname)s; %(filename)s; " "%(funcName)s(): %(message)s" 75 | ), 76 | datefmt="%y-%m-%d %H:%M:%S", 77 | ) 78 | logging.debug("debugging") 79 | sc.settings.verbosity = verbosity 80 | sc.settings.n_jobs = njobs 81 | return 0 82 | 83 | 84 | cli.add_command(READ_CMD) 85 | cli.add_command(FILTER_CMD) 86 | cli.add_command(NORM_CMD) 87 | cli.add_command(HVG_CMD) 88 | cli.add_command(SCALE_CMD) 89 | cli.add_command(REGRESS_CMD) 90 | cli.add_command(PCA_CMD) 91 | cli.add_command(NEIGHBOR_CMD) 92 | 93 | 94 | @cli.group(cls=NaturalOrderGroup) 95 | def embed(): 96 | """Embed cells into two-dimensional space.""" 97 | 98 | 99 | embed.add_command(UMAP_CMD) 100 | embed.add_command(TSNE_CMD) 101 | embed.add_command(FDG_CMD) 102 | embed.add_command(DIFFMAP_CMD) 103 | 104 | 105 | @cli.group(cls=NaturalOrderGroup) 106 | def cluster(): 107 | """Cluster cells into sub-populations.""" 108 | 109 | 110 | cluster.add_command(LOUVAIN_CMD) 111 | cluster.add_command(LEIDEN_CMD) 112 | 113 | 114 | cli.add_command(DIFFEXP_CMD) 115 | cli.add_command(PAGA_CMD) 116 | cli.add_command(DPT_CMD) 117 | 118 | 119 | @cli.group(cls=NaturalOrderGroup) 120 | def integrate(): 121 | """Integrate cells from different experimental batches.""" 122 | 123 | 124 | integrate.add_command(HARMONY_INTEGRATE_CMD) 125 | integrate.add_command(BBKNN_CMD) 126 | integrate.add_command(MNN_CORRECT_CMD) 127 | integrate.add_command(COMBAT_CMD) 128 | 129 | 130 | @cli.group(cls=NaturalOrderGroup) 131 | def multiplet(): 132 | """Execute methods for multiplet removal.""" 133 | 134 | 135 | multiplet.add_command(SCRUBLET_MULTIPLET_CMD) 136 | multiplet.add_command(SCRUBLET_MULTIPLET_SIMULATE_CMD) 137 | 138 | 139 | @cli.group(cls=NaturalOrderGroup) 140 | def plot(): 141 | """Visualise data.""" 142 | 143 | 144 | plot.add_command(PLOT_EMBED_CMD) 145 | plot.add_command(PLOT_PAGA_CMD) 146 | plot.add_command(PLOT_STACKED_VIOLIN_CMD) 147 | plot.add_command(PLOT_DOT_CMD) 148 | 
plot.add_command(PLOT_MATRIX_CMD) 149 | plot.add_command(PLOT_HEATMAP_CMD) 150 | plot.add_command(SCRUBLET_MULTIPLET_PLOT_CMD) 151 | -------------------------------------------------------------------------------- /scanpy_scripts/click_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provide helper functions for command line parsing with click 3 | """ 4 | 5 | import click 6 | import sys 7 | 8 | 9 | class NaturalOrderGroup(click.Group): 10 | """Command group trying to list subcommands in the order they were added. 11 | 12 | With decorator, use:: 13 | 14 | @click.group(cls=NaturalOrderGroup) 15 | """ 16 | 17 | def list_commands(self, ctx): 18 | """List command names as they are in commands dict. 19 | 20 | If the dict is OrderedDict, it will preserve the order commands 21 | were added. 22 | """ 23 | return self.commands.keys() 24 | 25 | 26 | class CommaSeparatedText(click.ParamType): 27 | """ 28 | Comma separated text 29 | """ 30 | 31 | def __init__(self, dtype=click.STRING, simplify=False, length=None): 32 | self.dtype = dtype 33 | self.dtype_name = _get_type_name(dtype) 34 | self.simplify = simplify 35 | self.length = length 36 | if length and length <= 3: 37 | self.name = ",".join([f"{self.dtype_name}"] * length) 38 | else: 39 | self.name = "{}[,{}...]".format(self.dtype_name, self.dtype_name) 40 | 41 | def convert(self, value, param, ctx): 42 | """ 43 | >>> @click.command() 44 | ... @click.option('--test-param') 45 | ... def test_cmd(): 46 | ... pass 47 | ... 48 | >>> ctx = click.Context(test_cmd) 49 | >>> param = test_cmd.params[0] 50 | >>> test_cst1 = CommaSeparatedText() 51 | >>> test_cst2 = CommaSeparatedText(click.INT, length=2) 52 | >>> test_cst3 = CommaSeparatedText(click.FLOAT, simplify=True) 53 | >>> 54 | >>> test_cst1.convert(None, param, ctx) 55 | >>> test_cst2.convert('7,2', param, ctx) 56 | [7, 2] 57 | >>> test_cst2.convert('7.2', param, ctx) 58 | Traceback (most recent call last): 59 | ... 
60 | click.exceptions.BadParameter: 7.2 is not a valid integer 61 | >>> test_cst2.convert('7', param, ctx) 62 | Traceback (most recent call last): 63 | ... 64 | click.exceptions.BadParameter: 7 is not a valid comma separated list of length 2 65 | >>> test_cst3.convert('7.2', param, ctx) 66 | 7.2 67 | """ 68 | try: 69 | if value is None: 70 | converted = None 71 | else: 72 | converted = list(map(self.dtype, str(value).split(","))) 73 | if self.simplify and len(converted) == 1: 74 | converted = converted[0] 75 | except ValueError: 76 | self.fail( 77 | "{} is not a valid comma separated list of {}".format( 78 | value, self.dtype_name 79 | ), 80 | param, 81 | ctx, 82 | ) 83 | if self.length: 84 | if len(converted) != self.length: 85 | self.fail( 86 | "{} is not a valid comma separated list of length {}".format( 87 | value, self.length 88 | ), 89 | param, 90 | ctx, 91 | ) 92 | return converted 93 | 94 | 95 | class Dictionary(click.ParamType): 96 | """ 97 | Text to be parsed as a python dict definition 98 | """ 99 | 100 | def __init__(self, keys=None): 101 | self.name = "TEXT:VAL[,TEXT:VAL...]" 102 | self.keys = keys 103 | 104 | def convert(self, value, param, ctx): 105 | """ 106 | >>> @click.command() 107 | ... @click.option('--my-param', type=Dictionary(keys=('abc', 'def', 'ghi', 'jkl', 'mno'))) 108 | ... def test_cmd(): 109 | ... pass 110 | ... 111 | >>> ctx = click.Context(test_cmd) 112 | >>> param = test_cmd.params[0] 113 | >>> dict_param = param.type 114 | >>> dict_str1 = 'abc:0.1,def:TRUE,ghi:False,jkl:None,mno:some_string' 115 | >>> dict_str2 = 'abc:0.1,def:TRUE,ghi:False,jkl:None,mnp:some_string' 116 | >>> dict_str3 = '' 117 | >>> dict_param.convert(dict_str1, param, ctx) 118 | {'abc': 0.1, 'def': True, 'ghi': False, 'jkl': None, 'mno': 'some_string'} 119 | >>> dict_param.convert(dict_str2, param, ctx) 120 | Traceback (most recent call last): 121 | ... 
122 | click.exceptions.BadParameter: mnp is not a valid key (('abc', 'def', 'ghi', 'jkl', 'mno')) 123 | >>> dict_param.convert(dict_str3, param, ctx) 124 | Traceback (most recent call last): 125 | ... 126 | click.exceptions.BadParameter: is not a valid python dict definition 127 | """ 128 | try: 129 | converted = dict() 130 | for token in value.split(","): 131 | if ":" not in token: 132 | raise ValueError 133 | key, _, value = token.partition(":") 134 | if not key: 135 | raise ValueError 136 | if isinstance(self.keys, (list, tuple)) and key not in self.keys: 137 | self.fail(f"{key} is not a valid key ({self.keys})") 138 | if value == "None": 139 | value = None 140 | elif value.lower() == "true": 141 | value = True 142 | elif value.lower() == "false": 143 | value = False 144 | else: 145 | try: 146 | value = float(value) 147 | except ValueError: 148 | pass 149 | converted[key] = value 150 | return converted 151 | except ValueError: 152 | self.fail(f"{value} is not a valid python dict definition", param, ctx) 153 | 154 | 155 | def _get_type_name(obj): 156 | name = "text" 157 | try: 158 | name = getattr(obj, "name") 159 | except AttributeError: 160 | name = getattr(obj, "__name__") 161 | return name 162 | 163 | 164 | def valid_limit(ctx, param, value): 165 | """ 166 | Callback function that checks order of numeric inputs 167 | 168 | >>> @click.command() 169 | ... @click.option('--test-param', help='Sample help') 170 | ... def test_cmd(): 171 | ... pass 172 | ... 173 | >>> ctx = click.Context(test_cmd) 174 | >>> param = test_cmd.params[0] 175 | >>> valid_limit(ctx, param, value=[0.0125, 3]) 176 | [0.0125, 3] 177 | >>> valid_limit(ctx, param, value=[0.0125, -0.0125]) 178 | Traceback (most recent call last): 179 | ... 
180 | click.exceptions.BadParameter: lower limit must not exceed upper limit 181 | >>> valid_limit(ctx, param, value=[0.0125, 0.0125]) 182 | [0.0125, 0.0125] 183 | """ 184 | if value[0] > value[1]: 185 | param.type.fail("lower limit must not exceed upper limit", param, ctx) 186 | return value 187 | 188 | 189 | def valid_parameter_limits(ctx, param, value): 190 | """ 191 | Callback function that checks order of multiple numeric inputs 192 | 193 | >>> @click.command() 194 | ... @click.option('--test-param', type=(click.STRING, click.FLOAT, click.FLOAT), multiple=True) 195 | ... def test_cmd(): 196 | ... pass 197 | ... 198 | >>> ctx = click.Context(test_cmd) 199 | >>> param = test_cmd.params[0] 200 | >>> valid_parameter_limits(ctx, param, [['a', 0.0, 2.0]]) 201 | [['a', 0.0, 2.0]] 202 | >>> valid_parameter_limits(ctx, param, [['b', 0.0, 0.0]]) 203 | [['b', 0.0, 0.0]] 204 | >>> valid_parameter_limits(ctx, param, [['c', 0.0, -1.0]]) 205 | Traceback (most recent call last): 206 | ... 207 | click.exceptions.BadParameter: lower limit must not exceed upper limit 208 | >>> valid_parameter_limits(ctx, param, [['a', 0.0, 2.0], ['c', 0.0, -1.0]]) 209 | Traceback (most recent call last): 210 | ... 
211 | click.exceptions.BadParameter: lower limit must not exceed upper limit 212 | """ 213 | for val in value: 214 | if val[1] > val[2]: 215 | param.type.fail("lower limit must not exceed upper limit", param, ctx) 216 | return value 217 | 218 | 219 | def mutually_exclusive_with(param_name): 220 | internal_name = param_name.strip("-").replace("-", "_").lower() 221 | 222 | def valid_mutually_exclusive(ctx, param, value): 223 | try: 224 | other_value = ctx.params[internal_name] 225 | except KeyError: 226 | return value 227 | if (value is None) == (other_value is None): 228 | param.type.fail( 229 | 'mutually exclusive with "{}", one and only one must be ' 230 | "specified.".format(param_name), 231 | param, 232 | ctx, 233 | ) 234 | return value 235 | 236 | return valid_mutually_exclusive 237 | 238 | 239 | def required_by(param_name): 240 | internal_name = param_name.strip("-").replace("-", "_").lower() 241 | 242 | def required(ctx, param, value): 243 | try: 244 | other_value = ctx.params[internal_name] 245 | except KeyError: 246 | return value 247 | if other_value and not value: 248 | param.type.fail( 249 | 'required by "{}".'.format(param_name), 250 | param, 251 | ctx, 252 | ) 253 | return value 254 | 255 | return required 256 | 257 | 258 | if __name__ == "__main__": 259 | import doctest 260 | 261 | sys.exit(doctest.testmod(verbose=True)[0]) 262 | -------------------------------------------------------------------------------- /scanpy_scripts/cmd_options.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provide cmd options 3 | """ 4 | 5 | import click 6 | 7 | from .click_utils import ( 8 | CommaSeparatedText, 9 | Dictionary, 10 | mutually_exclusive_with, 11 | required_by, 12 | valid_limit, 13 | valid_parameter_limits, 14 | ) 15 | 16 | COMMON_OPTIONS = { 17 | "input": [ 18 | click.argument( 19 | "input_obj", 20 | metavar="", 21 | type=click.Path(exists=True, dir_okay=False), 22 | ), 23 | click.option( 24 | "--input-format", 
25 | "-f", 26 | type=click.Choice(["anndata", "loom"]), 27 | default="anndata", 28 | show_default=True, 29 | help="Input object format.", 30 | ), 31 | ], 32 | "output": [ 33 | click.argument( 34 | "output_obj", 35 | metavar="", 36 | type=click.Path(dir_okay=False, writable=True), 37 | ), 38 | click.option( 39 | "--output-format", 40 | "-F", 41 | type=click.Choice(["anndata", "loom", "zarr"]), 42 | default="anndata", 43 | show_default=True, 44 | help="Output object format.", 45 | ), 46 | click.option( 47 | "--zarr-chunk-size", 48 | "-z", 49 | type=click.INT, 50 | default=1000, 51 | show_default=True, 52 | help="Chunk size for writing output in zarr format.", 53 | ), 54 | click.option( 55 | "--loom-write-obsm-varm", 56 | "-b", 57 | is_flag=True, 58 | default=False, 59 | show_default=True, 60 | help="Write obsm and varm to the Loom file?", 61 | ), 62 | click.option( 63 | "--export-mtx", 64 | "-X", 65 | type=click.Path(dir_okay=True, writable=True), 66 | default=None, 67 | show_default=True, 68 | help="When specified, using it as prefix for exporting mtx files. 
" 69 | 'If not empty and not ending with "/" or "_", a "_" will be ' 70 | "appended.", 71 | ), 72 | click.option( 73 | "--mtx-compression", 74 | "-G", 75 | type=click.Choice(["zip", "gzip", "bz2", "zstd"]), 76 | default=None, 77 | show_default=True, 78 | help="Compression type for MTX output.", 79 | ), 80 | click.option( 81 | "--show-obj", 82 | type=click.Choice(["stdout", "stderr"]), 83 | default=None, 84 | show_default=True, 85 | help="Print output object summary info to specified stream.", 86 | ), 87 | ], 88 | "save": [ 89 | click.option( 90 | "--save-raw", 91 | "-r", 92 | is_flag=True, 93 | default=False, 94 | show_default=True, 95 | help="Save adata to adata.raw before processing.", 96 | ), 97 | click.option( 98 | "--save-layer", 99 | "-y", 100 | type=click.STRING, 101 | default=None, 102 | show_default=True, 103 | help="Save adata.X to the specified layer before processing.", 104 | ), 105 | ], 106 | "plot": [ 107 | click.argument( 108 | "output_fig", 109 | metavar="", 110 | type=click.Path(dir_okay=False, writable=True), 111 | ), 112 | click.option( 113 | "--fig-size", 114 | type=CommaSeparatedText(click.INT, length=2), 115 | default="7,7", 116 | show_default=True, 117 | help="Figure size.", 118 | ), 119 | click.option( 120 | "--fig-dpi", 121 | type=click.INT, 122 | default=80, 123 | show_default=True, 124 | help="Figure DPI.", 125 | ), 126 | click.option( 127 | "--fig-fontsize", 128 | type=click.INT, 129 | default=15, 130 | show_default=True, 131 | help="Figure font size.", 132 | ), 133 | ], 134 | "frame_title": [ 135 | click.option( 136 | "--frameon/--frameoff", 137 | "frameon", 138 | default=True, 139 | show_default=True, 140 | help="Draw a frame around the plot", 141 | ), 142 | click.option( 143 | "--title", 144 | type=CommaSeparatedText(simplify=True), 145 | default=None, 146 | show_default=True, 147 | help="Provide title for the plot or panels.", 148 | ), 149 | ], 150 | "use_pc": [ 151 | click.option( 152 | "--n-pcs", 153 | "-n", 154 | type=click.INT, 
155 | default=None, 156 | show_default=True, 157 | help="Use this many PCs. Use `.X` if --n-pcs is 0 when --use-rep is " 158 | "None.", 159 | ), 160 | click.option( 161 | "--use-rep", 162 | "-u", 163 | type=click.STRING, 164 | default=None, 165 | show_default=True, 166 | help="Use the indicated representation. If None, the representation is " 167 | "chosen automatically: for `.n_vars` < 50, `.X` is used, otherwise " 168 | "`X_pca` is used. If `X_pca` is not present, it's computed with " 169 | "default parameters.", 170 | ), 171 | ], 172 | "knn_graph": [ 173 | click.option( 174 | "--neighbors-key", 175 | type=click.STRING, 176 | default=None, 177 | show_default=False, 178 | help="If not specified, look in .uns[‘neighbors’] for neighbors " 179 | "settings and .obsp[‘connectivities’], .obsp[‘distances’] for connectivities and " 180 | "distances respectively (default storage places for pp.neighbors). If specified, " 181 | "look in .uns[neighbors_key] for neighbors settings and " 182 | ".obsp[.uns[neighbors_key][‘connectivities_key’]], " 183 | ".obsp[.uns[neighbors_key][‘distances_key’]] for connectivities and distances " 184 | "respectively.", 185 | ), 186 | click.option( 187 | "--obsp", 188 | type=click.STRING, 189 | default=None, 190 | show_default=True, 191 | help="Use .obsp[obsp] as adjacency. 
You can’t specify both obsp and " 192 | "neighbors_key at the same time.", 193 | ), 194 | click.option( 195 | "--directed/--undirected", 196 | "directed", 197 | default=True, 198 | show_default=True, 199 | help="Interpret the adjacency matrix as directed graph.", 200 | ), 201 | click.option( 202 | "--use-weights", 203 | is_flag=True, 204 | default=False, 205 | show_default=True, 206 | help="Use weights from KNN graph.", 207 | ), 208 | ], 209 | "neighbor_metric": click.option( 210 | "--metric", 211 | "-t", 212 | type=click.Choice( 213 | [ 214 | "cityblock", 215 | "cosine", 216 | "euclidean", 217 | "l1", 218 | "l2", 219 | "manhattan", 220 | "braycurtis", 221 | "canberra", 222 | "chebyshev", 223 | "correlation", 224 | "dice", 225 | "hamming", 226 | "jaccard", 227 | "kulsinski", 228 | "mahalanobis", 229 | "minkowski", 230 | "rogerstanimoto", 231 | "russellrao", 232 | "seuclidean", 233 | "sokalmichener", 234 | "sokalsneath", 235 | "sqeuclidean", 236 | "yule", 237 | ] 238 | ), 239 | default="euclidean", 240 | show_default=True, 241 | help="A known metric’s name.", 242 | ), 243 | "layer": click.option( 244 | "--layer", 245 | type=CommaSeparatedText(simplify=True), 246 | default=None, 247 | show_default=True, 248 | help="Name of the AnnData object layer that wants to be plotted. By " 249 | "default adata.raw.X is plotted. If use_raw=False is set, then adata.X " 250 | "is plotted. If layer is set to a valid layer name, then the layer is " 251 | "plotted. 
layer takes precedence over use_raw.", 252 | ), 253 | "n_comps": click.option( 254 | "--n-comps", 255 | type=click.INT, 256 | default=None, 257 | show_default=True, 258 | help="Number of components to compute", 259 | ), 260 | "key_added": click.option( 261 | "--key-added", 262 | type=CommaSeparatedText(simplify=True), 263 | default=None, 264 | show_default=True, 265 | help="Key under which to add the computed results", 266 | ), 267 | "random_state": click.option( 268 | "--random-state", 269 | "-S", 270 | type=click.INT, 271 | default=0, 272 | show_default=True, 273 | help="Seed for random number generator.", 274 | ), 275 | "use_raw": click.option( 276 | "--use-raw/--no-raw", 277 | "use_raw", 278 | default=None, 279 | show_default=True, 280 | help="Use expression values in `.raw` if present.", 281 | ), 282 | "zero_center": click.option( 283 | "--no-zero-center", 284 | "zero_center", 285 | is_flag=True, 286 | flag_value=False, 287 | default=True, 288 | help="When set, omit zero-centering variables to allow efficient " 289 | "handling of sparse input.", 290 | ), 291 | "n_jobs": click.option( 292 | "--n-jobs", 293 | "-J", 294 | type=click.INT, 295 | default=None, 296 | show_default=True, 297 | help="Number of jobs for parallel computation.", 298 | ), 299 | "restrict_to": click.option( 300 | "--restrict-to", 301 | type=(click.STRING, CommaSeparatedText()), 302 | default=(None, None), 303 | show_default=True, 304 | help="Restrict the clustering to the categories within the key for " 305 | 'sample annotation, in the form of "obs_key list_of_categories".', 306 | ), 307 | "export_embedding": click.option( 308 | "--export-embedding", 309 | "-E", 310 | type=click.Path(dir_okay=False, writable=True), 311 | default=None, 312 | show_default=True, 313 | help="Export embeddings in a tab-separated text table.", 314 | ), 315 | "export_cluster": click.option( 316 | "--export-cluster", 317 | type=click.Path(dir_okay=False, writable=True), 318 | default=None, 319 | show_default=True, 
320 | help="Export cluster labels in a tab-separated text table.", 321 | ), 322 | "var_names": click.option( 323 | "--var-names", 324 | type=(CommaSeparatedText()), 325 | show_default=True, 326 | help="var_names should be a valid subset of adata.var_names.", 327 | ), 328 | "gene_symbols": click.option( 329 | "--gene-symbols", 330 | type=CommaSeparatedText(simplify=True), 331 | default=None, 332 | show_default=True, 333 | help="Column name in .var DataFrame that stores gene symbols. By " 334 | "default this is assumed to be the index column of the .var " 335 | "DataFrame. Setting this option allows alternative names to be " 336 | "used.", 337 | ), 338 | "diffexp_plot": [ 339 | click.option( 340 | "--rgg", 341 | is_flag=True, 342 | default=False, 343 | show_default=True, 344 | help="When set, use the rank_genes_groups_ form of the function, " 345 | "where gene lists are automatically selected.", 346 | ), 347 | click.option( 348 | "--groupby", 349 | type=CommaSeparatedText(simplify=True), 350 | default=None, 351 | show_default=True, 352 | help="The key of the observation grouping to consider.", 353 | ), 354 | click.option( 355 | "--log", 356 | is_flag=True, 357 | default=False, 358 | show_default=True, 359 | help="Plot on logarithmic axis.", 360 | ), 361 | click.option( 362 | "--num-categories", 363 | type=click.INT, 364 | default=7, 365 | show_default=True, 366 | help="Only used if groupby observation is not categorical. This value " 367 | "determines the number of groups into which the groupby observation " 368 | "should be subdivided.", 369 | ), 370 | click.option( 371 | "--dendrogram", 372 | is_flag=True, 373 | default=False, 374 | show_default=False, 375 | help="If True, a dendrogram based on the hierarchical clustering " 376 | "between the groupby categories is added. The dendrogram information is " 377 | "computed using scanpy.tl.dendrogram(). 
If tl.dendrogram has not been " 378 | "called previously the function is called with default parameters.", 379 | ), 380 | click.option( 381 | "--standard-scale", 382 | type=click.Choice(["var", "obs"]), 383 | default=None, 384 | show_default=True, 385 | help="Whether or not to standardize that dimension between 0 and 1, " 386 | "meaning for each variable or group, subtract the minimum and divide " 387 | "each by its maximum.", 388 | ), 389 | ], 390 | "sviol": [ 391 | click.option( 392 | "--no-stripplot", 393 | "stripplot", 394 | is_flag=True, 395 | default=True, 396 | show_default=True, 397 | help="When set, do not add a stripplot on top of the violin plot.", 398 | ), 399 | click.option( 400 | "--no-jitter", 401 | "jitter", 402 | is_flag=True, 403 | default=True, 404 | show_default=True, 405 | help="Suppress jitter in the stripplot (only when stripplot is True)", 406 | ), 407 | click.option( 408 | "--size", 409 | type=click.INT, 410 | default=1, 411 | show_default=True, 412 | help="Size of the jitter points.", 413 | ), 414 | click.option( 415 | "--order", 416 | type=CommaSeparatedText(), 417 | default=None, 418 | show_default=True, 419 | help="Order in which to show the categories.", 420 | ), 421 | click.option( 422 | "--scale", 423 | type=click.Choice(["area", "count", "width"]), 424 | default="width", 425 | show_default=True, 426 | help="The method used to scale the width of each violin. If ‘area’, " 427 | "each violin will have the same area. If ‘count’, the width of the " 428 | "violins will be scaled by the number of observations in that bin. If " 429 | "‘width’, each violin will have the same width.", 430 | ), 431 | click.option( 432 | "--row-palette", 433 | type=CommaSeparatedText(simplify=True), 434 | default="muted", 435 | show_default=True, 436 | help="The row palette determines the colors to use in each of the " 437 | "stacked violin plots. 
The value should be a valid seaborn palette name " 438 | "or a valid matplotlib colormap (see " 439 | "https://seaborn.pydata.org/generated/seaborn.color_palette.html). " 440 | "Alternatively, a single color name or hex value can be passed. E.g. " 441 | "‘red’ or ‘#cc33ff’.", 442 | ), 443 | ], 444 | "dot": [ 445 | click.option( 446 | "--expression-cutoff", 447 | type=click.FLOAT, 448 | default=0, 449 | show_default=True, 450 | help="Expression cutoff that is used for binarizing the gene expression " 451 | "and determining the fraction of cells expressing given genes. A gene is " 452 | "expressed only if the expression value is greater than this threshold.", 453 | ), 454 | click.option( 455 | "--mean-only-expressed", 456 | is_flag=True, 457 | default=False, 458 | show_default=True, 459 | help="If True, gene expression is averaged only over the cells " 460 | "expressing the given genes.", 461 | ), 462 | click.option( 463 | "--color-map", 464 | type=CommaSeparatedText(simplify=True), 465 | default="Reds", 466 | show_default=True, 467 | help="String denoting matplotlib color map.", 468 | ), 469 | click.option( 470 | "--dot-max", 471 | type=click.FLOAT, 472 | default=None, 473 | show_default=True, 474 | help="If none, the maximum dot size is set to the maximum fraction " 475 | "value found (e.g. 0.6). If given, the value should be a number between " 476 | "0 and 1. All fractions larger than dot_max are clipped to this value.", 477 | ), 478 | click.option( 479 | "--dot-min", 480 | type=click.FLOAT, 481 | default=None, 482 | show_default=True, 483 | help="If none, the minimum dot size is set to 0. If given, the value " 484 | "should be a number between 0 and 1. All fractions smaller than dot_min " 485 | "are clipped to this value.", 486 | ), 487 | click.option( 488 | "--smallest-dot", 489 | type=click.FLOAT, 490 | default=0, 491 | show_default=True, 492 | help="If none, the smallest dot has size 0. 
All expression levels with " 493 | "dot_min are plotted with smallest_dot dot size.", 494 | ), 495 | ], 496 | "heat": [ 497 | click.option( 498 | "--show-gene-labels", 499 | is_flag=True, 500 | default=None, 501 | show_default=True, 502 | help="By default gene labels are shown when there are 50 or less " 503 | "genes. Otherwise the labels are removed.", 504 | ), 505 | ], 506 | "swap_axes": click.option( 507 | "--swap-axes", 508 | is_flag=True, 509 | default=False, 510 | show_default=True, 511 | help="By default, the x axis contains var_names (e.g. genes) and the y " 512 | "axis the groupby categories. By setting swap_axes then x are the " 513 | "groupby categories and y the var_names. When swapping axes " 514 | "var_group_positions are no longer used.", 515 | ), 516 | "rank_genes_groups_plots": [ 517 | click.option( 518 | "--groups", 519 | type=CommaSeparatedText(), 520 | default=None, 521 | show_default=True, 522 | help="The groups for which to show the gene ranking.", 523 | ), 524 | click.option( 525 | "--n-genes", 526 | "-n", 527 | type=click.INT, 528 | default=10, 529 | show_default=True, 530 | help="Number of genes to show.", 531 | ), 532 | ], 533 | "root": click.option( 534 | "--root", 535 | type=click.INT, 536 | default=0, 537 | show_default=True, 538 | help="If choosing a tree layout, this is the index of the root node.", 539 | ), 540 | "plot_embed": [ 541 | click.option( 542 | "--use-raw/--no-raw", 543 | default=None, 544 | show_default=True, 545 | help="Use `.raw` attribute for coloring with gene expression. If " 546 | "`None`, uses `.raw` if present.", 547 | ), 548 | click.option( 549 | "--groups", 550 | type=click.STRING, 551 | default=None, 552 | help="Key for categorical in `.obs`. 
You can pass your predefined " 553 | "groups by choosing any categorical annotation of observations.", 554 | ), 555 | ], 556 | "batch_key": click.option( 557 | "--batch-key", 558 | "key", 559 | type=click.STRING, 560 | required=True, 561 | help="The name of the column in adata.obs that differentiates among " 562 | "experiments/batches.", 563 | ), 564 | "batch_layer": click.option( 565 | "--layer", 566 | "-l", 567 | type=click.STRING, 568 | default=None, 569 | show_default=True, 570 | help="Layer to batch correct. By default corrects the contents of .X.", 571 | ), 572 | "scrublet": [ 573 | click.option( 574 | "--sim-doublet-ratio", 575 | type=click.FLOAT, 576 | default=2.0, 577 | show_default=True, 578 | help="Number of doublets to simulate relative to the number of " 579 | "observed transcriptomes.", 580 | ), 581 | click.option( 582 | "--synthetic-doublet-umi-subsampling", 583 | type=click.FLOAT, 584 | default=1.0, 585 | show_default=True, 586 | help="Where input_obj_sim not supplied, rate for sampling UMIs when " 587 | "creating synthetic doublets. If 1.0, each doublet is created by " 588 | "simply adding the UMI counts from two randomly sampled observed " 589 | "transcriptomes. 
For values less than 1, the UMI counts are added " 590 | "and then randomly sampled at the specified rate.", 591 | ), 592 | ], 593 | } 594 | 595 | COMMON_OPTIONS["opt_output"] = [ 596 | click.option( 597 | "--output-obj", 598 | type=click.Path(dir_okay=False, writable=True), 599 | help="Optionally output an object to the specified path.", 600 | ), 601 | *COMMON_OPTIONS["output"][1:], 602 | ] 603 | 604 | CMD_OPTIONS = { 605 | "read": [ 606 | click.option( 607 | "--input-10x-h5", 608 | "-i", 609 | type=click.Path(exists=True, dir_okay=False), 610 | callback=mutually_exclusive_with("--input-10x-mtx"), 611 | help="Input 10x data in Cell-Ranger hdf5 format.", 612 | ), 613 | click.option( 614 | "--input-10x-mtx", 615 | "-x", 616 | type=click.Path(exists=True, file_okay=False), 617 | callback=mutually_exclusive_with("--input-10x-h5"), 618 | help="Path of input folder containing 10x data in mtx format.", 619 | ), 620 | *COMMON_OPTIONS["output"], 621 | click.option( 622 | "--genome", 623 | "-g", 624 | callback=required_by("--input-10x-h5"), 625 | default="hg19", 626 | show_default=True, 627 | help="Name of the genome group in hdf5 file, required by " 628 | '"--input-10x-h5".', 629 | ), 630 | click.option( 631 | "--var-names", 632 | "-v", 633 | type=click.Choice(["gene_symbols", "gene_ids"]), 634 | callback=required_by("--input-10x-mtx"), 635 | default="gene_symbols", 636 | show_default=True, 637 | help="Attribute to be used as the index of the variable table, " 638 | 'required by "--input-10x-mtx".', 639 | ), 640 | click.option( 641 | "--extra-obs", 642 | type=click.Path(exists=True, dir_okay=False), 643 | default=None, 644 | show_default=True, 645 | help="Extra cell metadata table, must be tab-separated with a header " 646 | "row and an index column, and with matched dimension.", 647 | ), 648 | click.option( 649 | "--extra-var", 650 | type=click.Path(exists=True, dir_okay=False), 651 | default=None, 652 | show_default=True, 653 | help="Extra gene metadata table, must be 
tab-separated with a header " 654 | "row and an index column, and with matched dimension.", 655 | ), 656 | ], 657 | "filter": [ 658 | *COMMON_OPTIONS["input"], 659 | *COMMON_OPTIONS["output"], 660 | COMMON_OPTIONS["save"][0], # --save-raw 661 | click.option( 662 | "--gene-name", 663 | "-g", 664 | type=click.STRING, 665 | default="index", 666 | show_default=True, 667 | help="Name of the variable that contains gene names, used for flagging " 668 | 'mitochondria genes when column "mito" is absent from `.var`.', 669 | ), 670 | click.option( 671 | "--list-attr", 672 | "-l", 673 | is_flag=True, 674 | default=False, 675 | help="When set, list attributes that can be filtered on.", 676 | ), 677 | click.option( 678 | "--param", 679 | "-p", 680 | type=(click.STRING, click.FLOAT, click.FLOAT), 681 | multiple=True, 682 | callback=valid_parameter_limits, 683 | help="Numerical parameters used to filter the data, " 684 | 'in the format of "-p name min max". ' 685 | "Multiple -p entries allowed.", 686 | ), 687 | click.option( 688 | "--category", 689 | "-c", 690 | type=(click.STRING, CommaSeparatedText()), 691 | multiple=True, 692 | help="Categorical attributes used to filter the data, " 693 | 'in the format of "-c <name> <values>", ' 694 | "where entries with attribute <name> with value in <values> are kept. " 695 | 'If <values> is preceded by "!", entries with value in <values> are ' 696 | "removed. Multiple -c entries allowed.", 697 | ), 698 | click.option( 699 | "--subset", 700 | "-s", 701 | type=(click.STRING, click.File()), 702 | multiple=True, 703 | help='Similar to --category in the format of "-s <name> <file>", ' 704 | "but the <file> to be a one-column table that provides the values. 
" 705 | "Multiple -s entries allowed.", 706 | ), 707 | click.option( 708 | "--force-recalc", 709 | is_flag=True, 710 | default=False, 711 | help="When set, re-calculate `pct_counts_<qc_var>` and " 712 | "`pct_counts_in_top_<n>_genes` even if they exist.", 713 | ), 714 | ], 715 | "norm": [ 716 | *COMMON_OPTIONS["input"], 717 | *COMMON_OPTIONS["output"], 718 | *COMMON_OPTIONS["save"], 719 | COMMON_OPTIONS["key_added"], 720 | click.option( 721 | "--no-log-transform", 722 | "log_transform", 723 | is_flag=True, 724 | default=True, 725 | show_default=True, 726 | help="When set, do not apply (natural) log transform following normalisation.", 727 | ), 728 | click.option( 729 | "--normalize-to", 730 | "-t", 731 | "target_sum", 732 | type=float, 733 | default=10_000, 734 | show_default=True, 735 | help="Normalize per cell nUMI to this number.", 736 | ), 737 | click.option( 738 | "--exclude-highly-expressed", 739 | "-e", 740 | "exclude_highly_expressed", 741 | is_flag=True, 742 | default=False, 743 | show_default=True, 744 | help="Exclude (very) highly expressed genes for the computation of " 745 | "the normalization factor (size factor) for each cell. A gene is considered " 746 | "highly expressed, if it has more than max_fraction of the total counts in at " 747 | "least one cell. The not-excluded genes will sum up to the number " 748 | "specified by --normalize-to.", 749 | ), 750 | click.option( 751 | "--max-fraction", 752 | "-m", 753 | "max_fraction", 754 | type=float, 755 | default=0.05, 756 | show_default=True, 757 | help="If exclude_highly_expressed=True, consider cells as highly " 758 | "expressed that have more counts than max_fraction of the original total counts " 759 | "in at least one cell.", 760 | ), 761 | click.option( 762 | "--layers", 763 | "-l", 764 | type=CommaSeparatedText(simplify=True), 765 | default=None, 766 | show_default=True, 767 | help="List of layers to normalize. 
Set to 'all' to normalize all layers.", 768 | ), 769 | click.option( 770 | "--layer-norm", 771 | "-n", 772 | "layer_norm", 773 | type=click.Choice(["after", "X"]), 774 | default=None, 775 | show_default=True, 776 | help="Specifies how to normalize layers: 1) If None, after " 777 | "normalization, for each layer in layers each cell has a total count equal to " 778 | "the median of the counts_per_cell before normalization of the layer. 2) If " 779 | "'after', for each layer in layers each cell has a total count equal to " 780 | "target_sum. 3) If 'X', for each layer in layers each cell has a total count " 781 | "equal to the median of total counts for observations (cells) of adata.X before " 782 | "normalization.'", 783 | ), 784 | ], 785 | "hvg": [ 786 | *COMMON_OPTIONS["input"], 787 | *COMMON_OPTIONS["output"], 788 | click.option( 789 | "--mean-limits", 790 | "-m", 791 | type=(click.FLOAT, click.FLOAT), 792 | callback=valid_limit, 793 | default=(0.0125, 3), 794 | show_default=True, 795 | help="Cutoffs for the mean of expression" 'in the format of "-m min max".', 796 | ), 797 | click.option( 798 | "--disp-limits", 799 | "-d", 800 | type=(click.FLOAT, click.FLOAT), 801 | callback=valid_limit, 802 | default=(0.5, float("inf")), 803 | show_default=True, 804 | help="Cutoffs for the dispersion of expression" 805 | 'in the format of "-d min max".', 806 | ), 807 | click.option( 808 | "--span", 809 | type=click.FLOAT, 810 | default=0.3, 811 | show_default=True, 812 | help="The fraction of the data (cells) used when estimating the " 813 | "variance in the loess model fit if flavor='seurat_v3'.", 814 | ), 815 | click.option( 816 | "--n-bins", 817 | "-b", 818 | type=click.INT, 819 | default=20, 820 | show_default=True, 821 | help="Number of bins for binning the mean gene expression.", 822 | ), 823 | click.option( 824 | "--n-top-genes", 825 | "-t", 826 | type=click.INT, 827 | default=None, 828 | show_default=True, 829 | help="Number of highly-variable genes to keep.", 830 | ), 
831 | click.option( 832 | "--flavor", 833 | "-v", 834 | type=click.Choice(["seurat", "cell_ranger", "seurat_v3"]), 835 | default="seurat", 836 | show_default=True, 837 | help="Choose the flavor for computing normalized dispersion.", 838 | ), 839 | click.option( 840 | "--subset", 841 | "-s", 842 | is_flag=True, 843 | default=False, 844 | help="When set, inplace subset to highly-variable genes, otherwise " 845 | "only flag highly-variable genes.", 846 | ), 847 | click.option( 848 | "--batch-key", 849 | "batch_key", 850 | type=click.STRING, 851 | default=None, 852 | help="If specified, highly-variable genes are selected within each " 853 | "batch separately and merged. This simple process avoids the selection of " 854 | "batch-specific genes and acts as a lightweight batch correction method. For all " 855 | "flavors, genes are first sorted by how many batches they are a HVG. For " 856 | "dispersion-based flavors ties are broken by normalized dispersion. If flavor = " 857 | "'seurat_v3', ties are broken by the median (across batches) rank based on " 858 | "within-batch normalized variance.", 859 | ), 860 | click.option( 861 | "--always-hv-genes-file", 862 | "always_hv_genes_file", 863 | type=click.Path(exists=True), 864 | default=None, 865 | help="If specified, the gene identifiers in this file will be set as highly variable in the var dataframe after HVGs are computed.", 866 | ), 867 | click.option( 868 | "--never-hv-genes-file", 869 | "never_hv_genes_file", 870 | type=click.Path(exists=True), 871 | default=None, 872 | help="If specified, the gene identifiers in this file will be removed from highly variable in the var dataframe (set to false) after HVGs are computed.", 873 | ), 874 | ], 875 | "scale": [ 876 | *COMMON_OPTIONS["input"], 877 | *COMMON_OPTIONS["output"], 878 | *COMMON_OPTIONS["save"], 879 | COMMON_OPTIONS["zero_center"], 880 | click.option( 881 | "--max-value", 882 | "-m", 883 | type=click.FLOAT, 884 | default=None, 885 | show_default=True, 
help="When specified, clip to this value after scaling, otherwise do " 887 | "not clip", 888 | ), 889 | click.option( 890 | "--layer", 891 | "-l", 892 | type=CommaSeparatedText(simplify=True), 893 | default=None, 894 | help="If provided, which element of layers to scale.", 895 | ), 896 | ], 897 | "regress": [ 898 | *COMMON_OPTIONS["input"], 899 | *COMMON_OPTIONS["output"], 900 | *COMMON_OPTIONS["save"], 901 | COMMON_OPTIONS["n_jobs"], 902 | click.option( 903 | "--keys", 904 | "-k", 905 | type=CommaSeparatedText(simplify=True), 906 | default=None, 907 | show_default=True, 908 | help="Key(s) for observation annotation on which to regress.", 909 | ), 910 | ], 911 | "pca": [ 912 | *COMMON_OPTIONS["input"], 913 | *COMMON_OPTIONS["output"], 914 | COMMON_OPTIONS["zero_center"], 915 | COMMON_OPTIONS["random_state"], 916 | COMMON_OPTIONS["export_embedding"], 917 | COMMON_OPTIONS["n_comps"], 918 | click.option( 919 | "--svd-solver", 920 | "-V", 921 | type=click.Choice(["auto", "arpack", "randomized"]), 922 | default="auto", 923 | show_default=True, 924 | help="SVD solver to use.", 925 | ), 926 | click.option( 927 | "--use-all", 928 | "-a", 929 | "use_highly_variable", 930 | is_flag=True, 931 | flag_value=False, 932 | default=True, 933 | help="When set, use all genes for PCA, otherwise use " 934 | "highly-variable genes by default.", 935 | ), 936 | click.option( 937 | "--chunked", 938 | "-K", 939 | is_flag=True, 940 | default=False, 941 | help="When set, perform an incremental PCA on segments of " 942 | "--chunk-size, which automatically zero centers and ignore settings of " 943 | "--random-state and --svd-solver.", 944 | ), 945 | click.option( 946 | "--chunk-size", 947 | "-Z", 948 | type=click.INT, 949 | callback=required_by("--chunked"), 950 | default=None, 951 | show_default=True, 952 | help="Number of observations to include in each chunk, required by " 953 | "--chunked.", 954 | ), 955 | ], 956 | "neighbor": [ 957 | *COMMON_OPTIONS["input"], 958 | 
*COMMON_OPTIONS["output"], 959 | *COMMON_OPTIONS["use_pc"], 960 | COMMON_OPTIONS["key_added"], 961 | COMMON_OPTIONS["random_state"], 962 | click.option( 963 | "--n-neighbors", 964 | "-k", 965 | type=CommaSeparatedText(click.INT, simplify=True), 966 | default=15, 967 | show_default=True, 968 | help="The size of local neighborhood (in terms of number of " 969 | "neighboring data points) used for manifold approximation. Larger " 970 | "values result in more global views of the manifold, while smaller " 971 | "values result in more local data being preserved. In general values " 972 | "should be in the range 2 to 100. If --knn is set, number of nearest " 973 | "neighbors to be searched, otherwise a Gaussian kernel width is set to " 974 | "the distance of the --n-neighbors neighbor.", 975 | ), 976 | click.option( 977 | "--no-knn", 978 | "knn", 979 | is_flag=True, 980 | flag_value=False, 981 | default=True, 982 | show_default=True, 983 | help="When NOT set, use a hard threshold to restrict the number of " 984 | "neighbors to --n-neighbors. Otherwise, use a Gaussian kernel to " 985 | "assign low weights to neighbors more distant than the --n-neighbors " 986 | "nearest neighbor", 987 | ), 988 | click.option( 989 | "--method", 990 | "-m", 991 | type=click.Choice(["umap", "gauss", "rapids"]), 992 | default="umap", 993 | show_default=True, 994 | help="Use umap or gauss with adaptive width for computing " 995 | "connectivities. 
Use rapids for the RAPIDS implementation of UMAP " 996 | "(experimental, GPU only).", 997 | ), 998 | COMMON_OPTIONS["neighbor_metric"], 999 | ], 1000 | "umap": [ 1001 | *COMMON_OPTIONS["input"], 1002 | *COMMON_OPTIONS["output"], 1003 | COMMON_OPTIONS["knn_graph"][0], # --neighbors-key 1004 | COMMON_OPTIONS["random_state"], 1005 | COMMON_OPTIONS["key_added"], 1006 | COMMON_OPTIONS["export_embedding"], 1007 | click.option( 1008 | "--init-pos", 1009 | type=click.STRING, 1010 | default="spectral", 1011 | show_default=True, 1012 | help="How to initialize the low dimensional embedding. Can be " 1013 | '"spectral", "paga" or "random", or any key of `.obsm`.', 1014 | ), 1015 | click.option( 1016 | "--min-dist", 1017 | type=click.FLOAT, 1018 | default=0.5, 1019 | show_default=True, 1020 | help="The effective minimum distance between embedded points. Smaller " 1021 | "values will result in a more clustered embedding, while larger values " 1022 | "will results in a more even dispersal of points.", 1023 | ), 1024 | click.option( 1025 | "--spread", 1026 | type=click.FLOAT, 1027 | default=1.0, 1028 | show_default=True, 1029 | help="The effective scale of embedded points, which determines the " 1030 | "scale at which embedded points will be spread out.", 1031 | ), 1032 | click.option( 1033 | "--n-components", 1034 | type=click.INT, 1035 | default=2, 1036 | show_default=True, 1037 | help="The number of dimensions of the embedding.", 1038 | ), 1039 | click.option( 1040 | "--maxiter", 1041 | type=click.INT, 1042 | default=None, 1043 | show_default=True, 1044 | help="The number of iterations of the optimization.", 1045 | ), 1046 | click.option( 1047 | "--alpha", 1048 | type=click.FLOAT, 1049 | default=1.0, 1050 | show_default=True, 1051 | help="The initial learning rate for the embedding optimization.", 1052 | ), 1053 | click.option( 1054 | "--gamma", 1055 | type=click.FLOAT, 1056 | default=1.0, 1057 | show_default=True, 1058 | help="Weighting applied to negative samples in low 
dimensional " 1059 | "embedding optimization.", 1060 | ), 1061 | click.option( 1062 | "--negative-sample-rate", 1063 | type=click.INT, 1064 | default=5, 1065 | show_default=True, 1066 | help="The number of negative edge samples to use per positive edge " 1067 | "sample in optimizing the low dimensional embedding.", 1068 | ), 1069 | click.option( 1070 | "--method", 1071 | type=click.Choice(["umap", "rapids"]), 1072 | default="umap", 1073 | show_default=True, 1074 | help="Use the original ‘umap’ implementation, or ‘rapids’ " 1075 | "(experimental, GPU only).", 1076 | ), 1077 | ], 1078 | "tsne": [ 1079 | *COMMON_OPTIONS["input"], 1080 | *COMMON_OPTIONS["output"], 1081 | *COMMON_OPTIONS["use_pc"], 1082 | COMMON_OPTIONS["random_state"], 1083 | COMMON_OPTIONS["key_added"], 1084 | COMMON_OPTIONS["n_jobs"], 1085 | COMMON_OPTIONS["export_embedding"], 1086 | click.option( 1087 | "--perplexity", 1088 | type=click.FLOAT, 1089 | default=30, 1090 | show_default=True, 1091 | help="The perplexity is related to the number of nearest neighbors " 1092 | "that is used in other manifold learning algorithms. Larger datasets " 1093 | "usually require a larger perplexity. Consider selecting a value " 1094 | "between 5 and 50. The choice is not extremely critical since t-SNE " 1095 | "is quite insensitive to this parameter.", 1096 | ), 1097 | click.option( 1098 | "--early-exaggeration", 1099 | type=click.FLOAT, 1100 | default=12, 1101 | show_default=True, 1102 | help="Controls how tight natural clusters in the original space are in " 1103 | "the embedded space and how much space will be between them. For " 1104 | "larger values, the space between natural clusters will be larger in " 1105 | "the embedded space. Again, the choice of this parameter is not very " 1106 | "critical. 
If the cost function increases during initial optimization, " 1107 | "the early exaggeration factor or the learning rate might be too high.", 1108 | ), 1109 | click.option( 1110 | "--learning-rate", 1111 | type=click.FLOAT, 1112 | default=1000, 1113 | show_default=True, 1114 | help='Note that the R-package "Rtsne" uses a default of 200. The ' 1115 | "learning rate can be a critical parameter. It should be between 100 " 1116 | "and 1000. If the cost function increases during initial optimization, " 1117 | "the early exaggeration factor or the learning rate might be too high. " 1118 | "If the cost function gets stuck in a bad local minimum increasing the " 1119 | "learning rate helps sometimes.", 1120 | ), 1121 | click.option( 1122 | "--no-fast-tsne", 1123 | "use_fast_tsne", 1124 | is_flag=True, 1125 | flag_value=False, 1126 | default=True, 1127 | show_default=True, 1128 | help="When NOT set, use the MulticoreTSNE package by D. Ulyanov if " 1129 | "installed.", 1130 | ), 1131 | ], 1132 | "fdg": [ 1133 | *COMMON_OPTIONS["input"], 1134 | *COMMON_OPTIONS["output"], 1135 | COMMON_OPTIONS["random_state"], 1136 | COMMON_OPTIONS["export_embedding"], 1137 | COMMON_OPTIONS["root"], 1138 | click.option( 1139 | "--init-pos", 1140 | type=click.STRING, 1141 | default=None, 1142 | help="Use precomputed coordinates for initialization. 
Can be any key " 1143 | 'of `.obsm` or "paga" if .uns["paga"] is present', 1144 | ), 1145 | click.option( 1146 | "--layout", 1147 | type=click.Choice( 1148 | ["fa", "fr", "grid_fr", "kk", "lgl", "drl", "rt", "rt_circular"] 1149 | ), 1150 | default="fa", 1151 | show_default=True, 1152 | help='Name of any valid igraph layout, including "fa" (ForceAtlas2), ' 1153 | '"fr" (Fruchterman Reingold), "grid_fr" (Grid Fruchterman Reingold, ' 1154 | 'faster than "fr"), "kk" (Kamada Kawai, slower than "fr"), "lgl" ' 1155 | '(Large Graph Layout, very fast), "drl" (Distributed Recursive Layout, ' 1156 | 'pretty fast) and "rt" (Reingold Tilford tree layout).', 1157 | ), 1158 | click.option( 1159 | "--key-added-ext", 1160 | type=click.STRING, 1161 | default=None, 1162 | show_default=True, 1163 | help="By default, append 'layout'", 1164 | ), 1165 | click.option( 1166 | "--init-pos", 1167 | type=click.STRING, 1168 | default=None, 1169 | show_default=True, 1170 | help='How to initialize the low dimensional embedding. Can be "paga", ' 1171 | "or any valid key of `.obsm`.", 1172 | ), 1173 | COMMON_OPTIONS["knn_graph"][0], # --neighbors-key 1174 | COMMON_OPTIONS["knn_graph"][1], # --obsp 1175 | ], 1176 | "louvain": [ 1177 | *COMMON_OPTIONS["input"], 1178 | *COMMON_OPTIONS["output"], 1179 | COMMON_OPTIONS["export_cluster"], 1180 | *COMMON_OPTIONS["knn_graph"], 1181 | COMMON_OPTIONS["restrict_to"], 1182 | COMMON_OPTIONS["random_state"], 1183 | COMMON_OPTIONS["key_added"], 1184 | click.option( 1185 | "--flavor", 1186 | type=click.Choice(["vtraag", "igraph"]), 1187 | default="vtraag", 1188 | show_default=True, 1189 | help="Choose between two packages for computing the clustering. " 1190 | '"vtraag" is much more powerful, and the default.', 1191 | ), 1192 | click.option( 1193 | "--resolution", 1194 | "-r", 1195 | type=CommaSeparatedText(click.FLOAT, simplify=True), 1196 | default=1, 1197 | show_default=True, 1198 | help='For the default flavor "vtraag", you can provide a resolution. 
' 1199 | "Higher resolution means finding more and smaller clusters.", 1200 | ), 1201 | ], 1202 | "leiden": [ 1203 | *COMMON_OPTIONS["input"], 1204 | *COMMON_OPTIONS["output"], 1205 | COMMON_OPTIONS["export_cluster"], 1206 | *COMMON_OPTIONS["knn_graph"], 1207 | COMMON_OPTIONS["restrict_to"], 1208 | COMMON_OPTIONS["random_state"], 1209 | COMMON_OPTIONS["key_added"], 1210 | click.option( 1211 | "--resolution", 1212 | "-r", 1213 | type=CommaSeparatedText(click.FLOAT, simplify=True), 1214 | default=1, 1215 | show_default=True, 1216 | help="A parameter value controlling the coarseness of the clustering. " 1217 | 'Higher values lead to more clusters. Set to "None" if overriding ' 1218 | "--partition_type to one that doesn't accept `resolution_parameter`.", 1219 | ), 1220 | click.option( 1221 | "--n-iterations", 1222 | type=click.INT, 1223 | default=-1, 1224 | show_default=True, 1225 | help="How many iterations of the Leiden clustering algorithm to " 1226 | "perform. -1 has the algorithm run until it reaches its optimal " 1227 | "clustering.", 1228 | ), 1229 | ], 1230 | "diffexp": [ 1231 | *COMMON_OPTIONS["input"], 1232 | *COMMON_OPTIONS["output"], 1233 | COMMON_OPTIONS["use_raw"], 1234 | COMMON_OPTIONS["key_added"], 1235 | click.option( 1236 | "--layer", 1237 | "-l", 1238 | type=click.STRING, 1239 | default=None, 1240 | help="Key from adata.layers whose value will be used to perform tests on.", 1241 | ), 1242 | click.option( 1243 | "--groupby", 1244 | "-g", 1245 | type=click.STRING, 1246 | required=True, 1247 | help="The key of the observations grouping to consider.", 1248 | ), 1249 | click.option( 1250 | "--groups", 1251 | type=CommaSeparatedText(simplify=True), 1252 | default="all", 1253 | show_default=True, 1254 | help="Subset of groups to which comparison shall be restricted.", 1255 | ), 1256 | click.option( 1257 | "--reference", 1258 | type=click.STRING, 1259 | default="rest", 1260 | show_default=True, 1261 | help='If "rest", compare each group to the union of the 
rest of the ' 1262 | "groups. If a group identifier, compare with respect to this group.", 1263 | ), 1264 | click.option( 1265 | "--n-genes", 1266 | "-n", 1267 | type=click.INT, 1268 | default=None, 1269 | show_default=True, 1270 | help="The number of genes that appear in the retured tables. By " 1271 | "default return all available genes depending on the value of " 1272 | "--use-raw.", 1273 | ), 1274 | click.option( 1275 | "--method", 1276 | type=click.Choice(["logreg", "t-test", "wilcoxon", "t-test_overestim_var"]), 1277 | default="t-test_overestim_var", 1278 | show_default=True, 1279 | help="Method of performing differential expression analysis.", 1280 | ), 1281 | click.option( 1282 | "--corr-method", 1283 | type=click.Choice(["benjamini-hochberg", "bonferroni"]), 1284 | default="benjamini-hochberg", 1285 | show_default=True, 1286 | help='P-value correction method. Used only for "t-test", ' 1287 | '"t-test_overestim_var" and "wilcoxon".', 1288 | ), 1289 | click.option( 1290 | "--rankby-abs", 1291 | is_flag=True, 1292 | default=False, 1293 | show_default=True, 1294 | help="Rank genes by the absolute value of the score, not by the score. " 1295 | "The returned scores are never the absolute values.", 1296 | ), 1297 | click.option( 1298 | "--pts", 1299 | is_flag=True, 1300 | default=False, 1301 | show_default=True, 1302 | help="Compute the fraction of cells expressing the genes.", 1303 | ), 1304 | click.option( 1305 | "--tie-correct", 1306 | is_flag=True, 1307 | default=False, 1308 | show_default=True, 1309 | help="Use tie correction for 'wilcoxon' scores. 
Used only for " 1310 | "'wilcoxon'.", 1311 | ), 1312 | click.option( 1313 | "--filter-params", 1314 | type=Dictionary( 1315 | keys=[ 1316 | "min_in_group_fraction", 1317 | "max_out_group_fraction", 1318 | "min_fold_change", 1319 | ] 1320 | ), 1321 | default=None, 1322 | show_default=True, 1323 | help="Parameters for filtering DE results, valid parameters are: " 1324 | '"min_in_group_fraction" (float), "max_out_group_fraction" (float), ' 1325 | '"min_fold_change" (float).', 1326 | ), 1327 | click.option( 1328 | "--logreg-param", 1329 | type=Dictionary(), 1330 | default=None, 1331 | show_default=True, 1332 | help="Parameters passed to `sklearn.linear_model.LogisticRegression`.", 1333 | ), 1334 | click.option( 1335 | "--save", 1336 | type=click.Path(dir_okay=False, writable=True), 1337 | default=None, 1338 | show_default=True, 1339 | help="Tab-separated table to store results of differential expression " 1340 | "analysis.", 1341 | ), 1342 | ], 1343 | "paga": [ 1344 | *COMMON_OPTIONS["input"], 1345 | *COMMON_OPTIONS["output"], 1346 | COMMON_OPTIONS["knn_graph"][0], # --neighbors-key 1347 | COMMON_OPTIONS["key_added"], 1348 | click.option( 1349 | "--groups", 1350 | type=click.STRING, 1351 | required=True, 1352 | help="Key for categorical in `.obs`. You can pass your predefined " 1353 | "groups by choosing any categorical annotation of observations.", 1354 | ), 1355 | click.option( 1356 | "--model", 1357 | type=click.Choice(["v1.2", "v1.0"]), 1358 | default="v1.2", 1359 | show_default=True, 1360 | help="The PAGA connectivity model.", 1361 | ), 1362 | click.option( 1363 | "--use-rna-velocity", 1364 | is_flag=True, 1365 | default=False, 1366 | show_default=True, 1367 | help="Use RNA velocity to orient edges in the abstracted graph and " 1368 | "estimate transitions. Requires that adata.uns contains a directed single-cell " 1369 | "graph with key velocity_graph. 
This feature might be subject to change in the " 1370 | "future.", 1371 | ), 1372 | ], 1373 | "diffmap": [ 1374 | *COMMON_OPTIONS["input"], 1375 | *COMMON_OPTIONS["output"], 1376 | COMMON_OPTIONS["knn_graph"][0], # --neighbors-key 1377 | COMMON_OPTIONS["key_added"], 1378 | COMMON_OPTIONS["export_embedding"], 1379 | COMMON_OPTIONS["n_comps"], 1380 | ], 1381 | "dpt": [ 1382 | *COMMON_OPTIONS["input"], 1383 | *COMMON_OPTIONS["output"], 1384 | COMMON_OPTIONS["knn_graph"][0], # --neighbors-key 1385 | COMMON_OPTIONS["key_added"], 1386 | click.option( 1387 | "--root", 1388 | type=(click.STRING, click.STRING), 1389 | default=(None, None), 1390 | show_default=True, 1391 | help="Specify a categorical annotaion of observations (`.obs`) and a " 1392 | "value representing the root cells.", 1393 | ), 1394 | click.option( 1395 | "--n-dcs", 1396 | type=click.INT, 1397 | default=10, 1398 | show_default=True, 1399 | help="The number of diffusion components to use.", 1400 | ), 1401 | click.option( 1402 | "--n-branchings", 1403 | type=click.INT, 1404 | default=0, 1405 | show_default=True, 1406 | help="Number of branchings to detect.", 1407 | ), 1408 | click.option( 1409 | "--min-group-size", 1410 | type=click.FLOAT, 1411 | default=0.01, 1412 | show_default=True, 1413 | help="During recursive splitting of branches for --n-branchings > 1, " 1414 | "do not consider branches/groups that contain fewer than this fraction " 1415 | "of the total number of data points.", 1416 | ), 1417 | click.option( 1418 | "--disallow-kendall-tau-shift", 1419 | "allow_kendall_tau_shift", 1420 | is_flag=True, 1421 | default=True, 1422 | show_default=True, 1423 | help="By default: If a very small branch is detected upon " 1424 | "splitting, shift away from maximum correlation in Kendall tau criterion of " 1425 | "[Haghverdi16] to stabilize the splitting. 
Use flag to disable this.", 1426 | ), 1427 | ], 1428 | "combat": [ 1429 | *COMMON_OPTIONS["input"], 1430 | *COMMON_OPTIONS["output"], 1431 | COMMON_OPTIONS["batch_key"], 1432 | COMMON_OPTIONS["batch_layer"], 1433 | click.option( 1434 | "--key-added", 1435 | type=click.STRING, 1436 | default=None, 1437 | show_default=True, 1438 | help="Key under which to add the computed results. By default a new " 1439 | "layer will be created called 'combat', 'combat_{layer}' or " 1440 | "'combat_layer_{key_added}' where those parameters were specified. A value of 'X' " 1441 | "causes batch-corrected values to overwrite the original content of .X.", 1442 | ), 1443 | click.option( 1444 | "--covariates", 1445 | type=(CommaSeparatedText()), 1446 | default=None, 1447 | show_default=True, 1448 | help="Comma-separated list of additional covariates besides the " 1449 | "batch variable such as adjustment variables or biological condition. This " 1450 | "parameter refers to the design matrix X in Equation 2.1 in [Johnson07] and to " 1451 | "the mod argument in the original combat function in the sva R package. Note " 1452 | "that not including covariates may introduce bias or lead to the removal of " 1453 | "biological signal in unbalanced designs.", 1454 | ), 1455 | ], 1456 | "harmony": [ 1457 | *COMMON_OPTIONS["input"], 1458 | *COMMON_OPTIONS["output"], 1459 | COMMON_OPTIONS["batch_key"], 1460 | click.option( 1461 | "--basis", 1462 | type=click.STRING, 1463 | default="X_pca", 1464 | show_default=True, 1465 | help="The name of the field in adata.obsm where the PCA table is " 1466 | "stored. 
Defaults to 'X_pca', which is the default for sc.tl.pca().", 1467 | ), 1468 | click.option( 1469 | "--adjusted-basis", 1470 | type=click.STRING, 1471 | default="X_pca_harmony", 1472 | show_default=True, 1473 | help="The name of the field in adata.obsm where the adjusted PCA " 1474 | "table will be stored after running this function.", 1475 | ), 1476 | click.option( 1477 | "--theta", 1478 | type=click.FLOAT, 1479 | default=2, 1480 | show_default=True, 1481 | help="Diversity clustering penalty parameter. theta=0 does not encourage any " 1482 | "diversity. Larger values of theta result in more diverse clusters.", 1483 | ), 1484 | click.option( 1485 | "--lambda", 1486 | "lamb", 1487 | type=click.FLOAT, 1488 | default=1, 1489 | show_default=True, 1490 | help="Ridge regression penalty parameter. Lambda must be strictly " 1491 | "positive. Smaller values result in more aggressive correction.", 1492 | ), 1493 | click.option( 1494 | "--sigma", 1495 | type=click.FLOAT, 1496 | default=0.1, 1497 | show_default=True, 1498 | help="Width of soft kmeans clusters. Sigma scales the distance from " 1499 | "a cell to cluster centroids. Larger values of sigma result in cells assigned to " 1500 | "more clusters. Smaller values of sigma make soft kmeans cluster approach hard " 1501 | "clustering.", 1502 | ), 1503 | click.option( 1504 | "--n-clust", 1505 | "nclust", 1506 | type=click.INT, 1507 | default=None, 1508 | show_default=False, 1509 | help="Number of clusters in model. nclust=1 equivalent to simple " 1510 | "linear regression.", 1511 | ), 1512 | click.option( 1513 | "--tau", 1514 | type=click.INT, 1515 | default=0, 1516 | show_default=True, 1517 | help="Protection against overclustering small datasets with large ones. " 1518 | "tau is the expected number of cells per cluster.", 1519 | ), 1520 | click.option( 1521 | "--block-size", 1522 | type=click.FLOAT, 1523 | default=0.05, 1524 | show_default=True, 1525 | help="What proportion of cells to update during clustering. 
Between " 1526 | "0 to 1, default 0.05. Larger values may be faster but less accurate.", 1527 | ), 1528 | click.option( 1529 | "--max-iter-cluster", 1530 | "max_iter_kmeans", 1531 | type=click.INT, 1532 | default=20, 1533 | show_default=True, 1534 | help="Maximum number of rounds to run clustering at each round of " 1535 | "Harmony.", 1536 | ), 1537 | click.option( 1538 | "--max-iter-harmony", 1539 | type=click.INT, 1540 | default=10, 1541 | show_default=True, 1542 | help="Maximum number of rounds to run Harmony. One round of Harmony " 1543 | "involves one clustering and one correction step.", 1544 | ), 1545 | click.option( 1546 | "--epsilon-cluster", 1547 | type=click.FLOAT, 1548 | default=1e-5, 1549 | show_default=True, 1550 | help="Convergence tolerance for clustering round of Harmony Set to " 1551 | "-Inf to never stop early.", 1552 | ), 1553 | click.option( 1554 | "--epsilon-harmony", 1555 | type=click.FLOAT, 1556 | default=1e-5, 1557 | show_default=True, 1558 | help="Convergence tolerance for clustering round of Harmony Set to " 1559 | "-Inf to never stop early.", 1560 | ), 1561 | COMMON_OPTIONS["random_state"], 1562 | ], 1563 | "mnn": [ 1564 | *COMMON_OPTIONS["input"], 1565 | *COMMON_OPTIONS["output"], 1566 | *COMMON_OPTIONS["save"], 1567 | COMMON_OPTIONS["batch_key"], 1568 | COMMON_OPTIONS["batch_layer"], 1569 | click.option( 1570 | "--key-added", 1571 | type=click.STRING, 1572 | default=None, 1573 | show_default=True, 1574 | help="Key under which to add the computed results. By default a new " 1575 | "layer will be created called 'mnn', 'mnn_{layer}' or " 1576 | "'mnn_layer_{key_added}' where those parameters were specified. 
A value of 'X' " 1577 | "causes batch-corrected values to overwrite the original content of .X.", 1578 | ), 1579 | click.option( 1580 | "--var-subset", 1581 | type=(click.STRING, CommaSeparatedText()), 1582 | multiple=True, 1583 | help="The subset of vars (list of str) to be used when performing " 1584 | "MNN correction in the format of '--var-subset '. Typically, use " 1585 | "the highly variable genes (HVGs) like '--var-subset highly_variable True'. When " 1586 | "unset, uses all vars.", 1587 | ), 1588 | click.option( 1589 | "--n-neighbors", 1590 | "-k", 1591 | type=CommaSeparatedText(click.INT, simplify=True), 1592 | default=20, 1593 | show_default=True, 1594 | help="Number of mutual nearest neighbors.", 1595 | ), 1596 | click.option( 1597 | "--sigma", 1598 | type=click.FLOAT, 1599 | default=1.0, 1600 | show_default=True, 1601 | help="The bandwidth of the Gaussian smoothing kernel used to " 1602 | "compute the correction vectors.", 1603 | ), 1604 | click.option( 1605 | "--no-cos_norm_in", 1606 | "cos_norm_in", 1607 | is_flag=True, 1608 | default=True, 1609 | help="Default behaviour is to perform cosine normalization on the " 1610 | "input data prior to calculating distances between cells. Use this " 1611 | "flag to disable that behaviour.", 1612 | ), 1613 | click.option( 1614 | "--no-cos_norm_out", 1615 | "cos_norm_out", 1616 | is_flag=True, 1617 | default=True, 1618 | help="Default behaviour is to perform cosine normalization prior to " 1619 | "computing corrected expression values. Use this flag to disable that " 1620 | "behaviour.", 1621 | ), 1622 | click.option( 1623 | "--svd-dim", 1624 | type=click.INT, 1625 | default=None, 1626 | show_default=True, 1627 | help="The number of dimensions to use for summarizing biological " 1628 | "substructure within each batch. 
If not set, biological components " 1629 | "will not be removed from the correction vectors.", 1630 | ), 1631 | click.option( 1632 | "--no-var-adj", 1633 | is_flag=True, 1634 | default=True, 1635 | help="Default behaviour is to adjust variance of the correction " 1636 | "vectors. Use this flag to disable that behaviour. Note this step takes most " 1637 | "computing time.", 1638 | ), 1639 | click.option( 1640 | "--compute-angle", 1641 | is_flag=True, 1642 | default=False, 1643 | help="When set, compute the angle between each cell’s correction " 1644 | "vector and the biological subspace of the reference batch.", 1645 | ), 1646 | click.option( 1647 | "--svd-mode", 1648 | type=click.Choice(["svd", "rsvd", "irlb"]), 1649 | default="rsvd", 1650 | show_default=True, 1651 | help="'svd' computes SVD using a non-randomized SVD-via-ID " 1652 | "algorithm, while 'rsvd' uses a randomized version. 'irlb' performs truncated " 1653 | "SVD by implicitly restarted Lanczos bidiagonalization (forked from " 1654 | "https://github.com/airysen/irlbpy).", 1655 | ), 1656 | ], 1657 | "bbknn": [ 1658 | *COMMON_OPTIONS["input"], 1659 | *COMMON_OPTIONS["output"], 1660 | COMMON_OPTIONS["key_added"], 1661 | COMMON_OPTIONS["batch_key"], 1662 | click.option( 1663 | "--use-rep", 1664 | "-u", 1665 | type=click.STRING, 1666 | default="X_pca", 1667 | show_default=True, 1668 | help="The dimensionality reduction in .obsm to use for neighbour " 1669 | "detection.", 1670 | ), 1671 | COMMON_OPTIONS["use_pc"][0], # --n-pcs 1672 | click.option( 1673 | "--no-approx", 1674 | "approx", 1675 | is_flag=True, 1676 | default=True, 1677 | help="Default behaviour is to use annoy’s approximate neighbour " 1678 | "finding. This results in a quicker run time for large datasets while also " 1679 | "potentially increasing the degree of batch correction. 
Use this flag to disable " 1680 | "that behaviour.", 1681 | ), 1682 | COMMON_OPTIONS["neighbor_metric"], 1683 | click.option( 1684 | "--neighbors-within-batch", 1685 | type=click.INT, 1686 | default=3, 1687 | show_default=True, 1688 | help="How many top neighbours to report for each batch; total " 1689 | "number of neighbours will be this number times the number of batches.", 1690 | ), 1691 | click.option( 1692 | "--trim", 1693 | type=click.INT, 1694 | default=None, 1695 | show_default=True, 1696 | help="Trim the neighbours of each cell to these many top " 1697 | "connectivities. May help with population independence and improve the tidiness " 1698 | "of clustering. The lower the value the more independent the individual " 1699 | "populations, at the cost of more conserved batch effect. If None, sets the " 1700 | "parameter value automatically to 10 times the total number of neighbours for " 1701 | "each cell. Set to 0 to skip.", 1702 | ), 1703 | click.option( 1704 | "--annoy-n-trees", 1705 | type=click.INT, 1706 | default=10, 1707 | show_default=True, 1708 | help="Only used when approx=True. The number of trees to construct " 1709 | "in the annoy forest. More trees give higher precision when querying, at the " 1710 | "cost of increased run time and resource intensity.", 1711 | ), 1712 | click.option( 1713 | "--no-use-faiss", 1714 | "use_faiss", 1715 | is_flag=True, 1716 | default=True, 1717 | help="Default behaviour If approx=False and the metric is " 1718 | "“euclidean”, is to use the faiss package to compute nearest neighbours if " 1719 | "installed. This improves performance at a minor cost to numerical precision as " 1720 | "faiss operates on float32. 
Use this flag to disable that behaviour.", 1721 | ), 1722 | click.option( 1723 | "--set-op-mix-ratio", 1724 | type=click.FLOAT, 1725 | default=1, 1726 | show_default=True, 1727 | help="UMAP connectivity computation parameter, float between 0 and " 1728 | "1, controlling the blend between a connectivity matrix formed exclusively from " 1729 | "mutual nearest neighbour pairs (0) and a union of all observed neighbour " 1730 | "relationships with the mutual pairs emphasised (1).", 1731 | ), 1732 | click.option( 1733 | "--local-connectivity", 1734 | type=click.INT, 1735 | default=1, 1736 | show_default=True, 1737 | help="UMAP connectivity computation parameter, how many nearest " 1738 | "neighbors of each cell are assumed to be fully connected (and given a " 1739 | "connectivity value of 1)", 1740 | ), 1741 | ], 1742 | "scrublet": [ 1743 | *COMMON_OPTIONS["input"], 1744 | *COMMON_OPTIONS["output"], 1745 | click.option( 1746 | "--batch-key", 1747 | "batch_key", 1748 | type=click.STRING, 1749 | help="The name of the column in adata.obs that differentiates among " 1750 | "experiments/batches. Doublets will be detected in each batch separately.", 1751 | ), 1752 | click.option( 1753 | "--input-obj-sim", 1754 | "adata_sim", 1755 | type=click.Path(exists=True, dir_okay=False), 1756 | default=None, 1757 | help="(Advanced use case) Optional annData object generated by " 1758 | "sc.external.pp.scrublet_simulate_doublets(), with same number of " 1759 | "vars as adata. This should have been built from input_obj after " 1760 | "filtering genes and cells and selcting highly-variable genes.", 1761 | ), 1762 | click.option( 1763 | "--threshold", 1764 | type=click.FLOAT, 1765 | default=None, 1766 | show_default=True, 1767 | help="Doublet score threshold for calling a transcriptome a " 1768 | "doublet. If not set, this is set automatically by looking for the " 1769 | "minimum between the two modes of the doublet_scores_sim_ histogram. 
" 1770 | "It is best practice to check the threshold visually using the " 1771 | "doublet_scores_sim_ histogram and/or based on co-localization of " 1772 | "predicted doublets in a 2-D embedding.", 1773 | ), 1774 | *COMMON_OPTIONS["scrublet"], 1775 | click.option( 1776 | "--expected-doublet-rate", 1777 | type=click.FLOAT, 1778 | default=0.05, 1779 | show_default=True, 1780 | help="Where input_obj_sim not suplied, the estimated doublet rate " 1781 | "for the experiment.", 1782 | ), 1783 | click.option( 1784 | "--stdev-doublet-rate", 1785 | type=click.FLOAT, 1786 | default=0.02, 1787 | show_default=True, 1788 | help="Where input_obje_sim not suplied, uncertainty in the expected " 1789 | "doublet rate.", 1790 | ), 1791 | click.option( 1792 | "--knn-dist-metric", 1793 | "-t", 1794 | type=click.Choice( 1795 | [ 1796 | "cityblock", 1797 | "cosine", 1798 | "euclidean", 1799 | "l1", 1800 | "l2", 1801 | "manhattan", 1802 | "braycurtis", 1803 | "canberra", 1804 | "chebyshev", 1805 | "correlation", 1806 | "dice", 1807 | "hamming", 1808 | "jaccard", 1809 | "kulsinski", 1810 | "mahalanobis", 1811 | "minkowski", 1812 | "rogerstanimoto", 1813 | "russellrao", 1814 | "seuclidean", 1815 | "sokalmichener", 1816 | "sokalsneath", 1817 | "sqeuclidean", 1818 | "yule", 1819 | ] 1820 | ), 1821 | default="euclidean", 1822 | show_default=True, 1823 | help="A known metric’s name.", 1824 | ), 1825 | click.option( 1826 | "--no-normalize-variance", 1827 | "normalize_variance", 1828 | is_flag=True, 1829 | default=True, 1830 | help="Default is to normalize the data such that each gene has a " 1831 | "variance of 1. sklearn.decomposition.TruncatedSVD will be used for " 1832 | "dimensionality reduction, if --no-mean-center is set. 
Use this flag " 1833 | "to disable that behaviour.", 1834 | ), 1835 | click.option( 1836 | "--log-transform", 1837 | is_flag=True, 1838 | default=False, 1839 | show_default=True, 1840 | help="Whether to use :func:~scanpy.pp.log1p to log-transform the " 1841 | "data prior to PCA.", 1842 | ), 1843 | click.option( 1844 | "--no-mean-center", 1845 | "mean_center", 1846 | is_flag=True, 1847 | default=True, 1848 | help="If True, center the data such that each gene has a mean of 0. " 1849 | "sklearn.decomposition.PCA will be used for dimensionality " 1850 | "reduction.", 1851 | ), 1852 | click.option( 1853 | "--n-pcs", 1854 | "n_prin_comps", 1855 | type=click.INT, 1856 | default=30, 1857 | show_default=True, 1858 | help="Number of principal components used to embed the " 1859 | "transcriptomes prior to k-nearest-neighbor graph construction.", 1860 | ), 1861 | click.option( 1862 | "--no-approx", 1863 | "use_approx_neighbors", 1864 | is_flag=True, 1865 | default=True, 1866 | help="Default behaviour is to use the approximate nearest neighbor " 1867 | "method (annoy) for the KNN classifier. Use this flag to disable " 1868 | "that behaviour.", 1869 | ), 1870 | click.option( 1871 | "--get-doublet-neighbor-parents", 1872 | is_flag=True, 1873 | default=False, 1874 | show_default=True, 1875 | help="If set, return (in .uns) the parent transcriptomes that " 1876 | "generated the doublet neighbors of each observed transcriptome. " 1877 | "This information can be used to infer the cell states that " 1878 | "generated a given doublet state.", 1879 | ), 1880 | click.option( 1881 | "--n-neighbors", 1882 | "-k", 1883 | type=CommaSeparatedText(click.INT, simplify=True), 1884 | default=None, 1885 | show_default=True, 1886 | help="Number of neighbors used to construct the KNN graph of " 1887 | "observed transcriptomes and simulated doublets. 
If not set, this is " 1888 | "automatically set to np.round(0.5 * np.sqrt(n_obs)).", 1889 | ), 1890 | click.option( 1891 | "--filter", 1892 | "filter", 1893 | is_flag=True, 1894 | default=False, 1895 | help="By default, the output object is annotated but not filtered " 1896 | "according to the scrublet status. Setting this flag will cause " 1897 | "predicted multiplet elements to be removed.", 1898 | ), 1899 | click.option( 1900 | "--no-verbose", 1901 | "verbose", 1902 | is_flag=True, 1903 | default=True, 1904 | help="Default behaviour is to print progress updates. Use this flag " 1905 | "to disable that.", 1906 | ), 1907 | click.option( 1908 | "--export-table", 1909 | type=click.Path(dir_okay=False, writable=True), 1910 | default=None, 1911 | show_default=True, 1912 | help="Export a table of double scores and calls to the specified file.", 1913 | ), 1914 | COMMON_OPTIONS["random_state"], 1915 | ], 1916 | "plot_scrublet": [ 1917 | *COMMON_OPTIONS["input"], 1918 | *COMMON_OPTIONS["plot"], 1919 | click.option( 1920 | "--scale-hist-obs", 1921 | "-b", 1922 | type=click.Choice(["linear", "log", "symlog", "logit"]), 1923 | default="log", 1924 | show_default=True, 1925 | help="Set y axis scale transformation in matplotlib for the plot of observed transcriptomes.", 1926 | ), 1927 | click.option( 1928 | "--scale-hist-sim", 1929 | "-s", 1930 | type=click.Choice(["linear", "log", "symlog", "logit"]), 1931 | default="linear", 1932 | show_default=True, 1933 | help="Set y axis scale transformation in matplotlib for the plot of observed transcriptomes.", 1934 | ), 1935 | ], 1936 | "scrublet_simulate_doublets": [ 1937 | *COMMON_OPTIONS["input"], 1938 | *COMMON_OPTIONS["output"], 1939 | *COMMON_OPTIONS["scrublet"], 1940 | click.option( 1941 | "--layer", 1942 | "-l", 1943 | type=click.STRING, 1944 | default=None, 1945 | help="Layer of adata where raw values are stored, or ‘X’ if values " 1946 | "are in .X.", 1947 | ), 1948 | ], 1949 | "embed": [ 1950 | *COMMON_OPTIONS["input"], 1951 
| *COMMON_OPTIONS["plot"], 1952 | *COMMON_OPTIONS["frame_title"], 1953 | COMMON_OPTIONS["layer"], 1954 | click.option( 1955 | "--basis", 1956 | type=click.STRING, 1957 | default="umap", 1958 | show_default=True, 1959 | help="Name of the embedding to plot, must be a key of `.obsm` without " 1960 | 'the prefix "X_".', 1961 | ), 1962 | click.option( 1963 | "--color", 1964 | type=CommaSeparatedText(simplify=True), 1965 | default=None, 1966 | show_default=True, 1967 | help="Keys for annotations of observations/cells or variables/genes.", 1968 | ), 1969 | click.option( 1970 | "--legend-loc", 1971 | type=click.Choice(["right margin", "on data"]), 1972 | default="right margin", 1973 | show_default=True, 1974 | help='Location of legend, either "on data", "right margin" or valid ' 1975 | "keywords for `matplotlib.legend`.", 1976 | ), 1977 | click.option( 1978 | "--legend-fontsize", 1979 | type=click.INT, 1980 | default=15, 1981 | show_default=True, 1982 | help="Legend font size.", 1983 | ), 1984 | click.option( 1985 | "--size", 1986 | type=click.FLOAT, 1987 | default=None, 1988 | show_default=True, 1989 | help="Point size. Automatically computed if not specified.", 1990 | ), 1991 | COMMON_OPTIONS["gene_symbols"], 1992 | click.option( 1993 | "--edges", 1994 | is_flag=True, 1995 | default=False, 1996 | show_default=True, 1997 | help="Show edges.", 1998 | ), 1999 | click.option( 2000 | "--edges-width", 2001 | type=click.FLOAT, 2002 | default=0.1, 2003 | show_default=True, 2004 | help="Width of edges.", 2005 | ), 2006 | click.option( 2007 | "--edges-color", 2008 | type=click.STRING, 2009 | default=None, 2010 | show_default=True, 2011 | help="Color of edges. 
See draw_networkx_edges().", 2012 | ), 2013 | COMMON_OPTIONS["knn_graph"][0], # --neighbors-key 2014 | click.option( 2015 | "--no-sort-order", 2016 | "sort_order", 2017 | is_flag=True, 2018 | default=True, 2019 | show_default=True, 2020 | help="Disable default behaviour: for continuous annotations used as " 2021 | "color parameter, plot data points with higher values on top of others.", 2022 | ), 2023 | *COMMON_OPTIONS["plot_embed"], 2024 | click.option( 2025 | "--components", 2026 | type=click.STRING, 2027 | default=None, 2028 | show_default=True, 2029 | help="For instance, ['1,2', '2,3']. To plot all available components use 'all'.", 2030 | ), 2031 | click.option( 2032 | "--projection", 2033 | type=click.Choice(["2d", "3d"]), 2034 | default="2d", 2035 | show_default=True, 2036 | help="Projection of plot.", 2037 | ), 2038 | ], 2039 | "plot_paga": [ 2040 | *COMMON_OPTIONS["input"], 2041 | *COMMON_OPTIONS["plot"], 2042 | *COMMON_OPTIONS["frame_title"], 2043 | *COMMON_OPTIONS["plot_embed"], 2044 | COMMON_OPTIONS["random_state"], 2045 | click.option( 2046 | "--use-key", 2047 | type=click.STRING, 2048 | default="paga", 2049 | show_default=True, 2050 | help="The key in `.uns` that contains trajectory information.", 2051 | ), 2052 | click.option( 2053 | "--layout", 2054 | type=click.Choice(["fa", "fr", "grid_fr", "kk", "lgl", "drl", "rt"]), 2055 | default="fr", 2056 | show_default=True, 2057 | help="Plotting layout that computes positions.", 2058 | ), 2059 | click.option( 2060 | "--init-pos", 2061 | type=click.STRING, 2062 | default=None, 2063 | show_default=True, 2064 | help="Plotting layout that computes positions.", 2065 | ), 2066 | click.option( 2067 | "--threshold", 2068 | type=click.FLOAT, 2069 | default=0.01, 2070 | show_default=True, 2071 | help="Do not draw edges for weights below this threshold. 
Set to 0 to " 2072 | "include all edges.", 2073 | ), 2074 | COMMON_OPTIONS["root"], 2075 | click.option( 2076 | "--root", 2077 | type=click.INT, 2078 | default=0, 2079 | show_default=True, 2080 | help="If choosing a tree layout, this is the index of the root node.", 2081 | ), 2082 | click.option( 2083 | "--transitions", 2084 | type=click.STRING, 2085 | default=None, 2086 | show_default=True, 2087 | help='Key for `.uns["paga"]` that specifies the matrix, e.g. ' 2088 | "`transition_confidence`, that stores the arrows.", 2089 | ), 2090 | click.option( 2091 | "--single-component", 2092 | is_flag=True, 2093 | default=False, 2094 | show_default=True, 2095 | help="Restrict to largest connected component", 2096 | ), 2097 | click.option( 2098 | "--solid-edges", 2099 | type=click.Choice(["connectivities", "connectivities_tree"]), 2100 | default="connectivities", 2101 | show_default=True, 2102 | help='Key for `.uns["paga"]` that specifies the matrix that stores the ' 2103 | "edges to be drawn solid black.", 2104 | ), 2105 | click.option( 2106 | "--basis", 2107 | type=click.STRING, 2108 | default=None, 2109 | show_default=True, 2110 | help="Name of the embedding to plot, must be a key of `.obsm` without " 2111 | 'the prefix "X_".', 2112 | ), 2113 | click.option( 2114 | "--color", 2115 | type=CommaSeparatedText(simplify=True), 2116 | default=None, 2117 | show_default=True, 2118 | help="Key(s) for annotation of observations/cells or variables/genes. Comma-separated if more than one", 2119 | ), 2120 | click.option( 2121 | "--legend-loc", 2122 | type=click.Choice(["right margin", "on data"]), 2123 | default="right margin", 2124 | show_default=True, 2125 | help='Location of legend, either "on data", "right margin" or valid ' 2126 | "keywords for `matplotlib.legend`.", 2127 | ), 2128 | click.option( 2129 | "--size", 2130 | type=click.FLOAT, 2131 | default=None, 2132 | show_default=True, 2133 | help="Point size. 
Automatically computed if not specified.", 2134 | ), 2135 | click.option( 2136 | "--node-size-scale", 2137 | type=click.FLOAT, 2138 | default=1.0, 2139 | show_default=True, 2140 | help="Increase of decrease the size of the nodes.", 2141 | ), 2142 | click.option( 2143 | "--fontsize", 2144 | type=click.INT, 2145 | default=None, 2146 | show_default=True, 2147 | help="Font size for node labels.", 2148 | ), 2149 | click.option( 2150 | "--edge-width-scale", 2151 | type=click.FLOAT, 2152 | default=1.0, 2153 | show_default=True, 2154 | help="Increase of decrease the width of the edges.", 2155 | ), 2156 | click.option( 2157 | "--arrowsize", 2158 | type=click.INT, 2159 | default=30, 2160 | show_default=True, 2161 | help="For directed graphs, specify the length and width of the arrowhead.", 2162 | ), 2163 | *COMMON_OPTIONS["opt_output"], 2164 | ], 2165 | "sviol": [ 2166 | *COMMON_OPTIONS["input"], 2167 | *COMMON_OPTIONS["plot"], 2168 | COMMON_OPTIONS["use_raw"], 2169 | COMMON_OPTIONS["var_names"], 2170 | *COMMON_OPTIONS["rank_genes_groups_plots"], 2171 | COMMON_OPTIONS["layer"], 2172 | *COMMON_OPTIONS["diffexp_plot"], 2173 | COMMON_OPTIONS["gene_symbols"], 2174 | *COMMON_OPTIONS["sviol"], 2175 | COMMON_OPTIONS["swap_axes"], 2176 | ], 2177 | "dot": [ 2178 | *COMMON_OPTIONS["input"], 2179 | *COMMON_OPTIONS["plot"], 2180 | COMMON_OPTIONS["use_raw"], 2181 | COMMON_OPTIONS["var_names"], 2182 | *COMMON_OPTIONS["rank_genes_groups_plots"], 2183 | COMMON_OPTIONS["layer"], 2184 | *COMMON_OPTIONS["diffexp_plot"], 2185 | COMMON_OPTIONS["gene_symbols"], 2186 | *COMMON_OPTIONS["dot"], 2187 | ], 2188 | "matrix": [ 2189 | *COMMON_OPTIONS["input"], 2190 | *COMMON_OPTIONS["plot"], 2191 | COMMON_OPTIONS["use_raw"], 2192 | COMMON_OPTIONS["var_names"], 2193 | *COMMON_OPTIONS["rank_genes_groups_plots"], 2194 | COMMON_OPTIONS["layer"], 2195 | *COMMON_OPTIONS["diffexp_plot"], 2196 | COMMON_OPTIONS["gene_symbols"], 2197 | ], 2198 | "heat": [ 2199 | *COMMON_OPTIONS["input"], 2200 | 
*COMMON_OPTIONS["plot"], 2201 | COMMON_OPTIONS["use_raw"], 2202 | COMMON_OPTIONS["var_names"], 2203 | *COMMON_OPTIONS["rank_genes_groups_plots"], 2204 | COMMON_OPTIONS["layer"], 2205 | *COMMON_OPTIONS["diffexp_plot"], 2206 | COMMON_OPTIONS["gene_symbols"], 2207 | *COMMON_OPTIONS["heat"], 2208 | COMMON_OPTIONS["swap_axes"], 2209 | ], 2210 | } 2211 | -------------------------------------------------------------------------------- /scanpy_scripts/cmd_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provide helper functions for constructing sub-commands 3 | """ 4 | 5 | import click 6 | import pandas as pd 7 | import scanpy as sc 8 | import scanpy.external as sce 9 | 10 | from .cmd_options import CMD_OPTIONS 11 | from .lib._paga import plot_paga 12 | from .lib._scrublet import plot_scrublet 13 | from .obj_utils import _save_matrix 14 | 15 | 16 | def make_subcmd(cmd_name, func, cmd_desc, arg_desc, opt_set=None): 17 | """ 18 | Factory function that returns a sub-command function 19 | """ 20 | opt_set = opt_set if opt_set else cmd_name 21 | options = CMD_OPTIONS[opt_set] 22 | option_spec = [click.command(cmd_name)] 23 | option_spec.extend(options) 24 | 25 | def add_docstring(cmd_desc, arg_desc): 26 | def docstring_dec(obj): 27 | obj.__doc__ = obj.__doc__.format(cmd_desc=cmd_desc, arg_desc=arg_desc) 28 | return obj 29 | 30 | return docstring_dec 31 | 32 | @add_options(option_spec) 33 | @add_docstring(cmd_desc, arg_desc) 34 | def cmd( 35 | input_obj=None, 36 | output_obj=None, 37 | input_format=None, 38 | output_format=None, 39 | zarr_chunk_size=None, 40 | loom_write_obsm_varm=False, 41 | export_mtx=None, 42 | mtx_compression=None, 43 | show_obj=None, 44 | **kwargs, 45 | ): 46 | """{cmd_desc}\n\n\b\n{arg_desc}""" 47 | if input_obj: 48 | adata = _read_obj(input_obj, input_format=input_format) 49 | func(adata, **kwargs) 50 | else: 51 | adata = func(**kwargs) 52 | 53 | if output_obj: 54 | _write_obj( 55 | adata, 56 | 
output_obj, 57 | output_format=output_format, 58 | chunk_size=zarr_chunk_size, 59 | write_obsm_varm=loom_write_obsm_varm, 60 | export_mtx=export_mtx, 61 | mtx_compression=mtx_compression, 62 | show_obj=show_obj, 63 | ) 64 | return 0 65 | 66 | return cmd 67 | 68 | 69 | def add_options(options): 70 | """ 71 | Returns a decorator to group multiple click decorators 72 | """ 73 | 74 | def _add_options(func): 75 | for option in reversed(options): 76 | func = option(func) 77 | return func 78 | 79 | return _add_options 80 | 81 | 82 | def _fix_booleans(df): 83 | for var in df.columns: 84 | if ( 85 | df[var].dtype.kind == "O" 86 | and df[var].dtype.name == "object" 87 | and set(pd.Categorical(df[var])).issubset(set(["True", "False", "nan"])) 88 | ): 89 | d = {"False": True, "False": False, "nan": False} 90 | df[var] = df[var].map(d).astype(bool) 91 | return df 92 | 93 | 94 | def _read_obj(input_obj, input_format="anndata", **kwargs): 95 | if input_format == "anndata": 96 | adata = sc.read_h5ad(input_obj, **kwargs) 97 | elif input_format == "loom": 98 | adata = sc.read_loom(input_obj, **kwargs) 99 | else: 100 | raise NotImplementedError("Unsupported input format: {}".format(input_format)) 101 | adata.var = _fix_booleans(adata.var) 102 | adata.obs = _fix_booleans(adata.obs) 103 | 104 | return adata 105 | 106 | 107 | def _write_obj( 108 | adata, 109 | output_obj, 110 | output_format="anndata", 111 | chunk_size=None, 112 | export_mtx=None, 113 | mtx_compression=None, 114 | show_obj=None, 115 | write_obsm_varm=False, 116 | **kwargs, 117 | ): 118 | if output_format == "anndata": 119 | adata.write(output_obj, compression="gzip") 120 | elif output_format == "loom": 121 | adata.write_loom(output_obj, write_obsm_varm=write_obsm_varm) 122 | elif output_format == "zarr": 123 | adata.write_zarr(output_obj, chunk_size=chunk_size, **kwargs) 124 | else: 125 | raise NotImplementedError("Unsupported output format: {}".format(output_format)) 126 | if export_mtx: 127 | compression = None 128 | 
def write_mtx(
    adata,
    fname_prefix="",
    var=None,
    obs=None,
    use_raw=False,
    use_layer=None,
    compression=None,
):
    """Export AnnData object to mtx format
    * Parameters
    + adata : AnnData
    An AnnData object
    + fname_prefix : str
    Prefix of the exported files. If not empty and not ending with '/' or '_',
    a '_' will be appended. Full names will be matrix.mtx,
    genes.tsv, barcodes.tsv
    + var : list
    A list of column names to be exported to gene table
    + obs : list
    A list of column names to be exported to barcode/cell table
    + use_raw : bool
    Take the matrix from .raw.X?
    + use_layer: str
    Specify a layer to use instead of .X (non-raw only)
    + compression: None, str or dict
    Compression parameter for Pandas' to_csv(). For compression, a dict
    with a 'method' key, e.g. {'method': 'gzip', 'compresslevel': 1,
    'mtime': 1}

    >>> import os
    >>> from pathlib import Path
    >>> adata = sc.datasets.pbmc3k()
    >>> # Test uncompressed write
    >>> Path("uncompressed").mkdir(parents=True, exist_ok=True)
    >>> write_mtx(adata, fname_prefix = 'uncompressed/', use_raw = False, use_layer = None, var = ['gene_name'])
    >>> sorted(os.listdir('uncompressed'))
    ['barcodes.tsv', 'genes.tsv', 'matrix.mtx']
    >>> # Test that the matrix is the same when we read it back
    >>> test_readable = sc.read_10x_mtx('uncompressed')
    >>> if any(test_readable.obs_names != adata.obs_names) or any(test_readable.var_names != adata.var_names) or (test_readable.X[1].sum() - adata.X[1].sum()) > 1e-5:
    ...     print("Re-read matrix is different to the one we stored, something is wrong with the writing")
    >>> # Test compressed write
    >>> Path("compressed").mkdir(parents=True, exist_ok=True)
    >>> write_mtx(adata, fname_prefix = 'compressed/', use_raw = False, use_layer = None, var = ['gene_name'], compression = {'method': 'gzip'})
    >>> sorted(os.listdir('compressed'))
    ['barcodes.tsv.gz', 'genes.tsv.gz', 'matrix.mtx.gz']
    """
    if fname_prefix and not (fname_prefix.endswith("/") or fname_prefix.endswith("_")):
        fname_prefix = fname_prefix + "_"
    if var is None:
        var = []
    if obs is None:
        obs = []

    import scipy.sparse as sp

    # Choose the matrix source: .raw.X, a named layer, or .X.
    if use_raw:
        var_source = adata.raw.var
        mat = sp.coo_matrix(adata.raw.X)
    else:
        var_source = adata.var
        if use_layer is not None:
            mat = sp.coo_matrix(adata.layers[use_layer])
        else:
            mat = sp.coo_matrix(adata.X)

    # Keep only requested metadata columns that actually exist.
    obs = list(set(obs) & set(adata.obs.columns))
    var = list(set(var) & set(var_source.columns))

    n_obs, n_var = mat.shape
    n_entry = len(mat.data)

    # Define the header lines as a Pandas DataFrame so we can use the same compression
    header = pd.DataFrame(
        ["%%MatrixMarket matrix coordinate real general", f"{n_var} {n_obs} {n_entry}"]
    )
    # MatrixMarket is 1-based; 10x convention stores genes as rows.
    df = pd.DataFrame({"col": mat.col + 1, "row": mat.row + 1, "data": mat.data})

    # Define outputs
    mtx_fname = fname_prefix + "matrix.mtx"
    gene_fname = fname_prefix + "genes.tsv"
    barcode_fname = fname_prefix + "barcodes.tsv"

    # Write matrix with Pandas CSV and use its compression where requested
    if (
        compression is not None
        and type(compression) is dict
        and "method" in compression
    ):
        compressed_exts = {"zip": "zip", "gzip": "gz", "bz2": "bz2", "zstd": "zst"}
        # BUGFIX: the original used .get(..., "None") (the string), so the
        # `ext is None` check below never fired and an unknown method silently
        # produced files named e.g. "matrix.mtx.None".
        ext = compressed_exts.get(compression["method"])

        if ext is None:
            raise ValueError(
                "Invalid compression method: {}".format(compression["method"])
            )

        mtx_fname += f".{ext}"
        gene_fname += f".{ext}"
        barcode_fname += f".{ext}"
    else:
        compression = None

    header.to_csv(mtx_fname, header=False, index=False, compression=compression)
    df.to_csv(
        mtx_fname, sep=" ", header=False, index=False, compression=compression, mode="a"
    )

    # Now write the obs and var, also with compression if appropriate
    obs_df = adata.obs[obs].reset_index(level=0)
    obs_df.to_csv(
        barcode_fname, sep="\t", header=False, index=False, compression=compression
    )
    var_df = var_source[var].reset_index(level=0)
    if not var:
        var_df["gene"] = var_df["index"]
    var_df.to_csv(
        gene_fname, sep="\t", header=False, index=False, compression=compression
    )
| is_rgg = True 298 | func = plot_funcs["rgg_" + func_name] 299 | kwargs.pop("var_names", None) 300 | else: 301 | func = plot_funcs[func_name] 302 | kwargs.pop("groups", None) 303 | kwargs.pop("n_genes", None) 304 | 305 | kwargs.pop("rgg") 306 | else: 307 | func = plot_funcs[func_name] 308 | else: 309 | func = globals()[func_name] 310 | 311 | # Generate the output file name 312 | 313 | figname = False 314 | showfig = True 315 | if output_fig: 316 | import os 317 | 318 | import matplotlib.pyplot as plt 319 | 320 | sc.settings.figdir = os.path.dirname(output_fig) or "." 321 | 322 | figname = os.path.basename(output_fig) 323 | showfig = False 324 | 325 | # Run the selected function 326 | 327 | func(adata, save=figname, show=showfig, **kwargs) 328 | 329 | # Rename output to the spefied file name. We need to work out what 330 | # prefix the function will have used for its output files. 331 | 332 | if output_fig: 333 | prefix = "" 334 | if func_name == "scatter" or func_name == "embedding": 335 | prefix = kwargs.get("basis", func.__name__) 336 | elif kind: 337 | prefix = kind 338 | elif func_name in plot_funcs: 339 | prefix = plot_funcs[func_name].__name__.split(".")[-1] 340 | if func_name in [ 341 | "sviol", 342 | "rgg_sviol", 343 | "dot", 344 | "rgg_dot", 345 | "matrix", 346 | "rgg_matrix", 347 | ]: 348 | prefix = prefix + "_" 349 | 350 | os.rename(os.path.join(sc.settings.figdir, prefix + figname), output_fig) 351 | plt.close() 352 | 353 | return plot_function 354 | 355 | 356 | # Wrap matrix-processing functions in logic to back up .X or specified input 357 | # layers prior to processing 358 | 359 | 360 | def make_matrix_function(func): 361 | def matrix_function( 362 | adata, 363 | save_raw=True, 364 | save_layer=None, 365 | **kwargs, 366 | ): 367 | 368 | # For the subset of matrix functions that allow layer specification, 369 | # pass that as the thing to save. 
LANG = os.environ.get("LANG", None)

# Scanpy prints unicode characters, so refuse to run under a non-UTF-8 locale
# rather than crash with UnicodeEncodeError part-way through a pipeline.
# Accepts any casing/hyphenation of "UTF-8" (UTF-8, utf8, Utf-8, ...).
if LANG is None or not LANG.upper().replace("-", "").endswith("UTF8"):
    print("This programme requires a UTF-8 locale, please check your $LANG setting.")
    # BUGFIX: the original called sys.exit(0), reporting success to the shell
    # even though the program refused to run; exit non-zero so pipelines fail.
    sys.exit(1)
_P_DESC]) 52 | 53 | 54 | READ_CMD = make_subcmd( 55 | "read", 56 | read_10x, 57 | cmd_desc="Read 10x data and save in specified format.", 58 | arg_desc=_O_DESC, 59 | ) 60 | 61 | 62 | FILTER_CMD = make_subcmd( 63 | "filter", 64 | make_matrix_function(filter_anndata), 65 | cmd_desc="Filter data based on specified conditions.", 66 | arg_desc=_IO_DESC, 67 | ) 68 | 69 | 70 | NORM_CMD = make_subcmd( 71 | "norm", 72 | make_matrix_function(normalize), 73 | cmd_desc="Normalise data per cell.", 74 | arg_desc=_IO_DESC, 75 | ) 76 | 77 | 78 | HVG_CMD = make_subcmd( 79 | "hvg", 80 | hvg, 81 | cmd_desc="Find highly variable genes.", 82 | arg_desc=_IO_DESC, 83 | ) 84 | 85 | 86 | SCALE_CMD = make_subcmd( 87 | "scale", 88 | make_matrix_function(sc.pp.scale), 89 | cmd_desc="Scale data per gene.", 90 | arg_desc=_IO_DESC, 91 | ) 92 | 93 | 94 | REGRESS_CMD = make_subcmd( 95 | "regress", 96 | make_matrix_function(sc.pp.regress_out), 97 | cmd_desc="Regress-out observation variables.", 98 | arg_desc=_IO_DESC, 99 | ) 100 | 101 | 102 | PCA_CMD = make_subcmd( 103 | "pca", 104 | pca, 105 | cmd_desc="Dimensionality reduction by PCA.", 106 | arg_desc=_IO_DESC, 107 | ) 108 | 109 | NEIGHBOR_CMD = make_subcmd( 110 | "neighbor", 111 | neighbors, 112 | cmd_desc="Compute a neighbourhood graph of observations.", 113 | arg_desc=_IO_DESC, 114 | ) 115 | 116 | UMAP_CMD = make_subcmd( 117 | "umap", 118 | umap, 119 | cmd_desc="Embed the neighborhood graph using UMAP.", 120 | arg_desc=_IO_DESC, 121 | ) 122 | 123 | TSNE_CMD = make_subcmd( 124 | "tsne", 125 | tsne, 126 | cmd_desc="Embed the cells using t-SNE.", 127 | arg_desc=_IO_DESC, 128 | ) 129 | 130 | FDG_CMD = make_subcmd( 131 | "fdg", 132 | fdg, 133 | cmd_desc="Embed the neighborhood graph using force-directed graph.", 134 | arg_desc=_IO_DESC, 135 | ) 136 | 137 | DIFFMAP_CMD = make_subcmd( 138 | "diffmap", 139 | diffmap, 140 | cmd_desc="Embed the neighborhood graph using diffusion map.", 141 | arg_desc=_IO_DESC, 142 | ) 143 | 144 | LOUVAIN_CMD = 
make_subcmd( 145 | "louvain", 146 | louvain, 147 | cmd_desc="Find clusters by Louvain algorithm.", 148 | arg_desc=_IO_DESC, 149 | ) 150 | 151 | LEIDEN_CMD = make_subcmd( 152 | "leiden", 153 | leiden, 154 | cmd_desc="Find clusters by Leiden algorithm.", 155 | arg_desc=_IO_DESC, 156 | ) 157 | 158 | DIFFEXP_CMD = make_subcmd( 159 | "diffexp", 160 | diffexp, 161 | cmd_desc="Find markers for each clusters.", 162 | arg_desc=_IO_DESC, 163 | ) 164 | 165 | PAGA_CMD = make_subcmd( 166 | "paga", 167 | paga, 168 | cmd_desc="Trajectory inference by abstract graph analysis.", 169 | arg_desc=_IO_DESC, 170 | ) 171 | 172 | DPT_CMD = make_subcmd( 173 | "dpt", 174 | dpt, 175 | cmd_desc="Calculate diffusion pseudotime relative to the root cells.", 176 | arg_desc=_IO_DESC, 177 | ) 178 | 179 | PLOT_EMBED_CMD = make_subcmd( 180 | "embed", 181 | make_plot_function("embedding"), 182 | cmd_desc="Plot cell embeddings.", 183 | arg_desc=_IP_DESC, 184 | ) 185 | 186 | PLOT_STACKED_VIOLIN_CMD = make_subcmd( 187 | "sviol", 188 | make_plot_function("sviol"), 189 | cmd_desc="Plot stacked violin plots.", 190 | arg_desc=_IP_DESC, 191 | ) 192 | 193 | PLOT_DOT_CMD = make_subcmd( 194 | "dot", 195 | make_plot_function("dot"), 196 | cmd_desc="Plot a dot plot of expression values.", 197 | arg_desc=_IP_DESC, 198 | ) 199 | 200 | PLOT_MATRIX_CMD = make_subcmd( 201 | "matrix", 202 | make_plot_function("matrix"), 203 | cmd_desc="Plot a heatmap of the mean expression values per cluster.", 204 | arg_desc=_IP_DESC, 205 | ) 206 | 207 | PLOT_HEATMAP_CMD = make_subcmd( 208 | "heat", 209 | make_plot_function("heat"), 210 | cmd_desc="Plot a heatmap of the expression values of genes.", 211 | arg_desc=_IP_DESC, 212 | ) 213 | 214 | PLOT_PAGA_CMD = make_subcmd( 215 | "paga", 216 | make_plot_function("plot_paga", kind="paga"), 217 | cmd_desc="Plot PAGA trajectories.", 218 | arg_desc=_IP_DESC, 219 | opt_set="plot_paga", 220 | ) 221 | 222 | COMBAT_CMD = make_subcmd( 223 | "combat", 224 | combat, 225 | cmd_desc="ComBat function 
for batch effect correction", 226 | arg_desc=_IO_DESC, 227 | ) 228 | 229 | HARMONY_INTEGRATE_CMD = make_subcmd( 230 | "harmony", 231 | sce.pp.harmony_integrate, 232 | cmd_desc="Use harmonypy [Korunsky19] to integrate different experiments.", 233 | arg_desc=_IO_DESC, 234 | ) 235 | 236 | BBKNN_CMD = make_subcmd( 237 | "bbknn", 238 | bbknn, 239 | cmd_desc="Batch balanced kNN [Polanski19].", 240 | arg_desc=_IO_DESC, 241 | ) 242 | 243 | MNN_CORRECT_CMD = make_subcmd( 244 | "mnn", 245 | make_matrix_function(mnn_correct), 246 | cmd_desc="Correct batch effects by matching mutual nearest neighbors [Haghverdi18] [Kang18].", 247 | arg_desc=_IO_DESC, 248 | ) 249 | 250 | SCRUBLET_MULTIPLET_CMD = make_subcmd( 251 | "scrublet", 252 | scrublet, 253 | cmd_desc="Filter out likely multiplets from droplet data using Scrublet [Wolock2019].", 254 | arg_desc=_IO_DESC, 255 | ) 256 | 257 | SCRUBLET_MULTIPLET_SIMULATE_CMD = make_subcmd( 258 | "scrublet_simulate_doublets", 259 | scrublet_simulate_doublets, 260 | cmd_desc="Simulate doublets with random transcriptome pairs for Scrublet [Wolock2019].", 261 | arg_desc=_IO_DESC, 262 | ) 263 | 264 | SCRUBLET_MULTIPLET_PLOT_CMD = make_subcmd( 265 | "scrublet", 266 | make_plot_function("plot_scrublet", "scrublet_score_distribution"), 267 | cmd_desc="Plot histogram of doublet scores for observed transcriptomes and simulated doublets..", 268 | arg_desc=_IP_DESC, 269 | opt_set="plot_scrublet", 270 | ) 271 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provides exported functions 3 | """ 4 | 5 | from ._read import read_10x 6 | from ._filter import filter_anndata 7 | from ._norm import normalize 8 | from ._hvg import hvg 9 | from ._neighbors import neighbors 10 | from ._umap import umap 11 | from ._fdg import fdg 12 | from ._tsne import tsne 13 | from ._louvain import louvain 14 | from ._leiden 
def bbknn(adata, key=None, key_added=None, **kwargs):
    """
    Wrapper function for sce.pp.bbknn(), for supporting non-standard neighbors slot
    """

    # (store, default slot name, renamed slot name when key_added is given)
    managed_slots = [
        (adata.uns, "neighbors", "{0}"),
        (adata.obsp, "distances", "{0}_distances"),
        (adata.obsp, "connectivities", "{0}_connectivities"),
    ]

    # Preserve any pre-existing default-slot results before bbknn overwrites them.
    for store, slot, _ in managed_slots:
        _backup_default_key(store, slot)

    sce.pp.bbknn(adata, batch_key=key, **kwargs)

    if key_added:
        # Move the fresh results into the requested named slots; the backups
        # restore the previous defaults.
        for store, slot, template in managed_slots:
            _rename_default_key(store, slot, template.format(key_added))
    else:
        # Results stay in the default slots; discard the stale backups.
        for store, slot, _ in managed_slots:
            _delete_backup_key(store, slot)

    return adata
def diffexp(
    adata,
    use_raw=None,
    n_genes=None,
    key_added="rank_genes_groups",
    layer=None,
    logreg_param=None,
    filter_params=None,
    save=None,
    groupby=None,
    groups=None,
    **kwargs,
):
    """
    Wrapper function for sc.tl.rank_genes_groups.

    Test that we can load a single group.
    >>> import os
    >>> from pathlib import Path
    >>> adata = sc.datasets.krumsiek11()
    >>> tbl = diffexp(adata, groupby='cell_type', groups='Mo', reference='progenitor')
    >>> # get the size of the data frame
    >>> tbl.shape
    (11, 8)
    """
    # .raw is the only raw source; without it use_raw must be off.
    if adata.raw is None:
        use_raw = False

    if n_genes is None:
        n_genes = adata.raw.shape[1] if use_raw else adata.shape[1]

    if logreg_param and isinstance(logreg_param, dict):
        # BUGFIX: iterating a dict yields keys only, so the original
        # `for key, val in logreg_param:` raised on unpacking. Use .items().
        for key, val in logreg_param.items():
            kwargs[key] = val

    key_added = key_added if key_added else "rank_genes_groups"
    diff_key = (key_added + f"_{layer}") if layer else key_added

    if groups == "all":

        # Avoid divisions by zeros for singlet groups. See
        # https://github.com/theislab/scanpy/pull/1490#issuecomment-726031442.

        groups_to_test = list(
            adata.obs[groupby].value_counts().loc[lambda x: x > 1].index
        )

        if len(groups_to_test) < len(adata.obs[groupby].cat.categories):
            groups = groups_to_test
            logging.warning(
                "Singlet groups removed before passing to rank_genes_groups()"
            )

    # avoid issue when groups is a single group as a string simplified by click
    # https://github.com/ebi-gene-expression-group/scanpy-scripts/issues/123
    if groups != "all" and isinstance(groups, str):
        groups = [groups]

    sc.tl.rank_genes_groups(
        adata,
        use_raw=use_raw,
        n_genes=n_genes,
        key_added=diff_key,
        groupby=groupby,
        groups=groups,
        **kwargs,
    )

    de_tbl = extract_de_table(adata.uns[diff_key])

    if isinstance(filter_params, dict):
        key_filtered = diff_key + "_filtered"
        sc.tl.filter_rank_genes_groups(
            adata,
            key=diff_key,
            key_added=key_filtered,
            use_raw=use_raw,
            **filter_params,
        )

        # there are non strings on recarray object at this point, in
        # adata.uns['rank_genes_groups_filtered']['names']
        # for instance:
        # adata.uns['rank_genes_groups_filtered']['names'][0]
        # (nan, nan, 'NKG7', nan, nan, 'PPBP')
        # this now upsets h5py > 3.0
        de_tbl = extract_de_table(adata.uns[key_filtered])
        de_tbl = de_tbl.loc[de_tbl.genes.astype(str) != "nan", :]

        # change nan for strings in adata.uns['rank_genes_groups_filtered']['names']
        # TODO on scanpy updates, check if this is not done within scanpy so that we can remove this
        for row in range(0, len(adata.uns[key_filtered]["names"])):
            for col in range(0, len(adata.uns[key_filtered]["names"][row])):
                element = adata.uns[key_filtered]["names"][row][col]
                if isinstance(element, float) and math.isnan(element):
                    adata.uns[key_filtered]["names"][row][col] = "nan"

    if save:
        de_tbl.to_csv(save, sep="\t", header=True, index=False)

    return de_tbl
gene_df[["cluster", "ref", "rank", "genes"]] 160 | de_df = pd.DataFrame( 161 | { 162 | field: _recarray_to_dataframe(de_dict[field], field)[field] 163 | for field in requested_fields 164 | if field in de_dict 165 | } 166 | ) 167 | return gene_df.merge(de_df, left_index=True, right_index=True) 168 | 169 | 170 | def _recarray_to_dataframe(array, field_name): 171 | return ( 172 | pd.DataFrame(array) 173 | .reset_index() 174 | .rename(columns={"index": "rank"}) 175 | .melt(id_vars="rank", var_name="cluster", value_name=field_name) 176 | ) 177 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_diffmap.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy diffmap 3 | """ 4 | 5 | import scanpy as sc 6 | from ..obj_utils import ( 7 | _rename_obsm_key, 8 | write_embedding, 9 | ) 10 | 11 | 12 | def diffmap( 13 | adata, 14 | key_added=None, 15 | export_embedding=None, 16 | **kwargs, 17 | ): 18 | """ 19 | Wrapper function for sc.tl.diffmap, for supporting named slot 20 | """ 21 | sc.tl.diffmap(adata, **kwargs) 22 | 23 | diffmap_key = "X_diffmap" 24 | if key_added: 25 | diffmap_key = f"{diffmap_key}_{key_added}" 26 | _rename_obsm_key(adata, "X_diffmap", diffmap_key) 27 | 28 | if export_embedding is not None: 29 | write_embedding(adata, diffmap_key, export_embedding, key_added=key_added) 30 | return adata 31 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_dpt.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy dpt 3 | """ 4 | 5 | import numpy as np 6 | import scanpy as sc 7 | from ..obj_utils import ( 8 | _rename_default_key, 9 | ) 10 | 11 | 12 | def dpt( 13 | adata, 14 | root=None, 15 | use_diffmap="X_diffmap", 16 | key_added=None, 17 | **kwargs, 18 | ): 19 | """ 20 | Wrapper function for sc.tl.dpt 21 | """ 22 | if root is None or not (isinstance(root, (list, tuple)) and 
len(root) == 2): 23 | root = (None, None) 24 | if "iroot" not in adata.uns.keys() and root[0] is None: 25 | raise ValueError( 26 | "Annotate your data with root cell first, i.e. " 27 | 'boolean vector `.uns["iroot"]` is required.' 28 | ) 29 | if root[0] is not None: 30 | adata.uns["iroot"] = np.random.choice( 31 | np.flatnonzero(adata.obs[root[0]] == root[1]) 32 | ) 33 | 34 | sc.tl.dpt(adata, **kwargs) 35 | if key_added: 36 | dpt_key = f"dpt_pseudotime_{key_added}" 37 | _rename_default_key(adata.obs, "dpt_pseudotime", dpt_key) 38 | 39 | return adata 40 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_fdg.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy fdg 3 | """ 4 | 5 | import scanpy as sc 6 | from ..obj_utils import ( 7 | _backup_obsm_key, 8 | _delete_obsm_backup_key, 9 | _rename_obsm_key, 10 | write_embedding, 11 | ) 12 | 13 | 14 | def fdg( 15 | adata, 16 | layout="fa", 17 | key_added_ext=None, 18 | random_state=0, 19 | export_embedding=None, 20 | **kwargs, 21 | ): 22 | """ 23 | Wrapper function for sc.tl.draw_graph, for supporting named slot of fdg 24 | embeddings. 
25 | """ 26 | sc.tl.draw_graph( 27 | adata, 28 | layout=layout, 29 | key_added_ext=key_added_ext, 30 | random_state=random_state, 31 | **kwargs, 32 | ) 33 | 34 | fdg_key = f"X_draw_graph_{key_added_ext or layout}" 35 | 36 | if export_embedding is not None: 37 | write_embedding(adata, fdg_key, export_embedding, key_added=key_added_ext) 38 | return adata 39 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_filter.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy filter 3 | """ 4 | 5 | import logging 6 | import re 7 | import click 8 | import numpy as np 9 | import scanpy as sc 10 | 11 | 12 | def filter_anndata( 13 | adata, 14 | gene_name="index", 15 | list_attr=False, 16 | param=None, 17 | category=None, 18 | subset=None, 19 | force_recalc=False, 20 | ): 21 | """ 22 | Wrapper function for sc.pp.filter_cells() and sc.pp.filter_genes(), mainly 23 | for supporting arbitrary filtering 24 | """ 25 | param = [] if param is None else param 26 | category = [] if category is None else category 27 | subset = [] if subset is None else subset 28 | 29 | logging.debug("--gene-name=%s", gene_name) 30 | logging.debug("--param=%s", param) 31 | logging.debug("--category=%s", category) 32 | logging.debug("--subset=%s", subset) 33 | 34 | if "mito" not in adata.var.keys() and gene_name: 35 | try: 36 | gene_names = getattr(adata.var, gene_name) 37 | k_mito = gene_names.str.startswith("MT-") 38 | if k_mito.sum() > 0: 39 | adata.var["mito"] = k_mito 40 | # adata.var["mito"] = adata.var["mito"].astype("category") 41 | else: 42 | logging.warning( 43 | "No MT genes found, skip calculating " 44 | "expression of mitochondria genes" 45 | ) 46 | except AttributeError: 47 | logging.warning( 48 | "Specified gene column [%s] not found, skip calculating " 49 | "expression of mitochondria genes", 50 | gene_name, 51 | ) 52 | 53 | attributes = _get_attributes(adata) 54 | if list_attr: 55 | 
click.echo(_repr_obj(attributes)) 56 | return 0 57 | 58 | conditions, qc_vars, pct_top = _get_filter_conditions( 59 | attributes, param, category, subset 60 | ) 61 | 62 | layer = "counts" if "counts" in adata.layers.keys() else None 63 | obs_columns = adata.obs.columns 64 | for qv in qc_vars: 65 | if f"pct_counts_{qv}" in obs_columns and not force_recalc: 66 | logging.warning( 67 | "`pct_counts_%s` exists, not overwriting " "without --force-recalc", qv 68 | ) 69 | qc_vars.remove(qv) 70 | for pt in pct_top: 71 | if f"pct_counts_in_top_{pt}_genes" in obs_columns and not force_recalc: 72 | logging.warning( 73 | "`pct_counts_%s` exists, not overwriting " "without --force-recalc", pt 74 | ) 75 | pct_top.remove(pt) 76 | 77 | # Calculate mito stats if we can, even if we're not filtering by them 78 | 79 | if "mito" not in qc_vars and "mito" in adata.var.keys(): 80 | qc_vars.append("mito") 81 | 82 | sc.pp.calculate_qc_metrics( 83 | adata, layer=layer, qc_vars=qc_vars, percent_top=pct_top, inplace=True 84 | ) 85 | 86 | adata.obs["n_counts"] = adata.obs["total_counts"] 87 | adata.obs["n_genes"] = adata.obs["n_genes_by_counts"] 88 | adata.var["n_counts"] = adata.var["total_counts"] 89 | adata.var["n_cells"] = adata.var["n_cells_by_counts"] 90 | 91 | k_cell = np.ones(len(adata.obs)).astype(bool) 92 | for cond in conditions["c"]["numerical"]: 93 | name, vmin, vmax = cond 94 | attr = adata.obs[name] 95 | k_cell = k_cell & (attr >= vmin) & (attr <= vmax) 96 | 97 | for cond in conditions["c"]["categorical"]: 98 | name, values = cond 99 | attr = getattr(adata.obs, name).astype(str) 100 | if values[0].startswith("!"): 101 | values[0] = values[0][1:] 102 | k_cell = k_cell & (~attr.isin(values)) 103 | else: 104 | k_cell = k_cell & attr.isin(values) 105 | 106 | k_gene = np.ones(len(adata.var)).astype(bool) 107 | for cond in conditions["g"]["numerical"]: 108 | name, vmin, vmax = cond 109 | attr = adata.var[name] 110 | k_gene = k_gene & (attr >= vmin) & (attr <= vmax) 111 | 112 | for cond 
in conditions["g"]["categorical"]: 113 | name, values = cond 114 | attr = getattr(adata.var, name).astype(str) 115 | if values[0].startswith("!"): 116 | values[0] = values[0][1:] 117 | k_gene = k_gene & ~(attr.isin(values)) 118 | else: 119 | k_gene = k_gene & attr.isin(values) 120 | 121 | adata._inplace_subset_obs(k_cell) 122 | adata._inplace_subset_var(k_gene) 123 | 124 | return adata 125 | 126 | 127 | def _get_attributes(adata): 128 | attributes = { 129 | "c": { 130 | "numerical": [], 131 | "categorical": ["index"], 132 | "bool": [], 133 | }, 134 | "g": { 135 | "numerical": [], 136 | "categorical": ["index"], 137 | "bool": [], 138 | }, 139 | } 140 | 141 | for attr, dtype in adata.obs.dtypes.to_dict().items(): 142 | typ = dtype.kind 143 | if typ == "O": 144 | if dtype.name == "category" and dtype.categories.is_boolean(): 145 | attributes["c"]["bool"].append(attr) 146 | attributes["c"]["categorical"].append(attr) 147 | elif typ in ("i", "f", "u"): 148 | attributes["c"]["numerical"].append(attr) 149 | elif typ == "b": 150 | attributes["c"]["bool"].append(attr) 151 | attributes["c"]["categorical"].append(attr) 152 | 153 | for attr, dtype in adata.var.dtypes.to_dict().items(): 154 | typ = dtype.kind 155 | if typ == "O": 156 | if dtype.name == "category" and dtype.categories.is_boolean(): 157 | attributes["g"]["bool"].append(attr) 158 | attributes["g"]["categorical"].append(attr) 159 | elif typ in ("i", "f", "u"): 160 | attributes["g"]["numerical"].append(attr) 161 | elif typ == "b": 162 | attributes["g"]["bool"].append(attr) 163 | attributes["g"]["categorical"].append(attr) 164 | 165 | attributes["c"]["numerical"].extend( 166 | [ 167 | "n_genes", 168 | "n_counts", 169 | ] 170 | ) 171 | 172 | for attr in attributes["g"]["bool"]: 173 | attr2 = "pct_counts_" + attr 174 | if attr2 not in adata.obs.columns: 175 | attr2 += "*" 176 | attributes["c"]["numerical"].append(attr2) 177 | 178 | attributes["g"]["numerical"].extend( 179 | [ 180 | "n_cells", 181 | "n_counts", 182 | 
"mean_counts", 183 | "pct_dropout_by_counts", 184 | ] 185 | ) 186 | logging.debug(attributes) 187 | return attributes 188 | 189 | 190 | def _attributes_exists(name, attributes, dtype): 191 | cond_cat = "" 192 | if name.startswith("c:") or name.startswith("g:"): 193 | cond_cat, _, cond_name = name.partition(":") 194 | found = int(cond_name in attributes[cond_cat][dtype]) 195 | else: 196 | cond_name = name 197 | if cond_name in attributes["c"][dtype]: 198 | cond_cat += "c" 199 | if cond_name in attributes["g"][dtype]: 200 | cond_cat += "g" 201 | found = len(cond_cat) 202 | return found, cond_cat, cond_name 203 | 204 | 205 | def _get_filter_conditions(attributes, param, category, subset): 206 | conditions = { 207 | "c": { 208 | "numerical": [], 209 | "categorical": [], 210 | "bool": [], 211 | }, 212 | "g": { 213 | "numerical": [], 214 | "categorical": [], 215 | "bool": [], 216 | }, 217 | } 218 | percent_top_pattern = re.compile(r"^pct_counts_in_top_(?P\d+)_genes$") 219 | pct_top = [] 220 | qc_vars_pattern = re.compile(r"^pct_counts_(?P\S+)$") 221 | qc_vars = [] 222 | 223 | for name, vmin, vmax in param: 224 | found, cond_cat, cond_name = _attributes_exists(name, attributes, "numerical") 225 | pt_match = percent_top_pattern.match(cond_name) 226 | qv_match = qc_vars_pattern.match(cond_name) 227 | if found > 1: 228 | raise click.ClickException( 229 | f'Ambiguous parameter "{name}" found in ' "both cell and gene table" 230 | ) 231 | if found < 1: 232 | if pt_match: 233 | pct_top.append(int(pt_match["n"])) 234 | cond_cat = "c" 235 | elif qv_match and qv_match["qc_var"] in attributes["g"]["bool"]: 236 | qc_vars.append(qv_match["qc_var"]) 237 | cond_cat = "c" 238 | else: 239 | raise click.ClickException(f'Parameter "{name}" unavailable') 240 | if pt_match or qv_match: 241 | vmin *= 100 242 | vmax *= 100 243 | conditions[cond_cat]["numerical"].append([cond_name, vmin, vmax]) 244 | 245 | for name, values in category + subset: 246 | found, cond_cat, cond_name = 
_attributes_exists(name, attributes, "categorical") 247 | if found > 1: 248 | raise click.ClickException( 249 | f'Ambiguous attribute "{name}" found in ' "both cell and gene table" 250 | ) 251 | if found < 1: 252 | raise click.ClickException(f'Attribute "{name}" unavailable') 253 | if not isinstance(values, (list, tuple)): 254 | fh = values 255 | values = fh.read().rstrip().split("\n") 256 | fh.close() 257 | conditions[cond_cat]["categorical"].append((cond_name, values)) 258 | 259 | logging.debug((conditions, qc_vars, pct_top)) 260 | return conditions, qc_vars, sorted(pct_top) 261 | 262 | 263 | def _repr_obj(obj, padding=" ", level=0): 264 | if isinstance(obj, dict): 265 | obj_str = "\n".join( 266 | [ 267 | "\n".join([padding * level + k + ":", _repr_obj(v, level=level + 1)]) 268 | for k, v in obj.items() 269 | ] 270 | ) 271 | elif isinstance(obj, (tuple, list, set)): 272 | obj_str = "\n".join([_repr_obj(elm, level=level) for elm in obj]) 273 | else: 274 | obj_str = padding * level + repr(obj) 275 | return obj_str 276 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_hvg.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy hvg 3 | """ 4 | 5 | import numpy as np 6 | import scanpy as sc 7 | 8 | 9 | def hvg( 10 | adata, 11 | mean_limits=(0.0125, 3), 12 | disp_limits=(0.5, float("inf")), 13 | **kwargs, 14 | ): 15 | """ 16 | Wrapper function for sc.highly_variable_genes() 17 | """ 18 | 19 | # Check for n_top_genes beeing greater than the total genes 20 | 21 | if "n_top_genes" in kwargs and kwargs["n_top_genes"] is not None: 22 | kwargs["n_top_genes"] = min(adata.n_vars, kwargs["n_top_genes"]) 23 | 24 | always_hv_genes = None 25 | if "always_hv_genes_file" in kwargs and kwargs["always_hv_genes_file"] is not None: 26 | with open(kwargs["always_hv_genes_file"], "r") as f: 27 | always_hv_genes = f.read().splitlines() 28 | 29 | never_hv_genes = None 30 | if 
"never_hv_genes_file" in kwargs and kwargs["never_hv_genes_file"] is not None: 31 | with open(kwargs["never_hv_genes_file"], "r") as f: 32 | never_hv_genes = f.read().splitlines() 33 | 34 | # to avoid upsetting the scanpy function with unexpected keyword arguments 35 | del kwargs["always_hv_genes_file"] 36 | del kwargs["never_hv_genes_file"] 37 | 38 | sc.pp.highly_variable_genes( 39 | adata, 40 | min_mean=mean_limits[0], 41 | max_mean=mean_limits[1], 42 | min_disp=disp_limits[0], 43 | max_disp=disp_limits[1], 44 | **kwargs, 45 | ) 46 | 47 | return switch_hvgs(adata, always_hv_genes, never_hv_genes) 48 | 49 | 50 | def switch_hvgs(adata, always_hv_genes=None, never_hv_genes=None): 51 | """ 52 | Function to switch on/off highly variable genes based on a list of genes. 53 | 54 | >>> adata = sc.datasets.pbmc3k() 55 | >>> sc.pp.normalize_total(adata) 56 | >>> sc.pp.log1p(adata) 57 | >>> sc.pp.highly_variable_genes(adata) 58 | >>> adata = switch_hvgs(adata, always_hv_genes=['MIR1302-10', 'FAM138A'], never_hv_genes=['ISG15', 'TNFRSF4']) 59 | >>> adata.var.loc['ISG15'].highly_variable 60 | False 61 | >>> adata.var.loc['TNFRSF4'].highly_variable 62 | False 63 | >>> adata.var.loc['MIR1302-10'].highly_variable 64 | True 65 | >>> adata.var.loc['CPSF3L'].highly_variable 66 | True 67 | """ 68 | if always_hv_genes is not None: 69 | adata.var.highly_variable = np.logical_or( 70 | adata.var.highly_variable, adata.var_names.isin(always_hv_genes) 71 | ) 72 | 73 | if never_hv_genes is not None: 74 | adata.var.highly_variable = np.logical_and( 75 | adata.var.highly_variable, ~adata.var_names.isin(never_hv_genes) 76 | ) 77 | 78 | return adata 79 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_leiden.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy leiden 3 | """ 4 | 5 | import scanpy as sc 6 | from ..obj_utils import write_obs 7 | 8 | 9 | def leiden( 10 | adata, 11 | resolution, 
12 | neighbors_key=None, 13 | obsp=None, 14 | key_added=None, 15 | export_cluster=None, 16 | **kwargs, 17 | ): 18 | """ 19 | Wrapper function for sc.tl.leiden, for supporting multiple resolutions. 20 | """ 21 | keys = [] 22 | if kwargs.get("restrict_to", None) and not kwargs["restrict_to"][0]: 23 | kwargs["restrict_to"] = None 24 | 25 | if not isinstance(resolution, (list, tuple)): 26 | if key_added is not None and not key_added.startswith("leiden_"): 27 | key_added = f"leiden_{key_added}" 28 | elif key_added is None: 29 | key_added = "leiden" 30 | sc.tl.leiden( 31 | adata, 32 | resolution=resolution, 33 | neighbors_key=neighbors_key, 34 | obsp=obsp, 35 | key_added=key_added, 36 | **kwargs, 37 | ) 38 | keys.append(key_added) 39 | else: 40 | for i, res in enumerate(resolution): 41 | res_key = str(res).replace(".", "_") 42 | if key_added is None: 43 | graph_key = ( 44 | ("_" + f"{neighbors_key or obsp}") if neighbors or obsp else "" 45 | ) 46 | key = f"leiden{graph_key}_r{res_key}" 47 | elif not isinstance(key_added, (list, tuple)): 48 | key = f"leiden_{key_added}_r{res_key}" 49 | elif len(key_added) == len(resolution): 50 | key = key_added[i] 51 | else: 52 | raise ValueError( 53 | "`key_added` can only be None, a scalar, or an " 54 | "iterable of the same length as `resolution`." 
55 | ) 56 | keys.extend( 57 | leiden( 58 | adata, 59 | resolution=res, 60 | neighbors_key=neighbors_key, 61 | obsp=obsp, 62 | key_added=key, 63 | **kwargs, 64 | ) 65 | ) 66 | 67 | if export_cluster: 68 | write_obs(adata, keys, export_cluster) 69 | 70 | return keys 71 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_louvain.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy louvain 3 | """ 4 | 5 | import scanpy as sc 6 | 7 | from ..obj_utils import write_obs 8 | 9 | 10 | def louvain( 11 | adata, 12 | resolution, 13 | neighbors_key=None, 14 | obsp=None, 15 | key_added=None, 16 | export_cluster=None, 17 | **kwargs, 18 | ): 19 | """ 20 | Wrapper function for sc.tl.louvain, for supporting multiple resolutions. 21 | """ 22 | keys = [] 23 | if kwargs["restrict_to"] and not kwargs["restrict_to"][0]: 24 | kwargs["restrict_to"] = None 25 | 26 | if not isinstance(resolution, (list, tuple)): 27 | if key_added is not None and not key_added.startswith("louvain_"): 28 | key_added = f"louvain_{key_added}" 29 | elif key_added is None: 30 | key_added = "louvain" 31 | sc.tl.louvain( 32 | adata, 33 | resolution=resolution, 34 | key_added=key_added, 35 | neighbors_key=neighbors_key, 36 | obsp=obsp, 37 | **kwargs, 38 | ) 39 | keys.append(key_added) 40 | else: 41 | for i, res in enumerate(resolution): 42 | 43 | res_key = str(res).replace(".", "_") 44 | 45 | if key_added is None: 46 | graph_key = ( 47 | ("_" + f"{neighbors_key or obsp}") if neighbors or obsp else "" 48 | ) 49 | key = f"louvain{graph_key}_r{res_key}" 50 | elif not isinstance(key_added, (list, tuple)): 51 | key = f"louvain_{key_added}_r{res_key}" 52 | elif len(key_added) == len(resolution): 53 | key = key_added[i] 54 | else: 55 | raise ValueError( 56 | "`key_added` can only be None, a scalar, or an " 57 | "iterable of the same length as `resolution`." 
58 | ) 59 | keys.extend( 60 | louvain( 61 | adata, 62 | resolution=res, 63 | neighbors_key=neighbors_key, 64 | obsp=obsp, 65 | key_added=key, 66 | **kwargs, 67 | ) 68 | ) 69 | 70 | if export_cluster: 71 | write_obs(adata, keys, export_cluster) 72 | 73 | return keys 74 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_mnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy external mnn 3 | """ 4 | 5 | import click 6 | import numpy as np 7 | import scanpy.external as sce 8 | import logging 9 | 10 | # Wrapper for mnn allowing use of non-standard slot 11 | 12 | 13 | def mnn_correct(adata, key=None, key_added=None, var_subset=None, layer=None, **kwargs): 14 | """ 15 | Wrapper function for sce.pp.mnn_correct(), for supporting non-standard neighbors slot 16 | """ 17 | 18 | # mnn will use .X, so we need to put other layers there for processing 19 | 20 | logging.warning( 21 | "Use mnn_correct at your own risk, environment installation seems faulty for this module." 
22 | ) 23 | 24 | if layer: 25 | adata.layers["X_backup"] = adata.X 26 | adata.X = adata.layers[layer] 27 | 28 | # mnn_correct() wants batches in separate adatas 29 | 30 | batches = np.unique(adata.obs[key]) 31 | alldata = [] 32 | for batch in batches: 33 | alldata.append( 34 | adata[ 35 | adata.obs[key] == batch, 36 | ] 37 | ) 38 | 39 | # Process var_subset into a list of strings that can be provided to 40 | # mnn_correct() 41 | 42 | if var_subset is not None and len(var_subset) > 0 and var_subset[0] is not None: 43 | 44 | subset = [] 45 | 46 | for name, values in var_subset: 47 | if name in adata.var: 48 | if adata.var[name].dtype == "bool": 49 | values = [True if x.lower() == "true" else x for x in values] 50 | else: 51 | raise click.ClickException(f'Var "{name}" unavailable') 52 | 53 | ind = [x in values for x in adata.var[name]] 54 | subset = subset + adata.var.index[ind].to_list() 55 | 56 | var_subset = set(subset) 57 | print("Will use %d selected genes for MNN" % len(var_subset)) 58 | 59 | else: 60 | var_subset = None 61 | 62 | # Here's the main bit 63 | 64 | cdata = sce.pp.mnn_correct( 65 | *alldata, 66 | var_subset=var_subset, 67 | do_concatenate=True, 68 | index_unique=None, 69 | **kwargs, 70 | ) 71 | 72 | # If user has specified key_added = X then they want us to overwrite .X, 73 | # othwerwise copy the .X to a named layer of the original object. In either 74 | # case make sure obs and var are the same as the original. 75 | 76 | if key_added is None or key_added != "X": 77 | 78 | mnn_key = "mnn" 79 | if layer: 80 | mnn_key = f"{mnn_key}_{layer}" 81 | 82 | # Layers is set (so we're not storing computed results in the .X, 83 | # and we had to overwrite .X to run mnn), and key_added shows we're 84 | # not storing in the .X, so we need to restore from the backup. 
85 | 86 | adata.X = adata.layers["X_backup"] 87 | 88 | if key_added: 89 | mnn_key = f"{mnn_key}_{key_added}" 90 | 91 | adata.layers[mnn_key] = cdata[0][adata.obs.index, adata.var.index].X 92 | 93 | else: 94 | adata.X = cdata[0][adata.obs.index, adata.var.index].X 95 | 96 | # Delete the backup of .X if we needed one 97 | 98 | if layer: 99 | del adata.layers["X_backup"] 100 | 101 | return adata 102 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_neighbors.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy neighbors 3 | """ 4 | 5 | import scanpy as sc 6 | from ..obj_utils import ( 7 | _backup_default_key, 8 | _delete_backup_key, 9 | _rename_default_key, 10 | ) 11 | 12 | 13 | def neighbors(adata, n_neighbors=15, key_added=None, **kwargs): 14 | """ 15 | Wrapper function for sc.pp.neighbors(), for supporting multiple n_neighbors 16 | """ 17 | if not isinstance(n_neighbors, (list, tuple)): 18 | sc.pp.neighbors(adata, n_neighbors=n_neighbors, key_added=key_added, **kwargs) 19 | else: 20 | for i, n_nb in enumerate(n_neighbors): 21 | if key_added is None: 22 | graph_key = f"k{n_nb}" 23 | elif not isinstance(key_added, (list, tuple)): 24 | graph_key = f"{key_added}_k{n_nb}" 25 | elif len(key_added) == len(n_neighbors): 26 | graph_key = key_added[i] 27 | else: 28 | raise ValueError( 29 | "`key_added` can only be None, a scalar, or an " 30 | "iterable of the same length as `n_neighbors`." 
31 | ) 32 | neighbors( 33 | adata, 34 | n_neighbors=n_nb, 35 | key_added=graph_key, 36 | **kwargs, 37 | ) 38 | return adata 39 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_norm.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy norm 3 | """ 4 | 5 | import scanpy as sc 6 | import math 7 | 8 | 9 | def normalize(adata, log_transform=True, **kwargs): 10 | """ 11 | Wrapper function for sc.pp.normalize_per_cell() and sc.pp.log1p(), mainly 12 | for supporting different ways of saving raw data. 13 | """ 14 | sc.pp.normalize_total(adata, **kwargs) 15 | if log_transform: 16 | # Natural logarithm is the default by scanpy, if base is not set 17 | base = math.e 18 | sc.pp.log1p(adata, base=base) 19 | # scanpy is not setting base in uns['log1p'] keys, but later on asking for it 20 | if "log1p" in adata.uns_keys() and "base" not in adata.uns["log1p"]: 21 | # Note that setting base to None doesn't solve the problem at other modules that check for base later on 22 | # as adata.uns["log1p"]["base"] = None gets dropped at either anndata write or read. 
23 | adata.uns["log1p"]["base"] = base 24 | 25 | return adata 26 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_paga.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy paga 3 | """ 4 | 5 | import numpy as np 6 | import scanpy as sc 7 | from ..obj_utils import ( 8 | _backup_default_key, 9 | _delete_backup_key, 10 | _rename_default_key, 11 | _set_default_key, 12 | _restore_default_key, 13 | ) 14 | 15 | 16 | def paga( 17 | adata, 18 | key_added=None, 19 | **kwargs, 20 | ): 21 | """ 22 | Wrapper function for sc.tl.paga, for supporting named slot 23 | """ 24 | sc.tl.paga(adata, **kwargs) 25 | 26 | if key_added: 27 | paga_key = f"paga_{key_added}" 28 | _rename_default_key(adata.uns, "paga", paga_key) 29 | else: 30 | _delete_backup_key(adata.uns, "paga") 31 | 32 | return adata 33 | 34 | 35 | def plot_paga( 36 | adata, 37 | use_key="paga", 38 | basis=None, 39 | layout=None, 40 | init_pos=None, 41 | legend_loc="on data", 42 | color=None, 43 | size=None, 44 | title=None, 45 | show=None, 46 | save=None, 47 | **kwargs, 48 | ): 49 | """Make PAGA plot""" 50 | if basis is not None and f"X_{basis}" in adata.obsm.keys(): 51 | ax = sc.pl.embedding( 52 | adata, 53 | basis=basis, 54 | color=color, 55 | legend_loc=legend_loc, 56 | size=size, 57 | title=None, 58 | save=False, 59 | show=False, 60 | ) 61 | 62 | grouping = adata.uns[use_key]["groups"] 63 | categories = list(adata.obs[grouping].cat.categories) 64 | obsm = adata.obsm[f"X_{basis}"] 65 | group_pos = np.zeros((len(categories), 2)) 66 | for i, label in enumerate(categories): 67 | offset = 1 if basis.startswith("diffmap") else 0 68 | _scatter = obsm[adata.obs[grouping] == label, (0 + offset) : (2 + offset)] 69 | x_pos, y_pos = np.median(_scatter, axis=0) 70 | group_pos[i] = [x_pos, y_pos] 71 | 72 | _set_default_key(adata.uns, "paga", use_key) 73 | kwargs["node_size_scale"] = 0 74 | kwargs["fontsize"] = 1 75 | 
kwargs["pos"] = group_pos 76 | kwargs["color"] = None 77 | try: 78 | sc.pl.paga( 79 | adata, 80 | ax=ax, 81 | title=title, 82 | show=show, 83 | save=save, 84 | **kwargs, 85 | ) 86 | finally: 87 | _restore_default_key(adata.uns, "paga", use_key) 88 | else: 89 | _set_default_key(adata.uns, "paga", use_key) 90 | try: 91 | sc.pl.paga( 92 | adata, 93 | layout=layout, 94 | init_pos=init_pos, 95 | color=color, 96 | title=title, 97 | show=show, 98 | save=save, 99 | **kwargs, 100 | ) 101 | finally: 102 | _restore_default_key(adata.uns, "paga", use_key) 103 | 104 | return adata 105 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_pca.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy pca 3 | """ 4 | 5 | import logging 6 | import scanpy as sc 7 | from ..obj_utils import write_embedding 8 | 9 | 10 | def pca(adata, key_added=None, export_embedding=None, **kwargs): 11 | """ 12 | Wrapper function for sc.pp.pca, for supporting named slot 13 | """ 14 | 15 | # omit "svd_solver" to let scanpy choose automatically 16 | if "svd_solver" in kwargs and kwargs["svd_solver"] == "auto": 17 | del kwargs["svd_solver"] 18 | 19 | if key_added: 20 | if "X_pca" in adata.obsm.keys(): 21 | adata.obsm["X_pca_bkup"] = adata.obsm["X_pca"] 22 | sc.pp.pca(adata, **kwargs) 23 | pca_key = f"X_pca_{key_added}" 24 | adata.obsm[pca_key] = adata.obsm["X_pca"] 25 | del adata.obsm["X_pca"] 26 | if "X_pca_bkup" in adata.obsm.keys(): 27 | adata.obsm["X_pca"] = adata.obsm["X_pca_bkup"] 28 | del adata.obsm["X_pca_bkup"] 29 | else: 30 | sc.pp.pca(adata, **kwargs) 31 | pca_key = "X_pca" 32 | 33 | if export_embedding is not None: 34 | write_embedding(adata, pca_key, export_embedding, key_added=key_added) 35 | return adata 36 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_read.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Provides read_10x() 3 | """ 4 | 5 | import pandas as pd 6 | import scanpy as sc 7 | 8 | 9 | def read_10x( 10 | input_10x_h5, 11 | input_10x_mtx, 12 | genome="hg19", 13 | var_names="gene_symbols", 14 | extra_obs=None, 15 | extra_var=None, 16 | ): 17 | """ 18 | Wrapper function for sc.read_10x_h5() and sc.read_10x_mtx(), mainly to 19 | support adding extra metadata 20 | """ 21 | if input_10x_h5 is not None: 22 | adata = sc.read_10x_h5(input_10x_h5, genome=genome) 23 | elif input_10x_mtx is not None: 24 | adata = sc.read_10x_mtx(input_10x_mtx, var_names=var_names) 25 | 26 | if extra_obs: 27 | obs_tbl = pd.read_csv(extra_obs, sep="\t", header=0, index_col=0) 28 | adata.obs = adata.obs.merge( 29 | obs_tbl, 30 | how="left", 31 | left_index=True, 32 | right_index=True, 33 | suffixes=(False, False), 34 | ) 35 | 36 | if extra_var: 37 | var_tbl = pd.read_csv(extra_var, sep="\t", header=0, index_col=0) 38 | adata.var = adata.var.merge( 39 | var_tbl, 40 | how="left", 41 | left_index=True, 42 | right_index=True, 43 | suffixes=(False, False), 44 | ) 45 | return adata 46 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_scrublet.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy external scrublet 3 | """ 4 | 5 | import anndata 6 | import numpy as np 7 | import pandas as pd 8 | import scanpy as sc 9 | import scanpy.external as sce 10 | 11 | from ..obj_utils import write_obs 12 | 13 | # Wrapper for scrublet allowing text export and filtering 14 | 15 | 16 | def scrublet(adata, adata_sim=None, filter=False, export_table=None, **kwargs): 17 | """ 18 | Wrapper function for sce.pp.scrublet(), to allow filtering of resulting object 19 | """ 20 | 21 | # Do we need to read an object with the doublet simulations? 
22 | 23 | if adata_sim: 24 | adata_sim = sc.read_h5ad(adata_sim) 25 | 26 | sce.pp.scrublet(adata, adata_sim=adata_sim, **kwargs) 27 | 28 | # Do any export before optional filtering 29 | 30 | if export_table: 31 | write_obs(adata, ["doublet_score", "predicted_doublet"], export_table) 32 | 33 | # Filter out predited doublets 34 | 35 | if filter: 36 | adata._inplace_subset_obs(np.invert(adata.obs["predicted_doublet"])) 37 | 38 | return adata 39 | 40 | 41 | # Run the doublet simulation. 42 | 43 | 44 | def scrublet_simulate_doublets(adata, **kwargs): 45 | adata_sim = sce.pp.scrublet_simulate_doublets(adata, **kwargs) 46 | adata._init_as_actual( 47 | X=adata_sim.X, obs=adata_sim.obs, obsm=adata_sim.obsm, uns=adata.uns 48 | ) 49 | 50 | 51 | # Just absorb the extra plotting args before passing to 52 | # scanpy.external.pl.scrublet_score_distribution 53 | 54 | 55 | def plot_scrublet( 56 | adata, scale_hist_obs="log", scale_hist_sim="linear", fig_size=(8, 3), **kwargs 57 | ): 58 | """ 59 | Wrapper function for sce.pl.scrublet_score_distribution(), to allow 60 | plotting of score distribution 61 | """ 62 | sce.pl.scrublet_score_distribution( 63 | adata, 64 | scale_hist_obs=scale_hist_obs, 65 | scale_hist_sim=scale_hist_sim, 66 | figsize=fig_size, 67 | **kwargs 68 | ) 69 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_tsne.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy tsne 3 | """ 4 | 5 | import scanpy as sc 6 | from ..obj_utils import ( 7 | _backup_obsm_key, 8 | _rename_obsm_key, 9 | _delete_obsm_backup_key, 10 | write_embedding, 11 | ) 12 | 13 | 14 | def tsne( 15 | adata, 16 | key_added=None, 17 | random_state=0, 18 | export_embedding=None, 19 | **kwargs, 20 | ): 21 | """ 22 | Wrapper function for sc.tl.tsne, for supporting named slot of tsne embeddings 23 | """ 24 | if not isinstance(random_state, (list, tuple)): 25 | _backup_obsm_key(adata, "X_tsne") 26 | 27 | 
sc.tl.tsne(adata, random_state=random_state, **kwargs) 28 | 29 | tsne_key = "X_tsne" 30 | if key_added: 31 | tsne_key = f"X_tsne_{key_added}" 32 | _rename_obsm_key(adata, "X_tsne", tsne_key) 33 | else: 34 | _delete_obsm_backup_key(adata, "X_tsne") 35 | 36 | if export_embedding is not None: 37 | write_embedding(adata, tsne_key, export_embedding, key_added=key_added) 38 | else: 39 | for i, rseed in enumerate(random_state): 40 | if key_added is None: 41 | tsne_key = f"r{rseed}" 42 | elif not isinstance(key_added, (list, tuple)): 43 | tsne_key = f"{key_added}_r{rseed}" 44 | elif len(key_added) == len(random_state): 45 | tsne_key = key_added[i] 46 | else: 47 | raise ValueError( 48 | "`key_added` can only be None, a scalar, or " 49 | "an iterable of the same length as " 50 | "`random_state`." 51 | ) 52 | tsne( 53 | adata, 54 | key_added=tsne_key, 55 | random_state=rseed, 56 | **kwargs, 57 | ) 58 | return adata 59 | -------------------------------------------------------------------------------- /scanpy_scripts/lib/_umap.py: -------------------------------------------------------------------------------- 1 | """ 2 | scanpy umap 3 | """ 4 | 5 | import scanpy as sc 6 | from ..obj_utils import ( 7 | _set_default_key, 8 | _restore_default_key, 9 | _backup_obsm_key, 10 | _rename_obsm_key, 11 | _delete_obsm_backup_key, 12 | write_embedding, 13 | ) 14 | 15 | 16 | def umap( 17 | adata, 18 | key_added=None, 19 | random_state=0, 20 | export_embedding=None, 21 | **kwargs, 22 | ): 23 | """ 24 | Wrapper function for sc.tl.umap, for supporting named slot of umap embeddings 25 | """ 26 | if not isinstance(random_state, (list, tuple)): 27 | _backup_obsm_key(adata, "X_umap") 28 | 29 | sc.tl.umap(adata, random_state=random_state, **kwargs) 30 | 31 | umap_key = "X_umap" 32 | if key_added: 33 | umap_key = f"X_umap_{key_added}" 34 | _rename_obsm_key(adata, "X_umap", umap_key) 35 | else: 36 | _delete_obsm_backup_key(adata, "X_umap") 37 | 38 | if export_embedding is not None: 39 | 
write_embedding(adata, umap_key, export_embedding, key_added=key_added) 40 | else: 41 | for i, rseed in enumerate(random_state): 42 | if key_added is None: 43 | umap_key = f"r{rseed}" 44 | elif not isinstance(key_added, (list, tuple)): 45 | umap_key = f"{key_added}_r{rseed}" 46 | elif len(key_added) == len(random_state): 47 | umap_key = key_added[i] 48 | else: 49 | raise ValueError( 50 | "`key_added` can only be None, a scalar, or an " 51 | "iterable of the same length as `random_state`." 52 | ) 53 | umap( 54 | adata, 55 | key_added=umap_key, 56 | random_state=rseed, 57 | **kwargs, 58 | ) 59 | return adata 60 | -------------------------------------------------------------------------------- /scanpy_scripts/obj_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provide helper functions for constructing sub-commands 3 | """ 4 | 5 | import scanpy as sc 6 | import pandas as pd 7 | 8 | 9 | def write_obs(adata, keys, obs_fn, sep="\t"): 10 | """Export cell clustering as a text table""" 11 | if not isinstance(keys, (list, tuple)): 12 | keys = [keys] 13 | for key in keys: 14 | if key not in adata.obs.keys(): 15 | raise KeyError(f"{key} is not a valid `.uns` key") 16 | adata.obs[keys].reset_index(level=0).rename(columns={"index": "cells"}).to_csv( 17 | obs_fn, sep=sep, header=True, index=False 18 | ) 19 | 20 | 21 | def write_embedding(adata, key, embed_fn, n_comp=None, sep="\t", key_added=None): 22 | """Export cell embeddings as a txt table""" 23 | if key_added: 24 | if embed_fn.endswith(".tsv"): 25 | embed_fn = embed_fn[0:-4] 26 | embed_fn = f"{embed_fn}_{key_added}.tsv" 27 | if key not in adata.obsm.keys(): 28 | raise KeyError(f"{key} is not a valid `.obsm` key") 29 | mat = adata.obsm[key].copy() 30 | if n_comp is not None and mat.shape[1] >= n_comp: 31 | mat = mat[:, 0:n_comp] 32 | pd.DataFrame(mat, index=adata.obs_names).to_csv( 33 | embed_fn, sep=sep, header=False, index=True 34 | ) 35 | 36 | 37 | # The functions below 
handles slot key. 38 | # 39 | # Default keys are those read and written by scanpy functions by default, e.g 40 | # "X_pca", "neighbors", "louvain", etc. 41 | # 42 | # Of them, `obsm_key` specifically refers to those used for embedding, e.g 43 | # "X_pca", "X_tsne", "X_umap", etc. 44 | # 45 | # The approach for supplying a non-standard key to a function as input is: 46 | # if the function only reads the value in the default key, we first backup the 47 | # value in the default key, then write the value of the non-standard key into 48 | # the standard key, run the funtion, and finally restore the value of the 49 | # default key from backup and delete the backup. 50 | # 51 | # The approach for writting the results of a function to a non-standard key is: 52 | # if the function only writes to the default key, we first backup the value in 53 | # the default key, run the function, copy the value of the default key to the 54 | # desired non-standard key, and finally restore the value of the default key 55 | # from backup and delete the backup. 56 | # 57 | # Specical treatment for obsm_key is needed, as the underlying data type is not 58 | # a python dictionary but a numpy array. 
59 | 60 | 61 | def _backup_default_key(slot, default): 62 | if default in slot.keys(): 63 | bkup_key = f"{default}_bkup" 64 | if bkup_key in slot.keys(): 65 | sc.logging.warn(f"overwrite existing {bkup_key}") 66 | slot[bkup_key] = slot[default] 67 | 68 | 69 | def _restore_default_key(slot, default, key=None): 70 | if key != default: 71 | bkup_key = f"{default}_bkup" 72 | if bkup_key in slot.keys(): 73 | slot[default] = slot[bkup_key] 74 | del slot[bkup_key] 75 | 76 | 77 | def _delete_backup_key(slot, default): 78 | bkup_key = f"{default}_bkup" 79 | if bkup_key in slot.keys(): 80 | del slot[bkup_key] 81 | 82 | 83 | def _set_default_key(slot, default, key): 84 | if key != default: 85 | if key not in slot.keys(): 86 | raise KeyError(f"{key} does not exist") 87 | _backup_default_key(slot, default) 88 | slot[default] = slot[key] 89 | 90 | 91 | def _rename_default_key(slot, default, key): 92 | if not default in slot.keys(): 93 | raise KeyError(f"{default} does not exist") 94 | slot[key] = slot[default] 95 | del slot[default] 96 | _restore_default_key(slot, default) 97 | 98 | 99 | def _backup_obsm_key(adata, key): 100 | if key in adata.obsm_keys(): 101 | bkup_key = f"{key}_bkup" 102 | if bkup_key in adata.obsm_keys(): 103 | sc.logging.warn(f"overwrite existing {bkup_key}") 104 | adata.obsm[bkup_key] = adata.obsm[key] 105 | 106 | 107 | def _restore_obsm_key(adata, key, new_key=None): 108 | if new_key != key: 109 | bkup_key = f"{key}_bkup" 110 | if bkup_key in adata.obsm_keys(): 111 | adata.obsm[key] = adata.obsm[bkup_key] 112 | del adata.obsm[bkup_key] 113 | 114 | 115 | def _delete_obsm_backup_key(adata, key): 116 | bkup_key = f"{key}_bkup" 117 | if bkup_key in adata.obsm_keys(): 118 | del adata.obsm[bkup_key] 119 | 120 | 121 | def _set_obsm_key(adata, key, new_key): 122 | if new_key != key: 123 | if new_key not in adata.obsm_keys(): 124 | raise KeyError(f"{new_key} does not exist") 125 | _backup_obsm_key(adata, key) 126 | adata.obsm[key] = adata.obsm[new_key] 127 | 128 | 
129 | def _rename_obsm_key(adata, from_key, to_key): 130 | if not from_key in adata.obsm_keys(): 131 | raise KeyError(f"{from_key} does not exist") 132 | adata.obsm[to_key] = adata.obsm[from_key] 133 | del adata.obsm[from_key] 134 | _restore_obsm_key(adata, from_key) 135 | 136 | 137 | # Place the content of .X or specified layer in a specified backup location. 138 | 139 | 140 | def _save_matrix(adata, save_raw=False, save_layer=None, layer=None): 141 | if save_raw: 142 | adata.raw = adata 143 | if save_layer is not None: 144 | if layer is not None: 145 | if layer not in adata.layers(): 146 | raise KeyError(f"Layer {layer} does not exist") 147 | adata.layers[save_layer] = adata.layers[layer] 148 | else: 149 | adata.layers[save_layer] = adata.X 150 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name="scanpy-scripts", 8 | version="1.9.301", 9 | author="nh3", 10 | author_email="nh3@users.noreply.github.com", 11 | description="Scripts for using scanpy from the command line", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/ebi-gene-expression-group/scanpy-scripts", 15 | packages=find_packages(), 16 | scripts=[ 17 | "scanpy-scripts-tests.bats", 18 | ], 19 | entry_points=dict( 20 | console_scripts=[ 21 | "scanpy-cli=scanpy_scripts.cli:cli", 22 | "scanpy-read-10x=scanpy_scripts.cmds:READ_CMD", 23 | "scanpy-filter-cells=scanpy_scripts.cmds:FILTER_CMD", 24 | "scanpy-filter-genes=scanpy_scripts.cmds:FILTER_CMD", 25 | "scanpy-normalise-data=scanpy_scripts.cmds:NORM_CMD", 26 | "scanpy-find-variable-genes=scanpy_scripts.cmds:HVG_CMD", 27 | "scanpy-scale-data=scanpy_scripts.cmds:SCALE_CMD", 28 | 
"scanpy-regress=scanpy_scripts.cmds:REGRESS_CMD", 29 | "scanpy-run-pca=scanpy_scripts.cmds:PCA_CMD", 30 | "scanpy-neighbors=scanpy_scripts.cmds:NEIGHBOR_CMD", 31 | "scanpy-run-tsne=scanpy_scripts.cmds:TSNE_CMD", 32 | "scanpy-run-umap=scanpy_scripts.cmds:UMAP_CMD", 33 | "scanpy-find-cluster=scanpy_scripts.cli:cluster", 34 | "scanpy-find-markers=scanpy_scripts.cmds:DIFFEXP_CMD", 35 | ] 36 | ), 37 | install_requires=[ 38 | # "packaging", 39 | # "anndata", 40 | # "scipy", 41 | # "matplotlib", 42 | # "pandas", 43 | # "h5py<3.0.0", 44 | "scanpy==1.9.3", 45 | "louvain", 46 | "igraph", 47 | "leidenalg", 48 | "loompy", 49 | "Click<8", 50 | # "umap-learn", 51 | "harmonypy>=0.0.5", 52 | "bbknn>=1.5.0,<1.6.0", 53 | "mnnpy>=0.1.9.5", 54 | "scipy<1.9.0", 55 | "scikit-learn<1.3.0", 56 | "scrublet", 57 | "fa2", 58 | ], 59 | ) 60 | -------------------------------------------------------------------------------- /test-env.yaml: -------------------------------------------------------------------------------- 1 | name: scanpy-scripts 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - scanpy=1.9.3 8 | - louvain 9 | - igraph 10 | - leidenalg 11 | - loompy 12 | - Click <8 13 | - harmonypy>=0.0.5 14 | - bbknn>=1.5.0,<1.6.0 15 | - mnnpy>=0.1.9.5 16 | # for mnnpy using n_jobs 17 | - scipy <1.9.0 18 | - scikit-learn <1.3.0 19 | - scrublet 20 | - fa2 21 | # for testing 22 | - bats 23 | - black 24 | - pytest 25 | --------------------------------------------------------------------------------