├── .gitattributes ├── .github └── workflows │ ├── build_docs.yml │ ├── doc-tests.yml │ └── pypi-auto-deploy.yml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── Rlibs.R ├── besca ├── Import │ ├── __init__.py │ ├── _labelings.py │ └── _read.py ├── README.md ├── Rlibs.R ├── __init__.py ├── _helper.py ├── _logging.py ├── _notebook.py ├── _version.py ├── datasets │ ├── __init__.py │ ├── _datasets.py │ ├── _helper.py │ ├── _mito.py │ ├── data │ │ └── __init__.py │ ├── genesets │ │ ├── CellNames_scseqCMs6_Extrasigs.gmt │ │ ├── CellNames_scseqCMs6_Extrasigs.mouse.gmt │ │ ├── CellNames_scseqCMs6_config.mouse.png │ │ ├── CellNames_scseqCMs6_config.mouse.tsv │ │ ├── CellNames_scseqCMs6_config.png │ │ ├── CellNames_scseqCMs6_config.tsv │ │ ├── CellNames_scseqCMs6_sigs.gmt │ │ ├── CellNames_scseqCMs6_sigs.mouse.gmt │ │ ├── CryptScoreParikh2019.gmt │ │ ├── HumanCD45p_scseqCMs6.gmt │ │ ├── Immune.gmt │ │ └── __init__.py │ ├── homologs │ │ ├── MGItoHGNC.tsv │ │ └── __init__.py │ ├── mito_files │ │ ├── __init__.py │ │ ├── cyno.mito.tsv │ │ ├── cynomolgus.mito.tsv │ │ ├── human.mito.tsv │ │ ├── human.ribosomal.tsv │ │ ├── mouse.mito.ensembl.list │ │ ├── mouse.mito.tsv │ │ ├── pig.mito.tsv │ │ ├── rat.mito.tsv │ │ └── test.mito.tsv │ └── nomenclature │ │ ├── CellTypes_v1.tsv │ │ └── __init__.py ├── examples │ ├── example_function.py │ ├── example_gallery.py │ ├── example_include_plot_in_documentation.py │ └── gallery_examples │ │ ├── README.txt │ │ ├── plotting │ │ ├── README.txt │ │ ├── plot_celltype_quantification.py │ │ ├── plot_filtering.py │ │ ├── plot_qc.py │ │ ├── plot_riverplot.py │ │ └── plot_split_gene_expression.py │ │ ├── preprocessing │ │ ├── README.txt │ │ ├── plot_example_filtering.py │ │ └── plot_pca_neighbors_clustering.py │ │ ├── tools │ │ ├── README.txt │ │ └── plot_reclustering_function.py │ │ └── workflows │ │ ├── README.txt │ │ └── plot_celltype_annotation.py ├── export │ ├── __init__.py │ └── _export.py ├── pl │ ├── __init__.py │ ├── 
_celltype_quantification.py │ ├── _crispr_plots.py │ ├── _dot_heatmap.py │ ├── _filter_threshold_plots.py │ ├── _general.py │ ├── _nomenclature_network.py │ ├── _qc_plots.py │ ├── _riverplot.py │ ├── _split_gene_expression.py │ └── _update_palette.py ├── pp │ ├── __init__.py │ ├── _crispr_pp.py │ ├── _filtering.py │ ├── _fraction_counts.py │ ├── _fraction_pos.py │ ├── _normalization.py │ └── _wrapper_Rfuncs.py ├── st │ ├── _FAIR_export.py │ ├── __init__.py │ ├── _qc_report.py │ ├── _setup_funcs.py │ ├── _wrapper_Rfuncs.py │ ├── _wrapper_funcs.py │ └── style.css └── tl │ ├── README.md │ ├── __init__.py │ ├── _annot_compare.py │ ├── _annotate_cellnames.py │ ├── _count_occurrences.py │ ├── auto_annot │ ├── __init__.py │ └── _auto_annot.py │ ├── bcor │ ├── __init__.py │ └── _mnnpy_batchcorrection.py │ ├── crispr │ ├── __init__.py │ └── _crispr_tools.py │ ├── dge │ ├── __init__.py │ └── _dge.py │ ├── rc │ ├── __init__.py │ └── _reclustering.py │ └── sig │ ├── __init__.py │ ├── _annot.py │ ├── _gems_link.py │ ├── _helper.py │ ├── _io_sig.py │ ├── _metrics.py │ ├── _sig.py │ └── _silhouette.py ├── devtools ├── README.md ├── install_besca_editable.bash ├── requirements.txt └── run_workbooks.bash ├── docs ├── Makefile └── source │ ├── _images │ ├── Thumbs.db │ └── besca_outline.jpg │ ├── _static │ └── css │ │ └── custom.css │ ├── adding_new_functions.rst │ ├── auto_examples │ ├── index.rst │ ├── plotting │ │ ├── images │ │ │ ├── sphx_glr_filtering_plots_001.png │ │ │ ├── sphx_glr_plot_celltype_quantification_001.png │ │ │ ├── sphx_glr_plot_celltype_quantification_002.png │ │ │ ├── sphx_glr_plot_celltype_quantification_003.png │ │ │ ├── sphx_glr_plot_celltype_quantification_004.png │ │ │ ├── sphx_glr_plot_filtering_001.png │ │ │ ├── sphx_glr_plot_qc_001.png │ │ │ ├── sphx_glr_plot_qc_002.png │ │ │ ├── sphx_glr_plot_qc_003.png │ │ │ ├── sphx_glr_plot_qc_004.png │ │ │ ├── sphx_glr_plot_qc_005.png │ │ │ ├── sphx_glr_plot_split_gene_expression_001.png │ │ │ ├── 
sphx_glr_plot_split_gene_expression_002.png │ │ │ ├── sphx_glr_plot_split_gene_expression_003.png │ │ │ └── thumb │ │ │ │ ├── sphx_glr_filtering_plots_thumb.png │ │ │ │ ├── sphx_glr_plot_celltype_quantification_thumb.png │ │ │ │ ├── sphx_glr_plot_filtering_thumb.png │ │ │ │ ├── sphx_glr_plot_qc_thumb.png │ │ │ │ └── sphx_glr_plot_split_gene_expression_thumb.png │ │ ├── plot_celltype_quantification.ipynb │ │ ├── plot_celltype_quantification.py │ │ ├── plot_celltype_quantification.py.md5 │ │ ├── plot_celltype_quantification.rst │ │ ├── plot_filtering.ipynb │ │ ├── plot_filtering.py │ │ ├── plot_filtering.py.md5 │ │ ├── plot_filtering.rst │ │ ├── plot_qc.ipynb │ │ ├── plot_qc.py │ │ ├── plot_qc.py.md5 │ │ ├── plot_qc.rst │ │ ├── plot_riverplot.ipynb │ │ ├── plot_riverplot.py │ │ ├── plot_riverplot.py.md5 │ │ ├── plot_riverplot.rst │ │ ├── plot_split_gene_expression.ipynb │ │ ├── plot_split_gene_expression.py │ │ ├── plot_split_gene_expression.py.md5 │ │ ├── plot_split_gene_expression.rst │ │ └── sg_execution_times.rst │ ├── preprocessing │ │ ├── images │ │ │ ├── sphx_glr_plot_example_filtering_001.png │ │ │ ├── sphx_glr_plot_example_filtering_002.png │ │ │ ├── sphx_glr_plot_example_filtering_003.png │ │ │ ├── sphx_glr_plot_pca_neighbors_clustering_001.png │ │ │ ├── sphx_glr_plot_pca_neighbors_clustering_002.png │ │ │ ├── sphx_glr_plot_pca_neighbors_clustering_003.png │ │ │ ├── sphx_glr_plot_pca_neighbors_clustering_004.png │ │ │ ├── sphx_glr_plot_pca_neighbors_clustering_005.png │ │ │ └── thumb │ │ │ │ ├── sphx_glr_filtering_thumb.png │ │ │ │ ├── sphx_glr_plot_example_filtering_thumb.png │ │ │ │ └── sphx_glr_plot_pca_neighbors_clustering_thumb.png │ │ ├── plot_example_filtering.ipynb │ │ ├── plot_example_filtering.py │ │ ├── plot_example_filtering.py.md5 │ │ ├── plot_example_filtering.rst │ │ ├── plot_pca_neighbors_clustering.ipynb │ │ ├── plot_pca_neighbors_clustering.py │ │ ├── plot_pca_neighbors_clustering.py.md5 │ │ ├── plot_pca_neighbors_clustering.rst │ │ └── 
sg_execution_times.rst │ ├── tools │ │ ├── images │ │ │ ├── sphx_glr_plot_reclustering_function_001.png │ │ │ └── thumb │ │ │ │ ├── sphx_glr_plot_reclustering_function_thumb.png │ │ │ │ ├── sphx_glr_plot_simple_example_thumb.png │ │ │ │ └── sphx_glr_simple_example_thumb.png │ │ ├── plot_reclustering_function.ipynb │ │ ├── plot_reclustering_function.py │ │ ├── plot_reclustering_function.py.md5 │ │ ├── plot_reclustering_function.rst │ │ ├── sg_execution_times.rst │ │ └── tt_plot_reclustering_function.py │ └── workflows │ │ ├── images │ │ ├── sphx_glr_plot_celltype_annotation_001.png │ │ ├── sphx_glr_plot_celltype_annotation_002.png │ │ ├── sphx_glr_plot_celltype_annotation_003.png │ │ ├── sphx_glr_plot_celltype_annotation_004.png │ │ ├── sphx_glr_plot_celltype_annotation_005.png │ │ ├── sphx_glr_plot_celltype_annotation_006.png │ │ ├── sphx_glr_plot_celltype_annotation_007.png │ │ ├── sphx_glr_plot_celltype_annotation_008.png │ │ ├── sphx_glr_plot_celltype_annotation_009.png │ │ ├── sphx_glr_plot_celltype_annotation_010.png │ │ └── thumb │ │ │ ├── sphx_glr_annotate_celltypes_thumb.png │ │ │ ├── sphx_glr_plot_celltype_annotation_thumb.png │ │ │ └── sphx_glr_plot_celltype_quantification_thumb.png │ │ ├── plot_celltype_annotation.ipynb │ │ ├── plot_celltype_annotation.py │ │ ├── plot_celltype_annotation.py.md5 │ │ ├── plot_celltype_annotation.rst │ │ ├── plot_celltype_quantification.ipynb │ │ ├── plot_celltype_quantification.py │ │ ├── plot_celltype_quantification.py.md5 │ │ ├── plot_celltype_quantification.rst │ │ └── sg_execution_times.rst │ ├── bc.rst │ ├── bcor │ ├── besca.tl.bcor.batch_correct.rst │ └── besca.tl.bcor.postprocess_mnnpy.rst │ ├── besca.rst │ ├── besca │ ├── besca.Import.add_cell_labeling.rst │ ├── besca.Import.assert_adata.rst │ ├── besca.Import.read_mtx.rst │ ├── besca.export.X_to_mtx.rst │ ├── besca.export.analysis_metadata.rst │ ├── besca.export.clustering.rst │ ├── besca.export.generate_gep.rst │ ├── besca.export.labeling.rst │ ├── 
besca.export.labeling_info.rst │ ├── besca.export.pseudobulk.rst │ ├── besca.export.ranked_genes.rst │ ├── besca.export.raw_to_mtx.rst │ ├── besca.pl.box_per_ind.rst │ ├── besca.pl.celllabel_quant_boxplot.rst │ ├── besca.pl.celllabel_quant_stackedbar.rst │ ├── besca.pl.detected_genes.rst │ ├── besca.pl.dot_heatmap.rst │ ├── besca.pl.dot_heatmap_split.rst │ ├── besca.pl.dot_heatmap_split_greyscale.rst │ ├── besca.pl.dropouts.rst │ ├── besca.pl.gene_expr_split.rst │ ├── besca.pl.gene_expr_split_stacked.rst │ ├── besca.pl.kp_cells.rst │ ├── besca.pl.kp_counts.rst │ ├── besca.pl.kp_genes.rst │ ├── besca.pl.library_size.rst │ ├── besca.pl.librarysize_overview.rst │ ├── besca.pl.max_counts.rst │ ├── besca.pl.max_genes.rst │ ├── besca.pl.max_mito.rst │ ├── besca.pl.nomenclature_network.rst │ ├── besca.pl.riverplot_2categories.rst │ ├── besca.pl.stacked_split_violin.rst │ ├── besca.pl.top_genes_counts.rst │ ├── besca.pl.transcript_capture_efficiency.rst │ ├── besca.pl.update_qualitative_palette.rst │ ├── besca.pp.filter.rst │ ├── besca.pp.filter_gene_list.rst │ ├── besca.pp.frac_pos.rst │ ├── besca.pp.frac_reads.rst │ ├── besca.pp.fraction_counts.rst │ ├── besca.pp.mean_expr.rst │ ├── besca.pp.normalize_geometric.rst │ ├── besca.pp.top_counts_genes.rst │ ├── besca.pp.top_expressed_genes.rst │ ├── besca.st.additional_labeling.rst │ ├── besca.st.celltype_labeling.rst │ ├── besca.st.export_celltype.rst │ ├── besca.st.export_clustering.rst │ ├── besca.st.export_cp10k.rst │ ├── besca.st.export_metadata.rst │ ├── besca.st.export_rank.rst │ ├── besca.st.export_regressedOut.rst │ ├── besca.st.filtering_cells_genes_min.rst │ ├── besca.st.filtering_mito_genes_max.rst │ ├── besca.st.read_matrix.rst │ ├── besca.tl.annotate_cells_clustering.rst │ ├── besca.tl.bcor.batch_correct.rst │ ├── besca.tl.bcor.postprocess_mnnpy.rst │ ├── besca.tl.count_occurrence.rst │ ├── besca.tl.count_occurrence_subset.rst │ ├── besca.tl.count_occurrence_subset_conditions.rst │ ├── besca.tl.dge.get_de.rst │ 
├── besca.tl.dge.perform_dge.rst │ ├── besca.tl.dge.plot_interactive_volcano.rst │ ├── besca.tl.rc.annotate_new_cellnames.rst │ └── besca.tl.rc.recluster.rst │ ├── besca_standard_pipeline.rst │ ├── conf.py │ ├── dge │ ├── besca.tl.dge.get_de.rst │ ├── besca.tl.dge.perform_dge.rst │ └── besca.tl.dge.plot_interactive_volcano.rst │ ├── export │ ├── besca.export.X_to_mtx.rst │ ├── besca.export.analysis_metadata.rst │ ├── besca.export.clustering.rst │ ├── besca.export.generate_gep.rst │ ├── besca.export.labeling.rst │ ├── besca.export.labeling_info.rst │ ├── besca.export.pseudobulk.rst │ ├── besca.export.ranked_genes.rst │ └── besca.export.raw_to_mtx.rst │ ├── helper_functions │ ├── besca.concate_adata.rst │ ├── besca.convert_ensembl_to_symbol.rst │ ├── besca.convert_symbol_to_ensembl.rst │ ├── besca.get_ameans.rst │ ├── besca.get_means.rst │ ├── besca.get_raw.rst │ └── besca.subset_adata.rst │ ├── import │ ├── besca.Import.add_cell_labeling.rst │ ├── besca.Import.assert_adata.rst │ └── besca.Import.read_mtx.rst │ ├── index.rst │ ├── plotting │ ├── besca.pl.box_per_ind.rst │ ├── besca.pl.celllabel_quant_boxplot.rst │ ├── besca.pl.celllabel_quant_stackedbar.rst │ ├── besca.pl.detected_genes.rst │ ├── besca.pl.dot_heatmap.rst │ ├── besca.pl.dot_heatmap_split.rst │ ├── besca.pl.dot_heatmap_split_greyscale.rst │ ├── besca.pl.dropouts.rst │ ├── besca.pl.gene_expr_split.rst │ ├── besca.pl.gene_expr_split_stacked.rst │ ├── besca.pl.kp_cells.rst │ ├── besca.pl.kp_counts.rst │ ├── besca.pl.kp_genes.rst │ ├── besca.pl.library_size.rst │ ├── besca.pl.librarysize_overview.rst │ ├── besca.pl.max_counts.rst │ ├── besca.pl.max_genes.rst │ ├── besca.pl.max_mito.rst │ ├── besca.pl.nomenclature_network.rst │ ├── besca.pl.riverplot_2categories.rst │ ├── besca.pl.stacked_split_violin.rst │ ├── besca.pl.top_genes_counts.rst │ ├── besca.pl.transcript_capture_efficiency.rst │ └── besca.pl.update_qualitative_palette.rst │ ├── preprocessing │ ├── besca.pp.filter.rst │ ├── 
besca.pp.filter_gene_list.rst │ ├── besca.pp.frac_pos.rst │ ├── besca.pp.frac_reads.rst │ ├── besca.pp.fraction_counts.rst │ ├── besca.pp.mean_expr.rst │ ├── besca.pp.normalize_geometric.rst │ ├── besca.pp.top_counts_genes.rst │ └── besca.pp.top_expressed_genes.rst │ ├── reclustering │ ├── besca.tl.rc.annotate_new_cellnames.rst │ └── besca.tl.rc.recluster.rst │ ├── scripts │ ├── environment_besca.yml │ ├── environment_besca_test.yml │ ├── example_structure_environment.yml │ ├── example_structure_environment_combined.yml │ ├── example_structure_environment_pip.yml │ ├── gallery_package_structure.txt │ ├── python_minimal_package_structure.txt │ └── python_package_structure.txt │ ├── sig │ ├── besca.tl.sig.add_anno.rst │ ├── besca.tl.sig.combined_signature_score.rst │ ├── besca.tl.sig.compute_signed_score.rst │ ├── besca.tl.sig.export_annotconfig.rst │ ├── besca.tl.sig.filter_siggenes.rst │ ├── besca.tl.sig.get_gems.rst │ ├── besca.tl.sig.get_similar_geneset.rst │ ├── besca.tl.sig.getset.rst │ ├── besca.tl.sig.insert_gems.rst │ ├── besca.tl.sig.make_anno.rst │ ├── besca.tl.sig.match_cluster.rst │ ├── besca.tl.sig.obtain_dblabel.rst │ ├── besca.tl.sig.read_GMT_sign.rst │ ├── besca.tl.sig.read_annotconfig.rst │ └── besca.tl.sig.score_mw.rst │ ├── standardworkflow │ ├── besca.st.additional_labeling.rst │ ├── besca.st.celltype_labeling.rst │ ├── besca.st.export_celltype.rst │ ├── besca.st.export_clustering.rst │ ├── besca.st.export_cp10k.rst │ ├── besca.st.export_metadata.rst │ ├── besca.st.export_rank.rst │ ├── besca.st.export_regressedOut.rst │ ├── besca.st.filtering_cells_genes_min.rst │ ├── besca.st.filtering_mito_genes_max.rst │ └── besca.st.read_matrix.rst │ ├── tools │ ├── besca.tl.annotate_cells_clustering.rst │ ├── besca.tl.count_occurrence.rst │ ├── besca.tl.count_occurrence_subset.rst │ └── besca.tl.count_occurrence_subset_conditions.rst │ ├── tutorials.rst │ ├── tutorials │ ├── adata_to_eset.ipynb │ ├── auto_annot_tutorial.ipynb │ ├── bescape_tutorial.ipynb │ 
├── notebook1_data_processing_pbmc3k.ipynb │ ├── notebook2_celltype_annotation_pbmc3k.ipynb │ ├── notebook3_batch_correction.ipynb │ ├── scRNAseq_tutorial.ipynb │ └── scRNAseq_tutorial.umap_comparison_figure.png │ └── tutorials_html │ ├── adata_to_eset.html │ ├── bescape_tutorial.html │ └── generate_tutorialHtml.sh ├── environment.lock.yml ├── environment.yml ├── pytest.ini ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── data │ └── st │ │ └── wrapper_funcs │ │ └── labeling_test_CTL_OLD │ │ └── labelings │ │ └── celltype │ │ ├── WilxRank.gct │ │ ├── WilxRank.logFC.gct │ │ ├── WilxRank.pvalues.gct │ │ ├── average.gct │ │ ├── cell2labels.tsv │ │ ├── celltype_labelinfo.tsv │ │ ├── fract_pos.gct │ │ └── labelinfo.tsv └── test_wrapper_funcs.py ├── versioneer.py └── workbooks ├── README.md ├── Signature_exports.ipynb ├── celltype_annotation_besca.ipynb ├── minimal_notebook.ipynb └── standard_workflow_besca2.ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text eol=lf 3 | # Denote all files that are truly binary and should not be modified. 
4 | *.png binary 5 | *.jpg binary 6 | *.h5ad binary 7 | besca/_version.py export-subst 8 | -------------------------------------------------------------------------------- /.github/workflows/build_docs.yml: -------------------------------------------------------------------------------- 1 | name: Build Sphinx Documentation 2 | on: 3 | workflow_dispatch: 4 | 5 | jobs: 6 | cached-job: 7 | name: Cached (${{ matrix.python-version }}, ${{ matrix.os }}) 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | os: ["ubuntu-latest"] 13 | python-version: ["3.8"] 14 | steps: 15 | - name: Check out the repo and use mamba 16 | uses: actions/checkout@v3 17 | - uses: conda-incubator/setup-miniconda@v2 18 | with: 19 | miniforge-variant: Mambaforge 20 | miniforge-version: latest 21 | activate-environment: besca_create_docu 22 | use-mamba: true 23 | - name: Cache conda packages 24 | id: cache-conda 25 | uses: actions/cache@v3 26 | env: 27 | cache-name: cache-conda-packages 28 | with: 29 | path: ~/ 30 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/docs/environment.lock.yml') }} 31 | restore-keys: | 32 | ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/docs/environment.lock.yml') }} 33 | 34 | - name: Get Date 35 | id: get-date 36 | run: echo "::set-output name=today::$(/bin/date -u '+%Y%m%d')" 37 | shell: bash 38 | 39 | - name: Cache conda env 40 | uses: actions/cache@v3 41 | with: 42 | path: ${{ env.CONDA }}/envs 43 | key: conda-${{ runner.os }}--${{ runner.arch }}--${{ steps.get-date.outputs.today }}-${{ hashFiles('**/docs/environment.lock.yml') }}-${{ env.CACHE_NUMBER }} 44 | env: 45 | # Increase this value to reset cache if etc/example-environment.yml has not changed 46 | CACHE_NUMBER: 0 47 | id: cache 48 | 49 | - name: Update environment 50 | run: | 51 | pwd 52 | ls 53 | cd docs 54 | head environment.lock.yml 55 | mamba env update -n besca_create_docu -f docs/environment.lock.yml 56 | if: 
steps.cache.outputs.cache-hit != 'true' 57 | 58 | - name: Run Sphinx 59 | run: | 60 | mamba init 61 | source ~/.bashrc 62 | mamba activate besca_create_docu 63 | cd docs 64 | make html 65 | -------------------------------------------------------------------------------- /.github/workflows/doc-tests.yml: -------------------------------------------------------------------------------- 1 | name: Run doctests 2 | 3 | on: [release] 4 | 5 | jobs: 6 | doc-test-job: 7 | name: Doctests 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Check out the repo 11 | uses: actions/checkout@v3 12 | - uses: conda-incubator/setup-miniconda@v2 13 | with: 14 | miniconda-version: "latest" 15 | - name: Bash 16 | shell: bash -l {0} 17 | run: | 18 | conda install -c conda-forge mamba 19 | mamba init 20 | source ~/.bashrc 21 | mamba env create -f environment.yml 22 | mamba activate besca_dev 23 | pip install pytest 24 | cd besca 25 | export PYTHONDONTWRITEBYTECODE=1 26 | pytest --doctest-modules -W ignore::PendingDeprecationWarning --durations=0 27 | cd .. 
28 | pytest 29 | -------------------------------------------------------------------------------- /.github/workflows/pypi-auto-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | name: Build distribution 📦 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Set up Python 13 | uses: actions/setup-python@v5 14 | with: 15 | python-version: "3.8" 16 | - name: Install pypa/build 17 | run: >- 18 | python3 -m 19 | pip install 20 | setuptools wheel 21 | --user 22 | - name: Build a binary wheel and a source tarball 23 | run: python3 setup.py sdist bdist_wheel 24 | - name: Store the distribution packages 25 | uses: actions/upload-artifact@v4 26 | with: 27 | name: python-package-distributions 28 | path: dist/ 29 | 30 | publish-to-pypi: 31 | name: >- 32 | Publish Python 🐍 distribution 📦 to PyPI 33 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes 34 | needs: 35 | - build 36 | runs-on: ubuntu-latest 37 | environment: 38 | name: publishing 39 | url: https://pypi.org/p/besca 40 | permissions: 41 | id-token: write # IMPORTANT: mandatory for trusted publishing 42 | 43 | steps: 44 | - name: Download all the dists 45 | uses: actions/download-artifact@v4 46 | with: 47 | name: python-package-distributions 48 | path: dist/ 49 | - name: Publish distribution 📦 to PyPI 50 | uses: pypa/gh-action-pypi-publish@release/v1 51 | 52 | github-release: 53 | name: >- 54 | Sign the Python 🐍 distribution 📦 with Sigstore 55 | and upload them to GitHub Release 56 | needs: 57 | - publish-to-pypi 58 | runs-on: ubuntu-latest 59 | 60 | permissions: 61 | contents: write # IMPORTANT: mandatory for making GitHub Releases 62 | id-token: write # IMPORTANT: mandatory for sigstore 63 | 64 | steps: 65 | - name: Download all the dists 66 | uses: actions/download-artifact@v4 67 | with: 68 | name: 
python-package-distributions 69 | path: dist/ 70 | - name: Sign the dists with Sigstore 71 | uses: sigstore/gh-action-sigstore-python@v2.1.1 72 | with: 73 | inputs: >- 74 | ./dist/*.tar.gz 75 | ./dist/*.whl 76 | - name: Create GitHub Release 77 | env: 78 | GITHUB_TOKEN: ${{ github.token }} 79 | run: >- 80 | gh release create 81 | '${{ github.ref_name }}' 82 | --repo '${{ github.repository }}' 83 | --notes "" 84 | - name: Upload artifact signatures to GitHub Release 85 | env: 86 | GITHUB_TOKEN: ${{ github.token }} 87 | # Upload to GitHub Release using the `gh` CLI. 88 | # `dist/` contains the built packages, and the 89 | # sigstore-produced signatures and certificates. 90 | run: >- 91 | gh release upload 92 | '${{ github.ref_name }}' dist/** 93 | --repo '${{ github.repository }}' 94 | 95 | # publish-to-testpypi: 96 | # name: Publish Python 🐍 distribution 📦 to TestPyPI 97 | # needs: 98 | # - build 99 | # runs-on: ubuntu-latest 100 | 101 | # environment: 102 | # name: publishing 103 | # url: https://test.pypi.org/p/besca 104 | 105 | # permissions: 106 | # id-token: write # IMPORTANT: mandatory for trusted publishing 107 | 108 | # steps: 109 | # - name: Download all the dists 110 | # uses: actions/download-artifact@v4 111 | # with: 112 | # name: python-package-distributions 113 | # path: dist/ 114 | # - name: Publish distribution 📦 to TestPyPI 115 | # uses: pypa/gh-action-pypi-publish@release/v1 116 | # with: 117 | # repository-url: https://test.pypi.org/legacy/ 118 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | besca.egg-info/ 3 | .ipynb_checkpoints 4 | cache/ 5 | workbooks/test.ipynb 6 | workbooks/analyzed 7 | workbooks/standard_workflow_besca2-requirements.txt 8 | workbooks/*nbconvert.ipynb 9 | workbooks/*requirements.txt 10 | figures_testing/ 11 | *~ 12 | *.swp 13 | 14 | build 15 | pip 16 | *.pyc 17 | 18 | 
README.private.md 19 | *_venv 20 | *.h5ad 21 | *.html 22 | besca_test 23 | *.gmtx 24 | dist 25 | 26 | .DS_Store 27 | 28 | # Unit test / coverage reports 29 | htmlcov/ 30 | .tox/ 31 | .coverage 32 | .coverage.* 33 | .cache 34 | junit.xml 35 | nosetests.xml 36 | coverage.xml 37 | *,cover 38 | 39 | # Sphinx documentation 40 | docs/_build/ 41 | 42 | .vscode -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include besca/_version.py 3 | -------------------------------------------------------------------------------- /Rlibs.R: -------------------------------------------------------------------------------- 1 | args = commandArgs(trailingOnly=TRUE) 2 | libloc <- args[1] 3 | reposloc<-"https://cloud.r-project.org/" #consider args[2] 4 | dir.create(libloc) 5 | 6 | .libPaths(libloc) 7 | if (!require("devtools")) install.packages("devtools", lib = libloc, repos = reposloc) 8 | if (!require("remotes")) install.packages("remotes", lib = libloc, repos = reposloc) 9 | if (!require("withr")) install.packages("withr", lib = libloc, repos = reposloc) 10 | if (!require("vctrs")) install.packages("vctrs", lib = libloc, repos = reposloc) 11 | if (!require("patchwork")) with_libpaths(new = libloc, devtools::install_github("thomasp85/patchwork")) 12 | if (!require("dsb")) install.packages("dsb", lib = libloc, repos = reposloc) #to verify if same 13 | if (!require("tidyverse")) install.packages("tidyverse", lib = libloc, repos = reposloc) 14 | if (!require("magrittr")) install.packages("magrittr", lib = libloc, repos = reposloc) 15 | if (!require("data.table")) install.packages("data.table", lib = libloc, repos = reposloc) 16 | if (!require("Matrix")) install.packages("Matrix", lib = libloc, repos = reposloc) 17 | if (!require("ggplot2")) install.packages("ggplot2", lib = libloc, repos = reposloc) 18 | if (!require("readr")) 
install.packages("readr", lib = libloc, repos = reposloc) 19 | if (!require("Seurat")) install.packages("Seurat", lib = libloc, repos = reposloc) 20 | if (!require("intrinsicDimension")) install.packages("intrinsicDimension", lib = libloc, repos = reposloc) 21 | if (!require("scater")) install.packages("scater", lib = libloc, repos = reposloc) 22 | if (!requireNamespace("BiocManager", quietly = TRUE)) 23 | install.packages("BiocManager",lib = libloc, repos = reposloc) 24 | if (!require("DropletUtils")) BiocManager::install("DropletUtils", lib = libloc) 25 | if (!require("scry")) BiocManager::install("scry", lib = libloc) #requires R >=4.0.3 26 | -------------------------------------------------------------------------------- /besca/Import/__init__.py: -------------------------------------------------------------------------------- 1 | from besca.Import._read import read_mtx, assert_adata 2 | from besca.Import._labelings import add_cell_labeling 3 | 4 | __all__ = ["read_mtx", "add_cell_labeling", "assert_adata"] 5 | -------------------------------------------------------------------------------- /besca/Import/_labelings.py: -------------------------------------------------------------------------------- 1 | from pandas import read_csv 2 | from os.path import join as pathjoin 3 | 4 | 5 | def add_cell_labeling(adata, filepath, label="celltype"): 6 | """add a labeling written out in the FAIR formatting to adata.obs 7 | 8 | A labeling contained in the FAIR compliant cell2labels.tsv is added to adata.obs. The string supplied 9 | in the parameter label is used to name the column in adata.obs that contains the imported labeling. 10 | 11 | All cells that are not labeled in the cell2labels.tsv will be annotated with 'not labeled'. 
12 | 13 | parameters 14 | ---------- 15 | adata: `AnnData` 16 | the AnnData object whose obs should be updated 17 | filepath: `str` 18 | filepath to the cell2labels.tsv that is to be appended to adata.obs 19 | label: `str` | default = 'celltype' 20 | string indicating the label that is to be added to the annotation that is being imported 21 | 22 | returns 23 | ------- 24 | None 25 | updates the supplied AnnData object 26 | 27 | """ 28 | 29 | labeling = read_csv(pathjoin(filepath, "cell2labels.tsv"), sep="\t", index_col=0) 30 | labeling.rename(columns={"LABEL": label}, inplace=True) 31 | 32 | adata.obs[label] = "not labeled" 33 | adata.obs.update(labeling) 34 | 35 | return None 36 | -------------------------------------------------------------------------------- /besca/README.md: -------------------------------------------------------------------------------- 1 | # Overview of Abbreviations 2 | 3 | pp = preprocessing 4 | tl = tools 5 | pl = plotting 6 | st = standardpipeline 7 | Import = importing functions (note that we did not use `import` as module 8 | because it is a keyword) 9 | export = exporting functions 10 | -------------------------------------------------------------------------------- /besca/Rlibs.R: -------------------------------------------------------------------------------- 1 | args = commandArgs(trailingOnly=TRUE) 2 | libloc <- args[1] 3 | 4 | dir.create(libloc) 5 | 6 | .libPaths(libloc) 7 | if (!require("devtools")) install.packages("devtools", lib = libloc) 8 | if (!require("withr")) install.packages("withr", lib = libloc) 9 | if (!require("vctrs")) install.packages("vctrs", lib = libloc) 10 | if (!require("patchwork")) install.packages("patchwork", lib = libloc) 11 | if (!require("dsb")) with_libpaths(new = libloc, install_github("MattPM/dsb")) 12 | if (!require("tidyverse")) install.packages("tidyverse", lib = libloc) 13 | if (!require("magrittr")) install.packages("magrittr", lib = libloc) 14 | if (!require("data.table")) 
install.packages("data.table", lib = libloc) 15 | if (!require("Matrix")) install.packages("Matrix", lib = libloc) 16 | if (!require("DropletUtils")) with_libpaths(new = libloc, install_github("MarioniLab/DropletUtils")) 17 | if (!require("BiocManager")) install.packages("BiocManager", lib = libloc) 18 | if (!require("scater")) BiocManager::install("scater", lib = libloc) 19 | 20 | -------------------------------------------------------------------------------- /besca/__init__.py: -------------------------------------------------------------------------------- 1 | from besca import pl 2 | from besca import tl 3 | from besca import pp 4 | from besca import st 5 | from besca import datasets 6 | from besca import export 7 | from besca import Import 8 | 9 | 10 | from besca._helper import ( 11 | subset_adata, 12 | convert_ensembl_to_symbol, 13 | convert_symbol_to_ensembl, 14 | get_raw, 15 | get_ameans, 16 | get_means, 17 | concate_adata, 18 | get_singlegenedf, 19 | print_software_versions 20 | ) 21 | 22 | from besca._notebook import ( 23 | save_notebook, 24 | save_notebook_return_path, 25 | convert_notebook_to_HTML 26 | ) 27 | 28 | __all__ = [ 29 | "pl", 30 | "tl", 31 | "pp", 32 | "st", 33 | "datasets", 34 | "export", 35 | "subset_adata", 36 | "import", 37 | "convert_ensembl_to_symbol", 38 | "convert_symbol_to_ensembl", 39 | "get_raw", 40 | "get_ameans", 41 | "get_means", 42 | "concate_adata", 43 | "get_singlegenedf", 44 | "print_software_versions", 45 | "convert_notebook_to_HTML" 46 | ] 47 | 48 | from besca._version import get_versions 49 | 50 | __version__ = get_versions()["version"] 51 | del get_versions 52 | 53 | from . 
def initialize_logger(log_file):
    """Configure the root logger to report INFO messages to a file and the console.

    Two handlers are attached to the root logger:

    * a file handler appending the bare message text to ``log_file``
    * a console (stream) handler prefixing each message with ``LOG MESSAGE: ``

    parameters
    ----------
    log_file: `str`
        path of the log file that messages are appended to. The file is only
        opened once the first message is emitted (``delay=True``).

    returns
    -------
    None
        the root logger is modified in place

    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    # NOTE: handlers are added unconditionally, so every call to this function
    # attaches one more file handler and one more console handler.

    # file handler: plain messages, appended to the log file.
    # ``delay`` is a boolean flag; pass a real bool rather than a truthy string.
    file_handler = logging.FileHandler(log_file, mode="a", encoding=None, delay=True)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(logging.Formatter("%(message)s"))
    root_logger.addHandler(file_handler)

    # console handler: same level, messages marked with a prefix
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(logging.Formatter("LOG MESSAGE: %(message)s"))
    root_logger.addHandler(console_handler)
def save_notebook_return_path():
    """Trigger a notebook save and return the newest notebook's full path.

    After asking the front end to save, every ``*.ipynb`` file in the current
    working directory is inspected and the one with the latest modification
    time is returned. ``None`` is returned when no notebook is found.
    """

    save_notebook()

    working_dir = os.getcwd()
    stamped = []
    for notebook_name in glob("*.ipynb"):
        candidate = os.path.join(working_dir, notebook_name)
        stamped.append((os.stat(candidate).st_mtime, candidate))

    # only strictly positive modification times can win; on ties the first
    # file listed by glob is kept (max returns the first maximal element)
    eligible = [entry for entry in stamped if entry[0] > 0]
    if not eligible:
        return None
    return max(eligible, key=lambda entry: entry[0])[1]
def get_mito_genes(species: str = "human", annotation_type: str = "ENSEMBL") -> list:
    """Returns the array of genes annotated as mitochondrial in species.

    The genes are read from the two-column, tab-separated reference file
    ``mito_files/<species>.mito.tsv`` located next to this module
    (first column: ENSEMBL gene id, second column: gene symbol).

    Parameters
    ----------
    species: `str` | default = human
        species of the datasets.
        Accepted: cyno, cynomolgus, human, mouse, rat, pig
    annotation_type: `str` | default = ENSEMBL
        which identifier to return. ENSEMBL or SYMBOL accepted

    Returns
    -------
    mito_genes : array of str

    Raises
    ------
    ValueError
        if ``species`` or ``annotation_type`` is not one of the accepted values

    Example
    -------
    >>> pytest.skip('Test will be skipped, because slow downloading of file in the github action job can occur a timeout')
    >>> import besca as bc
    >>> mito_genes = bc.datasets.get_mito_genes('human')
    >>> mito_genes
    ['ENSG00000198695', 'ENSG00000198712', 'ENSG00000198727', 'ENSG00000198763', 'ENSG00000198786', 'ENSG00000198804', 'ENSG00000198840', 'ENSG00000198886', 'ENSG00000198888', 'ENSG00000198899', 'ENSG00000198938', 'ENSG00000212907', 'ENSG00000228253']

    """
    valid = {"cyno", "cynomolgus", "human", "mouse", "rat", "pig"}
    if species not in valid:
        # sorted() keeps the message stable; a raw set prints in arbitrary order
        raise ValueError("species must be one of %s." % sorted(valid))

    # column index in the reference file for each supported annotation type
    # ENS_GENE_ID GENE_SYMBOL (2 cols)
    column_by_annotation = {"ENSEMBL": 0, "SYMBOL": 1}
    if annotation_type not in column_by_annotation:
        raise ValueError("annotation_type must be either SYMBOL or ENSEMBL")

    ref_mito_file = os.path.join(
        os.path.dirname(__file__), "mito_files", species + ".mito.tsv"
    )
    mito_table = read_csv(ref_mito_file, header=None, sep="\t")
    return list(mito_table[column_by_annotation[annotation_type]])
2019]:From stem cells (low score) to undifferentiated cells to differentiated top crypt cells (high score) SELENOP CEACAM7 PLAC8 CEACAM1 TSPAN1 CEACAM5 CEACAM6 IFI27 DHRS9 KRT20 RHOC CD177 PKIB HPGD LYPD8 2 | -------------------------------------------------------------------------------- /besca/datasets/genesets/Immune.gmt: -------------------------------------------------------------------------------- 1 | lymphocyte PTPRC 2 | myeloid S100A8 S100A9 CST3 3 | Bcell CD19 CD79A MS4A1 4 | Tcells CD3E CD3G CD3D 5 | CD4 CD4 6 | CD8 CD8A CD8B 7 | NKcell NKG7 GNLY NCAM1 8 | monocyte CST3 CSF1R ITGAM CD14 FCGR3A FCGR3B 9 | macrophage CD14 IL1B LYZ CD163 ITGAX CD68 CSF1R FCGR3A 10 | -------------------------------------------------------------------------------- /besca/datasets/genesets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bedapub/besca/27c36b5e7d22790f429056cb8b1c2539469bc50a/besca/datasets/genesets/__init__.py -------------------------------------------------------------------------------- /besca/datasets/homologs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bedapub/besca/27c36b5e7d22790f429056cb8b1c2539469bc50a/besca/datasets/homologs/__init__.py -------------------------------------------------------------------------------- /besca/datasets/mito_files/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bedapub/besca/27c36b5e7d22790f429056cb8b1c2539469bc50a/besca/datasets/mito_files/__init__.py -------------------------------------------------------------------------------- /besca/datasets/mito_files/cyno.mito.tsv: -------------------------------------------------------------------------------- 1 | ENSMFAG00000046415 ND1 2 | ENSMFAG00000046416 ND2 3 | ENSMFAG00000046417 COX1 4 | ENSMFAG00000046418 COX2 5 | ENSMFAG00000046419 
ATP8 6 | ENSMFAG00000046420 ATP6 7 | ENSMFAG00000046421 COX3 8 | ENSMFAG00000046422 ND3 9 | ENSMFAG00000046423 ND4L 10 | ENSMFAG00000046424 ND4 11 | ENSMFAG00000046425 ND5 12 | ENSMFAG00000046426 ND6 13 | ENSMFAG00000046427 CYTB 14 | -------------------------------------------------------------------------------- /besca/datasets/mito_files/cynomolgus.mito.tsv: -------------------------------------------------------------------------------- 1 | ENSMFAG00000046415 ND1 2 | ENSMFAG00000046416 ND2 3 | ENSMFAG00000046417 COX1 4 | ENSMFAG00000046418 COX2 5 | ENSMFAG00000046419 ATP8 6 | ENSMFAG00000046420 ATP6 7 | ENSMFAG00000046421 COX3 8 | ENSMFAG00000046422 ND3 9 | ENSMFAG00000046423 ND4L 10 | ENSMFAG00000046424 ND4 11 | ENSMFAG00000046425 ND5 12 | ENSMFAG00000046426 ND6 13 | ENSMFAG00000046427 CYTB 14 | -------------------------------------------------------------------------------- /besca/datasets/mito_files/human.mito.tsv: -------------------------------------------------------------------------------- 1 | ENSG00000198695 MT-ND6 2 | ENSG00000198712 MT-CO2 3 | ENSG00000198727 MT-CYB 4 | ENSG00000198763 MT-ND2 5 | ENSG00000198786 MT-ND5 6 | ENSG00000198804 MT-CO1 7 | ENSG00000198840 MT-ND3 8 | ENSG00000198886 MT-ND4 9 | ENSG00000198888 MT-ND1 10 | ENSG00000198899 MT-ATP6 11 | ENSG00000198938 MT-CO3 12 | ENSG00000212907 MT-ND4L 13 | ENSG00000228253 MT-ATP8 14 | -------------------------------------------------------------------------------- /besca/datasets/mito_files/mouse.mito.ensembl.list: -------------------------------------------------------------------------------- 1 | ENSMUSG00000102011 2 | ENSMUSG00000100246 3 | ENSMUSG00000100533 4 | ENSMUSG00000096178 5 | ENSMUSG00000100964 6 | ENSMUSG00000099619 7 | ENSMUSG00000099399 8 | ENSMUSG00000095134 9 | ENSMUSG00000095366 10 | ENSMUSG00000096768 11 | ENSMUSG00000099871 12 | ENSMUSG00000096850 13 | ENSMUSG00000064336 14 | ENSMUSG00000064337 15 | ENSMUSG00000064338 16 | ENSMUSG00000064339 17 | ENSMUSG00000064340 18 | 
ENSMUSG00000064341 19 | ENSMUSG00000064342 20 | ENSMUSG00000064343 21 | ENSMUSG00000064344 22 | ENSMUSG00000064345 23 | ENSMUSG00000064346 24 | ENSMUSG00000064347 25 | ENSMUSG00000064348 26 | ENSMUSG00000064349 27 | ENSMUSG00000064350 28 | ENSMUSG00000064351 29 | ENSMUSG00000064352 30 | ENSMUSG00000064353 31 | ENSMUSG00000064354 32 | ENSMUSG00000064355 33 | ENSMUSG00000064356 34 | ENSMUSG00000064357 35 | ENSMUSG00000064358 36 | ENSMUSG00000064359 37 | ENSMUSG00000064360 38 | ENSMUSG00000064361 39 | ENSMUSG00000065947 40 | ENSMUSG00000064363 41 | ENSMUSG00000064364 42 | ENSMUSG00000064365 43 | ENSMUSG00000064366 44 | ENSMUSG00000064367 45 | ENSMUSG00000064368 46 | ENSMUSG00000064369 47 | ENSMUSG00000064370 48 | ENSMUSG00000064371 49 | ENSMUSG00000064372 50 | -------------------------------------------------------------------------------- /besca/datasets/mito_files/mouse.mito.tsv: -------------------------------------------------------------------------------- 1 | ENSMUSG00000064341 mt-Nd1 2 | ENSMUSG00000064345 mt-Nd2 3 | ENSMUSG00000064351 mt-Co1 4 | ENSMUSG00000064354 mt-Co2 5 | ENSMUSG00000064356 mt-Atp8 6 | ENSMUSG00000064357 mt-Atp6 7 | ENSMUSG00000064358 mt-Co3 8 | ENSMUSG00000064360 mt-Nd3 9 | ENSMUSG00000065947 mt-Nd4l 10 | ENSMUSG00000064363 mt-Nd4 11 | ENSMUSG00000064367 mt-Nd5 12 | ENSMUSG00000064368 mt-Nd6 13 | ENSMUSG00000064370 mt-Cytb 14 | -------------------------------------------------------------------------------- /besca/datasets/mito_files/pig.mito.tsv: -------------------------------------------------------------------------------- 1 | ENSSSCG00000018080 ATP8 2 | ENSSSCG00000018075 COX1 3 | ENSSSCG00000018078 COX2 4 | ENSSSCG00000018082 COX3 5 | ENSSSCG00000018094 CYTB 6 | ENSSSCG00000018069 ND2 7 | ENSSSCG00000018084 ND3 8 | ENSSSCG00000018086 ND4L 9 | ENSSSCG00000018091 ND5 10 | ENSSSCG00000018092 ND6 11 | -------------------------------------------------------------------------------- /besca/datasets/mito_files/rat.mito.tsv: 
def function_name(
    param1="default_value1", param2="default_value2", param3="default_value3"
):
    """one-line function description that shows up in summaries.

    more extensive multi-line function description explaining exactly what the
    function does and what it is intended for. Examples of how to call the
    function can also be provided here.

    Parameters
    ----------
    param1: `type` | default = default_value1
        brief description of what param1 controls and to what it should be set
    param2: `type` | default = default_value2
        brief description of what param2 controls and to what it should be set
    param3: `type` | default = default_value3
        brief description of what param3 controls and to what it should be set

    Returns
    -------
    Type
        Information on what the function returns

    Example
    -------

    >>> #insert example code here
    >>> 1 + 1
    2

    # this code is only displayed not executed

    """

    # function body
    # do something here
-------------------------------------------------------------------------------- 1 | """ 2 | ... 3 | 4 | Example 5 | ------- 6 | 7 | Description of your example. 8 | 9 | >>> # this is code that will be displayed but not executed 10 | >>> # it should be a duplicate of the code used to generate the plot 11 | >>> # people will not be able to see how you generated the plot 12 | >>> ## plotting code 1 13 | >>> ## plotting code 2 14 | 15 | .. plot:: 16 | 17 | >>> # this is code that will be displayed but not executed 18 | >>> # it should be a duplicate of the code used to generate the plot 19 | >>> # people will not be able to see how you generated the plot 20 | >>> ## plotting code 1 21 | >>> ## plotting code 2 22 | 23 | """ 24 | -------------------------------------------------------------------------------- /besca/examples/gallery_examples/README.txt: -------------------------------------------------------------------------------- 1 | Code Examples 2 | ============= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 -------------------------------------------------------------------------------- /besca/examples/gallery_examples/plotting/README.txt: -------------------------------------------------------------------------------- 1 | Plotting examples 2 | ----------------- 3 | 4 | This is a gallery containing some plotting examples. 5 | 6 | 7 | -------------------------------------------------------------------------------- /besca/examples/gallery_examples/plotting/plot_celltype_quantification.py: -------------------------------------------------------------------------------- 1 | """ 2 | Visualize cell fractions 3 | ======================== 4 | 5 | This example demonstrates how to generate celltype quantification plots. These types of plots 6 | can be used to visually represent the number of cells that belong to a certain subset or condition. 
"""
filtering functions
===================

This example shows you how to generate plots to visualize the chosen filter threshold.
This way you can easily check (visually) if your chosen threshold is a good one.

"""

import besca as bc
import matplotlib.pyplot as plt
import pytest
pytest.skip('Test is only for here as example and should not be executed')
adata = bc.datasets.pbmc3k_raw()

# define thresholds
min_genes = 600
min_cells = 2
min_UMI = 600
max_UMI = 6500
max_mito = 0.05
max_genes = 1900

# Visualize filtering thresholds: one panel per threshold on a 2 x 3 grid
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(ncols=3, nrows=2)
fig.set_figwidth(15)
fig.set_figheight(8)
fig.tight_layout(pad=4.5)

bc.pl.kp_genes(adata, min_genes=min_genes, ax=ax1)
bc.pl.kp_cells(adata, min_cells=min_cells, ax=ax2)
bc.pl.kp_counts(adata, min_counts=min_UMI, ax=ax3)
bc.pl.max_counts(adata, max_counts=max_UMI, ax=ax4)
bc.pl.max_mito(
    adata, max_mito=max_mito, annotation_type="SYMBOL", species="human", ax=ax5
)
# draw into the sixth panel like the other plots; without ``ax`` the last
# subplot of the grid stays empty
bc.pl.max_genes(adata, max_genes=max_genes, ax=ax6)
18 | 19 | # import data 20 | adata = bc.datasets.pbmc3k_raw() 21 | 22 | ############################################################################### 23 | # transcript capture efficiency 24 | # ----------------------------- 25 | # 26 | # Plotting the transcript capture efficiency will give you an overview of the expression 27 | # of genes within cells relative to the total UMI counts. 28 | 29 | # transcript capture efficiency 30 | fig, ax = plt.subplots(1) 31 | fig.set_figwidth(8) 32 | fig.set_figheight(5) 33 | fig.tight_layout() 34 | 35 | bc.pl.transcript_capture_efficiency(adata, ax=ax) 36 | 37 | ############################################################################### 38 | # overview of library size unprocessed 39 | # ------------------------------------ 40 | # 41 | # This gives you an overview of the read distribution per cell. High quality cells 42 | # will have a larger number of reads per cell and this is a parameter you can use to 43 | # filter out low quality cells. The number of reads you would expect per cell is strongly 44 | # dependent on the single-cell sequencing method you used. 45 | 46 | bc.pl.librarysize_overview(adata) 47 | 48 | ############################################################################### 49 | # most strongly expressed transcripts 50 | # ----------------------------------- 51 | # 52 | # This will let you identify the genes which dominant your experiment 53 | # (generally you would expect mitochondrial and ribosomal genes, 54 | # in this dataset these genes have been removed beforehand). 55 | 56 | bc.pl.top_genes_counts(adata=adata, top_n=25) 57 | 58 | ############################################################################### 59 | # visualize the processed dataset 60 | # ------------------------------- 61 | # 62 | # After performing your filtering it is generally a good idea to take another look 63 | # at your dataset to ensure that the filtering parameters used were reasonable. 
"""
Comparing categorical variable
==============================

This example shows you how to generate riverplots to compare categorical columns,
for example to compare multiple annotations.
This way you can easily check (visually) discrepancies.

"""


import besca as bc
import pytest
pytest.skip('Test is only for here as example and should not be executed')
# import data
adata = bc.datasets.Baron2016_processed()

###############################################################################
# compare two categories: annotations made by different annotators
# ----------------------------------------------------------------


bc.pl.riverplot_2categories(adata, ["assigned_cluster", "celltype2"])
"""
performing filtering using besca
================================

This example demonstrates the entire process of filtering out cells/genes of subpar quality
before proceeding with analysis.

"""

import besca as bc
import scanpy as sc
import matplotlib.pyplot as plt
import pytest
pytest.skip('Test is only for here as example and should not be executed')
# load example dataset
adata = bc.datasets.pbmc3k_raw()

# set standard filtering parameters
min_genes = 600
min_cells = 2
min_UMI = 600
max_UMI = 6500
max_mito = 0.05
max_genes = 1900

###############################################################################
# visualization of thresholds
# ---------------------------
#
# First the chosen thresholds are visualized to ensure that a suitable cutoff has been chosen.

# Visualize filtering thresholds: one panel per threshold on a 2 x 3 grid
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(ncols=3, nrows=2)
fig.set_figwidth(15)
fig.set_figheight(8)
fig.tight_layout(pad=4.5)

bc.pl.kp_genes(adata, min_genes=min_genes, ax=ax1)
bc.pl.kp_cells(adata, min_cells=min_cells, ax=ax2)
bc.pl.kp_counts(adata, min_counts=min_UMI, ax=ax3)
bc.pl.max_counts(adata, max_counts=max_UMI, ax=ax4)
bc.pl.max_mito(
    adata, max_mito=max_mito, annotation_type="SYMBOL", species="human", ax=ax5
)
# draw into the sixth panel like the other plots; without ``ax`` the last
# subplot of the grid stays empty
bc.pl.max_genes(adata, max_genes=max_genes, ax=ax6)

###############################################################################
# application of filtering thresholds
# -----------------------------------
#
# Using the chosen thresholds the data is filtered. Before and after filtering results are depicted to compare.

# visualize data before filtering
sc.pl.violin(
    adata, ["n_counts", "n_genes", "percent_mito"], multi_panel=True, jitter=0.4
)

print(
    "The AnnData object currently contains:",
    str(adata.shape[0]),
    "cells and",
    str(adata.shape[1]),
    "genes",
)
print(adata)

# perform filtering
adata = bc.pp.filter(
    adata,
    max_counts=max_UMI,
    max_genes=max_genes,
    max_mito=max_mito,
    min_genes=min_genes,
    min_counts=min_UMI,
    min_cells=min_cells,
)

# visualize data after filtering
sc.pl.violin(
    adata, ["n_counts", "n_genes", "percent_mito"], multi_panel=True, jitter=0.4
)

print(
    "The AnnData object now contains:",
    str(adata.shape[0]),
    "cells and",
    str(adata.shape[1]),
    "genes",
)
print(adata)
6 | 7 | """ 8 | 9 | import besca as bc 10 | import scanpy as sc 11 | import pytest 12 | pytest.skip('Test is only for here as example and should not be executed') 13 | # import example dataset that has previously been filtered 14 | adata = bc.datasets.pbmc3k_filtered() 15 | ## We get the raw matrix containing all the initial genes, keeping the filtering on the cells 16 | adata = bc.get_raw(adata) 17 | 18 | ############################################################################### 19 | # highly variable gene selection 20 | # ------------------------------ 21 | # 22 | # select highly variable genes (considers correction for gene expression level) 23 | 24 | # define thresholds for highly variable genes 25 | variable_genes_min_mean = 0.01 26 | variable_genes_max_mean = 5 27 | variable_genes_min_disp = 0.4 28 | 29 | # identify genes with variable expression 30 | filter_result = sc.pp.filter_genes_dispersion( 31 | adata.X, 32 | min_mean=variable_genes_min_mean, 33 | max_mean=variable_genes_max_mean, 34 | min_disp=variable_genes_min_disp, 35 | ) 36 | sc.pl.filter_genes_dispersion(filter_result) 37 | nbr_variable_genes = sum(filter_result.gene_subset) 38 | print("number of variable genes selected ", nbr_variable_genes) 39 | 40 | # perform the actual filtering 41 | adata = adata[:, filter_result.gene_subset] 42 | 43 | ############################################################################### 44 | # set random seed 45 | # --------------- 46 | # To get reproducible results you need to define a random seed for all of the stochastic 47 | # processes, such as e.g. PCA, neighbors, etc. 
48 | 49 | # set random seed 50 | random_seed = 0 51 | 52 | ############################################################################### 53 | # PCA 54 | # --- 55 | 56 | # log transform our data (is easier to work with numbers like this) 57 | sc.pp.log1p(adata) 58 | 59 | # Scale data to unit variance and zero mean, and cut-off at max value 10 60 | sc.pp.scale(adata, max_value=10) 61 | 62 | # calculate 50 principle components of the dataset 63 | sc.tl.pca(adata, random_state=random_seed, svd_solver="arpack") 64 | 65 | # visualize the amount of variance explained by each PC 66 | sc.pl.pca_variance_ratio(adata) 67 | 68 | # visualize the loadings onto the first 3 PCs 69 | sc.pl.pca_loadings(adata) 70 | 71 | ############################################################################### 72 | # nearest neighbors 73 | # ----------------- 74 | 75 | sc.pp.neighbors(adata, n_neighbors=15, random_state=random_seed, n_pcs=50) 76 | 77 | ############################################################################### 78 | # leiden clustering 79 | # ------------------ 80 | 81 | sc.tl.leiden(adata, random_state=random_seed) 82 | 83 | ############################################################################### 84 | # UMAP and t-SNE generation 85 | # ------------------------- 86 | 87 | # calculate UMAP 88 | sc.tl.umap(adata, random_state=random_seed) 89 | 90 | # calculate t-SNE 91 | sc.tl.tsne(adata, random_state=random_seed) 92 | 93 | ############################################################################### 94 | # visualize the results 95 | # --------------------- 96 | 97 | sc.pl.umap(adata, color=["leiden"]) 98 | sc.pl.tsne(adata, color=["leiden"]) 99 | -------------------------------------------------------------------------------- /besca/examples/gallery_examples/tools/README.txt: -------------------------------------------------------------------------------- 1 | Tools 2 | ----- 3 | 4 | This section contains all examples related to besca tools. 
-------------------------------------------------------------------------------- /besca/examples/gallery_examples/tools/plot_reclustering_function.py: -------------------------------------------------------------------------------- 1 | """ 2 | reclustering on specific louvain clusters 3 | ========================================= 4 | 5 | This example demonstrates how to perform a reclustering on a selected subset of 6 | louvain clusters. You will want to do this for example during the process of celltype 7 | annotation, when the clusters do not have a sufficient resolution to separate 8 | all clusters and mixed cell populations still exist. 9 | 10 | """ 11 | 12 | import besca as bc 13 | import scanpy as sc 14 | import pytest 15 | pytest.skip('Test is only for here as example and should not be executed') 16 | # load and preprocess data (here we will start from a preprocessed dataset) 17 | adata = bc.datasets.pbmc3k_processed() 18 | 19 | # extract subset using the recluster function which is part of the reclustering (rc) toolkit 20 | adata_subset = bc.tl.rc.recluster( 21 | adata, 22 | celltype=("2", "3", "4", "5", "6", "8", "9", "10", "11", "12"), 23 | celltype_label="leiden", 24 | resolution=1.2, 25 | ) 26 | 27 | 28 | # visualize the new clusters 29 | sc.pl.umap( 30 | adata_subset, color=["leiden", "CD3G", "CD8A", "CD4", "IL7R", "NKG7", "GNLY"] 31 | ) 32 | 33 | # append new celltype labels to the subclusters. 34 | # This is an approximate hand annotation that should be examined in more depth. 
35 | labels = [ 36 | "NK cell", # 0 37 | "CD4 T-cell", # 1 38 | "CD8 T-cell", # 2 39 | "CD4 T-cell", # 3 40 | "CD8 T-cell", # 4 41 | "CD8 T-cell", # 5 42 | "CD4 T-cell", # 6 43 | "CD4 T-cell", # 7 44 | "CD4 T-cell", # 8 45 | "CD4 T-cell", # 9 46 | "CD4 T-cell", # 10 47 | "CD4 T-cell", # 11 48 | "CD4 T-cell", # 12 49 | "CD4 T-cell", # 13 50 | "CD4 T-cell", # 14 51 | "CD4 T-cell", # 15 52 | "CD4 T-cell", # 16 53 | "CD4 T-cell", # 17 54 | "CD4 T-cell", # 18 55 | "CD4 T-cell", # 19 56 | ] # 10 57 | 58 | new_labels = [labels[i] for i in range(len(adata_subset.obs.get("leiden").value_counts().index.tolist()))] 59 | 60 | 61 | # merge the labels back into the original adata object 62 | # note this will overwrite whatever was saved in adata.obs.celltype; 63 | # Here it was not assigned yet. 64 | bc.tl.rc.annotate_new_cellnames( 65 | adata, adata_subset, names=new_labels, new_label="celltype" 66 | ) 67 | 68 | -------------------------------------------------------------------------------- /besca/examples/gallery_examples/workflows/README.txt: -------------------------------------------------------------------------------- 1 | Workflows 2 | --------- 3 | 4 | Demonstration workflows showing you how to perform certain tasks -------------------------------------------------------------------------------- /besca/export/__init__.py: -------------------------------------------------------------------------------- 1 | from besca.export._export import ( 2 | X_to_mtx, 3 | analysis_metadata, 4 | clustering, 5 | generate_gep, 6 | write_labeling_to_files, 7 | labeling_info, 8 | pseudobulk, 9 | ranked_genes, 10 | raw_to_mtx, 11 | ) 12 | 13 | __all__ = [ 14 | "X_to_mtx", 15 | "raw_to_mtx", 16 | "clustering", 17 | "write_labeling_to_files", 18 | "labeling_info", 19 | "analysis_metadata", 20 | "generate_gep", 21 | "ranked_genes", 22 | "pseudobulk", 23 | ] 24 | -------------------------------------------------------------------------------- /besca/pl/__init__.py: 
-------------------------------------------------------------------------------- 1 | from besca.pl._filter_threshold_plots import ( 2 | kp_genes, 3 | kp_counts, 4 | kp_cells, 5 | max_counts, 6 | max_genes, 7 | max_mito, 8 | ) 9 | from besca.pl._split_gene_expression import gene_expr_split, gene_expr_split_stacked 10 | from besca.pl._celltype_quantification import ( 11 | celllabel_quant_boxplot, 12 | celllabel_quant_stackedbar, 13 | ) 14 | from besca.pl._qc_plots import ( 15 | dropouts, 16 | librarysize_overview, 17 | detected_genes, 18 | library_size, 19 | transcript_capture_efficiency, 20 | top_genes_counts, 21 | ) 22 | 23 | from besca.pl._crispr_plots import ( 24 | infection_count, 25 | cell_per_KO, 26 | infection_level, 27 | plot_expression_by_sample, 28 | avg_KO_persample, 29 | KO_dotplot, 30 | compute_plot_de_crispr, 31 | enrichement_per_cluster, 32 | plot_comparison_of_cells 33 | ) 34 | from besca.pl._general import stacked_split_violin, box_per_ind, flex_dotplot 35 | from besca.pl._dot_heatmap import dot_heatmap, dot_heatmap_split, dot_heatmap_split_greyscale 36 | from besca.pl._update_palette import update_qualitative_palette 37 | from besca.pl._nomenclature_network import nomenclature_network 38 | from besca.pl._riverplot import riverplot_2categories 39 | 40 | __all__ = [ 41 | "kp_genes", 42 | "kp_counts", 43 | "kp_cells", 44 | "max_counts", 45 | "max_genes", 46 | "max_mito", 47 | "dropouts", 48 | "detected_genes", 49 | "library_size", 50 | "librarysize_overview", 51 | "transcript_capture_efficiency", 52 | "top_genes_counts", 53 | "infection_count", 54 | "cell_per_KO", 55 | "infection_level", 56 | "plot_expression_by_sample", 57 | "avg_KO_persample", 58 | "KO_dotplot", 59 | "compute_plot_de_crispr", 60 | "enrichement_per_cluster", 61 | "plot_comparison_of_cells", 62 | "gene_expr_split", 63 | "gene_expr_split_stacked", 64 | "box_per_ind", 65 | "stacked_split_violin", 66 | "celllabel_quant_boxplot", 67 | "celllabel_quant_stackedbar", 68 | "dot_heatmap", 69 | 
"dot_heatmap_split", 70 | "dot_heatmap_split_greyscale", 71 | "update_qualitative_palette", 72 | "nomenclature_network", 73 | "riverplot_2categories", 74 | "flex_dotplot", 75 | ] 76 | -------------------------------------------------------------------------------- /besca/pl/_nomenclature_network.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | import networkx as nx 4 | import pandas as pd 5 | from matplotlib import pyplot as plt 6 | 7 | def nomenclature_network( 8 | config_file: str, 9 | selected_roots=[], 10 | root_term="None", 11 | font_size=7, 12 | node_size=200, 13 | node_color="tan", 14 | alpha=0.8, 15 | figsize=(7,7) 16 | ): 17 | """Plot a nomenclature network based on annotation config file. 18 | 19 | This function plots the relations between celltypes as described within an annotation config file, as the one provided with besca. 20 | It displays parent - child term relation as a directed graph G ( V, E); Subsetting of such graph is possible using selected_roots argument. 21 | 22 | 23 | Parameters 24 | ---------- 25 | config_file: `str` 26 | config file from besca, expects a path to a tab separated file containing Parent and Term columns 27 | selected_roots: `list` 28 | if list contained terms, will only display the hierarchy starting from those terms. 29 | root_term : `str` 30 | the string indicating in the config file that a term does not have a parent term. 31 | figsize: (width, height) or None | default = (7,7) 32 | optional parameter to define the figure size of the plot that is to be generated 33 | 34 | Returns 35 | ------- 36 | Figure 37 | A matplotlib plt object containing the generated plot. 
38 | 39 | Example 40 | ------- 41 | >>> import besca as bc 42 | >>> import pkg_resources 43 | >>> config_file = pkg_resources.resource_filename('besca', 'datasets/genesets/CellNames_scseqCMs6_config.tsv') 44 | >>> plt = bc.pl.nomenclature_network(config_file) 45 | >>> plt.show() 46 | >>> plt = bc.pl.nomenclature_network(config_file, selected_roots = ['Epithelial', 'Tcell']) 47 | >>> plt.show() 48 | 49 | """ 50 | pydot_import = importlib.util.find_spec("pydot") 51 | 52 | if pydot_import is None: 53 | raise ImportError( 54 | "_nomenclature_network.py requires pydot. Install with pip install pydot" 55 | ) 56 | # read tsv file 57 | df = pd.read_csv(config_file, sep="\t") 58 | 59 | # By default root parents have the entry "None". we need to replace this with its own name so a network per root is created 60 | roots_to_set = df["Parent"] == root_term 61 | for row, root in zip(df.iterrows(), roots_to_set): 62 | if root: 63 | df.at[row[0], "Parent"] = row[1]["Term"] 64 | 65 | # We create the network with networkx library 66 | G = nx.from_pandas_edgelist( 67 | df, target="Term", source="Parent", create_using=nx.DiGraph() 68 | ) 69 | ## Subgraph extraction if specific roots were given 70 | if selected_roots: 71 | selected_nodes = set() 72 | for ss in selected_roots: 73 | try: 74 | selected_nodes.update(nx.descendants(G, ss)) 75 | except Exception as e: 76 | print(ss + " node not found in config file") 77 | 78 | G = G.subgraph(list(selected_nodes) + selected_roots) 79 | 80 | if figsize is not None: 81 | plt.figure(3, figsize=(figsize[0], figsize[1])) 82 | nx.draw_networkx( 83 | G, 84 | nx.nx_pydot.pydot_layout(G), 85 | font_size=font_size, 86 | node_size=node_size, 87 | node_color=node_color, 88 | alpha=alpha, 89 | ) 90 | plt.axis("off") 91 | plt.tight_layout() 92 | 93 | return plt 94 | -------------------------------------------------------------------------------- /besca/pl/_update_palette.py: -------------------------------------------------------------------------------- 1 
def check_colors(aColor):
    """Convert a color to a hex string if needed.

    Converting up front avoids warning messages a posteriori.

    parameters
    ----------
    aColor: `tuple`, `list` or `str`
        color to check. A sequence of three numbers is interpreted as RGB:
        if every component lies within [0, 1] the values are treated as
        fractions and scaled to 0-255, otherwise they are treated as 0-255
        channel values. Note that this makes (1, 1, 1) white. A hex string
        (``#rgb`` or ``#rrggbb``) is returned unchanged.

    returns
    -------
    the color in hex

    raises
    ------
    SystemExit
        if the color cannot be converted (unknown string, non numeric
        components or channel values outside 0-255).
    """
    error_message = "Color " + str(aColor) + " could not be converted"

    if isinstance(aColor, (tuple, list)) and len(aColor) == 3:
        try:
            # Decide on ALL components at once; comparing the tuple itself
            # against (1, 1, 1) would be a lexicographic comparison.
            if all(0 <= channel <= 1 for channel in aColor):
                rgb = [int(round(channel * 255)) for channel in aColor]
            else:  # assuming rgb given as 0-255 values
                rgb = [int(round(channel)) for channel in aColor]
        except (TypeError, ValueError):
            # non numeric components or NaN
            sys.exit(error_message)
        if not all(0 <= channel <= 255 for channel in rgb):
            sys.exit(error_message)
        return "#{:02x}{:02x}{:02x}".format(*rgb)

    if isinstance(aColor, str) and re.search(
        r"^#(?:[0-9a-fA-F]{3}){1,2}$", aColor
    ):
        return aColor

    sys.exit(error_message)


def update_qualitative_palette(
    adata: "AnnData",
    palette: Dict[str, str],
    group: str = "leiden",
    checkColors: bool = True,
) -> None:
    """Update adata object such that the umap will adhere to the palette provided.

    parameters
    ----------
    adata: `AnnData`
        the AnnData object
    palette: `dict`
        dict with keys as the values of the group observation. To avoid
        warnings from matplotlib it is advised to have hex color values.
    group: `str`
        string identifying the column name of adata.obs where colors will be set.
        Used internally like this: `pd.Categorical(adata.obs[group]).categories.tolist()`
    checkColors: `boolean`
        check the colors given and transform them into hex values if needed.
        Tuples of RGB values (0-1 or 0-255) can be converted.

    returns
    -------
    None; updates ``adata.uns[group + "_colors"]`` so that the color order
    matches the order of the categories of ``adata.obs[group]``.

    raises
    ------
    SystemExit
        if the palette does not contain every category of the group, or if
        ``checkColors`` is set and a color cannot be converted.
    """

    # get the groups/categories in the same way as scanpy does it in
    # scanpy/plotting/_tools/scatterplots.py: _get_palette
    category_list = pd.Categorical(adata.obs[group]).categories.tolist()

    # Checking validity: every category needs a color
    if any(category not in palette for category in category_list):
        sys.exit(
            "Please provide a palette dict containing all element of the group " + group
        )
    if checkColors:
        palette = {key: check_colors(color) for key, color in palette.items()}

    # colors are stored in category order, which is the order scanpy expects
    adata.uns[group + "_colors"] = [palette[category] for category in category_list]
    return None
def fraction_counts(
    adata, species="human", name="percent_mito", use_genes="SYMBOL", specific_file=None
):
    """Function to calculate fraction of counts per cell from a gene list.

    This function calculates the fraction of counts per cell for
    a list of genes (for example mito genes). If no specific file is given the
    gene list is obtained from ``get_mito_genes(species, use_genes)``.
    Note that the input file consists of two columns
    (ENSEMBL gene id and gene symbol), tab separated and without header.

    Parameters
    ----------
    adata: `AnnData`
        AnnData object
    species: `str` | default = human
        species for mitochondrial content evaluation
    name: `str` | default = percent_mito
        String identifying the column name to which the results
        should be written to in the AnnData.obs object
    use_genes: `SYMBOL` or `ENSEMBL` | default = SYMBOL
        String defining whether ENSEMBL id's or gene symbols are used in the
        adata.var_names (defines which column of input gene list is read)
    specific_file: `str` | default None.
        if indicated, the file will be used to extract the gene list

    Returns
    -------
    None
        Returns None but updates adata with new column named 'name'
        containing calculated fraction of counts. Cells without any counts
        get a fraction of 0 and trigger a warning.

    Raises
    ------
    ValueError
        if ``specific_file`` is given and ``use_genes`` is neither 'SYMBOL'
        nor 'ENSEMBL'.

    Example
    -------
    >>> import besca as bc
    >>> import os
    >>> adata = bc.datasets.simulated_pbmc3k_raw()
    >>> bc.pp.fraction_counts(adata, 'human', use_genes='SYMBOL', specific_file=f"{os.path.dirname(__file__)[:-3]}/datasets/mito_files/test.mito.tsv")
    >>> counts = adata.obs.head(5)
    """
    # Local import: only ``sum`` and ``any`` are imported from numpy at file
    # level, ``asarray`` is needed to treat dense and sparse matrices alike.
    import numpy as np

    if specific_file is None:
        gene_list = get_mito_genes(species, use_genes)
    else:
        # file layout: ENS_GENE_ID GENE_SYMBOL (2 cols)
        column_by_id_type = {"ENSEMBL": 0, "SYMBOL": 1}
        if use_genes not in column_by_id_type:
            raise ValueError(
                "use_genes must be 'SYMBOL' or 'ENSEMBL', got " + repr(use_genes)
            )
        gene_table = read_csv(specific_file, header=None, sep="\t")
        gene_list = list(gene_table[column_by_id_type[use_genes]])

    # set lookup keeps the membership test cheap; order follows adata.var_names
    wanted = set(gene_list)
    genes = [gene for gene in adata.var_names if gene in wanted]

    # for each cell compute fraction of counts in gene_list vs. all genes
    # axis=1 --> sum over rows
    if genes:
        # ``.sum(axis=1)`` yields a column matrix for sparse input and a 1-d
        # array for dense input; flatten both to a float vector so that the
        # ``inf`` marker below can be stored.
        n_counts = np.asarray(adata.X.sum(axis=1), dtype=float).ravel()
        empty_cells = n_counts == 0
        if empty_cells.any():
            warnings.warn(
                "Some of the cells contain no counts. "
                "Do not forget to remove 'empty' cells from data."
            )
        # dividing by inf yields a fraction of 0 for empty cells
        n_counts[empty_cells] = float("inf")
        gene_counts = np.asarray(adata[:, genes].X.sum(axis=1), dtype=float).ravel()
        adata.obs[name] = gene_counts / n_counts
    else:
        adata.obs[name] = 0.0
        print(
            "None of the genes from input list found in data set. "
            "Please ensure you have correctly specified use_genes to match "
            "the type of genes saved in adata.var_names."
        )
    return None
8 | Continue running notebook with the given filtering parameters.
9 | 10 |
11 | """ 12 | 13 | javascript = """ 14 |